From 11413893a0d0b19df976eb9a2b600ba56802db2f Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Tue, 4 Jan 2022 12:24:00 +0100
Subject: [PATCH 0001/1586] EDAC: Use proper list of struct attribute for
 attributes

The EDAC sysfs code is doing some crazy casting of the list of
attributes that is not necessary at all. Instead, properly point to the
correct attribute structure in the lists, which removes the need to
cast anything and the code is now properly typesafe (as much as sysfs
attribute logic is typesafe...)

Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lore.kernel.org/r/20220104112401.1067148-1-gregkh@linuxfoundation.org
---
 drivers/edac/edac_device_sysfs.c | 28 ++++++++++++++--------------
 drivers/edac/edac_pci_sysfs.c    | 24 ++++++++++++------------
 2 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/drivers/edac/edac_device_sysfs.c b/drivers/edac/edac_device_sysfs.c
index 5e75937537997..f4aefced2ccf8 100644
--- a/drivers/edac/edac_device_sysfs.c
+++ b/drivers/edac/edac_device_sysfs.c
@@ -163,11 +163,11 @@ CTL_INFO_ATTR(poll_msec, S_IRUGO | S_IWUSR,
 	edac_device_ctl_poll_msec_show, edac_device_ctl_poll_msec_store);
 
 /* Base Attributes of the EDAC_DEVICE ECC object */
-static struct ctl_info_attribute *device_ctrl_attr[] = {
-	&attr_ctl_info_panic_on_ue,
-	&attr_ctl_info_log_ue,
-	&attr_ctl_info_log_ce,
-	&attr_ctl_info_poll_msec,
+static struct attribute *device_ctrl_attr[] = {
+	&attr_ctl_info_panic_on_ue.attr,
+	&attr_ctl_info_log_ue.attr,
+	&attr_ctl_info_log_ce.attr,
+	&attr_ctl_info_poll_msec.attr,
 	NULL,
 };
 
@@ -217,7 +217,7 @@ static void edac_device_ctrl_master_release(struct kobject *kobj)
 static struct kobj_type ktype_device_ctrl = {
 	.release = edac_device_ctrl_master_release,
 	.sysfs_ops = &device_ctl_info_ops,
-	.default_attrs = (struct attribute **)device_ctrl_attr,
+	.default_attrs = device_ctrl_attr,
 };
 
 /*
@@ -389,9 +389,9 @@ INSTANCE_ATTR(ce_count, S_IRUGO, instance_ce_count_show, NULL);
 INSTANCE_ATTR(ue_count, S_IRUGO, instance_ue_count_show, NULL);
 
 /* list of edac_dev 'instance' attributes */
-static struct instance_attribute *device_instance_attr[] = {
-	&attr_instance_ce_count,
-	&attr_instance_ue_count,
+static struct attribute *device_instance_attr[] = {
+	&attr_instance_ce_count.attr,
+	&attr_instance_ue_count.attr,
 	NULL,
 };
 
@@ -399,7 +399,7 @@ static struct instance_attribute *device_instance_attr[] = {
 static struct kobj_type ktype_instance_ctrl = {
 	.release = edac_device_ctrl_instance_release,
 	.sysfs_ops = &device_instance_ops,
-	.default_attrs = (struct attribute **)device_instance_attr,
+	.default_attrs = device_instance_attr,
 };
 
 /* edac_dev -> instance -> block information */
@@ -487,9 +487,9 @@ BLOCK_ATTR(ce_count, S_IRUGO, block_ce_count_show, NULL);
 BLOCK_ATTR(ue_count, S_IRUGO, block_ue_count_show, NULL);
 
 /* list of edac_dev 'block' attributes */
-static struct edac_dev_sysfs_block_attribute *device_block_attr[] = {
-	&attr_block_ce_count,
-	&attr_block_ue_count,
+static struct attribute *device_block_attr[] = {
+	&attr_block_ce_count.attr,
+	&attr_block_ue_count.attr,
 	NULL,
 };
 
@@ -497,7 +497,7 @@ static struct edac_dev_sysfs_block_attribute *device_block_attr[] = {
 static struct kobj_type ktype_block_ctrl = {
 	.release = edac_device_ctrl_block_release,
 	.sysfs_ops = &device_block_ops,
-	.default_attrs = (struct attribute **)device_block_attr,
+	.default_attrs = device_block_attr,
 };
 
 /* block ctor/dtor  code */
diff --git a/drivers/edac/edac_pci_sysfs.c b/drivers/edac/edac_pci_sysfs.c
index 53042af7262e2..c041fb8f3ef27 100644
--- a/drivers/edac/edac_pci_sysfs.c
+++ b/drivers/edac/edac_pci_sysfs.c
@@ -135,9 +135,9 @@ INSTANCE_ATTR(pe_count, S_IRUGO, instance_pe_count_show, NULL);
 INSTANCE_ATTR(npe_count, S_IRUGO, instance_npe_count_show, NULL);
 
 /* pci instance attributes */
-static struct instance_attribute *pci_instance_attr[] = {
-	&attr_instance_pe_count,
-	&attr_instance_npe_count,
+static struct attribute *pci_instance_attr[] = {
+	&attr_instance_pe_count.attr,
+	&attr_instance_npe_count.attr,
 	NULL
 };
 
@@ -145,7 +145,7 @@ static struct instance_attribute *pci_instance_attr[] = {
 static struct kobj_type ktype_pci_instance = {
 	.release = edac_pci_instance_release,
 	.sysfs_ops = &pci_instance_ops,
-	.default_attrs = (struct attribute **)pci_instance_attr,
+	.default_attrs = pci_instance_attr,
 };
 
 /*
@@ -292,13 +292,13 @@ EDAC_PCI_ATTR(pci_parity_count, S_IRUGO, edac_pci_int_show, NULL);
 EDAC_PCI_ATTR(pci_nonparity_count, S_IRUGO, edac_pci_int_show, NULL);
 
 /* Base Attributes of the memory ECC object */
-static struct edac_pci_dev_attribute *edac_pci_attr[] = {
-	&edac_pci_attr_check_pci_errors,
-	&edac_pci_attr_edac_pci_log_pe,
-	&edac_pci_attr_edac_pci_log_npe,
-	&edac_pci_attr_edac_pci_panic_on_pe,
-	&edac_pci_attr_pci_parity_count,
-	&edac_pci_attr_pci_nonparity_count,
+static struct attribute *edac_pci_attr[] = {
+	&edac_pci_attr_check_pci_errors.attr,
+	&edac_pci_attr_edac_pci_log_pe.attr,
+	&edac_pci_attr_edac_pci_log_npe.attr,
+	&edac_pci_attr_edac_pci_panic_on_pe.attr,
+	&edac_pci_attr_pci_parity_count.attr,
+	&edac_pci_attr_pci_nonparity_count.attr,
 	NULL,
 };
 
@@ -327,7 +327,7 @@ static void edac_pci_release_main_kobj(struct kobject *kobj)
 static struct kobj_type ktype_edac_pci_main_kobj = {
 	.release = edac_pci_release_main_kobj,
 	.sysfs_ops = &edac_pci_sysfs_ops,
-	.default_attrs = (struct attribute **)edac_pci_attr,
+	.default_attrs = edac_pci_attr,
 };
 
 /**
-- 
GitLab


From 625c6b55699777ece6b87235f579e2cfbde049ce Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Tue, 4 Jan 2022 12:24:01 +0100
Subject: [PATCH 0002/1586] EDAC: Use default_groups in kobj_type

There are currently 2 ways to create a set of sysfs files for a
kobj_type, through the default_attrs field, and the default_groups
field. Move the edac sysfs code to use default_groups field which has
been the preferred way since

  aa30f47cf666 ("kobject: Add support for default attribute groups to kobj_type")

so that the obsolete default_attrs field can be removed soon.

Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lore.kernel.org/r/20220104112401.1067148-2-gregkh@linuxfoundation.org
---
 drivers/edac/edac_device_sysfs.c | 15 +++++++++------
 drivers/edac/edac_pci_sysfs.c    | 10 ++++++----
 2 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/drivers/edac/edac_device_sysfs.c b/drivers/edac/edac_device_sysfs.c
index f4aefced2ccf8..9a61d92bdf420 100644
--- a/drivers/edac/edac_device_sysfs.c
+++ b/drivers/edac/edac_device_sysfs.c
@@ -163,13 +163,14 @@ CTL_INFO_ATTR(poll_msec, S_IRUGO | S_IWUSR,
 	edac_device_ctl_poll_msec_show, edac_device_ctl_poll_msec_store);
 
 /* Base Attributes of the EDAC_DEVICE ECC object */
-static struct attribute *device_ctrl_attr[] = {
+static struct attribute *device_ctrl_attrs[] = {
 	&attr_ctl_info_panic_on_ue.attr,
 	&attr_ctl_info_log_ue.attr,
 	&attr_ctl_info_log_ce.attr,
 	&attr_ctl_info_poll_msec.attr,
 	NULL,
 };
+ATTRIBUTE_GROUPS(device_ctrl);
 
 /*
  * edac_device_ctrl_master_release
@@ -217,7 +218,7 @@ static void edac_device_ctrl_master_release(struct kobject *kobj)
 static struct kobj_type ktype_device_ctrl = {
 	.release = edac_device_ctrl_master_release,
 	.sysfs_ops = &device_ctl_info_ops,
-	.default_attrs = device_ctrl_attr,
+	.default_groups = device_ctrl_groups,
 };
 
 /*
@@ -389,17 +390,18 @@ INSTANCE_ATTR(ce_count, S_IRUGO, instance_ce_count_show, NULL);
 INSTANCE_ATTR(ue_count, S_IRUGO, instance_ue_count_show, NULL);
 
 /* list of edac_dev 'instance' attributes */
-static struct attribute *device_instance_attr[] = {
+static struct attribute *device_instance_attrs[] = {
 	&attr_instance_ce_count.attr,
 	&attr_instance_ue_count.attr,
 	NULL,
 };
+ATTRIBUTE_GROUPS(device_instance);
 
 /* The 'ktype' for each edac_dev 'instance' */
 static struct kobj_type ktype_instance_ctrl = {
 	.release = edac_device_ctrl_instance_release,
 	.sysfs_ops = &device_instance_ops,
-	.default_attrs = device_instance_attr,
+	.default_groups = device_instance_groups,
 };
 
 /* edac_dev -> instance -> block information */
@@ -487,17 +489,18 @@ BLOCK_ATTR(ce_count, S_IRUGO, block_ce_count_show, NULL);
 BLOCK_ATTR(ue_count, S_IRUGO, block_ue_count_show, NULL);
 
 /* list of edac_dev 'block' attributes */
-static struct attribute *device_block_attr[] = {
+static struct attribute *device_block_attrs[] = {
 	&attr_block_ce_count.attr,
 	&attr_block_ue_count.attr,
 	NULL,
 };
+ATTRIBUTE_GROUPS(device_block);
 
 /* The 'ktype' for each edac_dev 'block' */
 static struct kobj_type ktype_block_ctrl = {
 	.release = edac_device_ctrl_block_release,
 	.sysfs_ops = &device_block_ops,
-	.default_attrs = device_block_attr,
+	.default_groups = device_block_groups,
 };
 
 /* block ctor/dtor  code */
diff --git a/drivers/edac/edac_pci_sysfs.c b/drivers/edac/edac_pci_sysfs.c
index c041fb8f3ef27..888d5728ecef7 100644
--- a/drivers/edac/edac_pci_sysfs.c
+++ b/drivers/edac/edac_pci_sysfs.c
@@ -135,17 +135,18 @@ INSTANCE_ATTR(pe_count, S_IRUGO, instance_pe_count_show, NULL);
 INSTANCE_ATTR(npe_count, S_IRUGO, instance_npe_count_show, NULL);
 
 /* pci instance attributes */
-static struct attribute *pci_instance_attr[] = {
+static struct attribute *pci_instance_attrs[] = {
 	&attr_instance_pe_count.attr,
 	&attr_instance_npe_count.attr,
 	NULL
 };
+ATTRIBUTE_GROUPS(pci_instance);
 
 /* the ktype for a pci instance */
 static struct kobj_type ktype_pci_instance = {
 	.release = edac_pci_instance_release,
 	.sysfs_ops = &pci_instance_ops,
-	.default_attrs = pci_instance_attr,
+	.default_groups = pci_instance_groups,
 };
 
 /*
@@ -292,7 +293,7 @@ EDAC_PCI_ATTR(pci_parity_count, S_IRUGO, edac_pci_int_show, NULL);
 EDAC_PCI_ATTR(pci_nonparity_count, S_IRUGO, edac_pci_int_show, NULL);
 
 /* Base Attributes of the memory ECC object */
-static struct attribute *edac_pci_attr[] = {
+static struct attribute *edac_pci_attrs[] = {
 	&edac_pci_attr_check_pci_errors.attr,
 	&edac_pci_attr_edac_pci_log_pe.attr,
 	&edac_pci_attr_edac_pci_log_npe.attr,
@@ -301,6 +302,7 @@ static struct attribute *edac_pci_attr[] = {
 	&edac_pci_attr_pci_nonparity_count.attr,
 	NULL,
 };
+ATTRIBUTE_GROUPS(edac_pci);
 
 /*
  * edac_pci_release_main_kobj
@@ -327,7 +329,7 @@ static void edac_pci_release_main_kobj(struct kobject *kobj)
 static struct kobj_type ktype_edac_pci_main_kobj = {
 	.release = edac_pci_release_main_kobj,
 	.sysfs_ops = &edac_pci_sysfs_ops,
-	.default_attrs = edac_pci_attr,
+	.default_groups = edac_pci_groups,
 };
 
 /**
-- 
GitLab


From 4810dd2c943edd98cd41a12b96745b16b1d6b4f5 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Thu, 2 Dec 2021 11:50:24 +0200
Subject: [PATCH 0003/1586] perf/tests: Add AMX instructions to x86 instruction
 decoder test

The x86 instruction decoder is used for both kernel instructions and
user space instructions (e.g. uprobes, perf tools Intel PT), so it is
good to update it with new instructions.

Add AMX instructions to the x86 instruction decoder test.

A subsequent patch adds the instructions to the instruction decoder.

Reference:
Intel Architecture Instruction Set Extensions and Future Features
Programming Reference
May 2021
Document Number: 319433-044

Example:

  $ INSN='ldtilecfg\|sttilecfg\|tdpbf16ps\|tdpbssd\|'
  $ INSN+='tdpbsud\|tdpbusd\|'tdpbuud\|tileloadd\|'
  $ INSN+='tileloaddt1\|tilerelease\|tilestored\|tilezero'
  $ perf test -v "x86 instruction decoder" |& grep -i $INSN
  Failed to decode: c4 e2 78 49 04 c8    	ldtilecfg (%rax,%rcx,8)
  Failed to decode: c4 c2 78 49 04 c8    	ldtilecfg (%r8,%rcx,8)
  Failed to decode: c4 e2 79 49 04 c8    	sttilecfg (%rax,%rcx,8)
  Failed to decode: c4 c2 79 49 04 c8    	sttilecfg (%r8,%rcx,8)
  Failed to decode: c4 e2 7a 5c d1       	tdpbf16ps %tmm0,%tmm1,%tmm2
  Failed to decode: c4 e2 7b 5e d1       	tdpbssd %tmm0,%tmm1,%tmm2
  Failed to decode: c4 e2 7a 5e d1       	tdpbsud %tmm0,%tmm1,%tmm2
  Failed to decode: c4 e2 79 5e d1       	tdpbusd %tmm0,%tmm1,%tmm2
  Failed to decode: c4 e2 78 5e d1       	tdpbuud %tmm0,%tmm1,%tmm2
  Failed to decode: c4 e2 7b 4b 0c c8    	tileloadd (%rax,%rcx,8),%tmm1
  Failed to decode: c4 c2 7b 4b 14 c8    	tileloadd (%r8,%rcx,8),%tmm2
  Failed to decode: c4 e2 79 4b 0c c8    	tileloaddt1 (%rax,%rcx,8),%tmm1
  Failed to decode: c4 c2 79 4b 14 c8    	tileloaddt1 (%r8,%rcx,8),%tmm2
  Failed to decode: c4 e2 78 49 c0       	tilerelease
  Failed to decode: c4 e2 7a 4b 0c c8    	tilestored %tmm1,(%rax,%rcx,8)
  Failed to decode: c4 c2 7a 4b 14 c8    	tilestored %tmm2,(%r8,%rcx,8)
  Failed to decode: c4 e2 7b 49 c0       	tilezero %tmm0
  Failed to decode: c4 e2 7b 49 f8       	tilezero %tmm7

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Link: https://lore.kernel.org/r/20211202095029.2165714-2-adrian.hunter@intel.com
---
 tools/perf/arch/x86/tests/insn-x86-dat-64.c  | 36 ++++++++++++++++++++
 tools/perf/arch/x86/tests/insn-x86-dat-src.c | 21 ++++++++++++
 2 files changed, 57 insertions(+)

diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-64.c b/tools/perf/arch/x86/tests/insn-x86-dat-64.c
index 5da17d41d302b..3548565a1cc55 100644
--- a/tools/perf/arch/x86/tests/insn-x86-dat-64.c
+++ b/tools/perf/arch/x86/tests/insn-x86-dat-64.c
@@ -2459,6 +2459,42 @@
 "3e f2 ff a4 c8 78 56 34 12 \tnotrack bnd jmpq *0x12345678(%rax,%rcx,8)",},
 {{0x3e, 0xf2, 0x41, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "jmp", "indirect",
 "3e f2 41 ff a4 c8 78 56 34 12 \tnotrack bnd jmpq *0x12345678(%r8,%rcx,8)",},
+{{0xc4, 0xe2, 0x78, 0x49, 0x04, 0xc8, }, 6, 0, "", "",
+"c4 e2 78 49 04 c8    \tldtilecfg (%rax,%rcx,8)",},
+{{0xc4, 0xc2, 0x78, 0x49, 0x04, 0xc8, }, 6, 0, "", "",
+"c4 c2 78 49 04 c8    \tldtilecfg (%r8,%rcx,8)",},
+{{0xc4, 0xe2, 0x79, 0x49, 0x04, 0xc8, }, 6, 0, "", "",
+"c4 e2 79 49 04 c8    \tsttilecfg (%rax,%rcx,8)",},
+{{0xc4, 0xc2, 0x79, 0x49, 0x04, 0xc8, }, 6, 0, "", "",
+"c4 c2 79 49 04 c8    \tsttilecfg (%r8,%rcx,8)",},
+{{0xc4, 0xe2, 0x7a, 0x5c, 0xd1, }, 5, 0, "", "",
+"c4 e2 7a 5c d1       \ttdpbf16ps %tmm0,%tmm1,%tmm2",},
+{{0xc4, 0xe2, 0x7b, 0x5e, 0xd1, }, 5, 0, "", "",
+"c4 e2 7b 5e d1       \ttdpbssd %tmm0,%tmm1,%tmm2",},
+{{0xc4, 0xe2, 0x7a, 0x5e, 0xd1, }, 5, 0, "", "",
+"c4 e2 7a 5e d1       \ttdpbsud %tmm0,%tmm1,%tmm2",},
+{{0xc4, 0xe2, 0x79, 0x5e, 0xd1, }, 5, 0, "", "",
+"c4 e2 79 5e d1       \ttdpbusd %tmm0,%tmm1,%tmm2",},
+{{0xc4, 0xe2, 0x78, 0x5e, 0xd1, }, 5, 0, "", "",
+"c4 e2 78 5e d1       \ttdpbuud %tmm0,%tmm1,%tmm2",},
+{{0xc4, 0xe2, 0x7b, 0x4b, 0x0c, 0xc8, }, 6, 0, "", "",
+"c4 e2 7b 4b 0c c8    \ttileloadd (%rax,%rcx,8),%tmm1",},
+{{0xc4, 0xc2, 0x7b, 0x4b, 0x14, 0xc8, }, 6, 0, "", "",
+"c4 c2 7b 4b 14 c8    \ttileloadd (%r8,%rcx,8),%tmm2",},
+{{0xc4, 0xe2, 0x79, 0x4b, 0x0c, 0xc8, }, 6, 0, "", "",
+"c4 e2 79 4b 0c c8    \ttileloaddt1 (%rax,%rcx,8),%tmm1",},
+{{0xc4, 0xc2, 0x79, 0x4b, 0x14, 0xc8, }, 6, 0, "", "",
+"c4 c2 79 4b 14 c8    \ttileloaddt1 (%r8,%rcx,8),%tmm2",},
+{{0xc4, 0xe2, 0x78, 0x49, 0xc0, }, 5, 0, "", "",
+"c4 e2 78 49 c0       \ttilerelease ",},
+{{0xc4, 0xe2, 0x7a, 0x4b, 0x0c, 0xc8, }, 6, 0, "", "",
+"c4 e2 7a 4b 0c c8    \ttilestored %tmm1,(%rax,%rcx,8)",},
+{{0xc4, 0xc2, 0x7a, 0x4b, 0x14, 0xc8, }, 6, 0, "", "",
+"c4 c2 7a 4b 14 c8    \ttilestored %tmm2,(%r8,%rcx,8)",},
+{{0xc4, 0xe2, 0x7b, 0x49, 0xc0, }, 5, 0, "", "",
+"c4 e2 7b 49 c0       \ttilezero %tmm0",},
+{{0xc4, 0xe2, 0x7b, 0x49, 0xf8, }, 5, 0, "", "",
+"c4 e2 7b 49 f8       \ttilezero %tmm7",},
 {{0x0f, 0x01, 0xcf, }, 3, 0, "", "",
 "0f 01 cf             \tencls  ",},
 {{0x0f, 0x01, 0xd7, }, 3, 0, "", "",
diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-src.c b/tools/perf/arch/x86/tests/insn-x86-dat-src.c
index c3808e94c46e0..7906f7b2ffeb9 100644
--- a/tools/perf/arch/x86/tests/insn-x86-dat-src.c
+++ b/tools/perf/arch/x86/tests/insn-x86-dat-src.c
@@ -1910,6 +1910,27 @@ int main(void)
 	asm volatile("notrack bnd jmpq *0x12345678(%rax,%rcx,8)");	/* Expecting: jmp indirect 0 */
 	asm volatile("notrack bnd jmpq *0x12345678(%r8,%rcx,8)");	/* Expecting: jmp indirect 0 */
 
+	/* AMX */
+
+	asm volatile("ldtilecfg (%rax,%rcx,8)");
+	asm volatile("ldtilecfg (%r8,%rcx,8)");
+	asm volatile("sttilecfg (%rax,%rcx,8)");
+	asm volatile("sttilecfg (%r8,%rcx,8)");
+	asm volatile("tdpbf16ps %tmm0, %tmm1, %tmm2");
+	asm volatile("tdpbssd %tmm0, %tmm1, %tmm2");
+	asm volatile("tdpbsud %tmm0, %tmm1, %tmm2");
+	asm volatile("tdpbusd %tmm0, %tmm1, %tmm2");
+	asm volatile("tdpbuud %tmm0, %tmm1, %tmm2");
+	asm volatile("tileloadd (%rax,%rcx,8), %tmm1");
+	asm volatile("tileloadd (%r8,%rcx,8), %tmm2");
+	asm volatile("tileloaddt1 (%rax,%rcx,8), %tmm1");
+	asm volatile("tileloaddt1 (%r8,%rcx,8), %tmm2");
+	asm volatile("tilerelease");
+	asm volatile("tilestored %tmm1, (%rax,%rcx,8)");
+	asm volatile("tilestored %tmm2, (%r8,%rcx,8)");
+	asm volatile("tilezero %tmm0");
+	asm volatile("tilezero %tmm7");
+
 #else  /* #ifdef __x86_64__ */
 
 	/* bound r32, mem (same op code as EVEX prefix) */
-- 
GitLab


From 9dd94df75b30eca03ed2151dd5bbc152a6f19abf Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Thu, 2 Dec 2021 11:50:25 +0200
Subject: [PATCH 0004/1586] x86/insn: Add AMX instructions to the x86
 instruction decoder

The x86 instruction decoder is used for both kernel instructions and
user space instructions (e.g. uprobes, perf tools Intel PT), so it is
good to update it with new instructions.

Add AMX instructions to the x86 instruction decoder.

Reference:
Intel Architecture Instruction Set Extensions and Future Features
Programming Reference
May 2021
Document Number: 319433-044

Example using perf tools' x86 instruction decoder test:

  $ INSN='ldtilecfg\|sttilecfg\|tdpbf16ps\|tdpbssd\|'
  $ INSN+='tdpbsud\|tdpbusd\|'tdpbuud\|tileloadd\|'
  $ INSN+='tileloaddt1\|tilerelease\|tilestored\|tilezero'
  $ perf test -v "x86 instruction decoder" |& grep -i $INSN
  Decoded ok: c4 e2 78 49 04 c8    	ldtilecfg (%rax,%rcx,8)
  Decoded ok: c4 c2 78 49 04 c8    	ldtilecfg (%r8,%rcx,8)
  Decoded ok: c4 e2 79 49 04 c8    	sttilecfg (%rax,%rcx,8)
  Decoded ok: c4 c2 79 49 04 c8    	sttilecfg (%r8,%rcx,8)
  Decoded ok: c4 e2 7a 5c d1       	tdpbf16ps %tmm0,%tmm1,%tmm2
  Decoded ok: c4 e2 7b 5e d1       	tdpbssd %tmm0,%tmm1,%tmm2
  Decoded ok: c4 e2 7a 5e d1       	tdpbsud %tmm0,%tmm1,%tmm2
  Decoded ok: c4 e2 79 5e d1       	tdpbusd %tmm0,%tmm1,%tmm2
  Decoded ok: c4 e2 78 5e d1       	tdpbuud %tmm0,%tmm1,%tmm2
  Decoded ok: c4 e2 7b 4b 0c c8    	tileloadd (%rax,%rcx,8),%tmm1
  Decoded ok: c4 c2 7b 4b 14 c8    	tileloadd (%r8,%rcx,8),%tmm2
  Decoded ok: c4 e2 79 4b 0c c8    	tileloaddt1 (%rax,%rcx,8),%tmm1
  Decoded ok: c4 c2 79 4b 14 c8    	tileloaddt1 (%r8,%rcx,8),%tmm2
  Decoded ok: c4 e2 78 49 c0       	tilerelease
  Decoded ok: c4 e2 7a 4b 0c c8    	tilestored %tmm1,(%rax,%rcx,8)
  Decoded ok: c4 c2 7a 4b 14 c8    	tilestored %tmm2,(%r8,%rcx,8)
  Decoded ok: c4 e2 7b 49 c0       	tilezero %tmm0
  Decoded ok: c4 e2 7b 49 f8       	tilezero %tmm7

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Link: https://lore.kernel.org/r/20211202095029.2165714-3-adrian.hunter@intel.com
---
 arch/x86/lib/x86-opcode-map.txt       | 10 ++++++++--
 tools/arch/x86/lib/x86-opcode-map.txt | 10 ++++++++--
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index ec31f5b60323d..b2cc6c04cbfe7 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -690,7 +690,10 @@ AVXcode: 2
 45: vpsrlvd/q Vx,Hx,Wx (66),(v)
 46: vpsravd Vx,Hx,Wx (66),(v) | vpsravd/q Vx,Hx,Wx (66),(evo)
 47: vpsllvd/q Vx,Hx,Wx (66),(v)
-# Skip 0x48-0x4b
+# Skip 0x48
+49: TILERELEASE (v1),(000),(11B) | LDTILECFG Mtc (v1)(000) | STTILECFG Mtc (66),(v1),(000) | TILEZERO Vt (F2),(v1),(11B)
+# Skip 0x4a
+4b: TILELOADD Vt,Wsm (F2),(v1) | TILELOADDT1 Vt,Wsm (66),(v1) | TILESTORED Wsm,Vt (F3),(v)
 4c: vrcp14ps/d Vpd,Wpd (66),(ev)
 4d: vrcp14ss/d Vsd,Hpd,Wsd (66),(ev)
 4e: vrsqrt14ps/d Vpd,Wpd (66),(ev)
@@ -705,7 +708,10 @@ AVXcode: 2
 59: vpbroadcastq Vx,Wx (66),(v) | vbroadcasti32x2 Vx,Wx (66),(evo)
 5a: vbroadcasti128 Vqq,Mdq (66),(v) | vbroadcasti32x4/64x2 Vx,Wx (66),(evo)
 5b: vbroadcasti32x8/64x4 Vqq,Mdq (66),(ev)
-# Skip 0x5c-0x61
+5c: TDPBF16PS Vt,Wt,Ht (F3),(v1)
+# Skip 0x5d
+5e: TDPBSSD Vt,Wt,Ht (F2),(v1) | TDPBSUD Vt,Wt,Ht (F3),(v1) | TDPBUSD Vt,Wt,Ht (66),(v1) | TDPBUUD Vt,Wt,Ht (v1)
+# Skip 0x5f-0x61
 62: vpexpandb/w Vx,Wx (66),(ev)
 63: vpcompressb/w Wx,Vx (66),(ev)
 64: vpblendmd/q Vx,Hx,Wx (66),(ev)
diff --git a/tools/arch/x86/lib/x86-opcode-map.txt b/tools/arch/x86/lib/x86-opcode-map.txt
index ec31f5b60323d..b2cc6c04cbfe7 100644
--- a/tools/arch/x86/lib/x86-opcode-map.txt
+++ b/tools/arch/x86/lib/x86-opcode-map.txt
@@ -690,7 +690,10 @@ AVXcode: 2
 45: vpsrlvd/q Vx,Hx,Wx (66),(v)
 46: vpsravd Vx,Hx,Wx (66),(v) | vpsravd/q Vx,Hx,Wx (66),(evo)
 47: vpsllvd/q Vx,Hx,Wx (66),(v)
-# Skip 0x48-0x4b
+# Skip 0x48
+49: TILERELEASE (v1),(000),(11B) | LDTILECFG Mtc (v1)(000) | STTILECFG Mtc (66),(v1),(000) | TILEZERO Vt (F2),(v1),(11B)
+# Skip 0x4a
+4b: TILELOADD Vt,Wsm (F2),(v1) | TILELOADDT1 Vt,Wsm (66),(v1) | TILESTORED Wsm,Vt (F3),(v)
 4c: vrcp14ps/d Vpd,Wpd (66),(ev)
 4d: vrcp14ss/d Vsd,Hpd,Wsd (66),(ev)
 4e: vrsqrt14ps/d Vpd,Wpd (66),(ev)
@@ -705,7 +708,10 @@ AVXcode: 2
 59: vpbroadcastq Vx,Wx (66),(v) | vbroadcasti32x2 Vx,Wx (66),(evo)
 5a: vbroadcasti128 Vqq,Mdq (66),(v) | vbroadcasti32x4/64x2 Vx,Wx (66),(evo)
 5b: vbroadcasti32x8/64x4 Vqq,Mdq (66),(ev)
-# Skip 0x5c-0x61
+5c: TDPBF16PS Vt,Wt,Ht (F3),(v1)
+# Skip 0x5d
+5e: TDPBSSD Vt,Wt,Ht (F2),(v1) | TDPBSUD Vt,Wt,Ht (F3),(v1) | TDPBUSD Vt,Wt,Ht (66),(v1) | TDPBUUD Vt,Wt,Ht (v1)
+# Skip 0x5f-0x61
 62: vpexpandb/w Vx,Wx (66),(ev)
 63: vpcompressb/w Wx,Vx (66),(ev)
 64: vpblendmd/q Vx,Hx,Wx (66),(ev)
-- 
GitLab


From a6ea1142dee66f054a7ce51ebd053ef5ad976227 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Thu, 2 Dec 2021 11:50:26 +0200
Subject: [PATCH 0005/1586] perf/tests: Add misc instructions to the x86
 instruction decoder test

The x86 instruction decoder is used for both kernel instructions and
user space instructions (e.g. uprobes, perf tools Intel PT), so it is
good to update it with new instructions.

Add the following instructions to the x86 instruction decoder test:

	User Interrupt

		clui
		senduipi
		stui
		testui
		uiret

	Prediction history reset

		hreset

	Serialize instruction execution

		serialize

	TSX suspend load address tracking

		xresldtrk
		xsusldtrk

A subsequent patch adds the instructions to the instruction decoder.

Reference:
Intel Architecture Instruction Set Extensions and Future Features
Programming Reference
May 2021
Document Number: 319433-044

Example:

  $ perf test -v "x86 instruction decoder" |& grep -i hreset
  Failed to decode length (4 vs expected 6): f3 0f 3a f0 c0 00    	hreset $0x0
  Failed to decode length (4 vs expected 6): f3 0f 3a f0 c0 00    	hreset $0x0

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Link: https://lore.kernel.org/r/20211202095029.2165714-4-adrian.hunter@intel.com
---
 tools/perf/arch/x86/tests/insn-x86-dat-32.c  |  8 +++++++
 tools/perf/arch/x86/tests/insn-x86-dat-64.c  | 20 ++++++++++++++++++
 tools/perf/arch/x86/tests/insn-x86-dat-src.c | 22 ++++++++++++++++++++
 3 files changed, 50 insertions(+)

diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-32.c b/tools/perf/arch/x86/tests/insn-x86-dat-32.c
index 9708ae8920616..79e2050cd1c26 100644
--- a/tools/perf/arch/x86/tests/insn-x86-dat-32.c
+++ b/tools/perf/arch/x86/tests/insn-x86-dat-32.c
@@ -2197,6 +2197,14 @@
 "3e f2 ff 25 78 56 34 12 \tnotrack bnd jmp *0x12345678",},
 {{0x3e, 0xf2, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "jmp", "indirect",
 "3e f2 ff a4 c8 78 56 34 12 \tnotrack bnd jmp *0x12345678(%eax,%ecx,8)",},
+{{0xf3, 0x0f, 0x3a, 0xf0, 0xc0, 0x00, }, 6, 0, "", "",
+"f3 0f 3a f0 c0 00    \threset $0x0",},
+{{0x0f, 0x01, 0xe8, }, 3, 0, "", "",
+"0f 01 e8             \tserialize ",},
+{{0xf2, 0x0f, 0x01, 0xe9, }, 4, 0, "", "",
+"f2 0f 01 e9          \txresldtrk ",},
+{{0xf2, 0x0f, 0x01, 0xe8, }, 4, 0, "", "",
+"f2 0f 01 e8          \txsusldtrk ",},
 {{0x0f, 0x01, 0xcf, }, 3, 0, "", "",
 "0f 01 cf             \tencls  ",},
 {{0x0f, 0x01, 0xd7, }, 3, 0, "", "",
diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-64.c b/tools/perf/arch/x86/tests/insn-x86-dat-64.c
index 3548565a1cc55..b2d0ba45262b3 100644
--- a/tools/perf/arch/x86/tests/insn-x86-dat-64.c
+++ b/tools/perf/arch/x86/tests/insn-x86-dat-64.c
@@ -2495,6 +2495,26 @@
 "c4 e2 7b 49 c0       \ttilezero %tmm0",},
 {{0xc4, 0xe2, 0x7b, 0x49, 0xf8, }, 5, 0, "", "",
 "c4 e2 7b 49 f8       \ttilezero %tmm7",},
+{{0xf3, 0x0f, 0x01, 0xee, }, 4, 0, "", "",
+"f3 0f 01 ee          \tclui   ",},
+{{0xf3, 0x0f, 0xc7, 0xf0, }, 4, 0, "", "",
+"f3 0f c7 f0          \tsenduipi %rax",},
+{{0xf3, 0x41, 0x0f, 0xc7, 0xf0, }, 5, 0, "", "",
+"f3 41 0f c7 f0       \tsenduipi %r8",},
+{{0xf3, 0x0f, 0x01, 0xef, }, 4, 0, "", "",
+"f3 0f 01 ef          \tstui   ",},
+{{0xf3, 0x0f, 0x01, 0xed, }, 4, 0, "", "",
+"f3 0f 01 ed          \ttestui ",},
+{{0xf3, 0x0f, 0x01, 0xec, }, 4, 0, "", "",
+"f3 0f 01 ec          \tuiret  ",},
+{{0xf3, 0x0f, 0x3a, 0xf0, 0xc0, 0x00, }, 6, 0, "", "",
+"f3 0f 3a f0 c0 00    \threset $0x0",},
+{{0x0f, 0x01, 0xe8, }, 3, 0, "", "",
+"0f 01 e8             \tserialize ",},
+{{0xf2, 0x0f, 0x01, 0xe9, }, 4, 0, "", "",
+"f2 0f 01 e9          \txresldtrk ",},
+{{0xf2, 0x0f, 0x01, 0xe8, }, 4, 0, "", "",
+"f2 0f 01 e8          \txsusldtrk ",},
 {{0x0f, 0x01, 0xcf, }, 3, 0, "", "",
 "0f 01 cf             \tencls  ",},
 {{0x0f, 0x01, 0xd7, }, 3, 0, "", "",
diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-src.c b/tools/perf/arch/x86/tests/insn-x86-dat-src.c
index 7906f7b2ffeb9..425db6a1b580a 100644
--- a/tools/perf/arch/x86/tests/insn-x86-dat-src.c
+++ b/tools/perf/arch/x86/tests/insn-x86-dat-src.c
@@ -1931,6 +1931,15 @@ int main(void)
 	asm volatile("tilezero %tmm0");
 	asm volatile("tilezero %tmm7");
 
+	/* User Interrupt */
+
+	asm volatile("clui");
+	asm volatile("senduipi %rax");
+	asm volatile("senduipi %r8");
+	asm volatile("stui");
+	asm volatile("testui");
+	asm volatile("uiret");
+
 #else  /* #ifdef __x86_64__ */
 
 	/* bound r32, mem (same op code as EVEX prefix) */
@@ -3693,6 +3702,19 @@ int main(void)
 
 #endif /* #ifndef __x86_64__ */
 
+	/* Prediction history reset */
+
+	asm volatile("hreset $0");
+
+	/* Serialize instruction execution */
+
+	asm volatile("serialize");
+
+	/* TSX suspend load address tracking */
+
+	asm volatile("xresldtrk");
+	asm volatile("xsusldtrk");
+
 	/* SGX */
 
 	asm volatile("encls");
-- 
GitLab


From 0153d98f2dd6d5161fc4d496d785c10686d0d7b6 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Thu, 2 Dec 2021 11:50:27 +0200
Subject: [PATCH 0006/1586] x86/insn: Add misc instructions to x86 instruction
 decoder

x86 instruction decoder is used for both kernel instructions and user space
instructions (e.g. uprobes, perf tools Intel PT), so it is good to update
it with new instructions.

Add instructions to x86 instruction decoder:

	User Interrupt

		clui
		senduipi
		stui
		testui
		uiret

	Prediction history reset

		hreset

	Serialize instruction execution

		serialize

	TSX suspend load address tracking

		xresldtrk
		xsusldtrk

Reference:
Intel Architecture Instruction Set Extensions and Future Features
Programming Reference
May 2021
Document Number: 319433-044

Example using perf tools' x86 instruction decoder test:

  $ perf test -v "x86 instruction decoder" |& grep -i hreset
  Decoded ok: f3 0f 3a f0 c0 00           hreset $0x0
  Decoded ok: f3 0f 3a f0 c0 00           hreset $0x0

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Link: https://lore.kernel.org/r/20211202095029.2165714-5-adrian.hunter@intel.com
---
 arch/x86/lib/x86-opcode-map.txt       | 6 +++---
 tools/arch/x86/lib/x86-opcode-map.txt | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index b2cc6c04cbfe7..591797a931bfc 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -893,7 +893,7 @@ cc: sha1rnds4 Vdq,Wdq,Ib
 ce: vgf2p8affineqb Vx,Wx,Ib (66)
 cf: vgf2p8affineinvqb Vx,Wx,Ib (66)
 df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1)
-f0: RORX Gy,Ey,Ib (F2),(v)
+f0: RORX Gy,Ey,Ib (F2),(v) | HRESET Gv,Ib (F3),(000),(11B)
 EndTable
 
 GrpTable: Grp1
@@ -976,7 +976,7 @@ GrpTable: Grp7
 2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B) | ENCLU (111),(11B)
 3: LIDT Ms
 4: SMSW Mw/Rv
-5: rdpkru (110),(11B) | wrpkru (111),(11B) | SAVEPREVSSP (F3),(010),(11B) | RSTORSSP Mq (F3) | SETSSBSY (F3),(000),(11B)
+5: rdpkru (110),(11B) | wrpkru (111),(11B) | SAVEPREVSSP (F3),(010),(11B) | RSTORSSP Mq (F3) | SETSSBSY (F3),(000),(11B) | CLUI (F3),(110),(11B) | SERIALIZE (000),(11B) | STUI (F3),(111),(11B) | TESTUI (F3)(101)(11B) | UIRET (F3),(100),(11B) | XRESLDTRK (F2),(000),(11B) | XSUSLDTRK (F2),(001),(11B)
 6: LMSW Ew
 7: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B)
 EndTable
@@ -993,7 +993,7 @@ GrpTable: Grp9
 3: xrstors
 4: xsavec
 5: xsaves
-6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B)
+6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B) | SENDUIPI Gq (F3)
 7: VMPTRST Mq | VMPTRST Mq (F3) | RDSEED Rv (11B)
 EndTable
 
diff --git a/tools/arch/x86/lib/x86-opcode-map.txt b/tools/arch/x86/lib/x86-opcode-map.txt
index b2cc6c04cbfe7..591797a931bfc 100644
--- a/tools/arch/x86/lib/x86-opcode-map.txt
+++ b/tools/arch/x86/lib/x86-opcode-map.txt
@@ -893,7 +893,7 @@ cc: sha1rnds4 Vdq,Wdq,Ib
 ce: vgf2p8affineqb Vx,Wx,Ib (66)
 cf: vgf2p8affineinvqb Vx,Wx,Ib (66)
 df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1)
-f0: RORX Gy,Ey,Ib (F2),(v)
+f0: RORX Gy,Ey,Ib (F2),(v) | HRESET Gv,Ib (F3),(000),(11B)
 EndTable
 
 GrpTable: Grp1
@@ -976,7 +976,7 @@ GrpTable: Grp7
 2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B) | ENCLU (111),(11B)
 3: LIDT Ms
 4: SMSW Mw/Rv
-5: rdpkru (110),(11B) | wrpkru (111),(11B) | SAVEPREVSSP (F3),(010),(11B) | RSTORSSP Mq (F3) | SETSSBSY (F3),(000),(11B)
+5: rdpkru (110),(11B) | wrpkru (111),(11B) | SAVEPREVSSP (F3),(010),(11B) | RSTORSSP Mq (F3) | SETSSBSY (F3),(000),(11B) | CLUI (F3),(110),(11B) | SERIALIZE (000),(11B) | STUI (F3),(111),(11B) | TESTUI (F3)(101)(11B) | UIRET (F3),(100),(11B) | XRESLDTRK (F2),(000),(11B) | XSUSLDTRK (F2),(001),(11B)
 6: LMSW Ew
 7: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B)
 EndTable
@@ -993,7 +993,7 @@ GrpTable: Grp9
 3: xrstors
 4: xsavec
 5: xsaves
-6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B)
+6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B) | SENDUIPI Gq (F3)
 7: VMPTRST Mq | VMPTRST Mq (F3) | RDSEED Rv (11B)
 EndTable
 
-- 
GitLab


From cdb63ba98c5d03774bca9789e689fe62be4347b4 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Thu, 2 Dec 2021 11:50:28 +0200
Subject: [PATCH 0007/1586] perf/tests: Add AVX512-FP16 instructions to x86
 instruction decoder test

The x86 instruction decoder is used for both kernel instructions and
user space instructions (e.g. uprobes, perf tools Intel PT), so it is
good to update it with new instructions.

Add AVX512-FP16 instructions to x86 instruction decoder test.

A subsequent patch adds the instructions to the instruction decoder.

Reference:
Intel AVX512-FP16 Architecture Specification
June 2021
Revision 1.0
Document Number: 347407-001US

Example:

  $ perf test -v "x86 instruction decoder" |& grep vfcmaddcph | head -2
  Failed to decode: 62 f6 6f 48 56 cb     vfcmaddcph %zmm3,%zmm2,%zmm1
  Failed to decode: 62 f6 6f 48 56 8c c8 78 56 34 12      vfcmaddcph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Link: https://lore.kernel.org/r/20211202095029.2165714-6-adrian.hunter@intel.com
---
 tools/perf/arch/x86/tests/insn-x86-dat-32.c  |  910 ++++++++++++
 tools/perf/arch/x86/tests/insn-x86-dat-64.c  | 1370 ++++++++++++++++++
 tools/perf/arch/x86/tests/insn-x86-dat-src.c | 1146 +++++++++++++++
 3 files changed, 3426 insertions(+)

diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-32.c b/tools/perf/arch/x86/tests/insn-x86-dat-32.c
index 79e2050cd1c26..ba429cadb18f3 100644
--- a/tools/perf/arch/x86/tests/insn-x86-dat-32.c
+++ b/tools/perf/arch/x86/tests/insn-x86-dat-32.c
@@ -2197,6 +2197,916 @@
 "3e f2 ff 25 78 56 34 12 \tnotrack bnd jmp *0x12345678",},
 {{0x3e, 0xf2, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "jmp", "indirect",
 "3e f2 ff a4 c8 78 56 34 12 \tnotrack bnd jmp *0x12345678(%eax,%ecx,8)",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x58, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 48 58 cb    \tvaddph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 48 58 8c c8 78 56 34 12 \tvaddph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x58, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 08 58 cb    \tvaddph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 08 58 8c c8 78 56 34 12 \tvaddph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x58, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 28 58 cb    \tvaddph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 28 58 8c c8 78 56 34 12 \tvaddph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x58, 0xcb, }, 6, 0, "", "",
+"62 f5 6e 08 58 cb    \tvaddsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 58 8c c8 78 56 34 12 \tvaddsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x6c, 0x48, 0xc2, 0xeb, 0x12, }, 7, 0, "", "",
+"62 f3 6c 48 c2 eb 12 \tvcmple_oqph %zmm3,%zmm2,%k5",},
+{{0x62, 0xf3, 0x6c, 0x48, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 6c 48 c2 ac c8 78 56 34 12 12 \tvcmple_oqph 0x12345678(%eax,%ecx,8),%zmm2,%k5",},
+{{0x62, 0xf3, 0x6c, 0x08, 0xc2, 0xeb, 0x12, }, 7, 0, "", "",
+"62 f3 6c 08 c2 eb 12 \tvcmple_oqph %xmm3,%xmm2,%k5",},
+{{0x62, 0xf3, 0x6c, 0x08, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 6c 08 c2 ac c8 78 56 34 12 12 \tvcmple_oqph 0x12345678(%eax,%ecx,8),%xmm2,%k5",},
+{{0x62, 0xf3, 0x6c, 0x28, 0xc2, 0xeb, 0x12, }, 7, 0, "", "",
+"62 f3 6c 28 c2 eb 12 \tvcmple_oqph %ymm3,%ymm2,%k5",},
+{{0x62, 0xf3, 0x6c, 0x28, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 6c 28 c2 ac c8 78 56 34 12 12 \tvcmple_oqph 0x12345678(%eax,%ecx,8),%ymm2,%k5",},
+{{0x62, 0xf3, 0x6e, 0x08, 0xc2, 0xeb, 0x12, }, 7, 0, "", "",
+"62 f3 6e 08 c2 eb 12 \tvcmple_oqsh %xmm3,%xmm2,%k5",},
+{{0x62, 0xf3, 0x6e, 0x08, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 6e 08 c2 ac c8 78 56 34 12 12 \tvcmple_oqsh 0x12345678(%eax,%ecx,8),%xmm2,%k5",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x2f, 0xca, }, 6, 0, "", "",
+"62 f5 7c 08 2f ca    \tvcomish %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x2f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 08 2f 8c c8 78 56 34 12 \tvcomish 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 7c 48 5b ca    \tvcvtdq2ph %zmm2,%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 48 5b 8c c8 78 56 34 12 \tvcvtdq2ph 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 7c 08 5b ca    \tvcvtdq2ph %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 7c 28 5b ca    \tvcvtdq2ph %ymm2,%xmm1",},
+{{0x62, 0xf5, 0xfd, 0x48, 0x5a, 0xca, }, 6, 0, "", "",
+"62 f5 fd 48 5a ca    \tvcvtpd2ph %zmm2,%xmm1",},
+{{0x62, 0xf5, 0xfd, 0x08, 0x5a, 0xca, }, 6, 0, "", "",
+"62 f5 fd 08 5a ca    \tvcvtpd2ph %xmm2,%xmm1",},
+{{0x62, 0xf5, 0xfd, 0x28, 0x5a, 0xca, }, 6, 0, "", "",
+"62 f5 fd 28 5a ca    \tvcvtpd2ph %ymm2,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 7d 48 5b ca    \tvcvtph2dq %ymm2,%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 48 5b 8c c8 78 56 34 12 \tvcvtph2dq 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 7d 08 5b ca    \tvcvtph2dq %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 08 5b 8c c8 78 56 34 12 \tvcvtph2dq 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 7d 28 5b ca    \tvcvtph2dq %xmm2,%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 28 5b 8c c8 78 56 34 12 \tvcvtph2dq 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x5a, 0xca, }, 6, 0, "", "",
+"62 f5 7c 48 5a ca    \tvcvtph2pd %xmm2,%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 48 5a 8c c8 78 56 34 12 \tvcvtph2pd 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x5a, 0xca, }, 6, 0, "", "",
+"62 f5 7c 08 5a ca    \tvcvtph2pd %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 08 5a 8c c8 78 56 34 12 \tvcvtph2pd 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x5a, 0xca, }, 6, 0, "", "",
+"62 f5 7c 28 5a ca    \tvcvtph2pd %xmm2,%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 28 5a 8c c8 78 56 34 12 \tvcvtph2pd 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf2, 0x7d, 0x48, 0x13, 0xca, }, 6, 0, "", "",
+"62 f2 7d 48 13 ca    \tvcvtph2ps %ymm2,%zmm1",},
+{{0x62, 0xf2, 0x7d, 0x48, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f2 7d 48 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0xc4, 0xe2, 0x79, 0x13, 0xca, }, 5, 0, "", "",
+"c4 e2 79 13 ca       \tvcvtph2ps %xmm2,%xmm1",},
+{{0xc4, 0xe2, 0x79, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"c4 e2 79 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0xc4, 0xe2, 0x7d, 0x13, 0xca, }, 5, 0, "", "",
+"c4 e2 7d 13 ca       \tvcvtph2ps %xmm2,%ymm1",},
+{{0xc4, 0xe2, 0x7d, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"c4 e2 7d 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0xc4, 0xe2, 0x79, 0x13, 0xca, }, 5, 0, "", "",
+"c4 e2 79 13 ca       \tvcvtph2ps %xmm2,%xmm1",},
+{{0xc4, 0xe2, 0x79, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"c4 e2 79 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0xc4, 0xe2, 0x7d, 0x13, 0xca, }, 5, 0, "", "",
+"c4 e2 7d 13 ca       \tvcvtph2ps %xmm2,%ymm1",},
+{{0xc4, 0xe2, 0x7d, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"c4 e2 7d 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf6, 0x7d, 0x48, 0x13, 0xca, }, 6, 0, "", "",
+"62 f6 7d 48 13 ca    \tvcvtph2psx %ymm2,%zmm1",},
+{{0x62, 0xf6, 0x7d, 0x48, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 48 13 8c c8 78 56 34 12 \tvcvtph2psx 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf6, 0x7d, 0x08, 0x13, 0xca, }, 6, 0, "", "",
+"62 f6 7d 08 13 ca    \tvcvtph2psx %xmm2,%xmm1",},
+{{0x62, 0xf6, 0x7d, 0x08, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 08 13 8c c8 78 56 34 12 \tvcvtph2psx 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf6, 0x7d, 0x28, 0x13, 0xca, }, 6, 0, "", "",
+"62 f6 7d 28 13 ca    \tvcvtph2psx %xmm2,%ymm1",},
+{{0x62, 0xf6, 0x7d, 0x28, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 28 13 8c c8 78 56 34 12 \tvcvtph2psx 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x7b, 0xca, }, 6, 0, "", "",
+"62 f5 7d 48 7b ca    \tvcvtph2qq %xmm2,%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 48 7b 8c c8 78 56 34 12 \tvcvtph2qq 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x7b, 0xca, }, 6, 0, "", "",
+"62 f5 7d 08 7b ca    \tvcvtph2qq %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 08 7b 8c c8 78 56 34 12 \tvcvtph2qq 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x7b, 0xca, }, 6, 0, "", "",
+"62 f5 7d 28 7b ca    \tvcvtph2qq %xmm2,%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 28 7b 8c c8 78 56 34 12 \tvcvtph2qq 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x79, 0xca, }, 6, 0, "", "",
+"62 f5 7c 48 79 ca    \tvcvtph2udq %ymm2,%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 48 79 8c c8 78 56 34 12 \tvcvtph2udq 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x79, 0xca, }, 6, 0, "", "",
+"62 f5 7c 08 79 ca    \tvcvtph2udq %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 08 79 8c c8 78 56 34 12 \tvcvtph2udq 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x79, 0xca, }, 6, 0, "", "",
+"62 f5 7c 28 79 ca    \tvcvtph2udq %xmm2,%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 28 79 8c c8 78 56 34 12 \tvcvtph2udq 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x79, 0xca, }, 6, 0, "", "",
+"62 f5 7d 48 79 ca    \tvcvtph2uqq %xmm2,%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 48 79 8c c8 78 56 34 12 \tvcvtph2uqq 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x79, 0xca, }, 6, 0, "", "",
+"62 f5 7d 08 79 ca    \tvcvtph2uqq %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 08 79 8c c8 78 56 34 12 \tvcvtph2uqq 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x79, 0xca, }, 6, 0, "", "",
+"62 f5 7d 28 79 ca    \tvcvtph2uqq %xmm2,%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 28 79 8c c8 78 56 34 12 \tvcvtph2uqq 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7c 48 7d ca    \tvcvtph2uw %zmm2,%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 48 7d 8c c8 78 56 34 12 \tvcvtph2uw 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7c 08 7d ca    \tvcvtph2uw %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 08 7d 8c c8 78 56 34 12 \tvcvtph2uw 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7c 28 7d ca    \tvcvtph2uw %ymm2,%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 28 7d 8c c8 78 56 34 12 \tvcvtph2uw 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7d 48 7d ca    \tvcvtph2w %zmm2,%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 48 7d 8c c8 78 56 34 12 \tvcvtph2w 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7d 08 7d ca    \tvcvtph2w %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 08 7d 8c c8 78 56 34 12 \tvcvtph2w 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7d 28 7d ca    \tvcvtph2w %ymm2,%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 28 7d 8c c8 78 56 34 12 \tvcvtph2w 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf3, 0x7d, 0x48, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7d 48 1d 8c c8 78 56 34 12 12 \tvcvtps2ph $0x12,%zmm1,0x12345678(%eax,%ecx,8)",},
+{{0x62, 0xf3, 0x7d, 0x48, 0x1d, 0xd1, 0x12, }, 7, 0, "", "",
+"62 f3 7d 48 1d d1 12 \tvcvtps2ph $0x12,%zmm2,%ymm1",},
+{{0xc4, 0xe3, 0x7d, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 11, 0, "", "",
+"c4 e3 7d 1d 8c c8 78 56 34 12 12 \tvcvtps2ph $0x12,%ymm1,0x12345678(%eax,%ecx,8)",},
+{{0xc4, 0xe3, 0x79, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 11, 0, "", "",
+"c4 e3 79 1d 8c c8 78 56 34 12 12 \tvcvtps2ph $0x12,%xmm1,0x12345678(%eax,%ecx,8)",},
+{{0xc4, 0xe3, 0x79, 0x1d, 0xd1, 0x12, }, 6, 0, "", "",
+"c4 e3 79 1d d1 12    \tvcvtps2ph $0x12,%xmm2,%xmm1",},
+{{0xc4, 0xe3, 0x7d, 0x1d, 0xd1, 0x12, }, 6, 0, "", "",
+"c4 e3 7d 1d d1 12    \tvcvtps2ph $0x12,%ymm2,%xmm1",},
+{{0xc4, 0xe3, 0x7d, 0x1d, 0xd1, 0x12, }, 6, 0, "", "",
+"c4 e3 7d 1d d1 12    \tvcvtps2ph $0x12,%ymm2,%xmm1",},
+{{0xc4, 0xe3, 0x7d, 0x1d, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 11, 0, "", "",
+"c4 e3 7d 1d 94 c8 78 56 34 12 12 \tvcvtps2ph $0x12,%ymm2,0x12345678(%eax,%ecx,8)",},
+{{0xc4, 0xe3, 0x79, 0x1d, 0xd1, 0x12, }, 6, 0, "", "",
+"c4 e3 79 1d d1 12    \tvcvtps2ph $0x12,%xmm2,%xmm1",},
+{{0xc4, 0xe3, 0x79, 0x1d, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 11, 0, "", "",
+"c4 e3 79 1d 94 c8 78 56 34 12 12 \tvcvtps2ph $0x12,%xmm2,0x12345678(%eax,%ecx,8)",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x1d, 0xca, }, 6, 0, "", "",
+"62 f5 7d 48 1d ca    \tvcvtps2phx %zmm2,%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 48 1d 8c c8 78 56 34 12 \tvcvtps2phx 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x1d, 0xca, }, 6, 0, "", "",
+"62 f5 7d 08 1d ca    \tvcvtps2phx %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x1d, 0xca, }, 6, 0, "", "",
+"62 f5 7d 28 1d ca    \tvcvtps2phx %ymm2,%xmm1",},
+{{0x62, 0xf5, 0xfc, 0x48, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 fc 48 5b ca    \tvcvtqq2ph %zmm2,%xmm1",},
+{{0x62, 0xf5, 0xfc, 0x08, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 fc 08 5b ca    \tvcvtqq2ph %xmm2,%xmm1",},
+{{0x62, 0xf5, 0xfc, 0x28, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 fc 28 5b ca    \tvcvtqq2ph %ymm2,%xmm1",},
+{{0x62, 0xf5, 0xef, 0x08, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 ef 08 5a 8c c8 78 56 34 12 \tvcvtsd2sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 5a 8c c8 78 56 34 12 \tvcvtsh2sd 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x2d, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 08 2d 84 c8 78 56 34 12 \tvcvtsh2si 0x12345678(%eax,%ecx,8),%eax",},
+{{0x62, 0xf6, 0x6c, 0x08, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6c 08 13 8c c8 78 56 34 12 \tvcvtsh2ss 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x79, 0xc1, }, 6, 0, "", "",
+"62 f5 7e 08 79 c1    \tvcvtsh2usi %xmm1,%eax",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x79, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 08 79 84 c8 78 56 34 12 \tvcvtsh2usi 0x12345678(%eax,%ecx,8),%eax",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x2a, 0xc8, }, 6, 0, "", "",
+"62 f5 6e 08 2a c8    \tvcvtsi2sh %eax,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x2a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 2a 8c c8 78 56 34 12 \tvcvtsi2sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x2a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 2a 8c c8 78 56 34 12 \tvcvtsi2sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x1d, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 08 1d cb    \tvcvtss2sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 08 1d 8c c8 78 56 34 12 \tvcvtss2sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7e, 0x48, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 7e 48 5b ca    \tvcvttph2dq %ymm2,%zmm1",},
+{{0x62, 0xf5, 0x7e, 0x48, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 48 5b 8c c8 78 56 34 12 \tvcvttph2dq 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 7e 08 5b ca    \tvcvttph2dq %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 08 5b 8c c8 78 56 34 12 \tvcvttph2dq 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7e, 0x28, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 7e 28 5b ca    \tvcvttph2dq %xmm2,%ymm1",},
+{{0x62, 0xf5, 0x7e, 0x28, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 28 5b 8c c8 78 56 34 12 \tvcvttph2dq 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x7a, 0xca, }, 6, 0, "", "",
+"62 f5 7d 48 7a ca    \tvcvttph2qq %xmm2,%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 48 7a 8c c8 78 56 34 12 \tvcvttph2qq 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x7a, 0xca, }, 6, 0, "", "",
+"62 f5 7d 08 7a ca    \tvcvttph2qq %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 08 7a 8c c8 78 56 34 12 \tvcvttph2qq 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x7a, 0xca, }, 6, 0, "", "",
+"62 f5 7d 28 7a ca    \tvcvttph2qq %xmm2,%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 28 7a 8c c8 78 56 34 12 \tvcvttph2qq 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x78, 0xca, }, 6, 0, "", "",
+"62 f5 7c 48 78 ca    \tvcvttph2udq %ymm2,%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 48 78 8c c8 78 56 34 12 \tvcvttph2udq 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x78, 0xca, }, 6, 0, "", "",
+"62 f5 7c 08 78 ca    \tvcvttph2udq %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 08 78 8c c8 78 56 34 12 \tvcvttph2udq 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x78, 0xca, }, 6, 0, "", "",
+"62 f5 7c 28 78 ca    \tvcvttph2udq %xmm2,%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 28 78 8c c8 78 56 34 12 \tvcvttph2udq 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x78, 0xca, }, 6, 0, "", "",
+"62 f5 7d 48 78 ca    \tvcvttph2uqq %xmm2,%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 48 78 8c c8 78 56 34 12 \tvcvttph2uqq 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x78, 0xca, }, 6, 0, "", "",
+"62 f5 7d 08 78 ca    \tvcvttph2uqq %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 08 78 8c c8 78 56 34 12 \tvcvttph2uqq 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x78, 0xca, }, 6, 0, "", "",
+"62 f5 7d 28 78 ca    \tvcvttph2uqq %xmm2,%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 28 78 8c c8 78 56 34 12 \tvcvttph2uqq 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x7c, 0xca, }, 6, 0, "", "",
+"62 f5 7c 48 7c ca    \tvcvttph2uw %zmm2,%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 48 7c 8c c8 78 56 34 12 \tvcvttph2uw 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x7c, 0xca, }, 6, 0, "", "",
+"62 f5 7c 08 7c ca    \tvcvttph2uw %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 08 7c 8c c8 78 56 34 12 \tvcvttph2uw 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x7c, 0xca, }, 6, 0, "", "",
+"62 f5 7c 28 7c ca    \tvcvttph2uw %ymm2,%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 28 7c 8c c8 78 56 34 12 \tvcvttph2uw 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x7c, 0xca, }, 6, 0, "", "",
+"62 f5 7d 48 7c ca    \tvcvttph2w %zmm2,%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 48 7c 8c c8 78 56 34 12 \tvcvttph2w 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x7c, 0xca, }, 6, 0, "", "",
+"62 f5 7d 08 7c ca    \tvcvttph2w %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 08 7c 8c c8 78 56 34 12 \tvcvttph2w 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x7c, 0xca, }, 6, 0, "", "",
+"62 f5 7d 28 7c ca    \tvcvttph2w %ymm2,%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 28 7c 8c c8 78 56 34 12 \tvcvttph2w 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x2c, 0xc1, }, 6, 0, "", "",
+"62 f5 7e 08 2c c1    \tvcvttsh2si %xmm1,%eax",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x2c, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 08 2c 84 c8 78 56 34 12 \tvcvttsh2si 0x12345678(%eax,%ecx,8),%eax",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x78, 0xc1, }, 6, 0, "", "",
+"62 f5 7e 08 78 c1    \tvcvttsh2usi %xmm1,%eax",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x78, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 08 78 84 c8 78 56 34 12 \tvcvttsh2usi 0x12345678(%eax,%ecx,8),%eax",},
+{{0x62, 0xf5, 0x7f, 0x48, 0x7a, 0xca, }, 6, 0, "", "",
+"62 f5 7f 48 7a ca    \tvcvtudq2ph %zmm2,%ymm1",},
+{{0x62, 0xf5, 0x7f, 0x48, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7f 48 7a 8c c8 78 56 34 12 \tvcvtudq2ph 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7f, 0x08, 0x7a, 0xca, }, 6, 0, "", "",
+"62 f5 7f 08 7a ca    \tvcvtudq2ph %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7f, 0x28, 0x7a, 0xca, }, 6, 0, "", "",
+"62 f5 7f 28 7a ca    \tvcvtudq2ph %ymm2,%xmm1",},
+{{0x62, 0xf5, 0xff, 0x48, 0x7a, 0xca, }, 6, 0, "", "",
+"62 f5 ff 48 7a ca    \tvcvtuqq2ph %zmm2,%xmm1",},
+{{0x62, 0xf5, 0xff, 0x08, 0x7a, 0xca, }, 6, 0, "", "",
+"62 f5 ff 08 7a ca    \tvcvtuqq2ph %xmm2,%xmm1",},
+{{0x62, 0xf5, 0xff, 0x28, 0x7a, 0xca, }, 6, 0, "", "",
+"62 f5 ff 28 7a ca    \tvcvtuqq2ph %ymm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x7b, 0xc8, }, 6, 0, "", "",
+"62 f5 6e 08 7b c8    \tvcvtusi2sh %eax,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 7b 8c c8 78 56 34 12 \tvcvtusi2sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 7b 8c c8 78 56 34 12 \tvcvtusi2sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7f, 0x48, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7f 48 7d ca    \tvcvtuw2ph %zmm2,%zmm1",},
+{{0x62, 0xf5, 0x7f, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7f 48 7d 8c c8 78 56 34 12 \tvcvtuw2ph 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7f, 0x08, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7f 08 7d ca    \tvcvtuw2ph %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7f, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7f 08 7d 8c c8 78 56 34 12 \tvcvtuw2ph 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7f, 0x28, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7f 28 7d ca    \tvcvtuw2ph %ymm2,%ymm1",},
+{{0x62, 0xf5, 0x7f, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7f 28 7d 8c c8 78 56 34 12 \tvcvtuw2ph 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7e, 0x48, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7e 48 7d ca    \tvcvtw2ph %zmm2,%zmm1",},
+{{0x62, 0xf5, 0x7e, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 48 7d 8c c8 78 56 34 12 \tvcvtw2ph 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7e 08 7d ca    \tvcvtw2ph %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 08 7d 8c c8 78 56 34 12 \tvcvtw2ph 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7e, 0x28, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7e 28 7d ca    \tvcvtw2ph %ymm2,%ymm1",},
+{{0x62, 0xf5, 0x7e, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 28 7d 8c c8 78 56 34 12 \tvcvtw2ph 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x5e, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 48 5e cb    \tvdivph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 48 5e 8c c8 78 56 34 12 \tvdivph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x5e, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 08 5e cb    \tvdivph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 08 5e 8c c8 78 56 34 12 \tvdivph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x5e, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 28 5e cb    \tvdivph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 28 5e 8c c8 78 56 34 12 \tvdivph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x5e, 0xcb, }, 6, 0, "", "",
+"62 f5 6e 08 5e cb    \tvdivsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 5e 8c c8 78 56 34 12 \tvdivsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6f, 0x48, 0x56, 0xcb, }, 6, 0, "", "",
+"62 f6 6f 48 56 cb    \tvfcmaddcph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6f, 0x48, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6f 48 56 8c c8 78 56 34 12 \tvfcmaddcph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6f, 0x08, 0x56, 0xcb, }, 6, 0, "", "",
+"62 f6 6f 08 56 cb    \tvfcmaddcph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6f, 0x08, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6f 08 56 8c c8 78 56 34 12 \tvfcmaddcph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6f, 0x28, 0x56, 0xcb, }, 6, 0, "", "",
+"62 f6 6f 28 56 cb    \tvfcmaddcph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6f, 0x28, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6f 28 56 8c c8 78 56 34 12 \tvfcmaddcph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6f, 0x08, 0x57, 0xcb, }, 6, 0, "", "",
+"62 f6 6f 08 57 cb    \tvfcmaddcsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6f, 0x08, 0x57, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6f 08 57 8c c8 78 56 34 12 \tvfcmaddcsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6f, 0x48, 0xd6, 0xcb, }, 6, 0, "", "",
+"62 f6 6f 48 d6 cb    \tvfcmulcph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6f, 0x48, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6f 48 d6 8c c8 78 56 34 12 \tvfcmulcph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6f, 0x08, 0xd6, 0xcb, }, 6, 0, "", "",
+"62 f6 6f 08 d6 cb    \tvfcmulcph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6f, 0x08, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6f 08 d6 8c c8 78 56 34 12 \tvfcmulcph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6f, 0x28, 0xd6, 0xcb, }, 6, 0, "", "",
+"62 f6 6f 28 d6 cb    \tvfcmulcph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6f, 0x28, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6f 28 d6 8c c8 78 56 34 12 \tvfcmulcph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6f, 0x08, 0xd7, 0xcb, }, 6, 0, "", "",
+"62 f6 6f 08 d7 cb    \tvfcmulcsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6f, 0x08, 0xd7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6f 08 d7 8c c8 78 56 34 12 \tvfcmulcsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x98, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 98 cb    \tvfmadd132ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x98, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 98 8c c8 78 56 34 12 \tvfmadd132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x98, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 98 cb    \tvfmadd132ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x98, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 98 8c c8 78 56 34 12 \tvfmadd132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x98, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 98 cb    \tvfmadd132ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x98, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 98 8c c8 78 56 34 12 \tvfmadd132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x99, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 99 cb    \tvfmadd132sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x99, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 99 8c c8 78 56 34 12 \tvfmadd132sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xa8, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 a8 cb    \tvfmadd213ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xa8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 a8 8c c8 78 56 34 12 \tvfmadd213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xa8, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 a8 cb    \tvfmadd213ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xa8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 a8 8c c8 78 56 34 12 \tvfmadd213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xa8, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 a8 cb    \tvfmadd213ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xa8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 a8 8c c8 78 56 34 12 \tvfmadd213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xa9, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 a9 cb    \tvfmadd213sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xa9, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 a9 8c c8 78 56 34 12 \tvfmadd213sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xb8, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 b8 cb    \tvfmadd231ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xb8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 b8 8c c8 78 56 34 12 \tvfmadd231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xb8, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 b8 cb    \tvfmadd231ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xb8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 b8 8c c8 78 56 34 12 \tvfmadd231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xb8, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 b8 cb    \tvfmadd231ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xb8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 b8 8c c8 78 56 34 12 \tvfmadd231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xb9, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 b9 cb    \tvfmadd231sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xb9, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 b9 8c c8 78 56 34 12 \tvfmadd231sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6e, 0x48, 0x56, 0xcb, }, 6, 0, "", "",
+"62 f6 6e 48 56 cb    \tvfmaddcph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6e, 0x48, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6e 48 56 8c c8 78 56 34 12 \tvfmaddcph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6e, 0x08, 0x56, 0xcb, }, 6, 0, "", "",
+"62 f6 6e 08 56 cb    \tvfmaddcph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6e, 0x08, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6e 08 56 8c c8 78 56 34 12 \tvfmaddcph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6e, 0x28, 0x56, 0xcb, }, 6, 0, "", "",
+"62 f6 6e 28 56 cb    \tvfmaddcph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6e, 0x28, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6e 28 56 8c c8 78 56 34 12 \tvfmaddcph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6e, 0x08, 0x57, 0xcb, }, 6, 0, "", "",
+"62 f6 6e 08 57 cb    \tvfmaddcsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6e, 0x08, 0x57, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6e 08 57 8c c8 78 56 34 12 \tvfmaddcsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x96, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 96 cb    \tvfmaddsub132ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x96, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 96 8c c8 78 56 34 12 \tvfmaddsub132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x96, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 96 cb    \tvfmaddsub132ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x96, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 96 8c c8 78 56 34 12 \tvfmaddsub132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x96, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 96 cb    \tvfmaddsub132ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x96, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 96 8c c8 78 56 34 12 \tvfmaddsub132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xa6, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 a6 cb    \tvfmaddsub213ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xa6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 a6 8c c8 78 56 34 12 \tvfmaddsub213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xa6, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 a6 cb    \tvfmaddsub213ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xa6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 a6 8c c8 78 56 34 12 \tvfmaddsub213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xa6, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 a6 cb    \tvfmaddsub213ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xa6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 a6 8c c8 78 56 34 12 \tvfmaddsub213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xb6, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 b6 cb    \tvfmaddsub231ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xb6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 b6 8c c8 78 56 34 12 \tvfmaddsub231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xb6, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 b6 cb    \tvfmaddsub231ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xb6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 b6 8c c8 78 56 34 12 \tvfmaddsub231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xb6, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 b6 cb    \tvfmaddsub231ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xb6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 b6 8c c8 78 56 34 12 \tvfmaddsub231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x9a, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 9a cb    \tvfmsub132ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x9a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 9a 8c c8 78 56 34 12 \tvfmsub132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9a, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 9a cb    \tvfmsub132ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 9a 8c c8 78 56 34 12 \tvfmsub132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x9a, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 9a cb    \tvfmsub132ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x9a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 9a 8c c8 78 56 34 12 \tvfmsub132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9b, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 9b cb    \tvfmsub132sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 9b 8c c8 78 56 34 12 \tvfmsub132sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xaa, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 aa cb    \tvfmsub213ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xaa, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 aa 8c c8 78 56 34 12 \tvfmsub213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xaa, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 aa cb    \tvfmsub213ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xaa, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 aa 8c c8 78 56 34 12 \tvfmsub213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xaa, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 aa cb    \tvfmsub213ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xaa, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 aa 8c c8 78 56 34 12 \tvfmsub213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xab, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 ab cb    \tvfmsub213sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xab, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 ab 8c c8 78 56 34 12 \tvfmsub213sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xba, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 ba cb    \tvfmsub231ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xba, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 ba 8c c8 78 56 34 12 \tvfmsub231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xba, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 ba cb    \tvfmsub231ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xba, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 ba 8c c8 78 56 34 12 \tvfmsub231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xba, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 ba cb    \tvfmsub231ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xba, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 ba 8c c8 78 56 34 12 \tvfmsub231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xbb, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 bb cb    \tvfmsub231sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xbb, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 bb 8c c8 78 56 34 12 \tvfmsub231sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x97, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 97 cb    \tvfmsubadd132ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x97, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 97 8c c8 78 56 34 12 \tvfmsubadd132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x97, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 97 cb    \tvfmsubadd132ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x97, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 97 8c c8 78 56 34 12 \tvfmsubadd132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x97, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 97 cb    \tvfmsubadd132ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x97, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 97 8c c8 78 56 34 12 \tvfmsubadd132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xa7, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 a7 cb    \tvfmsubadd213ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xa7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 a7 8c c8 78 56 34 12 \tvfmsubadd213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xa7, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 a7 cb    \tvfmsubadd213ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xa7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 a7 8c c8 78 56 34 12 \tvfmsubadd213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xa7, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 a7 cb    \tvfmsubadd213ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xa7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 a7 8c c8 78 56 34 12 \tvfmsubadd213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xb7, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 b7 cb    \tvfmsubadd231ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xb7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 b7 8c c8 78 56 34 12 \tvfmsubadd231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xb7, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 b7 cb    \tvfmsubadd231ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xb7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 b7 8c c8 78 56 34 12 \tvfmsubadd231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xb7, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 b7 cb    \tvfmsubadd231ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xb7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 b7 8c c8 78 56 34 12 \tvfmsubadd231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6e, 0x48, 0xd6, 0xcb, }, 6, 0, "", "",
+"62 f6 6e 48 d6 cb    \tvfmulcph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6e, 0x48, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6e 48 d6 8c c8 78 56 34 12 \tvfmulcph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6e, 0x08, 0xd6, 0xcb, }, 6, 0, "", "",
+"62 f6 6e 08 d6 cb    \tvfmulcph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6e, 0x08, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6e 08 d6 8c c8 78 56 34 12 \tvfmulcph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6e, 0x28, 0xd6, 0xcb, }, 6, 0, "", "",
+"62 f6 6e 28 d6 cb    \tvfmulcph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6e, 0x28, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6e 28 d6 8c c8 78 56 34 12 \tvfmulcph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6e, 0x08, 0xd7, 0xcb, }, 6, 0, "", "",
+"62 f6 6e 08 d7 cb    \tvfmulcsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6e, 0x08, 0xd7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6e 08 d7 8c c8 78 56 34 12 \tvfmulcsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x9c, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 9c cb    \tvfnmadd132ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x9c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 9c 8c c8 78 56 34 12 \tvfnmadd132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9c, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 9c cb    \tvfnmadd132ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 9c 8c c8 78 56 34 12 \tvfnmadd132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x9c, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 9c cb    \tvfnmadd132ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x9c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 9c 8c c8 78 56 34 12 \tvfnmadd132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9d, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 9d cb    \tvfnmadd132sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 9d 8c c8 78 56 34 12 \tvfnmadd132sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xac, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 ac cb    \tvfnmadd213ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xac, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 ac 8c c8 78 56 34 12 \tvfnmadd213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xac, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 ac cb    \tvfnmadd213ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xac, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 ac 8c c8 78 56 34 12 \tvfnmadd213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xac, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 ac cb    \tvfnmadd213ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xac, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 ac 8c c8 78 56 34 12 \tvfnmadd213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xad, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 ad cb    \tvfnmadd213sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xad, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 ad 8c c8 78 56 34 12 \tvfnmadd213sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xbc, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 bc cb    \tvfnmadd231ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xbc, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 bc 8c c8 78 56 34 12 \tvfnmadd231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xbc, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 bc cb    \tvfnmadd231ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xbc, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 bc 8c c8 78 56 34 12 \tvfnmadd231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xbc, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 bc cb    \tvfnmadd231ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xbc, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 bc 8c c8 78 56 34 12 \tvfnmadd231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xbd, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 bd cb    \tvfnmadd231sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xbd, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 bd 8c c8 78 56 34 12 \tvfnmadd231sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x9e, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 9e cb    \tvfnmsub132ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x9e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 9e 8c c8 78 56 34 12 \tvfnmsub132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9e, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 9e cb    \tvfnmsub132ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 9e 8c c8 78 56 34 12 \tvfnmsub132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x9e, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 9e cb    \tvfnmsub132ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x9e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 9e 8c c8 78 56 34 12 \tvfnmsub132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9f, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 9f cb    \tvfnmsub132sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 9f 8c c8 78 56 34 12 \tvfnmsub132sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xae, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 ae cb    \tvfnmsub213ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xae, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 ae 8c c8 78 56 34 12 \tvfnmsub213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xae, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 ae cb    \tvfnmsub213ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xae, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 ae 8c c8 78 56 34 12 \tvfnmsub213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xae, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 ae cb    \tvfnmsub213ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xae, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 ae 8c c8 78 56 34 12 \tvfnmsub213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xaf, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 af cb    \tvfnmsub213sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xaf, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 af 8c c8 78 56 34 12 \tvfnmsub213sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xbe, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 be cb    \tvfnmsub231ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xbe, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 be 8c c8 78 56 34 12 \tvfnmsub231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xbe, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 be cb    \tvfnmsub231ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xbe, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 be 8c c8 78 56 34 12 \tvfnmsub231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xbe, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 be cb    \tvfnmsub231ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xbe, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 be 8c c8 78 56 34 12 \tvfnmsub231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xbf, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 bf cb    \tvfnmsub231sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xbf, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 bf 8c c8 78 56 34 12 \tvfnmsub231sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x7c, 0x48, 0x66, 0xe9, 0x12, }, 7, 0, "", "",
+"62 f3 7c 48 66 e9 12 \tvfpclassph $0x12,%zmm1,%k5",},
+{{0x62, 0xf3, 0x7c, 0x08, 0x66, 0xe9, 0x12, }, 7, 0, "", "",
+"62 f3 7c 08 66 e9 12 \tvfpclassph $0x12,%xmm1,%k5",},
+{{0x62, 0xf3, 0x7c, 0x28, 0x66, 0xe9, 0x12, }, 7, 0, "", "",
+"62 f3 7c 28 66 e9 12 \tvfpclassph $0x12,%ymm1,%k5",},
+{{0x62, 0xf3, 0x7c, 0x08, 0x67, 0xe9, 0x12, }, 7, 0, "", "",
+"62 f3 7c 08 67 e9 12 \tvfpclasssh $0x12,%xmm1,%k5",},
+{{0x62, 0xf3, 0x7c, 0x08, 0x67, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7c 08 67 ac c8 78 56 34 12 12 \tvfpclasssh $0x12,0x12345678(%eax,%ecx,8),%k5",},
+{{0x62, 0xf6, 0x7d, 0x48, 0x42, 0xca, }, 6, 0, "", "",
+"62 f6 7d 48 42 ca    \tvgetexpph %zmm2,%zmm1",},
+{{0x62, 0xf6, 0x7d, 0x48, 0x42, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 48 42 8c c8 78 56 34 12 \tvgetexpph 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf6, 0x7d, 0x08, 0x42, 0xca, }, 6, 0, "", "",
+"62 f6 7d 08 42 ca    \tvgetexpph %xmm2,%xmm1",},
+{{0x62, 0xf6, 0x7d, 0x08, 0x42, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 08 42 8c c8 78 56 34 12 \tvgetexpph 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf6, 0x7d, 0x28, 0x42, 0xca, }, 6, 0, "", "",
+"62 f6 7d 28 42 ca    \tvgetexpph %ymm2,%ymm1",},
+{{0x62, 0xf6, 0x7d, 0x28, 0x42, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 28 42 8c c8 78 56 34 12 \tvgetexpph 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x43, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 43 cb    \tvgetexpsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x43, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 43 8c c8 78 56 34 12 \tvgetexpsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x7c, 0x48, 0x26, 0xca, 0x12, }, 7, 0, "", "",
+"62 f3 7c 48 26 ca 12 \tvgetmantph $0x12,%zmm2,%zmm1",},
+{{0x62, 0xf3, 0x7c, 0x48, 0x26, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7c 48 26 8c c8 78 56 34 12 12 \tvgetmantph $0x12,0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf3, 0x7c, 0x08, 0x26, 0xca, 0x12, }, 7, 0, "", "",
+"62 f3 7c 08 26 ca 12 \tvgetmantph $0x12,%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x7c, 0x08, 0x26, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7c 08 26 8c c8 78 56 34 12 12 \tvgetmantph $0x12,0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf3, 0x7c, 0x28, 0x26, 0xca, 0x12, }, 7, 0, "", "",
+"62 f3 7c 28 26 ca 12 \tvgetmantph $0x12,%ymm2,%ymm1",},
+{{0x62, 0xf3, 0x7c, 0x28, 0x26, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7c 28 26 8c c8 78 56 34 12 12 \tvgetmantph $0x12,0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf3, 0x6c, 0x08, 0x27, 0xcb, 0x12, }, 7, 0, "", "",
+"62 f3 6c 08 27 cb 12 \tvgetmantsh $0x12,%xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x6c, 0x08, 0x27, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 6c 08 27 8c c8 78 56 34 12 12 \tvgetmantsh $0x12,0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x5f, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 48 5f cb    \tvmaxph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 48 5f 8c c8 78 56 34 12 \tvmaxph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x5f, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 08 5f cb    \tvmaxph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 08 5f 8c c8 78 56 34 12 \tvmaxph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x5f, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 28 5f cb    \tvmaxph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 28 5f 8c c8 78 56 34 12 \tvmaxph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x5f, 0xcb, }, 6, 0, "", "",
+"62 f5 6e 08 5f cb    \tvmaxsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 5f 8c c8 78 56 34 12 \tvmaxsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x5d, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 48 5d cb    \tvminph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 48 5d 8c c8 78 56 34 12 \tvminph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x5d, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 08 5d cb    \tvminph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 08 5d 8c c8 78 56 34 12 \tvminph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x5d, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 28 5d cb    \tvminph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 28 5d 8c c8 78 56 34 12 \tvminph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x5d, 0xcb, }, 6, 0, "", "",
+"62 f5 6e 08 5d cb    \tvminsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 5d 8c c8 78 56 34 12 \tvminsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x11, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 08 11 8c c8 78 56 34 12 \tvmovsh %xmm1,0x12345678(%eax,%ecx,8)",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x10, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 08 10 8c c8 78 56 34 12 \tvmovsh 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x10, 0xcb, }, 6, 0, "", "",
+"62 f5 6e 08 10 cb    \tvmovsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x7e, 0xc8, }, 6, 0, "", "",
+"62 f5 7d 08 7e c8    \tvmovw  %xmm1,%eax",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x7e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 08 7e 8c c8 78 56 34 12 \tvmovw  %xmm1,0x12345678(%eax,%ecx,8)",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x6e, 0xc8, }, 6, 0, "", "",
+"62 f5 7d 08 6e c8    \tvmovw  %eax,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x6e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 08 6e 8c c8 78 56 34 12 \tvmovw  0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x59, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 48 59 cb    \tvmulph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 48 59 8c c8 78 56 34 12 \tvmulph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x59, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 08 59 cb    \tvmulph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 08 59 8c c8 78 56 34 12 \tvmulph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x59, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 28 59 cb    \tvmulph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 28 59 8c c8 78 56 34 12 \tvmulph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x59, 0xcb, }, 6, 0, "", "",
+"62 f5 6e 08 59 cb    \tvmulsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 59 8c c8 78 56 34 12 \tvmulsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x7d, 0x48, 0x4c, 0xca, }, 6, 0, "", "",
+"62 f6 7d 48 4c ca    \tvrcpph %zmm2,%zmm1",},
+{{0x62, 0xf6, 0x7d, 0x48, 0x4c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 48 4c 8c c8 78 56 34 12 \tvrcpph 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf6, 0x7d, 0x08, 0x4c, 0xca, }, 6, 0, "", "",
+"62 f6 7d 08 4c ca    \tvrcpph %xmm2,%xmm1",},
+{{0x62, 0xf6, 0x7d, 0x08, 0x4c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 08 4c 8c c8 78 56 34 12 \tvrcpph 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf6, 0x7d, 0x28, 0x4c, 0xca, }, 6, 0, "", "",
+"62 f6 7d 28 4c ca    \tvrcpph %ymm2,%ymm1",},
+{{0x62, 0xf6, 0x7d, 0x28, 0x4c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 28 4c 8c c8 78 56 34 12 \tvrcpph 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x4d, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 4d cb    \tvrcpsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x4d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 4d 8c c8 78 56 34 12 \tvrcpsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x7c, 0x48, 0x56, 0xca, 0x12, }, 7, 0, "", "",
+"62 f3 7c 48 56 ca 12 \tvreduceph $0x12,%zmm2,%zmm1",},
+{{0x62, 0xf3, 0x7c, 0x48, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7c 48 56 8c c8 78 56 34 12 12 \tvreduceph $0x12,0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf3, 0x7c, 0x08, 0x56, 0xca, 0x12, }, 7, 0, "", "",
+"62 f3 7c 08 56 ca 12 \tvreduceph $0x12,%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x7c, 0x08, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7c 08 56 8c c8 78 56 34 12 12 \tvreduceph $0x12,0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf3, 0x7c, 0x28, 0x56, 0xca, 0x12, }, 7, 0, "", "",
+"62 f3 7c 28 56 ca 12 \tvreduceph $0x12,%ymm2,%ymm1",},
+{{0x62, 0xf3, 0x7c, 0x28, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7c 28 56 8c c8 78 56 34 12 12 \tvreduceph $0x12,0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf3, 0x6c, 0x08, 0x57, 0xcb, 0x12, }, 7, 0, "", "",
+"62 f3 6c 08 57 cb 12 \tvreducesh $0x12,%xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x6c, 0x08, 0x57, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 6c 08 57 8c c8 78 56 34 12 12 \tvreducesh $0x12,0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x7c, 0x48, 0x08, 0xca, 0x12, }, 7, 0, "", "",
+"62 f3 7c 48 08 ca 12 \tvrndscaleph $0x12,%zmm2,%zmm1",},
+{{0x62, 0xf3, 0x7c, 0x48, 0x08, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7c 48 08 8c c8 78 56 34 12 12 \tvrndscaleph $0x12,0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf3, 0x7c, 0x08, 0x08, 0xca, 0x12, }, 7, 0, "", "",
+"62 f3 7c 08 08 ca 12 \tvrndscaleph $0x12,%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x7c, 0x08, 0x08, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7c 08 08 8c c8 78 56 34 12 12 \tvrndscaleph $0x12,0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf3, 0x7c, 0x28, 0x08, 0xca, 0x12, }, 7, 0, "", "",
+"62 f3 7c 28 08 ca 12 \tvrndscaleph $0x12,%ymm2,%ymm1",},
+{{0x62, 0xf3, 0x7c, 0x28, 0x08, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7c 28 08 8c c8 78 56 34 12 12 \tvrndscaleph $0x12,0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf3, 0x6c, 0x08, 0x0a, 0xcb, 0x12, }, 7, 0, "", "",
+"62 f3 6c 08 0a cb 12 \tvrndscalesh $0x12,%xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x6c, 0x08, 0x0a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 6c 08 0a 8c c8 78 56 34 12 12 \tvrndscalesh $0x12,0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x7d, 0x48, 0x4e, 0xca, }, 6, 0, "", "",
+"62 f6 7d 48 4e ca    \tvrsqrtph %zmm2,%zmm1",},
+{{0x62, 0xf6, 0x7d, 0x48, 0x4e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 48 4e 8c c8 78 56 34 12 \tvrsqrtph 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf6, 0x7d, 0x08, 0x4e, 0xca, }, 6, 0, "", "",
+"62 f6 7d 08 4e ca    \tvrsqrtph %xmm2,%xmm1",},
+{{0x62, 0xf6, 0x7d, 0x08, 0x4e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 08 4e 8c c8 78 56 34 12 \tvrsqrtph 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf6, 0x7d, 0x28, 0x4e, 0xca, }, 6, 0, "", "",
+"62 f6 7d 28 4e ca    \tvrsqrtph %ymm2,%ymm1",},
+{{0x62, 0xf6, 0x7d, 0x28, 0x4e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 28 4e 8c c8 78 56 34 12 \tvrsqrtph 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x4f, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 4f cb    \tvrsqrtsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x4f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 4f 8c c8 78 56 34 12 \tvrsqrtsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x2c, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 2c cb    \tvscalefph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x2c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 2c 8c c8 78 56 34 12 \tvscalefph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x2c, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 2c cb    \tvscalefph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x2c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 2c 8c c8 78 56 34 12 \tvscalefph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x2c, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 2c cb    \tvscalefph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x2c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 2c 8c c8 78 56 34 12 \tvscalefph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x2d, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 2d cb    \tvscalefsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x2d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 2d 8c c8 78 56 34 12 \tvscalefsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x51, 0xca, }, 6, 0, "", "",
+"62 f5 7c 48 51 ca    \tvsqrtph %zmm2,%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 48 51 8c c8 78 56 34 12 \tvsqrtph 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x51, 0xca, }, 6, 0, "", "",
+"62 f5 7c 08 51 ca    \tvsqrtph %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 08 51 8c c8 78 56 34 12 \tvsqrtph 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x51, 0xca, }, 6, 0, "", "",
+"62 f5 7c 28 51 ca    \tvsqrtph %ymm2,%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 28 51 8c c8 78 56 34 12 \tvsqrtph 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x51, 0xcb, }, 6, 0, "", "",
+"62 f5 6e 08 51 cb    \tvsqrtsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 51 8c c8 78 56 34 12 \tvsqrtsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x5c, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 48 5c cb    \tvsubph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 48 5c 8c c8 78 56 34 12 \tvsubph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x5c, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 08 5c cb    \tvsubph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 08 5c 8c c8 78 56 34 12 \tvsubph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x5c, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 28 5c cb    \tvsubph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 28 5c 8c c8 78 56 34 12 \tvsubph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x5c, 0xcb, }, 6, 0, "", "",
+"62 f5 6e 08 5c cb    \tvsubsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 5c 8c c8 78 56 34 12 \tvsubsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x2e, 0xca, }, 6, 0, "", "",
+"62 f5 7c 08 2e ca    \tvucomish %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x2e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 08 2e 8c c8 78 56 34 12 \tvucomish 0x12345678(%eax,%ecx,8),%xmm1",},
 {{0xf3, 0x0f, 0x3a, 0xf0, 0xc0, 0x00, }, 6, 0, "", "",
 "f3 0f 3a f0 c0 00    \threset $0x0",},
 {{0x0f, 0x01, 0xe8, }, 3, 0, "", "",
diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-64.c b/tools/perf/arch/x86/tests/insn-x86-dat-64.c
index b2d0ba45262b3..3a47e98fec331 100644
--- a/tools/perf/arch/x86/tests/insn-x86-dat-64.c
+++ b/tools/perf/arch/x86/tests/insn-x86-dat-64.c
@@ -2507,6 +2507,1376 @@
 "f3 0f 01 ed          \ttestui ",},
 {{0xf3, 0x0f, 0x01, 0xec, }, 4, 0, "", "",
 "f3 0f 01 ec          \tuiret  ",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x58, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 48 58 cb    \tvaddph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 48 58 8c c8 78 56 34 12 \tvaddph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf5, 0x6c, 0x48, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6c 48 58 8c c8 78 56 34 12 \tvaddph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x58, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 08 58 cb    \tvaddph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 08 58 8c c8 78 56 34 12 \tvaddph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf5, 0x6c, 0x08, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6c 08 58 8c c8 78 56 34 12 \tvaddph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x58, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 28 58 cb    \tvaddph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 28 58 8c c8 78 56 34 12 \tvaddph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf5, 0x6c, 0x28, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6c 28 58 8c c8 78 56 34 12 \tvaddph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x58, 0xcb, }, 6, 0, "", "",
+"62 f5 6e 08 58 cb    \tvaddsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 58 8c c8 78 56 34 12 \tvaddsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6e 08 58 8c c8 78 56 34 12 \tvaddsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x6c, 0x48, 0xc2, 0xeb, 0x12, }, 7, 0, "", "",
+"62 f3 6c 48 c2 eb 12 \tvcmple_oqph %zmm3,%zmm2,%k5",},
+{{0x62, 0xf3, 0x6c, 0x48, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 6c 48 c2 ac c8 78 56 34 12 12 \tvcmple_oqph 0x12345678(%rax,%rcx,8),%zmm2,%k5",},
+{{0x67, 0x62, 0xf3, 0x6c, 0x48, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "",
+"67 62 f3 6c 48 c2 ac c8 78 56 34 12 12 \tvcmple_oqph 0x12345678(%eax,%ecx,8),%zmm2,%k5",},
+{{0x62, 0xf3, 0x6c, 0x08, 0xc2, 0xeb, 0x12, }, 7, 0, "", "",
+"62 f3 6c 08 c2 eb 12 \tvcmple_oqph %xmm3,%xmm2,%k5",},
+{{0x62, 0xf3, 0x6c, 0x08, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 6c 08 c2 ac c8 78 56 34 12 12 \tvcmple_oqph 0x12345678(%rax,%rcx,8),%xmm2,%k5",},
+{{0x67, 0x62, 0xf3, 0x6c, 0x08, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "",
+"67 62 f3 6c 08 c2 ac c8 78 56 34 12 12 \tvcmple_oqph 0x12345678(%eax,%ecx,8),%xmm2,%k5",},
+{{0x62, 0xf3, 0x6c, 0x28, 0xc2, 0xeb, 0x12, }, 7, 0, "", "",
+"62 f3 6c 28 c2 eb 12 \tvcmple_oqph %ymm3,%ymm2,%k5",},
+{{0x62, 0xf3, 0x6c, 0x28, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 6c 28 c2 ac c8 78 56 34 12 12 \tvcmple_oqph 0x12345678(%rax,%rcx,8),%ymm2,%k5",},
+{{0x67, 0x62, 0xf3, 0x6c, 0x28, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "",
+"67 62 f3 6c 28 c2 ac c8 78 56 34 12 12 \tvcmple_oqph 0x12345678(%eax,%ecx,8),%ymm2,%k5",},
+{{0x62, 0xf3, 0x6e, 0x08, 0xc2, 0xeb, 0x12, }, 7, 0, "", "",
+"62 f3 6e 08 c2 eb 12 \tvcmple_oqsh %xmm3,%xmm2,%k5",},
+{{0x62, 0xf3, 0x6e, 0x08, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 6e 08 c2 ac c8 78 56 34 12 12 \tvcmple_oqsh 0x12345678(%rax,%rcx,8),%xmm2,%k5",},
+{{0x67, 0x62, 0xf3, 0x6e, 0x08, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "",
+"67 62 f3 6e 08 c2 ac c8 78 56 34 12 12 \tvcmple_oqsh 0x12345678(%eax,%ecx,8),%xmm2,%k5",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x2f, 0xca, }, 6, 0, "", "",
+"62 f5 7c 08 2f ca    \tvcomish %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x2f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 08 2f 8c c8 78 56 34 12 \tvcomish 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf5, 0x7c, 0x08, 0x2f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7c 08 2f 8c c8 78 56 34 12 \tvcomish 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 7c 48 5b ca    \tvcvtdq2ph %zmm2,%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 48 5b 8c c8 78 56 34 12 \tvcvtdq2ph 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf5, 0x7c, 0x48, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7c 48 5b 8c c8 78 56 34 12 \tvcvtdq2ph 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 7c 08 5b ca    \tvcvtdq2ph %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 7c 28 5b ca    \tvcvtdq2ph %ymm2,%xmm1",},
+{{0x62, 0xf5, 0xfd, 0x48, 0x5a, 0xca, }, 6, 0, "", "",
+"62 f5 fd 48 5a ca    \tvcvtpd2ph %zmm2,%xmm1",},
+{{0x62, 0xf5, 0xfd, 0x08, 0x5a, 0xca, }, 6, 0, "", "",
+"62 f5 fd 08 5a ca    \tvcvtpd2ph %xmm2,%xmm1",},
+{{0x62, 0xf5, 0xfd, 0x28, 0x5a, 0xca, }, 6, 0, "", "",
+"62 f5 fd 28 5a ca    \tvcvtpd2ph %ymm2,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 7d 48 5b ca    \tvcvtph2dq %ymm2,%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 48 5b 8c c8 78 56 34 12 \tvcvtph2dq 0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x48, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 48 5b 8c c8 78 56 34 12 \tvcvtph2dq 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 7d 08 5b ca    \tvcvtph2dq %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 08 5b 8c c8 78 56 34 12 \tvcvtph2dq 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x08, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 08 5b 8c c8 78 56 34 12 \tvcvtph2dq 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 7d 28 5b ca    \tvcvtph2dq %xmm2,%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 28 5b 8c c8 78 56 34 12 \tvcvtph2dq 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x28, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 28 5b 8c c8 78 56 34 12 \tvcvtph2dq 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x5a, 0xca, }, 6, 0, "", "",
+"62 f5 7c 48 5a ca    \tvcvtph2pd %xmm2,%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 48 5a 8c c8 78 56 34 12 \tvcvtph2pd 0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf5, 0x7c, 0x48, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7c 48 5a 8c c8 78 56 34 12 \tvcvtph2pd 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x5a, 0xca, }, 6, 0, "", "",
+"62 f5 7c 08 5a ca    \tvcvtph2pd %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 08 5a 8c c8 78 56 34 12 \tvcvtph2pd 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf5, 0x7c, 0x08, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7c 08 5a 8c c8 78 56 34 12 \tvcvtph2pd 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x5a, 0xca, }, 6, 0, "", "",
+"62 f5 7c 28 5a ca    \tvcvtph2pd %xmm2,%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 28 5a 8c c8 78 56 34 12 \tvcvtph2pd 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf5, 0x7c, 0x28, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7c 28 5a 8c c8 78 56 34 12 \tvcvtph2pd 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf2, 0x7d, 0x48, 0x13, 0xca, }, 6, 0, "", "",
+"62 f2 7d 48 13 ca    \tvcvtph2ps %ymm2,%zmm1",},
+{{0x62, 0xf2, 0x7d, 0x48, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f2 7d 48 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf2, 0x7d, 0x48, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f2 7d 48 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0xc4, 0xe2, 0x79, 0x13, 0xca, }, 5, 0, "", "",
+"c4 e2 79 13 ca       \tvcvtph2ps %xmm2,%xmm1",},
+{{0xc4, 0xe2, 0x79, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"c4 e2 79 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0xc4, 0xe2, 0x79, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"67 c4 e2 79 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0xc4, 0xe2, 0x7d, 0x13, 0xca, }, 5, 0, "", "",
+"c4 e2 7d 13 ca       \tvcvtph2ps %xmm2,%ymm1",},
+{{0xc4, 0xe2, 0x7d, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"c4 e2 7d 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0xc4, 0xe2, 0x7d, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"67 c4 e2 7d 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0xc4, 0xe2, 0x79, 0x13, 0xca, }, 5, 0, "", "",
+"c4 e2 79 13 ca       \tvcvtph2ps %xmm2,%xmm1",},
+{{0xc4, 0xe2, 0x79, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"c4 e2 79 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0xc4, 0xe2, 0x79, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"67 c4 e2 79 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0xc4, 0xe2, 0x7d, 0x13, 0xca, }, 5, 0, "", "",
+"c4 e2 7d 13 ca       \tvcvtph2ps %xmm2,%ymm1",},
+{{0xc4, 0xe2, 0x7d, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"c4 e2 7d 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0xc4, 0xe2, 0x7d, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"67 c4 e2 7d 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf6, 0x7d, 0x48, 0x13, 0xca, }, 6, 0, "", "",
+"62 f6 7d 48 13 ca    \tvcvtph2psx %ymm2,%zmm1",},
+{{0x62, 0xf6, 0x7d, 0x48, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 48 13 8c c8 78 56 34 12 \tvcvtph2psx 0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf6, 0x7d, 0x48, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 7d 48 13 8c c8 78 56 34 12 \tvcvtph2psx 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf6, 0x7d, 0x08, 0x13, 0xca, }, 6, 0, "", "",
+"62 f6 7d 08 13 ca    \tvcvtph2psx %xmm2,%xmm1",},
+{{0x62, 0xf6, 0x7d, 0x08, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 08 13 8c c8 78 56 34 12 \tvcvtph2psx 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf6, 0x7d, 0x08, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 7d 08 13 8c c8 78 56 34 12 \tvcvtph2psx 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf6, 0x7d, 0x28, 0x13, 0xca, }, 6, 0, "", "",
+"62 f6 7d 28 13 ca    \tvcvtph2psx %xmm2,%ymm1",},
+{{0x62, 0xf6, 0x7d, 0x28, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 28 13 8c c8 78 56 34 12 \tvcvtph2psx 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf6, 0x7d, 0x28, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 7d 28 13 8c c8 78 56 34 12 \tvcvtph2psx 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x7b, 0xca, }, 6, 0, "", "",
+"62 f5 7d 48 7b ca    \tvcvtph2qq %xmm2,%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 48 7b 8c c8 78 56 34 12 \tvcvtph2qq 0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x48, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 48 7b 8c c8 78 56 34 12 \tvcvtph2qq 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x7b, 0xca, }, 6, 0, "", "",
+"62 f5 7d 08 7b ca    \tvcvtph2qq %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 08 7b 8c c8 78 56 34 12 \tvcvtph2qq 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x08, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 08 7b 8c c8 78 56 34 12 \tvcvtph2qq 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x7b, 0xca, }, 6, 0, "", "",
+"62 f5 7d 28 7b ca    \tvcvtph2qq %xmm2,%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 28 7b 8c c8 78 56 34 12 \tvcvtph2qq 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x28, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 28 7b 8c c8 78 56 34 12 \tvcvtph2qq 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x79, 0xca, }, 6, 0, "", "",
+"62 f5 7c 48 79 ca    \tvcvtph2udq %ymm2,%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 48 79 8c c8 78 56 34 12 \tvcvtph2udq 0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf5, 0x7c, 0x48, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7c 48 79 8c c8 78 56 34 12 \tvcvtph2udq 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x79, 0xca, }, 6, 0, "", "",
+"62 f5 7c 08 79 ca    \tvcvtph2udq %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 08 79 8c c8 78 56 34 12 \tvcvtph2udq 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf5, 0x7c, 0x08, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7c 08 79 8c c8 78 56 34 12 \tvcvtph2udq 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x79, 0xca, }, 6, 0, "", "",
+"62 f5 7c 28 79 ca    \tvcvtph2udq %xmm2,%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 28 79 8c c8 78 56 34 12 \tvcvtph2udq 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf5, 0x7c, 0x28, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7c 28 79 8c c8 78 56 34 12 \tvcvtph2udq 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x79, 0xca, }, 6, 0, "", "",
+"62 f5 7d 48 79 ca    \tvcvtph2uqq %xmm2,%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 48 79 8c c8 78 56 34 12 \tvcvtph2uqq 0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x48, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 48 79 8c c8 78 56 34 12 \tvcvtph2uqq 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x79, 0xca, }, 6, 0, "", "",
+"62 f5 7d 08 79 ca    \tvcvtph2uqq %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 08 79 8c c8 78 56 34 12 \tvcvtph2uqq 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x08, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 08 79 8c c8 78 56 34 12 \tvcvtph2uqq 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x79, 0xca, }, 6, 0, "", "",
+"62 f5 7d 28 79 ca    \tvcvtph2uqq %xmm2,%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 28 79 8c c8 78 56 34 12 \tvcvtph2uqq 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x28, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 28 79 8c c8 78 56 34 12 \tvcvtph2uqq 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7c 48 7d ca    \tvcvtph2uw %zmm2,%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 48 7d 8c c8 78 56 34 12 \tvcvtph2uw 0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf5, 0x7c, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7c 48 7d 8c c8 78 56 34 12 \tvcvtph2uw 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7c 08 7d ca    \tvcvtph2uw %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 08 7d 8c c8 78 56 34 12 \tvcvtph2uw 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf5, 0x7c, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7c 08 7d 8c c8 78 56 34 12 \tvcvtph2uw 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7c 28 7d ca    \tvcvtph2uw %ymm2,%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 28 7d 8c c8 78 56 34 12 \tvcvtph2uw 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf5, 0x7c, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7c 28 7d 8c c8 78 56 34 12 \tvcvtph2uw 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7d 48 7d ca    \tvcvtph2w %zmm2,%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 48 7d 8c c8 78 56 34 12 \tvcvtph2w 0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 48 7d 8c c8 78 56 34 12 \tvcvtph2w 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7d 08 7d ca    \tvcvtph2w %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 08 7d 8c c8 78 56 34 12 \tvcvtph2w 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 08 7d 8c c8 78 56 34 12 \tvcvtph2w 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7d 28 7d ca    \tvcvtph2w %ymm2,%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 28 7d 8c c8 78 56 34 12 \tvcvtph2w 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 28 7d 8c c8 78 56 34 12 \tvcvtph2w 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf3, 0x7d, 0x48, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7d 48 1d 8c c8 78 56 34 12 12 \tvcvtps2ph $0x12,%zmm1,0x12345678(%rax,%rcx,8)",},
+{{0x67, 0x62, 0xf3, 0x7d, 0x48, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "",
+"67 62 f3 7d 48 1d 8c c8 78 56 34 12 12 \tvcvtps2ph $0x12,%zmm1,0x12345678(%eax,%ecx,8)",},
+{{0x62, 0xf3, 0x7d, 0x48, 0x1d, 0xd1, 0x12, }, 7, 0, "", "",
+"62 f3 7d 48 1d d1 12 \tvcvtps2ph $0x12,%zmm2,%ymm1",},
+{{0xc4, 0xe3, 0x7d, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 11, 0, "", "",
+"c4 e3 7d 1d 8c c8 78 56 34 12 12 \tvcvtps2ph $0x12,%ymm1,0x12345678(%rax,%rcx,8)",},
+{{0x67, 0xc4, 0xe3, 0x7d, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"67 c4 e3 7d 1d 8c c8 78 56 34 12 12 \tvcvtps2ph $0x12,%ymm1,0x12345678(%eax,%ecx,8)",},
+{{0xc4, 0xe3, 0x79, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 11, 0, "", "",
+"c4 e3 79 1d 8c c8 78 56 34 12 12 \tvcvtps2ph $0x12,%xmm1,0x12345678(%rax,%rcx,8)",},
+{{0x67, 0xc4, 0xe3, 0x79, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"67 c4 e3 79 1d 8c c8 78 56 34 12 12 \tvcvtps2ph $0x12,%xmm1,0x12345678(%eax,%ecx,8)",},
+{{0xc4, 0xe3, 0x79, 0x1d, 0xd1, 0x12, }, 6, 0, "", "",
+"c4 e3 79 1d d1 12    \tvcvtps2ph $0x12,%xmm2,%xmm1",},
+{{0xc4, 0xe3, 0x7d, 0x1d, 0xd1, 0x12, }, 6, 0, "", "",
+"c4 e3 7d 1d d1 12    \tvcvtps2ph $0x12,%ymm2,%xmm1",},
+{{0xc4, 0xe3, 0x7d, 0x1d, 0xd1, 0x12, }, 6, 0, "", "",
+"c4 e3 7d 1d d1 12    \tvcvtps2ph $0x12,%ymm2,%xmm1",},
+{{0xc4, 0xe3, 0x7d, 0x1d, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 11, 0, "", "",
+"c4 e3 7d 1d 94 c8 78 56 34 12 12 \tvcvtps2ph $0x12,%ymm2,0x12345678(%rax,%rcx,8)",},
+{{0x67, 0xc4, 0xe3, 0x7d, 0x1d, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"67 c4 e3 7d 1d 94 c8 78 56 34 12 12 \tvcvtps2ph $0x12,%ymm2,0x12345678(%eax,%ecx,8)",},
+{{0xc4, 0xe3, 0x79, 0x1d, 0xd1, 0x12, }, 6, 0, "", "",
+"c4 e3 79 1d d1 12    \tvcvtps2ph $0x12,%xmm2,%xmm1",},
+{{0xc4, 0xe3, 0x79, 0x1d, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 11, 0, "", "",
+"c4 e3 79 1d 94 c8 78 56 34 12 12 \tvcvtps2ph $0x12,%xmm2,0x12345678(%rax,%rcx,8)",},
+{{0x67, 0xc4, 0xe3, 0x79, 0x1d, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"67 c4 e3 79 1d 94 c8 78 56 34 12 12 \tvcvtps2ph $0x12,%xmm2,0x12345678(%eax,%ecx,8)",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x1d, 0xca, }, 6, 0, "", "",
+"62 f5 7d 48 1d ca    \tvcvtps2phx %zmm2,%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 48 1d 8c c8 78 56 34 12 \tvcvtps2phx 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x48, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 48 1d 8c c8 78 56 34 12 \tvcvtps2phx 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x1d, 0xca, }, 6, 0, "", "",
+"62 f5 7d 08 1d ca    \tvcvtps2phx %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x1d, 0xca, }, 6, 0, "", "",
+"62 f5 7d 28 1d ca    \tvcvtps2phx %ymm2,%xmm1",},
+{{0x62, 0xf5, 0xfc, 0x48, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 fc 48 5b ca    \tvcvtqq2ph %zmm2,%xmm1",},
+{{0x62, 0xf5, 0xfc, 0x08, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 fc 08 5b ca    \tvcvtqq2ph %xmm2,%xmm1",},
+{{0x62, 0xf5, 0xfc, 0x28, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 fc 28 5b ca    \tvcvtqq2ph %ymm2,%xmm1",},
+{{0x67, 0x62, 0xf5, 0xef, 0x08, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 ef 08 5a 8c c8 78 56 34 12 \tvcvtsd2sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6e 08 5a 8c c8 78 56 34 12 \tvcvtsh2sd 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf5, 0x7e, 0x08, 0x2d, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7e 08 2d 84 c8 78 56 34 12 \tvcvtsh2si 0x12345678(%eax,%ecx,8),%eax",},
+{{0x67, 0x62, 0xf5, 0xfe, 0x08, 0x2d, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 fe 08 2d 84 c8 78 56 34 12 \tvcvtsh2si 0x12345678(%eax,%ecx,8),%rax",},
+{{0x67, 0x62, 0xf6, 0x6c, 0x08, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6c 08 13 8c c8 78 56 34 12 \tvcvtsh2ss 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x79, 0xc1, }, 6, 0, "", "",
+"62 f5 7e 08 79 c1    \tvcvtsh2usi %xmm1,%eax",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x79, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 08 79 84 c8 78 56 34 12 \tvcvtsh2usi 0x12345678(%rax,%rcx,8),%eax",},
+{{0x67, 0x62, 0xf5, 0x7e, 0x08, 0x79, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7e 08 79 84 c8 78 56 34 12 \tvcvtsh2usi 0x12345678(%eax,%ecx,8),%eax",},
+{{0x62, 0xf5, 0xfe, 0x08, 0x79, 0xc1, }, 6, 0, "", "",
+"62 f5 fe 08 79 c1    \tvcvtsh2usi %xmm1,%rax",},
+{{0x62, 0xf5, 0xfe, 0x08, 0x79, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 fe 08 79 84 c8 78 56 34 12 \tvcvtsh2usi 0x12345678(%rax,%rcx,8),%rax",},
+{{0x67, 0x62, 0xf5, 0xfe, 0x08, 0x79, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 fe 08 79 84 c8 78 56 34 12 \tvcvtsh2usi 0x12345678(%eax,%ecx,8),%rax",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x2a, 0xc8, }, 6, 0, "", "",
+"62 f5 6e 08 2a c8    \tvcvtsi2sh %eax,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x2a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 2a 8c c8 78 56 34 12 \tvcvtsi2shl 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x2a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6e 08 2a 8c c8 78 56 34 12 \tvcvtsi2shl 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0xee, 0x08, 0x2a, 0xc8, }, 6, 0, "", "",
+"62 f5 ee 08 2a c8    \tvcvtsi2sh %rax,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x2a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 2a 8c c8 78 56 34 12 \tvcvtsi2shl 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x2a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6e 08 2a 8c c8 78 56 34 12 \tvcvtsi2shl 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x1d, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 08 1d cb    \tvcvtss2sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 08 1d 8c c8 78 56 34 12 \tvcvtss2sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf5, 0x6c, 0x08, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6c 08 1d 8c c8 78 56 34 12 \tvcvtss2sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7e, 0x48, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 7e 48 5b ca    \tvcvttph2dq %ymm2,%zmm1",},
+{{0x62, 0xf5, 0x7e, 0x48, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 48 5b 8c c8 78 56 34 12 \tvcvttph2dq 0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf5, 0x7e, 0x48, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7e 48 5b 8c c8 78 56 34 12 \tvcvttph2dq 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 7e 08 5b ca    \tvcvttph2dq %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 08 5b 8c c8 78 56 34 12 \tvcvttph2dq 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf5, 0x7e, 0x08, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7e 08 5b 8c c8 78 56 34 12 \tvcvttph2dq 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7e, 0x28, 0x5b, 0xca, }, 6, 0, "", "",
+"62 f5 7e 28 5b ca    \tvcvttph2dq %xmm2,%ymm1",},
+{{0x62, 0xf5, 0x7e, 0x28, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 28 5b 8c c8 78 56 34 12 \tvcvttph2dq 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf5, 0x7e, 0x28, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7e 28 5b 8c c8 78 56 34 12 \tvcvttph2dq 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x7a, 0xca, }, 6, 0, "", "",
+"62 f5 7d 48 7a ca    \tvcvttph2qq %xmm2,%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 48 7a 8c c8 78 56 34 12 \tvcvttph2qq 0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x48, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 48 7a 8c c8 78 56 34 12 \tvcvttph2qq 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x7a, 0xca, }, 6, 0, "", "",
+"62 f5 7d 08 7a ca    \tvcvttph2qq %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 08 7a 8c c8 78 56 34 12 \tvcvttph2qq 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x08, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 08 7a 8c c8 78 56 34 12 \tvcvttph2qq 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x7a, 0xca, }, 6, 0, "", "",
+"62 f5 7d 28 7a ca    \tvcvttph2qq %xmm2,%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 28 7a 8c c8 78 56 34 12 \tvcvttph2qq 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x28, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 28 7a 8c c8 78 56 34 12 \tvcvttph2qq 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x78, 0xca, }, 6, 0, "", "",
+"62 f5 7c 48 78 ca    \tvcvttph2udq %ymm2,%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 48 78 8c c8 78 56 34 12 \tvcvttph2udq 0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf5, 0x7c, 0x48, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7c 48 78 8c c8 78 56 34 12 \tvcvttph2udq 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x78, 0xca, }, 6, 0, "", "",
+"62 f5 7c 08 78 ca    \tvcvttph2udq %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 08 78 8c c8 78 56 34 12 \tvcvttph2udq 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf5, 0x7c, 0x08, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7c 08 78 8c c8 78 56 34 12 \tvcvttph2udq 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x78, 0xca, }, 6, 0, "", "",
+"62 f5 7c 28 78 ca    \tvcvttph2udq %xmm2,%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 28 78 8c c8 78 56 34 12 \tvcvttph2udq 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf5, 0x7c, 0x28, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7c 28 78 8c c8 78 56 34 12 \tvcvttph2udq 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x78, 0xca, }, 6, 0, "", "",
+"62 f5 7d 48 78 ca    \tvcvttph2uqq %xmm2,%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 48 78 8c c8 78 56 34 12 \tvcvttph2uqq 0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x48, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 48 78 8c c8 78 56 34 12 \tvcvttph2uqq 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x78, 0xca, }, 6, 0, "", "",
+"62 f5 7d 08 78 ca    \tvcvttph2uqq %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 08 78 8c c8 78 56 34 12 \tvcvttph2uqq 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x08, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 08 78 8c c8 78 56 34 12 \tvcvttph2uqq 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x78, 0xca, }, 6, 0, "", "",
+"62 f5 7d 28 78 ca    \tvcvttph2uqq %xmm2,%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 28 78 8c c8 78 56 34 12 \tvcvttph2uqq 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x28, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 28 78 8c c8 78 56 34 12 \tvcvttph2uqq 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x7c, 0xca, }, 6, 0, "", "",
+"62 f5 7c 48 7c ca    \tvcvttph2uw %zmm2,%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 48 7c 8c c8 78 56 34 12 \tvcvttph2uw 0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf5, 0x7c, 0x48, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7c 48 7c 8c c8 78 56 34 12 \tvcvttph2uw 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x7c, 0xca, }, 6, 0, "", "",
+"62 f5 7c 08 7c ca    \tvcvttph2uw %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 08 7c 8c c8 78 56 34 12 \tvcvttph2uw 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf5, 0x7c, 0x08, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7c 08 7c 8c c8 78 56 34 12 \tvcvttph2uw 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x7c, 0xca, }, 6, 0, "", "",
+"62 f5 7c 28 7c ca    \tvcvttph2uw %ymm2,%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 28 7c 8c c8 78 56 34 12 \tvcvttph2uw 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf5, 0x7c, 0x28, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7c 28 7c 8c c8 78 56 34 12 \tvcvttph2uw 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x7c, 0xca, }, 6, 0, "", "",
+"62 f5 7d 48 7c ca    \tvcvttph2w %zmm2,%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x48, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 48 7c 8c c8 78 56 34 12 \tvcvttph2w 0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x48, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 48 7c 8c c8 78 56 34 12 \tvcvttph2w 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x7c, 0xca, }, 6, 0, "", "",
+"62 f5 7d 08 7c ca    \tvcvttph2w %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 08 7c 8c c8 78 56 34 12 \tvcvttph2w 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x08, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 08 7c 8c c8 78 56 34 12 \tvcvttph2w 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x7c, 0xca, }, 6, 0, "", "",
+"62 f5 7d 28 7c ca    \tvcvttph2w %ymm2,%ymm1",},
+{{0x62, 0xf5, 0x7d, 0x28, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 28 7c 8c c8 78 56 34 12 \tvcvttph2w 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x28, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 28 7c 8c c8 78 56 34 12 \tvcvttph2w 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x2c, 0xc1, }, 6, 0, "", "",
+"62 f5 7e 08 2c c1    \tvcvttsh2si %xmm1,%eax",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x2c, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 08 2c 84 c8 78 56 34 12 \tvcvttsh2si 0x12345678(%rax,%rcx,8),%eax",},
+{{0x67, 0x62, 0xf5, 0x7e, 0x08, 0x2c, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7e 08 2c 84 c8 78 56 34 12 \tvcvttsh2si 0x12345678(%eax,%ecx,8),%eax",},
+{{0x62, 0xf5, 0xfe, 0x08, 0x2c, 0xc1, }, 6, 0, "", "",
+"62 f5 fe 08 2c c1    \tvcvttsh2si %xmm1,%rax",},
+{{0x62, 0xf5, 0xfe, 0x08, 0x2c, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 fe 08 2c 84 c8 78 56 34 12 \tvcvttsh2si 0x12345678(%rax,%rcx,8),%rax",},
+{{0x67, 0x62, 0xf5, 0xfe, 0x08, 0x2c, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 fe 08 2c 84 c8 78 56 34 12 \tvcvttsh2si 0x12345678(%eax,%ecx,8),%rax",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x78, 0xc1, }, 6, 0, "", "",
+"62 f5 7e 08 78 c1    \tvcvttsh2usi %xmm1,%eax",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x78, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 08 78 84 c8 78 56 34 12 \tvcvttsh2usi 0x12345678(%rax,%rcx,8),%eax",},
+{{0x67, 0x62, 0xf5, 0x7e, 0x08, 0x78, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7e 08 78 84 c8 78 56 34 12 \tvcvttsh2usi 0x12345678(%eax,%ecx,8),%eax",},
+{{0x62, 0xf5, 0xfe, 0x08, 0x78, 0xc1, }, 6, 0, "", "",
+"62 f5 fe 08 78 c1    \tvcvttsh2usi %xmm1,%rax",},
+{{0x62, 0xf5, 0xfe, 0x08, 0x78, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 fe 08 78 84 c8 78 56 34 12 \tvcvttsh2usi 0x12345678(%rax,%rcx,8),%rax",},
+{{0x67, 0x62, 0xf5, 0xfe, 0x08, 0x78, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 fe 08 78 84 c8 78 56 34 12 \tvcvttsh2usi 0x12345678(%eax,%ecx,8),%rax",},
+{{0x62, 0xf5, 0x7f, 0x48, 0x7a, 0xca, }, 6, 0, "", "",
+"62 f5 7f 48 7a ca    \tvcvtudq2ph %zmm2,%ymm1",},
+{{0x62, 0xf5, 0x7f, 0x48, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7f 48 7a 8c c8 78 56 34 12 \tvcvtudq2ph 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf5, 0x7f, 0x48, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7f 48 7a 8c c8 78 56 34 12 \tvcvtudq2ph 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7f, 0x08, 0x7a, 0xca, }, 6, 0, "", "",
+"62 f5 7f 08 7a ca    \tvcvtudq2ph %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7f, 0x28, 0x7a, 0xca, }, 6, 0, "", "",
+"62 f5 7f 28 7a ca    \tvcvtudq2ph %ymm2,%xmm1",},
+{{0x62, 0xf5, 0xff, 0x48, 0x7a, 0xca, }, 6, 0, "", "",
+"62 f5 ff 48 7a ca    \tvcvtuqq2ph %zmm2,%xmm1",},
+{{0x62, 0xf5, 0xff, 0x08, 0x7a, 0xca, }, 6, 0, "", "",
+"62 f5 ff 08 7a ca    \tvcvtuqq2ph %xmm2,%xmm1",},
+{{0x62, 0xf5, 0xff, 0x28, 0x7a, 0xca, }, 6, 0, "", "",
+"62 f5 ff 28 7a ca    \tvcvtuqq2ph %ymm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x7b, 0xc8, }, 6, 0, "", "",
+"62 f5 6e 08 7b c8    \tvcvtusi2sh %eax,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 7b 8c c8 78 56 34 12 \tvcvtusi2shl 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6e 08 7b 8c c8 78 56 34 12 \tvcvtusi2shl 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0xee, 0x08, 0x7b, 0xc8, }, 6, 0, "", "",
+"62 f5 ee 08 7b c8    \tvcvtusi2sh %rax,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 7b 8c c8 78 56 34 12 \tvcvtusi2shl 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6e 08 7b 8c c8 78 56 34 12 \tvcvtusi2shl 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7f, 0x48, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7f 48 7d ca    \tvcvtuw2ph %zmm2,%zmm1",},
+{{0x62, 0xf5, 0x7f, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7f 48 7d 8c c8 78 56 34 12 \tvcvtuw2ph 0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf5, 0x7f, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7f 48 7d 8c c8 78 56 34 12 \tvcvtuw2ph 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7f, 0x08, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7f 08 7d ca    \tvcvtuw2ph %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7f, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7f 08 7d 8c c8 78 56 34 12 \tvcvtuw2ph 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf5, 0x7f, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7f 08 7d 8c c8 78 56 34 12 \tvcvtuw2ph 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7f, 0x28, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7f 28 7d ca    \tvcvtuw2ph %ymm2,%ymm1",},
+{{0x62, 0xf5, 0x7f, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7f 28 7d 8c c8 78 56 34 12 \tvcvtuw2ph 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf5, 0x7f, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7f 28 7d 8c c8 78 56 34 12 \tvcvtuw2ph 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x7e, 0x48, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7e 48 7d ca    \tvcvtw2ph %zmm2,%zmm1",},
+{{0x62, 0xf5, 0x7e, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 48 7d 8c c8 78 56 34 12 \tvcvtw2ph 0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf5, 0x7e, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7e 48 7d 8c c8 78 56 34 12 \tvcvtw2ph 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7e 08 7d ca    \tvcvtw2ph %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 08 7d 8c c8 78 56 34 12 \tvcvtw2ph 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf5, 0x7e, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7e 08 7d 8c c8 78 56 34 12 \tvcvtw2ph 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7e, 0x28, 0x7d, 0xca, }, 6, 0, "", "",
+"62 f5 7e 28 7d ca    \tvcvtw2ph %ymm2,%ymm1",},
+{{0x62, 0xf5, 0x7e, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 28 7d 8c c8 78 56 34 12 \tvcvtw2ph 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf5, 0x7e, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7e 28 7d 8c c8 78 56 34 12 \tvcvtw2ph 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x5e, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 48 5e cb    \tvdivph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 48 5e 8c c8 78 56 34 12 \tvdivph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf5, 0x6c, 0x48, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6c 48 5e 8c c8 78 56 34 12 \tvdivph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x5e, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 08 5e cb    \tvdivph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 08 5e 8c c8 78 56 34 12 \tvdivph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf5, 0x6c, 0x08, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6c 08 5e 8c c8 78 56 34 12 \tvdivph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x5e, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 28 5e cb    \tvdivph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 28 5e 8c c8 78 56 34 12 \tvdivph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf5, 0x6c, 0x28, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6c 28 5e 8c c8 78 56 34 12 \tvdivph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x5e, 0xcb, }, 6, 0, "", "",
+"62 f5 6e 08 5e cb    \tvdivsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 5e 8c c8 78 56 34 12 \tvdivsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6e 08 5e 8c c8 78 56 34 12 \tvdivsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6f, 0x48, 0x56, 0xcb, }, 6, 0, "", "",
+"62 f6 6f 48 56 cb    \tvfcmaddcph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6f, 0x48, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6f 48 56 8c c8 78 56 34 12 \tvfcmaddcph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6f, 0x48, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6f 48 56 8c c8 78 56 34 12 \tvfcmaddcph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6f, 0x08, 0x56, 0xcb, }, 6, 0, "", "",
+"62 f6 6f 08 56 cb    \tvfcmaddcph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6f, 0x08, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6f 08 56 8c c8 78 56 34 12 \tvfcmaddcph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6f, 0x08, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6f 08 56 8c c8 78 56 34 12 \tvfcmaddcph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6f, 0x28, 0x56, 0xcb, }, 6, 0, "", "",
+"62 f6 6f 28 56 cb    \tvfcmaddcph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6f, 0x28, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6f 28 56 8c c8 78 56 34 12 \tvfcmaddcph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6f, 0x28, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6f 28 56 8c c8 78 56 34 12 \tvfcmaddcph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6f, 0x08, 0x57, 0xcb, }, 6, 0, "", "",
+"62 f6 6f 08 57 cb    \tvfcmaddcsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6f, 0x08, 0x57, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6f 08 57 8c c8 78 56 34 12 \tvfcmaddcsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6f, 0x08, 0x57, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6f 08 57 8c c8 78 56 34 12 \tvfcmaddcsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6f, 0x48, 0xd6, 0xcb, }, 6, 0, "", "",
+"62 f6 6f 48 d6 cb    \tvfcmulcph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6f, 0x48, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6f 48 d6 8c c8 78 56 34 12 \tvfcmulcph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6f, 0x48, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6f 48 d6 8c c8 78 56 34 12 \tvfcmulcph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6f, 0x08, 0xd6, 0xcb, }, 6, 0, "", "",
+"62 f6 6f 08 d6 cb    \tvfcmulcph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6f, 0x08, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6f 08 d6 8c c8 78 56 34 12 \tvfcmulcph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6f, 0x08, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6f 08 d6 8c c8 78 56 34 12 \tvfcmulcph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6f, 0x28, 0xd6, 0xcb, }, 6, 0, "", "",
+"62 f6 6f 28 d6 cb    \tvfcmulcph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6f, 0x28, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6f 28 d6 8c c8 78 56 34 12 \tvfcmulcph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6f, 0x28, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6f 28 d6 8c c8 78 56 34 12 \tvfcmulcph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6f, 0x08, 0xd7, 0xcb, }, 6, 0, "", "",
+"62 f6 6f 08 d7 cb    \tvfcmulcsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6f, 0x08, 0xd7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6f 08 d7 8c c8 78 56 34 12 \tvfcmulcsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6f, 0x08, 0xd7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6f 08 d7 8c c8 78 56 34 12 \tvfcmulcsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x98, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 98 cb    \tvfmadd132ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x98, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 98 8c c8 78 56 34 12 \tvfmadd132ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0x98, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 48 98 8c c8 78 56 34 12 \tvfmadd132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x98, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 98 cb    \tvfmadd132ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x98, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 98 8c c8 78 56 34 12 \tvfmadd132ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x98, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 98 8c c8 78 56 34 12 \tvfmadd132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x98, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 98 cb    \tvfmadd132ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x98, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 98 8c c8 78 56 34 12 \tvfmadd132ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0x98, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 28 98 8c c8 78 56 34 12 \tvfmadd132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x99, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 99 cb    \tvfmadd132sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x99, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 99 8c c8 78 56 34 12 \tvfmadd132sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x99, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 99 8c c8 78 56 34 12 \tvfmadd132sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xa8, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 a8 cb    \tvfmadd213ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xa8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 a8 8c c8 78 56 34 12 \tvfmadd213ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xa8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 48 a8 8c c8 78 56 34 12 \tvfmadd213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xa8, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 a8 cb    \tvfmadd213ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xa8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 a8 8c c8 78 56 34 12 \tvfmadd213ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xa8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 a8 8c c8 78 56 34 12 \tvfmadd213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xa8, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 a8 cb    \tvfmadd213ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xa8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 a8 8c c8 78 56 34 12 \tvfmadd213ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xa8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 28 a8 8c c8 78 56 34 12 \tvfmadd213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xa9, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 a9 cb    \tvfmadd213sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xa9, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 a9 8c c8 78 56 34 12 \tvfmadd213sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xa9, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 a9 8c c8 78 56 34 12 \tvfmadd213sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xb8, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 b8 cb    \tvfmadd231ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xb8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 b8 8c c8 78 56 34 12 \tvfmadd231ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xb8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 48 b8 8c c8 78 56 34 12 \tvfmadd231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xb8, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 b8 cb    \tvfmadd231ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xb8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 b8 8c c8 78 56 34 12 \tvfmadd231ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xb8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 b8 8c c8 78 56 34 12 \tvfmadd231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xb8, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 b8 cb    \tvfmadd231ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xb8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 b8 8c c8 78 56 34 12 \tvfmadd231ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xb8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 28 b8 8c c8 78 56 34 12 \tvfmadd231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xb9, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 b9 cb    \tvfmadd231sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xb9, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 b9 8c c8 78 56 34 12 \tvfmadd231sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xb9, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 b9 8c c8 78 56 34 12 \tvfmadd231sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6e, 0x48, 0x56, 0xcb, }, 6, 0, "", "",
+"62 f6 6e 48 56 cb    \tvfmaddcph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6e, 0x48, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6e 48 56 8c c8 78 56 34 12 \tvfmaddcph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6e, 0x48, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6e 48 56 8c c8 78 56 34 12 \tvfmaddcph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6e, 0x08, 0x56, 0xcb, }, 6, 0, "", "",
+"62 f6 6e 08 56 cb    \tvfmaddcph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6e, 0x08, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6e 08 56 8c c8 78 56 34 12 \tvfmaddcph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6e, 0x08, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6e 08 56 8c c8 78 56 34 12 \tvfmaddcph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6e, 0x28, 0x56, 0xcb, }, 6, 0, "", "",
+"62 f6 6e 28 56 cb    \tvfmaddcph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6e, 0x28, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6e 28 56 8c c8 78 56 34 12 \tvfmaddcph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6e, 0x28, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6e 28 56 8c c8 78 56 34 12 \tvfmaddcph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6e, 0x08, 0x57, 0xcb, }, 6, 0, "", "",
+"62 f6 6e 08 57 cb    \tvfmaddcsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6e, 0x08, 0x57, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6e 08 57 8c c8 78 56 34 12 \tvfmaddcsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6e, 0x08, 0x57, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6e 08 57 8c c8 78 56 34 12 \tvfmaddcsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x96, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 96 cb    \tvfmaddsub132ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x96, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 96 8c c8 78 56 34 12 \tvfmaddsub132ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0x96, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 48 96 8c c8 78 56 34 12 \tvfmaddsub132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x96, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 96 cb    \tvfmaddsub132ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x96, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 96 8c c8 78 56 34 12 \tvfmaddsub132ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x96, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 96 8c c8 78 56 34 12 \tvfmaddsub132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x96, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 96 cb    \tvfmaddsub132ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x96, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 96 8c c8 78 56 34 12 \tvfmaddsub132ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0x96, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 28 96 8c c8 78 56 34 12 \tvfmaddsub132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xa6, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 a6 cb    \tvfmaddsub213ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xa6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 a6 8c c8 78 56 34 12 \tvfmaddsub213ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xa6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 48 a6 8c c8 78 56 34 12 \tvfmaddsub213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xa6, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 a6 cb    \tvfmaddsub213ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xa6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 a6 8c c8 78 56 34 12 \tvfmaddsub213ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xa6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 a6 8c c8 78 56 34 12 \tvfmaddsub213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xa6, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 a6 cb    \tvfmaddsub213ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xa6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 a6 8c c8 78 56 34 12 \tvfmaddsub213ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xa6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 28 a6 8c c8 78 56 34 12 \tvfmaddsub213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xb6, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 b6 cb    \tvfmaddsub231ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xb6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 b6 8c c8 78 56 34 12 \tvfmaddsub231ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xb6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 48 b6 8c c8 78 56 34 12 \tvfmaddsub231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xb6, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 b6 cb    \tvfmaddsub231ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xb6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 b6 8c c8 78 56 34 12 \tvfmaddsub231ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xb6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 b6 8c c8 78 56 34 12 \tvfmaddsub231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xb6, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 b6 cb    \tvfmaddsub231ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xb6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 b6 8c c8 78 56 34 12 \tvfmaddsub231ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xb6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 28 b6 8c c8 78 56 34 12 \tvfmaddsub231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x9a, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 9a cb    \tvfmsub132ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x9a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 9a 8c c8 78 56 34 12 \tvfmsub132ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0x9a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 48 9a 8c c8 78 56 34 12 \tvfmsub132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9a, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 9a cb    \tvfmsub132ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 9a 8c c8 78 56 34 12 \tvfmsub132ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x9a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 9a 8c c8 78 56 34 12 \tvfmsub132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x9a, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 9a cb    \tvfmsub132ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x9a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 9a 8c c8 78 56 34 12 \tvfmsub132ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0x9a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 28 9a 8c c8 78 56 34 12 \tvfmsub132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9b, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 9b cb    \tvfmsub132sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 9b 8c c8 78 56 34 12 \tvfmsub132sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x9b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 9b 8c c8 78 56 34 12 \tvfmsub132sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xaa, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 aa cb    \tvfmsub213ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xaa, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 aa 8c c8 78 56 34 12 \tvfmsub213ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xaa, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 48 aa 8c c8 78 56 34 12 \tvfmsub213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xaa, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 aa cb    \tvfmsub213ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xaa, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 aa 8c c8 78 56 34 12 \tvfmsub213ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xaa, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 aa 8c c8 78 56 34 12 \tvfmsub213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xaa, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 aa cb    \tvfmsub213ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xaa, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 aa 8c c8 78 56 34 12 \tvfmsub213ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xaa, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 28 aa 8c c8 78 56 34 12 \tvfmsub213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xab, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 ab cb    \tvfmsub213sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xab, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 ab 8c c8 78 56 34 12 \tvfmsub213sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xab, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 ab 8c c8 78 56 34 12 \tvfmsub213sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xba, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 ba cb    \tvfmsub231ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xba, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 ba 8c c8 78 56 34 12 \tvfmsub231ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xba, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 48 ba 8c c8 78 56 34 12 \tvfmsub231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xba, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 ba cb    \tvfmsub231ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xba, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 ba 8c c8 78 56 34 12 \tvfmsub231ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xba, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 ba 8c c8 78 56 34 12 \tvfmsub231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xba, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 ba cb    \tvfmsub231ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xba, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 ba 8c c8 78 56 34 12 \tvfmsub231ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xba, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 28 ba 8c c8 78 56 34 12 \tvfmsub231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xbb, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 bb cb    \tvfmsub231sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xbb, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 bb 8c c8 78 56 34 12 \tvfmsub231sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xbb, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 bb 8c c8 78 56 34 12 \tvfmsub231sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x97, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 97 cb    \tvfmsubadd132ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x97, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 97 8c c8 78 56 34 12 \tvfmsubadd132ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0x97, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 48 97 8c c8 78 56 34 12 \tvfmsubadd132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x97, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 97 cb    \tvfmsubadd132ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x97, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 97 8c c8 78 56 34 12 \tvfmsubadd132ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x97, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 97 8c c8 78 56 34 12 \tvfmsubadd132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x97, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 97 cb    \tvfmsubadd132ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x97, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 97 8c c8 78 56 34 12 \tvfmsubadd132ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0x97, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 28 97 8c c8 78 56 34 12 \tvfmsubadd132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xa7, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 a7 cb    \tvfmsubadd213ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xa7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 a7 8c c8 78 56 34 12 \tvfmsubadd213ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xa7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 48 a7 8c c8 78 56 34 12 \tvfmsubadd213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xa7, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 a7 cb    \tvfmsubadd213ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xa7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 a7 8c c8 78 56 34 12 \tvfmsubadd213ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xa7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 a7 8c c8 78 56 34 12 \tvfmsubadd213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xa7, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 a7 cb    \tvfmsubadd213ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xa7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 a7 8c c8 78 56 34 12 \tvfmsubadd213ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xa7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 28 a7 8c c8 78 56 34 12 \tvfmsubadd213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xb7, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 b7 cb    \tvfmsubadd231ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xb7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 b7 8c c8 78 56 34 12 \tvfmsubadd231ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xb7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 48 b7 8c c8 78 56 34 12 \tvfmsubadd231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xb7, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 b7 cb    \tvfmsubadd231ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xb7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 b7 8c c8 78 56 34 12 \tvfmsubadd231ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xb7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 b7 8c c8 78 56 34 12 \tvfmsubadd231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xb7, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 b7 cb    \tvfmsubadd231ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xb7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 b7 8c c8 78 56 34 12 \tvfmsubadd231ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xb7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 28 b7 8c c8 78 56 34 12 \tvfmsubadd231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6e, 0x48, 0xd6, 0xcb, }, 6, 0, "", "",
+"62 f6 6e 48 d6 cb    \tvfmulcph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6e, 0x48, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6e 48 d6 8c c8 78 56 34 12 \tvfmulcph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6e, 0x48, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6e 48 d6 8c c8 78 56 34 12 \tvfmulcph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6e, 0x08, 0xd6, 0xcb, }, 6, 0, "", "",
+"62 f6 6e 08 d6 cb    \tvfmulcph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6e, 0x08, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6e 08 d6 8c c8 78 56 34 12 \tvfmulcph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6e, 0x08, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6e 08 d6 8c c8 78 56 34 12 \tvfmulcph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6e, 0x28, 0xd6, 0xcb, }, 6, 0, "", "",
+"62 f6 6e 28 d6 cb    \tvfmulcph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6e, 0x28, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6e 28 d6 8c c8 78 56 34 12 \tvfmulcph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6e, 0x28, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6e 28 d6 8c c8 78 56 34 12 \tvfmulcph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6e, 0x08, 0xd7, 0xcb, }, 6, 0, "", "",
+"62 f6 6e 08 d7 cb    \tvfmulcsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6e, 0x08, 0xd7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6e 08 d7 8c c8 78 56 34 12 \tvfmulcsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6e, 0x08, 0xd7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6e 08 d7 8c c8 78 56 34 12 \tvfmulcsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x9c, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 9c cb    \tvfnmadd132ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x9c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 9c 8c c8 78 56 34 12 \tvfnmadd132ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0x9c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 48 9c 8c c8 78 56 34 12 \tvfnmadd132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9c, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 9c cb    \tvfnmadd132ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 9c 8c c8 78 56 34 12 \tvfnmadd132ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x9c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 9c 8c c8 78 56 34 12 \tvfnmadd132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x9c, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 9c cb    \tvfnmadd132ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x9c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 9c 8c c8 78 56 34 12 \tvfnmadd132ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0x9c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 28 9c 8c c8 78 56 34 12 \tvfnmadd132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9d, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 9d cb    \tvfnmadd132sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 9d 8c c8 78 56 34 12 \tvfnmadd132sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x9d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 9d 8c c8 78 56 34 12 \tvfnmadd132sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xac, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 ac cb    \tvfnmadd213ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xac, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 ac 8c c8 78 56 34 12 \tvfnmadd213ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xac, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 48 ac 8c c8 78 56 34 12 \tvfnmadd213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xac, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 ac cb    \tvfnmadd213ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xac, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 ac 8c c8 78 56 34 12 \tvfnmadd213ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xac, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 ac 8c c8 78 56 34 12 \tvfnmadd213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xac, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 ac cb    \tvfnmadd213ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xac, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 ac 8c c8 78 56 34 12 \tvfnmadd213ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xac, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 28 ac 8c c8 78 56 34 12 \tvfnmadd213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xad, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 ad cb    \tvfnmadd213sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xad, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 ad 8c c8 78 56 34 12 \tvfnmadd213sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xad, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 ad 8c c8 78 56 34 12 \tvfnmadd213sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xbc, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 bc cb    \tvfnmadd231ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xbc, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 bc 8c c8 78 56 34 12 \tvfnmadd231ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xbc, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 48 bc 8c c8 78 56 34 12 \tvfnmadd231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xbc, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 bc cb    \tvfnmadd231ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xbc, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 bc 8c c8 78 56 34 12 \tvfnmadd231ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xbc, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 bc 8c c8 78 56 34 12 \tvfnmadd231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xbc, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 bc cb    \tvfnmadd231ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xbc, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 bc 8c c8 78 56 34 12 \tvfnmadd231ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xbc, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 28 bc 8c c8 78 56 34 12 \tvfnmadd231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xbd, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 bd cb    \tvfnmadd231sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xbd, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 bd 8c c8 78 56 34 12 \tvfnmadd231sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xbd, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 bd 8c c8 78 56 34 12 \tvfnmadd231sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x9e, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 9e cb    \tvfnmsub132ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x9e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 9e 8c c8 78 56 34 12 \tvfnmsub132ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0x9e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 48 9e 8c c8 78 56 34 12 \tvfnmsub132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9e, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 9e cb    \tvfnmsub132ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 9e 8c c8 78 56 34 12 \tvfnmsub132ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x9e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 9e 8c c8 78 56 34 12 \tvfnmsub132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x9e, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 9e cb    \tvfnmsub132ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x9e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 9e 8c c8 78 56 34 12 \tvfnmsub132ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0x9e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 28 9e 8c c8 78 56 34 12 \tvfnmsub132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9f, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 9f cb    \tvfnmsub132sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x9f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 9f 8c c8 78 56 34 12 \tvfnmsub132sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x9f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 9f 8c c8 78 56 34 12 \tvfnmsub132sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xae, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 ae cb    \tvfnmsub213ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xae, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 ae 8c c8 78 56 34 12 \tvfnmsub213ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xae, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 48 ae 8c c8 78 56 34 12 \tvfnmsub213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xae, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 ae cb    \tvfnmsub213ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xae, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 ae 8c c8 78 56 34 12 \tvfnmsub213ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xae, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 ae 8c c8 78 56 34 12 \tvfnmsub213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xae, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 ae cb    \tvfnmsub213ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xae, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 ae 8c c8 78 56 34 12 \tvfnmsub213ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xae, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 28 ae 8c c8 78 56 34 12 \tvfnmsub213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xaf, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 af cb    \tvfnmsub213sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xaf, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 af 8c c8 78 56 34 12 \tvfnmsub213sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xaf, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 af 8c c8 78 56 34 12 \tvfnmsub213sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xbe, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 be cb    \tvfnmsub231ph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0xbe, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 be 8c c8 78 56 34 12 \tvfnmsub231ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xbe, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 48 be 8c c8 78 56 34 12 \tvfnmsub231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xbe, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 be cb    \tvfnmsub231ph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xbe, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 be 8c c8 78 56 34 12 \tvfnmsub231ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xbe, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 be 8c c8 78 56 34 12 \tvfnmsub231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xbe, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 be cb    \tvfnmsub231ph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0xbe, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 be 8c c8 78 56 34 12 \tvfnmsub231ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xbe, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 28 be 8c c8 78 56 34 12 \tvfnmsub231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xbf, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 bf cb    \tvfnmsub231sh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0xbf, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 bf 8c c8 78 56 34 12 \tvfnmsub231sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xbf, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 bf 8c c8 78 56 34 12 \tvfnmsub231sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x7c, 0x48, 0x66, 0xe9, 0x12, }, 7, 0, "", "",
+"62 f3 7c 48 66 e9 12 \tvfpclassph $0x12,%zmm1,%k5",},
+{{0x62, 0xf3, 0x7c, 0x08, 0x66, 0xe9, 0x12, }, 7, 0, "", "",
+"62 f3 7c 08 66 e9 12 \tvfpclassph $0x12,%xmm1,%k5",},
+{{0x62, 0xf3, 0x7c, 0x28, 0x66, 0xe9, 0x12, }, 7, 0, "", "",
+"62 f3 7c 28 66 e9 12 \tvfpclassph $0x12,%ymm1,%k5",},
+{{0x62, 0xf3, 0x7c, 0x08, 0x67, 0xe9, 0x12, }, 7, 0, "", "",
+"62 f3 7c 08 67 e9 12 \tvfpclasssh $0x12,%xmm1,%k5",},
+{{0x62, 0xf3, 0x7c, 0x08, 0x67, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7c 08 67 ac c8 78 56 34 12 12 \tvfpclasssh $0x12,0x12345678(%rax,%rcx,8),%k5",},
+{{0x67, 0x62, 0xf3, 0x7c, 0x08, 0x67, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "",
+"67 62 f3 7c 08 67 ac c8 78 56 34 12 12 \tvfpclasssh $0x12,0x12345678(%eax,%ecx,8),%k5",},
+{{0x62, 0xf6, 0x7d, 0x48, 0x42, 0xca, }, 6, 0, "", "",
+"62 f6 7d 48 42 ca    \tvgetexpph %zmm2,%zmm1",},
+{{0x62, 0xf6, 0x7d, 0x48, 0x42, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 48 42 8c c8 78 56 34 12 \tvgetexpph 0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf6, 0x7d, 0x48, 0x42, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 7d 48 42 8c c8 78 56 34 12 \tvgetexpph 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf6, 0x7d, 0x08, 0x42, 0xca, }, 6, 0, "", "",
+"62 f6 7d 08 42 ca    \tvgetexpph %xmm2,%xmm1",},
+{{0x62, 0xf6, 0x7d, 0x08, 0x42, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 08 42 8c c8 78 56 34 12 \tvgetexpph 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf6, 0x7d, 0x08, 0x42, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 7d 08 42 8c c8 78 56 34 12 \tvgetexpph 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf6, 0x7d, 0x28, 0x42, 0xca, }, 6, 0, "", "",
+"62 f6 7d 28 42 ca    \tvgetexpph %ymm2,%ymm1",},
+{{0x62, 0xf6, 0x7d, 0x28, 0x42, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 28 42 8c c8 78 56 34 12 \tvgetexpph 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf6, 0x7d, 0x28, 0x42, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 7d 28 42 8c c8 78 56 34 12 \tvgetexpph 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x43, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 43 cb    \tvgetexpsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x43, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 43 8c c8 78 56 34 12 \tvgetexpsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x43, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 43 8c c8 78 56 34 12 \tvgetexpsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x7c, 0x48, 0x26, 0xca, 0x12, }, 7, 0, "", "",
+"62 f3 7c 48 26 ca 12 \tvgetmantph $0x12,%zmm2,%zmm1",},
+{{0x62, 0xf3, 0x7c, 0x48, 0x26, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7c 48 26 8c c8 78 56 34 12 12 \tvgetmantph $0x12,0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf3, 0x7c, 0x48, 0x26, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "",
+"67 62 f3 7c 48 26 8c c8 78 56 34 12 12 \tvgetmantph $0x12,0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf3, 0x7c, 0x08, 0x26, 0xca, 0x12, }, 7, 0, "", "",
+"62 f3 7c 08 26 ca 12 \tvgetmantph $0x12,%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x7c, 0x08, 0x26, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7c 08 26 8c c8 78 56 34 12 12 \tvgetmantph $0x12,0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf3, 0x7c, 0x08, 0x26, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "",
+"67 62 f3 7c 08 26 8c c8 78 56 34 12 12 \tvgetmantph $0x12,0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf3, 0x7c, 0x28, 0x26, 0xca, 0x12, }, 7, 0, "", "",
+"62 f3 7c 28 26 ca 12 \tvgetmantph $0x12,%ymm2,%ymm1",},
+{{0x62, 0xf3, 0x7c, 0x28, 0x26, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7c 28 26 8c c8 78 56 34 12 12 \tvgetmantph $0x12,0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf3, 0x7c, 0x28, 0x26, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "",
+"67 62 f3 7c 28 26 8c c8 78 56 34 12 12 \tvgetmantph $0x12,0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf3, 0x6c, 0x08, 0x27, 0xcb, 0x12, }, 7, 0, "", "",
+"62 f3 6c 08 27 cb 12 \tvgetmantsh $0x12,%xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x6c, 0x08, 0x27, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 6c 08 27 8c c8 78 56 34 12 12 \tvgetmantsh $0x12,0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf3, 0x6c, 0x08, 0x27, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "",
+"67 62 f3 6c 08 27 8c c8 78 56 34 12 12 \tvgetmantsh $0x12,0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x5f, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 48 5f cb    \tvmaxph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 48 5f 8c c8 78 56 34 12 \tvmaxph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf5, 0x6c, 0x48, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6c 48 5f 8c c8 78 56 34 12 \tvmaxph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x5f, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 08 5f cb    \tvmaxph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 08 5f 8c c8 78 56 34 12 \tvmaxph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf5, 0x6c, 0x08, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6c 08 5f 8c c8 78 56 34 12 \tvmaxph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x5f, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 28 5f cb    \tvmaxph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 28 5f 8c c8 78 56 34 12 \tvmaxph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf5, 0x6c, 0x28, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6c 28 5f 8c c8 78 56 34 12 \tvmaxph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x5f, 0xcb, }, 6, 0, "", "",
+"62 f5 6e 08 5f cb    \tvmaxsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 5f 8c c8 78 56 34 12 \tvmaxsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6e 08 5f 8c c8 78 56 34 12 \tvmaxsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x5d, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 48 5d cb    \tvminph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 48 5d 8c c8 78 56 34 12 \tvminph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf5, 0x6c, 0x48, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6c 48 5d 8c c8 78 56 34 12 \tvminph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x5d, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 08 5d cb    \tvminph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 08 5d 8c c8 78 56 34 12 \tvminph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf5, 0x6c, 0x08, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6c 08 5d 8c c8 78 56 34 12 \tvminph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x5d, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 28 5d cb    \tvminph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 28 5d 8c c8 78 56 34 12 \tvminph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf5, 0x6c, 0x28, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6c 28 5d 8c c8 78 56 34 12 \tvminph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x5d, 0xcb, }, 6, 0, "", "",
+"62 f5 6e 08 5d cb    \tvminsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 5d 8c c8 78 56 34 12 \tvminsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6e 08 5d 8c c8 78 56 34 12 \tvminsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x11, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 08 11 8c c8 78 56 34 12 \tvmovsh %xmm1,0x12345678(%rax,%rcx,8)",},
+{{0x67, 0x62, 0xf5, 0x7e, 0x08, 0x11, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7e 08 11 8c c8 78 56 34 12 \tvmovsh %xmm1,0x12345678(%eax,%ecx,8)",},
+{{0x62, 0xf5, 0x7e, 0x08, 0x10, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7e 08 10 8c c8 78 56 34 12 \tvmovsh 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf5, 0x7e, 0x08, 0x10, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7e 08 10 8c c8 78 56 34 12 \tvmovsh 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x10, 0xcb, }, 6, 0, "", "",
+"62 f5 6e 08 10 cb    \tvmovsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x7e, 0xc8, }, 6, 0, "", "",
+"62 f5 7d 08 7e c8    \tvmovw  %xmm1,%eax",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x7e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 08 7e 8c c8 78 56 34 12 \tvmovw  %xmm1,0x12345678(%rax,%rcx,8)",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x08, 0x7e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 08 7e 8c c8 78 56 34 12 \tvmovw  %xmm1,0x12345678(%eax,%ecx,8)",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x6e, 0xc8, }, 6, 0, "", "",
+"62 f5 7d 08 6e c8    \tvmovw  %eax,%xmm1",},
+{{0x62, 0xf5, 0x7d, 0x08, 0x6e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7d 08 6e 8c c8 78 56 34 12 \tvmovw  0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf5, 0x7d, 0x08, 0x6e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7d 08 6e 8c c8 78 56 34 12 \tvmovw  0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x59, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 48 59 cb    \tvmulph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 48 59 8c c8 78 56 34 12 \tvmulph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf5, 0x6c, 0x48, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6c 48 59 8c c8 78 56 34 12 \tvmulph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x59, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 08 59 cb    \tvmulph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 08 59 8c c8 78 56 34 12 \tvmulph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf5, 0x6c, 0x08, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6c 08 59 8c c8 78 56 34 12 \tvmulph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x59, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 28 59 cb    \tvmulph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 28 59 8c c8 78 56 34 12 \tvmulph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf5, 0x6c, 0x28, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6c 28 59 8c c8 78 56 34 12 \tvmulph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x59, 0xcb, }, 6, 0, "", "",
+"62 f5 6e 08 59 cb    \tvmulsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 59 8c c8 78 56 34 12 \tvmulsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6e 08 59 8c c8 78 56 34 12 \tvmulsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x7d, 0x48, 0x4c, 0xca, }, 6, 0, "", "",
+"62 f6 7d 48 4c ca    \tvrcpph %zmm2,%zmm1",},
+{{0x62, 0xf6, 0x7d, 0x48, 0x4c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 48 4c 8c c8 78 56 34 12 \tvrcpph 0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf6, 0x7d, 0x48, 0x4c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 7d 48 4c 8c c8 78 56 34 12 \tvrcpph 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf6, 0x7d, 0x08, 0x4c, 0xca, }, 6, 0, "", "",
+"62 f6 7d 08 4c ca    \tvrcpph %xmm2,%xmm1",},
+{{0x62, 0xf6, 0x7d, 0x08, 0x4c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 08 4c 8c c8 78 56 34 12 \tvrcpph 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf6, 0x7d, 0x08, 0x4c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 7d 08 4c 8c c8 78 56 34 12 \tvrcpph 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf6, 0x7d, 0x28, 0x4c, 0xca, }, 6, 0, "", "",
+"62 f6 7d 28 4c ca    \tvrcpph %ymm2,%ymm1",},
+{{0x62, 0xf6, 0x7d, 0x28, 0x4c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 28 4c 8c c8 78 56 34 12 \tvrcpph 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf6, 0x7d, 0x28, 0x4c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 7d 28 4c 8c c8 78 56 34 12 \tvrcpph 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x4d, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 4d cb    \tvrcpsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x4d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 4d 8c c8 78 56 34 12 \tvrcpsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x4d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 4d 8c c8 78 56 34 12 \tvrcpsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x7c, 0x48, 0x56, 0xca, 0x12, }, 7, 0, "", "",
+"62 f3 7c 48 56 ca 12 \tvreduceph $0x12,%zmm2,%zmm1",},
+{{0x62, 0xf3, 0x7c, 0x48, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7c 48 56 8c c8 78 56 34 12 12 \tvreduceph $0x12,0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf3, 0x7c, 0x48, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "",
+"67 62 f3 7c 48 56 8c c8 78 56 34 12 12 \tvreduceph $0x12,0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf3, 0x7c, 0x08, 0x56, 0xca, 0x12, }, 7, 0, "", "",
+"62 f3 7c 08 56 ca 12 \tvreduceph $0x12,%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x7c, 0x08, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7c 08 56 8c c8 78 56 34 12 12 \tvreduceph $0x12,0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf3, 0x7c, 0x08, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "",
+"67 62 f3 7c 08 56 8c c8 78 56 34 12 12 \tvreduceph $0x12,0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf3, 0x7c, 0x28, 0x56, 0xca, 0x12, }, 7, 0, "", "",
+"62 f3 7c 28 56 ca 12 \tvreduceph $0x12,%ymm2,%ymm1",},
+{{0x62, 0xf3, 0x7c, 0x28, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7c 28 56 8c c8 78 56 34 12 12 \tvreduceph $0x12,0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf3, 0x7c, 0x28, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "",
+"67 62 f3 7c 28 56 8c c8 78 56 34 12 12 \tvreduceph $0x12,0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf3, 0x6c, 0x08, 0x57, 0xcb, 0x12, }, 7, 0, "", "",
+"62 f3 6c 08 57 cb 12 \tvreducesh $0x12,%xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x6c, 0x08, 0x57, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 6c 08 57 8c c8 78 56 34 12 12 \tvreducesh $0x12,0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf3, 0x6c, 0x08, 0x57, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "",
+"67 62 f3 6c 08 57 8c c8 78 56 34 12 12 \tvreducesh $0x12,0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x7c, 0x48, 0x08, 0xca, 0x12, }, 7, 0, "", "",
+"62 f3 7c 48 08 ca 12 \tvrndscaleph $0x12,%zmm2,%zmm1",},
+{{0x62, 0xf3, 0x7c, 0x48, 0x08, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7c 48 08 8c c8 78 56 34 12 12 \tvrndscaleph $0x12,0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf3, 0x7c, 0x48, 0x08, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "",
+"67 62 f3 7c 48 08 8c c8 78 56 34 12 12 \tvrndscaleph $0x12,0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf3, 0x7c, 0x08, 0x08, 0xca, 0x12, }, 7, 0, "", "",
+"62 f3 7c 08 08 ca 12 \tvrndscaleph $0x12,%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x7c, 0x08, 0x08, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7c 08 08 8c c8 78 56 34 12 12 \tvrndscaleph $0x12,0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf3, 0x7c, 0x08, 0x08, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "",
+"67 62 f3 7c 08 08 8c c8 78 56 34 12 12 \tvrndscaleph $0x12,0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf3, 0x7c, 0x28, 0x08, 0xca, 0x12, }, 7, 0, "", "",
+"62 f3 7c 28 08 ca 12 \tvrndscaleph $0x12,%ymm2,%ymm1",},
+{{0x62, 0xf3, 0x7c, 0x28, 0x08, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 7c 28 08 8c c8 78 56 34 12 12 \tvrndscaleph $0x12,0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf3, 0x7c, 0x28, 0x08, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "",
+"67 62 f3 7c 28 08 8c c8 78 56 34 12 12 \tvrndscaleph $0x12,0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf3, 0x6c, 0x08, 0x0a, 0xcb, 0x12, }, 7, 0, "", "",
+"62 f3 6c 08 0a cb 12 \tvrndscalesh $0x12,%xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf3, 0x6c, 0x08, 0x0a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "",
+"62 f3 6c 08 0a 8c c8 78 56 34 12 12 \tvrndscalesh $0x12,0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf3, 0x6c, 0x08, 0x0a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "",
+"67 62 f3 6c 08 0a 8c c8 78 56 34 12 12 \tvrndscalesh $0x12,0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x7d, 0x48, 0x4e, 0xca, }, 6, 0, "", "",
+"62 f6 7d 48 4e ca    \tvrsqrtph %zmm2,%zmm1",},
+{{0x62, 0xf6, 0x7d, 0x48, 0x4e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 48 4e 8c c8 78 56 34 12 \tvrsqrtph 0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf6, 0x7d, 0x48, 0x4e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 7d 48 4e 8c c8 78 56 34 12 \tvrsqrtph 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf6, 0x7d, 0x08, 0x4e, 0xca, }, 6, 0, "", "",
+"62 f6 7d 08 4e ca    \tvrsqrtph %xmm2,%xmm1",},
+{{0x62, 0xf6, 0x7d, 0x08, 0x4e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 08 4e 8c c8 78 56 34 12 \tvrsqrtph 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf6, 0x7d, 0x08, 0x4e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 7d 08 4e 8c c8 78 56 34 12 \tvrsqrtph 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf6, 0x7d, 0x28, 0x4e, 0xca, }, 6, 0, "", "",
+"62 f6 7d 28 4e ca    \tvrsqrtph %ymm2,%ymm1",},
+{{0x62, 0xf6, 0x7d, 0x28, 0x4e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 7d 28 4e 8c c8 78 56 34 12 \tvrsqrtph 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf6, 0x7d, 0x28, 0x4e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 7d 28 4e 8c c8 78 56 34 12 \tvrsqrtph 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x4f, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 4f cb    \tvrsqrtsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x4f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 4f 8c c8 78 56 34 12 \tvrsqrtsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x4f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 4f 8c c8 78 56 34 12 \tvrsqrtsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x2c, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 48 2c cb    \tvscalefph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x48, 0x2c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 48 2c 8c c8 78 56 34 12 \tvscalefph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0x2c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 48 2c 8c c8 78 56 34 12 \tvscalefph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x2c, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 2c cb    \tvscalefph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x2c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 2c 8c c8 78 56 34 12 \tvscalefph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x2c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 2c 8c c8 78 56 34 12 \tvscalefph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x2c, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 28 2c cb    \tvscalefph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x28, 0x2c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 28 2c 8c c8 78 56 34 12 \tvscalefph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0x2c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 28 2c 8c c8 78 56 34 12 \tvscalefph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x2d, 0xcb, }, 6, 0, "", "",
+"62 f6 6d 08 2d cb    \tvscalefsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf6, 0x6d, 0x08, 0x2d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f6 6d 08 2d 8c c8 78 56 34 12 \tvscalefsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x2d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f6 6d 08 2d 8c c8 78 56 34 12 \tvscalefsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x51, 0xca, }, 6, 0, "", "",
+"62 f5 7c 48 51 ca    \tvsqrtph %zmm2,%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x48, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 48 51 8c c8 78 56 34 12 \tvsqrtph 0x12345678(%rax,%rcx,8),%zmm1",},
+{{0x67, 0x62, 0xf5, 0x7c, 0x48, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7c 48 51 8c c8 78 56 34 12 \tvsqrtph 0x12345678(%eax,%ecx,8),%zmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x51, 0xca, }, 6, 0, "", "",
+"62 f5 7c 08 51 ca    \tvsqrtph %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 08 51 8c c8 78 56 34 12 \tvsqrtph 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf5, 0x7c, 0x08, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7c 08 51 8c c8 78 56 34 12 \tvsqrtph 0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x51, 0xca, }, 6, 0, "", "",
+"62 f5 7c 28 51 ca    \tvsqrtph %ymm2,%ymm1",},
+{{0x62, 0xf5, 0x7c, 0x28, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 28 51 8c c8 78 56 34 12 \tvsqrtph 0x12345678(%rax,%rcx,8),%ymm1",},
+{{0x67, 0x62, 0xf5, 0x7c, 0x28, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7c 28 51 8c c8 78 56 34 12 \tvsqrtph 0x12345678(%eax,%ecx,8),%ymm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x51, 0xcb, }, 6, 0, "", "",
+"62 f5 6e 08 51 cb    \tvsqrtsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 51 8c c8 78 56 34 12 \tvsqrtsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6e 08 51 8c c8 78 56 34 12 \tvsqrtsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x5c, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 48 5c cb    \tvsubph %zmm3,%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x48, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 48 5c 8c c8 78 56 34 12 \tvsubph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",},
+{{0x67, 0x62, 0xf5, 0x6c, 0x48, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6c 48 5c 8c c8 78 56 34 12 \tvsubph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x5c, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 08 5c cb    \tvsubph %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x08, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 08 5c 8c c8 78 56 34 12 \tvsubph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf5, 0x6c, 0x08, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6c 08 5c 8c c8 78 56 34 12 \tvsubph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x5c, 0xcb, }, 6, 0, "", "",
+"62 f5 6c 28 5c cb    \tvsubph %ymm3,%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6c, 0x28, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6c 28 5c 8c c8 78 56 34 12 \tvsubph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",},
+{{0x67, 0x62, 0xf5, 0x6c, 0x28, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6c 28 5c 8c c8 78 56 34 12 \tvsubph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x5c, 0xcb, }, 6, 0, "", "",
+"62 f5 6e 08 5c cb    \tvsubsh %xmm3,%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x6e, 0x08, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 6e 08 5c 8c c8 78 56 34 12 \tvsubsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",},
+{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 6e 08 5c 8c c8 78 56 34 12 \tvsubsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x2e, 0xca, }, 6, 0, "", "",
+"62 f5 7c 08 2e ca    \tvucomish %xmm2,%xmm1",},
+{{0x62, 0xf5, 0x7c, 0x08, 0x2e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"62 f5 7c 08 2e 8c c8 78 56 34 12 \tvucomish 0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x67, 0x62, 0xf5, 0x7c, 0x08, 0x2e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "",
+"67 62 f5 7c 08 2e 8c c8 78 56 34 12 \tvucomish 0x12345678(%eax,%ecx,8),%xmm1",},
 {{0xf3, 0x0f, 0x3a, 0xf0, 0xc0, 0x00, }, 6, 0, "", "",
 "f3 0f 3a f0 c0 00    \threset $0x0",},
 {{0x0f, 0x01, 0xe8, }, 3, 0, "", "",
diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-src.c b/tools/perf/arch/x86/tests/insn-x86-dat-src.c
index 425db6a1b580a..a391464c8dee7 100644
--- a/tools/perf/arch/x86/tests/insn-x86-dat-src.c
+++ b/tools/perf/arch/x86/tests/insn-x86-dat-src.c
@@ -1940,6 +1940,694 @@ int main(void)
 	asm volatile("testui");
 	asm volatile("uiret");
 
+	/* AVX512-FP16 */
+
+	asm volatile("vaddph %zmm3, %zmm2, %zmm1");
+	asm volatile("vaddph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+	asm volatile("vaddph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vaddph %xmm3, %xmm2, %xmm1");
+	asm volatile("vaddph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vaddph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vaddph %ymm3, %ymm2, %ymm1");
+	asm volatile("vaddph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+	asm volatile("vaddph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vaddsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vaddsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vaddsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vcmpph $0x12, %zmm3, %zmm2, %k5");
+	asm volatile("vcmpph $0x12, 0x12345678(%rax,%rcx,8), %zmm2, %k5");
+	asm volatile("vcmpph $0x12, 0x12345678(%eax,%ecx,8), %zmm2, %k5");
+	asm volatile("vcmpph $0x12, %xmm3, %xmm2, %k5");
+	asm volatile("vcmpph $0x12, 0x12345678(%rax,%rcx,8), %xmm2, %k5");
+	asm volatile("vcmpph $0x12, 0x12345678(%eax,%ecx,8), %xmm2, %k5");
+	asm volatile("vcmpph $0x12, %ymm3, %ymm2, %k5");
+	asm volatile("vcmpph $0x12, 0x12345678(%rax,%rcx,8), %ymm2, %k5");
+	asm volatile("vcmpph $0x12, 0x12345678(%eax,%ecx,8), %ymm2, %k5");
+	asm volatile("vcmpsh $0x12, %xmm3, %xmm2, %k5");
+	asm volatile("vcmpsh $0x12, 0x12345678(%rax,%rcx,8), %xmm2, %k5");
+	asm volatile("vcmpsh $0x12, 0x12345678(%eax,%ecx,8), %xmm2, %k5");
+	asm volatile("vcomish %xmm2, %xmm1");
+	asm volatile("vcomish 0x12345678(%rax,%rcx,8), %xmm1");
+	asm volatile("vcomish 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvtdq2ph %zmm2, %ymm1");
+	asm volatile("vcvtdq2ph 0x12345678(%rax,%rcx,8), %ymm1");
+	asm volatile("vcvtdq2ph 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvtdq2ph %xmm2, %xmm1");
+	asm volatile("vcvtdq2ph %ymm2, %xmm1");
+	asm volatile("vcvtpd2ph %zmm2, %xmm1");
+	asm volatile("vcvtpd2ph %xmm2, %xmm1");
+	asm volatile("vcvtpd2ph %ymm2, %xmm1");
+	asm volatile("vcvtph2dq %ymm2, %zmm1");
+	asm volatile("vcvtph2dq 0x12345678(%rax,%rcx,8), %zmm1");
+	asm volatile("vcvtph2dq 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvtph2dq %xmm2, %xmm1");
+	asm volatile("vcvtph2dq 0x12345678(%rax,%rcx,8), %xmm1");
+	asm volatile("vcvtph2dq 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvtph2dq %xmm2, %ymm1");
+	asm volatile("vcvtph2dq 0x12345678(%rax,%rcx,8), %ymm1");
+	asm volatile("vcvtph2dq 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvtph2pd %xmm2, %zmm1");
+	asm volatile("vcvtph2pd 0x12345678(%rax,%rcx,8), %zmm1");
+	asm volatile("vcvtph2pd 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvtph2pd %xmm2, %xmm1");
+	asm volatile("vcvtph2pd 0x12345678(%rax,%rcx,8), %xmm1");
+	asm volatile("vcvtph2pd 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvtph2pd %xmm2, %ymm1");
+	asm volatile("vcvtph2pd 0x12345678(%rax,%rcx,8), %ymm1");
+	asm volatile("vcvtph2pd 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvtph2ps %ymm2, %zmm1");
+	asm volatile("vcvtph2ps 0x12345678(%rax,%rcx,8), %zmm1");
+	asm volatile("vcvtph2ps 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvtph2ps %xmm2, %xmm1");
+	asm volatile("vcvtph2ps 0x12345678(%rax,%rcx,8), %xmm1");
+	asm volatile("vcvtph2ps 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvtph2ps %xmm2, %ymm1");
+	asm volatile("vcvtph2ps 0x12345678(%rax,%rcx,8), %ymm1");
+	asm volatile("vcvtph2ps 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvtph2ps %xmm2, %xmm1");
+	asm volatile("vcvtph2ps 0x12345678(%rax,%rcx,8), %xmm1");
+	asm volatile("vcvtph2ps 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvtph2ps %xmm2, %ymm1");
+	asm volatile("vcvtph2ps 0x12345678(%rax,%rcx,8), %ymm1");
+	asm volatile("vcvtph2ps 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvtph2psx %ymm2, %zmm1");
+	asm volatile("vcvtph2psx 0x12345678(%rax,%rcx,8), %zmm1");
+	asm volatile("vcvtph2psx 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvtph2psx %xmm2, %xmm1");
+	asm volatile("vcvtph2psx 0x12345678(%rax,%rcx,8), %xmm1");
+	asm volatile("vcvtph2psx 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvtph2psx %xmm2, %ymm1");
+	asm volatile("vcvtph2psx 0x12345678(%rax,%rcx,8), %ymm1");
+	asm volatile("vcvtph2psx 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvtph2qq %xmm2, %zmm1");
+	asm volatile("vcvtph2qq 0x12345678(%rax,%rcx,8), %zmm1");
+	asm volatile("vcvtph2qq 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvtph2qq %xmm2, %xmm1");
+	asm volatile("vcvtph2qq 0x12345678(%rax,%rcx,8), %xmm1");
+	asm volatile("vcvtph2qq 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvtph2qq %xmm2, %ymm1");
+	asm volatile("vcvtph2qq 0x12345678(%rax,%rcx,8), %ymm1");
+	asm volatile("vcvtph2qq 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvtph2udq %ymm2, %zmm1");
+	asm volatile("vcvtph2udq 0x12345678(%rax,%rcx,8), %zmm1");
+	asm volatile("vcvtph2udq 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvtph2udq %xmm2, %xmm1");
+	asm volatile("vcvtph2udq 0x12345678(%rax,%rcx,8), %xmm1");
+	asm volatile("vcvtph2udq 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvtph2udq %xmm2, %ymm1");
+	asm volatile("vcvtph2udq 0x12345678(%rax,%rcx,8), %ymm1");
+	asm volatile("vcvtph2udq 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvtph2uqq %xmm2, %zmm1");
+	asm volatile("vcvtph2uqq 0x12345678(%rax,%rcx,8), %zmm1");
+	asm volatile("vcvtph2uqq 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvtph2uqq %xmm2, %xmm1");
+	asm volatile("vcvtph2uqq 0x12345678(%rax,%rcx,8), %xmm1");
+	asm volatile("vcvtph2uqq 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvtph2uqq %xmm2, %ymm1");
+	asm volatile("vcvtph2uqq 0x12345678(%rax,%rcx,8), %ymm1");
+	asm volatile("vcvtph2uqq 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvtph2uw %zmm2, %zmm1");
+	asm volatile("vcvtph2uw 0x12345678(%rax,%rcx,8), %zmm1");
+	asm volatile("vcvtph2uw 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvtph2uw %xmm2, %xmm1");
+	asm volatile("vcvtph2uw 0x12345678(%rax,%rcx,8), %xmm1");
+	asm volatile("vcvtph2uw 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvtph2uw %ymm2, %ymm1");
+	asm volatile("vcvtph2uw 0x12345678(%rax,%rcx,8), %ymm1");
+	asm volatile("vcvtph2uw 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvtph2w %zmm2, %zmm1");
+	asm volatile("vcvtph2w 0x12345678(%rax,%rcx,8), %zmm1");
+	asm volatile("vcvtph2w 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvtph2w %xmm2, %xmm1");
+	asm volatile("vcvtph2w 0x12345678(%rax,%rcx,8), %xmm1");
+	asm volatile("vcvtph2w 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvtph2w %ymm2, %ymm1");
+	asm volatile("vcvtph2w 0x12345678(%rax,%rcx,8), %ymm1");
+	asm volatile("vcvtph2w 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvtps2ph $0x12, %zmm1, 0x12345678(%rax,%rcx,8)");
+	asm volatile("vcvtps2ph $0x12, %zmm1, 0x12345678(%eax,%ecx,8)");
+	asm volatile("vcvtps2ph $0x12, %zmm2, %ymm1");
+	asm volatile("vcvtps2ph $0x12, %ymm1, 0x12345678(%rax,%rcx,8)");
+	asm volatile("vcvtps2ph $0x12, %ymm1, 0x12345678(%eax,%ecx,8)");
+	asm volatile("vcvtps2ph $0x12, %xmm1, 0x12345678(%rax,%rcx,8)");
+	asm volatile("vcvtps2ph $0x12, %xmm1, 0x12345678(%eax,%ecx,8)");
+	asm volatile("vcvtps2ph $0x12, %xmm2, %xmm1");
+	asm volatile("vcvtps2ph $0x12, %ymm2, %xmm1");
+	asm volatile("vcvtps2ph $0x12, %ymm2, %xmm1");
+	asm volatile("vcvtps2ph $0x12, %ymm2, 0x12345678(%rax,%rcx,8)");
+	asm volatile("vcvtps2ph $0x12, %ymm2, 0x12345678(%eax,%ecx,8)");
+	asm volatile("vcvtps2ph $0x12, %xmm2, %xmm1");
+	asm volatile("vcvtps2ph $0x12, %xmm2, 0x12345678(%rax,%rcx,8)");
+	asm volatile("vcvtps2ph $0x12, %xmm2, 0x12345678(%eax,%ecx,8)");
+	asm volatile("vcvtps2phx %zmm2, %ymm1");
+	asm volatile("vcvtps2phx 0x12345678(%rax,%rcx,8), %ymm1");
+	asm volatile("vcvtps2phx 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvtps2phx %xmm2, %xmm1");
+	asm volatile("vcvtps2phx %ymm2, %xmm1");
+	asm volatile("vcvtqq2ph %zmm2, %xmm1");
+	asm volatile("vcvtqq2ph %xmm2, %xmm1");
+	asm volatile("vcvtqq2ph %ymm2, %xmm1");
+	asm volatile("vcvtsd2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vcvtsh2sd 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vcvtsh2si 0x12345678(%eax,%ecx,8), %eax");
+	asm volatile("vcvtsh2si 0x12345678(%eax,%ecx,8), %rax");
+	asm volatile("vcvtsh2ss 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vcvtsh2usi %xmm1, %eax");
+	asm volatile("vcvtsh2usi 0x12345678(%rax,%rcx,8), %eax");
+	asm volatile("vcvtsh2usi 0x12345678(%eax,%ecx,8), %eax");
+	asm volatile("vcvtsh2usi %xmm1, %rax");
+	asm volatile("vcvtsh2usi 0x12345678(%rax,%rcx,8), %rax");
+	asm volatile("vcvtsh2usi 0x12345678(%eax,%ecx,8), %rax");
+	asm volatile("vcvtsi2sh %eax, %xmm2, %xmm1");
+	asm volatile("vcvtsi2sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vcvtsi2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vcvtsi2sh %rax, %xmm2, %xmm1");
+	asm volatile("vcvtsi2sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vcvtsi2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vcvtss2sh %xmm3, %xmm2, %xmm1");
+	asm volatile("vcvtss2sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vcvtss2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vcvttph2dq %ymm2, %zmm1");
+	asm volatile("vcvttph2dq 0x12345678(%rax,%rcx,8), %zmm1");
+	asm volatile("vcvttph2dq 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvttph2dq %xmm2, %xmm1");
+	asm volatile("vcvttph2dq 0x12345678(%rax,%rcx,8), %xmm1");
+	asm volatile("vcvttph2dq 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvttph2dq %xmm2, %ymm1");
+	asm volatile("vcvttph2dq 0x12345678(%rax,%rcx,8), %ymm1");
+	asm volatile("vcvttph2dq 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvttph2qq %xmm2, %zmm1");
+	asm volatile("vcvttph2qq 0x12345678(%rax,%rcx,8), %zmm1");
+	asm volatile("vcvttph2qq 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvttph2qq %xmm2, %xmm1");
+	asm volatile("vcvttph2qq 0x12345678(%rax,%rcx,8), %xmm1");
+	asm volatile("vcvttph2qq 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvttph2qq %xmm2, %ymm1");
+	asm volatile("vcvttph2qq 0x12345678(%rax,%rcx,8), %ymm1");
+	asm volatile("vcvttph2qq 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvttph2udq %ymm2, %zmm1");
+	asm volatile("vcvttph2udq 0x12345678(%rax,%rcx,8), %zmm1");
+	asm volatile("vcvttph2udq 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvttph2udq %xmm2, %xmm1");
+	asm volatile("vcvttph2udq 0x12345678(%rax,%rcx,8), %xmm1");
+	asm volatile("vcvttph2udq 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvttph2udq %xmm2, %ymm1");
+	asm volatile("vcvttph2udq 0x12345678(%rax,%rcx,8), %ymm1");
+	asm volatile("vcvttph2udq 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvttph2uqq %xmm2, %zmm1");
+	asm volatile("vcvttph2uqq 0x12345678(%rax,%rcx,8), %zmm1");
+	asm volatile("vcvttph2uqq 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvttph2uqq %xmm2, %xmm1");
+	asm volatile("vcvttph2uqq 0x12345678(%rax,%rcx,8), %xmm1");
+	asm volatile("vcvttph2uqq 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvttph2uqq %xmm2, %ymm1");
+	asm volatile("vcvttph2uqq 0x12345678(%rax,%rcx,8), %ymm1");
+	asm volatile("vcvttph2uqq 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvttph2uw %zmm2, %zmm1");
+	asm volatile("vcvttph2uw 0x12345678(%rax,%rcx,8), %zmm1");
+	asm volatile("vcvttph2uw 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvttph2uw %xmm2, %xmm1");
+	asm volatile("vcvttph2uw 0x12345678(%rax,%rcx,8), %xmm1");
+	asm volatile("vcvttph2uw 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvttph2uw %ymm2, %ymm1");
+	asm volatile("vcvttph2uw 0x12345678(%rax,%rcx,8), %ymm1");
+	asm volatile("vcvttph2uw 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvttph2w %zmm2, %zmm1");
+	asm volatile("vcvttph2w 0x12345678(%rax,%rcx,8), %zmm1");
+	asm volatile("vcvttph2w 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvttph2w %xmm2, %xmm1");
+	asm volatile("vcvttph2w 0x12345678(%rax,%rcx,8), %xmm1");
+	asm volatile("vcvttph2w 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvttph2w %ymm2, %ymm1");
+	asm volatile("vcvttph2w 0x12345678(%rax,%rcx,8), %ymm1");
+	asm volatile("vcvttph2w 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvttsh2si %xmm1, %eax");
+	asm volatile("vcvttsh2si 0x12345678(%rax,%rcx,8), %eax");
+	asm volatile("vcvttsh2si 0x12345678(%eax,%ecx,8), %eax");
+	asm volatile("vcvttsh2si %xmm1, %rax");
+	asm volatile("vcvttsh2si 0x12345678(%rax,%rcx,8), %rax");
+	asm volatile("vcvttsh2si 0x12345678(%eax,%ecx,8), %rax");
+	asm volatile("vcvttsh2usi %xmm1, %eax");
+	asm volatile("vcvttsh2usi 0x12345678(%rax,%rcx,8), %eax");
+	asm volatile("vcvttsh2usi 0x12345678(%eax,%ecx,8), %eax");
+	asm volatile("vcvttsh2usi %xmm1, %rax");
+	asm volatile("vcvttsh2usi 0x12345678(%rax,%rcx,8), %rax");
+	asm volatile("vcvttsh2usi 0x12345678(%eax,%ecx,8), %rax");
+	asm volatile("vcvtudq2ph %zmm2, %ymm1");
+	asm volatile("vcvtudq2ph 0x12345678(%rax,%rcx,8), %ymm1");
+	asm volatile("vcvtudq2ph 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvtudq2ph %xmm2, %xmm1");
+	asm volatile("vcvtudq2ph %ymm2, %xmm1");
+	asm volatile("vcvtuqq2ph %zmm2, %xmm1");
+	asm volatile("vcvtuqq2ph %xmm2, %xmm1");
+	asm volatile("vcvtuqq2ph %ymm2, %xmm1");
+	asm volatile("vcvtusi2sh %eax, %xmm2, %xmm1");
+	asm volatile("vcvtusi2sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vcvtusi2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vcvtusi2sh %rax, %xmm2, %xmm1");
+	asm volatile("vcvtusi2sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vcvtusi2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vcvtuw2ph %zmm2, %zmm1");
+	asm volatile("vcvtuw2ph 0x12345678(%rax,%rcx,8), %zmm1");
+	asm volatile("vcvtuw2ph 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvtuw2ph %xmm2, %xmm1");
+	asm volatile("vcvtuw2ph 0x12345678(%rax,%rcx,8), %xmm1");
+	asm volatile("vcvtuw2ph 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvtuw2ph %ymm2, %ymm1");
+	asm volatile("vcvtuw2ph 0x12345678(%rax,%rcx,8), %ymm1");
+	asm volatile("vcvtuw2ph 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvtw2ph %zmm2, %zmm1");
+	asm volatile("vcvtw2ph 0x12345678(%rax,%rcx,8), %zmm1");
+	asm volatile("vcvtw2ph 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvtw2ph %xmm2, %xmm1");
+	asm volatile("vcvtw2ph 0x12345678(%rax,%rcx,8), %xmm1");
+	asm volatile("vcvtw2ph 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvtw2ph %ymm2, %ymm1");
+	asm volatile("vcvtw2ph 0x12345678(%rax,%rcx,8), %ymm1");
+	asm volatile("vcvtw2ph 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vdivph %zmm3, %zmm2, %zmm1");
+	asm volatile("vdivph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+	asm volatile("vdivph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vdivph %xmm3, %xmm2, %xmm1");
+	asm volatile("vdivph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vdivph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vdivph %ymm3, %ymm2, %ymm1");
+	asm volatile("vdivph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+	asm volatile("vdivph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vdivsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vdivsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vdivsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfcmaddcph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfcmaddcph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+	asm volatile("vfcmaddcph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfcmaddcph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfcmaddcph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfcmaddcph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfcmaddcph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfcmaddcph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+	asm volatile("vfcmaddcph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfcmaddcsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfcmaddcsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfcmaddcsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfcmulcph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfcmulcph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+	asm volatile("vfcmulcph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfcmulcph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfcmulcph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfcmulcph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfcmulcph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfcmulcph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+	asm volatile("vfcmulcph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfcmulcsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfcmulcsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfcmulcsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmadd132ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfmadd132ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+	asm volatile("vfmadd132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfmadd132ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmadd132ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfmadd132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmadd132ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfmadd132ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+	asm volatile("vfmadd132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfmadd132sh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmadd132sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfmadd132sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmadd213ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfmadd213ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+	asm volatile("vfmadd213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfmadd213ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmadd213ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfmadd213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmadd213ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfmadd213ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+	asm volatile("vfmadd213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfmadd213sh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmadd213sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfmadd213sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmadd231ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfmadd231ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+	asm volatile("vfmadd231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfmadd231ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmadd231ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfmadd231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmadd231ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfmadd231ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+	asm volatile("vfmadd231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfmadd231sh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmadd231sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfmadd231sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmaddcph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfmaddcph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+	asm volatile("vfmaddcph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfmaddcph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmaddcph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfmaddcph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmaddcph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfmaddcph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+	asm volatile("vfmaddcph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfmaddcsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmaddcsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfmaddcsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmaddsub132ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfmaddsub132ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+	asm volatile("vfmaddsub132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfmaddsub132ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmaddsub132ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfmaddsub132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmaddsub132ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfmaddsub132ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+	asm volatile("vfmaddsub132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfmaddsub213ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfmaddsub213ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+	asm volatile("vfmaddsub213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfmaddsub213ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmaddsub213ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfmaddsub213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmaddsub213ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfmaddsub213ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+	asm volatile("vfmaddsub213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfmaddsub231ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfmaddsub231ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+	asm volatile("vfmaddsub231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfmaddsub231ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmaddsub231ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfmaddsub231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmaddsub231ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfmaddsub231ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+	asm volatile("vfmaddsub231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfmsub132ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfmsub132ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+	asm volatile("vfmsub132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfmsub132ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmsub132ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfmsub132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmsub132ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfmsub132ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+	asm volatile("vfmsub132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfmsub132sh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmsub132sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfmsub132sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmsub213ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfmsub213ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+	asm volatile("vfmsub213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfmsub213ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmsub213ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfmsub213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmsub213ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfmsub213ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+	asm volatile("vfmsub213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfmsub213sh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmsub213sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfmsub213sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmsub231ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfmsub231ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+	asm volatile("vfmsub231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfmsub231ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmsub231ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfmsub231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmsub231ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfmsub231ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+	asm volatile("vfmsub231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfmsub231sh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmsub231sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfmsub231sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmsubadd132ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfmsubadd132ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+	asm volatile("vfmsubadd132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfmsubadd132ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmsubadd132ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfmsubadd132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmsubadd132ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfmsubadd132ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+	asm volatile("vfmsubadd132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfmsubadd213ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfmsubadd213ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+	asm volatile("vfmsubadd213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfmsubadd213ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmsubadd213ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfmsubadd213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmsubadd213ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfmsubadd213ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+	asm volatile("vfmsubadd213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfmsubadd231ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfmsubadd231ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+	asm volatile("vfmsubadd231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfmsubadd231ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmsubadd231ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfmsubadd231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmsubadd231ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfmsubadd231ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+	asm volatile("vfmsubadd231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfmulcph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfmulcph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+	asm volatile("vfmulcph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfmulcph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmulcph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfmulcph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmulcph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfmulcph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+	asm volatile("vfmulcph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfmulcsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmulcsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfmulcsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfnmadd132ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfnmadd132ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+	asm volatile("vfnmadd132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfnmadd132ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfnmadd132ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfnmadd132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfnmadd132ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfnmadd132ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+	asm volatile("vfnmadd132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfnmadd132sh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfnmadd132sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfnmadd132sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfnmadd213ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfnmadd213ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+	asm volatile("vfnmadd213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfnmadd213ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfnmadd213ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfnmadd213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfnmadd213ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfnmadd213ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+	asm volatile("vfnmadd213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfnmadd213sh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfnmadd213sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfnmadd213sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfnmadd231ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfnmadd231ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+	asm volatile("vfnmadd231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfnmadd231ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfnmadd231ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfnmadd231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfnmadd231ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfnmadd231ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+	asm volatile("vfnmadd231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfnmadd231sh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfnmadd231sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfnmadd231sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfnmsub132ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfnmsub132ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+	asm volatile("vfnmsub132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfnmsub132ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfnmsub132ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfnmsub132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfnmsub132ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfnmsub132ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+	asm volatile("vfnmsub132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfnmsub132sh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfnmsub132sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfnmsub132sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfnmsub213ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfnmsub213ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+	asm volatile("vfnmsub213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfnmsub213ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfnmsub213ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfnmsub213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfnmsub213ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfnmsub213ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+	asm volatile("vfnmsub213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfnmsub213sh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfnmsub213sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfnmsub213sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfnmsub231ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfnmsub231ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+	asm volatile("vfnmsub231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfnmsub231ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfnmsub231ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfnmsub231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfnmsub231ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfnmsub231ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+	asm volatile("vfnmsub231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfnmsub231sh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfnmsub231sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vfnmsub231sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfpclassph $0x12, %zmm1, %k5");
+	asm volatile("vfpclassph $0x12, %xmm1, %k5");
+	asm volatile("vfpclassph $0x12, %ymm1, %k5");
+	asm volatile("vfpclasssh $0x12, %xmm1, %k5");
+	asm volatile("vfpclasssh $0x12, 0x12345678(%rax,%rcx,8), %k5");
+	asm volatile("vfpclasssh $0x12, 0x12345678(%eax,%ecx,8), %k5");
+	asm volatile("vgetexpph %zmm2, %zmm1");
+	asm volatile("vgetexpph 0x12345678(%rax,%rcx,8), %zmm1");
+	asm volatile("vgetexpph 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vgetexpph %xmm2, %xmm1");
+	asm volatile("vgetexpph 0x12345678(%rax,%rcx,8), %xmm1");
+	asm volatile("vgetexpph 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vgetexpph %ymm2, %ymm1");
+	asm volatile("vgetexpph 0x12345678(%rax,%rcx,8), %ymm1");
+	asm volatile("vgetexpph 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vgetexpsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vgetexpsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vgetexpsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vgetmantph $0x12, %zmm2, %zmm1");
+	asm volatile("vgetmantph $0x12, 0x12345678(%rax,%rcx,8), %zmm1");
+	asm volatile("vgetmantph $0x12, 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vgetmantph $0x12, %xmm2, %xmm1");
+	asm volatile("vgetmantph $0x12, 0x12345678(%rax,%rcx,8), %xmm1");
+	asm volatile("vgetmantph $0x12, 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vgetmantph $0x12, %ymm2, %ymm1");
+	asm volatile("vgetmantph $0x12, 0x12345678(%rax,%rcx,8), %ymm1");
+	asm volatile("vgetmantph $0x12, 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vgetmantsh $0x12, %xmm3, %xmm2, %xmm1");
+	asm volatile("vgetmantsh $0x12, 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vgetmantsh $0x12, 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vmaxph %zmm3, %zmm2, %zmm1");
+	asm volatile("vmaxph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+	asm volatile("vmaxph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vmaxph %xmm3, %xmm2, %xmm1");
+	asm volatile("vmaxph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vmaxph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vmaxph %ymm3, %ymm2, %ymm1");
+	asm volatile("vmaxph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+	asm volatile("vmaxph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vmaxsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vmaxsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vmaxsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vminph %zmm3, %zmm2, %zmm1");
+	asm volatile("vminph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+	asm volatile("vminph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vminph %xmm3, %xmm2, %xmm1");
+	asm volatile("vminph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vminph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vminph %ymm3, %ymm2, %ymm1");
+	asm volatile("vminph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+	asm volatile("vminph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vminsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vminsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vminsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vmovsh %xmm1, 0x12345678(%rax,%rcx,8)");
+	asm volatile("vmovsh %xmm1, 0x12345678(%eax,%ecx,8)");
+	asm volatile("vmovsh 0x12345678(%rax,%rcx,8), %xmm1");
+	asm volatile("vmovsh 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vmovsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vmovw %xmm1, %eax");
+	asm volatile("vmovw %xmm1, 0x12345678(%rax,%rcx,8)");
+	asm volatile("vmovw %xmm1, 0x12345678(%eax,%ecx,8)");
+	asm volatile("vmovw %eax, %xmm1");
+	asm volatile("vmovw 0x12345678(%rax,%rcx,8), %xmm1");
+	asm volatile("vmovw 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vmulph %zmm3, %zmm2, %zmm1");
+	asm volatile("vmulph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+	asm volatile("vmulph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vmulph %xmm3, %xmm2, %xmm1");
+	asm volatile("vmulph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vmulph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vmulph %ymm3, %ymm2, %ymm1");
+	asm volatile("vmulph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+	asm volatile("vmulph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vmulsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vmulsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vmulsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vrcpph %zmm2, %zmm1");
+	asm volatile("vrcpph 0x12345678(%rax,%rcx,8), %zmm1");
+	asm volatile("vrcpph 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vrcpph %xmm2, %xmm1");
+	asm volatile("vrcpph 0x12345678(%rax,%rcx,8), %xmm1");
+	asm volatile("vrcpph 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vrcpph %ymm2, %ymm1");
+	asm volatile("vrcpph 0x12345678(%rax,%rcx,8), %ymm1");
+	asm volatile("vrcpph 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vrcpsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vrcpsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vrcpsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vreduceph $0x12, %zmm2, %zmm1");
+	asm volatile("vreduceph $0x12, 0x12345678(%rax,%rcx,8), %zmm1");
+	asm volatile("vreduceph $0x12, 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vreduceph $0x12, %xmm2, %xmm1");
+	asm volatile("vreduceph $0x12, 0x12345678(%rax,%rcx,8), %xmm1");
+	asm volatile("vreduceph $0x12, 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vreduceph $0x12, %ymm2, %ymm1");
+	asm volatile("vreduceph $0x12, 0x12345678(%rax,%rcx,8), %ymm1");
+	asm volatile("vreduceph $0x12, 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vreducesh $0x12, %xmm3, %xmm2, %xmm1");
+	asm volatile("vreducesh $0x12, 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vreducesh $0x12, 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vrndscaleph $0x12, %zmm2, %zmm1");
+	asm volatile("vrndscaleph $0x12, 0x12345678(%rax,%rcx,8), %zmm1");
+	asm volatile("vrndscaleph $0x12, 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vrndscaleph $0x12, %xmm2, %xmm1");
+	asm volatile("vrndscaleph $0x12, 0x12345678(%rax,%rcx,8), %xmm1");
+	asm volatile("vrndscaleph $0x12, 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vrndscaleph $0x12, %ymm2, %ymm1");
+	asm volatile("vrndscaleph $0x12, 0x12345678(%rax,%rcx,8), %ymm1");
+	asm volatile("vrndscaleph $0x12, 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vrndscalesh $0x12, %xmm3, %xmm2, %xmm1");
+	asm volatile("vrndscalesh $0x12, 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vrndscalesh $0x12, 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vrsqrtph %zmm2, %zmm1");
+	asm volatile("vrsqrtph 0x12345678(%rax,%rcx,8), %zmm1");
+	asm volatile("vrsqrtph 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vrsqrtph %xmm2, %xmm1");
+	asm volatile("vrsqrtph 0x12345678(%rax,%rcx,8), %xmm1");
+	asm volatile("vrsqrtph 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vrsqrtph %ymm2, %ymm1");
+	asm volatile("vrsqrtph 0x12345678(%rax,%rcx,8), %ymm1");
+	asm volatile("vrsqrtph 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vrsqrtsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vrsqrtsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vrsqrtsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vscalefph %zmm3, %zmm2, %zmm1");
+	asm volatile("vscalefph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+	asm volatile("vscalefph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vscalefph %xmm3, %xmm2, %xmm1");
+	asm volatile("vscalefph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vscalefph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vscalefph %ymm3, %ymm2, %ymm1");
+	asm volatile("vscalefph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+	asm volatile("vscalefph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vscalefsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vscalefsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vscalefsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vsqrtph %zmm2, %zmm1");
+	asm volatile("vsqrtph 0x12345678(%rax,%rcx,8), %zmm1");
+	asm volatile("vsqrtph 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vsqrtph %xmm2, %xmm1");
+	asm volatile("vsqrtph 0x12345678(%rax,%rcx,8), %xmm1");
+	asm volatile("vsqrtph 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vsqrtph %ymm2, %ymm1");
+	asm volatile("vsqrtph 0x12345678(%rax,%rcx,8), %ymm1");
+	asm volatile("vsqrtph 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vsqrtsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vsqrtsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vsqrtsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vsubph %zmm3, %zmm2, %zmm1");
+	asm volatile("vsubph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1");
+	asm volatile("vsubph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vsubph %xmm3, %xmm2, %xmm1");
+	asm volatile("vsubph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vsubph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vsubph %ymm3, %ymm2, %ymm1");
+	asm volatile("vsubph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1");
+	asm volatile("vsubph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vsubsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vsubsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1");
+	asm volatile("vsubsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vucomish %xmm2, %xmm1");
+	asm volatile("vucomish 0x12345678(%rax,%rcx,8), %xmm1");
+	asm volatile("vucomish 0x12345678(%eax,%ecx,8), %xmm1");
+
 #else  /* #ifdef __x86_64__ */
 
 	/* bound r32, mem (same op code as EVEX prefix) */
@@ -3700,6 +4388,464 @@ int main(void)
 	asm volatile("notrack bnd jmp *(0x12345678)");		/* Expecting: jmp indirect 0 */
 	asm volatile("notrack bnd jmp *0x12345678(%eax,%ecx,8)"); /* Expecting: jmp indirect 0 */
 
+	/* AVX512-FP16 */
+
+	asm volatile("vaddph %zmm3, %zmm2, %zmm1");
+	asm volatile("vaddph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vaddph %xmm3, %xmm2, %xmm1");
+	asm volatile("vaddph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vaddph %ymm3, %ymm2, %ymm1");
+	asm volatile("vaddph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vaddsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vaddsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vcmpph $0x12, %zmm3, %zmm2, %k5");
+	asm volatile("vcmpph $0x12, 0x12345678(%eax,%ecx,8), %zmm2, %k5");
+	asm volatile("vcmpph $0x12, %xmm3, %xmm2, %k5");
+	asm volatile("vcmpph $0x12, 0x12345678(%eax,%ecx,8), %xmm2, %k5");
+	asm volatile("vcmpph $0x12, %ymm3, %ymm2, %k5");
+	asm volatile("vcmpph $0x12, 0x12345678(%eax,%ecx,8), %ymm2, %k5");
+	asm volatile("vcmpsh $0x12, %xmm3, %xmm2, %k5");
+	asm volatile("vcmpsh $0x12, 0x12345678(%eax,%ecx,8), %xmm2, %k5");
+	asm volatile("vcomish %xmm2, %xmm1");
+	asm volatile("vcomish 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvtdq2ph %zmm2, %ymm1");
+	asm volatile("vcvtdq2ph 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvtdq2ph %xmm2, %xmm1");
+	asm volatile("vcvtdq2ph %ymm2, %xmm1");
+	asm volatile("vcvtpd2ph %zmm2, %xmm1");
+	asm volatile("vcvtpd2ph %xmm2, %xmm1");
+	asm volatile("vcvtpd2ph %ymm2, %xmm1");
+	asm volatile("vcvtph2dq %ymm2, %zmm1");
+	asm volatile("vcvtph2dq 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvtph2dq %xmm2, %xmm1");
+	asm volatile("vcvtph2dq 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvtph2dq %xmm2, %ymm1");
+	asm volatile("vcvtph2dq 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvtph2pd %xmm2, %zmm1");
+	asm volatile("vcvtph2pd 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvtph2pd %xmm2, %xmm1");
+	asm volatile("vcvtph2pd 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvtph2pd %xmm2, %ymm1");
+	asm volatile("vcvtph2pd 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvtph2ps %ymm2, %zmm1");
+	asm volatile("vcvtph2ps 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvtph2ps %xmm2, %xmm1");
+	asm volatile("vcvtph2ps 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvtph2ps %xmm2, %ymm1");
+	asm volatile("vcvtph2ps 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvtph2ps %xmm2, %xmm1");
+	asm volatile("vcvtph2ps 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvtph2ps %xmm2, %ymm1");
+	asm volatile("vcvtph2ps 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvtph2psx %ymm2, %zmm1");
+	asm volatile("vcvtph2psx 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvtph2psx %xmm2, %xmm1");
+	asm volatile("vcvtph2psx 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvtph2psx %xmm2, %ymm1");
+	asm volatile("vcvtph2psx 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvtph2qq %xmm2, %zmm1");
+	asm volatile("vcvtph2qq 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvtph2qq %xmm2, %xmm1");
+	asm volatile("vcvtph2qq 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvtph2qq %xmm2, %ymm1");
+	asm volatile("vcvtph2qq 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvtph2udq %ymm2, %zmm1");
+	asm volatile("vcvtph2udq 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvtph2udq %xmm2, %xmm1");
+	asm volatile("vcvtph2udq 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvtph2udq %xmm2, %ymm1");
+	asm volatile("vcvtph2udq 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvtph2uqq %xmm2, %zmm1");
+	asm volatile("vcvtph2uqq 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvtph2uqq %xmm2, %xmm1");
+	asm volatile("vcvtph2uqq 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvtph2uqq %xmm2, %ymm1");
+	asm volatile("vcvtph2uqq 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvtph2uw %zmm2, %zmm1");
+	asm volatile("vcvtph2uw 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvtph2uw %xmm2, %xmm1");
+	asm volatile("vcvtph2uw 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvtph2uw %ymm2, %ymm1");
+	asm volatile("vcvtph2uw 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvtph2w %zmm2, %zmm1");
+	asm volatile("vcvtph2w 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvtph2w %xmm2, %xmm1");
+	asm volatile("vcvtph2w 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvtph2w %ymm2, %ymm1");
+	asm volatile("vcvtph2w 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvtps2ph $0x12, %zmm1, 0x12345678(%eax,%ecx,8)");
+	asm volatile("vcvtps2ph $0x12, %zmm2, %ymm1");
+	asm volatile("vcvtps2ph $0x12, %ymm1, 0x12345678(%eax,%ecx,8)");
+	asm volatile("vcvtps2ph $0x12, %xmm1, 0x12345678(%eax,%ecx,8)");
+	asm volatile("vcvtps2ph $0x12, %xmm2, %xmm1");
+	asm volatile("vcvtps2ph $0x12, %ymm2, %xmm1");
+	asm volatile("vcvtps2ph $0x12, %ymm2, %xmm1");
+	asm volatile("vcvtps2ph $0x12, %ymm2, 0x12345678(%eax,%ecx,8)");
+	asm volatile("vcvtps2ph $0x12, %xmm2, %xmm1");
+	asm volatile("vcvtps2ph $0x12, %xmm2, 0x12345678(%eax,%ecx,8)");
+	asm volatile("vcvtps2phx %zmm2, %ymm1");
+	asm volatile("vcvtps2phx 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvtps2phx %xmm2, %xmm1");
+	asm volatile("vcvtps2phx %ymm2, %xmm1");
+	asm volatile("vcvtqq2ph %zmm2, %xmm1");
+	asm volatile("vcvtqq2ph %xmm2, %xmm1");
+	asm volatile("vcvtqq2ph %ymm2, %xmm1");
+	asm volatile("vcvtsd2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vcvtsh2sd 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vcvtsh2si 0x12345678(%eax,%ecx,8), %eax");
+	asm volatile("vcvtsh2ss 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vcvtsh2usi %xmm1, %eax");
+	asm volatile("vcvtsh2usi 0x12345678(%eax,%ecx,8), %eax");
+	asm volatile("vcvtsi2sh %eax, %xmm2, %xmm1");
+	asm volatile("vcvtsi2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vcvtsi2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vcvtss2sh %xmm3, %xmm2, %xmm1");
+	asm volatile("vcvtss2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vcvttph2dq %ymm2, %zmm1");
+	asm volatile("vcvttph2dq 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvttph2dq %xmm2, %xmm1");
+	asm volatile("vcvttph2dq 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvttph2dq %xmm2, %ymm1");
+	asm volatile("vcvttph2dq 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvttph2qq %xmm2, %zmm1");
+	asm volatile("vcvttph2qq 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvttph2qq %xmm2, %xmm1");
+	asm volatile("vcvttph2qq 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvttph2qq %xmm2, %ymm1");
+	asm volatile("vcvttph2qq 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvttph2udq %ymm2, %zmm1");
+	asm volatile("vcvttph2udq 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvttph2udq %xmm2, %xmm1");
+	asm volatile("vcvttph2udq 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvttph2udq %xmm2, %ymm1");
+	asm volatile("vcvttph2udq 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvttph2uqq %xmm2, %zmm1");
+	asm volatile("vcvttph2uqq 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvttph2uqq %xmm2, %xmm1");
+	asm volatile("vcvttph2uqq 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvttph2uqq %xmm2, %ymm1");
+	asm volatile("vcvttph2uqq 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvttph2uw %zmm2, %zmm1");
+	asm volatile("vcvttph2uw 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvttph2uw %xmm2, %xmm1");
+	asm volatile("vcvttph2uw 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvttph2uw %ymm2, %ymm1");
+	asm volatile("vcvttph2uw 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvttph2w %zmm2, %zmm1");
+	asm volatile("vcvttph2w 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvttph2w %xmm2, %xmm1");
+	asm volatile("vcvttph2w 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvttph2w %ymm2, %ymm1");
+	asm volatile("vcvttph2w 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvttsh2si %xmm1, %eax");
+	asm volatile("vcvttsh2si 0x12345678(%eax,%ecx,8), %eax");
+	asm volatile("vcvttsh2usi %xmm1, %eax");
+	asm volatile("vcvttsh2usi 0x12345678(%eax,%ecx,8), %eax");
+	asm volatile("vcvtudq2ph %zmm2, %ymm1");
+	asm volatile("vcvtudq2ph 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvtudq2ph %xmm2, %xmm1");
+	asm volatile("vcvtudq2ph %ymm2, %xmm1");
+	asm volatile("vcvtuqq2ph %zmm2, %xmm1");
+	asm volatile("vcvtuqq2ph %xmm2, %xmm1");
+	asm volatile("vcvtuqq2ph %ymm2, %xmm1");
+	asm volatile("vcvtusi2sh %eax, %xmm2, %xmm1");
+	asm volatile("vcvtusi2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vcvtusi2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vcvtuw2ph %zmm2, %zmm1");
+	asm volatile("vcvtuw2ph 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvtuw2ph %xmm2, %xmm1");
+	asm volatile("vcvtuw2ph 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvtuw2ph %ymm2, %ymm1");
+	asm volatile("vcvtuw2ph 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vcvtw2ph %zmm2, %zmm1");
+	asm volatile("vcvtw2ph 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vcvtw2ph %xmm2, %xmm1");
+	asm volatile("vcvtw2ph 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vcvtw2ph %ymm2, %ymm1");
+	asm volatile("vcvtw2ph 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vdivph %zmm3, %zmm2, %zmm1");
+	asm volatile("vdivph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vdivph %xmm3, %xmm2, %xmm1");
+	asm volatile("vdivph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vdivph %ymm3, %ymm2, %ymm1");
+	asm volatile("vdivph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vdivsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vdivsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfcmaddcph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfcmaddcph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfcmaddcph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfcmaddcph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfcmaddcph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfcmaddcph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfcmaddcsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfcmaddcsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfcmulcph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfcmulcph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfcmulcph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfcmulcph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfcmulcph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfcmulcph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfcmulcsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfcmulcsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmadd132ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfmadd132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfmadd132ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmadd132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmadd132ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfmadd132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfmadd132sh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmadd132sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmadd213ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfmadd213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfmadd213ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmadd213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmadd213ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfmadd213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfmadd213sh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmadd213sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmadd231ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfmadd231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfmadd231ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmadd231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmadd231ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfmadd231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfmadd231sh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmadd231sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmaddcph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfmaddcph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfmaddcph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmaddcph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmaddcph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfmaddcph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfmaddcsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmaddcsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmaddsub132ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfmaddsub132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfmaddsub132ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmaddsub132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmaddsub132ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfmaddsub132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfmaddsub213ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfmaddsub213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfmaddsub213ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmaddsub213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmaddsub213ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfmaddsub213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfmaddsub231ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfmaddsub231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfmaddsub231ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmaddsub231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmaddsub231ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfmaddsub231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfmsub132ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfmsub132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfmsub132ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmsub132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmsub132ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfmsub132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfmsub132sh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmsub132sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmsub213ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfmsub213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfmsub213ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmsub213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmsub213ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfmsub213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfmsub213sh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmsub213sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmsub231ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfmsub231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfmsub231ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmsub231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmsub231ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfmsub231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfmsub231sh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmsub231sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmsubadd132ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfmsubadd132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfmsubadd132ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmsubadd132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmsubadd132ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfmsubadd132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfmsubadd213ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfmsubadd213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfmsubadd213ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmsubadd213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmsubadd213ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfmsubadd213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfmsubadd231ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfmsubadd231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfmsubadd231ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmsubadd231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmsubadd231ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfmsubadd231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfmulcph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfmulcph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfmulcph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmulcph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfmulcph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfmulcph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfmulcsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfmulcsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfnmadd132ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfnmadd132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfnmadd132ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfnmadd132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfnmadd132ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfnmadd132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfnmadd132sh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfnmadd132sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfnmadd213ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfnmadd213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfnmadd213ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfnmadd213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfnmadd213ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfnmadd213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfnmadd213sh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfnmadd213sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfnmadd231ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfnmadd231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfnmadd231ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfnmadd231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfnmadd231ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfnmadd231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfnmadd231sh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfnmadd231sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfnmsub132ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfnmsub132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfnmsub132ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfnmsub132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfnmsub132ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfnmsub132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfnmsub132sh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfnmsub132sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfnmsub213ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfnmsub213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfnmsub213ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfnmsub213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfnmsub213ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfnmsub213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfnmsub213sh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfnmsub213sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfnmsub231ph %zmm3, %zmm2, %zmm1");
+	asm volatile("vfnmsub231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vfnmsub231ph %xmm3, %xmm2, %xmm1");
+	asm volatile("vfnmsub231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfnmsub231ph %ymm3, %ymm2, %ymm1");
+	asm volatile("vfnmsub231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vfnmsub231sh %xmm3, %xmm2, %xmm1");
+	asm volatile("vfnmsub231sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vfpclassph $0x12, %zmm1, %k5");
+	asm volatile("vfpclassph $0x12, %xmm1, %k5");
+	asm volatile("vfpclassph $0x12, %ymm1, %k5");
+	asm volatile("vfpclasssh $0x12, %xmm1, %k5");
+	asm volatile("vfpclasssh $0x12, 0x12345678(%eax,%ecx,8), %k5");
+	asm volatile("vgetexpph %zmm2, %zmm1");
+	asm volatile("vgetexpph 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vgetexpph %xmm2, %xmm1");
+	asm volatile("vgetexpph 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vgetexpph %ymm2, %ymm1");
+	asm volatile("vgetexpph 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vgetexpsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vgetexpsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vgetmantph $0x12, %zmm2, %zmm1");
+	asm volatile("vgetmantph $0x12, 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vgetmantph $0x12, %xmm2, %xmm1");
+	asm volatile("vgetmantph $0x12, 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vgetmantph $0x12, %ymm2, %ymm1");
+	asm volatile("vgetmantph $0x12, 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vgetmantsh $0x12, %xmm3, %xmm2, %xmm1");
+	asm volatile("vgetmantsh $0x12, 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vmaxph %zmm3, %zmm2, %zmm1");
+	asm volatile("vmaxph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vmaxph %xmm3, %xmm2, %xmm1");
+	asm volatile("vmaxph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vmaxph %ymm3, %ymm2, %ymm1");
+	asm volatile("vmaxph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vmaxsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vmaxsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vminph %zmm3, %zmm2, %zmm1");
+	asm volatile("vminph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vminph %xmm3, %xmm2, %xmm1");
+	asm volatile("vminph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vminph %ymm3, %ymm2, %ymm1");
+	asm volatile("vminph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vminsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vminsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vmovsh %xmm1, 0x12345678(%eax,%ecx,8)");
+	asm volatile("vmovsh 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vmovsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vmovw %xmm1, %eax");
+	asm volatile("vmovw %xmm1, 0x12345678(%eax,%ecx,8)");
+	asm volatile("vmovw %eax, %xmm1");
+	asm volatile("vmovw 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vmulph %zmm3, %zmm2, %zmm1");
+	asm volatile("vmulph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vmulph %xmm3, %xmm2, %xmm1");
+	asm volatile("vmulph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vmulph %ymm3, %ymm2, %ymm1");
+	asm volatile("vmulph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vmulsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vmulsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vrcpph %zmm2, %zmm1");
+	asm volatile("vrcpph 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vrcpph %xmm2, %xmm1");
+	asm volatile("vrcpph 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vrcpph %ymm2, %ymm1");
+	asm volatile("vrcpph 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vrcpsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vrcpsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vreduceph $0x12, %zmm2, %zmm1");
+	asm volatile("vreduceph $0x12, 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vreduceph $0x12, %xmm2, %xmm1");
+	asm volatile("vreduceph $0x12, 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vreduceph $0x12, %ymm2, %ymm1");
+	asm volatile("vreduceph $0x12, 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vreducesh $0x12, %xmm3, %xmm2, %xmm1");
+	asm volatile("vreducesh $0x12, 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vrndscaleph $0x12, %zmm2, %zmm1");
+	asm volatile("vrndscaleph $0x12, 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vrndscaleph $0x12, %xmm2, %xmm1");
+	asm volatile("vrndscaleph $0x12, 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vrndscaleph $0x12, %ymm2, %ymm1");
+	asm volatile("vrndscaleph $0x12, 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vrndscalesh $0x12, %xmm3, %xmm2, %xmm1");
+	asm volatile("vrndscalesh $0x12, 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vrsqrtph %zmm2, %zmm1");
+	asm volatile("vrsqrtph 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vrsqrtph %xmm2, %xmm1");
+	asm volatile("vrsqrtph 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vrsqrtph %ymm2, %ymm1");
+	asm volatile("vrsqrtph 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vrsqrtsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vrsqrtsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vscalefph %zmm3, %zmm2, %zmm1");
+	asm volatile("vscalefph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vscalefph %xmm3, %xmm2, %xmm1");
+	asm volatile("vscalefph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vscalefph %ymm3, %ymm2, %ymm1");
+	asm volatile("vscalefph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vscalefsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vscalefsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vsqrtph %zmm2, %zmm1");
+	asm volatile("vsqrtph 0x12345678(%eax,%ecx,8), %zmm1");
+	asm volatile("vsqrtph %xmm2, %xmm1");
+	asm volatile("vsqrtph 0x12345678(%eax,%ecx,8), %xmm1");
+	asm volatile("vsqrtph %ymm2, %ymm1");
+	asm volatile("vsqrtph 0x12345678(%eax,%ecx,8), %ymm1");
+	asm volatile("vsqrtsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vsqrtsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vsubph %zmm3, %zmm2, %zmm1");
+	asm volatile("vsubph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1");
+	asm volatile("vsubph %xmm3, %xmm2, %xmm1");
+	asm volatile("vsubph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vsubph %ymm3, %ymm2, %ymm1");
+	asm volatile("vsubph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1");
+	asm volatile("vsubsh %xmm3, %xmm2, %xmm1");
+	asm volatile("vsubsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1");
+	asm volatile("vucomish %xmm2, %xmm1");
+	asm volatile("vucomish 0x12345678(%eax,%ecx,8), %xmm1");
+
 #endif /* #ifndef __x86_64__ */
 
 	/* Prediction history reset */
-- 
GitLab


From 16273fa4f3a2dc2c64dd8a28fe30f255a4de0e4c Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Thu, 2 Dec 2021 11:50:29 +0200
Subject: [PATCH 0008/1586] x86/insn: Add AVX512-FP16 instructions to the x86
 instruction decoder

The x86 instruction decoder is used for both kernel instructions and
user space instructions (e.g. uprobes, perf tools Intel PT), so it is
good to update it with new instructions.

Add AVX512-FP16 instructions to x86 instruction decoder.

Note the EVEX map field is extended by 1 bit, and most instructions are in
map 5 and map 6.

Reference:
Intel AVX512-FP16 Architecture Specification
June 2021
Revision 1.0
Document Number: 347407-001US

Example using perf tools' x86 instruction decoder test:

  $ perf test -v "x86 instruction decoder" |& grep vfcmaddcph | head -2
  Decoded ok: 62 f6 6f 48 56 cb           vfcmaddcph %zmm3,%zmm2,%zmm1
  Decoded ok: 62 f6 6f 48 56 8c c8 78 56 34 12    vfcmaddcph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Link: https://lore.kernel.org/r/20211202095029.2165714-7-adrian.hunter@intel.com
---
 arch/x86/include/asm/insn.h           |  2 +-
 arch/x86/lib/x86-opcode-map.txt       | 95 ++++++++++++++++++++++++---
 tools/arch/x86/include/asm/insn.h     |  2 +-
 tools/arch/x86/lib/x86-opcode-map.txt | 95 ++++++++++++++++++++++++---
 4 files changed, 176 insertions(+), 18 deletions(-)

diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h
index 05a6ab940f452..1b29f58f730fd 100644
--- a/arch/x86/include/asm/insn.h
+++ b/arch/x86/include/asm/insn.h
@@ -124,7 +124,7 @@ struct insn {
 #define X86_VEX_B(vex)	((vex) & 0x20)	/* VEX3 Byte1 */
 #define X86_VEX_L(vex)	((vex) & 0x04)	/* VEX3 Byte2, VEX2 Byte1 */
 /* VEX bit fields */
-#define X86_EVEX_M(vex)	((vex) & 0x03)		/* EVEX Byte1 */
+#define X86_EVEX_M(vex)	((vex) & 0x07)		/* EVEX Byte1 */
 #define X86_VEX3_M(vex)	((vex) & 0x1f)		/* VEX3 Byte1 */
 #define X86_VEX2_M	1			/* VEX2.M always 1 */
 #define X86_VEX_V(vex)	(((vex) & 0x78) >> 3)	/* VEX3 Byte2, VEX2 Byte1 */
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index 591797a931bfc..d12d1358f96d2 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -828,9 +828,9 @@ AVXcode: 3
 05: vpermilpd Vx,Wx,Ib (66),(v)
 06: vperm2f128 Vqq,Hqq,Wqq,Ib (66),(v)
 07:
-08: vroundps Vx,Wx,Ib (66) | vrndscaleps Vx,Wx,Ib (66),(evo)
+08: vroundps Vx,Wx,Ib (66) | vrndscaleps Vx,Wx,Ib (66),(evo) | vrndscaleph Vx,Wx,Ib (evo)
 09: vroundpd Vx,Wx,Ib (66) | vrndscalepd Vx,Wx,Ib (66),(evo)
-0a: vroundss Vss,Wss,Ib (66),(v1) | vrndscaless Vx,Hx,Wx,Ib (66),(evo)
+0a: vroundss Vss,Wss,Ib (66),(v1) | vrndscaless Vx,Hx,Wx,Ib (66),(evo) | vrndscalesh Vx,Hx,Wx,Ib (evo)
 0b: vroundsd Vsd,Wsd,Ib (66),(v1) | vrndscalesd Vx,Hx,Wx,Ib (66),(evo)
 0c: vblendps Vx,Hx,Wx,Ib (66)
 0d: vblendpd Vx,Hx,Wx,Ib (66)
@@ -852,8 +852,8 @@ AVXcode: 3
 22: vpinsrd/q Vdq,Hdq,Ey,Ib (66),(v1)
 23: vshuff32x4/64x2 Vx,Hx,Wx,Ib (66),(ev)
 25: vpternlogd/q Vx,Hx,Wx,Ib (66),(ev)
-26: vgetmantps/d Vx,Wx,Ib (66),(ev)
-27: vgetmantss/d Vx,Hx,Wx,Ib (66),(ev)
+26: vgetmantps/d Vx,Wx,Ib (66),(ev) | vgetmantph Vx,Wx,Ib (ev)
+27: vgetmantss/d Vx,Hx,Wx,Ib (66),(ev) | vgetmantsh Vx,Hx,Wx,Ib (ev)
 30: kshiftrb/w Vk,Uk,Ib (66),(v)
 31: kshiftrd/q Vk,Uk,Ib (66),(v)
 32: kshiftlb/w Vk,Uk,Ib (66),(v)
@@ -877,18 +877,19 @@ AVXcode: 3
 51: vrangess/d Vx,Hx,Wx,Ib (66),(ev)
 54: vfixupimmps/d Vx,Hx,Wx,Ib (66),(ev)
 55: vfixupimmss/d Vx,Hx,Wx,Ib (66),(ev)
-56: vreduceps/d Vx,Wx,Ib (66),(ev)
-57: vreducess/d Vx,Hx,Wx,Ib (66),(ev)
+56: vreduceps/d Vx,Wx,Ib (66),(ev) | vreduceph Vx,Wx,Ib (ev)
+57: vreducess/d Vx,Hx,Wx,Ib (66),(ev) | vreducesh Vx,Hx,Wx,Ib (ev)
 60: vpcmpestrm Vdq,Wdq,Ib (66),(v1)
 61: vpcmpestri Vdq,Wdq,Ib (66),(v1)
 62: vpcmpistrm Vdq,Wdq,Ib (66),(v1)
 63: vpcmpistri Vdq,Wdq,Ib (66),(v1)
-66: vfpclassps/d Vk,Wx,Ib (66),(ev)
-67: vfpclassss/d Vk,Wx,Ib (66),(ev)
+66: vfpclassps/d Vk,Wx,Ib (66),(ev) | vfpclassph Vx,Wx,Ib (ev)
+67: vfpclassss/d Vk,Wx,Ib (66),(ev) | vfpclasssh Vx,Wx,Ib (ev)
 70: vpshldw Vx,Hx,Wx,Ib (66),(ev)
 71: vpshldd/q Vx,Hx,Wx,Ib (66),(ev)
 72: vpshrdw Vx,Hx,Wx,Ib (66),(ev)
 73: vpshrdd/q Vx,Hx,Wx,Ib (66),(ev)
+c2: vcmpph Vx,Hx,Wx,Ib (ev) | vcmpsh Vx,Hx,Wx,Ib (F3),(ev)
 cc: sha1rnds4 Vdq,Wdq,Ib
 ce: vgf2p8affineqb Vx,Wx,Ib (66)
 cf: vgf2p8affineinvqb Vx,Wx,Ib (66)
@@ -896,6 +897,84 @@ df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1)
 f0: RORX Gy,Ey,Ib (F2),(v) | HRESET Gv,Ib (F3),(000),(11B)
 EndTable
 
+Table: EVEX map 5
+Referrer:
+AVXcode: 5
+10: vmovsh Vx,Hx,Wx (F3),(ev) | vmovsh Vx,Wx (F3),(ev)
+11: vmovsh Wx,Hx,Vx (F3),(ev) | vmovsh Wx,Vx (F3),(ev)
+1d: vcvtps2phx Vx,Wx (66),(ev) | vcvtss2sh Vx,Hx,Wx (ev)
+2a: vcvtsi2sh Vx,Hx,Wx (F3),(ev)
+2c: vcvttsh2si Vx,Wx (F3),(ev)
+2d: vcvtsh2si Vx,Wx (F3),(ev)
+2e: vucomish Vx,Wx (ev)
+2f: vcomish Vx,Wx (ev)
+51: vsqrtph Vx,Wx (ev) | vsqrtsh Vx,Hx,Wx (F3),(ev)
+58: vaddph Vx,Hx,Wx (ev) | vaddsh Vx,Hx,Wx (F3),(ev)
+59: vmulph Vx,Hx,Wx (ev) | vmulsh Vx,Hx,Wx (F3),(ev)
+5a: vcvtpd2ph Vx,Wx (66),(ev) | vcvtph2pd Vx,Wx (ev) | vcvtsd2sh Vx,Hx,Wx (F2),(ev) | vcvtsh2sd Vx,Hx,Wx (F3),(ev)
+5b: vcvtdq2ph Vx,Wx (ev) | vcvtph2dq Vx,Wx (66),(ev) | vcvtqq2ph Vx,Wx (ev) | vcvttph2dq Vx,Wx (F3),(ev)
+5c: vsubph Vx,Hx,Wx (ev) | vsubsh Vx,Hx,Wx (F3),(ev)
+5d: vminph Vx,Hx,Wx (ev) | vminsh Vx,Hx,Wx (F3),(ev)
+5e: vdivph Vx,Hx,Wx (ev) | vdivsh Vx,Hx,Wx (F3),(ev)
+5f: vmaxph Vx,Hx,Wx (ev) | vmaxsh Vx,Hx,Wx (F3),(ev)
+6e: vmovw Vx,Wx (66),(ev)
+78: vcvttph2udq Vx,Wx (ev) | vcvttph2uqq Vx,Wx (66),(ev) | vcvttsh2usi Vx,Wx (F3),(ev)
+79: vcvtph2udq Vx,Wx (ev) | vcvtph2uqq Vx,Wx (66),(ev) | vcvtsh2usi Vx,Wx (F3),(ev)
+7a: vcvttph2qq Vx,Wx (66),(ev) | vcvtudq2ph Vx,Wx (F2),(ev) | vcvtuqq2ph Vx,Wx (F2),(ev)
+7b: vcvtph2qq Vx,Wx (66),(ev) | vcvtusi2sh Vx,Hx,Wx (F3),(ev)
+7c: vcvttph2uw Vx,Wx (ev) | vcvttph2w Vx,Wx (66),(ev)
+7d: vcvtph2uw Vx,Wx (ev) | vcvtph2w Vx,Wx (66),(ev) | vcvtuw2ph Vx,Wx (F2),(ev) | vcvtw2ph Vx,Wx (F3),(ev)
+7e: vmovw Wx,Vx (66),(ev)
+EndTable
+
+Table: EVEX map 6
+Referrer:
+AVXcode: 6
+13: vcvtph2psx Vx,Wx (66),(ev) | vcvtsh2ss Vx,Hx,Wx (ev)
+2c: vscalefph Vx,Hx,Wx (66),(ev)
+2d: vscalefsh Vx,Hx,Wx (66),(ev)
+42: vgetexpph Vx,Wx (66),(ev)
+43: vgetexpsh Vx,Hx,Wx (66),(ev)
+4c: vrcpph Vx,Wx (66),(ev)
+4d: vrcpsh Vx,Hx,Wx (66),(ev)
+4e: vrsqrtph Vx,Wx (66),(ev)
+4f: vrsqrtsh Vx,Hx,Wx (66),(ev)
+56: vfcmaddcph Vx,Hx,Wx (F2),(ev) | vfmaddcph Vx,Hx,Wx (F3),(ev)
+57: vfcmaddcsh Vx,Hx,Wx (F2),(ev) | vfmaddcsh Vx,Hx,Wx (F3),(ev)
+96: vfmaddsub132ph Vx,Hx,Wx (66),(ev)
+97: vfmsubadd132ph Vx,Hx,Wx (66),(ev)
+98: vfmadd132ph Vx,Hx,Wx (66),(ev)
+99: vfmadd132sh Vx,Hx,Wx (66),(ev)
+9a: vfmsub132ph Vx,Hx,Wx (66),(ev)
+9b: vfmsub132sh Vx,Hx,Wx (66),(ev)
+9c: vfnmadd132ph Vx,Hx,Wx (66),(ev)
+9d: vfnmadd132sh Vx,Hx,Wx (66),(ev)
+9e: vfnmsub132ph Vx,Hx,Wx (66),(ev)
+9f: vfnmsub132sh Vx,Hx,Wx (66),(ev)
+a6: vfmaddsub213ph Vx,Hx,Wx (66),(ev)
+a7: vfmsubadd213ph Vx,Hx,Wx (66),(ev)
+a8: vfmadd213ph Vx,Hx,Wx (66),(ev)
+a9: vfmadd213sh Vx,Hx,Wx (66),(ev)
+aa: vfmsub213ph Vx,Hx,Wx (66),(ev)
+ab: vfmsub213sh Vx,Hx,Wx (66),(ev)
+ac: vfnmadd213ph Vx,Hx,Wx (66),(ev)
+ad: vfnmadd213sh Vx,Hx,Wx (66),(ev)
+ae: vfnmsub213ph Vx,Hx,Wx (66),(ev)
+af: vfnmsub213sh Vx,Hx,Wx (66),(ev)
+b6: vfmaddsub231ph Vx,Hx,Wx (66),(ev)
+b7: vfmsubadd231ph Vx,Hx,Wx (66),(ev)
+b8: vfmadd231ph Vx,Hx,Wx (66),(ev)
+b9: vfmadd231sh Vx,Hx,Wx (66),(ev)
+ba: vfmsub231ph Vx,Hx,Wx (66),(ev)
+bb: vfmsub231sh Vx,Hx,Wx (66),(ev)
+bc: vfnmadd231ph Vx,Hx,Wx (66),(ev)
+bd: vfnmadd231sh Vx,Hx,Wx (66),(ev)
+be: vfnmsub231ph Vx,Hx,Wx (66),(ev)
+bf: vfnmsub231sh Vx,Hx,Wx (66),(ev)
+d6: vfcmulcph Vx,Hx,Wx (F2),(ev) | vfmulcph Vx,Hx,Wx (F3),(ev)
+d7: vfcmulcsh Vx,Hx,Wx (F2),(ev) | vfmulcsh Vx,Hx,Wx (F3),(ev)
+EndTable
+
 GrpTable: Grp1
 0: ADD
 1: OR
diff --git a/tools/arch/x86/include/asm/insn.h b/tools/arch/x86/include/asm/insn.h
index dc632b41f1356..65c0d9ce1e295 100644
--- a/tools/arch/x86/include/asm/insn.h
+++ b/tools/arch/x86/include/asm/insn.h
@@ -124,7 +124,7 @@ struct insn {
 #define X86_VEX_B(vex)	((vex) & 0x20)	/* VEX3 Byte1 */
 #define X86_VEX_L(vex)	((vex) & 0x04)	/* VEX3 Byte2, VEX2 Byte1 */
 /* VEX bit fields */
-#define X86_EVEX_M(vex)	((vex) & 0x03)		/* EVEX Byte1 */
+#define X86_EVEX_M(vex)	((vex) & 0x07)		/* EVEX Byte1 */
 #define X86_VEX3_M(vex)	((vex) & 0x1f)		/* VEX3 Byte1 */
 #define X86_VEX2_M	1			/* VEX2.M always 1 */
 #define X86_VEX_V(vex)	(((vex) & 0x78) >> 3)	/* VEX3 Byte2, VEX2 Byte1 */
diff --git a/tools/arch/x86/lib/x86-opcode-map.txt b/tools/arch/x86/lib/x86-opcode-map.txt
index 591797a931bfc..d12d1358f96d2 100644
--- a/tools/arch/x86/lib/x86-opcode-map.txt
+++ b/tools/arch/x86/lib/x86-opcode-map.txt
@@ -828,9 +828,9 @@ AVXcode: 3
 05: vpermilpd Vx,Wx,Ib (66),(v)
 06: vperm2f128 Vqq,Hqq,Wqq,Ib (66),(v)
 07:
-08: vroundps Vx,Wx,Ib (66) | vrndscaleps Vx,Wx,Ib (66),(evo)
+08: vroundps Vx,Wx,Ib (66) | vrndscaleps Vx,Wx,Ib (66),(evo) | vrndscaleph Vx,Wx,Ib (evo)
 09: vroundpd Vx,Wx,Ib (66) | vrndscalepd Vx,Wx,Ib (66),(evo)
-0a: vroundss Vss,Wss,Ib (66),(v1) | vrndscaless Vx,Hx,Wx,Ib (66),(evo)
+0a: vroundss Vss,Wss,Ib (66),(v1) | vrndscaless Vx,Hx,Wx,Ib (66),(evo) | vrndscalesh Vx,Hx,Wx,Ib (evo)
 0b: vroundsd Vsd,Wsd,Ib (66),(v1) | vrndscalesd Vx,Hx,Wx,Ib (66),(evo)
 0c: vblendps Vx,Hx,Wx,Ib (66)
 0d: vblendpd Vx,Hx,Wx,Ib (66)
@@ -852,8 +852,8 @@ AVXcode: 3
 22: vpinsrd/q Vdq,Hdq,Ey,Ib (66),(v1)
 23: vshuff32x4/64x2 Vx,Hx,Wx,Ib (66),(ev)
 25: vpternlogd/q Vx,Hx,Wx,Ib (66),(ev)
-26: vgetmantps/d Vx,Wx,Ib (66),(ev)
-27: vgetmantss/d Vx,Hx,Wx,Ib (66),(ev)
+26: vgetmantps/d Vx,Wx,Ib (66),(ev) | vgetmantph Vx,Wx,Ib (ev)
+27: vgetmantss/d Vx,Hx,Wx,Ib (66),(ev) | vgetmantsh Vx,Hx,Wx,Ib (ev)
 30: kshiftrb/w Vk,Uk,Ib (66),(v)
 31: kshiftrd/q Vk,Uk,Ib (66),(v)
 32: kshiftlb/w Vk,Uk,Ib (66),(v)
@@ -877,18 +877,19 @@ AVXcode: 3
 51: vrangess/d Vx,Hx,Wx,Ib (66),(ev)
 54: vfixupimmps/d Vx,Hx,Wx,Ib (66),(ev)
 55: vfixupimmss/d Vx,Hx,Wx,Ib (66),(ev)
-56: vreduceps/d Vx,Wx,Ib (66),(ev)
-57: vreducess/d Vx,Hx,Wx,Ib (66),(ev)
+56: vreduceps/d Vx,Wx,Ib (66),(ev) | vreduceph Vx,Wx,Ib (ev)
+57: vreducess/d Vx,Hx,Wx,Ib (66),(ev) | vreducesh Vx,Hx,Wx,Ib (ev)
 60: vpcmpestrm Vdq,Wdq,Ib (66),(v1)
 61: vpcmpestri Vdq,Wdq,Ib (66),(v1)
 62: vpcmpistrm Vdq,Wdq,Ib (66),(v1)
 63: vpcmpistri Vdq,Wdq,Ib (66),(v1)
-66: vfpclassps/d Vk,Wx,Ib (66),(ev)
-67: vfpclassss/d Vk,Wx,Ib (66),(ev)
+66: vfpclassps/d Vk,Wx,Ib (66),(ev) | vfpclassph Vx,Wx,Ib (ev)
+67: vfpclassss/d Vk,Wx,Ib (66),(ev) | vfpclasssh Vx,Wx,Ib (ev)
 70: vpshldw Vx,Hx,Wx,Ib (66),(ev)
 71: vpshldd/q Vx,Hx,Wx,Ib (66),(ev)
 72: vpshrdw Vx,Hx,Wx,Ib (66),(ev)
 73: vpshrdd/q Vx,Hx,Wx,Ib (66),(ev)
+c2: vcmpph Vx,Hx,Wx,Ib (ev) | vcmpsh Vx,Hx,Wx,Ib (F3),(ev)
 cc: sha1rnds4 Vdq,Wdq,Ib
 ce: vgf2p8affineqb Vx,Wx,Ib (66)
 cf: vgf2p8affineinvqb Vx,Wx,Ib (66)
@@ -896,6 +897,84 @@ df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1)
 f0: RORX Gy,Ey,Ib (F2),(v) | HRESET Gv,Ib (F3),(000),(11B)
 EndTable
 
+Table: EVEX map 5
+Referrer:
+AVXcode: 5
+10: vmovsh Vx,Hx,Wx (F3),(ev) | vmovsh Vx,Wx (F3),(ev)
+11: vmovsh Wx,Hx,Vx (F3),(ev) | vmovsh Wx,Vx (F3),(ev)
+1d: vcvtps2phx Vx,Wx (66),(ev) | vcvtss2sh Vx,Hx,Wx (ev)
+2a: vcvtsi2sh Vx,Hx,Wx (F3),(ev)
+2c: vcvttsh2si Vx,Wx (F3),(ev)
+2d: vcvtsh2si Vx,Wx (F3),(ev)
+2e: vucomish Vx,Wx (ev)
+2f: vcomish Vx,Wx (ev)
+51: vsqrtph Vx,Wx (ev) | vsqrtsh Vx,Hx,Wx (F3),(ev)
+58: vaddph Vx,Hx,Wx (ev) | vaddsh Vx,Hx,Wx (F3),(ev)
+59: vmulph Vx,Hx,Wx (ev) | vmulsh Vx,Hx,Wx (F3),(ev)
+5a: vcvtpd2ph Vx,Wx (66),(ev) | vcvtph2pd Vx,Wx (ev) | vcvtsd2sh Vx,Hx,Wx (F2),(ev) | vcvtsh2sd Vx,Hx,Wx (F3),(ev)
+5b: vcvtdq2ph Vx,Wx (ev) | vcvtph2dq Vx,Wx (66),(ev) | vcvtqq2ph Vx,Wx (ev) | vcvttph2dq Vx,Wx (F3),(ev)
+5c: vsubph Vx,Hx,Wx (ev) | vsubsh Vx,Hx,Wx (F3),(ev)
+5d: vminph Vx,Hx,Wx (ev) | vminsh Vx,Hx,Wx (F3),(ev)
+5e: vdivph Vx,Hx,Wx (ev) | vdivsh Vx,Hx,Wx (F3),(ev)
+5f: vmaxph Vx,Hx,Wx (ev) | vmaxsh Vx,Hx,Wx (F3),(ev)
+6e: vmovw Vx,Wx (66),(ev)
+78: vcvttph2udq Vx,Wx (ev) | vcvttph2uqq Vx,Wx (66),(ev) | vcvttsh2usi Vx,Wx (F3),(ev)
+79: vcvtph2udq Vx,Wx (ev) | vcvtph2uqq Vx,Wx (66),(ev) | vcvtsh2usi Vx,Wx (F3),(ev)
+7a: vcvttph2qq Vx,Wx (66),(ev) | vcvtudq2ph Vx,Wx (F2),(ev) | vcvtuqq2ph Vx,Wx (F2),(ev)
+7b: vcvtph2qq Vx,Wx (66),(ev) | vcvtusi2sh Vx,Hx,Wx (F3),(ev)
+7c: vcvttph2uw Vx,Wx (ev) | vcvttph2w Vx,Wx (66),(ev)
+7d: vcvtph2uw Vx,Wx (ev) | vcvtph2w Vx,Wx (66),(ev) | vcvtuw2ph Vx,Wx (F2),(ev) | vcvtw2ph Vx,Wx (F3),(ev)
+7e: vmovw Wx,Vx (66),(ev)
+EndTable
+
+Table: EVEX map 6
+Referrer:
+AVXcode: 6
+13: vcvtph2psx Vx,Wx (66),(ev) | vcvtsh2ss Vx,Hx,Wx (ev)
+2c: vscalefph Vx,Hx,Wx (66),(ev)
+2d: vscalefsh Vx,Hx,Wx (66),(ev)
+42: vgetexpph Vx,Wx (66),(ev)
+43: vgetexpsh Vx,Hx,Wx (66),(ev)
+4c: vrcpph Vx,Wx (66),(ev)
+4d: vrcpsh Vx,Hx,Wx (66),(ev)
+4e: vrsqrtph Vx,Wx (66),(ev)
+4f: vrsqrtsh Vx,Hx,Wx (66),(ev)
+56: vfcmaddcph Vx,Hx,Wx (F2),(ev) | vfmaddcph Vx,Hx,Wx (F3),(ev)
+57: vfcmaddcsh Vx,Hx,Wx (F2),(ev) | vfmaddcsh Vx,Hx,Wx (F3),(ev)
+96: vfmaddsub132ph Vx,Hx,Wx (66),(ev)
+97: vfmsubadd132ph Vx,Hx,Wx (66),(ev)
+98: vfmadd132ph Vx,Hx,Wx (66),(ev)
+99: vfmadd132sh Vx,Hx,Wx (66),(ev)
+9a: vfmsub132ph Vx,Hx,Wx (66),(ev)
+9b: vfmsub132sh Vx,Hx,Wx (66),(ev)
+9c: vfnmadd132ph Vx,Hx,Wx (66),(ev)
+9d: vfnmadd132sh Vx,Hx,Wx (66),(ev)
+9e: vfnmsub132ph Vx,Hx,Wx (66),(ev)
+9f: vfnmsub132sh Vx,Hx,Wx (66),(ev)
+a6: vfmaddsub213ph Vx,Hx,Wx (66),(ev)
+a7: vfmsubadd213ph Vx,Hx,Wx (66),(ev)
+a8: vfmadd213ph Vx,Hx,Wx (66),(ev)
+a9: vfmadd213sh Vx,Hx,Wx (66),(ev)
+aa: vfmsub213ph Vx,Hx,Wx (66),(ev)
+ab: vfmsub213sh Vx,Hx,Wx (66),(ev)
+ac: vfnmadd213ph Vx,Hx,Wx (66),(ev)
+ad: vfnmadd213sh Vx,Hx,Wx (66),(ev)
+ae: vfnmsub213ph Vx,Hx,Wx (66),(ev)
+af: vfnmsub213sh Vx,Hx,Wx (66),(ev)
+b6: vfmaddsub231ph Vx,Hx,Wx (66),(ev)
+b7: vfmsubadd231ph Vx,Hx,Wx (66),(ev)
+b8: vfmadd231ph Vx,Hx,Wx (66),(ev)
+b9: vfmadd231sh Vx,Hx,Wx (66),(ev)
+ba: vfmsub231ph Vx,Hx,Wx (66),(ev)
+bb: vfmsub231sh Vx,Hx,Wx (66),(ev)
+bc: vfnmadd231ph Vx,Hx,Wx (66),(ev)
+bd: vfnmadd231sh Vx,Hx,Wx (66),(ev)
+be: vfnmsub231ph Vx,Hx,Wx (66),(ev)
+bf: vfnmsub231sh Vx,Hx,Wx (66),(ev)
+d6: vfcmulcph Vx,Hx,Wx (F2),(ev) | vfmulcph Vx,Hx,Wx (F3),(ev)
+d7: vfcmulcsh Vx,Hx,Wx (F2),(ev) | vfmulcsh Vx,Hx,Wx (F3),(ev)
+EndTable
+
 GrpTable: Grp1
 0: ADD
 1: OR
-- 
GitLab


From 4eda2bc3431ebe5c8361580877a4666d10ea28a7 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Wed, 29 Sep 2021 16:43:21 +0200
Subject: [PATCH 0009/1586] x86/Kconfig: Select ARCH_SELECT_MEMORY_MODEL only
 if FLATMEM and SPARSEMEM are possible

x86-64 supports only CONFIG_SPARSEMEM; there is nothing users can select.
So enable the memory model selection (via CONFIG_ARCH_SELECT_MEMORY_MODEL)
only if both, SPARSEMEM and FLATMEM are possible, which isn't the case
on x86-64.

Signed-off-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lore.kernel.org/r/20210929144321.50411-1-david@redhat.com
---
 arch/x86/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index ebe8fc76949af..a7e0bf47f2dbb 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1637,7 +1637,7 @@ config ARCH_SPARSEMEM_DEFAULT
 
 config ARCH_SELECT_MEMORY_MODEL
 	def_bool y
-	depends on ARCH_SPARSEMEM_ENABLE
+	depends on ARCH_SPARSEMEM_ENABLE && ARCH_FLATMEM_ENABLE
 
 config ARCH_MEMORY_PROBE
 	bool "Enable sysfs memory/probe interface"
-- 
GitLab


From 1056c41634d400cf88d30580dc53270030cb0ace Mon Sep 17 00:00:00 2001
From: Luca Ceresoli <luca@lucaceresoli.net>
Date: Thu, 13 Jan 2022 15:42:59 +0100
Subject: [PATCH 0010/1586] regmap-irq: Fix typo in comment

This is not a resource manager, it is a "Resource managed version of
regmap_add_irq_chip()". Fix the comment.

Signed-off-by: Luca Ceresoli <luca@lucaceresoli.net>
Link: https://lore.kernel.org/r/20220113144259.355845-1-luca@lucaceresoli.net
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/base/regmap/regmap-irq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/base/regmap/regmap-irq.c b/drivers/base/regmap/regmap-irq.c
index d2656581a6085..d5604f4972967 100644
--- a/drivers/base/regmap/regmap-irq.c
+++ b/drivers/base/regmap/regmap-irq.c
@@ -1053,7 +1053,7 @@ int devm_regmap_add_irq_chip_fwnode(struct device *dev,
 EXPORT_SYMBOL_GPL(devm_regmap_add_irq_chip_fwnode);
 
 /**
- * devm_regmap_add_irq_chip() - Resource manager regmap_add_irq_chip()
+ * devm_regmap_add_irq_chip() - Resource managed regmap_add_irq_chip()
  *
  * @dev: The device pointer on which irq_chip belongs to.
  * @map: The regmap for the device.
-- 
GitLab


From 6390d42c21efff0b4c10956a38e341f4e84ecd3d Mon Sep 17 00:00:00 2001
From: kernel test robot <lkp@intel.com>
Date: Sat, 15 Jan 2022 12:11:38 +0100
Subject: [PATCH 0011/1586] regulator: qcom_smd: fix for_each_child.cocci
 warnings

drivers/regulator/qcom_smd-regulator.c:1318:1-33: WARNING: Function "for_each_available_child_of_node" should have of_node_put() before return around line 1321.

Semantic patch information:
 False positives can be due to function calls within the for_each
 loop that may encapsulate an of_node_put.

Generated by: scripts/coccinelle/iterators/for_each_child.cocci

Fixes: 14e2976fbabd ("regulator: qcom_smd: Align probe function with rpmh-regulator")
CC: Konrad Dybcio <konrad.dybcio@somainline.org>
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: kernel test robot <lkp@intel.com>
Signed-off-by: Julia Lawall <julia.lawall@inria.fr>
Link: https://lore.kernel.org/r/alpine.DEB.2.22.394.2201151210170.3051@hadrien
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/qcom_smd-regulator.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/regulator/qcom_smd-regulator.c b/drivers/regulator/qcom_smd-regulator.c
index 9fc666107a06c..8490aa8eecb1a 100644
--- a/drivers/regulator/qcom_smd-regulator.c
+++ b/drivers/regulator/qcom_smd-regulator.c
@@ -1317,8 +1317,10 @@ static int rpm_reg_probe(struct platform_device *pdev)
 
 	for_each_available_child_of_node(dev->of_node, node) {
 		vreg = devm_kzalloc(&pdev->dev, sizeof(*vreg), GFP_KERNEL);
-		if (!vreg)
+		if (!vreg) {
+			of_node_put(node);
 			return -ENOMEM;
+		}
 
 		ret = rpm_regulator_init_vreg(vreg, dev, node, rpm, vreg_data);
 
-- 
GitLab


From 8e9977e48c7c2a49e09859456dcba12a8d804a51 Mon Sep 17 00:00:00 2001
From: Alexander Stein <alexander.stein@ew.tq-group.com>
Date: Wed, 19 Jan 2022 11:37:47 +0100
Subject: [PATCH 0012/1586] regulator: pfuze100: Add missing regulator names

The valid regulator names for the subnodes does not match the ones
mentioned in the description.
PFUZE3000 & PFUZE3001 uses 'v33', 'vccsd' and 'vldo[1-4]' as well, so
add them to the allowed node names.

Signed-off-by: Alexander Stein <alexander.stein@ew.tq-group.com>
Link: https://lore.kernel.org/r/20220119103747.58305-1-alexander.stein@ew.tq-group.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 Documentation/devicetree/bindings/regulator/pfuze100.yaml | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/regulator/pfuze100.yaml b/Documentation/devicetree/bindings/regulator/pfuze100.yaml
index f578e72778a7c..a26bbd68b7296 100644
--- a/Documentation/devicetree/bindings/regulator/pfuze100.yaml
+++ b/Documentation/devicetree/bindings/regulator/pfuze100.yaml
@@ -70,7 +70,11 @@ properties:
         $ref: "regulator.yaml#"
         type: object
 
-      "^(vsnvs|vref|vrefddr|swbst|coin)$":
+      "^vldo[1-4]$":
+        $ref: "regulator.yaml#"
+        type: object
+
+      "^(vsnvs|vref|vrefddr|swbst|coin|v33|vccsd)$":
         $ref: "regulator.yaml#"
         type: object
 
-- 
GitLab


From 2da187304e556ac59cf2dacb323cc78ded988169 Mon Sep 17 00:00:00 2001
From: Conor Dooley <conor.dooley@microchip.com>
Date: Mon, 17 Jan 2022 11:07:47 +0000
Subject: [PATCH 0013/1586] spi: add bindings for microchip mpfs spi

Add device tree bindings for the {q,}spi controller on
the Microchip PolarFire SoC.

Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
Link: https://lore.kernel.org/r/20220117110755.3433142-7-conor.dooley@microchip.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../bindings/spi/microchip,mpfs-spi.yaml      | 52 +++++++++++++++++++
 1 file changed, 52 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/spi/microchip,mpfs-spi.yaml

diff --git a/Documentation/devicetree/bindings/spi/microchip,mpfs-spi.yaml b/Documentation/devicetree/bindings/spi/microchip,mpfs-spi.yaml
new file mode 100644
index 0000000000000..ece261b8e963f
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/microchip,mpfs-spi.yaml
@@ -0,0 +1,52 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/spi/microchip,mpfs-spi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Microchip MPFS {Q,}SPI Controller Device Tree Bindings
+
+maintainers:
+  - Conor Dooley <conor.dooley@microchip.com>
+
+allOf:
+  - $ref: spi-controller.yaml#
+
+properties:
+  compatible:
+    enum:
+      - microchip,mpfs-spi
+      - microchip,mpfs-qspi
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clock-names:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    #include "dt-bindings/clock/microchip,mpfs-clock.h"
+    spi@20108000 {
+        compatible = "microchip,mpfs-spi";
+        reg = <0x20108000 0x1000>;
+        clocks = <&clkcfg CLK_SPI0>;
+        interrupt-parent = <&plic>;
+        interrupts = <54>;
+        spi-max-frequency = <25000000>;
+    };
+...
-- 
GitLab


From 5b177234e9fde7d4208e8163debc109b86e3f68d Mon Sep 17 00:00:00 2001
From: Guochun Mao <guochun.mao@mediatek.com>
Date: Tue, 18 Jan 2022 22:28:18 +0800
Subject: [PATCH 0014/1586] spi: spi-mtk-nor: improve device table for adding
 more capabilities

Define a structure for adding more capabilities.
Add a item extra_dummy_bit for new SoCs, due to design changed.

Signed-off-by: Guochun Mao <guochun.mao@mediatek.com>
Signed-off-by: Zhen Zhang <zhen.zhang@mediatek.com>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Link: https://lore.kernel.org/r/20220118142820.2729-3-guochun.mao@mediatek.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-mtk-nor.c | 48 +++++++++++++++++++++++++++++++--------
 1 file changed, 39 insertions(+), 9 deletions(-)

diff --git a/drivers/spi/spi-mtk-nor.c b/drivers/spi/spi-mtk-nor.c
index 5c93730615f8d..f5ff01f61f42a 100644
--- a/drivers/spi/spi-mtk-nor.c
+++ b/drivers/spi/spi-mtk-nor.c
@@ -95,6 +95,17 @@
 
 #define CLK_TO_US(sp, clkcnt)		DIV_ROUND_UP(clkcnt, sp->spi_freq / 1000000)
 
+struct mtk_nor_caps {
+	u8 dma_bits;
+
+	/* extra_dummy_bit is adding for the IP of new SoCs.
+	 * Some new SoCs modify the timing of fetching registers' values
+	 * and IDs of nor flash, they need a extra_dummy_bit which can add
+	 * more clock cycles for fetching data.
+	 */
+	u8 extra_dummy_bit;
+};
+
 struct mtk_nor {
 	struct spi_controller *ctlr;
 	struct device *dev;
@@ -109,6 +120,7 @@ struct mtk_nor {
 	bool has_irq;
 	bool high_dma;
 	struct completion op_done;
+	const struct mtk_nor_caps *caps;
 };
 
 static inline void mtk_nor_rmw(struct mtk_nor *sp, u32 reg, u32 set, u32 clr)
@@ -554,7 +566,12 @@ static int mtk_nor_spi_mem_prg(struct mtk_nor *sp, const struct spi_mem_op *op)
 	}
 
 	// trigger op
-	writel(prg_len * BITS_PER_BYTE, sp->base + MTK_NOR_REG_PRG_CNT);
+	if (rx_len)
+		writel(prg_len * BITS_PER_BYTE + sp->caps->extra_dummy_bit,
+		       sp->base + MTK_NOR_REG_PRG_CNT);
+	else
+		writel(prg_len * BITS_PER_BYTE, sp->base + MTK_NOR_REG_PRG_CNT);
+
 	ret = mtk_nor_cmd_exec(sp, MTK_NOR_CMD_PROGRAM,
 			       prg_len * BITS_PER_BYTE);
 	if (ret)
@@ -743,9 +760,19 @@ static const struct spi_controller_mem_ops mtk_nor_mem_ops = {
 	.exec_op = mtk_nor_exec_op
 };
 
+const struct mtk_nor_caps mtk_nor_caps_mt8173 = {
+	.dma_bits = 32,
+	.extra_dummy_bit = 0,
+};
+
+const struct mtk_nor_caps mtk_nor_caps_mt8192 = {
+	.dma_bits = 36,
+	.extra_dummy_bit = 0,
+};
+
 static const struct of_device_id mtk_nor_match[] = {
-	{ .compatible = "mediatek,mt8192-nor", .data = (void *)36 },
-	{ .compatible = "mediatek,mt8173-nor", .data = (void *)32 },
+	{ .compatible = "mediatek,mt8173-nor", .data = &mtk_nor_caps_mt8173 },
+	{ .compatible = "mediatek,mt8192-nor", .data = &mtk_nor_caps_mt8192 },
 	{ /* sentinel */ }
 };
 MODULE_DEVICE_TABLE(of, mtk_nor_match);
@@ -754,10 +781,10 @@ static int mtk_nor_probe(struct platform_device *pdev)
 {
 	struct spi_controller *ctlr;
 	struct mtk_nor *sp;
+	struct mtk_nor_caps *caps;
 	void __iomem *base;
 	struct clk *spi_clk, *ctlr_clk, *axi_clk;
 	int ret, irq;
-	unsigned long dma_bits;
 
 	base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(base))
@@ -775,10 +802,12 @@ static int mtk_nor_probe(struct platform_device *pdev)
 	if (IS_ERR(axi_clk))
 		return PTR_ERR(axi_clk);
 
-	dma_bits = (unsigned long)of_device_get_match_data(&pdev->dev);
-	if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(dma_bits))) {
-		dev_err(&pdev->dev, "failed to set dma mask(%lu)\n", dma_bits);
-		return -EINVAL;
+	caps = (struct mtk_nor_caps *)of_device_get_match_data(&pdev->dev);
+
+	ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(caps->dma_bits));
+	if (ret) {
+		dev_err(&pdev->dev, "failed to set dma mask(%u)\n", caps->dma_bits);
+		return ret;
 	}
 
 	ctlr = devm_spi_alloc_master(&pdev->dev, sizeof(*sp));
@@ -808,7 +837,8 @@ static int mtk_nor_probe(struct platform_device *pdev)
 	sp->spi_clk = spi_clk;
 	sp->ctlr_clk = ctlr_clk;
 	sp->axi_clk = axi_clk;
-	sp->high_dma = (dma_bits > 32);
+	sp->caps = caps;
+	sp->high_dma = caps->dma_bits > 32;
 	sp->buffer = dmam_alloc_coherent(&pdev->dev,
 				MTK_NOR_BOUNCE_BUF_SIZE + MTK_NOR_DMA_ALIGN,
 				&sp->buffer_dma, GFP_KERNEL);
-- 
GitLab


From 4e8bfe5cdf77621cb4e7b196448ceeff20d9d6a6 Mon Sep 17 00:00:00 2001
From: Guochun Mao <guochun.mao@mediatek.com>
Date: Tue, 18 Jan 2022 22:28:19 +0800
Subject: [PATCH 0015/1586] spi: spi-mtk-nor: add new soc mt8186 support

Add compatible mediatek,mt8186-nor implementation.

Signed-off-by: Guochun Mao <guochun.mao@mediatek.com>
Signed-off-by: Zhen Zhang <zhen.zhang@mediatek.com>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Link: https://lore.kernel.org/r/20220118142820.2729-4-guochun.mao@mediatek.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-mtk-nor.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/spi/spi-mtk-nor.c b/drivers/spi/spi-mtk-nor.c
index f5ff01f61f42a..e44fdf7c9e4ba 100644
--- a/drivers/spi/spi-mtk-nor.c
+++ b/drivers/spi/spi-mtk-nor.c
@@ -765,6 +765,11 @@ const struct mtk_nor_caps mtk_nor_caps_mt8173 = {
 	.extra_dummy_bit = 0,
 };
 
+const struct mtk_nor_caps mtk_nor_caps_mt8186 = {
+	.dma_bits = 32,
+	.extra_dummy_bit = 1,
+};
+
 const struct mtk_nor_caps mtk_nor_caps_mt8192 = {
 	.dma_bits = 36,
 	.extra_dummy_bit = 0,
@@ -772,6 +777,7 @@ const struct mtk_nor_caps mtk_nor_caps_mt8192 = {
 
 static const struct of_device_id mtk_nor_match[] = {
 	{ .compatible = "mediatek,mt8173-nor", .data = &mtk_nor_caps_mt8173 },
+	{ .compatible = "mediatek,mt8186-nor", .data = &mtk_nor_caps_mt8186 },
 	{ .compatible = "mediatek,mt8192-nor", .data = &mtk_nor_caps_mt8192 },
 	{ /* sentinel */ }
 };
-- 
GitLab


From 58b0a653b8dac40bbeb01a2c8a230aa8f84a7530 Mon Sep 17 00:00:00 2001
From: Guochun Mao <guochun.mao@mediatek.com>
Date: Tue, 18 Jan 2022 22:28:20 +0800
Subject: [PATCH 0016/1586] spi: spi-mtk-nor: add axi_s clock for mt8186

MT8186 needs axi_s clock for DMA feature.

Signed-off-by: Guochun Mao <guochun.mao@mediatek.com>
Signed-off-by: Zhen Zhang <zhen.zhang@mediatek.com>
Acked-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Link: https://lore.kernel.org/r/20220118142820.2729-5-guochun.mao@mediatek.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-mtk-nor.c | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/drivers/spi/spi-mtk-nor.c b/drivers/spi/spi-mtk-nor.c
index e44fdf7c9e4ba..455b4dcb26e9a 100644
--- a/drivers/spi/spi-mtk-nor.c
+++ b/drivers/spi/spi-mtk-nor.c
@@ -115,6 +115,7 @@ struct mtk_nor {
 	struct clk *spi_clk;
 	struct clk *ctlr_clk;
 	struct clk *axi_clk;
+	struct clk *axi_s_clk;
 	unsigned int spi_freq;
 	bool wbuf_en;
 	bool has_irq;
@@ -691,6 +692,7 @@ static void mtk_nor_disable_clk(struct mtk_nor *sp)
 	clk_disable_unprepare(sp->spi_clk);
 	clk_disable_unprepare(sp->ctlr_clk);
 	clk_disable_unprepare(sp->axi_clk);
+	clk_disable_unprepare(sp->axi_s_clk);
 }
 
 static int mtk_nor_enable_clk(struct mtk_nor *sp)
@@ -714,6 +716,14 @@ static int mtk_nor_enable_clk(struct mtk_nor *sp)
 		return ret;
 	}
 
+	ret = clk_prepare_enable(sp->axi_s_clk);
+	if (ret) {
+		clk_disable_unprepare(sp->spi_clk);
+		clk_disable_unprepare(sp->ctlr_clk);
+		clk_disable_unprepare(sp->axi_clk);
+		return ret;
+	}
+
 	return 0;
 }
 
@@ -789,7 +799,7 @@ static int mtk_nor_probe(struct platform_device *pdev)
 	struct mtk_nor *sp;
 	struct mtk_nor_caps *caps;
 	void __iomem *base;
-	struct clk *spi_clk, *ctlr_clk, *axi_clk;
+	struct clk *spi_clk, *ctlr_clk, *axi_clk, *axi_s_clk;
 	int ret, irq;
 
 	base = devm_platform_ioremap_resource(pdev, 0);
@@ -808,6 +818,10 @@ static int mtk_nor_probe(struct platform_device *pdev)
 	if (IS_ERR(axi_clk))
 		return PTR_ERR(axi_clk);
 
+	axi_s_clk = devm_clk_get_optional(&pdev->dev, "axi_s");
+	if (IS_ERR(axi_s_clk))
+		return PTR_ERR(axi_s_clk);
+
 	caps = (struct mtk_nor_caps *)of_device_get_match_data(&pdev->dev);
 
 	ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(caps->dma_bits));
@@ -843,6 +857,7 @@ static int mtk_nor_probe(struct platform_device *pdev)
 	sp->spi_clk = spi_clk;
 	sp->ctlr_clk = ctlr_clk;
 	sp->axi_clk = axi_clk;
+	sp->axi_s_clk = axi_s_clk;
 	sp->caps = caps;
 	sp->high_dma = caps->dma_bits > 32;
 	sp->buffer = dmam_alloc_coherent(&pdev->dev,
-- 
GitLab


From ceab11a3c0d620d9ec2c032fd8014615cf7934ec Mon Sep 17 00:00:00 2001
From: Guochun Mao <guochun.mao@mediatek.com>
Date: Tue, 18 Jan 2022 22:28:17 +0800
Subject: [PATCH 0017/1586] spi: add mt8186-nor compatible string

Add MT8186 spi-nor controller support.
MT8186 needs a new clock name, axi_s, for spi nor axi slave bus clock.

Signed-off-by: Guochun Mao <guochun.mao@mediatek.com>
Signed-off-by: Zhen Zhang <zhen.zhang@mediatek.com>
Link: https://lore.kernel.org/r/20220118142820.2729-2-guochun.mao@mediatek.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../devicetree/bindings/spi/mediatek,spi-mtk-nor.yaml         | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/Documentation/devicetree/bindings/spi/mediatek,spi-mtk-nor.yaml b/Documentation/devicetree/bindings/spi/mediatek,spi-mtk-nor.yaml
index 4e4694e3d5390..be3cc7faed534 100644
--- a/Documentation/devicetree/bindings/spi/mediatek,spi-mtk-nor.yaml
+++ b/Documentation/devicetree/bindings/spi/mediatek,spi-mtk-nor.yaml
@@ -30,6 +30,7 @@ properties:
               - mediatek,mt7622-nor
               - mediatek,mt7623-nor
               - mediatek,mt7629-nor
+              - mediatek,mt8186-nor
               - mediatek,mt8192-nor
               - mediatek,mt8195-nor
           - enum:
@@ -49,6 +50,8 @@ properties:
       - description: clock used for controller
       - description: clock used for nor dma bus. this depends on hardware
                      design, so this is optional.
+      - description: clock used for controller axi slave bus.
+                     this depends on hardware design, so it is optional.
 
   clock-names:
     minItems: 2
@@ -56,6 +59,7 @@ properties:
       - const: spi
       - const: sf
       - const: axi
+      - const: axi_s
 
 required:
   - compatible
-- 
GitLab


From f1ba938e4f98941dc2b77795062e49444ec1fee1 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Wed, 19 Jan 2022 00:09:13 +0100
Subject: [PATCH 0018/1586] spi: s3c64xx: Delete unused boardfile helpers

The helpers to use SPI host 1 and 2 are unused in the kernel
and taking up space and maintenance hours. New systems should
use device tree and not this, so delete the code.

Cc: linux-samsung-soc@vger.kernel.org
Cc: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Cc: Sylwester Nawrocki <snawrocki@kernel.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Sam Protsenko <semen.protsenko@linaro.org>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Link: https://lore.kernel.org/r/20220118230915.157797-1-linus.walleij@linaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 arch/arm/mach-s3c/Kconfig                 | 12 ----
 arch/arm/mach-s3c/devs.c                  | 72 -----------------------
 arch/arm/mach-s3c/setup-spi-s3c64xx.c     |  9 ---
 arch/arm/mach-s3c/spi-core-s3c24xx.h      |  6 --
 include/linux/platform_data/spi-s3c64xx.h |  8 ---
 5 files changed, 107 deletions(-)

diff --git a/arch/arm/mach-s3c/Kconfig b/arch/arm/mach-s3c/Kconfig
index 25606e668cf93..1899fc3f44fd7 100644
--- a/arch/arm/mach-s3c/Kconfig
+++ b/arch/arm/mach-s3c/Kconfig
@@ -191,18 +191,6 @@ config S3C64XX_DEV_SPI0
 	  Compile in platform device definitions for S3C64XX's type
 	  SPI controller 0
 
-config S3C64XX_DEV_SPI1
-	bool
-	help
-	  Compile in platform device definitions for S3C64XX's type
-	  SPI controller 1
-
-config S3C64XX_DEV_SPI2
-	bool
-	help
-	  Compile in platform device definitions for S3C64XX's type
-	  SPI controller 2
-
 config SAMSUNG_DEV_TS
 	bool
 	help
diff --git a/arch/arm/mach-s3c/devs.c b/arch/arm/mach-s3c/devs.c
index 06dec64848f9a..9f086aee862bc 100644
--- a/arch/arm/mach-s3c/devs.c
+++ b/arch/arm/mach-s3c/devs.c
@@ -1125,75 +1125,3 @@ void __init s3c64xx_spi0_set_platdata(int (*cfg_gpio)(void), int src_clk_nr,
 	s3c_set_platdata(&pd, sizeof(pd), &s3c64xx_device_spi0);
 }
 #endif /* CONFIG_S3C64XX_DEV_SPI0 */
-
-#ifdef CONFIG_S3C64XX_DEV_SPI1
-static struct resource s3c64xx_spi1_resource[] = {
-	[0] = DEFINE_RES_MEM(S3C_PA_SPI1, SZ_256),
-	[1] = DEFINE_RES_IRQ(IRQ_SPI1),
-};
-
-struct platform_device s3c64xx_device_spi1 = {
-	.name		= "s3c6410-spi",
-	.id		= 1,
-	.num_resources	= ARRAY_SIZE(s3c64xx_spi1_resource),
-	.resource	= s3c64xx_spi1_resource,
-	.dev = {
-		.dma_mask		= &samsung_device_dma_mask,
-		.coherent_dma_mask	= DMA_BIT_MASK(32),
-	},
-};
-
-void __init s3c64xx_spi1_set_platdata(int (*cfg_gpio)(void), int src_clk_nr,
-						int num_cs)
-{
-	struct s3c64xx_spi_info pd;
-
-	/* Reject invalid configuration */
-	if (!num_cs || src_clk_nr < 0) {
-		pr_err("%s: Invalid SPI configuration\n", __func__);
-		return;
-	}
-
-	pd.num_cs = num_cs;
-	pd.src_clk_nr = src_clk_nr;
-	pd.cfg_gpio = (cfg_gpio) ? cfg_gpio : s3c64xx_spi1_cfg_gpio;
-
-	s3c_set_platdata(&pd, sizeof(pd), &s3c64xx_device_spi1);
-}
-#endif /* CONFIG_S3C64XX_DEV_SPI1 */
-
-#ifdef CONFIG_S3C64XX_DEV_SPI2
-static struct resource s3c64xx_spi2_resource[] = {
-	[0] = DEFINE_RES_MEM(S3C_PA_SPI2, SZ_256),
-	[1] = DEFINE_RES_IRQ(IRQ_SPI2),
-};
-
-struct platform_device s3c64xx_device_spi2 = {
-	.name		= "s3c6410-spi",
-	.id		= 2,
-	.num_resources	= ARRAY_SIZE(s3c64xx_spi2_resource),
-	.resource	= s3c64xx_spi2_resource,
-	.dev = {
-		.dma_mask		= &samsung_device_dma_mask,
-		.coherent_dma_mask	= DMA_BIT_MASK(32),
-	},
-};
-
-void __init s3c64xx_spi2_set_platdata(int (*cfg_gpio)(void), int src_clk_nr,
-						int num_cs)
-{
-	struct s3c64xx_spi_info pd;
-
-	/* Reject invalid configuration */
-	if (!num_cs || src_clk_nr < 0) {
-		pr_err("%s: Invalid SPI configuration\n", __func__);
-		return;
-	}
-
-	pd.num_cs = num_cs;
-	pd.src_clk_nr = src_clk_nr;
-	pd.cfg_gpio = (cfg_gpio) ? cfg_gpio : s3c64xx_spi2_cfg_gpio;
-
-	s3c_set_platdata(&pd, sizeof(pd), &s3c64xx_device_spi2);
-}
-#endif /* CONFIG_S3C64XX_DEV_SPI2 */
diff --git a/arch/arm/mach-s3c/setup-spi-s3c64xx.c b/arch/arm/mach-s3c/setup-spi-s3c64xx.c
index efcf78d415853..497aff71c29cc 100644
--- a/arch/arm/mach-s3c/setup-spi-s3c64xx.c
+++ b/arch/arm/mach-s3c/setup-spi-s3c64xx.c
@@ -16,12 +16,3 @@ int s3c64xx_spi0_cfg_gpio(void)
 	return 0;
 }
 #endif
-
-#ifdef CONFIG_S3C64XX_DEV_SPI1
-int s3c64xx_spi1_cfg_gpio(void)
-{
-	s3c_gpio_cfgall_range(S3C64XX_GPC(4), 3,
-				S3C_GPIO_SFN(2), S3C_GPIO_PULL_UP);
-	return 0;
-}
-#endif
diff --git a/arch/arm/mach-s3c/spi-core-s3c24xx.h b/arch/arm/mach-s3c/spi-core-s3c24xx.h
index 057667469cc3d..919c5fd0c9afa 100644
--- a/arch/arm/mach-s3c/spi-core-s3c24xx.h
+++ b/arch/arm/mach-s3c/spi-core-s3c24xx.h
@@ -16,12 +16,6 @@ static inline void s3c24xx_spi_setname(char *name)
 #ifdef CONFIG_S3C64XX_DEV_SPI0
 	s3c64xx_device_spi0.name = name;
 #endif
-#ifdef CONFIG_S3C64XX_DEV_SPI1
-	s3c64xx_device_spi1.name = name;
-#endif
-#ifdef CONFIG_S3C64XX_DEV_SPI2
-	s3c64xx_device_spi2.name = name;
-#endif
 }
 
 #endif /* __PLAT_S3C_SPI_CORE_S3C24XX_H */
diff --git a/include/linux/platform_data/spi-s3c64xx.h b/include/linux/platform_data/spi-s3c64xx.h
index 773daf7915a37..19d690f34670f 100644
--- a/include/linux/platform_data/spi-s3c64xx.h
+++ b/include/linux/platform_data/spi-s3c64xx.h
@@ -52,17 +52,9 @@ struct s3c64xx_spi_info {
  */
 extern void s3c64xx_spi0_set_platdata(int (*cfg_gpio)(void), int src_clk_nr,
 						int num_cs);
-extern void s3c64xx_spi1_set_platdata(int (*cfg_gpio)(void), int src_clk_nr,
-						int num_cs);
-extern void s3c64xx_spi2_set_platdata(int (*cfg_gpio)(void), int src_clk_nr,
-						int num_cs);
 
 /* defined by architecture to configure gpio */
 extern int s3c64xx_spi0_cfg_gpio(void);
-extern int s3c64xx_spi1_cfg_gpio(void);
-extern int s3c64xx_spi2_cfg_gpio(void);
 
 extern struct s3c64xx_spi_info s3c64xx_spi0_pdata;
-extern struct s3c64xx_spi_info s3c64xx_spi1_pdata;
-extern struct s3c64xx_spi_info s3c64xx_spi2_pdata;
 #endif /*__SPI_S3C64XX_H */
-- 
GitLab


From 3b5529ae7f3578da633e8ae2ec0715a55a248f9f Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Wed, 19 Jan 2022 00:09:14 +0100
Subject: [PATCH 0019/1586] spi: s3c64xx: Drop custom gpio setup argument

The SPI0 platform population function was taking a custom
gpio setup callback but the only user pass NULL as
argument so drop this argument.

Cc: linux-samsung-soc@vger.kernel.org
Cc: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Cc: Sylwester Nawrocki <snawrocki@kernel.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Reviewed-by: Sam Protsenko <semen.protsenko@linaro.org>
Link: https://lore.kernel.org/r/20220118230915.157797-2-linus.walleij@linaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 arch/arm/mach-s3c/devs.c                  | 5 ++---
 arch/arm/mach-s3c/mach-crag6410.c         | 2 +-
 include/linux/platform_data/spi-s3c64xx.h | 4 +---
 3 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/arch/arm/mach-s3c/devs.c b/arch/arm/mach-s3c/devs.c
index 9f086aee862bc..1e266fc24f9b7 100644
--- a/arch/arm/mach-s3c/devs.c
+++ b/arch/arm/mach-s3c/devs.c
@@ -1107,8 +1107,7 @@ struct platform_device s3c64xx_device_spi0 = {
 	},
 };
 
-void __init s3c64xx_spi0_set_platdata(int (*cfg_gpio)(void), int src_clk_nr,
-						int num_cs)
+void __init s3c64xx_spi0_set_platdata(int src_clk_nr, int num_cs)
 {
 	struct s3c64xx_spi_info pd;
 
@@ -1120,7 +1119,7 @@ void __init s3c64xx_spi0_set_platdata(int (*cfg_gpio)(void), int src_clk_nr,
 
 	pd.num_cs = num_cs;
 	pd.src_clk_nr = src_clk_nr;
-	pd.cfg_gpio = (cfg_gpio) ? cfg_gpio : s3c64xx_spi0_cfg_gpio;
+	pd.cfg_gpio = s3c64xx_spi0_cfg_gpio;
 
 	s3c_set_platdata(&pd, sizeof(pd), &s3c64xx_device_spi0);
 }
diff --git a/arch/arm/mach-s3c/mach-crag6410.c b/arch/arm/mach-s3c/mach-crag6410.c
index 4a12c75d407fc..41f0aba2d2fd0 100644
--- a/arch/arm/mach-s3c/mach-crag6410.c
+++ b/arch/arm/mach-s3c/mach-crag6410.c
@@ -856,7 +856,7 @@ static void __init crag6410_machine_init(void)
 	i2c_register_board_info(1, i2c_devs1, ARRAY_SIZE(i2c_devs1));
 
 	samsung_keypad_set_platdata(&crag6410_keypad_data);
-	s3c64xx_spi0_set_platdata(NULL, 0, 2);
+	s3c64xx_spi0_set_platdata(0, 2);
 
 	pwm_add_table(crag6410_pwm_lookup, ARRAY_SIZE(crag6410_pwm_lookup));
 	platform_add_devices(crag6410_devices, ARRAY_SIZE(crag6410_devices));
diff --git a/include/linux/platform_data/spi-s3c64xx.h b/include/linux/platform_data/spi-s3c64xx.h
index 19d690f34670f..10890a4b55b9a 100644
--- a/include/linux/platform_data/spi-s3c64xx.h
+++ b/include/linux/platform_data/spi-s3c64xx.h
@@ -43,15 +43,13 @@ struct s3c64xx_spi_info {
 /**
  * s3c64xx_spi_set_platdata - SPI Controller configure callback by the board
  *				initialization code.
- * @cfg_gpio: Pointer to gpio setup function.
  * @src_clk_nr: Clock the SPI controller is to use to generate SPI clocks.
  * @num_cs: Number of elements in the 'cs' array.
  *
  * Call this from machine init code for each SPI Controller that
  * has some chips attached to it.
  */
-extern void s3c64xx_spi0_set_platdata(int (*cfg_gpio)(void), int src_clk_nr,
-						int num_cs);
+extern void s3c64xx_spi0_set_platdata(int src_clk_nr, int num_cs);
 
 /* defined by architecture to configure gpio */
 extern int s3c64xx_spi0_cfg_gpio(void);
-- 
GitLab


From a45cf3cc72dd9cfde9db8af32cdf9c431f53f9bc Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Wed, 19 Jan 2022 00:09:15 +0100
Subject: [PATCH 0020/1586] spi: s3c64xx: Convert to use GPIO descriptors

Convert the S3C64xx SPI host to use GPIO descriptors.

Provide GPIO descriptor tables for the one user with CS
0 and 1.

Cc: linux-samsung-soc@vger.kernel.org
Cc: Sylwester Nawrocki <snawrocki@kernel.org>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Reviewed-by: Sam Protsenko <semen.protsenko@linaro.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20220118230915.157797-3-linus.walleij@linaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 arch/arm/mach-s3c/mach-crag6410-module.c  | 13 ------
 arch/arm/mach-s3c/mach-crag6410.c         | 11 +++++
 drivers/spi/spi-s3c64xx.c                 | 53 ++++++-----------------
 include/linux/platform_data/spi-s3c64xx.h |  2 -
 4 files changed, 24 insertions(+), 55 deletions(-)

diff --git a/arch/arm/mach-s3c/mach-crag6410-module.c b/arch/arm/mach-s3c/mach-crag6410-module.c
index 407ad493493e3..5d1d4b67a4b77 100644
--- a/arch/arm/mach-s3c/mach-crag6410-module.c
+++ b/arch/arm/mach-s3c/mach-crag6410-module.c
@@ -32,10 +32,6 @@
 
 #include "crag6410.h"
 
-static struct s3c64xx_spi_csinfo wm0010_spi_csinfo = {
-	.line = S3C64XX_GPC(3),
-};
-
 static struct wm0010_pdata wm0010_pdata = {
 	.gpio_reset = S3C64XX_GPN(6),
 	.reset_active_high = 1, /* Active high for Glenfarclas Rev 2 */
@@ -49,7 +45,6 @@ static struct spi_board_info wm1253_devs[] = {
 		.chip_select	= 0,
 		.mode		= SPI_MODE_0,
 		.irq		= S3C_EINT(4),
-		.controller_data = &wm0010_spi_csinfo,
 		.platform_data = &wm0010_pdata,
 	},
 };
@@ -62,7 +57,6 @@ static struct spi_board_info balblair_devs[] = {
 		.chip_select	= 0,
 		.mode		= SPI_MODE_0,
 		.irq		= S3C_EINT(4),
-		.controller_data = &wm0010_spi_csinfo,
 		.platform_data = &wm0010_pdata,
 	},
 };
@@ -229,10 +223,6 @@ static struct arizona_pdata wm5102_reva_pdata = {
 	},
 };
 
-static struct s3c64xx_spi_csinfo codec_spi_csinfo = {
-	.line = S3C64XX_GPN(5),
-};
-
 static struct spi_board_info wm5102_reva_spi_devs[] = {
 	[0] = {
 		.modalias	= "wm5102",
@@ -242,7 +232,6 @@ static struct spi_board_info wm5102_reva_spi_devs[] = {
 		.mode		= SPI_MODE_0,
 		.irq		= GLENFARCLAS_PMIC_IRQ_BASE +
 				  WM831X_IRQ_GPIO_2,
-		.controller_data = &codec_spi_csinfo,
 		.platform_data = &wm5102_reva_pdata,
 	},
 };
@@ -275,7 +264,6 @@ static struct spi_board_info wm5102_spi_devs[] = {
 		.mode		= SPI_MODE_0,
 		.irq		= GLENFARCLAS_PMIC_IRQ_BASE +
 				  WM831X_IRQ_GPIO_2,
-		.controller_data = &codec_spi_csinfo,
 		.platform_data = &wm5102_pdata,
 	},
 };
@@ -298,7 +286,6 @@ static struct spi_board_info wm5110_spi_devs[] = {
 		.mode		= SPI_MODE_0,
 		.irq		= GLENFARCLAS_PMIC_IRQ_BASE +
 				  WM831X_IRQ_GPIO_2,
-		.controller_data = &codec_spi_csinfo,
 		.platform_data = &wm5102_reva_pdata,
 	},
 };
diff --git a/arch/arm/mach-s3c/mach-crag6410.c b/arch/arm/mach-s3c/mach-crag6410.c
index 41f0aba2d2fd0..e3e0fe897bccb 100644
--- a/arch/arm/mach-s3c/mach-crag6410.c
+++ b/arch/arm/mach-s3c/mach-crag6410.c
@@ -825,6 +825,15 @@ static const struct gpio_led_platform_data gpio_leds_pdata = {
 
 static struct dwc2_hsotg_plat crag6410_hsotg_pdata;
 
+static struct gpiod_lookup_table crag_spi0_gpiod_table = {
+	.dev_id = "s3c6410-spi.0",
+	.table = {
+		GPIO_LOOKUP_IDX("GPIOC", 3, "cs", 0, GPIO_ACTIVE_LOW),
+		GPIO_LOOKUP_IDX("GPION", 5, "cs", 1, GPIO_ACTIVE_LOW),
+		{ },
+	},
+};
+
 static void __init crag6410_machine_init(void)
 {
 	/* Open drain IRQs need pullups */
@@ -856,6 +865,8 @@ static void __init crag6410_machine_init(void)
 	i2c_register_board_info(1, i2c_devs1, ARRAY_SIZE(i2c_devs1));
 
 	samsung_keypad_set_platdata(&crag6410_keypad_data);
+
+	gpiod_add_lookup_table(&crag_spi0_gpiod_table);
 	s3c64xx_spi0_set_platdata(0, 2);
 
 	pwm_add_table(crag6410_pwm_lookup, ARRAY_SIZE(crag6410_pwm_lookup));
diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c
index 8755cd85e83ce..3e42cdb19d272 100644
--- a/drivers/spi/spi-s3c64xx.c
+++ b/drivers/spi/spi-s3c64xx.c
@@ -13,10 +13,8 @@
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/spi/spi.h>
-#include <linux/gpio.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
-#include <linux/of_gpio.h>
 
 #include <linux/platform_data/spi-s3c64xx.h>
 
@@ -656,7 +654,11 @@ static int s3c64xx_spi_prepare_message(struct spi_master *master,
 	struct s3c64xx_spi_csinfo *cs = spi->controller_data;
 
 	/* Configure feedback delay */
-	writel(cs->fb_delay & 0x3, sdd->regs + S3C64XX_SPI_FB_CLK);
+	if (!cs)
+		/* No delay if not defined */
+		writel(0, sdd->regs + S3C64XX_SPI_FB_CLK);
+	else
+		writel(cs->fb_delay & 0x3, sdd->regs + S3C64XX_SPI_FB_CLK);
 
 	return 0;
 }
@@ -830,34 +832,16 @@ static int s3c64xx_spi_setup(struct spi_device *spi)
 	if (spi->dev.of_node) {
 		cs = s3c64xx_get_slave_ctrldata(spi);
 		spi->controller_data = cs;
-	} else if (cs) {
-		/* On non-DT platforms the SPI core will set spi->cs_gpio
-		 * to -ENOENT. The GPIO pin used to drive the chip select
-		 * is defined by using platform data so spi->cs_gpio value
-		 * has to be override to have the proper GPIO pin number.
-		 */
-		spi->cs_gpio = cs->line;
 	}
 
-	if (IS_ERR_OR_NULL(cs)) {
+	/* NULL is fine, we just avoid using the FB delay (=0) */
+	if (IS_ERR(cs)) {
 		dev_err(&spi->dev, "No CS for SPI(%d)\n", spi->chip_select);
 		return -ENODEV;
 	}
 
-	if (!spi_get_ctldata(spi)) {
-		if (gpio_is_valid(spi->cs_gpio)) {
-			err = gpio_request_one(spi->cs_gpio, GPIOF_OUT_INIT_HIGH,
-					       dev_name(&spi->dev));
-			if (err) {
-				dev_err(&spi->dev,
-					"Failed to get /CS gpio [%d]: %d\n",
-					spi->cs_gpio, err);
-				goto err_gpio_req;
-			}
-		}
-
+	if (!spi_get_ctldata(spi))
 		spi_set_ctldata(spi, cs);
-	}
 
 	pm_runtime_get_sync(&sdd->pdev->dev);
 
@@ -909,11 +893,9 @@ setup_exit:
 	/* setup() returns with device de-selected */
 	s3c64xx_spi_set_cs(spi, false);
 
-	if (gpio_is_valid(spi->cs_gpio))
-		gpio_free(spi->cs_gpio);
 	spi_set_ctldata(spi, NULL);
 
-err_gpio_req:
+	/* This was dynamically allocated on the DT path */
 	if (spi->dev.of_node)
 		kfree(cs);
 
@@ -924,19 +906,9 @@ static void s3c64xx_spi_cleanup(struct spi_device *spi)
 {
 	struct s3c64xx_spi_csinfo *cs = spi_get_ctldata(spi);
 
-	if (gpio_is_valid(spi->cs_gpio)) {
-		gpio_free(spi->cs_gpio);
-		if (spi->dev.of_node)
-			kfree(cs);
-		else {
-			/* On non-DT platforms, the SPI core sets
-			 * spi->cs_gpio to -ENOENT and .setup()
-			 * overrides it with the GPIO pin value
-			 * passed using platform data.
-			 */
-			spi->cs_gpio = -ENOENT;
-		}
-	}
+	/* This was dynamically allocated on the DT path */
+	if (spi->dev.of_node)
+		kfree(cs);
 
 	spi_set_ctldata(spi, NULL);
 }
@@ -1131,6 +1103,7 @@ static int s3c64xx_spi_probe(struct platform_device *pdev)
 	master->prepare_message = s3c64xx_spi_prepare_message;
 	master->transfer_one = s3c64xx_spi_transfer_one;
 	master->num_chipselect = sci->num_cs;
+	master->use_gpio_descriptors = true;
 	master->dma_alignment = 8;
 	master->bits_per_word_mask = SPI_BPW_MASK(32) | SPI_BPW_MASK(16) |
 					SPI_BPW_MASK(8);
diff --git a/include/linux/platform_data/spi-s3c64xx.h b/include/linux/platform_data/spi-s3c64xx.h
index 10890a4b55b9a..5df1ace6d2c98 100644
--- a/include/linux/platform_data/spi-s3c64xx.h
+++ b/include/linux/platform_data/spi-s3c64xx.h
@@ -16,7 +16,6 @@ struct platform_device;
  * struct s3c64xx_spi_csinfo - ChipSelect description
  * @fb_delay: Slave specific feedback delay.
  *            Refer to FB_CLK_SEL register definition in SPI chapter.
- * @line: Custom 'identity' of the CS line.
  *
  * This is per SPI-Slave Chipselect information.
  * Allocate and initialize one in machine init code and make the
@@ -24,7 +23,6 @@ struct platform_device;
  */
 struct s3c64xx_spi_csinfo {
 	u8 fb_delay;
-	unsigned line;
 };
 
 /**
-- 
GitLab


From 7f2a3cf4e6077a1525092f114be7819e505773a1 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Wed, 19 Jan 2022 01:09:14 +0100
Subject: [PATCH 0021/1586] spi: s3c24xx: Convert to GPIO descriptors

This driver has a bunch of custom oldstyle GPIO number-passing
fields and a custom set-up callback.

The good thing is: nothing in the kernel is using it.

Convert the driver to use GPIO descriptors with a SPI_MASTER_GPIO_SS
flag so that the local CS callback also get invoked as the hardware
needs this.

New users of this driver can provide GPIO descriptor tables like
the other converted drivers.

Cc: linux-samsung-soc@vger.kernel.org
Cc: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Cc: Sylwester Nawrocki <snawrocki@kernel.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Link: https://lore.kernel.org/r/20220119000914.192553-1-linus.walleij@linaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-s3c24xx.c   | 47 +++----------------------------------
 include/linux/spi/s3c24xx.h |  5 ----
 2 files changed, 3 insertions(+), 49 deletions(-)

diff --git a/drivers/spi/spi-s3c24xx.c b/drivers/spi/spi-s3c24xx.c
index d6f51695ca5b2..660aa866af06f 100644
--- a/drivers/spi/spi-s3c24xx.c
+++ b/drivers/spi/spi-s3c24xx.c
@@ -12,7 +12,6 @@
 #include <linux/err.h>
 #include <linux/clk.h>
 #include <linux/platform_device.h>
-#include <linux/gpio.h>
 #include <linux/io.h>
 #include <linux/slab.h>
 
@@ -62,9 +61,6 @@ struct s3c24xx_spi {
 	unsigned char		 fiq_inuse;
 	unsigned char		 fiq_claimed;
 
-	void			(*set_cs)(struct s3c2410_spi_info *spi,
-					  int cs, int pol);
-
 	/* data buffers */
 	const unsigned char	*tx;
 	unsigned char		*rx;
@@ -84,29 +80,21 @@ static inline struct s3c24xx_spi *to_hw(struct spi_device *sdev)
 	return spi_master_get_devdata(sdev->master);
 }
 
-static void s3c24xx_spi_gpiocs(struct s3c2410_spi_info *spi, int cs, int pol)
-{
-	gpio_set_value(spi->pin_cs, pol);
-}
-
 static void s3c24xx_spi_chipsel(struct spi_device *spi, int value)
 {
 	struct s3c24xx_spi_devstate *cs = spi->controller_state;
 	struct s3c24xx_spi *hw = to_hw(spi);
-	unsigned int cspol = spi->mode & SPI_CS_HIGH ? 1 : 0;
 
 	/* change the chipselect state and the state of the spi engine clock */
 
 	switch (value) {
 	case BITBANG_CS_INACTIVE:
-		hw->set_cs(hw->pdata, spi->chip_select, cspol^1);
 		writeb(cs->spcon, hw->regs + S3C2410_SPCON);
 		break;
 
 	case BITBANG_CS_ACTIVE:
 		writeb(cs->spcon | S3C2410_SPCON_ENSCK,
 		       hw->regs + S3C2410_SPCON);
-		hw->set_cs(hw->pdata, spi->chip_select, cspol);
 		break;
 	}
 }
@@ -452,14 +440,6 @@ static void s3c24xx_spi_initialsetup(struct s3c24xx_spi *hw)
 	writeb(0xff, hw->regs + S3C2410_SPPRE);
 	writeb(SPPIN_DEFAULT, hw->regs + S3C2410_SPPIN);
 	writeb(SPCON_DEFAULT, hw->regs + S3C2410_SPCON);
-
-	if (hw->pdata) {
-		if (hw->set_cs == s3c24xx_spi_gpiocs)
-			gpio_direction_output(hw->pdata->pin_cs, 1);
-
-		if (hw->pdata->gpio_setup)
-			hw->pdata->gpio_setup(hw->pdata, 1);
-	}
 }
 
 static int s3c24xx_spi_probe(struct platform_device *pdev)
@@ -502,6 +482,9 @@ static int s3c24xx_spi_probe(struct platform_device *pdev)
 	master->num_chipselect = hw->pdata->num_cs;
 	master->bus_num = pdata->bus_num;
 	master->bits_per_word_mask = SPI_BPW_MASK(8);
+	/* we need to call the local chipselect callback */
+	master->flags = SPI_MASTER_GPIO_SS;
+	master->use_gpio_descriptors = true;
 
 	/* setup the state for the bitbang driver */
 
@@ -541,27 +524,6 @@ static int s3c24xx_spi_probe(struct platform_device *pdev)
 		goto err_no_pdata;
 	}
 
-	/* setup any gpio we can */
-
-	if (!pdata->set_cs) {
-		if (pdata->pin_cs < 0) {
-			dev_err(&pdev->dev, "No chipselect pin\n");
-			err = -EINVAL;
-			goto err_register;
-		}
-
-		err = devm_gpio_request(&pdev->dev, pdata->pin_cs,
-					dev_name(&pdev->dev));
-		if (err) {
-			dev_err(&pdev->dev, "Failed to get gpio for cs\n");
-			goto err_register;
-		}
-
-		hw->set_cs = s3c24xx_spi_gpiocs;
-		gpio_direction_output(pdata->pin_cs, 1);
-	} else
-		hw->set_cs = pdata->set_cs;
-
 	s3c24xx_spi_initialsetup(hw);
 
 	/* register our spi controller */
@@ -604,9 +566,6 @@ static int s3c24xx_spi_suspend(struct device *dev)
 	if (ret)
 		return ret;
 
-	if (hw->pdata && hw->pdata->gpio_setup)
-		hw->pdata->gpio_setup(hw->pdata, 0);
-
 	clk_disable(hw->clk);
 	return 0;
 }
diff --git a/include/linux/spi/s3c24xx.h b/include/linux/spi/s3c24xx.h
index 440a715931629..9b8bb22d5b0cf 100644
--- a/include/linux/spi/s3c24xx.h
+++ b/include/linux/spi/s3c24xx.h
@@ -10,14 +10,9 @@
 #define __LINUX_SPI_S3C24XX_H __FILE__
 
 struct s3c2410_spi_info {
-	int			 pin_cs;	/* simple gpio cs */
 	unsigned int		 num_cs;	/* total chipselects */
 	int			 bus_num;       /* bus number to use. */
-
 	unsigned int		 use_fiq:1;	/* use fiq */
-
-	void (*gpio_setup)(struct s3c2410_spi_info *spi, int enable);
-	void (*set_cs)(struct s3c2410_spi_info *spi, int cs, int pol);
 };
 
 extern int s3c24xx_set_fiq(unsigned int irq, u32 *ack_ptr, bool on);
-- 
GitLab


From 9d7c48506518684847bc17d4e3f0a103d83aa5c8 Mon Sep 17 00:00:00 2001
From: Andreas Rammhold <andreas@rammhold.de>
Date: Fri, 16 Jul 2021 22:00:34 +0200
Subject: [PATCH 0022/1586] tools: cpupower: fix typo in cpupower-idle-set(1)
 manpage

The tools name was wrong in the SYNTAX section of the manpage it should
read "idle-set" instead of "idle-info".

Signed-off-by: Andreas Rammhold <andreas@rammhold.de>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/power/cpupower/man/cpupower-idle-set.1 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/power/cpupower/man/cpupower-idle-set.1 b/tools/power/cpupower/man/cpupower-idle-set.1
index 21916cff7516a..8cef3c71e19e7 100644
--- a/tools/power/cpupower/man/cpupower-idle-set.1
+++ b/tools/power/cpupower/man/cpupower-idle-set.1
@@ -4,7 +4,7 @@
 cpupower\-idle\-set \- Utility to set cpu idle state specific kernel options
 .SH "SYNTAX"
 .LP
-cpupower [ \-c cpulist ] idle\-info [\fIoptions\fP]
+cpupower [ \-c cpulist ] idle\-set [\fIoptions\fP]
 .SH "DESCRIPTION"
 .LP
 The cpupower idle\-set subcommand allows to set cpu idle, also called cpu
-- 
GitLab


From 101025ff8e47d3c938ad2ae646a1794b9a8aa730 Mon Sep 17 00:00:00 2001
From: ozkanonur <onurozkan.dev@gmail.com>
Date: Thu, 13 Jan 2022 00:04:21 +0300
Subject: [PATCH 0023/1586] tools/power/cpupower/{ToDo => TODO}: Rename the
 todo file

Renamed the to-do file to 'TODO' instead of 'ToDo' to
comply with the naming standard.

Signed-off-by: ozkanonur <onurozkan.dev@gmail.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/power/cpupower/{ToDo => TODO} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename tools/power/cpupower/{ToDo => TODO} (100%)

diff --git a/tools/power/cpupower/ToDo b/tools/power/cpupower/TODO
similarity index 100%
rename from tools/power/cpupower/ToDo
rename to tools/power/cpupower/TODO
-- 
GitLab


From 2e1f8e55f9e054b4a49ffc06c7e33b5d4725f05e Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@suse.com>
Date: Thu, 30 Sep 2021 14:40:38 +0200
Subject: [PATCH 0024/1586] x86/paravirt: Use %rip-relative addressing in hook
 calls

While using a plain (constant) address works, its use needlessly invokes
a SIB addressing mode, making every call site one byte larger than
necessary:

  ff 14 25 98 89 42 82    call   *0xffffffff82428998

Instead of using an "i" constraint with address-of operator and a 'c'
operand modifier, simply use an ordinary "m" constraint, which the
64-bit compiler will translate to %rip-relative addressing:

  ff 15 62 fb d2 00       call   *0xd2fb62(%rip)	# ffffffff82428998 <pv_ops+0x18>

This way the compiler is also told the truth about operand usage - the
memory location gets actually read, after all.

32-bit code generation is unaffected by the change.

  [ bp: Remove "we", add examples. ]

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Juergen Gross <jgross@suse.com>
Link: https://lore.kernel.org/r/b8192e8a-13ef-6ac6-6364-8ba58992cd1d@suse.com
---
 arch/x86/include/asm/paravirt_types.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index a69012e1903f1..e1591467668ed 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -279,7 +279,7 @@ extern void (*paravirt_iret)(void);
 
 #define paravirt_type(op)				\
 	[paravirt_typenum] "i" (PARAVIRT_PATCH(op)),	\
-	[paravirt_opptr] "i" (&(pv_ops.op))
+	[paravirt_opptr] "m" (pv_ops.op)
 #define paravirt_clobber(clobber)		\
 	[paravirt_clobber] "i" (clobber)
 
@@ -316,7 +316,7 @@ int paravirt_disable_iospace(void);
  */
 #define PARAVIRT_CALL					\
 	ANNOTATE_RETPOLINE_SAFE				\
-	"call *%c[paravirt_opptr];"
+	"call *%[paravirt_opptr];"
 
 /*
  * These macros are intended to wrap calls through one of the paravirt
-- 
GitLab


From e4544b63a7ee49e7fbebf35ece0a6acd3b9617ae Mon Sep 17 00:00:00 2001
From: Tim Murray <timmurray@google.com>
Date: Fri, 7 Jan 2022 12:48:44 -0800
Subject: [PATCH 0025/1586] f2fs: move f2fs to use reader-unfair rwsems

f2fs rw_semaphores work better if writers can starve readers,
especially for the checkpoint thread, because writers are strictly
more important than reader threads. This prevents significant priority
inversion between low-priority readers that blocked while trying to
acquire the read lock and a second acquisition of the write lock that
might be blocking high priority work.

Signed-off-by: Tim Murray <timmurray@google.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/checkpoint.c |  34 ++++++-------
 fs/f2fs/compress.c   |   6 +--
 fs/f2fs/data.c       |  50 +++++++++----------
 fs/f2fs/dir.c        |  12 ++---
 fs/f2fs/f2fs.h       | 110 ++++++++++++++++++++++++++++++++++--------
 fs/f2fs/file.c       | 112 +++++++++++++++++++++----------------------
 fs/f2fs/gc.c         |  46 +++++++++---------
 fs/f2fs/inline.c     |   4 +-
 fs/f2fs/namei.c      |  34 ++++++-------
 fs/f2fs/node.c       |  84 ++++++++++++++++----------------
 fs/f2fs/recovery.c   |   4 +-
 fs/f2fs/segment.c    |  44 ++++++++---------
 fs/f2fs/super.c      |  56 +++++++++++-----------
 fs/f2fs/sysfs.c      |   4 +-
 fs/f2fs/verity.c     |   4 +-
 fs/f2fs/xattr.c      |  12 ++---
 16 files changed, 342 insertions(+), 274 deletions(-)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 982f0170639fc..deeda95688f03 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -351,13 +351,13 @@ static int f2fs_write_meta_pages(struct address_space *mapping,
 		goto skip_write;
 
 	/* if locked failed, cp will flush dirty pages instead */
-	if (!down_write_trylock(&sbi->cp_global_sem))
+	if (!f2fs_down_write_trylock(&sbi->cp_global_sem))
 		goto skip_write;
 
 	trace_f2fs_writepages(mapping->host, wbc, META);
 	diff = nr_pages_to_write(sbi, META, wbc);
 	written = f2fs_sync_meta_pages(sbi, META, wbc->nr_to_write, FS_META_IO);
-	up_write(&sbi->cp_global_sem);
+	f2fs_up_write(&sbi->cp_global_sem);
 	wbc->nr_to_write = max((long)0, wbc->nr_to_write - written - diff);
 	return 0;
 
@@ -1159,7 +1159,7 @@ static bool __need_flush_quota(struct f2fs_sb_info *sbi)
 	if (!is_journalled_quota(sbi))
 		return false;
 
-	if (!down_write_trylock(&sbi->quota_sem))
+	if (!f2fs_down_write_trylock(&sbi->quota_sem))
 		return true;
 	if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH)) {
 		ret = false;
@@ -1171,7 +1171,7 @@ static bool __need_flush_quota(struct f2fs_sb_info *sbi)
 	} else if (get_pages(sbi, F2FS_DIRTY_QDATA)) {
 		ret = true;
 	}
-	up_write(&sbi->quota_sem);
+	f2fs_up_write(&sbi->quota_sem);
 	return ret;
 }
 
@@ -1228,10 +1228,10 @@ retry_flush_dents:
 	 * POR: we should ensure that there are no dirty node pages
 	 * until finishing nat/sit flush. inode->i_blocks can be updated.
 	 */
-	down_write(&sbi->node_change);
+	f2fs_down_write(&sbi->node_change);
 
 	if (get_pages(sbi, F2FS_DIRTY_IMETA)) {
-		up_write(&sbi->node_change);
+		f2fs_up_write(&sbi->node_change);
 		f2fs_unlock_all(sbi);
 		err = f2fs_sync_inode_meta(sbi);
 		if (err)
@@ -1241,15 +1241,15 @@ retry_flush_dents:
 	}
 
 retry_flush_nodes:
-	down_write(&sbi->node_write);
+	f2fs_down_write(&sbi->node_write);
 
 	if (get_pages(sbi, F2FS_DIRTY_NODES)) {
-		up_write(&sbi->node_write);
+		f2fs_up_write(&sbi->node_write);
 		atomic_inc(&sbi->wb_sync_req[NODE]);
 		err = f2fs_sync_node_pages(sbi, &wbc, false, FS_CP_NODE_IO);
 		atomic_dec(&sbi->wb_sync_req[NODE]);
 		if (err) {
-			up_write(&sbi->node_change);
+			f2fs_up_write(&sbi->node_change);
 			f2fs_unlock_all(sbi);
 			return err;
 		}
@@ -1262,13 +1262,13 @@ retry_flush_nodes:
 	 * dirty node blocks and some checkpoint values by block allocation.
 	 */
 	__prepare_cp_block(sbi);
-	up_write(&sbi->node_change);
+	f2fs_up_write(&sbi->node_change);
 	return err;
 }
 
 static void unblock_operations(struct f2fs_sb_info *sbi)
 {
-	up_write(&sbi->node_write);
+	f2fs_up_write(&sbi->node_write);
 	f2fs_unlock_all(sbi);
 }
 
@@ -1612,7 +1612,7 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 		f2fs_warn(sbi, "Start checkpoint disabled!");
 	}
 	if (cpc->reason != CP_RESIZE)
-		down_write(&sbi->cp_global_sem);
+		f2fs_down_write(&sbi->cp_global_sem);
 
 	if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) &&
 		((cpc->reason & CP_FASTBOOT) || (cpc->reason & CP_SYNC) ||
@@ -1693,7 +1693,7 @@ stop:
 	trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint");
 out:
 	if (cpc->reason != CP_RESIZE)
-		up_write(&sbi->cp_global_sem);
+		f2fs_up_write(&sbi->cp_global_sem);
 	return err;
 }
 
@@ -1741,9 +1741,9 @@ static int __write_checkpoint_sync(struct f2fs_sb_info *sbi)
 	struct cp_control cpc = { .reason = CP_SYNC, };
 	int err;
 
-	down_write(&sbi->gc_lock);
+	f2fs_down_write(&sbi->gc_lock);
 	err = f2fs_write_checkpoint(sbi, &cpc);
-	up_write(&sbi->gc_lock);
+	f2fs_up_write(&sbi->gc_lock);
 
 	return err;
 }
@@ -1831,9 +1831,9 @@ int f2fs_issue_checkpoint(struct f2fs_sb_info *sbi)
 	if (!test_opt(sbi, MERGE_CHECKPOINT) || cpc.reason != CP_SYNC) {
 		int ret;
 
-		down_write(&sbi->gc_lock);
+		f2fs_down_write(&sbi->gc_lock);
 		ret = f2fs_write_checkpoint(sbi, &cpc);
-		up_write(&sbi->gc_lock);
+		f2fs_up_write(&sbi->gc_lock);
 
 		return ret;
 	}
diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index d0c3aeba59454..67bac2792e571 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -1267,7 +1267,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,
 		 * checkpoint. This can only happen to quota writes which can cause
 		 * the below discard race condition.
 		 */
-		down_read(&sbi->node_write);
+		f2fs_down_read(&sbi->node_write);
 	} else if (!f2fs_trylock_op(sbi)) {
 		goto out_free;
 	}
@@ -1384,7 +1384,7 @@ unlock_continue:
 
 	f2fs_put_dnode(&dn);
 	if (IS_NOQUOTA(inode))
-		up_read(&sbi->node_write);
+		f2fs_up_read(&sbi->node_write);
 	else
 		f2fs_unlock_op(sbi);
 
@@ -1410,7 +1410,7 @@ out_put_dnode:
 	f2fs_put_dnode(&dn);
 out_unlock_op:
 	if (IS_NOQUOTA(inode))
-		up_read(&sbi->node_write);
+		f2fs_up_read(&sbi->node_write);
 	else
 		f2fs_unlock_op(sbi);
 out_free:
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 8c417864c66ae..0f124e8de1d4e 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -590,7 +590,7 @@ static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
 	enum page_type btype = PAGE_TYPE_OF_BIO(type);
 	struct f2fs_bio_info *io = sbi->write_io[btype] + temp;
 
-	down_write(&io->io_rwsem);
+	f2fs_down_write(&io->io_rwsem);
 
 	/* change META to META_FLUSH in the checkpoint procedure */
 	if (type >= META_FLUSH) {
@@ -601,7 +601,7 @@ static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
 			io->fio.op_flags |= REQ_PREFLUSH | REQ_FUA;
 	}
 	__submit_merged_bio(io);
-	up_write(&io->io_rwsem);
+	f2fs_up_write(&io->io_rwsem);
 }
 
 static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
@@ -616,9 +616,9 @@ static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
 			enum page_type btype = PAGE_TYPE_OF_BIO(type);
 			struct f2fs_bio_info *io = sbi->write_io[btype] + temp;
 
-			down_read(&io->io_rwsem);
+			f2fs_down_read(&io->io_rwsem);
 			ret = __has_merged_page(io->bio, inode, page, ino);
-			up_read(&io->io_rwsem);
+			f2fs_up_read(&io->io_rwsem);
 		}
 		if (ret)
 			__f2fs_submit_merged_write(sbi, type, temp);
@@ -742,9 +742,9 @@ static void add_bio_entry(struct f2fs_sb_info *sbi, struct bio *bio,
 	if (bio_add_page(bio, page, PAGE_SIZE, 0) != PAGE_SIZE)
 		f2fs_bug_on(sbi, 1);
 
-	down_write(&io->bio_list_lock);
+	f2fs_down_write(&io->bio_list_lock);
 	list_add_tail(&be->list, &io->bio_list);
-	up_write(&io->bio_list_lock);
+	f2fs_up_write(&io->bio_list_lock);
 }
 
 static void del_bio_entry(struct bio_entry *be)
@@ -766,7 +766,7 @@ static int add_ipu_page(struct f2fs_io_info *fio, struct bio **bio,
 		struct list_head *head = &io->bio_list;
 		struct bio_entry *be;
 
-		down_write(&io->bio_list_lock);
+		f2fs_down_write(&io->bio_list_lock);
 		list_for_each_entry(be, head, list) {
 			if (be->bio != *bio)
 				continue;
@@ -790,7 +790,7 @@ static int add_ipu_page(struct f2fs_io_info *fio, struct bio **bio,
 			__submit_bio(sbi, *bio, DATA);
 			break;
 		}
-		up_write(&io->bio_list_lock);
+		f2fs_up_write(&io->bio_list_lock);
 	}
 
 	if (ret) {
@@ -816,7 +816,7 @@ void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi,
 		if (list_empty(head))
 			continue;
 
-		down_read(&io->bio_list_lock);
+		f2fs_down_read(&io->bio_list_lock);
 		list_for_each_entry(be, head, list) {
 			if (target)
 				found = (target == be->bio);
@@ -826,14 +826,14 @@ void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi,
 			if (found)
 				break;
 		}
-		up_read(&io->bio_list_lock);
+		f2fs_up_read(&io->bio_list_lock);
 
 		if (!found)
 			continue;
 
 		found = false;
 
-		down_write(&io->bio_list_lock);
+		f2fs_down_write(&io->bio_list_lock);
 		list_for_each_entry(be, head, list) {
 			if (target)
 				found = (target == be->bio);
@@ -846,7 +846,7 @@ void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi,
 				break;
 			}
 		}
-		up_write(&io->bio_list_lock);
+		f2fs_up_write(&io->bio_list_lock);
 	}
 
 	if (found)
@@ -906,7 +906,7 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
 
 	f2fs_bug_on(sbi, is_read_io(fio->op));
 
-	down_write(&io->io_rwsem);
+	f2fs_down_write(&io->io_rwsem);
 next:
 	if (fio->in_list) {
 		spin_lock(&io->io_lock);
@@ -973,7 +973,7 @@ out:
 	if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) ||
 				!f2fs_is_checkpoint_ready(sbi))
 		__submit_merged_bio(io);
-	up_write(&io->io_rwsem);
+	f2fs_up_write(&io->io_rwsem);
 }
 
 static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
@@ -1383,9 +1383,9 @@ void f2fs_do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
 {
 	if (flag == F2FS_GET_BLOCK_PRE_AIO) {
 		if (lock)
-			down_read(&sbi->node_change);
+			f2fs_down_read(&sbi->node_change);
 		else
-			up_read(&sbi->node_change);
+			f2fs_up_read(&sbi->node_change);
 	} else {
 		if (lock)
 			f2fs_lock_op(sbi);
@@ -2749,13 +2749,13 @@ write:
 		 * the below discard race condition.
 		 */
 		if (IS_NOQUOTA(inode))
-			down_read(&sbi->node_write);
+			f2fs_down_read(&sbi->node_write);
 
 		fio.need_lock = LOCK_DONE;
 		err = f2fs_do_write_data_page(&fio);
 
 		if (IS_NOQUOTA(inode))
-			up_read(&sbi->node_write);
+			f2fs_up_read(&sbi->node_write);
 
 		goto done;
 	}
@@ -3213,14 +3213,14 @@ void f2fs_write_failed(struct inode *inode, loff_t to)
 
 	/* In the fs-verity case, f2fs_end_enable_verity() does the truncate */
 	if (to > i_size && !f2fs_verity_in_progress(inode)) {
-		down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+		f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
 		filemap_invalidate_lock(inode->i_mapping);
 
 		truncate_pagecache(inode, i_size);
 		f2fs_truncate_blocks(inode, i_size, true);
 
 		filemap_invalidate_unlock(inode->i_mapping);
-		up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+		f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
 	}
 }
 
@@ -3721,13 +3721,13 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk,
 	unsigned int end_sec = secidx + blkcnt / blk_per_sec;
 	int ret = 0;
 
-	down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+	f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
 	filemap_invalidate_lock(inode->i_mapping);
 
 	set_inode_flag(inode, FI_ALIGNED_WRITE);
 
 	for (; secidx < end_sec; secidx++) {
-		down_write(&sbi->pin_sem);
+		f2fs_down_write(&sbi->pin_sem);
 
 		f2fs_lock_op(sbi);
 		f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false);
@@ -3741,7 +3741,7 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk,
 
 			page = f2fs_get_lock_data_page(inode, blkidx, true);
 			if (IS_ERR(page)) {
-				up_write(&sbi->pin_sem);
+				f2fs_up_write(&sbi->pin_sem);
 				ret = PTR_ERR(page);
 				goto done;
 			}
@@ -3754,7 +3754,7 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk,
 
 		ret = filemap_fdatawrite(inode->i_mapping);
 
-		up_write(&sbi->pin_sem);
+		f2fs_up_write(&sbi->pin_sem);
 
 		if (ret)
 			break;
@@ -3765,7 +3765,7 @@ done:
 	clear_inode_flag(inode, FI_ALIGNED_WRITE);
 
 	filemap_invalidate_unlock(inode->i_mapping);
-	up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+	f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
 
 	return ret;
 }
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 1820e9c106f7d..011df7058c42b 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -766,7 +766,7 @@ add_dentry:
 	f2fs_wait_on_page_writeback(dentry_page, DATA, true, true);
 
 	if (inode) {
-		down_write(&F2FS_I(inode)->i_sem);
+		f2fs_down_write(&F2FS_I(inode)->i_sem);
 		page = f2fs_init_inode_metadata(inode, dir, fname, NULL);
 		if (IS_ERR(page)) {
 			err = PTR_ERR(page);
@@ -793,7 +793,7 @@ add_dentry:
 	f2fs_update_parent_metadata(dir, inode, current_depth);
 fail:
 	if (inode)
-		up_write(&F2FS_I(inode)->i_sem);
+		f2fs_up_write(&F2FS_I(inode)->i_sem);
 
 	f2fs_put_page(dentry_page, 1);
 
@@ -858,7 +858,7 @@ int f2fs_do_tmpfile(struct inode *inode, struct inode *dir)
 	struct page *page;
 	int err = 0;
 
-	down_write(&F2FS_I(inode)->i_sem);
+	f2fs_down_write(&F2FS_I(inode)->i_sem);
 	page = f2fs_init_inode_metadata(inode, dir, NULL, NULL);
 	if (IS_ERR(page)) {
 		err = PTR_ERR(page);
@@ -869,7 +869,7 @@ int f2fs_do_tmpfile(struct inode *inode, struct inode *dir)
 	clear_inode_flag(inode, FI_NEW_INODE);
 	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
 fail:
-	up_write(&F2FS_I(inode)->i_sem);
+	f2fs_up_write(&F2FS_I(inode)->i_sem);
 	return err;
 }
 
@@ -877,7 +877,7 @@ void f2fs_drop_nlink(struct inode *dir, struct inode *inode)
 {
 	struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
 
-	down_write(&F2FS_I(inode)->i_sem);
+	f2fs_down_write(&F2FS_I(inode)->i_sem);
 
 	if (S_ISDIR(inode->i_mode))
 		f2fs_i_links_write(dir, false);
@@ -888,7 +888,7 @@ void f2fs_drop_nlink(struct inode *dir, struct inode *inode)
 		f2fs_i_links_write(inode, false);
 		f2fs_i_size_write(inode, 0);
 	}
-	up_write(&F2FS_I(inode)->i_sem);
+	f2fs_up_write(&F2FS_I(inode)->i_sem);
 
 	if (inode->i_nlink == 0)
 		f2fs_add_orphan_inode(inode);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index eb22fa91c2b26..8178a9152e496 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -123,6 +123,18 @@ typedef u32 nid_t;
 
 #define COMPRESS_EXT_NUM		16
 
+/*
+ * An implementation of an rwsem that is explicitly unfair to readers. This
+ * prevents priority inversion when a low-priority reader acquires the read lock
+ * while sleeping on the write lock but the write lock is needed by
+ * higher-priority clients.
+ */
+
+struct f2fs_rwsem {
+        struct rw_semaphore internal_rwsem;
+        wait_queue_head_t read_waiters;
+};
+
 struct f2fs_mount_info {
 	unsigned int opt;
 	int write_io_size_bits;		/* Write IO size bits */
@@ -752,7 +764,7 @@ struct f2fs_inode_info {
 
 	/* Use below internally in f2fs*/
 	unsigned long flags[BITS_TO_LONGS(FI_MAX)];	/* use to pass per-file flags */
-	struct rw_semaphore i_sem;	/* protect fi info */
+	struct f2fs_rwsem i_sem;	/* protect fi info */
 	atomic_t dirty_pages;		/* # of dirty pages */
 	f2fs_hash_t chash;		/* hash value of given file name */
 	unsigned int clevel;		/* maximum level of given file name */
@@ -777,8 +789,8 @@ struct f2fs_inode_info {
 	struct extent_tree *extent_tree;	/* cached extent_tree entry */
 
 	/* avoid racing between foreground op and gc */
-	struct rw_semaphore i_gc_rwsem[2];
-	struct rw_semaphore i_xattr_sem; /* avoid racing between reading and changing EAs */
+	struct f2fs_rwsem i_gc_rwsem[2];
+	struct f2fs_rwsem i_xattr_sem; /* avoid racing between reading and changing EAs */
 
 	int i_extra_isize;		/* size of extra space located in i_addr */
 	kprojid_t i_projid;		/* id for project quota */
@@ -904,7 +916,7 @@ struct f2fs_nm_info {
 	/* NAT cache management */
 	struct radix_tree_root nat_root;/* root of the nat entry cache */
 	struct radix_tree_root nat_set_root;/* root of the nat set cache */
-	struct rw_semaphore nat_tree_lock;	/* protect nat entry tree */
+	struct f2fs_rwsem nat_tree_lock;	/* protect nat entry tree */
 	struct list_head nat_entries;	/* cached nat entry list (clean) */
 	spinlock_t nat_list_lock;	/* protect clean nat entry list */
 	unsigned int nat_cnt[MAX_NAT_STATE]; /* the # of cached nat entries */
@@ -1017,7 +1029,7 @@ struct f2fs_sm_info {
 	struct dirty_seglist_info *dirty_info;	/* dirty segment information */
 	struct curseg_info *curseg_array;	/* active segment information */
 
-	struct rw_semaphore curseg_lock;	/* for preventing curseg change */
+	struct f2fs_rwsem curseg_lock;	/* for preventing curseg change */
 
 	block_t seg0_blkaddr;		/* block address of 0'th segment */
 	block_t main_blkaddr;		/* start block address of main area */
@@ -1201,11 +1213,11 @@ struct f2fs_bio_info {
 	struct bio *bio;		/* bios to merge */
 	sector_t last_block_in_bio;	/* last block number */
 	struct f2fs_io_info fio;	/* store buffered io info. */
-	struct rw_semaphore io_rwsem;	/* blocking op for bio */
+	struct f2fs_rwsem io_rwsem;	/* blocking op for bio */
 	spinlock_t io_lock;		/* serialize DATA/NODE IOs */
 	struct list_head io_list;	/* track fios */
 	struct list_head bio_list;	/* bio entry list head */
-	struct rw_semaphore bio_list_lock;	/* lock to protect bio entry list */
+	struct f2fs_rwsem bio_list_lock;	/* lock to protect bio entry list */
 };
 
 #define FDEV(i)				(sbi->devs[i])
@@ -1571,7 +1583,7 @@ struct f2fs_sb_info {
 	struct super_block *sb;			/* pointer to VFS super block */
 	struct proc_dir_entry *s_proc;		/* proc entry */
 	struct f2fs_super_block *raw_super;	/* raw super block pointer */
-	struct rw_semaphore sb_lock;		/* lock for raw super block */
+	struct f2fs_rwsem sb_lock;		/* lock for raw super block */
 	int valid_super_block;			/* valid super block no */
 	unsigned long s_flag;				/* flags for sbi */
 	struct mutex writepages;		/* mutex for writepages() */
@@ -1591,7 +1603,7 @@ struct f2fs_sb_info {
 	/* for bio operations */
 	struct f2fs_bio_info *write_io[NR_PAGE_TYPE];	/* for write bios */
 	/* keep migration IO order for LFS mode */
-	struct rw_semaphore io_order_lock;
+	struct f2fs_rwsem io_order_lock;
 	mempool_t *write_io_dummy;		/* Dummy pages */
 
 	/* for checkpoint */
@@ -1599,10 +1611,10 @@ struct f2fs_sb_info {
 	int cur_cp_pack;			/* remain current cp pack */
 	spinlock_t cp_lock;			/* for flag in ckpt */
 	struct inode *meta_inode;		/* cache meta blocks */
-	struct rw_semaphore cp_global_sem;	/* checkpoint procedure lock */
-	struct rw_semaphore cp_rwsem;		/* blocking FS operations */
-	struct rw_semaphore node_write;		/* locking node writes */
-	struct rw_semaphore node_change;	/* locking node change */
+	struct f2fs_rwsem cp_global_sem;	/* checkpoint procedure lock */
+	struct f2fs_rwsem cp_rwsem;		/* blocking FS operations */
+	struct f2fs_rwsem node_write;		/* locking node writes */
+	struct f2fs_rwsem node_change;	/* locking node change */
 	wait_queue_head_t cp_wait;
 	unsigned long last_time[MAX_TIME];	/* to store time in jiffies */
 	long interval_time[MAX_TIME];		/* to store thresholds */
@@ -1662,7 +1674,7 @@ struct f2fs_sb_info {
 	block_t unusable_block_count;		/* # of blocks saved by last cp */
 
 	unsigned int nquota_files;		/* # of quota sysfile */
-	struct rw_semaphore quota_sem;		/* blocking cp for flags */
+	struct f2fs_rwsem quota_sem;		/* blocking cp for flags */
 
 	/* # of pages, see count_type */
 	atomic_t nr_pages[NR_COUNT_TYPE];
@@ -1678,7 +1690,7 @@ struct f2fs_sb_info {
 	struct f2fs_mount_info mount_opt;	/* mount options */
 
 	/* for cleaning operations */
-	struct rw_semaphore gc_lock;		/*
+	struct f2fs_rwsem gc_lock;		/*
 						 * semaphore for GC, avoid
 						 * race between GC and GC or CP
 						 */
@@ -1698,7 +1710,7 @@ struct f2fs_sb_info {
 
 	/* threshold for gc trials on pinned files */
 	u64 gc_pin_file_threshold;
-	struct rw_semaphore pin_sem;
+	struct f2fs_rwsem pin_sem;
 
 	/* maximum # of trials to find a victim segment for SSR and GC */
 	unsigned int max_victim_search;
@@ -2092,9 +2104,65 @@ static inline void clear_ckpt_flags(struct f2fs_sb_info *sbi, unsigned int f)
 	spin_unlock_irqrestore(&sbi->cp_lock, flags);
 }
 
+static inline void init_f2fs_rwsem(struct f2fs_rwsem *sem)
+{
+	init_rwsem(&sem->internal_rwsem);
+	init_waitqueue_head(&sem->read_waiters);
+}
+
+static inline int f2fs_rwsem_is_locked(struct f2fs_rwsem *sem)
+{
+	return rwsem_is_locked(&sem->internal_rwsem);
+}
+
+static inline int f2fs_rwsem_is_contended(struct f2fs_rwsem *sem)
+{
+	return rwsem_is_contended(&sem->internal_rwsem);
+}
+
+static inline void f2fs_down_read(struct f2fs_rwsem *sem)
+{
+	wait_event(sem->read_waiters, down_read_trylock(&sem->internal_rwsem));
+}
+
+static inline int f2fs_down_read_trylock(struct f2fs_rwsem *sem)
+{
+	return down_read_trylock(&sem->internal_rwsem);
+}
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+static inline void f2fs_down_read_nested(struct f2fs_rwsem *sem, int subclass)
+{
+	down_read_nested(&sem->internal_rwsem, subclass);
+}
+#else
+#define f2fs_down_read_nested(sem, subclass) f2fs_down_read(sem)
+#endif
+
+static inline void f2fs_up_read(struct f2fs_rwsem *sem)
+{
+	up_read(&sem->internal_rwsem);
+}
+
+static inline void f2fs_down_write(struct f2fs_rwsem *sem)
+{
+	down_write(&sem->internal_rwsem);
+}
+
+static inline int f2fs_down_write_trylock(struct f2fs_rwsem *sem)
+{
+	return down_write_trylock(&sem->internal_rwsem);
+}
+
+static inline void f2fs_up_write(struct f2fs_rwsem *sem)
+{
+	up_write(&sem->internal_rwsem);
+	wake_up_all(&sem->read_waiters);
+}
+
 static inline void f2fs_lock_op(struct f2fs_sb_info *sbi)
 {
-	down_read(&sbi->cp_rwsem);
+	f2fs_down_read(&sbi->cp_rwsem);
 }
 
 static inline int f2fs_trylock_op(struct f2fs_sb_info *sbi)
@@ -2103,22 +2171,22 @@ static inline int f2fs_trylock_op(struct f2fs_sb_info *sbi)
 		f2fs_show_injection_info(sbi, FAULT_LOCK_OP);
 		return 0;
 	}
-	return down_read_trylock(&sbi->cp_rwsem);
+	return f2fs_down_read_trylock(&sbi->cp_rwsem);
 }
 
 static inline void f2fs_unlock_op(struct f2fs_sb_info *sbi)
 {
-	up_read(&sbi->cp_rwsem);
+	f2fs_up_read(&sbi->cp_rwsem);
 }
 
 static inline void f2fs_lock_all(struct f2fs_sb_info *sbi)
 {
-	down_write(&sbi->cp_rwsem);
+	f2fs_down_write(&sbi->cp_rwsem);
 }
 
 static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi)
 {
-	up_write(&sbi->cp_rwsem);
+	f2fs_up_write(&sbi->cp_rwsem);
 }
 
 static inline int __get_cp_reason(struct f2fs_sb_info *sbi)
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 3c98ef6af97d1..6ccdd6e347e2c 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -237,13 +237,13 @@ static void try_to_fix_pino(struct inode *inode)
 	struct f2fs_inode_info *fi = F2FS_I(inode);
 	nid_t pino;
 
-	down_write(&fi->i_sem);
+	f2fs_down_write(&fi->i_sem);
 	if (file_wrong_pino(inode) && inode->i_nlink == 1 &&
 			get_parent_ino(inode, &pino)) {
 		f2fs_i_pino_write(inode, pino);
 		file_got_pino(inode);
 	}
-	up_write(&fi->i_sem);
+	f2fs_up_write(&fi->i_sem);
 }
 
 static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
@@ -318,9 +318,9 @@ go_write:
 	 * Both of fdatasync() and fsync() are able to be recovered from
 	 * sudden-power-off.
 	 */
-	down_read(&F2FS_I(inode)->i_sem);
+	f2fs_down_read(&F2FS_I(inode)->i_sem);
 	cp_reason = need_do_checkpoint(inode);
-	up_read(&F2FS_I(inode)->i_sem);
+	f2fs_up_read(&F2FS_I(inode)->i_sem);
 
 	if (cp_reason) {
 		/* all the dirty node pages should be flushed for POR */
@@ -958,7 +958,7 @@ int f2fs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
 				return err;
 		}
 
-		down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+		f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
 		filemap_invalidate_lock(inode->i_mapping);
 
 		truncate_setsize(inode, attr->ia_size);
@@ -970,7 +970,7 @@ int f2fs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
 		 * larger than i_size.
 		 */
 		filemap_invalidate_unlock(inode->i_mapping);
-		up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+		f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
 		if (err)
 			return err;
 
@@ -1112,7 +1112,7 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
 			blk_start = (loff_t)pg_start << PAGE_SHIFT;
 			blk_end = (loff_t)pg_end << PAGE_SHIFT;
 
-			down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+			f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
 			filemap_invalidate_lock(inode->i_mapping);
 
 			truncate_pagecache_range(inode, blk_start, blk_end - 1);
@@ -1122,7 +1122,7 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
 			f2fs_unlock_op(sbi);
 
 			filemap_invalidate_unlock(inode->i_mapping);
-			up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+			f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
 		}
 	}
 
@@ -1355,7 +1355,7 @@ static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len)
 	f2fs_balance_fs(sbi, true);
 
 	/* avoid gc operation during block exchange */
-	down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+	f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
 	filemap_invalidate_lock(inode->i_mapping);
 
 	f2fs_lock_op(sbi);
@@ -1365,7 +1365,7 @@ static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len)
 	f2fs_unlock_op(sbi);
 
 	filemap_invalidate_unlock(inode->i_mapping);
-	up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+	f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
 	return ret;
 }
 
@@ -1500,7 +1500,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
 			unsigned int end_offset;
 			pgoff_t end;
 
-			down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+			f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
 			filemap_invalidate_lock(mapping);
 
 			truncate_pagecache_range(inode,
@@ -1514,7 +1514,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
 			if (ret) {
 				f2fs_unlock_op(sbi);
 				filemap_invalidate_unlock(mapping);
-				up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+				f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
 				goto out;
 			}
 
@@ -1526,7 +1526,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
 
 			f2fs_unlock_op(sbi);
 			filemap_invalidate_unlock(mapping);
-			up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+			f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
 
 			f2fs_balance_fs(sbi, dn.node_changed);
 
@@ -1600,7 +1600,7 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
 	idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
 
 	/* avoid gc operation during block exchange */
-	down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+	f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
 	filemap_invalidate_lock(mapping);
 	truncate_pagecache(inode, offset);
 
@@ -1618,7 +1618,7 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
 		f2fs_unlock_op(sbi);
 	}
 	filemap_invalidate_unlock(mapping);
-	up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+	f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
 
 	/* write out all moved pages, if possible */
 	filemap_invalidate_lock(mapping);
@@ -1674,13 +1674,13 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
 next_alloc:
 		if (has_not_enough_free_secs(sbi, 0,
 			GET_SEC_FROM_SEG(sbi, overprovision_segments(sbi)))) {
-			down_write(&sbi->gc_lock);
+			f2fs_down_write(&sbi->gc_lock);
 			err = f2fs_gc(sbi, true, false, false, NULL_SEGNO);
 			if (err && err != -ENODATA && err != -EAGAIN)
 				goto out_err;
 		}
 
-		down_write(&sbi->pin_sem);
+		f2fs_down_write(&sbi->pin_sem);
 
 		f2fs_lock_op(sbi);
 		f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false);
@@ -1690,7 +1690,7 @@ next_alloc:
 		err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_DIO);
 		file_dont_truncate(inode);
 
-		up_write(&sbi->pin_sem);
+		f2fs_up_write(&sbi->pin_sem);
 
 		expanded += map.m_len;
 		sec_len -= map.m_len;
@@ -2020,7 +2020,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
 	if (ret)
 		goto out;
 
-	down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+	f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
 
 	/*
 	 * Should wait end_io to count F2FS_WB_CP_DATA correctly by
@@ -2031,7 +2031,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
 			  inode->i_ino, get_dirty_pages(inode));
 	ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
 	if (ret) {
-		up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+		f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
 		goto out;
 	}
 
@@ -2044,7 +2044,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
 	/* add inode in inmem_list first and set atomic_file */
 	set_inode_flag(inode, FI_ATOMIC_FILE);
 	clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
-	up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+	f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
 
 	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
 	F2FS_I(inode)->inmem_task = current;
@@ -2351,7 +2351,7 @@ static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg)
 	if (err)
 		return err;
 
-	down_write(&sbi->sb_lock);
+	f2fs_down_write(&sbi->sb_lock);
 
 	if (uuid_is_nonzero(sbi->raw_super->encrypt_pw_salt))
 		goto got_it;
@@ -2370,7 +2370,7 @@ got_it:
 									16))
 		err = -EFAULT;
 out_err:
-	up_write(&sbi->sb_lock);
+	f2fs_up_write(&sbi->sb_lock);
 	mnt_drop_write_file(filp);
 	return err;
 }
@@ -2447,12 +2447,12 @@ static int f2fs_ioc_gc(struct file *filp, unsigned long arg)
 		return ret;
 
 	if (!sync) {
-		if (!down_write_trylock(&sbi->gc_lock)) {
+		if (!f2fs_down_write_trylock(&sbi->gc_lock)) {
 			ret = -EBUSY;
 			goto out;
 		}
 	} else {
-		down_write(&sbi->gc_lock);
+		f2fs_down_write(&sbi->gc_lock);
 	}
 
 	ret = f2fs_gc(sbi, sync, true, false, NULL_SEGNO);
@@ -2483,12 +2483,12 @@ static int __f2fs_ioc_gc_range(struct file *filp, struct f2fs_gc_range *range)
 
 do_more:
 	if (!range->sync) {
-		if (!down_write_trylock(&sbi->gc_lock)) {
+		if (!f2fs_down_write_trylock(&sbi->gc_lock)) {
 			ret = -EBUSY;
 			goto out;
 		}
 	} else {
-		down_write(&sbi->gc_lock);
+		f2fs_down_write(&sbi->gc_lock);
 	}
 
 	ret = f2fs_gc(sbi, range->sync, true, false,
@@ -2820,10 +2820,10 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
 
 	f2fs_balance_fs(sbi, true);
 
-	down_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
+	f2fs_down_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
 	if (src != dst) {
 		ret = -EBUSY;
-		if (!down_write_trylock(&F2FS_I(dst)->i_gc_rwsem[WRITE]))
+		if (!f2fs_down_write_trylock(&F2FS_I(dst)->i_gc_rwsem[WRITE]))
 			goto out_src;
 	}
 
@@ -2841,9 +2841,9 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
 	f2fs_unlock_op(sbi);
 
 	if (src != dst)
-		up_write(&F2FS_I(dst)->i_gc_rwsem[WRITE]);
+		f2fs_up_write(&F2FS_I(dst)->i_gc_rwsem[WRITE]);
 out_src:
-	up_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
+	f2fs_up_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
 out_unlock:
 	if (src != dst)
 		inode_unlock(dst);
@@ -2938,7 +2938,7 @@ static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg)
 	end_segno = min(start_segno + range.segments, dev_end_segno);
 
 	while (start_segno < end_segno) {
-		if (!down_write_trylock(&sbi->gc_lock)) {
+		if (!f2fs_down_write_trylock(&sbi->gc_lock)) {
 			ret = -EBUSY;
 			goto out;
 		}
@@ -3215,9 +3215,9 @@ int f2fs_precache_extents(struct inode *inode)
 	while (map.m_lblk < end) {
 		map.m_len = end - map.m_lblk;
 
-		down_write(&fi->i_gc_rwsem[WRITE]);
+		f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
 		err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_PRECACHE);
-		up_write(&fi->i_gc_rwsem[WRITE]);
+		f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
 		if (err)
 			return err;
 
@@ -3294,11 +3294,11 @@ static int f2fs_ioc_getfslabel(struct file *filp, unsigned long arg)
 	if (!vbuf)
 		return -ENOMEM;
 
-	down_read(&sbi->sb_lock);
+	f2fs_down_read(&sbi->sb_lock);
 	count = utf16s_to_utf8s(sbi->raw_super->volume_name,
 			ARRAY_SIZE(sbi->raw_super->volume_name),
 			UTF16_LITTLE_ENDIAN, vbuf, MAX_VOLUME_NAME);
-	up_read(&sbi->sb_lock);
+	f2fs_up_read(&sbi->sb_lock);
 
 	if (copy_to_user((char __user *)arg, vbuf,
 				min(FSLABEL_MAX, count)))
@@ -3326,7 +3326,7 @@ static int f2fs_ioc_setfslabel(struct file *filp, unsigned long arg)
 	if (err)
 		goto out;
 
-	down_write(&sbi->sb_lock);
+	f2fs_down_write(&sbi->sb_lock);
 
 	memset(sbi->raw_super->volume_name, 0,
 			sizeof(sbi->raw_super->volume_name));
@@ -3336,7 +3336,7 @@ static int f2fs_ioc_setfslabel(struct file *filp, unsigned long arg)
 
 	err = f2fs_commit_super(sbi, false);
 
-	up_write(&sbi->sb_lock);
+	f2fs_up_write(&sbi->sb_lock);
 
 	mnt_drop_write_file(filp);
 out:
@@ -3462,7 +3462,7 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg)
 	if (!atomic_read(&F2FS_I(inode)->i_compr_blocks))
 		goto out;
 
-	down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+	f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
 	filemap_invalidate_lock(inode->i_mapping);
 
 	last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
@@ -3499,7 +3499,7 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg)
 	}
 
 	filemap_invalidate_unlock(inode->i_mapping);
-	up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+	f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
 out:
 	inode_unlock(inode);
 
@@ -3615,7 +3615,7 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg)
 		goto unlock_inode;
 	}
 
-	down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+	f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
 	filemap_invalidate_lock(inode->i_mapping);
 
 	last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
@@ -3652,7 +3652,7 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg)
 	}
 
 	filemap_invalidate_unlock(inode->i_mapping);
-	up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+	f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
 
 	if (ret >= 0) {
 		clear_inode_flag(inode, FI_COMPRESS_RELEASED);
@@ -3770,7 +3770,7 @@ static int f2fs_sec_trim_file(struct file *filp, unsigned long arg)
 	if (ret)
 		goto err;
 
-	down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+	f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
 	filemap_invalidate_lock(mapping);
 
 	ret = filemap_write_and_wait_range(mapping, range.start,
@@ -3859,7 +3859,7 @@ static int f2fs_sec_trim_file(struct file *filp, unsigned long arg)
 				prev_block, len, range.flags);
 out:
 	filemap_invalidate_unlock(mapping);
-	up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+	f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
 err:
 	inode_unlock(inode);
 	file_end_write(filp);
@@ -4291,12 +4291,12 @@ static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
 	trace_f2fs_direct_IO_enter(inode, iocb, count, READ);
 
 	if (iocb->ki_flags & IOCB_NOWAIT) {
-		if (!down_read_trylock(&fi->i_gc_rwsem[READ])) {
+		if (!f2fs_down_read_trylock(&fi->i_gc_rwsem[READ])) {
 			ret = -EAGAIN;
 			goto out;
 		}
 	} else {
-		down_read(&fi->i_gc_rwsem[READ]);
+		f2fs_down_read(&fi->i_gc_rwsem[READ]);
 	}
 
 	/*
@@ -4315,7 +4315,7 @@ static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
 		ret = iomap_dio_complete(dio);
 	}
 
-	up_read(&fi->i_gc_rwsem[READ]);
+	f2fs_up_read(&fi->i_gc_rwsem[READ]);
 
 	file_accessed(file);
 out:
@@ -4497,12 +4497,12 @@ static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from,
 			goto out;
 		}
 
-		if (!down_read_trylock(&fi->i_gc_rwsem[WRITE])) {
+		if (!f2fs_down_read_trylock(&fi->i_gc_rwsem[WRITE])) {
 			ret = -EAGAIN;
 			goto out;
 		}
-		if (do_opu && !down_read_trylock(&fi->i_gc_rwsem[READ])) {
-			up_read(&fi->i_gc_rwsem[WRITE]);
+		if (do_opu && !f2fs_down_read_trylock(&fi->i_gc_rwsem[READ])) {
+			f2fs_up_read(&fi->i_gc_rwsem[WRITE]);
 			ret = -EAGAIN;
 			goto out;
 		}
@@ -4511,9 +4511,9 @@ static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from,
 		if (ret)
 			goto out;
 
-		down_read(&fi->i_gc_rwsem[WRITE]);
+		f2fs_down_read(&fi->i_gc_rwsem[WRITE]);
 		if (do_opu)
-			down_read(&fi->i_gc_rwsem[READ]);
+			f2fs_down_read(&fi->i_gc_rwsem[READ]);
 	}
 	if (whint_mode == WHINT_MODE_OFF)
 		iocb->ki_hint = WRITE_LIFE_NOT_SET;
@@ -4542,8 +4542,8 @@ static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from,
 	if (whint_mode == WHINT_MODE_OFF)
 		iocb->ki_hint = hint;
 	if (do_opu)
-		up_read(&fi->i_gc_rwsem[READ]);
-	up_read(&fi->i_gc_rwsem[WRITE]);
+		f2fs_up_read(&fi->i_gc_rwsem[READ]);
+	f2fs_up_read(&fi->i_gc_rwsem[WRITE]);
 
 	if (ret < 0)
 		goto out;
@@ -4644,12 +4644,12 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 
 	/* Don't leave any preallocated blocks around past i_size. */
 	if (preallocated && i_size_read(inode) < target_size) {
-		down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+		f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
 		filemap_invalidate_lock(inode->i_mapping);
 		if (!f2fs_truncate(inode))
 			file_dont_truncate(inode);
 		filemap_invalidate_unlock(inode->i_mapping);
-		up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+		f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
 	} else {
 		file_dont_truncate(inode);
 	}
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index ee308a8de4327..0a6b0a8ae97ee 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -105,21 +105,21 @@ static int gc_thread_func(void *data)
 			spin_unlock(&sbi->gc_urgent_high_lock);
 
 			wait_ms = gc_th->urgent_sleep_time;
-			down_write(&sbi->gc_lock);
+			f2fs_down_write(&sbi->gc_lock);
 			goto do_gc;
 		}
 
 		if (foreground) {
-			down_write(&sbi->gc_lock);
+			f2fs_down_write(&sbi->gc_lock);
 			goto do_gc;
-		} else if (!down_write_trylock(&sbi->gc_lock)) {
+		} else if (!f2fs_down_write_trylock(&sbi->gc_lock)) {
 			stat_other_skip_bggc_count(sbi);
 			goto next;
 		}
 
 		if (!is_idle(sbi, GC_TIME)) {
 			increase_sleep_time(gc_th, &wait_ms);
-			up_write(&sbi->gc_lock);
+			f2fs_up_write(&sbi->gc_lock);
 			stat_io_skip_bggc_count(sbi);
 			goto next;
 		}
@@ -1230,7 +1230,7 @@ static int move_data_block(struct inode *inode, block_t bidx,
 	fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr;
 
 	if (lfs_mode)
-		down_write(&fio.sbi->io_order_lock);
+		f2fs_down_write(&fio.sbi->io_order_lock);
 
 	mpage = f2fs_grab_cache_page(META_MAPPING(fio.sbi),
 					fio.old_blkaddr, false);
@@ -1316,7 +1316,7 @@ recover_block:
 							true, true, true);
 up_out:
 	if (lfs_mode)
-		up_write(&fio.sbi->io_order_lock);
+		f2fs_up_write(&fio.sbi->io_order_lock);
 put_out:
 	f2fs_put_dnode(&dn);
 out:
@@ -1475,7 +1475,7 @@ next_step:
 					special_file(inode->i_mode))
 				continue;
 
-			if (!down_write_trylock(
+			if (!f2fs_down_write_trylock(
 				&F2FS_I(inode)->i_gc_rwsem[WRITE])) {
 				iput(inode);
 				sbi->skipped_gc_rwsem++;
@@ -1488,7 +1488,7 @@ next_step:
 			if (f2fs_post_read_required(inode)) {
 				int err = ra_data_block(inode, start_bidx);
 
-				up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+				f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
 				if (err) {
 					iput(inode);
 					continue;
@@ -1499,7 +1499,7 @@ next_step:
 
 			data_page = f2fs_get_read_data_page(inode,
 						start_bidx, REQ_RAHEAD, true);
-			up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+			f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
 			if (IS_ERR(data_page)) {
 				iput(inode);
 				continue;
@@ -1518,14 +1518,14 @@ next_step:
 			int err;
 
 			if (S_ISREG(inode->i_mode)) {
-				if (!down_write_trylock(&fi->i_gc_rwsem[READ])) {
+				if (!f2fs_down_write_trylock(&fi->i_gc_rwsem[READ])) {
 					sbi->skipped_gc_rwsem++;
 					continue;
 				}
-				if (!down_write_trylock(
+				if (!f2fs_down_write_trylock(
 						&fi->i_gc_rwsem[WRITE])) {
 					sbi->skipped_gc_rwsem++;
-					up_write(&fi->i_gc_rwsem[READ]);
+					f2fs_up_write(&fi->i_gc_rwsem[READ]);
 					continue;
 				}
 				locked = true;
@@ -1548,8 +1548,8 @@ next_step:
 				submitted++;
 
 			if (locked) {
-				up_write(&fi->i_gc_rwsem[WRITE]);
-				up_write(&fi->i_gc_rwsem[READ]);
+				f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
+				f2fs_up_write(&fi->i_gc_rwsem[READ]);
 			}
 
 			stat_inc_data_blk_count(sbi, 1, gc_type);
@@ -1807,7 +1807,7 @@ stop:
 				reserved_segments(sbi),
 				prefree_segments(sbi));
 
-	up_write(&sbi->gc_lock);
+	f2fs_up_write(&sbi->gc_lock);
 
 	put_gc_inode(&gc_list);
 
@@ -1936,7 +1936,7 @@ static void update_sb_metadata(struct f2fs_sb_info *sbi, int secs)
 	long long block_count;
 	int segs = secs * sbi->segs_per_sec;
 
-	down_write(&sbi->sb_lock);
+	f2fs_down_write(&sbi->sb_lock);
 
 	section_count = le32_to_cpu(raw_sb->section_count);
 	segment_count = le32_to_cpu(raw_sb->segment_count);
@@ -1957,7 +1957,7 @@ static void update_sb_metadata(struct f2fs_sb_info *sbi, int secs)
 						cpu_to_le32(dev_segs + segs);
 	}
 
-	up_write(&sbi->sb_lock);
+	f2fs_up_write(&sbi->sb_lock);
 }
 
 static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs)
@@ -2031,7 +2031,7 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count)
 	secs = div_u64(shrunk_blocks, BLKS_PER_SEC(sbi));
 
 	/* stop other GC */
-	if (!down_write_trylock(&sbi->gc_lock))
+	if (!f2fs_down_write_trylock(&sbi->gc_lock))
 		return -EAGAIN;
 
 	/* stop CP to protect MAIN_SEC in free_segment_range */
@@ -2051,15 +2051,15 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count)
 
 out_unlock:
 	f2fs_unlock_op(sbi);
-	up_write(&sbi->gc_lock);
+	f2fs_up_write(&sbi->gc_lock);
 	if (err)
 		return err;
 
 	set_sbi_flag(sbi, SBI_IS_RESIZEFS);
 
 	freeze_super(sbi->sb);
-	down_write(&sbi->gc_lock);
-	down_write(&sbi->cp_global_sem);
+	f2fs_down_write(&sbi->gc_lock);
+	f2fs_down_write(&sbi->cp_global_sem);
 
 	spin_lock(&sbi->stat_lock);
 	if (shrunk_blocks + valid_user_blocks(sbi) +
@@ -2104,8 +2104,8 @@ recover_out:
 		spin_unlock(&sbi->stat_lock);
 	}
 out_err:
-	up_write(&sbi->cp_global_sem);
-	up_write(&sbi->gc_lock);
+	f2fs_up_write(&sbi->cp_global_sem);
+	f2fs_up_write(&sbi->gc_lock);
 	thaw_super(sbi->sb);
 	clear_sbi_flag(sbi, SBI_IS_RESIZEFS);
 	return err;
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 4b5cefa3f90c1..a578bf83b803b 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -629,7 +629,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct f2fs_filename *fname,
 	}
 
 	if (inode) {
-		down_write(&F2FS_I(inode)->i_sem);
+		f2fs_down_write(&F2FS_I(inode)->i_sem);
 		page = f2fs_init_inode_metadata(inode, dir, fname, ipage);
 		if (IS_ERR(page)) {
 			err = PTR_ERR(page);
@@ -658,7 +658,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct f2fs_filename *fname,
 	f2fs_update_parent_metadata(dir, inode, 0);
 fail:
 	if (inode)
-		up_write(&F2FS_I(inode)->i_sem);
+		f2fs_up_write(&F2FS_I(inode)->i_sem);
 out:
 	f2fs_put_page(ipage, 1);
 	return err;
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index a728a0af9ce0c..0347c5780910a 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -196,7 +196,7 @@ static inline void set_file_temperature(struct f2fs_sb_info *sbi, struct inode *
 	__u8 (*extlist)[F2FS_EXTENSION_LEN] = sbi->raw_super->extension_list;
 	int i, cold_count, hot_count;
 
-	down_read(&sbi->sb_lock);
+	f2fs_down_read(&sbi->sb_lock);
 
 	cold_count = le32_to_cpu(sbi->raw_super->extension_count);
 	hot_count = sbi->raw_super->hot_ext_count;
@@ -206,7 +206,7 @@ static inline void set_file_temperature(struct f2fs_sb_info *sbi, struct inode *
 			break;
 	}
 
-	up_read(&sbi->sb_lock);
+	f2fs_up_read(&sbi->sb_lock);
 
 	if (i == cold_count + hot_count)
 		return;
@@ -299,19 +299,19 @@ static void set_compress_inode(struct f2fs_sb_info *sbi, struct inode *inode,
 			(!ext_cnt && !noext_cnt))
 		return;
 
-	down_read(&sbi->sb_lock);
+	f2fs_down_read(&sbi->sb_lock);
 
 	cold_count = le32_to_cpu(sbi->raw_super->extension_count);
 	hot_count = sbi->raw_super->hot_ext_count;
 
 	for (i = cold_count; i < cold_count + hot_count; i++) {
 		if (is_extension_exist(name, extlist[i], false)) {
-			up_read(&sbi->sb_lock);
+			f2fs_up_read(&sbi->sb_lock);
 			return;
 		}
 	}
 
-	up_read(&sbi->sb_lock);
+	f2fs_up_read(&sbi->sb_lock);
 
 	for (i = 0; i < noext_cnt; i++) {
 		if (is_extension_exist(name, noext[i], false)) {
@@ -1023,11 +1023,11 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
 		new_page = NULL;
 
 		new_inode->i_ctime = current_time(new_inode);
-		down_write(&F2FS_I(new_inode)->i_sem);
+		f2fs_down_write(&F2FS_I(new_inode)->i_sem);
 		if (old_dir_entry)
 			f2fs_i_links_write(new_inode, false);
 		f2fs_i_links_write(new_inode, false);
-		up_write(&F2FS_I(new_inode)->i_sem);
+		f2fs_up_write(&F2FS_I(new_inode)->i_sem);
 
 		if (!new_inode->i_nlink)
 			f2fs_add_orphan_inode(new_inode);
@@ -1048,13 +1048,13 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
 			f2fs_i_links_write(new_dir, true);
 	}
 
-	down_write(&F2FS_I(old_inode)->i_sem);
+	f2fs_down_write(&F2FS_I(old_inode)->i_sem);
 	if (!old_dir_entry || whiteout)
 		file_lost_pino(old_inode);
 	else
 		/* adjust dir's i_pino to pass fsck check */
 		f2fs_i_pino_write(old_inode, new_dir->i_ino);
-	up_write(&F2FS_I(old_inode)->i_sem);
+	f2fs_up_write(&F2FS_I(old_inode)->i_sem);
 
 	old_inode->i_ctime = current_time(old_inode);
 	f2fs_mark_inode_dirty_sync(old_inode, false);
@@ -1214,38 +1214,38 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
 	/* update directory entry info of old dir inode */
 	f2fs_set_link(old_dir, old_entry, old_page, new_inode);
 
-	down_write(&F2FS_I(old_inode)->i_sem);
+	f2fs_down_write(&F2FS_I(old_inode)->i_sem);
 	if (!old_dir_entry)
 		file_lost_pino(old_inode);
 	else
 		/* adjust dir's i_pino to pass fsck check */
 		f2fs_i_pino_write(old_inode, new_dir->i_ino);
-	up_write(&F2FS_I(old_inode)->i_sem);
+	f2fs_up_write(&F2FS_I(old_inode)->i_sem);
 
 	old_dir->i_ctime = current_time(old_dir);
 	if (old_nlink) {
-		down_write(&F2FS_I(old_dir)->i_sem);
+		f2fs_down_write(&F2FS_I(old_dir)->i_sem);
 		f2fs_i_links_write(old_dir, old_nlink > 0);
-		up_write(&F2FS_I(old_dir)->i_sem);
+		f2fs_up_write(&F2FS_I(old_dir)->i_sem);
 	}
 	f2fs_mark_inode_dirty_sync(old_dir, false);
 
 	/* update directory entry info of new dir inode */
 	f2fs_set_link(new_dir, new_entry, new_page, old_inode);
 
-	down_write(&F2FS_I(new_inode)->i_sem);
+	f2fs_down_write(&F2FS_I(new_inode)->i_sem);
 	if (!new_dir_entry)
 		file_lost_pino(new_inode);
 	else
 		/* adjust dir's i_pino to pass fsck check */
 		f2fs_i_pino_write(new_inode, old_dir->i_ino);
-	up_write(&F2FS_I(new_inode)->i_sem);
+	f2fs_up_write(&F2FS_I(new_inode)->i_sem);
 
 	new_dir->i_ctime = current_time(new_dir);
 	if (new_nlink) {
-		down_write(&F2FS_I(new_dir)->i_sem);
+		f2fs_down_write(&F2FS_I(new_dir)->i_sem);
 		f2fs_i_links_write(new_dir, new_nlink > 0);
-		up_write(&F2FS_I(new_dir)->i_sem);
+		f2fs_up_write(&F2FS_I(new_dir)->i_sem);
 	}
 	f2fs_mark_inode_dirty_sync(new_dir, false);
 
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 50b2874e758c9..93512f8859d5d 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -382,14 +382,14 @@ int f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid)
 	struct nat_entry *e;
 	bool need = false;
 
-	down_read(&nm_i->nat_tree_lock);
+	f2fs_down_read(&nm_i->nat_tree_lock);
 	e = __lookup_nat_cache(nm_i, nid);
 	if (e) {
 		if (!get_nat_flag(e, IS_CHECKPOINTED) &&
 				!get_nat_flag(e, HAS_FSYNCED_INODE))
 			need = true;
 	}
-	up_read(&nm_i->nat_tree_lock);
+	f2fs_up_read(&nm_i->nat_tree_lock);
 	return need;
 }
 
@@ -399,11 +399,11 @@ bool f2fs_is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
 	struct nat_entry *e;
 	bool is_cp = true;
 
-	down_read(&nm_i->nat_tree_lock);
+	f2fs_down_read(&nm_i->nat_tree_lock);
 	e = __lookup_nat_cache(nm_i, nid);
 	if (e && !get_nat_flag(e, IS_CHECKPOINTED))
 		is_cp = false;
-	up_read(&nm_i->nat_tree_lock);
+	f2fs_up_read(&nm_i->nat_tree_lock);
 	return is_cp;
 }
 
@@ -413,13 +413,13 @@ bool f2fs_need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino)
 	struct nat_entry *e;
 	bool need_update = true;
 
-	down_read(&nm_i->nat_tree_lock);
+	f2fs_down_read(&nm_i->nat_tree_lock);
 	e = __lookup_nat_cache(nm_i, ino);
 	if (e && get_nat_flag(e, HAS_LAST_FSYNC) &&
 			(get_nat_flag(e, IS_CHECKPOINTED) ||
 			 get_nat_flag(e, HAS_FSYNCED_INODE)))
 		need_update = false;
-	up_read(&nm_i->nat_tree_lock);
+	f2fs_up_read(&nm_i->nat_tree_lock);
 	return need_update;
 }
 
@@ -431,14 +431,14 @@ static void cache_nat_entry(struct f2fs_sb_info *sbi, nid_t nid,
 	struct nat_entry *new, *e;
 
 	/* Let's mitigate lock contention of nat_tree_lock during checkpoint */
-	if (rwsem_is_locked(&sbi->cp_global_sem))
+	if (f2fs_rwsem_is_locked(&sbi->cp_global_sem))
 		return;
 
 	new = __alloc_nat_entry(sbi, nid, false);
 	if (!new)
 		return;
 
-	down_write(&nm_i->nat_tree_lock);
+	f2fs_down_write(&nm_i->nat_tree_lock);
 	e = __lookup_nat_cache(nm_i, nid);
 	if (!e)
 		e = __init_nat_entry(nm_i, new, ne, false);
@@ -447,7 +447,7 @@ static void cache_nat_entry(struct f2fs_sb_info *sbi, nid_t nid,
 				nat_get_blkaddr(e) !=
 					le32_to_cpu(ne->block_addr) ||
 				nat_get_version(e) != ne->version);
-	up_write(&nm_i->nat_tree_lock);
+	f2fs_up_write(&nm_i->nat_tree_lock);
 	if (e != new)
 		__free_nat_entry(new);
 }
@@ -459,7 +459,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
 	struct nat_entry *e;
 	struct nat_entry *new = __alloc_nat_entry(sbi, ni->nid, true);
 
-	down_write(&nm_i->nat_tree_lock);
+	f2fs_down_write(&nm_i->nat_tree_lock);
 	e = __lookup_nat_cache(nm_i, ni->nid);
 	if (!e) {
 		e = __init_nat_entry(nm_i, new, NULL, true);
@@ -508,7 +508,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
 			set_nat_flag(e, HAS_FSYNCED_INODE, true);
 		set_nat_flag(e, HAS_LAST_FSYNC, fsync_done);
 	}
-	up_write(&nm_i->nat_tree_lock);
+	f2fs_up_write(&nm_i->nat_tree_lock);
 }
 
 int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
@@ -516,7 +516,7 @@ int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
 	struct f2fs_nm_info *nm_i = NM_I(sbi);
 	int nr = nr_shrink;
 
-	if (!down_write_trylock(&nm_i->nat_tree_lock))
+	if (!f2fs_down_write_trylock(&nm_i->nat_tree_lock))
 		return 0;
 
 	spin_lock(&nm_i->nat_list_lock);
@@ -538,7 +538,7 @@ int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
 	}
 	spin_unlock(&nm_i->nat_list_lock);
 
-	up_write(&nm_i->nat_tree_lock);
+	f2fs_up_write(&nm_i->nat_tree_lock);
 	return nr - nr_shrink;
 }
 
@@ -560,13 +560,13 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
 	ni->nid = nid;
 retry:
 	/* Check nat cache */
-	down_read(&nm_i->nat_tree_lock);
+	f2fs_down_read(&nm_i->nat_tree_lock);
 	e = __lookup_nat_cache(nm_i, nid);
 	if (e) {
 		ni->ino = nat_get_ino(e);
 		ni->blk_addr = nat_get_blkaddr(e);
 		ni->version = nat_get_version(e);
-		up_read(&nm_i->nat_tree_lock);
+		f2fs_up_read(&nm_i->nat_tree_lock);
 		return 0;
 	}
 
@@ -576,11 +576,11 @@ retry:
 	 * nat_tree_lock. Therefore, we should retry, if we failed to grab here
 	 * while not bothering checkpoint.
 	 */
-	if (!rwsem_is_locked(&sbi->cp_global_sem) || checkpoint_context) {
+	if (!f2fs_rwsem_is_locked(&sbi->cp_global_sem) || checkpoint_context) {
 		down_read(&curseg->journal_rwsem);
-	} else if (rwsem_is_contended(&nm_i->nat_tree_lock) ||
+	} else if (f2fs_rwsem_is_contended(&nm_i->nat_tree_lock) ||
 				!down_read_trylock(&curseg->journal_rwsem)) {
-		up_read(&nm_i->nat_tree_lock);
+		f2fs_up_read(&nm_i->nat_tree_lock);
 		goto retry;
 	}
 
@@ -589,15 +589,15 @@ retry:
 		ne = nat_in_journal(journal, i);
 		node_info_from_raw_nat(ni, &ne);
 	}
-	up_read(&curseg->journal_rwsem);
+        up_read(&curseg->journal_rwsem);
 	if (i >= 0) {
-		up_read(&nm_i->nat_tree_lock);
+		f2fs_up_read(&nm_i->nat_tree_lock);
 		goto cache;
 	}
 
 	/* Fill node_info from nat page */
 	index = current_nat_addr(sbi, nid);
-	up_read(&nm_i->nat_tree_lock);
+	f2fs_up_read(&nm_i->nat_tree_lock);
 
 	page = f2fs_get_meta_page(sbi, index);
 	if (IS_ERR(page))
@@ -1609,17 +1609,17 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
 		goto redirty_out;
 
 	if (wbc->for_reclaim) {
-		if (!down_read_trylock(&sbi->node_write))
+		if (!f2fs_down_read_trylock(&sbi->node_write))
 			goto redirty_out;
 	} else {
-		down_read(&sbi->node_write);
+		f2fs_down_read(&sbi->node_write);
 	}
 
 	/* This page is already truncated */
 	if (unlikely(ni.blk_addr == NULL_ADDR)) {
 		ClearPageUptodate(page);
 		dec_page_count(sbi, F2FS_DIRTY_NODES);
-		up_read(&sbi->node_write);
+		f2fs_up_read(&sbi->node_write);
 		unlock_page(page);
 		return 0;
 	}
@@ -1627,7 +1627,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
 	if (__is_valid_data_blkaddr(ni.blk_addr) &&
 		!f2fs_is_valid_blkaddr(sbi, ni.blk_addr,
 					DATA_GENERIC_ENHANCE)) {
-		up_read(&sbi->node_write);
+		f2fs_up_read(&sbi->node_write);
 		goto redirty_out;
 	}
 
@@ -1648,7 +1648,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
 	f2fs_do_write_node_page(nid, &fio);
 	set_node_addr(sbi, &ni, fio.new_blkaddr, is_fsync_dnode(page));
 	dec_page_count(sbi, F2FS_DIRTY_NODES);
-	up_read(&sbi->node_write);
+	f2fs_up_read(&sbi->node_write);
 
 	if (wbc->for_reclaim) {
 		f2fs_submit_merged_write_cond(sbi, NULL, page, 0, NODE);
@@ -2225,14 +2225,14 @@ bool f2fs_nat_bitmap_enabled(struct f2fs_sb_info *sbi)
 	unsigned int i;
 	bool ret = true;
 
-	down_read(&nm_i->nat_tree_lock);
+	f2fs_down_read(&nm_i->nat_tree_lock);
 	for (i = 0; i < nm_i->nat_blocks; i++) {
 		if (!test_bit_le(i, nm_i->nat_block_bitmap)) {
 			ret = false;
 			break;
 		}
 	}
-	up_read(&nm_i->nat_tree_lock);
+	f2fs_up_read(&nm_i->nat_tree_lock);
 
 	return ret;
 }
@@ -2415,7 +2415,7 @@ static void scan_free_nid_bits(struct f2fs_sb_info *sbi)
 	unsigned int i, idx;
 	nid_t nid;
 
-	down_read(&nm_i->nat_tree_lock);
+	f2fs_down_read(&nm_i->nat_tree_lock);
 
 	for (i = 0; i < nm_i->nat_blocks; i++) {
 		if (!test_bit_le(i, nm_i->nat_block_bitmap))
@@ -2438,7 +2438,7 @@ static void scan_free_nid_bits(struct f2fs_sb_info *sbi)
 out:
 	scan_curseg_cache(sbi);
 
-	up_read(&nm_i->nat_tree_lock);
+	f2fs_up_read(&nm_i->nat_tree_lock);
 }
 
 static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi,
@@ -2473,7 +2473,7 @@ static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi,
 	f2fs_ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
 							META_NAT, true);
 
-	down_read(&nm_i->nat_tree_lock);
+	f2fs_down_read(&nm_i->nat_tree_lock);
 
 	while (1) {
 		if (!test_bit_le(NAT_BLOCK_OFFSET(nid),
@@ -2488,7 +2488,7 @@ static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi,
 			}
 
 			if (ret) {
-				up_read(&nm_i->nat_tree_lock);
+				f2fs_up_read(&nm_i->nat_tree_lock);
 				f2fs_err(sbi, "NAT is corrupt, run fsck to fix it");
 				return ret;
 			}
@@ -2508,7 +2508,7 @@ static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi,
 	/* find free nids from current sum_pages */
 	scan_curseg_cache(sbi);
 
-	up_read(&nm_i->nat_tree_lock);
+	f2fs_up_read(&nm_i->nat_tree_lock);
 
 	f2fs_ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid),
 					nm_i->ra_nid_pages, META_NAT, false);
@@ -2953,7 +2953,7 @@ void f2fs_enable_nat_bits(struct f2fs_sb_info *sbi)
 	struct f2fs_nm_info *nm_i = NM_I(sbi);
 	unsigned int nat_ofs;
 
-	down_read(&nm_i->nat_tree_lock);
+	f2fs_down_read(&nm_i->nat_tree_lock);
 
 	for (nat_ofs = 0; nat_ofs < nm_i->nat_blocks; nat_ofs++) {
 		unsigned int valid = 0, nid_ofs = 0;
@@ -2973,7 +2973,7 @@ void f2fs_enable_nat_bits(struct f2fs_sb_info *sbi)
 		__update_nat_bits(nm_i, nat_ofs, valid);
 	}
 
-	up_read(&nm_i->nat_tree_lock);
+	f2fs_up_read(&nm_i->nat_tree_lock);
 }
 
 static int __flush_nat_entry_set(struct f2fs_sb_info *sbi,
@@ -3071,15 +3071,15 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 	 * nat_cnt[DIRTY_NAT].
 	 */
 	if (cpc->reason & CP_UMOUNT) {
-		down_write(&nm_i->nat_tree_lock);
+		f2fs_down_write(&nm_i->nat_tree_lock);
 		remove_nats_in_journal(sbi);
-		up_write(&nm_i->nat_tree_lock);
+		f2fs_up_write(&nm_i->nat_tree_lock);
 	}
 
 	if (!nm_i->nat_cnt[DIRTY_NAT])
 		return 0;
 
-	down_write(&nm_i->nat_tree_lock);
+	f2fs_down_write(&nm_i->nat_tree_lock);
 
 	/*
 	 * if there are no enough space in journal to store dirty nat
@@ -3108,7 +3108,7 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 			break;
 	}
 
-	up_write(&nm_i->nat_tree_lock);
+	f2fs_up_write(&nm_i->nat_tree_lock);
 	/* Allow dirty nats by node block allocation in write_begin */
 
 	return err;
@@ -3228,7 +3228,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
 
 	mutex_init(&nm_i->build_lock);
 	spin_lock_init(&nm_i->nid_list_lock);
-	init_rwsem(&nm_i->nat_tree_lock);
+	init_f2fs_rwsem(&nm_i->nat_tree_lock);
 
 	nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid);
 	nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP);
@@ -3334,7 +3334,7 @@ void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi)
 	spin_unlock(&nm_i->nid_list_lock);
 
 	/* destroy nat cache */
-	down_write(&nm_i->nat_tree_lock);
+	f2fs_down_write(&nm_i->nat_tree_lock);
 	while ((found = __gang_lookup_nat_cache(nm_i,
 					nid, NATVEC_SIZE, natvec))) {
 		unsigned idx;
@@ -3364,7 +3364,7 @@ void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi)
 			kmem_cache_free(nat_entry_set_slab, setvec[idx]);
 		}
 	}
-	up_write(&nm_i->nat_tree_lock);
+	f2fs_up_write(&nm_i->nat_tree_lock);
 
 	kvfree(nm_i->nat_block_bitmap);
 	if (nm_i->free_nid_bitmap) {
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 9683c80ff8c24..10d152cfa58d1 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -796,7 +796,7 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
 	INIT_LIST_HEAD(&dir_list);
 
 	/* prevent checkpoint */
-	down_write(&sbi->cp_global_sem);
+	f2fs_down_write(&sbi->cp_global_sem);
 
 	/* step #1: find fsynced inode numbers */
 	err = find_fsync_dnodes(sbi, &inode_list, check_only);
@@ -845,7 +845,7 @@ skip:
 	if (!err)
 		clear_sbi_flag(sbi, SBI_POR_DOING);
 
-	up_write(&sbi->cp_global_sem);
+	f2fs_up_write(&sbi->cp_global_sem);
 
 	/* let's drop all the directory inodes for clean checkpoint */
 	destroy_fsync_dnodes(&dir_list, err);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 1dabc8244083d..216538b573310 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -471,7 +471,7 @@ int f2fs_commit_inmem_pages(struct inode *inode)
 
 	f2fs_balance_fs(sbi, true);
 
-	down_write(&fi->i_gc_rwsem[WRITE]);
+	f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
 
 	f2fs_lock_op(sbi);
 	set_inode_flag(inode, FI_ATOMIC_COMMIT);
@@ -483,7 +483,7 @@ int f2fs_commit_inmem_pages(struct inode *inode)
 	clear_inode_flag(inode, FI_ATOMIC_COMMIT);
 
 	f2fs_unlock_op(sbi);
-	up_write(&fi->i_gc_rwsem[WRITE]);
+	f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
 
 	return err;
 }
@@ -521,7 +521,7 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
 			io_schedule();
 			finish_wait(&sbi->gc_thread->fggc_wq, &wait);
 		} else {
-			down_write(&sbi->gc_lock);
+			f2fs_down_write(&sbi->gc_lock);
 			f2fs_gc(sbi, false, false, false, NULL_SEGNO);
 		}
 	}
@@ -529,7 +529,7 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
 
 static inline bool excess_dirty_threshold(struct f2fs_sb_info *sbi)
 {
-	int factor = rwsem_is_locked(&sbi->cp_rwsem) ? 3 : 2;
+	int factor = f2fs_rwsem_is_locked(&sbi->cp_rwsem) ? 3 : 2;
 	unsigned int dents = get_pages(sbi, F2FS_DIRTY_DENTS);
 	unsigned int qdata = get_pages(sbi, F2FS_DIRTY_QDATA);
 	unsigned int nodes = get_pages(sbi, F2FS_DIRTY_NODES);
@@ -570,7 +570,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg)
 
 	/* there is background inflight IO or foreground operation recently */
 	if (is_inflight_io(sbi, REQ_TIME) ||
-		(!f2fs_time_over(sbi, REQ_TIME) && rwsem_is_locked(&sbi->cp_rwsem)))
+		(!f2fs_time_over(sbi, REQ_TIME) && f2fs_rwsem_is_locked(&sbi->cp_rwsem)))
 		return;
 
 	/* exceed periodical checkpoint timeout threshold */
@@ -2821,7 +2821,7 @@ static void __f2fs_init_atgc_curseg(struct f2fs_sb_info *sbi)
 	if (!sbi->am.atgc_enabled)
 		return;
 
-	down_read(&SM_I(sbi)->curseg_lock);
+	f2fs_down_read(&SM_I(sbi)->curseg_lock);
 
 	mutex_lock(&curseg->curseg_mutex);
 	down_write(&SIT_I(sbi)->sentry_lock);
@@ -2831,7 +2831,7 @@ static void __f2fs_init_atgc_curseg(struct f2fs_sb_info *sbi)
 	up_write(&SIT_I(sbi)->sentry_lock);
 	mutex_unlock(&curseg->curseg_mutex);
 
-	up_read(&SM_I(sbi)->curseg_lock);
+	f2fs_up_read(&SM_I(sbi)->curseg_lock);
 
 }
 void f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi)
@@ -2982,7 +2982,7 @@ void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
 	struct curseg_info *curseg = CURSEG_I(sbi, type);
 	unsigned int segno;
 
-	down_read(&SM_I(sbi)->curseg_lock);
+	f2fs_down_read(&SM_I(sbi)->curseg_lock);
 	mutex_lock(&curseg->curseg_mutex);
 	down_write(&SIT_I(sbi)->sentry_lock);
 
@@ -3006,7 +3006,7 @@ unlock:
 			    type, segno, curseg->segno);
 
 	mutex_unlock(&curseg->curseg_mutex);
-	up_read(&SM_I(sbi)->curseg_lock);
+	f2fs_up_read(&SM_I(sbi)->curseg_lock);
 }
 
 static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
@@ -3038,23 +3038,23 @@ static void __allocate_new_section(struct f2fs_sb_info *sbi,
 
 void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force)
 {
-	down_read(&SM_I(sbi)->curseg_lock);
+	f2fs_down_read(&SM_I(sbi)->curseg_lock);
 	down_write(&SIT_I(sbi)->sentry_lock);
 	__allocate_new_section(sbi, type, force);
 	up_write(&SIT_I(sbi)->sentry_lock);
-	up_read(&SM_I(sbi)->curseg_lock);
+	f2fs_up_read(&SM_I(sbi)->curseg_lock);
 }
 
 void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi)
 {
 	int i;
 
-	down_read(&SM_I(sbi)->curseg_lock);
+	f2fs_down_read(&SM_I(sbi)->curseg_lock);
 	down_write(&SIT_I(sbi)->sentry_lock);
 	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++)
 		__allocate_new_segment(sbi, i, false, false);
 	up_write(&SIT_I(sbi)->sentry_lock);
-	up_read(&SM_I(sbi)->curseg_lock);
+	f2fs_up_read(&SM_I(sbi)->curseg_lock);
 }
 
 static const struct segment_allocation default_salloc_ops = {
@@ -3192,9 +3192,9 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
 	if (sbi->discard_blks == 0)
 		goto out;
 
-	down_write(&sbi->gc_lock);
+	f2fs_down_write(&sbi->gc_lock);
 	err = f2fs_write_checkpoint(sbi, &cpc);
-	up_write(&sbi->gc_lock);
+	f2fs_up_write(&sbi->gc_lock);
 	if (err)
 		goto out;
 
@@ -3431,7 +3431,7 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
 	bool from_gc = (type == CURSEG_ALL_DATA_ATGC);
 	struct seg_entry *se = NULL;
 
-	down_read(&SM_I(sbi)->curseg_lock);
+	f2fs_down_read(&SM_I(sbi)->curseg_lock);
 
 	mutex_lock(&curseg->curseg_mutex);
 	down_write(&sit_i->sentry_lock);
@@ -3514,7 +3514,7 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
 
 	mutex_unlock(&curseg->curseg_mutex);
 
-	up_read(&SM_I(sbi)->curseg_lock);
+	f2fs_up_read(&SM_I(sbi)->curseg_lock);
 }
 
 void f2fs_update_device_state(struct f2fs_sb_info *sbi, nid_t ino,
@@ -3550,7 +3550,7 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
 	bool keep_order = (f2fs_lfs_mode(fio->sbi) && type == CURSEG_COLD_DATA);
 
 	if (keep_order)
-		down_read(&fio->sbi->io_order_lock);
+		f2fs_down_read(&fio->sbi->io_order_lock);
 reallocate:
 	f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
 			&fio->new_blkaddr, sum, type, fio);
@@ -3570,7 +3570,7 @@ reallocate:
 	f2fs_update_device_state(fio->sbi, fio->ino, fio->new_blkaddr, 1);
 
 	if (keep_order)
-		up_read(&fio->sbi->io_order_lock);
+		f2fs_up_read(&fio->sbi->io_order_lock);
 }
 
 void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page,
@@ -3705,7 +3705,7 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
 	se = get_seg_entry(sbi, segno);
 	type = se->type;
 
-	down_write(&SM_I(sbi)->curseg_lock);
+	f2fs_down_write(&SM_I(sbi)->curseg_lock);
 
 	if (!recover_curseg) {
 		/* for recovery flow */
@@ -3774,7 +3774,7 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
 
 	up_write(&sit_i->sentry_lock);
 	mutex_unlock(&curseg->curseg_mutex);
-	up_write(&SM_I(sbi)->curseg_lock);
+	f2fs_up_write(&SM_I(sbi)->curseg_lock);
 }
 
 void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
@@ -5258,7 +5258,7 @@ int f2fs_build_segment_manager(struct f2fs_sb_info *sbi)
 
 	INIT_LIST_HEAD(&sm_info->sit_entry_set);
 
-	init_rwsem(&sm_info->curseg_lock);
+	init_f2fs_rwsem(&sm_info->curseg_lock);
 
 	if (!f2fs_readonly(sbi->sb)) {
 		err = f2fs_create_flush_cmd_control(sbi);
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 76e6a3df9abac..9af6c20532ece 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1355,16 +1355,16 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
 	/* Initialize f2fs-specific inode info */
 	atomic_set(&fi->dirty_pages, 0);
 	atomic_set(&fi->i_compr_blocks, 0);
-	init_rwsem(&fi->i_sem);
+	init_f2fs_rwsem(&fi->i_sem);
 	spin_lock_init(&fi->i_size_lock);
 	INIT_LIST_HEAD(&fi->dirty_list);
 	INIT_LIST_HEAD(&fi->gdirty_list);
 	INIT_LIST_HEAD(&fi->inmem_ilist);
 	INIT_LIST_HEAD(&fi->inmem_pages);
 	mutex_init(&fi->inmem_lock);
-	init_rwsem(&fi->i_gc_rwsem[READ]);
-	init_rwsem(&fi->i_gc_rwsem[WRITE]);
-	init_rwsem(&fi->i_xattr_sem);
+	init_f2fs_rwsem(&fi->i_gc_rwsem[READ]);
+	init_f2fs_rwsem(&fi->i_gc_rwsem[WRITE]);
+	init_f2fs_rwsem(&fi->i_xattr_sem);
 
 	/* Will be used by directory only */
 	fi->i_dir_level = F2FS_SB(sb)->dir_level;
@@ -2088,7 +2088,7 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
 	f2fs_update_time(sbi, DISABLE_TIME);
 
 	while (!f2fs_time_over(sbi, DISABLE_TIME)) {
-		down_write(&sbi->gc_lock);
+		f2fs_down_write(&sbi->gc_lock);
 		err = f2fs_gc(sbi, true, false, false, NULL_SEGNO);
 		if (err == -ENODATA) {
 			err = 0;
@@ -2110,7 +2110,7 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
 		goto restore_flag;
 	}
 
-	down_write(&sbi->gc_lock);
+	f2fs_down_write(&sbi->gc_lock);
 	cpc.reason = CP_PAUSE;
 	set_sbi_flag(sbi, SBI_CP_DISABLED);
 	err = f2fs_write_checkpoint(sbi, &cpc);
@@ -2122,7 +2122,7 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
 	spin_unlock(&sbi->stat_lock);
 
 out_unlock:
-	up_write(&sbi->gc_lock);
+	f2fs_up_write(&sbi->gc_lock);
 restore_flag:
 	sbi->sb->s_flags = s_flags;	/* Restore SB_RDONLY status */
 	return err;
@@ -2142,12 +2142,12 @@ static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi)
 	if (unlikely(retry < 0))
 		f2fs_warn(sbi, "checkpoint=enable has some unwritten data.");
 
-	down_write(&sbi->gc_lock);
+	f2fs_down_write(&sbi->gc_lock);
 	f2fs_dirty_to_prefree(sbi);
 
 	clear_sbi_flag(sbi, SBI_CP_DISABLED);
 	set_sbi_flag(sbi, SBI_IS_DIRTY);
-	up_write(&sbi->gc_lock);
+	f2fs_up_write(&sbi->gc_lock);
 
 	f2fs_sync_fs(sbi->sb, 1);
 }
@@ -2707,18 +2707,18 @@ int f2fs_quota_sync(struct super_block *sb, int type)
 		/*
 		 * do_quotactl
 		 *  f2fs_quota_sync
-		 *  down_read(quota_sem)
+		 *  f2fs_down_read(quota_sem)
 		 *  dquot_writeback_dquots()
 		 *  f2fs_dquot_commit
 		 *			      block_operation
-		 *			      down_read(quota_sem)
+		 *			      f2fs_down_read(quota_sem)
 		 */
 		f2fs_lock_op(sbi);
-		down_read(&sbi->quota_sem);
+		f2fs_down_read(&sbi->quota_sem);
 
 		ret = f2fs_quota_sync_file(sbi, cnt);
 
-		up_read(&sbi->quota_sem);
+		f2fs_up_read(&sbi->quota_sem);
 		f2fs_unlock_op(sbi);
 
 		inode_unlock(dqopt->files[cnt]);
@@ -2843,11 +2843,11 @@ static int f2fs_dquot_commit(struct dquot *dquot)
 	struct f2fs_sb_info *sbi = F2FS_SB(dquot->dq_sb);
 	int ret;
 
-	down_read_nested(&sbi->quota_sem, SINGLE_DEPTH_NESTING);
+	f2fs_down_read_nested(&sbi->quota_sem, SINGLE_DEPTH_NESTING);
 	ret = dquot_commit(dquot);
 	if (ret < 0)
 		set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
-	up_read(&sbi->quota_sem);
+	f2fs_up_read(&sbi->quota_sem);
 	return ret;
 }
 
@@ -2856,11 +2856,11 @@ static int f2fs_dquot_acquire(struct dquot *dquot)
 	struct f2fs_sb_info *sbi = F2FS_SB(dquot->dq_sb);
 	int ret;
 
-	down_read(&sbi->quota_sem);
+	f2fs_down_read(&sbi->quota_sem);
 	ret = dquot_acquire(dquot);
 	if (ret < 0)
 		set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
-	up_read(&sbi->quota_sem);
+	f2fs_up_read(&sbi->quota_sem);
 	return ret;
 }
 
@@ -3601,14 +3601,14 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
 
 	INIT_LIST_HEAD(&sbi->s_list);
 	mutex_init(&sbi->umount_mutex);
-	init_rwsem(&sbi->io_order_lock);
+	init_f2fs_rwsem(&sbi->io_order_lock);
 	spin_lock_init(&sbi->cp_lock);
 
 	sbi->dirty_device = 0;
 	spin_lock_init(&sbi->dev_lock);
 
-	init_rwsem(&sbi->sb_lock);
-	init_rwsem(&sbi->pin_sem);
+	init_f2fs_rwsem(&sbi->sb_lock);
+	init_f2fs_rwsem(&sbi->pin_sem);
 }
 
 static int init_percpu_info(struct f2fs_sb_info *sbi)
@@ -4067,11 +4067,11 @@ try_onemore:
 
 	/* init f2fs-specific super block info */
 	sbi->valid_super_block = valid_super_block;
-	init_rwsem(&sbi->gc_lock);
+	init_f2fs_rwsem(&sbi->gc_lock);
 	mutex_init(&sbi->writepages);
-	init_rwsem(&sbi->cp_global_sem);
-	init_rwsem(&sbi->node_write);
-	init_rwsem(&sbi->node_change);
+	init_f2fs_rwsem(&sbi->cp_global_sem);
+	init_f2fs_rwsem(&sbi->node_write);
+	init_f2fs_rwsem(&sbi->node_change);
 
 	/* disallow all the data/node/meta page writes */
 	set_sbi_flag(sbi, SBI_POR_DOING);
@@ -4092,18 +4092,18 @@ try_onemore:
 		}
 
 		for (j = HOT; j < n; j++) {
-			init_rwsem(&sbi->write_io[i][j].io_rwsem);
+			init_f2fs_rwsem(&sbi->write_io[i][j].io_rwsem);
 			sbi->write_io[i][j].sbi = sbi;
 			sbi->write_io[i][j].bio = NULL;
 			spin_lock_init(&sbi->write_io[i][j].io_lock);
 			INIT_LIST_HEAD(&sbi->write_io[i][j].io_list);
 			INIT_LIST_HEAD(&sbi->write_io[i][j].bio_list);
-			init_rwsem(&sbi->write_io[i][j].bio_list_lock);
+			init_f2fs_rwsem(&sbi->write_io[i][j].bio_list_lock);
 		}
 	}
 
-	init_rwsem(&sbi->cp_rwsem);
-	init_rwsem(&sbi->quota_sem);
+	init_f2fs_rwsem(&sbi->cp_rwsem);
+	init_f2fs_rwsem(&sbi->quota_sem);
 	init_waitqueue_head(&sbi->cp_wait);
 	init_sb_info(sbi);
 
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index df406c16b2ebd..ce70e798d0d45 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -363,7 +363,7 @@ static ssize_t __sbi_store(struct f2fs_attr *a,
 		if (!strlen(name) || strlen(name) >= F2FS_EXTENSION_LEN)
 			return -EINVAL;
 
-		down_write(&sbi->sb_lock);
+		f2fs_down_write(&sbi->sb_lock);
 
 		ret = f2fs_update_extension_list(sbi, name, hot, set);
 		if (ret)
@@ -373,7 +373,7 @@ static ssize_t __sbi_store(struct f2fs_attr *a,
 		if (ret)
 			f2fs_update_extension_list(sbi, name, hot, !set);
 out:
-		up_write(&sbi->sb_lock);
+		f2fs_up_write(&sbi->sb_lock);
 		return ret ? ret : count;
 	}
 
diff --git a/fs/f2fs/verity.c b/fs/f2fs/verity.c
index fe5acdccaae19..3d793202cc9fe 100644
--- a/fs/f2fs/verity.c
+++ b/fs/f2fs/verity.c
@@ -208,7 +208,7 @@ cleanup:
 	 * from re-instantiating cached pages we are truncating (since unlike
 	 * normal file accesses, garbage collection isn't limited by i_size).
 	 */
-	down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+	f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
 	truncate_inode_pages(inode->i_mapping, inode->i_size);
 	err2 = f2fs_truncate(inode);
 	if (err2) {
@@ -216,7 +216,7 @@ cleanup:
 			 err2);
 		set_sbi_flag(sbi, SBI_NEED_FSCK);
 	}
-	up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+	f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
 	clear_inode_flag(inode, FI_VERITY_IN_PROGRESS);
 	return err ?: err2;
 }
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 8e5cd9c916ff1..c76c15086e5f5 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -525,10 +525,10 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name,
 	if (len > F2FS_NAME_LEN)
 		return -ERANGE;
 
-	down_read(&F2FS_I(inode)->i_xattr_sem);
+	f2fs_down_read(&F2FS_I(inode)->i_xattr_sem);
 	error = lookup_all_xattrs(inode, ipage, index, len, name,
 				&entry, &base_addr, &base_size, &is_inline);
-	up_read(&F2FS_I(inode)->i_xattr_sem);
+	f2fs_up_read(&F2FS_I(inode)->i_xattr_sem);
 	if (error)
 		return error;
 
@@ -562,9 +562,9 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
 	int error;
 	size_t rest = buffer_size;
 
-	down_read(&F2FS_I(inode)->i_xattr_sem);
+	f2fs_down_read(&F2FS_I(inode)->i_xattr_sem);
 	error = read_all_xattrs(inode, NULL, &base_addr);
-	up_read(&F2FS_I(inode)->i_xattr_sem);
+	f2fs_up_read(&F2FS_I(inode)->i_xattr_sem);
 	if (error)
 		return error;
 
@@ -786,9 +786,9 @@ int f2fs_setxattr(struct inode *inode, int index, const char *name,
 	f2fs_balance_fs(sbi, true);
 
 	f2fs_lock_op(sbi);
-	down_write(&F2FS_I(inode)->i_xattr_sem);
+	f2fs_down_write(&F2FS_I(inode)->i_xattr_sem);
 	err = __f2fs_setxattr(inode, index, name, value, size, ipage, flags);
-	up_write(&F2FS_I(inode)->i_xattr_sem);
+	f2fs_up_write(&F2FS_I(inode)->i_xattr_sem);
 	f2fs_unlock_op(sbi);
 
 	f2fs_update_time(sbi, REQ_TIME);
-- 
GitLab


From 7d19e3dab0002e527052b0aaf986e8c32e5537bf Mon Sep 17 00:00:00 2001
From: Chao Yu <chao@kernel.org>
Date: Tue, 18 Jan 2022 11:48:02 +0800
Subject: [PATCH 0026/1586] f2fs: fix to enable ATGC correctly via gc_idle
 sysfs interface

It needs to assign sbi->gc_mode with GC_IDLE_AT rather than GC_AT when
user tries to enable ATGC via gc_idle sysfs interface, fix it.

Fixes: 093749e296e2 ("f2fs: support age threshold based garbage collection")
Cc: Zhipeng Tan <tanzhipeng@hust.edu.cn>
Signed-off-by: Jicheng Shao <shaojicheng@hust.edu.cn>
Signed-off-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/sysfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index ce70e798d0d45..2bccdaedfb000 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -481,7 +481,7 @@ out:
 		} else if (t == GC_IDLE_AT) {
 			if (!sbi->am.atgc_enabled)
 				return -EINVAL;
-			sbi->gc_mode = GC_AT;
+			sbi->gc_mode = GC_IDLE_AT;
 		} else {
 			sbi->gc_mode = GC_NORMAL;
 		}
-- 
GitLab


From f62ca4e2a863033d9b3b5a00a0d897557c9da6c5 Mon Sep 17 00:00:00 2001
From: Li-hao Kuo <lhjeff911@gmail.com>
Date: Tue, 18 Jan 2022 16:42:38 +0800
Subject: [PATCH 0027/1586] spi: Add spi driver for Sunplus SP7021

Add spi driver for Sunplus SP7021.

Signed-off-by: Li-hao Kuo <lhjeff911@gmail.com>
Link: https://lore.kernel.org/r/37998e515d561e762ee30d0ac4fca25a948e0c5c.1642494310.git.lhjeff911@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 MAINTAINERS                      |   6 +
 drivers/spi/Kconfig              |  11 +
 drivers/spi/Makefile             |   1 +
 drivers/spi/spi-sunplus-sp7021.c | 602 +++++++++++++++++++++++++++++++
 4 files changed, 620 insertions(+)
 create mode 100644 drivers/spi/spi-sunplus-sp7021.c

diff --git a/MAINTAINERS b/MAINTAINERS
index ea3e6c9143848..a517966bc7e88 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -18491,6 +18491,12 @@ S:	Maintained
 F:	Documentation/devicetree/bindings/rtc/sunplus,sp7021-rtc.yaml
 F:	drivers/rtc/rtc-sunplus.c
 
+SUNPLUS SPI CONTROLLER INTERFACE DRIVER
+M:	Li-hao Kuo <lhjeff911@gmail.com>
+L:	linux-spi@vger.kernel.org
+S:	Maintained
+F:	drivers/spi/spi-sunplus-sp7021.c
+
 SUPERH
 M:	Yoshinori Sato <ysato@users.sourceforge.jp>
 M:	Rich Felker <dalias@libc.org>
diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index b2a8821971e1d..203f4ec321190 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -866,6 +866,17 @@ config SPI_SUN6I
 	help
 	  This enables using the SPI controller on the Allwinner A31 SoCs.
 
+config SPI_SUNPLUS_SP7021
+	tristate "Sunplus SP7021 SPI controller"
+	depends on SOC_SP7021 || COMPILE_TEST
+	help
+	  This enables Sunplus SP7021 SPI controller driver on the SP7021 SoCs.
+	  This driver can also be built as a module. If so, the module will be
+	  called as spi-sunplus-sp7021.
+
+	  If you have a  Sunplus SP7021 platform say Y here.
+	  If unsure, say N.
+
 config SPI_SYNQUACER
 	tristate "Socionext's SynQuacer HighSpeed SPI controller"
 	depends on ARCH_SYNQUACER || COMPILE_TEST
diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
index dd7393a6046fa..b455eafd53677 100644
--- a/drivers/spi/Makefile
+++ b/drivers/spi/Makefile
@@ -119,6 +119,7 @@ obj-$(CONFIG_SPI_STM32_QSPI) 		+= spi-stm32-qspi.o
 obj-$(CONFIG_SPI_ST_SSC4)		+= spi-st-ssc4.o
 obj-$(CONFIG_SPI_SUN4I)			+= spi-sun4i.o
 obj-$(CONFIG_SPI_SUN6I)			+= spi-sun6i.o
+obj-$(CONFIG_SPI_SUNPLUS_SP7021)	+= spi-sunplus-sp7021.o
 obj-$(CONFIG_SPI_SYNQUACER)		+= spi-synquacer.o
 obj-$(CONFIG_SPI_TEGRA210_QUAD)		+= spi-tegra210-quad.o
 obj-$(CONFIG_SPI_TEGRA114)		+= spi-tegra114.o
diff --git a/drivers/spi/spi-sunplus-sp7021.c b/drivers/spi/spi-sunplus-sp7021.c
new file mode 100644
index 0000000000000..627b9c3024e96
--- /dev/null
+++ b/drivers/spi/spi-sunplus-sp7021.c
@@ -0,0 +1,602 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (c) 2021 Sunplus Inc.
+// Author: Li-hao Kuo <lhjeff911@gmail.com>
+
+#include <linux/bitfield.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/reset.h>
+#include <linux/spi/spi.h>
+
+#define SP7021_DATA_RDY_REG		0x0044
+#define SP7021_SLAVE_DMA_CTRL_REG	0x0048
+#define SP7021_SLAVE_DMA_LENGTH_REG	0x004c
+#define SP7021_SLAVE_DMA_ADDR_REG	0x004c
+
+#define SP7021_SLAVE_DATA_RDY		BIT(0)
+#define SP7021_SLAVE_SW_RST		BIT(1)
+#define SP7021_SLA_DMA_W_INT		BIT(8)
+#define SP7021_SLAVE_CLR_INT		BIT(8)
+#define SP7021_SLAVE_DMA_EN		BIT(0)
+#define SP7021_SLAVE_DMA_RW		BIT(6)
+#define SP7021_SLAVE_DMA_CMD		GENMASK(3, 2)
+
+#define SP7021_FIFO_REG			0x0034
+#define SP7021_SPI_STATUS_REG		0x0038
+#define SP7021_SPI_CONFIG_REG		0x003c
+#define SP7021_INT_BUSY_REG		0x004c
+#define SP7021_DMA_CTRL_REG		0x0050
+
+#define SP7021_SPI_START_FD		BIT(0)
+#define SP7021_FD_SW_RST		BIT(1)
+#define SP7021_TX_EMP_FLAG		BIT(2)
+#define SP7021_RX_EMP_FLAG		BIT(4)
+#define SP7021_RX_FULL_FLAG		BIT(5)
+#define SP7021_FINISH_FLAG		BIT(6)
+
+#define SP7021_TX_CNT_MASK		GENMASK(11, 8)
+#define SP7021_RX_CNT_MASK		GENMASK(15, 12)
+#define SP7021_TX_LEN_MASK		GENMASK(23, 16)
+#define SP7021_GET_LEN_MASK		GENMASK(31, 24)
+#define SP7021_SET_TX_LEN		GENMASK(23, 16)
+#define SP7021_SET_XFER_LEN		GENMASK(31, 24)
+
+#define SP7021_CPOL_FD			BIT(0)
+#define SP7021_CPHA_R			BIT(1)
+#define SP7021_CPHA_W			BIT(2)
+#define SP7021_LSB_SEL			BIT(4)
+#define SP7021_CS_POR			BIT(5)
+#define SP7021_FD_SEL			BIT(6)
+
+#define SP7021_RX_UNIT			GENMASK(8, 7)
+#define SP7021_TX_UNIT			GENMASK(10, 9)
+#define SP7021_TX_EMP_FLAG_MASK		BIT(11)
+#define SP7021_RX_FULL_FLAG_MASK	BIT(14)
+#define SP7021_FINISH_FLAG_MASK		BIT(15)
+#define SP7021_CLEAN_RW_BYTE		GENMASK(10, 7)
+#define SP7021_CLEAN_FLUG_MASK		GENMASK(15, 11)
+#define SP7021_CLK_MASK			GENMASK(31, 16)
+
+#define SP7021_INT_BYPASS		BIT(3)
+#define SP7021_CLR_MASTER_INT		BIT(6)
+
+#define SP7021_SPI_DATA_SIZE		(255)
+#define SP7021_FIFO_DATA_LEN		(16)
+
+enum SP_SPI_MODE {
+	SP7021_SLAVE_READ = 0,
+	SP7021_SLAVE_WRITE = 1,
+	SP7021_SPI_IDLE = 2,
+};
+
+enum {
+	SP7021_MASTER_MODE = 0,
+	SP7021_SLAVE_MODE = 1,
+};
+
+struct sp7021_spi_ctlr {
+	struct device *dev;
+	struct spi_controller *ctlr;
+	void __iomem *m_base;
+	void __iomem *s_base;
+	u32 xfer_conf;
+	int mode;
+	int m_irq;
+	int s_irq;
+	struct clk *spi_clk;
+	struct reset_control *rstc;
+	// irq spin lock
+	spinlock_t lock;
+	// data xfer lock
+	struct mutex buf_lock;
+	struct completion isr_done;
+	struct completion slave_isr;
+	unsigned int  rx_cur_len;
+	unsigned int  tx_cur_len;
+	unsigned int  data_unit;
+	const u8 *tx_buf;
+	u8 *rx_buf;
+};
+
+static irqreturn_t sp7021_spi_slave_irq(int irq, void *dev)
+{
+	struct sp7021_spi_ctlr *pspim = dev;
+	unsigned int data_status;
+
+	data_status = readl(pspim->s_base + SP7021_DATA_RDY_REG);
+	writel(data_status | SP7021_SLAVE_CLR_INT, pspim->s_base + SP7021_DATA_RDY_REG);
+	complete(&pspim->slave_isr);
+	return IRQ_HANDLED;
+}
+
+static int sp7021_spi_slave_abort(struct spi_controller *ctlr)
+{
+	struct sp7021_spi_ctlr *pspim = spi_master_get_devdata(ctlr);
+
+	complete(&pspim->slave_isr);
+	complete(&pspim->isr_done);
+	return 0;
+}
+
+int sp7021_spi_slave_tx(struct spi_device *spi, struct spi_transfer *xfer)
+{
+	struct sp7021_spi_ctlr *pspim = spi_controller_get_devdata(spi->controller);
+
+	reinit_completion(&pspim->slave_isr);
+	writel(SP7021_SLAVE_DMA_EN | SP7021_SLAVE_DMA_RW | FIELD_PREP(SP7021_SLAVE_DMA_CMD, 3),
+	       pspim->s_base + SP7021_SLAVE_DMA_CTRL_REG);
+	writel(xfer->len, pspim->s_base + SP7021_SLAVE_DMA_LENGTH_REG);
+	writel(xfer->tx_dma, pspim->s_base + SP7021_SLAVE_DMA_ADDR_REG);
+	writel(readl(pspim->s_base + SP7021_DATA_RDY_REG) | SP7021_SLAVE_DATA_RDY,
+	       pspim->s_base + SP7021_DATA_RDY_REG);
+	if (wait_for_completion_interruptible(&pspim->isr_done)) {
+		dev_err(&spi->dev, "%s() wait_for_completion err\n", __func__);
+		return -EINTR;
+	}
+	return 0;
+}
+
+int sp7021_spi_slave_rx(struct spi_device *spi, struct spi_transfer *xfer)
+{
+	struct sp7021_spi_ctlr *pspim = spi_controller_get_devdata(spi->controller);
+	int ret = 0;
+
+	reinit_completion(&pspim->isr_done);
+	writel(SP7021_SLAVE_DMA_EN | FIELD_PREP(SP7021_SLAVE_DMA_CMD, 3),
+	       pspim->s_base + SP7021_SLAVE_DMA_CTRL_REG);
+	writel(xfer->len, pspim->s_base + SP7021_SLAVE_DMA_LENGTH_REG);
+	writel(xfer->rx_dma, pspim->s_base + SP7021_SLAVE_DMA_ADDR_REG);
+	if (wait_for_completion_interruptible(&pspim->isr_done)) {
+		dev_err(&spi->dev, "%s() wait_for_completion err\n", __func__);
+		return -EINTR;
+	}
+	writel(SP7021_SLAVE_SW_RST, pspim->s_base + SP7021_SLAVE_DMA_CTRL_REG);
+	return ret;
+}
+
+void sp7021_spi_master_rb(struct sp7021_spi_ctlr *pspim, unsigned int len)
+{
+	int i;
+
+	for (i = 0; i < len; i++) {
+		pspim->rx_buf[pspim->rx_cur_len] =
+			readl(pspim->m_base + SP7021_FIFO_REG);
+		pspim->rx_cur_len++;
+	}
+}
+
+void sp7021_spi_master_wb(struct sp7021_spi_ctlr *pspim, unsigned int len)
+{
+	int i;
+
+	for (i = 0; i < len; i++) {
+		writel(pspim->tx_buf[pspim->tx_cur_len],
+		       pspim->m_base + SP7021_FIFO_REG);
+		pspim->tx_cur_len++;
+	}
+}
+
+static irqreturn_t sp7021_spi_master_irq(int irq, void *dev)
+{
+	struct sp7021_spi_ctlr *pspim = dev;
+	unsigned int tx_cnt, total_len;
+	unsigned int tx_len, rx_cnt;
+	unsigned int fd_status;
+	unsigned long flags;
+	bool isrdone = false;
+	u32 value;
+
+	fd_status = readl(pspim->m_base + SP7021_SPI_STATUS_REG);
+	tx_cnt = FIELD_GET(SP7021_TX_CNT_MASK, fd_status);
+	tx_len = FIELD_GET(SP7021_TX_LEN_MASK, fd_status);
+	total_len = FIELD_GET(SP7021_GET_LEN_MASK, fd_status);
+
+	if ((fd_status & SP7021_TX_EMP_FLAG) && (fd_status & SP7021_RX_EMP_FLAG) && total_len == 0)
+		return IRQ_NONE;
+
+	if (tx_len == 0 && total_len == 0)
+		return IRQ_NONE;
+
+	spin_lock_irqsave(&pspim->lock, flags);
+
+	rx_cnt = FIELD_GET(SP7021_RX_CNT_MASK, fd_status);
+	if (fd_status & SP7021_RX_FULL_FLAG)
+		rx_cnt = pspim->data_unit;
+
+	tx_cnt = min(tx_len - pspim->tx_cur_len, pspim->data_unit - tx_cnt);
+	dev_dbg(pspim->dev, "fd_st=0x%x rx_c:%d tx_c:%d tx_l:%d",
+		fd_status, rx_cnt, tx_cnt, tx_len);
+
+	if (rx_cnt > 0)
+		sp7021_spi_master_rb(pspim, rx_cnt);
+	if (tx_cnt > 0)
+		sp7021_spi_master_wb(pspim, tx_cnt);
+
+	fd_status = readl(pspim->m_base + SP7021_SPI_STATUS_REG);
+	tx_len = FIELD_GET(SP7021_TX_LEN_MASK, fd_status);
+	total_len = FIELD_GET(SP7021_GET_LEN_MASK, fd_status);
+
+	if (fd_status & SP7021_FINISH_FLAG || tx_len == pspim->tx_cur_len) {
+		while (total_len != pspim->rx_cur_len) {
+			fd_status = readl(pspim->m_base + SP7021_SPI_STATUS_REG);
+			total_len = FIELD_GET(SP7021_GET_LEN_MASK, fd_status);
+			if (fd_status & SP7021_RX_FULL_FLAG)
+				rx_cnt = pspim->data_unit;
+			else
+				rx_cnt = FIELD_GET(SP7021_RX_CNT_MASK, fd_status);
+
+			if (rx_cnt > 0)
+				sp7021_spi_master_rb(pspim, rx_cnt);
+		}
+		value = readl(pspim->m_base + SP7021_INT_BUSY_REG);
+		value |= SP7021_CLR_MASTER_INT;
+		writel(value, pspim->m_base + SP7021_INT_BUSY_REG);
+		writel(SP7021_FINISH_FLAG, pspim->m_base + SP7021_SPI_STATUS_REG);
+		isrdone = true;
+	}
+
+	if (isrdone)
+		complete(&pspim->isr_done);
+	spin_unlock_irqrestore(&pspim->lock, flags);
+	return IRQ_HANDLED;
+}
+
+static void sp7021_prep_transfer(struct spi_controller *ctlr, struct spi_device *spi)
+{
+	struct sp7021_spi_ctlr *pspim = spi_master_get_devdata(ctlr);
+
+	pspim->tx_cur_len = 0;
+	pspim->rx_cur_len = 0;
+	pspim->data_unit = SP7021_FIFO_DATA_LEN;
+}
+
+// preliminary set CS, CPOL, CPHA and LSB
+static int sp7021_spi_controller_prepare_message(struct spi_controller *ctlr,
+						 struct spi_message *msg)
+{
+	struct sp7021_spi_ctlr *pspim = spi_master_get_devdata(ctlr);
+	struct spi_device *s = msg->spi;
+	u32 valus, rs = 0;
+
+	valus = readl(pspim->m_base + SP7021_SPI_STATUS_REG);
+	valus |= SP7021_FD_SW_RST;
+	writel(valus, pspim->m_base + SP7021_SPI_STATUS_REG);
+	rs |= SP7021_FD_SEL;
+	if (s->mode & SPI_CPOL)
+		rs |= SP7021_CPOL_FD;
+
+	if (s->mode & SPI_LSB_FIRST)
+		rs |= SP7021_LSB_SEL;
+
+	if (s->mode & SPI_CS_HIGH)
+		rs |= SP7021_CS_POR;
+
+	if (s->mode & SPI_CPHA)
+		rs |=  SP7021_CPHA_R;
+	else
+		rs |=  SP7021_CPHA_W;
+
+	rs |=  FIELD_PREP(SP7021_TX_UNIT, 0) | FIELD_PREP(SP7021_RX_UNIT, 0);
+	pspim->xfer_conf = rs;
+	if (pspim->xfer_conf & SP7021_CPOL_FD)
+		writel(pspim->xfer_conf, pspim->m_base + SP7021_SPI_CONFIG_REG);
+
+	return 0;
+}
+
+static void sp7021_spi_setup_clk(struct spi_controller *ctlr, struct spi_transfer *xfer)
+{
+	struct sp7021_spi_ctlr *pspim = spi_master_get_devdata(ctlr);
+	u32 clk_rate, clk_sel, div;
+
+	clk_rate = clk_get_rate(pspim->spi_clk);
+	div = clk_rate / xfer->speed_hz;
+	if (div < 2)
+		div = 2;
+	clk_sel = (div / 2) - 1;
+	pspim->xfer_conf &= SP7021_CLK_MASK;
+	pspim->xfer_conf |= FIELD_PREP(SP7021_CLK_MASK, clk_sel);
+	writel(pspim->xfer_conf, pspim->m_base + SP7021_SPI_CONFIG_REG);
+}
+
+static int sp7021_spi_master_transfer_one(struct spi_controller *ctlr, struct spi_device *spi,
+				       struct spi_transfer *xfer)
+{
+	struct sp7021_spi_ctlr *pspim = spi_master_get_devdata(ctlr);
+	unsigned long timeout = msecs_to_jiffies(1000);
+	unsigned int xfer_cnt, xfer_len, last_len;
+	unsigned int i, len_temp;
+	u32 reg_temp;
+	int ret;
+
+	xfer_cnt = xfer->len / SP7021_SPI_DATA_SIZE;
+	last_len = xfer->len % SP7021_SPI_DATA_SIZE;
+
+	for (i = 0; i <= xfer_cnt; i++) {
+		mutex_lock(&pspim->buf_lock);
+		sp7021_prep_transfer(ctlr, spi);
+		sp7021_spi_setup_clk(ctlr, xfer);
+		reinit_completion(&pspim->isr_done);
+
+		if (i == xfer_cnt)
+			xfer_len = last_len;
+		else
+			xfer_len = SP7021_SPI_DATA_SIZE;
+
+		pspim->tx_buf = xfer->tx_buf + i * SP7021_SPI_DATA_SIZE;
+		pspim->rx_buf = xfer->rx_buf + i * SP7021_SPI_DATA_SIZE;
+
+		if (pspim->tx_cur_len < xfer_len) {
+			len_temp = min(pspim->data_unit, xfer_len);
+			sp7021_spi_master_wb(pspim, len_temp);
+		}
+		reg_temp = readl(pspim->m_base + SP7021_SPI_CONFIG_REG);
+		reg_temp &= ~SP7021_CLEAN_RW_BYTE;
+		reg_temp &= ~SP7021_CLEAN_FLUG_MASK;
+		reg_temp |= SP7021_FD_SEL | SP7021_FINISH_FLAG_MASK |
+			    SP7021_TX_EMP_FLAG_MASK | SP7021_RX_FULL_FLAG_MASK |
+			    FIELD_PREP(SP7021_TX_UNIT, 0) | FIELD_PREP(SP7021_RX_UNIT, 0);
+		writel(reg_temp, pspim->m_base + SP7021_SPI_CONFIG_REG);
+
+		reg_temp = FIELD_PREP(SP7021_SET_TX_LEN, xfer_len) |
+				      FIELD_PREP(SP7021_SET_XFER_LEN, xfer_len) |
+				      SP7021_SPI_START_FD;
+		writel(reg_temp, pspim->m_base + SP7021_SPI_STATUS_REG);
+
+		if (!wait_for_completion_interruptible_timeout(&pspim->isr_done, timeout)) {
+			dev_err(&spi->dev, "wait_for_completion err\n");
+			return -ETIMEDOUT;
+		}
+
+		reg_temp = readl(pspim->m_base + SP7021_SPI_STATUS_REG);
+		if (reg_temp & SP7021_FINISH_FLAG) {
+			writel(SP7021_FINISH_FLAG, pspim->m_base + SP7021_SPI_STATUS_REG);
+			writel(readl(pspim->m_base + SP7021_SPI_CONFIG_REG) &
+				SP7021_CLEAN_FLUG_MASK, pspim->m_base + SP7021_SPI_CONFIG_REG);
+		}
+
+		if (pspim->xfer_conf & SP7021_CPOL_FD)
+			writel(pspim->xfer_conf, pspim->m_base + SP7021_SPI_CONFIG_REG);
+
+		mutex_unlock(&pspim->buf_lock);
+		ret = 0;
+	}
+	return ret;
+}
+
+static int sp7021_spi_slave_transfer_one(struct spi_controller *ctlr, struct spi_device *spi,
+				       struct spi_transfer *xfer)
+{
+	struct sp7021_spi_ctlr *pspim = spi_master_get_devdata(ctlr);
+	struct device *dev = pspim->dev;
+	int mode, ret = 0;
+
+	mode = SP7021_SPI_IDLE;
+	if (xfer->tx_buf && xfer->rx_buf) {
+		dev_dbg(&ctlr->dev, "%s() wrong command\n", __func__);
+		ret = -EINVAL;
+	} else if (xfer->tx_buf) {
+		xfer->tx_dma = dma_map_single(dev, (void *)xfer->tx_buf,
+					      xfer->len, DMA_TO_DEVICE);
+		if (dma_mapping_error(dev, xfer->tx_dma))
+			return -ENOMEM;
+		mode = SP7021_SLAVE_WRITE;
+	} else if (xfer->rx_buf) {
+		xfer->rx_dma = dma_map_single(dev, xfer->rx_buf, xfer->len,
+					      DMA_FROM_DEVICE);
+		if (dma_mapping_error(dev, xfer->rx_dma))
+			return -ENOMEM;
+		mode = SP7021_SLAVE_READ;
+	}
+
+	switch (mode) {
+	case SP7021_SLAVE_WRITE:
+		ret = sp7021_spi_slave_tx(spi, xfer);
+		break;
+	case SP7021_SLAVE_READ:
+		ret = sp7021_spi_slave_rx(spi, xfer);
+		break;
+	default:
+		break;
+	}
+	if (xfer->tx_buf)
+		dma_unmap_single(dev, xfer->tx_dma, xfer->len, DMA_TO_DEVICE);
+	if (xfer->rx_buf)
+		dma_unmap_single(dev, xfer->rx_dma, xfer->len, DMA_FROM_DEVICE);
+
+	spi_finalize_current_transfer(ctlr);
+	return ret;
+}
+
+static void sp7021_spi_disable_unprepare(void *data)
+{
+	clk_disable_unprepare(data);
+}
+
+static void sp7021_spi_reset_control_assert(void *data)
+{
+	reset_control_assert(data);
+}
+
+static int sp7021_spi_controller_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct sp7021_spi_ctlr *pspim;
+	struct spi_controller *ctlr;
+	int mode, ret;
+
+	pdev->id = of_alias_get_id(pdev->dev.of_node, "sp_spi");
+
+	if (device_property_read_bool(dev, "spi-slave"))
+		mode = SP7021_SLAVE_MODE;
+	else
+		mode = SP7021_MASTER_MODE;
+
+	if (mode == SP7021_SLAVE_MODE)
+		ctlr = devm_spi_alloc_slave(dev, sizeof(*pspim));
+	else
+		ctlr = devm_spi_alloc_master(dev, sizeof(*pspim));
+	if (!ctlr)
+		return -ENOMEM;
+	device_set_node(&ctlr->dev, pdev->dev.fwnode);
+	ctlr->bus_num = pdev->id;
+	ctlr->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH | SPI_LSB_FIRST;
+	ctlr->auto_runtime_pm = true;
+	ctlr->prepare_message = sp7021_spi_controller_prepare_message;
+	if (mode == SP7021_SLAVE_MODE) {
+		ctlr->transfer_one = sp7021_spi_slave_transfer_one;
+		ctlr->slave_abort = sp7021_spi_slave_abort;
+		ctlr->flags = SPI_CONTROLLER_HALF_DUPLEX;
+	} else {
+		ctlr->bits_per_word_mask = SPI_BPW_MASK(8);
+		ctlr->min_speed_hz = 40000;
+		ctlr->max_speed_hz = 25000000;
+		ctlr->use_gpio_descriptors = true;
+		ctlr->flags = SPI_CONTROLLER_MUST_RX | SPI_CONTROLLER_MUST_TX;
+		ctlr->transfer_one = sp7021_spi_master_transfer_one;
+	}
+	platform_set_drvdata(pdev, ctlr);
+	pspim = spi_controller_get_devdata(ctlr);
+	pspim->mode = mode;
+	pspim->ctlr = ctlr;
+	pspim->dev = dev;
+	spin_lock_init(&pspim->lock);
+	mutex_init(&pspim->buf_lock);
+	init_completion(&pspim->isr_done);
+	init_completion(&pspim->slave_isr);
+
+	pspim->m_base = devm_platform_ioremap_resource_byname(pdev, "master");
+	if (IS_ERR(pspim->m_base))
+		return dev_err_probe(dev, PTR_ERR(pspim->m_base), "m_base get fail\n");
+
+	pspim->s_base = devm_platform_ioremap_resource_byname(pdev, "slave");
+	if (IS_ERR(pspim->s_base))
+		return dev_err_probe(dev, PTR_ERR(pspim->s_base), "s_base get fail\n");
+
+	pspim->m_irq = platform_get_irq_byname(pdev, "master_risc");
+	if (pspim->m_irq < 0)
+		return pspim->m_irq;
+
+	pspim->s_irq = platform_get_irq_byname(pdev, "slave_risc");
+	if (pspim->s_irq < 0)
+		return pspim->s_irq;
+
+	ret = devm_request_irq(dev, pspim->m_irq, sp7021_spi_master_irq,
+			       IRQF_TRIGGER_RISING, pdev->name, pspim);
+	if (ret)
+		return ret;
+
+	ret = devm_request_irq(dev, pspim->s_irq, sp7021_spi_slave_irq,
+			       IRQF_TRIGGER_RISING, pdev->name, pspim);
+	if (ret)
+		return ret;
+
+	pspim->spi_clk = devm_clk_get(dev, NULL);
+	if (IS_ERR(pspim->spi_clk))
+		return dev_err_probe(dev, PTR_ERR(pspim->spi_clk), "clk get fail\n");
+
+	pspim->rstc = devm_reset_control_get_exclusive(dev, NULL);
+	if (IS_ERR(pspim->rstc))
+		return dev_err_probe(dev, PTR_ERR(pspim->rstc), "rst get fail\n");
+
+	ret = clk_prepare_enable(pspim->spi_clk);
+	if (ret)
+		return dev_err_probe(dev, ret, "failed to enable clk\n");
+
+	ret = devm_add_action_or_reset(dev, sp7021_spi_disable_unprepare, pspim->spi_clk);
+	if (ret)
+		return ret;
+
+	ret = reset_control_deassert(pspim->rstc);
+	if (ret)
+		return dev_err_probe(dev, ret, "failed to deassert reset\n");
+
+	ret = devm_add_action_or_reset(dev, sp7021_spi_reset_control_assert, pspim->rstc);
+	if (ret)
+		return ret;
+
+	pm_runtime_enable(dev);
+	ret = spi_register_controller(ctlr);
+	if (ret) {
+		pm_runtime_disable(dev);
+		return dev_err_probe(dev, ret, "spi_register_master fail\n");
+	}
+	return 0;
+}
+
+static int sp7021_spi_controller_remove(struct platform_device *pdev)
+{
+	struct spi_controller *ctlr = dev_get_drvdata(&pdev->dev);
+
+	spi_unregister_controller(ctlr);
+	pm_runtime_disable(&pdev->dev);
+	pm_runtime_set_suspended(&pdev->dev);
+	return 0;
+}
+
+static int __maybe_unused sp7021_spi_controller_suspend(struct device *dev)
+{
+	struct spi_controller *ctlr = dev_get_drvdata(dev);
+	struct sp7021_spi_ctlr *pspim = spi_master_get_devdata(ctlr);
+
+	return reset_control_assert(pspim->rstc);
+}
+
+static int __maybe_unused sp7021_spi_controller_resume(struct device *dev)
+{
+	struct spi_controller *ctlr = dev_get_drvdata(dev);
+	struct sp7021_spi_ctlr *pspim = spi_master_get_devdata(ctlr);
+
+	reset_control_deassert(pspim->rstc);
+	return clk_prepare_enable(pspim->spi_clk);
+}
+
+static int sp7021_spi_runtime_suspend(struct device *dev)
+{
+	struct spi_controller *ctlr = dev_get_drvdata(dev);
+	struct sp7021_spi_ctlr *pspim = spi_master_get_devdata(ctlr);
+
+	return reset_control_assert(pspim->rstc);
+}
+
+static int sp7021_spi_runtime_resume(struct device *dev)
+{
+	struct spi_controller *ctlr = dev_get_drvdata(dev);
+	struct sp7021_spi_ctlr *pspim = spi_master_get_devdata(ctlr);
+
+	return reset_control_deassert(pspim->rstc);
+}
+
+static const struct dev_pm_ops sp7021_spi_pm_ops = {
+	SET_RUNTIME_PM_OPS(sp7021_spi_runtime_suspend,
+			   sp7021_spi_runtime_resume, NULL)
+	SET_SYSTEM_SLEEP_PM_OPS(sp7021_spi_controller_suspend,
+				sp7021_spi_controller_resume)
+};
+
+static const struct of_device_id sp7021_spi_controller_ids[] = {
+	{ .compatible = "sunplus,sp7021-spi" },
+	{}
+};
+MODULE_DEVICE_TABLE(of, sp7021_spi_controller_ids);
+
+static struct platform_driver sp7021_spi_controller_driver = {
+	.probe = sp7021_spi_controller_probe,
+	.remove = sp7021_spi_controller_remove,
+	.driver = {
+		.name = "sunplus,sp7021-spi-controller",
+		.of_match_table = sp7021_spi_controller_ids,
+		.pm     = &sp7021_spi_pm_ops,
+	},
+};
+module_platform_driver(sp7021_spi_controller_driver);
+
+MODULE_AUTHOR("Li-hao Kuo <lhjeff911@gmail.com>");
+MODULE_DESCRIPTION("Sunplus SPI controller driver");
+MODULE_LICENSE("GPL");
-- 
GitLab


From a708078eeb992799161e794d9c569cf4f725a5b0 Mon Sep 17 00:00:00 2001
From: Li-hao Kuo <lhjeff911@gmail.com>
Date: Tue, 25 Jan 2022 12:14:29 +0000
Subject: [PATCH 0028/1586] spi: Add Sunplus SP7021 schema

Add bindings for Sunplus SP7021 spi driver

Signed-off-by: Li-hao Kuo <lhjeff911@gmail.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../bindings/spi/spi-sunplus-sp7021.yaml      | 81 +++++++++++++++++++
 MAINTAINERS                                   |  1 +
 2 files changed, 82 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/spi/spi-sunplus-sp7021.yaml

diff --git a/Documentation/devicetree/bindings/spi/spi-sunplus-sp7021.yaml b/Documentation/devicetree/bindings/spi/spi-sunplus-sp7021.yaml
new file mode 100644
index 0000000000000..24382cdda6455
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/spi-sunplus-sp7021.yaml
@@ -0,0 +1,81 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+# Copyright (C) Sunplus Co., Ltd. 2021
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/spi/spi-sunplus-sp7021.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Sunplus sp7021 SPI controller
+
+allOf:
+  - $ref: "spi-controller.yaml"
+
+maintainers:
+  - Li-hao Kuo <lhjeff911@gmail.com>
+
+properties:
+  compatible:
+    enum:
+      - sunplus,sp7021-spi
+
+  reg:
+    items:
+      - the SPI master registers
+      - the SPI slave registers
+
+  reg-names:
+    items:
+      - const: master
+      - const: slave
+
+  interrupt-names:
+    items:
+      - const: dma_w
+      - const: master_risc
+      - const: slave_risc
+
+  interrupts:
+    minItems: 3
+
+  clocks:
+    maxItems: 1
+
+  resets:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - reg-names
+  - interrupts
+  - interrupt-names
+  - clocks
+  - clocks-names
+  - resets
+  - pinctrl-names
+  - pinctrl-0
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/sp-sp7021.h>
+    #include <dt-bindings/reset/sp-sp7021.h>
+    #include <dt-bindings/interrupt-controller/irq.h>
+    spi@9C002D80 {
+        compatible = "sunplus,sp7021-spi";
+        reg = <0x9C002D80 0x80>, <0x9C002E00 0x80>;
+        reg-names = "master", "slave";
+        interrupt-parent = <&intc>;
+        interrupt-names = "dma_w",
+                          "master_risc",
+                          "slave_risc";
+        interrupts = <144 IRQ_TYPE_LEVEL_HIGH>,
+                     <146 IRQ_TYPE_LEVEL_HIGH>,
+                     <145 IRQ_TYPE_LEVEL_HIGH>;
+        clocks = <&clkc SPI_COMBO_0>;
+        resets = <&rstc RST_SPI_COMBO_0>;
+        pinctrl-names = "default";
+        pinctrl-0 = <&pins_spi0>;
+    };
+...
diff --git a/MAINTAINERS b/MAINTAINERS
index a517966bc7e88..13301adabe6e8 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -18495,6 +18495,7 @@ SUNPLUS SPI CONTROLLER INTERFACE DRIVER
 M:	Li-hao Kuo <lhjeff911@gmail.com>
 L:	linux-spi@vger.kernel.org
 S:	Maintained
+F:	Documentation/devicetree/bindings/spi/spi-sunplus-sp7021.yaml
 F:	drivers/spi/spi-sunplus-sp7021.c
 
 SUPERH
-- 
GitLab


From b1bbd3a57b94889cd17147f5594db7f0652275ef Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Mon, 17 Jan 2022 16:48:21 -0800
Subject: [PATCH 0029/1586] thermal: fix Documentation bullet list warning

Fix bullet list indentation formatting to prevent a docs build warning.
Use same indentation style as other files in this directory.

Documentation/ABI/testing/sysfs-class-thermal:201: WARNING: Bullet list ends without a blank line; unexpected unindent.

Fixes: 80da1b508f29 ("thermal: Move ABI documentation to Documentation/ABI")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/ABI/testing/sysfs-class-thermal | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/ABI/testing/sysfs-class-thermal b/Documentation/ABI/testing/sysfs-class-thermal
index 2c52bb1f864ce..8eee37982b2a8 100644
--- a/Documentation/ABI/testing/sysfs-class-thermal
+++ b/Documentation/ABI/testing/sysfs-class-thermal
@@ -203,7 +203,7 @@ Description:
 
 		- for generic ACPI: should be "Fan", "Processor" or "LCD"
 		- for memory controller device on intel_menlow platform:
-		should be "Memory controller".
+		  should be "Memory controller".
 
 		RO, Required
 
-- 
GitLab


From ae57857b9b6341096ddfd9c0cf26fb640c561160 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 11 Jan 2022 16:55:25 +0100
Subject: [PATCH 0030/1586] ACPICA: Use uintptr_t and offsetof() in Linux
 kernel builds

To avoid "performing pointer subtraction with a null pointer has
undefined behavior" compiler warnings, use uintptr_t and offsetof()
that are always available during Linux kernel builds to define
acpi_uintptr_t and the ACPI_TO_INTEGER() and ACPI_OFFSET() macros.

Based on earlier proposal from Arnd Bergmann.

Link: https://lore.kernel.org/linux-acpi/20210927121338.938994-1-arnd@kernel.org
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Arnd Bergmann <arnd@arndb.de>
---
 include/acpi/actypes.h          | 4 ++++
 include/acpi/platform/aclinux.h | 5 +++++
 2 files changed, 9 insertions(+)

diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h
index 69e89d572b9e3..02c1fa16e6388 100644
--- a/include/acpi/actypes.h
+++ b/include/acpi/actypes.h
@@ -507,8 +507,12 @@ typedef u64 acpi_integer;
 /* Pointer/Integer type conversions */
 
 #define ACPI_TO_POINTER(i)              ACPI_CAST_PTR (void, (acpi_size) (i))
+#ifndef ACPI_TO_INTEGER
 #define ACPI_TO_INTEGER(p)              ACPI_PTR_DIFF (p, (void *) 0)
+#endif
+#ifndef ACPI_OFFSET
 #define ACPI_OFFSET(d, f)               ACPI_PTR_DIFF (&(((d *) 0)->f), (void *) 0)
+#endif
 #define ACPI_PTR_TO_PHYSADDR(i)         ACPI_TO_INTEGER(i)
 
 /* Optimizations for 4-character (32-bit) acpi_name manipulation */
diff --git a/include/acpi/platform/aclinux.h b/include/acpi/platform/aclinux.h
index b3ffb9bbf664b..cec41e004ecf7 100644
--- a/include/acpi/platform/aclinux.h
+++ b/include/acpi/platform/aclinux.h
@@ -114,6 +114,11 @@
 #define acpi_raw_spinlock                   raw_spinlock_t *
 #define acpi_cpu_flags                      unsigned long
 
+#define acpi_uintptr_t                      uintptr_t
+
+#define ACPI_TO_INTEGER(p)                  ((uintptr_t)(p))
+#define ACPI_OFFSET(d, f)                   offsetof(d, f)
+
 /* Use native linux version of acpi_os_allocate_zeroed */
 
 #define USE_NATIVE_ALLOCATE_ZEROED
-- 
GitLab


From 2e433a94dab0246fee706d18aaecd67007ead404 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 12 Jan 2022 19:31:48 +0100
Subject: [PATCH 0031/1586] ACPI: OSL: Fix and clean up
 acpi_os_read/write_port()

First, remove type casts that make acpi_os_read_port() only work on
little endian and are generally not needed.

Second, avoid clearing the memory pointed to by the value return
pointer in acpi_os_read_port() if it is the dummy on the stack (in
which case clearing it is not necessary).

Finally, prevent both acpi_os_read_port() and acpi_os_write_port()
from crashing the kernel when they receive an unsupported width
value and make them print a debug message and return an error instead.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/osl.c | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index 45c5c0e45e332..7a70c4bfc23c6 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -642,22 +642,24 @@ u64 acpi_os_get_timer(void)
 		(ACPI_100NSEC_PER_SEC / HZ);
 }
 
-acpi_status acpi_os_read_port(acpi_io_address port, u32 * value, u32 width)
+acpi_status acpi_os_read_port(acpi_io_address port, u32 *value, u32 width)
 {
 	u32 dummy;
 
-	if (!value)
+	if (value)
+		*value = 0;
+	else
 		value = &dummy;
 
-	*value = 0;
 	if (width <= 8) {
-		*(u8 *) value = inb(port);
+		*value = inb(port);
 	} else if (width <= 16) {
-		*(u16 *) value = inw(port);
+		*value = inw(port);
 	} else if (width <= 32) {
-		*(u32 *) value = inl(port);
+		*value = inl(port);
 	} else {
-		BUG();
+		pr_debug("%s: Access width %d not supported\n", __func__, width);
+		return AE_BAD_PARAMETER;
 	}
 
 	return AE_OK;
@@ -674,7 +676,8 @@ acpi_status acpi_os_write_port(acpi_io_address port, u32 value, u32 width)
 	} else if (width <= 32) {
 		outl(value, port);
 	} else {
-		BUG();
+		pr_debug("%s: Access width %d not supported\n", __func__, width);
+		return AE_BAD_PARAMETER;
 	}
 
 	return AE_OK;
-- 
GitLab


From babc92da5928f81af951663fc436997352e02d3a Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Fri, 14 Jan 2022 13:24:49 +0200
Subject: [PATCH 0032/1586] ACPI: properties: Consistently return -ENOENT if
 there are no more references

__acpi_node_get_property_reference() is documented to return -ENOENT if
the caller requests a property reference at an index that does not exist,
not -EINVAL which it actually does.

Fix this by returning -ENOENT consistenly, independently of whether the
property value is a plain reference or a package.

Fixes: c343bc2ce2c6 ("ACPI: properties: Align return codes of __acpi_node_get_property_reference()")
Cc: 4.14+ <stable@vger.kernel.org> # 4.14+
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/property.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c
index d0986bda29640..3fceb4681ec9f 100644
--- a/drivers/acpi/property.c
+++ b/drivers/acpi/property.c
@@ -685,7 +685,7 @@ int __acpi_node_get_property_reference(const struct fwnode_handle *fwnode,
 	 */
 	if (obj->type == ACPI_TYPE_LOCAL_REFERENCE) {
 		if (index)
-			return -EINVAL;
+			return -ENOENT;
 
 		device = acpi_fetch_acpi_dev(obj->reference.handle);
 		if (!device)
-- 
GitLab


From 0266c25e7c2821181b610595df42cbca6bc93cb8 Mon Sep 17 00:00:00 2001
From: "GONG, Ruiqi" <gongruiqi1@huawei.com>
Date: Tue, 25 Jan 2022 15:11:33 +0800
Subject: [PATCH 0033/1586] selinux: access superblock_security_struct in LSM
 blob way

LSM blob has been involved for superblock's security struct. So fix the
remaining direct access to sb->s_security by using the LSM blob
mechanism.

Fixes: 08abe46b2cfc ("selinux: fall back to SECURITY_FS_USE_GENFS if no xattr support")
Fixes: 69c4a42d72eb ("lsm,selinux: add new hook to compare new mount to an existing mount")
Signed-off-by: GONG, Ruiqi <gongruiqi1@huawei.com>
Reviewed-by: Casey Schaufler <casey@schaufler-ca.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 security/selinux/hooks.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 5b6895e4fc29e..a0243bae84238 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -479,7 +479,7 @@ static int selinux_is_sblabel_mnt(struct super_block *sb)
 
 static int sb_check_xattr_support(struct super_block *sb)
 {
-	struct superblock_security_struct *sbsec = sb->s_security;
+	struct superblock_security_struct *sbsec = selinux_superblock(sb);
 	struct dentry *root = sb->s_root;
 	struct inode *root_inode = d_backing_inode(root);
 	u32 sid;
@@ -2647,7 +2647,7 @@ free_opt:
 static int selinux_sb_mnt_opts_compat(struct super_block *sb, void *mnt_opts)
 {
 	struct selinux_mnt_opts *opts = mnt_opts;
-	struct superblock_security_struct *sbsec = sb->s_security;
+	struct superblock_security_struct *sbsec = selinux_superblock(sb);
 	u32 sid;
 	int rc;
 
-- 
GitLab


From bcb62828e3e8c813b6613db6eb7fd9657db248fc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20G=C3=B6ttsche?= <cgzones@googlemail.com>
Date: Tue, 25 Jan 2022 15:14:20 +0100
Subject: [PATCH 0034/1586] selinux: check return value of sel_make_avc_files
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

sel_make_avc_files() might fail and return a negative errno value on
memory allocation failures. Re-add the check of the return value,
dropped in 66f8e2f03c02 ("selinux: sidtab reverse lookup hash table").

Reported by clang-analyzer:

    security/selinux/selinuxfs.c:2129:2: warning: Value stored to
      'ret' is never read [deadcode.DeadStores]
            ret = sel_make_avc_files(dentry);
            ^     ~~~~~~~~~~~~~~~~~~~~~~~~~~

Fixes: 66f8e2f03c02 ("selinux: sidtab reverse lookup hash table")
Signed-off-by: Christian Göttsche <cgzones@googlemail.com>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
[PM: description line wrapping, added proper commit ref]
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 security/selinux/selinuxfs.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c
index e4cd7cb856f37..f2f6203e0fff5 100644
--- a/security/selinux/selinuxfs.c
+++ b/security/selinux/selinuxfs.c
@@ -2127,6 +2127,8 @@ static int sel_fill_super(struct super_block *sb, struct fs_context *fc)
 	}
 
 	ret = sel_make_avc_files(dentry);
+	if (ret)
+		goto err;
 
 	dentry = sel_make_dir(sb->s_root, "ss", &fsi->last_ino);
 	if (IS_ERR(dentry)) {
-- 
GitLab


From 08df49054f311ca04954cf24d1216d3b5ddfd0a6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20G=C3=B6ttsche?= <cgzones@googlemail.com>
Date: Tue, 25 Jan 2022 15:14:12 +0100
Subject: [PATCH 0035/1586] selinux: declare path parameters of _genfs_sid
 const
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The path parameter is only read from in security_genfs_sid(),
selinux_policy_genfs_sid() and __security_genfs_sid(). Since a string
literal is passed as argument, declare the parameter const.
Also align the parameter names in the declaration and definition.

Reported by clang [-Wwrite-strings]:

    security/selinux/hooks.c:553:60: error: passing 'const char [2]'
      to parameter of type 'char *' discards qualifiers
      [-Wincompatible-pointer-types-discards-qualifiers]
            rc = security_genfs_sid(&selinux_state, ... , /,
                                                          ^~~
    ./security/selinux/include/security.h:389:36: note: passing
      argument to parameter 'name' here
                           const char *fstype, char *name, u16 sclass,
                                                     ^

Signed-off-by: Christian Göttsche <cgzones@googlemail.com>
[PM: wrapped description]
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 security/selinux/include/security.h | 4 ++--
 security/selinux/ss/services.c      | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h
index ac0ece01305a6..6482e0efb3689 100644
--- a/security/selinux/include/security.h
+++ b/security/selinux/include/security.h
@@ -386,11 +386,11 @@ int security_get_allow_unknown(struct selinux_state *state);
 int security_fs_use(struct selinux_state *state, struct super_block *sb);
 
 int security_genfs_sid(struct selinux_state *state,
-		       const char *fstype, char *name, u16 sclass,
+		       const char *fstype, const char *path, u16 sclass,
 		       u32 *sid);
 
 int selinux_policy_genfs_sid(struct selinux_policy *policy,
-		       const char *fstype, char *name, u16 sclass,
+		       const char *fstype, const char *path, u16 sclass,
 		       u32 *sid);
 
 #ifdef CONFIG_NETLABEL
diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index 8e92af7dd284c..5a7df45bdab17 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -2875,7 +2875,7 @@ out_unlock:
  */
 static inline int __security_genfs_sid(struct selinux_policy *policy,
 				       const char *fstype,
-				       char *path,
+				       const char *path,
 				       u16 orig_sclass,
 				       u32 *sid)
 {
@@ -2928,7 +2928,7 @@ static inline int __security_genfs_sid(struct selinux_policy *policy,
  */
 int security_genfs_sid(struct selinux_state *state,
 		       const char *fstype,
-		       char *path,
+		       const char *path,
 		       u16 orig_sclass,
 		       u32 *sid)
 {
@@ -2952,7 +2952,7 @@ int security_genfs_sid(struct selinux_state *state,
 
 int selinux_policy_genfs_sid(struct selinux_policy *policy,
 			const char *fstype,
-			char *path,
+			const char *path,
 			u16 orig_sclass,
 			u32 *sid)
 {
-- 
GitLab


From 841255544b653cbabe4ee5eda56bbb8b7ad8de8a Mon Sep 17 00:00:00 2001
From: Peng Fan <peng.fan@nxp.com>
Date: Tue, 14 Dec 2021 20:07:33 +0800
Subject: [PATCH 0036/1586] clocksource/drivers/imx-sysctr: Drop IRQF_IRQPOLL

Per the Documentation,
IRQF_IRQPOLL is used for polling (only the interrupt that is registered
first in a shared interrupt is considered for performance reasons)
But this timer is not sharing interrupt line with others, and
actually irqpoll not work with this timer with IRQF_IRQPOLL set, so
drop the flag.

Signed-off-by: Peng Fan <peng.fan@nxp.com>
Link: https://lore.kernel.org/r/20211214120737.1611955-2-peng.fan@oss.nxp.com
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/clocksource/timer-imx-sysctr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clocksource/timer-imx-sysctr.c b/drivers/clocksource/timer-imx-sysctr.c
index 55a8e198d2a19..523e37662a6e8 100644
--- a/drivers/clocksource/timer-imx-sysctr.c
+++ b/drivers/clocksource/timer-imx-sysctr.c
@@ -110,7 +110,7 @@ static struct timer_of to_sysctr = {
 	},
 	.of_irq = {
 		.handler		= sysctr_timer_interrupt,
-		.flags			= IRQF_TIMER | IRQF_IRQPOLL,
+		.flags			= IRQF_TIMER,
 	},
 	.of_clk = {
 		.name = "per",
-- 
GitLab


From 59e2bcd8a95b4cc5a04809b6d2ee337e81b47f57 Mon Sep 17 00:00:00 2001
From: Peng Fan <peng.fan@nxp.com>
Date: Tue, 14 Dec 2021 20:07:34 +0800
Subject: [PATCH 0037/1586] clocksource/drivers/imx-tpm: Drop IRQF_IRQPOLL

Per the Documentation,
IRQF_IRQPOLL is used for polling (only the interrupt that is registered
first in a shared interrupt is considered for performance reasons)
The TPM timer is not sharing interrupt with others, and pass irqpoll
not make sense for i.MX platform.

Signed-off-by: Peng Fan <peng.fan@nxp.com>
Link: https://lore.kernel.org/r/20211214120737.1611955-3-peng.fan@oss.nxp.com
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/clocksource/timer-imx-tpm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clocksource/timer-imx-tpm.c b/drivers/clocksource/timer-imx-tpm.c
index 2cdc077a39f5d..2c0b0d4eba9ef 100644
--- a/drivers/clocksource/timer-imx-tpm.c
+++ b/drivers/clocksource/timer-imx-tpm.c
@@ -137,7 +137,7 @@ static struct timer_of to_tpm = {
 	},
 	.of_irq = {
 		.handler		= tpm_timer_interrupt,
-		.flags			= IRQF_TIMER | IRQF_IRQPOLL,
+		.flags			= IRQF_TIMER,
 	},
 	.of_clk = {
 		.name = "per",
-- 
GitLab


From 5b3c267506eba2972d53dafb8b988d5fd28d223d Mon Sep 17 00:00:00 2001
From: Peng Fan <peng.fan@nxp.com>
Date: Tue, 14 Dec 2021 20:07:35 +0800
Subject: [PATCH 0038/1586] clocksource/drivers/imx-tpm: Mark two variable with
 __ro_after_init

counter_width and timer_base will not be updated after init, so mark
as __ro_after_init.

Signed-off-by: Peng Fan <peng.fan@nxp.com>
Link: https://lore.kernel.org/r/20211214120737.1611955-4-peng.fan@oss.nxp.com
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/clocksource/timer-imx-tpm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/clocksource/timer-imx-tpm.c b/drivers/clocksource/timer-imx-tpm.c
index 2c0b0d4eba9ef..3afd9b0a668a6 100644
--- a/drivers/clocksource/timer-imx-tpm.c
+++ b/drivers/clocksource/timer-imx-tpm.c
@@ -32,8 +32,8 @@
 #define TPM_C0SC_CHF_MASK		(0x1 << 7)
 #define TPM_C0V				0x24
 
-static int counter_width;
-static void __iomem *timer_base;
+static int counter_width __ro_after_init;
+static void __iomem *timer_base __ro_after_init;
 
 static inline void tpm_timer_disable(void)
 {
-- 
GitLab


From 39664b624f6a6518001b2c8f86bac1352c89d0af Mon Sep 17 00:00:00 2001
From: Peng Fan <peng.fan@nxp.com>
Date: Tue, 14 Dec 2021 20:07:36 +0800
Subject: [PATCH 0039/1586] clocksource/drivers/imx-tpm: Add
 CLOCK_EVT_FEAT_DYNIRQ

Add CLOCK_EVT_FEAT_DYNIRQ to allow the IRQ could be runtime set affinity
to the cores that needs wake up, otherwise saying core0 has to send
IPI to wakeup core1. With CLOCK_EVT_FEAT_DYNIRQ set, when broadcast
timer could wake up the cores, IPI is not needed.

Acked-by: Jacky Bai <ping.bai@nxp.com>
Signed-off-by: Peng Fan <peng.fan@nxp.com>
Link: https://lore.kernel.org/r/20211214120737.1611955-5-peng.fan@oss.nxp.com
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/clocksource/timer-imx-tpm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clocksource/timer-imx-tpm.c b/drivers/clocksource/timer-imx-tpm.c
index 3afd9b0a668a6..578fe162fd993 100644
--- a/drivers/clocksource/timer-imx-tpm.c
+++ b/drivers/clocksource/timer-imx-tpm.c
@@ -129,7 +129,7 @@ static struct timer_of to_tpm = {
 	.clkevt = {
 		.name			= "i.MX7ULP TPM Timer",
 		.rating			= 200,
-		.features		= CLOCK_EVT_FEAT_ONESHOT,
+		.features		= CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_DYNIRQ,
 		.set_state_shutdown	= tpm_set_state_shutdown,
 		.set_state_oneshot	= tpm_set_state_oneshot,
 		.set_next_event		= tpm_set_next_event,
-- 
GitLab


From e547ffe9e6f497e36bde9d86dbcfbc781946752b Mon Sep 17 00:00:00 2001
From: Peng Fan <peng.fan@nxp.com>
Date: Tue, 14 Dec 2021 20:07:37 +0800
Subject: [PATCH 0040/1586] clocksource/drivers/imx-tpm: Update name of clkevt

The tpm driver is not only for i.MX7ULP now, i.MX8ULP also use it. It
maybe also used by other i.MX variants, so update name to reflect it.

Signed-off-by: Peng Fan <peng.fan@nxp.com>
Link: https://lore.kernel.org/r/20211214120737.1611955-6-peng.fan@oss.nxp.com
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/clocksource/timer-imx-tpm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clocksource/timer-imx-tpm.c b/drivers/clocksource/timer-imx-tpm.c
index 578fe162fd993..df8064122b10c 100644
--- a/drivers/clocksource/timer-imx-tpm.c
+++ b/drivers/clocksource/timer-imx-tpm.c
@@ -127,7 +127,7 @@ static irqreturn_t tpm_timer_interrupt(int irq, void *dev_id)
 static struct timer_of to_tpm = {
 	.flags = TIMER_OF_IRQ | TIMER_OF_BASE | TIMER_OF_CLOCK,
 	.clkevt = {
-		.name			= "i.MX7ULP TPM Timer",
+		.name			= "i.MX TPM Timer",
 		.rating			= 200,
 		.features		= CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_DYNIRQ,
 		.set_state_shutdown	= tpm_set_state_shutdown,
-- 
GitLab


From 474fc2e6395d62758e80b9ea65f61339296355fc Mon Sep 17 00:00:00 2001
From: Guochun Mao <guochun.mao@mediatek.com>
Date: Wed, 26 Jan 2022 17:11:59 +0800
Subject: [PATCH 0041/1586] spi: spi-mtk-nor: make some internal variables
 static

Variables mtk_nor_caps_mt8173, mtk_nor_caps_mt8186 and
mtk_nor_caps_mt8192 are not declared.
Make them static.

Fixes: 5b177234e9fd ("spi: spi-mtk-nor: improve device table for adding more capabilities")
Signed-off-by: Guochun Mao <guochun.mao@mediatek.com>
Reported-by: kernel test robot <lkp@intel.com>
Link: https://lore.kernel.org/r/20220126091159.27513-1-guochun.mao@mediatek.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-mtk-nor.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/spi/spi-mtk-nor.c b/drivers/spi/spi-mtk-nor.c
index 455b4dcb26e9a..94fb09696677f 100644
--- a/drivers/spi/spi-mtk-nor.c
+++ b/drivers/spi/spi-mtk-nor.c
@@ -770,17 +770,17 @@ static const struct spi_controller_mem_ops mtk_nor_mem_ops = {
 	.exec_op = mtk_nor_exec_op
 };
 
-const struct mtk_nor_caps mtk_nor_caps_mt8173 = {
+static const struct mtk_nor_caps mtk_nor_caps_mt8173 = {
 	.dma_bits = 32,
 	.extra_dummy_bit = 0,
 };
 
-const struct mtk_nor_caps mtk_nor_caps_mt8186 = {
+static const struct mtk_nor_caps mtk_nor_caps_mt8186 = {
 	.dma_bits = 32,
 	.extra_dummy_bit = 1,
 };
 
-const struct mtk_nor_caps mtk_nor_caps_mt8192 = {
+static const struct mtk_nor_caps mtk_nor_caps_mt8192 = {
 	.dma_bits = 36,
 	.extra_dummy_bit = 0,
 };
-- 
GitLab


From b0596da1a019c092ca5ab64d6999f5e28d52865e Mon Sep 17 00:00:00 2001
From: Eliav Farber <farbere@amazon.com>
Date: Thu, 13 Jan 2022 10:06:20 +0000
Subject: [PATCH 0042/1586] EDAC/mc: Remove unnecessary cast to char * in
 edac_align_ptr()

Remove the forgotten (char *) casts as that function returns void *.

  [ bp: Rewrite commit message. ]

Signed-off-by: Eliav Farber <farbere@amazon.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lore.kernel.org/r/20220113100622.12783-3-farbere@amazon.com
---
 drivers/edac/edac_mc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index 9d9aabdec96b7..f92c4c69a033e 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -213,12 +213,12 @@ void *edac_align_ptr(void **p, unsigned int size, int n_elems)
 	else if (size > sizeof(char))
 		align = sizeof(short);
 	else
-		return (char *)ptr;
+		return ptr;
 
 	r = (unsigned long)p % align;
 
 	if (r == 0)
-		return (char *)ptr;
+		return ptr;
 
 	*p += align - r;
 
-- 
GitLab


From d3b1161f29cf479b86d4c3c6f200a8eb27254877 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20G=C3=B6ttsche?= <cgzones@googlemail.com>
Date: Tue, 25 Jan 2022 15:14:13 +0100
Subject: [PATCH 0043/1586] selinux: declare name parameter of hash_eval const
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

String literals are passed as second argument to hash_eval(). Also the
parameter is already declared const in the DEBUG_HASHES configuration.

Reported by clang [-Wwrite-strings]:

    security/selinux/ss/policydb.c:1881:26: error: passing
      'const char [8]' to parameter of type 'char *' discards
      qualifiers
            hash_eval(&p->range_tr, rangetr);
                                    ^~~~~~~~~
    security/selinux/ss/policydb.c:707:55: note: passing argument to
      parameter 'hash_name' here
    static inline void hash_eval(struct hashtab *h, char *hash_name)
                                                          ^
    security/selinux/ss/policydb.c:2099:32: error: passing
      'const char [11]' to parameter of type 'char *' discards
      qualifiers
            hash_eval(&p->filename_trans, filenametr);
                                          ^~~~~~~~~~~~
    security/selinux/ss/policydb.c:707:55: note: passing argument to
      parameter 'hash_name' here
    static inline void hash_eval(struct hashtab *h, char *hash_name)
                                                          ^

Signed-off-by: Christian Göttsche <cgzones@googlemail.com>
[PM: line wrapping in description]
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 security/selinux/ss/policydb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c
index 0ae1b718194a3..67e03f6e89667 100644
--- a/security/selinux/ss/policydb.c
+++ b/security/selinux/ss/policydb.c
@@ -704,7 +704,7 @@ static void symtab_hash_eval(struct symtab *s)
 }
 
 #else
-static inline void hash_eval(struct hashtab *h, char *hash_name)
+static inline void hash_eval(struct hashtab *h, const char *hash_name)
 {
 }
 #endif
-- 
GitLab


From 9e2fe574c02bde46307255467a5e4291f65227fe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20G=C3=B6ttsche?= <cgzones@googlemail.com>
Date: Tue, 25 Jan 2022 15:14:14 +0100
Subject: [PATCH 0044/1586] selinux: enclose macro arguments in parenthesis
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Enclose the macro arguments in parenthesis to avoid potential evaluation
order issues.

Note the xperm and ebitmap macros are still not side-effect safe due to
double evaluation.

Reported by clang-tidy [bugprone-macro-parentheses]

Signed-off-by: Christian Göttsche <cgzones@googlemail.com>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 security/selinux/include/security.h | 4 ++--
 security/selinux/ss/ebitmap.h       | 6 +++---
 security/selinux/ss/sidtab.c        | 4 ++--
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h
index 6482e0efb3689..d91a5672de991 100644
--- a/security/selinux/include/security.h
+++ b/security/selinux/include/security.h
@@ -254,8 +254,8 @@ struct av_decision {
 #define XPERMS_AUDITALLOW 2
 #define XPERMS_DONTAUDIT 4
 
-#define security_xperm_set(perms, x) (perms[x >> 5] |= 1 << (x & 0x1f))
-#define security_xperm_test(perms, x) (1 & (perms[x >> 5] >> (x & 0x1f)))
+#define security_xperm_set(perms, x) ((perms)[(x) >> 5] |= 1 << ((x) & 0x1f))
+#define security_xperm_test(perms, x) (1 & ((perms)[(x) >> 5] >> ((x) & 0x1f)))
 struct extended_perms_data {
 	u32 p[8];
 };
diff --git a/security/selinux/ss/ebitmap.h b/security/selinux/ss/ebitmap.h
index 9eb2d0af2805e..58eb822f11eef 100644
--- a/security/selinux/ss/ebitmap.h
+++ b/security/selinux/ss/ebitmap.h
@@ -118,9 +118,9 @@ static inline void ebitmap_node_clr_bit(struct ebitmap_node *n,
 }
 
 #define ebitmap_for_each_positive_bit(e, n, bit)	\
-	for (bit = ebitmap_start_positive(e, &n);	\
-	     bit < ebitmap_length(e);			\
-	     bit = ebitmap_next_positive(e, &n, bit))	\
+	for ((bit) = ebitmap_start_positive(e, &(n));	\
+	     (bit) < ebitmap_length(e);			\
+	     (bit) = ebitmap_next_positive(e, &(n), bit))	\
 
 int ebitmap_cmp(struct ebitmap *e1, struct ebitmap *e2);
 int ebitmap_cpy(struct ebitmap *dst, struct ebitmap *src);
diff --git a/security/selinux/ss/sidtab.c b/security/selinux/ss/sidtab.c
index 293ec048af08c..a54b8652bfb50 100644
--- a/security/selinux/ss/sidtab.c
+++ b/security/selinux/ss/sidtab.c
@@ -27,8 +27,8 @@ struct sidtab_str_cache {
 	char str[];
 };
 
-#define index_to_sid(index) (index + SECINITSID_NUM + 1)
-#define sid_to_index(sid) (sid - (SECINITSID_NUM + 1))
+#define index_to_sid(index) ((index) + SECINITSID_NUM + 1)
+#define sid_to_index(sid) ((sid) - (SECINITSID_NUM + 1))
 
 int sidtab_init(struct sidtab *s)
 {
-- 
GitLab


From 0b3c2b3dc96a57fd64691d666c4c7123a4f4e7b8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20G=C3=B6ttsche?= <cgzones@googlemail.com>
Date: Tue, 25 Jan 2022 15:14:15 +0100
Subject: [PATCH 0045/1586] selinux: drop cast to same type
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Both the lvalue scontextp and rvalue scontext are of the type char*.
Drop the redundant explicit cast not needed since commit 9a59daa03df7
("SELinux: fix sleeping allocation in security_context_to_sid"), where
the type of scontext changed from const char* to char*.

Signed-off-by: Christian Göttsche <cgzones@googlemail.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 security/selinux/ss/services.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index 5a7df45bdab17..2f8db93e53b29 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -1452,7 +1452,7 @@ static int string_to_context_struct(struct policydb *pol,
 	/* Parse the security context. */
 
 	rc = -EINVAL;
-	scontextp = (char *) scontext;
+	scontextp = scontext;
 
 	/* Extract the user. */
 	p = scontextp;
-- 
GitLab


From 056945a96cf58060560498e069a10d94a1ef802b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20G=C3=B6ttsche?= <cgzones@googlemail.com>
Date: Tue, 25 Jan 2022 15:14:16 +0100
Subject: [PATCH 0046/1586] selinux: drop unused parameter of avtab_insert_node
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The parameter cur is not used in avtab_insert_node().

Reported by clang [-Wunused-parameter]

Signed-off-by: Christian Göttsche <cgzones@googlemail.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 security/selinux/ss/avtab.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/security/selinux/ss/avtab.c b/security/selinux/ss/avtab.c
index c97695ae508f1..cfdae20792e11 100644
--- a/security/selinux/ss/avtab.c
+++ b/security/selinux/ss/avtab.c
@@ -67,7 +67,7 @@ static inline int avtab_hash(const struct avtab_key *keyp, u32 mask)
 
 static struct avtab_node*
 avtab_insert_node(struct avtab *h, int hvalue,
-		  struct avtab_node *prev, struct avtab_node *cur,
+		  struct avtab_node *prev,
 		  const struct avtab_key *key, const struct avtab_datum *datum)
 {
 	struct avtab_node *newnode;
@@ -137,7 +137,7 @@ static int avtab_insert(struct avtab *h, const struct avtab_key *key,
 			break;
 	}
 
-	newnode = avtab_insert_node(h, hvalue, prev, cur, key, datum);
+	newnode = avtab_insert_node(h, hvalue, prev, key, datum);
 	if (!newnode)
 		return -ENOMEM;
 
@@ -177,7 +177,7 @@ struct avtab_node *avtab_insert_nonunique(struct avtab *h,
 		    key->target_class < cur->key.target_class)
 			break;
 	}
-	return avtab_insert_node(h, hvalue, prev, cur, key, datum);
+	return avtab_insert_node(h, hvalue, prev, key, datum);
 }
 
 struct avtab_datum *avtab_search(struct avtab *h, const struct avtab_key *key)
-- 
GitLab


From 73073d956a2073554b99d621a7a7ec9437055044 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20G=C3=B6ttsche?= <cgzones@googlemail.com>
Date: Tue, 25 Jan 2022 15:14:17 +0100
Subject: [PATCH 0047/1586] selinux: do not discard const qualifier in cast
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Do not discard the const qualifier on the cast from const void* to
__be32*; the addressed value is not modified.

Reported by clang [-Wcast-qual]

Signed-off-by: Christian Göttsche <cgzones@googlemail.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 security/selinux/netnode.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/security/selinux/netnode.c b/security/selinux/netnode.c
index 4a7d2ab5b9609..889552db0d31a 100644
--- a/security/selinux/netnode.c
+++ b/security/selinux/netnode.c
@@ -107,7 +107,7 @@ static struct sel_netnode *sel_netnode_find(const void *addr, u16 family)
 
 	switch (family) {
 	case PF_INET:
-		idx = sel_netnode_hashfn_ipv4(*(__be32 *)addr);
+		idx = sel_netnode_hashfn_ipv4(*(const __be32 *)addr);
 		break;
 	case PF_INET6:
 		idx = sel_netnode_hashfn_ipv6(addr);
@@ -121,7 +121,7 @@ static struct sel_netnode *sel_netnode_find(const void *addr, u16 family)
 		if (node->nsec.family == family)
 			switch (family) {
 			case PF_INET:
-				if (node->nsec.addr.ipv4 == *(__be32 *)addr)
+				if (node->nsec.addr.ipv4 == *(const __be32 *)addr)
 					return node;
 				break;
 			case PF_INET6:
-- 
GitLab


From b084e189b01a7614d3098aca4f2381a759460d88 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20G=C3=B6ttsche?= <cgzones@googlemail.com>
Date: Tue, 25 Jan 2022 15:14:18 +0100
Subject: [PATCH 0048/1586] selinux: simplify cred_init_security
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The parameter of selinux_cred() is declared const, so an explicit cast
dropping the const qualifier is not necessary. Without the cast the
local variable cred serves no purpose.

Reported by clang [-Wcast-qual]

Signed-off-by: Christian Göttsche <cgzones@googlemail.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 security/selinux/hooks.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index a0243bae84238..eae7dbd62df16 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -211,10 +211,9 @@ static int selinux_lsm_notifier_avc_callback(u32 event)
  */
 static void cred_init_security(void)
 {
-	struct cred *cred = (struct cred *) current->real_cred;
 	struct task_security_struct *tsec;
 
-	tsec = selinux_cred(cred);
+	tsec = selinux_cred(current->real_cred);
 	tsec->osid = tsec->sid = SECINITSID_KERNEL;
 }
 
-- 
GitLab


From b5e68162f859132af419af479bdb96e2ae18fa2b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20G=C3=B6ttsche?= <cgzones@googlemail.com>
Date: Tue, 25 Jan 2022 15:14:19 +0100
Subject: [PATCH 0049/1586] selinux: drop unused macro
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The macro _DEBUG_HASHES is nowhere used. The configuration DEBUG_HASHES
enables debugging of the SELinux hash tables, but the with an underscore
prefixed macro definition has no direct impact or any documentation.

Reported by clang [-Wunused-macros]

Signed-off-by: Christian Göttsche <cgzones@googlemail.com>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 security/selinux/ss/policydb.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c
index 67e03f6e89667..d036e1238e771 100644
--- a/security/selinux/ss/policydb.c
+++ b/security/selinux/ss/policydb.c
@@ -41,8 +41,6 @@
 #include "mls.h"
 #include "services.h"
 
-#define _DEBUG_HASHES
-
 #ifdef DEBUG_HASHES
 static const char *symtab_name[SYM_NUM] = {
 	"common prefixes",
-- 
GitLab


From cdeea45422f579b9302e377d1ede29133d3fde8e Mon Sep 17 00:00:00 2001
From: Paul Moore <paul@paul-moore.com>
Date: Thu, 27 Jan 2022 10:45:59 -0500
Subject: [PATCH 0050/1586] selinux: fix a type cast problem in
 cred_init_security()

In the process of removing an explicit type cast to preserve a cred
const qualifier in cred_init_security() we ran into a problem where
the task_struct::real_cred field is defined with the "__rcu"
attribute but the selinux_cred() function parameter is not, leading
to a sparse warning:

  security/selinux/hooks.c:216:36: sparse: sparse:
    incorrect type in argument 1 (different address spaces)
    @@     expected struct cred const *cred
    @@     got struct cred const [noderef] __rcu *real_cred

As we don't want to add the "__rcu" attribute to the selinux_cred()
parameter, we're going to add an explicit cast back to
cred_init_security().

Fixes: b084e189b01a ("selinux: simplify cred_init_security")
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 security/selinux/hooks.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index eae7dbd62df16..221e642025f53 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -213,7 +213,7 @@ static void cred_init_security(void)
 {
 	struct task_security_struct *tsec;
 
-	tsec = selinux_cred(current->real_cred);
+	tsec = selinux_cred(unrcu_pointer(current->real_cred));
 	tsec->osid = tsec->sid = SECINITSID_KERNEL;
 }
 
-- 
GitLab


From c75c6a8add370cde88117901f21cf0d6eaf09a8f Mon Sep 17 00:00:00 2001
From: Tang Yizhou <tangyizhou@huawei.com>
Date: Wed, 29 Dec 2021 10:42:10 +0800
Subject: [PATCH 0051/1586] docs/zh_CN: Cleanup index.rst

Many */index in the Chinese index.rst are not in the same order as the
English version. Put them to where they should be.

Signed-off-by: Tang Yizhou <tangyizhou@huawei.com>
Reviewed-by: Yanteng Si <siyanteng@loongson.cn>
Link: https://lore.kernel.org/r/20211229024212.32752-2-tangyizhou@huawei.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/translations/zh_CN/index.rst | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Documentation/translations/zh_CN/index.rst b/Documentation/translations/zh_CN/index.rst
index 46e14ec9963da..e7f5fa3e75027 100644
--- a/Documentation/translations/zh_CN/index.rst
+++ b/Documentation/translations/zh_CN/index.rst
@@ -104,13 +104,13 @@ TODOList:
    :maxdepth: 2
 
    core-api/index
+   accounting/index
    cpu-freq/index
    iio/index
+   infiniband/index
+   virt/index
    sound/index
    filesystems/index
-   virt/index
-   infiniband/index
-   accounting/index
    scheduler/index
 
 TODOList:
-- 
GitLab


From 30e61d38f01d77d6d1f76c7566e70d46eff3724d Mon Sep 17 00:00:00 2001
From: Tang Yizhou <tangyizhou@huawei.com>
Date: Wed, 29 Dec 2021 10:42:11 +0800
Subject: [PATCH 0052/1586] docs/zh_CN: Add power/index Chinese translation

Translate power/index.rst into Chinese.

Signed-off-by: Tang Yizhou <tangyizhou@huawei.com>
Reviewed-by: Alex Shi <alexs@kernel.org>
Reviewed-by: Yanteng Si <siyanteng@loongson.cn>
Link: https://lore.kernel.org/r/20211229024212.32752-3-tangyizhou@huawei.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/translations/zh_CN/index.rst    |  2 +-
 .../translations/zh_CN/power/index.rst        | 55 +++++++++++++++++++
 2 files changed, 56 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/translations/zh_CN/power/index.rst

diff --git a/Documentation/translations/zh_CN/index.rst b/Documentation/translations/zh_CN/index.rst
index e7f5fa3e75027..08144252630c3 100644
--- a/Documentation/translations/zh_CN/index.rst
+++ b/Documentation/translations/zh_CN/index.rst
@@ -108,6 +108,7 @@ TODOList:
    cpu-freq/index
    iio/index
    infiniband/index
+   power/index
    virt/index
    sound/index
    filesystems/index
@@ -129,7 +130,6 @@ TODOList:
 * netlabel/index
 * networking/index
 * pcmcia/index
-* power/index
 * target/index
 * timers/index
 * spi/index
diff --git a/Documentation/translations/zh_CN/power/index.rst b/Documentation/translations/zh_CN/power/index.rst
new file mode 100644
index 0000000000000..fd379adfda8e6
--- /dev/null
+++ b/Documentation/translations/zh_CN/power/index.rst
@@ -0,0 +1,55 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/power/index.rst
+
+:翻译:
+
+  唐艺舟 Tang Yizhou <tangyeechou@gmail.com>
+
+========
+电源管理
+========
+
+.. toctree::
+    :maxdepth: 1
+
+TODOList:
+
+    * apm-acpi
+    * basic-pm-debugging
+    * charger-manager
+    * drivers-testing
+    * energy-model
+    * freezing-of-tasks
+    * opp
+    * pci
+    * pm_qos_interface
+    * power_supply_class
+    * runtime_pm
+    * s2ram
+    * suspend-and-cpuhotplug
+    * suspend-and-interrupts
+    * swsusp-and-swap-files
+    * swsusp-dmcrypt
+    * swsusp
+    * video
+    * tricks
+
+    * userland-swsusp
+
+    * powercap/powercap
+    * powercap/dtpm
+
+    * regulator/consumer
+    * regulator/design
+    * regulator/machine
+    * regulator/overview
+    * regulator/regulator
+
+.. only::  subproject and html
+
+   Indices
+   =======
+
+   * :ref:`genindex`
-- 
GitLab


From dd774a07ddfcb4dfe15778ea30cd5bb592ffab29 Mon Sep 17 00:00:00 2001
From: Tang Yizhou <tangyizhou@huawei.com>
Date: Wed, 29 Dec 2021 10:42:12 +0800
Subject: [PATCH 0053/1586] docs/zh_CN: Add opp Chinese translation

Translate power/opp.rst into Chinese.

Signed-off-by: Tang Yizhou <tangyizhou@huawei.com>
Reviewed-by: Alex Shi <alexs@kernel.org>
Reviewed-by: Yanteng Si <siyanteng@loongson.cn>
Link: https://lore.kernel.org/r/20211229024212.32752-4-tangyizhou@huawei.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../translations/zh_CN/power/index.rst        |   3 +-
 .../translations/zh_CN/power/opp.rst          | 341 ++++++++++++++++++
 2 files changed, 343 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/translations/zh_CN/power/opp.rst

diff --git a/Documentation/translations/zh_CN/power/index.rst b/Documentation/translations/zh_CN/power/index.rst
index fd379adfda8e6..ad80a9e80b7cd 100644
--- a/Documentation/translations/zh_CN/power/index.rst
+++ b/Documentation/translations/zh_CN/power/index.rst
@@ -14,6 +14,8 @@
 .. toctree::
     :maxdepth: 1
 
+    opp
+
 TODOList:
 
     * apm-acpi
@@ -22,7 +24,6 @@ TODOList:
     * drivers-testing
     * energy-model
     * freezing-of-tasks
-    * opp
     * pci
     * pm_qos_interface
     * power_supply_class
diff --git a/Documentation/translations/zh_CN/power/opp.rst b/Documentation/translations/zh_CN/power/opp.rst
new file mode 100644
index 0000000000000..8d6e3f6f62024
--- /dev/null
+++ b/Documentation/translations/zh_CN/power/opp.rst
@@ -0,0 +1,341 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/power/opp.rst
+
+:翻译:
+
+  唐艺舟 Tang Yizhou <tangyeechou@gmail.com>
+
+======================
+操作性能值（OPP）库
+======================
+
+(C) 2009-2010 Nishanth Menon <nm@ti.com>, 德州仪器公司
+
+.. 目录
+
+  1. 简介
+  2. OPP链表初始注册
+  3. OPP搜索函数
+  4. OPP可用性控制函数
+  5. OPP数据检索函数
+  6. 数据结构
+
+1. 简介
+=======
+
+1.1 何为操作性能值（OPP）？
+------------------------------
+
+当今复杂的单片系统（SoC）由多个子模块组成，这些子模块会联合工作。在一个执行不同用例
+的操作系统中，并不是SoC中的所有模块都需要一直以最高频率工作。为了促成这一点，SoC中
+的子模块被分组为不同域，允许一些域以较低的电压和频率运行，而其它域则以较高的“电压/
+频率对”运行。
+
+设备按域支持的由频率电压对组成的离散的元组的集合，被称为操作性能值（组），或OPPs。
+
+举例来说：
+
+让我们考虑一个支持下述频率、电压值的内存保护单元（MPU）设备：
+{300MHz，最低电压为1V}, {800MHz，最低电压为1.2V}, {1GHz，最低电压为1.3V}
+
+我们能将它们表示为3个OPP，如下述{Hz, uV}元组（译注：频率的单位是赫兹，电压的单位是
+微伏）。
+
+- {300000000, 1000000}
+- {800000000, 1200000}
+- {1000000000, 1300000}
+
+1.2 操作性能值库
+----------------
+
+OPP库提供了一组辅助函数来组织和查询OPP信息。该库位于drivers/opp/目录下，其头文件
+位于include/linux/pm_opp.h中。OPP库可以通过开启CONFIG_PM_OPP来启用。某些SoC，
+如德州仪器的OMAP框架允许在不需要cpufreq的情况下可选地在某一OPP下启动。
+
+OPP库的典型用法如下::
+
+ （用户）        -> 注册一个默认的OPP集合               -> （库）
+ （SoC框架）     -> 在必要的情况下，对某些OPP进行修改     -> OPP layer
+                -> 搜索/检索信息的查询                 ->
+
+OPP层期望每个域由一个唯一的设备指针来表示。SoC框架在OPP层为每个设备注册了一组初始
+OPP。这个链表的长度被期望是一个最优化的小数字，通常每个设备大约5个。初始链表包含了
+一个OPP集合，这个集合被期望能在系统中安全使能。
+
+关于OPP可用性的说明
+^^^^^^^^^^^^^^^^^^^
+
+随着系统的运行，SoC框架可能会基于各种外部因素选择让某些OPP在每个设备上可用或不可用，
+示例：温度管理或其它异常场景中，SoC框架可能会选择禁用一个较高频率的OPP以安全地继续
+运行，直到该OPP被重新启用（如果可能）。
+
+OPP库在它的实现中达成了这个概念。以下操作函数只能对可用的OPP使用：
+dev_pm_opp_find_freq_{ceil, floor}, dev_pm_opp_get_voltage,
+dev_pm_opp_get_freq, dev_pm_opp_get_opp_count。
+
+dev_pm_opp_find_freq_exact是用来查找OPP指针的，该指针可被用在dev_pm_opp_enable/
+disable函数，使一个OPP在被需要时变为可用。
+
+警告：如果对一个设备调用dev_pm_opp_enable/disable函数，OPP库的用户应该使用
+dev_pm_opp_get_opp_count来刷新OPP的可用性计数。触发这些的具体机制，或者对有依赖的
+子系统（比如cpufreq）的通知机制，都是由使用OPP库的SoC特定框架酌情处理的。在这些操作
+中，同样需要注意刷新cpufreq表。
+
+2. OPP链表初始注册
+==================
+SoC的实现会迭代调用dev_pm_opp_add函数来增加每个设备的OPP。预期SoC框架将以最优的
+方式注册OPP条目 - 典型的数字范围小于5。通过注册OPP生成的OPP链表，在整个设备运行过程
+中由OPP库维护。SoC框架随后可以使用dev_pm_opp_enable / disable函数动态地
+控制OPP的可用性。
+
+dev_pm_opp_add
+	为设备指针所指向的特定域添加一个新的OPP。OPP是用频率和电压定义的。一旦完成
+	添加，OPP被认为是可用的，可以用dev_pm_opp_enable/disable函数来控制其可用性。
+	OPP库内部用dev_pm_opp结构体存储并管理这些信息。这个函数可以被SoC框架根据SoC
+	的使用环境的需求来定义一个最优链表。
+
+	警告：
+		不要在中断上下文使用这个函数。
+
+	示例::
+
+	 soc_pm_init()
+	 {
+		/* 做一些事情 */
+		r = dev_pm_opp_add(mpu_dev, 1000000, 900000);
+		if (!r) {
+			pr_err("%s: unable to register mpu opp(%d)\n", r);
+			goto no_cpufreq;
+		}
+		/* 做一些和cpufreq相关的事情 */
+	 no_cpufreq:
+		/* 做剩余的事情 */
+	 }
+
+3. OPP搜索函数
+==============
+cpufreq等高层框架对频率进行操作，为了将频率映射到相应的OPP，OPP库提供了便利的函数
+来搜索OPP库内部管理的OPP链表。这些搜索函数如果找到匹配的OPP，将返回指向该OPP的指针，
+否则返回错误。这些错误预计由标准的错误检查，如IS_ERR()来处理，并由调用者采取适当的
+行动。
+
+这些函数的调用者应在使用完OPP后调用dev_pm_opp_put()。否则，OPP的内存将永远不会
+被释放，并导致内存泄露。
+
+dev_pm_opp_find_freq_exact
+	根据 *精确的* 频率和可用性来搜索OPP。这个函数对默认不可用的OPP特别有用。
+	例子：在SoC框架检测到更高频率可用的情况下，它可以使用这个函数在调用
+	dev_pm_opp_enable之前找到OPP::
+
+	 opp = dev_pm_opp_find_freq_exact(dev, 1000000000, false);
+	 dev_pm_opp_put(opp);
+	 /* 不要操作指针.. 只是做有效性检查.. */
+	 if (IS_ERR(opp)) {
+		pr_err("frequency not disabled!\n");
+		/* 触发合适的操作.. */
+	 } else {
+		dev_pm_opp_enable(dev,1000000000);
+	 }
+
+	注意：
+	  这是唯一一个可以搜索不可用OPP的函数。
+
+dev_pm_opp_find_freq_floor
+	搜索一个 *最多* 提供指定频率的可用OPP。这个函数在搜索较小的匹配或按频率
+	递减的顺序操作OPP信息时很有用。
+	例子：要找的一个设备的最高OPP::
+
+	 freq = ULONG_MAX;
+	 opp = dev_pm_opp_find_freq_floor(dev, &freq);
+	 dev_pm_opp_put(opp);
+
+dev_pm_opp_find_freq_ceil
+	搜索一个 *最少* 提供指定频率的可用OPP。这个函数在搜索较大的匹配或按频率
+	递增的顺序操作OPP信息时很有用。
+	例1：找到一个设备最小的OPP::
+
+	 freq = 0;
+	 opp = dev_pm_opp_find_freq_ceil(dev, &freq);
+	 dev_pm_opp_put(opp);
+
+	例: 一个SoC的cpufreq_driver->target的简易实现::
+
+	 soc_cpufreq_target(..)
+	 {
+		/* 做策略检查等操作 */
+		/* 找到和请求最接近的频率 */
+		opp = dev_pm_opp_find_freq_ceil(dev, &freq);
+		dev_pm_opp_put(opp);
+		if (!IS_ERR(opp))
+			soc_switch_to_freq_voltage(freq);
+		else
+			/* 当不能满足请求时，要做的事 */
+		/* 做其它事 */
+	 }
+
+4. OPP可用性控制函数
+====================
+在OPP库中注册的默认OPP链表也许无法满足所有可能的场景。OPP库提供了一套函数来修改
+OPP链表中的某个OPP的可用性。这使得SoC框架能够精细地动态控制哪一组OPP是可用于操作
+的。设计这些函数的目的是在诸如考虑温度时 *暂时地* 删除某个OPP（例如，在温度下降
+之前不要使用某OPP）。
+
+警告：
+	不要在中断上下文使用这些函数。
+
+dev_pm_opp_enable
+	使一个OPP可用于操作。
+	例子：假设1GHz的OPP只有在SoC温度低于某个阈值时才可用。SoC框架的实现可能
+	会选择做以下事情::
+
+	 if (cur_temp < temp_low_thresh) {
+		/* 若1GHz未使能，则使能 */
+		opp = dev_pm_opp_find_freq_exact(dev, 1000000000, false);
+		dev_pm_opp_put(opp);
+		/* 仅仅是错误检查 */
+		if (!IS_ERR(opp))
+			ret = dev_pm_opp_enable(dev, 1000000000);
+		else
+			goto try_something_else;
+	 }
+
+dev_pm_opp_disable
+	使一个OPP不可用于操作。
+	例子：假设1GHz的OPP只有在SoC温度高于某个阈值时才可用。SoC框架的实现可能
+	会选择做以下事情::
+
+	 if (cur_temp > temp_high_thresh) {
+		/* 若1GHz已使能，则关闭 */
+		opp = dev_pm_opp_find_freq_exact(dev, 1000000000, true);
+		dev_pm_opp_put(opp);
+		/* 仅仅是错误检查 */
+		if (!IS_ERR(opp))
+			ret = dev_pm_opp_disable(dev, 1000000000);
+		else
+			goto try_something_else;
+	 }
+
+5. OPP数据检索函数
+==================
+由于OPP库对OPP信息进行了抽象化处理，因此需要一组函数来从dev_pm_opp结构体中提取
+信息。一旦使用搜索函数检索到一个OPP指针，以下函数就可以被SoC框架用来检索OPP层
+内部描述的信息。
+
+dev_pm_opp_get_voltage
+	检索OPP指针描述的电压。
+	例子: 当cpufreq切换到到不同频率时，SoC框架需要用稳压器框架将OPP描述
+	的电压设置到提供电压的电源管理芯片中::
+
+	 soc_switch_to_freq_voltage(freq)
+	 {
+		/* 做一些事情 */
+		opp = dev_pm_opp_find_freq_ceil(dev, &freq);
+		v = dev_pm_opp_get_voltage(opp);
+		dev_pm_opp_put(opp);
+		if (v)
+			regulator_set_voltage(.., v);
+		/* 做其它事 */
+	 }
+
+dev_pm_opp_get_freq
+	检索OPP指针描述的频率。
+	例子：比方说，SoC框架使用了几个辅助函数，通过这些函数，我们可以将OPP
+	指针传入，而不是传入额外的参数，用来处理一系列数据参数::
+
+	 soc_cpufreq_target(..)
+	 {
+		/* 做一些事情.. */
+		 max_freq = ULONG_MAX;
+		 max_opp = dev_pm_opp_find_freq_floor(dev,&max_freq);
+		 requested_opp = dev_pm_opp_find_freq_ceil(dev,&freq);
+		 if (!IS_ERR(max_opp) && !IS_ERR(requested_opp))
+			r = soc_test_validity(max_opp, requested_opp);
+		 dev_pm_opp_put(max_opp);
+		 dev_pm_opp_put(requested_opp);
+		/* 做其它事 */
+	 }
+	 soc_test_validity(..)
+	 {
+		 if(dev_pm_opp_get_voltage(max_opp) < dev_pm_opp_get_voltage(requested_opp))
+			 return -EINVAL;
+		 if(dev_pm_opp_get_freq(max_opp) < dev_pm_opp_get_freq(requested_opp))
+			 return -EINVAL;
+		/* 做一些事情.. */
+	 }
+
+dev_pm_opp_get_opp_count
+	检索某个设备可用的OPP数量。
+	例子：假设SoC中的一个协处理器需要知道某个表中的可用频率，主处理器可以
+	按如下方式发出通知::
+
+	 soc_notify_coproc_available_frequencies()
+	 {
+		/* 做一些事情 */
+		num_available = dev_pm_opp_get_opp_count(dev);
+		speeds = kzalloc(sizeof(u32) * num_available, GFP_KERNEL);
+		/* 按升序填充表 */
+		freq = 0;
+		while (!IS_ERR(opp = dev_pm_opp_find_freq_ceil(dev, &freq))) {
+			speeds[i] = freq;
+			freq++;
+			i++;
+			dev_pm_opp_put(opp);
+		}
+
+		soc_notify_coproc(AVAILABLE_FREQs, speeds, num_available);
+		/* 做其它事 */
+	 }
+
+6. 数据结构
+===========
+通常，一个SoC包含多个可变电压域。每个域由一个设备指针描述。和OPP之间的关系可以
+按以下方式描述::
+
+  SoC
+   |- device 1
+   |	|- opp 1 (availability, freq, voltage)
+   |	|- opp 2 ..
+   ...	...
+   |	`- opp n ..
+   |- device 2
+   ...
+   `- device m
+
+OPP库维护着一个内部链表，SoC框架使用上文描述的各个函数来填充和访问。然而，描述
+真实OPP和域的结构体是OPP库自身的内部组成，以允许合适的抽象在不同系统中得到复用。
+
+struct dev_pm_opp
+	OPP库的内部数据结构，用于表示一个OPP。除了频率、电压、可用性信息外，
+	它还包含OPP库运行所需的内部统计信息。指向这个结构体的指针被提供给
+	用户（比如SoC框架）使用，在与OPP层的交互中作为OPP的标识符。
+
+	警告：
+	  结构体dev_pm_opp的指针不应该由用户解析或修改。一个实例的默认值由
+	  dev_pm_opp_add填充，但OPP的可用性由dev_pm_opp_enable/disable函数
+	  修改。
+
+struct device
+	这用于向OPP层标识一个域。设备的性质和它的实现是由OPP库的用户决定的，
+	如SoC框架。
+
+总体来说，以一个简化的视角看，对数据结构的操作可以描述为下面各图::
+
+  初始化 / 修改:
+              +-----+        /- dev_pm_opp_enable
+  dev_pm_opp_add --> | opp | <-------
+    |         +-----+        \- dev_pm_opp_disable
+    \-------> domain_info(device)
+
+  搜索函数:
+               /-- dev_pm_opp_find_freq_ceil  ---\   +-----+
+  domain_info<---- dev_pm_opp_find_freq_exact -----> | opp |
+               \-- dev_pm_opp_find_freq_floor ---/   +-----+
+
+  检索函数:
+  +-----+     /- dev_pm_opp_get_voltage
+  | opp | <---
+  +-----+     \- dev_pm_opp_get_freq
+
+  domain_info <- dev_pm_opp_get_opp_count
-- 
GitLab


From bf026e2e316ba57135b70e8ce591276239c7b2cf Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 10 Jan 2022 11:50:43 +0100
Subject: [PATCH 0054/1586] Documentation: Fill the gaps about entry/noinstr
 constraints

The entry/exit handling for exceptions, interrupts, syscalls and KVM is
not really documented except for some comments.

Fill the gaps.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de
Co-developed-by: Nicolas Saenz Julienne <nsaenzju@redhat.com>
Signed-off-by: Nicolas Saenz Julienne <nsaenzju@redhat.com>
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Paul E. McKenney <paulmck@kernel.org>

----

Changes since v3:
 - s/nointr/noinstr/

Changes since v2:
 - No big content changes, just style corrections, so it should be
   pretty clean at this stage. In the light of this, I kept Mark's
   Reviewed-by.
 - Paul's style and paragraph re-writes
 - Randy's style comments
 - Add links to transition type sections

Documentation/core-api/entry.rst | 261 +++++++++++++++++++++++++++++++
 Documentation/core-api/index.rst |   8 +
 2 files changed, 269 insertions(+)
 create mode 100644 Documentation/core-api/entry.rst

Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Link: https://lore.kernel.org/r/20220110105044.94423-1-nsaenzju@redhat.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/core-api/entry.rst | 261 +++++++++++++++++++++++++++++++
 Documentation/core-api/index.rst |   8 +
 2 files changed, 269 insertions(+)
 create mode 100644 Documentation/core-api/entry.rst

diff --git a/Documentation/core-api/entry.rst b/Documentation/core-api/entry.rst
new file mode 100644
index 0000000000000..c6f8e22c88fe8
--- /dev/null
+++ b/Documentation/core-api/entry.rst
@@ -0,0 +1,261 @@
+Entry/exit handling for exceptions, interrupts, syscalls and KVM
+================================================================
+
+All transitions between execution domains require state updates which are
+subject to strict ordering constraints. State updates are required for the
+following:
+
+  * Lockdep
+  * RCU / Context tracking
+  * Preemption counter
+  * Tracing
+  * Time accounting
+
+The update order depends on the transition type and is explained below in
+the transition type sections: `Syscalls`_, `KVM`_, `Interrupts and regular
+exceptions`_, `NMI and NMI-like exceptions`_.
+
+Non-instrumentable code - noinstr
+---------------------------------
+
+Most instrumentation facilities depend on RCU, so intrumentation is prohibited
+for entry code before RCU starts watching and exit code after RCU stops
+watching. In addition, many architectures must save and restore register state,
+which means that (for example) a breakpoint in the breakpoint entry code would
+overwrite the debug registers of the initial breakpoint.
+
+Such code must be marked with the 'noinstr' attribute, placing that code into a
+special section inaccessible to instrumentation and debug facilities. Some
+functions are partially instrumentable, which is handled by marking them
+noinstr and using instrumentation_begin() and instrumentation_end() to flag the
+instrumentable ranges of code:
+
+.. code-block:: c
+
+  noinstr void entry(void)
+  {
+  	handle_entry();     // <-- must be 'noinstr' or '__always_inline'
+	...
+
+	instrumentation_begin();
+	handle_context();   // <-- instrumentable code
+	instrumentation_end();
+
+	...
+	handle_exit();      // <-- must be 'noinstr' or '__always_inline'
+  }
+
+This allows verification of the 'noinstr' restrictions via objtool on
+supported architectures.
+
+Invoking non-instrumentable functions from instrumentable context has no
+restrictions and is useful to protect e.g. state switching which would
+cause malfunction if instrumented.
+
+All non-instrumentable entry/exit code sections before and after the RCU
+state transitions must run with interrupts disabled.
+
+Syscalls
+--------
+
+Syscall-entry code starts in assembly code and calls out into low-level C code
+after establishing low-level architecture-specific state and stack frames. This
+low-level C code must not be instrumented. A typical syscall handling function
+invoked from low-level assembly code looks like this:
+
+.. code-block:: c
+
+  noinstr void syscall(struct pt_regs *regs, int nr)
+  {
+	arch_syscall_enter(regs);
+	nr = syscall_enter_from_user_mode(regs, nr);
+
+	instrumentation_begin();
+	if (!invoke_syscall(regs, nr) && nr != -1)
+	 	result_reg(regs) = __sys_ni_syscall(regs);
+	instrumentation_end();
+
+	syscall_exit_to_user_mode(regs);
+  }
+
+syscall_enter_from_user_mode() first invokes enter_from_user_mode() which
+establishes state in the following order:
+
+  * Lockdep
+  * RCU / Context tracking
+  * Tracing
+
+and then invokes the various entry work functions like ptrace, seccomp, audit,
+syscall tracing, etc. After all that is done, the instrumentable invoke_syscall
+function can be invoked. The instrumentable code section then ends, after which
+syscall_exit_to_user_mode() is invoked.
+
+syscall_exit_to_user_mode() handles all work which needs to be done before
+returning to user space like tracing, audit, signals, task work etc. After
+that it invokes exit_to_user_mode() which again handles the state
+transition in the reverse order:
+
+  * Tracing
+  * RCU / Context tracking
+  * Lockdep
+
+syscall_enter_from_user_mode() and syscall_exit_to_user_mode() are also
+available as fine grained subfunctions in cases where the architecture code
+has to do extra work between the various steps. In such cases it has to
+ensure that enter_from_user_mode() is called first on entry and
+exit_to_user_mode() is called last on exit.
+
+
+KVM
+---
+
+Entering or exiting guest mode is very similar to syscalls. From the host
+kernel point of view the CPU goes off into user space when entering the
+guest and returns to the kernel on exit.
+
+kvm_guest_enter_irqoff() is a KVM-specific variant of exit_to_user_mode()
+and kvm_guest_exit_irqoff() is the KVM variant of enter_from_user_mode().
+The state operations have the same ordering.
+
+Task work handling is done separately for guest at the boundary of the
+vcpu_run() loop via xfer_to_guest_mode_handle_work() which is a subset of
+the work handled on return to user space.
+
+Interrupts and regular exceptions
+---------------------------------
+
+Interrupts entry and exit handling is slightly more complex than syscalls
+and KVM transitions.
+
+If an interrupt is raised while the CPU executes in user space, the entry
+and exit handling is exactly the same as for syscalls.
+
+If the interrupt is raised while the CPU executes in kernel space the entry and
+exit handling is slightly different. RCU state is only updated when the
+interrupt is raised in the context of the CPU's idle task. Otherwise, RCU will
+already be watching. Lockdep and tracing have to be updated unconditionally.
+
+irqentry_enter() and irqentry_exit() provide the implementation for this.
+
+The architecture-specific part looks similar to syscall handling:
+
+.. code-block:: c
+
+  noinstr void interrupt(struct pt_regs *regs, int nr)
+  {
+	arch_interrupt_enter(regs);
+	state = irqentry_enter(regs);
+
+	instrumentation_begin();
+
+	irq_enter_rcu();
+	invoke_irq_handler(regs, nr);
+	irq_exit_rcu();
+
+	instrumentation_end();
+
+	irqentry_exit(regs, state);
+  }
+
+Note that the invocation of the actual interrupt handler is within a
+irq_enter_rcu() and irq_exit_rcu() pair.
+
+irq_enter_rcu() updates the preemption count which makes in_hardirq()
+return true, handles NOHZ tick state and interrupt time accounting. This
+means that up to the point where irq_enter_rcu() is invoked in_hardirq()
+returns false.
+
+irq_exit_rcu() handles interrupt time accounting, undoes the preemption
+count update and eventually handles soft interrupts and NOHZ tick state.
+
+In theory, the preemption count could be updated in irqentry_enter(). In
+practice, deferring this update to irq_enter_rcu() allows the preemption-count
+code to be traced, while also maintaining symmetry with irq_exit_rcu() and
+irqentry_exit(), which are described in the next paragraph. The only downside
+is that the early entry code up to irq_enter_rcu() must be aware that the
+preemption count has not yet been updated with the HARDIRQ_OFFSET state.
+
+Note that irq_exit_rcu() must remove HARDIRQ_OFFSET from the preemption count
+before it handles soft interrupts, whose handlers must run in BH context rather
+than irq-disabled context. In addition, irqentry_exit() might schedule, which
+also requires that HARDIRQ_OFFSET has been removed from the preemption count.
+
+NMI and NMI-like exceptions
+---------------------------
+
+NMIs and NMI-like exceptions (machine checks, double faults, debug
+interrupts, etc.) can hit any context and must be extra careful with
+the state.
+
+State changes for debug exceptions and machine-check exceptions depend on
+whether these exceptions happened in user-space (breakpoints or watchpoints) or
+in kernel mode (code patching). From user-space, they are treated like
+interrupts, while from kernel mode they are treated like NMIs.
+
+NMIs and other NMI-like exceptions handle state transitions without
+distinguishing between user-mode and kernel-mode origin.
+
+The state update on entry is handled in irqentry_nmi_enter() which updates
+state in the following order:
+
+  * Preemption counter
+  * Lockdep
+  * RCU / Context tracking
+  * Tracing
+
+The exit counterpart irqentry_nmi_exit() does the reverse operation in the
+reverse order.
+
+Note that the update of the preemption counter has to be the first
+operation on enter and the last operation on exit. The reason is that both
+lockdep and RCU rely on in_nmi() returning true in this case. The
+preemption count modification in the NMI entry/exit case must not be
+traced.
+
+Architecture-specific code looks like this:
+
+.. code-block:: c
+
+  noinstr void nmi(struct pt_regs *regs)
+  {
+	arch_nmi_enter(regs);
+	state = irqentry_nmi_enter(regs);
+
+	instrumentation_begin();
+	nmi_handler(regs);
+	instrumentation_end();
+
+	irqentry_nmi_exit(regs);
+  }
+
+and for e.g. a debug exception it can look like this:
+
+.. code-block:: c
+
+  noinstr void debug(struct pt_regs *regs)
+  {
+	arch_nmi_enter(regs);
+
+	debug_regs = save_debug_regs();
+
+	if (user_mode(regs)) {
+		state = irqentry_enter(regs);
+
+		instrumentation_begin();
+		user_mode_debug_handler(regs, debug_regs);
+		instrumentation_end();
+
+		irqentry_exit(regs, state);
+  	} else {
+  		state = irqentry_nmi_enter(regs);
+
+		instrumentation_begin();
+		kernel_mode_debug_handler(regs, debug_regs);
+		instrumentation_end();
+
+		irqentry_nmi_exit(regs, state);
+	}
+  }
+
+There is no combined irqentry_nmi_if_kernel() function available as the
+above cannot be handled in an exception-agnostic way.
diff --git a/Documentation/core-api/index.rst b/Documentation/core-api/index.rst
index 5de2c7a4b1b3c..972d46a5ddf62 100644
--- a/Documentation/core-api/index.rst
+++ b/Documentation/core-api/index.rst
@@ -44,6 +44,14 @@ Library functionality that is used throughout the kernel.
    timekeeping
    errseq
 
+Low level entry and exit
+========================
+
+.. toctree::
+   :maxdepth: 1
+
+   entry
+
 Concurrency primitives
 ======================
 
-- 
GitLab


From e3aa43e936d854373d9a75372aefcefebfca208f Mon Sep 17 00:00:00 2001
From: Nicolas Saenz Julienne <nsaenzju@redhat.com>
Date: Mon, 10 Jan 2022 11:50:44 +0100
Subject: [PATCH 0055/1586] Documentation: core-api: entry: Add comments about
 nesting

The topic of nesting and reentrancy in the context of early entry code
hasn't been addressed so far. So do it.

Signed-off-by: Nicolas Saenz Julienne <nsaenzju@redhat.com>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Reviewed-by: Paul E. McKenney <paulmck@kernel.org>
Link: https://lore.kernel.org/r/20220110105044.94423-2-nsaenzju@redhat.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/core-api/entry.rst | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/Documentation/core-api/entry.rst b/Documentation/core-api/entry.rst
index c6f8e22c88fe8..e12f22ab33c7b 100644
--- a/Documentation/core-api/entry.rst
+++ b/Documentation/core-api/entry.rst
@@ -105,6 +105,8 @@ has to do extra work between the various steps. In such cases it has to
 ensure that enter_from_user_mode() is called first on entry and
 exit_to_user_mode() is called last on exit.
 
+Do not nest syscalls. Nested systcalls will cause RCU and/or context tracking
+to print a warning.
 
 KVM
 ---
@@ -121,6 +123,8 @@ Task work handling is done separately for guest at the boundary of the
 vcpu_run() loop via xfer_to_guest_mode_handle_work() which is a subset of
 the work handled on return to user space.
 
+Do not nest KVM entry/exit transitions because doing so is nonsensical.
+
 Interrupts and regular exceptions
 ---------------------------------
 
@@ -180,6 +184,16 @@ before it handles soft interrupts, whose handlers must run in BH context rather
 than irq-disabled context. In addition, irqentry_exit() might schedule, which
 also requires that HARDIRQ_OFFSET has been removed from the preemption count.
 
+Even though interrupt handlers are expected to run with local interrupts
+disabled, interrupt nesting is common from an entry/exit perspective. For
+example, softirq handling happens within an irqentry_{enter,exit}() block with
+local interrupts enabled. Also, although uncommon, nothing prevents an
+interrupt handler from re-enabling interrupts.
+
+Interrupt entry/exit code doesn't strictly need to handle reentrancy, since it
+runs with local interrupts disabled. But NMIs can happen anytime, and a lot of
+the entry code is shared between the two.
+
 NMI and NMI-like exceptions
 ---------------------------
 
@@ -259,3 +273,7 @@ and for e.g. a debug exception it can look like this:
 
 There is no combined irqentry_nmi_if_kernel() function available as the
 above cannot be handled in an exception-agnostic way.
+
+NMIs can happen in any context. For example, an NMI-like exception triggered
+while handling an NMI. So NMI entry code has to be reentrant and state updates
+need to handle nesting.
-- 
GitLab


From 9c3519d2b50968ded1373e872fcc34ca3d748007 Mon Sep 17 00:00:00 2001
From: Yanteng Si <siyanteng01@gmail.com>
Date: Tue, 11 Jan 2022 10:13:14 +0800
Subject: [PATCH 0056/1586] docs/zh_CN: add vm/index translation

Translate ../vm/index.rst into Chinese.

Signed-off-by: Yanteng Si <siyanteng@loongson.cn>
Reviewed-by: Tang Yizhou <tangyizhou@huawei.com>
Reviewed-by: Alex Shi <alexs@kernel.org>
Link: https://lore.kernel.org/r/60c579b34792c4c76194c4843a695263a982b37d.1641866889.git.siyanteng@loongson.cn
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/translations/zh_CN/index.rst    |  2 +-
 Documentation/translations/zh_CN/vm/index.rst | 52 +++++++++++++++++++
 2 files changed, 53 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/translations/zh_CN/vm/index.rst

diff --git a/Documentation/translations/zh_CN/index.rst b/Documentation/translations/zh_CN/index.rst
index 08144252630c3..f831887eacfb1 100644
--- a/Documentation/translations/zh_CN/index.rst
+++ b/Documentation/translations/zh_CN/index.rst
@@ -113,6 +113,7 @@ TODOList:
    sound/index
    filesystems/index
    scheduler/index
+   vm/index
 
 TODOList:
 
@@ -140,7 +141,6 @@ TODOList:
 * gpu/index
 * security/index
 * crypto/index
-* vm/index
 * bpf/index
 * usb/index
 * PCI/index
diff --git a/Documentation/translations/zh_CN/vm/index.rst b/Documentation/translations/zh_CN/vm/index.rst
new file mode 100644
index 0000000000000..0fda40c602ac8
--- /dev/null
+++ b/Documentation/translations/zh_CN/vm/index.rst
@@ -0,0 +1,52 @@
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/vm/index.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:校译:
+
+=================
+Linux内存管理文档
+=================
+
+这是一个关于Linux内存管理（mm）子系统内部的文档集，其中有不同层次的细节，包括注释
+和邮件列表的回复，用于阐述数据结构和算法的基本情况。如果你正在寻找关于简单分配内存的建
+议，请参阅(Documentation/translations/zh_CN/core-api/memory-allocation.rst)。
+对于控制和调整指南，请参阅(Documentation/admin-guide/mm/index)。
+TODO:待引用文档集被翻译完毕后请及时修改此处）
+
+.. toctree::
+   :maxdepth: 1
+
+
+TODOLIST:
+* active_mm
+* arch_pgtable_helpers
+* balance
+* damon/index
+* free_page_reporting
+* frontswap
+* highmem
+* hmm
+* hwpoison
+* hugetlbfs_reserv
+* ksm
+* memory-model
+* mmu_notifier
+* numa
+* overcommit-accounting
+* page_migration
+* page_frags
+* page_owner
+* page_table_check
+* remap_file_pages
+* slub
+* split_page_table_lock
+* transhuge
+* unevictable-lru
+* vmalloced-kernel-stacks
+* z3fold
+* zsmalloc
-- 
GitLab


From 6f5dbb213c140f04fc6d49b70b178ee11333c5f3 Mon Sep 17 00:00:00 2001
From: Yanteng Si <siyanteng01@gmail.com>
Date: Tue, 11 Jan 2022 10:13:15 +0800
Subject: [PATCH 0057/1586] docs_zh_CN: add active_mm translation

Translatr .../vm/active_mm.rst into Chinese.

Signed-off-by: Yanteng Si <siyanteng@loongson.cn>
Reviewed-by: Alex Shi <alexs@kernel.org>
Reviewed-by: Tang Yizhou <tangyizhou@huawei.com>
Link: https://lore.kernel.org/r/99ba014bdd9550bad57db6c21653cb7314d7c2d8.1641866889.git.siyanteng@loongson.cn
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../translations/zh_CN/vm/active_mm.rst       | 85 +++++++++++++++++++
 Documentation/translations/zh_CN/vm/index.rst |  2 +-
 2 files changed, 86 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/translations/zh_CN/vm/active_mm.rst

diff --git a/Documentation/translations/zh_CN/vm/active_mm.rst b/Documentation/translations/zh_CN/vm/active_mm.rst
new file mode 100644
index 0000000000000..366609ea4f375
--- /dev/null
+++ b/Documentation/translations/zh_CN/vm/active_mm.rst
@@ -0,0 +1,85 @@
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/vm/active_mm.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:校译:
+
+
+=========
+Active MM
+=========
+
+这是一封linux之父回复开发者的一封邮件，所以翻译时我尽量保持邮件格式的完整。
+
+::
+
+ List:       linux-kernel
+ Subject:    Re: active_mm
+ From:       Linus Torvalds <torvalds () transmeta ! com>
+ Date:       1999-07-30 21:36:24
+
+ 因为我并不经常写解释，所以已经抄送到linux-kernel邮件列表，而当我做这些，
+ 且更多的人在阅读它们时，我觉得棒极了。
+
+ 1999年7月30日 星期五， David Mosberger 写道：
+ >
+ > 是否有一个简短的描述，说明task_struct中的
+ >  "mm" 和 "active_mm"应该如何使用？ (如果
+ > 这个问题在邮件列表中讨论过，我表示歉意--我刚
+ > 刚度假回来，有一段时间没能关注linux-kernel了）。
+
+ 基本上，新的设定是：
+
+  - 我们有“真实地址空间”和“匿名地址空间”。区别在于，匿名地址空间根本不关心用
+    户级页表，所以当我们做上下文切换到匿名地址空间时，我们只是让以前的地址空间
+    处于活动状态。
+
+    一个“匿名地址空间”的明显用途是任何不需要任何用户映射的线程--所有的内核线
+    程基本上都属于这一类，但即使是“真正的”线程也可以暂时说在一定时间内它们不
+    会对用户空间感兴趣，调度器不妨试着避免在切换VM状态上浪费时间。目前只有老
+    式的bdflush sync能做到这一点。
+
+  - “tsk->mm” 指向 “真实地址空间”。对于一个匿名进程来说，tsk->mm将是NULL，
+    其逻辑原因是匿名进程实际上根本就 “没有” 真正的地址空间。
+
+  - 然而，我们显然需要跟踪我们为这样的匿名用户“偷用”了哪个地址空间。为此，我们
+    有 “tsk->active_mm”，它显示了当前活动的地址空间是什么。
+
+    规则是，对于一个有真实地址空间的进程（即tsk->mm是 non-NULL），active_mm
+    显然必须与真实的mm相同。
+
+    对于一个匿名进程，tsk->mm == NULL，而tsk->active_mm是匿名进程运行时
+    “借用”的mm。当匿名进程被调度走时，借用的地址空间被返回并清除。
+
+ 为了支持所有这些，“struct mm_struct”现在有两个计数器：一个是 “mm_users”
+ 计数器，即有多少 “真正的地址空间用户”，另一个是 “mm_count”计数器，即 “lazy”
+ 用户（即匿名用户）的数量，如果有任何真正的用户，则加1。
+
+ 通常情况下，至少有一个真正的用户，但也可能是真正的用户在另一个CPU上退出，而
+ 一个lazy的用户仍在活动，所以你实际上得到的情况是，你有一个地址空间 **只**
+ 被lazy的用户使用。这通常是一个短暂的生命周期状态，因为一旦这个线程被安排给一
+ 个真正的线程，这个 “僵尸” mm就会被释放，因为 “mm_count”变成了零。
+
+ 另外，一个新的规则是，**没有人** 再把 “init_mm” 作为一个真正的MM了。
+ “init_mm”应该被认为只是一个 “没有其他上下文时的lazy上下文”，事实上，它主
+ 要是在启动时使用，当时还没有真正的VM被创建。因此，用来检查的代码
+
+   if (current->mm == &init_mm)
+
+ 一般来说，应该用
+
+   if (!current->mm)
+
+ 取代上面的写法（这更有意义--测试基本上是 “我们是否有一个用户环境”，并且通常
+ 由缺页异常处理程序和类似的东西来完成）。
+
+ 总之，我刚才在ftp.kernel.org上放了一个pre-patch-2.3.13-1，因为它稍微改
+ 变了接口以适配alpha（谁会想到呢，但alpha体系结构上下文切换代码实际上最终是
+ 最丑陋的之一--不像其他架构的MM和寄存器状态是分开的，alpha的PALcode将两者
+ 连接起来，你需要同时切换两者）。
+
+ (文档来源 http://marc.info/?l=linux-kernel&m=93337278602211&w=2)
diff --git a/Documentation/translations/zh_CN/vm/index.rst b/Documentation/translations/zh_CN/vm/index.rst
index 0fda40c602ac8..5fcdb75d8682e 100644
--- a/Documentation/translations/zh_CN/vm/index.rst
+++ b/Documentation/translations/zh_CN/vm/index.rst
@@ -21,9 +21,9 @@ TODO:待引用文档集被翻译完毕后请及时修改此处）
 .. toctree::
    :maxdepth: 1
 
+   active_mm
 
 TODOLIST:
-* active_mm
 * arch_pgtable_helpers
 * balance
 * damon/index
-- 
GitLab


From 88ba790d84e98b6f030c8efc8d3e9f042c0ab777 Mon Sep 17 00:00:00 2001
From: Yanteng Si <siyanteng01@gmail.com>
Date: Tue, 11 Jan 2022 10:13:16 +0800
Subject: [PATCH 0058/1586] docs/zh_CN: add balance translation

Translate .../vm/balance.rst into Chinese.

Signed-off-by: Yanteng Si <siyanteng@loongson.cn>
Reviewed-by: Alex Shi <alexs@kernel.org>
Reviewed-by: Tang Yizhou <tangyizhou@huawei.com>
Link: https://lore.kernel.org/r/f1e5a6bcbee3e34fe65a56ee185d9b44daf01cab.1641866889.git.siyanteng@loongson.cn
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../translations/zh_CN/vm/balance.rst         | 81 +++++++++++++++++++
 Documentation/translations/zh_CN/vm/index.rst |  2 +-
 2 files changed, 82 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/translations/zh_CN/vm/balance.rst

diff --git a/Documentation/translations/zh_CN/vm/balance.rst b/Documentation/translations/zh_CN/vm/balance.rst
new file mode 100644
index 0000000000000..e98a47ef24a8f
--- /dev/null
+++ b/Documentation/translations/zh_CN/vm/balance.rst
@@ -0,0 +1,81 @@
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/vm/balance.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:校译:
+
+
+========
+内存平衡
+========
+
+2000年1月开始，作者：Kanoj Sarcar <kanoj@sgi.com>
+
+对于 !__GFP_HIGH 和 !__GFP_KSWAPD_RECLAIM 以及非 __GFP_IO 的分配，需要进行
+内存平衡。
+
+调用者避免回收的第一个原因是调用者由于持有自旋锁或处于中断环境中而无法睡眠。第二个
+原因可能是，调用者愿意在不产生页面回收开销的情况下分配失败。这可能发生在有0阶回退
+选项的机会主义高阶分配请求中。在这种情况下，调用者可能也希望避免唤醒kswapd。
+
+__GFP_IO分配请求是为了防止文件系统死锁。
+
+在没有非睡眠分配请求的情况下，做平衡似乎是有害的。页面回收可以被懒散地启动，也就是
+说，只有在需要的时候（也就是区域的空闲内存为0），而不是让它成为一个主动的过程。
+
+也就是说，内核应该尝试从直接映射池中满足对直接映射页的请求，而不是回退到dma池中，
+这样就可以保持dma池为dma请求（不管是不是原子的）所填充。类似的争论也适用于高内存
+和直接映射的页面。相反，如果有很多空闲的dma页，最好是通过从dma池中分配一个来满足
+常规的内存请求，而不是产生常规区域平衡的开销。
+
+在2.2中，只有当空闲页总数低于总内存的1/64时，才会启动内存平衡/页面回收。如果dma
+和常规内存的比例合适，即使dma区完全空了，也很可能不会进行平衡。2.2已经在不同内存
+大小的生产机器上运行，即使有这个问题存在，似乎也做得不错。在2.3中，由于HIGHMEM的
+存在，这个问题变得更加严重。
+
+在2.3中，区域平衡可以用两种方式之一来完成：根据区域的大小（可能是低级区域的大小），
+我们可以在初始化阶段决定在平衡任何区域时应该争取多少空闲页。好的方面是，在平衡的时
+候，我们不需要看低级区的大小，坏的方面是，我们可能会因为忽略低级区可能较低的使用率
+而做过于频繁的平衡。另外，只要对分配程序稍作修改，就有可能将memclass()宏简化为一
+个简单的等式。
+
+另一个可能的解决方案是，我们只在一个区 **和** 其所有低级区的空闲内存低于该区及其
+低级区总内存的1/64时进行平衡。这就解决了2.2的平衡问题，并尽可能地保持了与2.2行为
+的接近。另外，平衡算法在各种架构上的工作方式也是一样的，这些架构有不同数量和类型的
+内存区。如果我们想变得更花哨一点，我们可以在未来为不同区域的自由页面分配不同的权重。
+
+请注意，如果普通区的大小与dma区相比是巨大的，那么在决定是否平衡普通区的时候，考虑
+空闲的dma页就变得不那么重要了。那么第一个解决方案就变得更有吸引力。
+
+所附的补丁实现了第二个解决方案。它还 “修复”了两个问题：首先，在低内存条件下，kswapd
+被唤醒，就像2.2中的非睡眠分配。第二，HIGHMEM区也被平衡了，以便给replace_with_highmem()
+一个争取获得HIGHMEM页的机会，同时确保HIGHMEM分配不会落回普通区。这也确保了HIGHMEM
+页不会被泄露（例如，在一个HIGHMEM页在交换缓存中但没有被任何人使用的情况下）。
+
+kswapd还需要知道它应该平衡哪些区。kswapd主要是在无法进行平衡的情况下需要的，可能
+是因为所有的分配请求都来自中断上下文，而所有的进程上下文都在睡眠。对于2.3，
+kswapd并不真正需要平衡高内存区，因为中断上下文并不请求高内存页。kswapd看zone
+结构体中的zone_wake_kswapd字段来决定一个区是否需要平衡。
+
+如果从进程内存和shm中偷取页面可以减轻该页面节点中任何区的内存压力，而该区的内存压力
+已经低于其水位，则会进行偷取。
+
+watemark[WMARK_MIN/WMARK_LOW/WMARK_HIGH]/low_on_memory/zone_wake_kswapd：
+这些是每个区的字段，用于确定一个区何时需要平衡。当页面数低于水位[WMARK_MIN]时，
+hysteric 的字段low_on_memory被设置。这个字段会一直被设置，直到空闲页数变成水位
+[WMARK_HIGH]。当low_on_memory被设置时，页面分配请求将尝试释放该区域的一些页面（如果
+请求中设置了GFP_WAIT）。与此相反的是，决定唤醒kswapd以释放一些区的页。这个决定不是基于
+hysteresis 的，而是当空闲页的数量低于watermark[WMARK_LOW]时就会进行；在这种情况下，
+zone_wake_kswapd也被设置。
+
+
+我所听到的（超棒的）想法：
+
+1. 动态经历应该影响平衡：可以跟踪一个区的失败请求的数量，并反馈到平衡方案中（jalvo@mbay.net）。
+
+2. 实现一个类似于replace_with_highmem()的replace_with_regular()，以保留dma页面。
+   (lkd@tantalophile.demon.co.uk)
diff --git a/Documentation/translations/zh_CN/vm/index.rst b/Documentation/translations/zh_CN/vm/index.rst
index 5fcdb75d8682e..fc39f0d60318e 100644
--- a/Documentation/translations/zh_CN/vm/index.rst
+++ b/Documentation/translations/zh_CN/vm/index.rst
@@ -22,10 +22,10 @@ TODO:待引用文档集被翻译完毕后请及时修改此处）
    :maxdepth: 1
 
    active_mm
+   balance
 
 TODOLIST:
 * arch_pgtable_helpers
-* balance
 * damon/index
 * free_page_reporting
 * frontswap
-- 
GitLab


From 2701b511e491f95d829a340c2540d602db5c63d8 Mon Sep 17 00:00:00 2001
From: wangyong <wang.yong12@zte.com.cn>
Date: Wed, 12 Jan 2022 08:04:42 -0800
Subject: [PATCH 0059/1586] docs/zh_CN: Update
 zh_CN/accounting/delay-accounting.rst

Update zh_CN/accounting/delay-accounting.rst.
The document modification has been merged which refers to the following link:
https://lore.kernel.org/all/1639583021-92977-1-git-send-email-wang.yong12@zte.com.cn/

Signed-off-by: wangyong <wang.yong12@zte.com.cn>
Reviewed-by: Yang Yang <yang.yang29@zte.com.cn>
Reviewed-by: Alex Shi <alexs@kernel.org>
Link: https://lore.kernel.org/r/1642003482-48935-1-git-send-email-wang.yong12@zte.com.cn
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../zh_CN/accounting/delay-accounting.rst     | 62 +++++++++----------
 1 file changed, 30 insertions(+), 32 deletions(-)

diff --git a/Documentation/translations/zh_CN/accounting/delay-accounting.rst b/Documentation/translations/zh_CN/accounting/delay-accounting.rst
index 67d5606e54010..f1849411018e9 100644
--- a/Documentation/translations/zh_CN/accounting/delay-accounting.rst
+++ b/Documentation/translations/zh_CN/accounting/delay-accounting.rst
@@ -17,6 +17,8 @@ a) 等待一个CPU（任务为可运行）
 b) 完成由该任务发起的块I/O同步请求
 c) 页面交换
 d) 内存回收
+e) 页缓存抖动
+f) 直接规整
 
 并将这些统计信息通过taskstats接口提供给用户空间。
 
@@ -37,10 +39,10 @@ d) 内存回收
 向用户态返回一个通用数据结构，对应每pid或每tgid的统计信息。延时计数功能填写
 该数据结构的特定字段。见
 
-     include/linux/taskstats.h
+     include/uapi/linux/taskstats.h
 
 其描述了延时计数相关字段。系统通常以计数器形式返回 CPU、同步块 I/O、交换、内存
-回收等的累积延时。
+回收、页缓存抖动、直接规整等的累积延时。
 
 取任务某计数器两个连续读数的差值，将得到任务在该时间间隔内等待对应资源的总延时。
 
@@ -72,40 +74,36 @@ kernel.task_delayacct进行开关。注意，只有在启用延时计数后启
 
 getdelays命令的一般格式::
 
-	getdelays [-t tgid] [-p pid] [-c cmd...]
+	getdelays [-dilv] [-t tgid] [-p pid]
 
 获取pid为10的任务从系统启动后的延时信息::
 
-	# ./getdelays -p 10
+	# ./getdelays -d -p 10
 	（输出信息和下例相似）
 
 获取所有tgid为5的任务从系统启动后的总延时信息::
 
-	# ./getdelays -t 5
-
-
-	CPU	count	real total	virtual total	delay total
-		7876	92005750	100000000	24001500
-	IO	count	delay total
-		0	0
-	SWAP	count	delay total
-		0	0
-	RECLAIM	count	delay total
-		0	0
-
-获取指定简单命令运行时的延时信息::
-
-  # ./getdelays -c ls /
-
-  bin   data1  data3  data5  dev  home  media  opt   root  srv        sys  usr
-  boot  data2  data4  data6  etc  lib   mnt    proc  sbin  subdomain  tmp  var
-
-
-  CPU	count	real total	virtual total	delay total
-	6	4000250		4000000		0
-  IO	count	delay total
-	0	0
-  SWAP	count	delay total
-	0	0
-  RECLAIM	count	delay total
-	0	0
+	# ./getdelays -d -t 5
+	print delayacct stats ON
+	TGID	5
+
+
+	CPU             count     real total  virtual total    delay total  delay average
+	                    8        7000000        6872122        3382277          0.423ms
+	IO              count    delay total  delay average
+	                    0              0              0ms
+	SWAP            count    delay total  delay average
+	                    0              0              0ms
+	RECLAIM         count    delay total  delay average
+	                    0              0              0ms
+	THRASHING       count    delay total  delay average
+	                    0              0              0ms
+	COMPACT         count    delay total  delay average
+	                    0              0              0ms
+
+获取pid为1的IO计数，它只和-p一起使用::
+	# ./getdelays -i -p 1
+	printing IO accounting
+	linuxrc: read=65536, write=0, cancelled_write=0
+
+上面的命令与-v一起使用，可以获取更多调试信息。
-- 
GitLab


From e2d99027da08a816ba63d0b8335dbde0ec1152c6 Mon Sep 17 00:00:00 2001
From: Yanteng Si <siyanteng01@gmail.com>
Date: Sat, 15 Jan 2022 15:15:00 +0800
Subject: [PATCH 0060/1586] docs/zh_CN: add damon index tronslation

1) Translate .../vm/damon/index.rst into Chinese.
2) add damon into .../zh_CN/vm/index.rst

Signed-off-by: Yanteng Si <siyanteng@loongson.cn>
Reviewed-by: Alex Shi <alexs@kernel.org>
Link: https://lore.kernel.org/r/8e8c7651785f1ce20766bc1b3a4fc44faedb84bb.1642230669.git.siyanteng@loongson.cn
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../translations/zh_CN/vm/damon/index.rst     | 33 +++++++++++++++++++
 Documentation/translations/zh_CN/vm/index.rst |  2 +-
 2 files changed, 34 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/translations/zh_CN/vm/damon/index.rst

diff --git a/Documentation/translations/zh_CN/vm/damon/index.rst b/Documentation/translations/zh_CN/vm/damon/index.rst
new file mode 100644
index 0000000000000..9a7b4ea91cb82
--- /dev/null
+++ b/Documentation/translations/zh_CN/vm/damon/index.rst
@@ -0,0 +1,33 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+:Original: Documentation/vm/damon/index.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:校译:
+
+
+==========================
+DAMON:数据访问监视器
+==========================
+
+DAMON是Linux内核的一个数据访问监控框架子系统。DAMON的核心机制使其成为
+（该核心机制详见(Documentation/translations/zh_CN/vm/damon/design.rst)）
+
+ - *准确度* （监测输出对DRAM级别的内存管理足够有用；但可能不适合CPU Cache级别），
+ - *轻量级* （监控开销低到可以在线应用），以及
+ - *可扩展* （无论目标工作负载的大小，开销的上限值都在恒定范围内）。
+
+因此，利用这个框架，内核的内存管理机制可以做出高级决策。会导致高数据访问监控开销的实
+验性内存管理优化工作可以再次进行。同时，在用户空间，有一些特殊工作负载的用户可以编写
+个性化的应用程序，以便更好地了解和优化他们的工作负载和系统。
+
+.. toctree::
+   :maxdepth: 2
+
+TODOLIST:
+*   faq
+*   design
+*   api
diff --git a/Documentation/translations/zh_CN/vm/index.rst b/Documentation/translations/zh_CN/vm/index.rst
index fc39f0d60318e..432bc3ef1c18c 100644
--- a/Documentation/translations/zh_CN/vm/index.rst
+++ b/Documentation/translations/zh_CN/vm/index.rst
@@ -23,10 +23,10 @@ TODO:待引用文档集被翻译完毕后请及时修改此处）
 
    active_mm
    balance
+   damon/index
 
 TODOLIST:
 * arch_pgtable_helpers
-* damon/index
 * free_page_reporting
 * frontswap
 * highmem
-- 
GitLab


From 3fd8816219311289eaec2eb9bc389146a553fe4e Mon Sep 17 00:00:00 2001
From: Yanteng Si <siyanteng01@gmail.com>
Date: Sat, 15 Jan 2022 15:15:01 +0800
Subject: [PATCH 0061/1586] docs/zh_CN: add damon faq translation

Translate .../vm/damon/faq.rst into Chinese.

Signed-off-by: Yanteng Si <siyanteng@loongson.cn>
Reviewed-by: Alex Shi <alexs@kernel.org>
Link: https://lore.kernel.org/r/30507f807a835360f57bb9498c37f4c3644b33b7.1642230669.git.siyanteng@loongson.cn
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../translations/zh_CN/vm/damon/faq.rst       | 48 +++++++++++++++++++
 .../translations/zh_CN/vm/damon/index.rst     |  4 +-
 2 files changed, 51 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/translations/zh_CN/vm/damon/faq.rst

diff --git a/Documentation/translations/zh_CN/vm/damon/faq.rst b/Documentation/translations/zh_CN/vm/damon/faq.rst
new file mode 100644
index 0000000000000..07b4ac19407de
--- /dev/null
+++ b/Documentation/translations/zh_CN/vm/damon/faq.rst
@@ -0,0 +1,48 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+:Original: Documentation/vm/damon/faq.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:校译:
+
+
+========
+常见问题
+========
+
+为什么是一个新的子系统，而不是扩展perf或其他用户空间工具？
+==========================================================
+
+首先，因为它需要尽可能的轻量级，以便可以在线使用，所以应该避免任何不必要的开销，如内核-用户
+空间的上下文切换成本。第二，DAMON的目标是被包括内核在内的其他程序所使用。因此，对特定工具
+（如perf）的依赖性是不可取的。这就是DAMON在内核空间实现的两个最大的原因。
+
+
+“闲置页面跟踪” 或 “perf mem” 可以替代DAMON吗？
+==============================================
+
+闲置页跟踪是物理地址空间访问检查的一个低层次的原始方法。“perf mem”也是类似的，尽管它可以
+使用采样来减少开销。另一方面，DAMON是一个更高层次的框架，用于监控各种地址空间。它专注于内
+存管理优化，并提供复杂的精度/开销处理机制。因此，“空闲页面跟踪” 和 “perf mem” 可以提供
+DAMON输出的一个子集，但不能替代DAMON。
+
+
+DAMON是否只支持虚拟内存？
+=========================
+
+不，DAMON的核心是独立于地址空间的。用户可以在DAMON核心上实现和配置特定地址空间的低级原始
+部分，包括监测目标区域的构造和实际的访问检查。通过这种方式，DAMON用户可以用任何访问检查技
+术来监测任何地址空间。
+
+尽管如此，DAMON默认为虚拟内存和物理内存提供了基于vma/rmap跟踪和PTE访问位检查的地址空间
+相关功能的实现，以供参考和方便使用。
+
+
+我可以简单地监测页面的粒度吗？
+==============================
+
+是的，你可以通过设置 ``min_nr_regions`` 属性高于工作集大小除以页面大小的值来实现。
+因为监视目标区域的大小被强制为 ``>=page size`` ，所以区域分割不会产生任何影响。
diff --git a/Documentation/translations/zh_CN/vm/damon/index.rst b/Documentation/translations/zh_CN/vm/damon/index.rst
index 9a7b4ea91cb82..dfa82244cbe8c 100644
--- a/Documentation/translations/zh_CN/vm/damon/index.rst
+++ b/Documentation/translations/zh_CN/vm/damon/index.rst
@@ -27,7 +27,9 @@ DAMON是Linux内核的一个数据访问监控框架子系统。DAMON的核心
 .. toctree::
    :maxdepth: 2
 
+   faq
+
 TODOLIST:
-*   faq
+
 *   design
 *   api
-- 
GitLab


From 18e74934dc4a65ff1dfb24b83f9778ded0f247dd Mon Sep 17 00:00:00 2001
From: Yanteng Si <siyanteng01@gmail.com>
Date: Sat, 15 Jan 2022 15:15:02 +0800
Subject: [PATCH 0062/1586] docs/zh_CN: add damon design translation

Translate .../vm/damon/design.rst into Chinese.

Signed-off-by: Yanteng Si <siyanteng@loongson.cn>
Reviewed-by: Alex Shi <alexs@kernel.org>
Link: https://lore.kernel.org/r/2685ed7d446620b260c20158685728c3adb5e0fe.1642230669.git.siyanteng@loongson.cn
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../translations/zh_CN/vm/damon/design.rst    | 139 ++++++++++++++++++
 .../translations/zh_CN/vm/damon/index.rst     |   2 +-
 2 files changed, 140 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/translations/zh_CN/vm/damon/design.rst

diff --git a/Documentation/translations/zh_CN/vm/damon/design.rst b/Documentation/translations/zh_CN/vm/damon/design.rst
new file mode 100644
index 0000000000000..05f66c02740a9
--- /dev/null
+++ b/Documentation/translations/zh_CN/vm/damon/design.rst
@@ -0,0 +1,139 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+:Original: Documentation/vm/damon/design.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:校译:
+
+
+====
+设计
+====
+
+可配置的层
+==========
+
+DAMON提供了数据访问监控功能，同时使其准确性和开销可控。基本的访问监控需要依赖于目标地址空间
+并为之优化的基元。另一方面，作为DAMON的核心，准确性和开销的权衡机制是在纯逻辑空间中。DAMON
+将这两部分分离在不同的层中，并定义了它的接口，以允许各种低层次的基元实现与核心逻辑的配置。
+
+由于这种分离的设计和可配置的接口，用户可以通过配置核心逻辑和适当的低级基元实现来扩展DAMON的
+任何地址空间。如果没有提供合适的，用户可以自己实现基元。
+
+例如，物理内存、虚拟内存、交换空间、那些特定的进程、NUMA节点、文件和支持的内存设备将被支持。
+另外，如果某些架构或设备支持特殊的优化访问检查基元，这些基元将很容易被配置。
+
+
+特定地址空间基元的参考实现
+==========================
+
+基本访问监测的低级基元被定义为两部分。:
+
+1. 确定地址空间的监测目标地址范围
+2. 目标空间中特定地址范围的访问检查。
+
+DAMON目前为物理和虚拟地址空间提供了基元的实现。下面两个小节描述了这些工作的方式。
+
+
+基于VMA的目标地址范围构造
+-------------------------
+
+这仅仅是针对虚拟地址空间基元的实现。对于物理地址空间，只是要求用户手动设置监控目标地址范围。
+
+在进程的超级巨大的虚拟地址空间中，只有小部分被映射到物理内存并被访问。因此，跟踪未映射的地
+址区域只是一种浪费。然而，由于DAMON可以使用自适应区域调整机制来处理一定程度的噪声，所以严
+格来说，跟踪每一个映射并不是必须的，但在某些情况下甚至会产生很高的开销。也就是说，监测目标
+内部过于巨大的未映射区域应该被移除，以不占用自适应机制的时间。
+
+出于这个原因，这个实现将复杂的映射转换为三个不同的区域，覆盖地址空间的每个映射区域。这三个
+区域之间的两个空隙是给定地址空间中两个最大的未映射区域。这两个最大的未映射区域是堆和最上面
+的mmap()区域之间的间隙，以及在大多数情况下最下面的mmap()区域和堆之间的间隙。因为这些间隙
+在通常的地址空间中是异常巨大的，排除这些间隙就足以做出合理的权衡。下面详细说明了这一点::
+
+    <heap>
+    <BIG UNMAPPED REGION 1>
+    <uppermost mmap()-ed region>
+    (small mmap()-ed regions and munmap()-ed regions)
+    <lowermost mmap()-ed region>
+    <BIG UNMAPPED REGION 2>
+    <stack>
+
+
+基于PTE访问位的访问检查
+-----------------------
+
+物理和虚拟地址空间的实现都使用PTE Accessed-bit进行基本访问检查。唯一的区别在于从地址中
+找到相关的PTE访问位的方式。虚拟地址的实现是为该地址的目标任务查找页表，而物理地址的实现则
+是查找与该地址有映射关系的每一个页表。通过这种方式，实现者找到并清除下一个采样目标地址的位，
+并检查该位是否在一个采样周期后再次设置。这可能会干扰其他使用访问位的内核子系统，即空闲页跟
+踪和回收逻辑。为了避免这种干扰，DAMON使其与空闲页面跟踪相互排斥，并使用 ``PG_idle`` 和
+``PG_young`` 页面标志来解决与回收逻辑的冲突，就像空闲页面跟踪那样。
+
+
+独立于地址空间的核心机制
+========================
+
+下面四个部分分别描述了DAMON的核心机制和五个监测属性，即 ``采样间隔`` 、 ``聚集间隔`` 、
+``区域更新间隔`` 、 ``最小区域数`` 和 ``最大区域数`` 。
+
+
+访问频率监测
+------------
+
+DAMON的输出显示了在给定的时间内哪些页面的访问频率是多少。访问频率的分辨率是通过设置
+``采样间隔`` 和 ``聚集间隔`` 来控制的。详细地说，DAMON检查每个 ``采样间隔`` 对每
+个页面的访问，并将结果汇总。换句话说，计算每个页面的访问次数。在每个 ``聚合间隔`` 过
+去后，DAMON调用先前由用户注册的回调函数，以便用户可以阅读聚合的结果，然后再清除这些结
+果。这可以用以下简单的伪代码来描述::
+
+    while monitoring_on:
+        for page in monitoring_target:
+            if accessed(page):
+                nr_accesses[page] += 1
+        if time() % aggregation_interval == 0:
+            for callback in user_registered_callbacks:
+                callback(monitoring_target, nr_accesses)
+            for page in monitoring_target:
+                nr_accesses[page] = 0
+        sleep(sampling interval)
+
+这种机制的监测开销将随着目标工作负载规模的增长而任意增加。
+
+
+基于区域的抽样调查
+------------------
+
+为了避免开销的无限制增加，DAMON将假定具有相同访问频率的相邻页面归入一个区域。只要保持
+这个假设（一个区域内的页面具有相同的访问频率），该区域内就只需要检查一个页面。因此，对
+于每个 ``采样间隔`` ，DAMON在每个区域中随机挑选一个页面，等待一个 ``采样间隔`` ，检
+查该页面是否同时被访问，如果被访问则增加该区域的访问频率。因此，监测开销是可以通过设置
+区域的数量来控制的。DAMON允许用户设置最小和最大的区域数量来进行权衡。
+
+然而，如果假设没有得到保证，这个方案就不能保持输出的质量。
+
+
+适应性区域调整
+--------------
+
+即使最初的监测目标区域被很好地构建以满足假设（同一区域内的页面具有相似的访问频率），数
+据访问模式也会被动态地改变。这将导致监测质量下降。为了尽可能地保持假设，DAMON根据每个
+区域的访问频率自适应地进行合并和拆分。
+
+对于每个 ``聚集区间`` ，它比较相邻区域的访问频率，如果频率差异较小，就合并这些区域。
+然后，在它报告并清除每个区域的聚合接入频率后，如果区域总数不超过用户指定的最大区域数，
+它将每个区域拆分为两个或三个区域。
+
+通过这种方式，DAMON提供了其最佳的质量和最小的开销，同时保持了用户为其权衡设定的界限。
+
+
+动态目标空间更新处理
+--------------------
+
+监测目标地址范围可以动态改变。例如，虚拟内存可以动态地被映射和解映射。物理内存可以被
+热插拔。
+
+由于在某些情况下变化可能相当频繁，DAMON检查动态内存映射的变化，并仅在用户指定的时间
+间隔（ ``区域更新间隔`` ）内将其应用于抽象的目标区域。
diff --git a/Documentation/translations/zh_CN/vm/damon/index.rst b/Documentation/translations/zh_CN/vm/damon/index.rst
index dfa82244cbe8c..077db7e4326f0 100644
--- a/Documentation/translations/zh_CN/vm/damon/index.rst
+++ b/Documentation/translations/zh_CN/vm/damon/index.rst
@@ -28,8 +28,8 @@ DAMON是Linux内核的一个数据访问监控框架子系统。DAMON的核心
    :maxdepth: 2
 
    faq
+   design
 
 TODOLIST:
 
-*   design
 *   api
-- 
GitLab


From 722cc663d79c80051cbd57de0336582c8e3cbf93 Mon Sep 17 00:00:00 2001
From: Yanteng Si <siyanteng01@gmail.com>
Date: Sat, 15 Jan 2022 15:15:03 +0800
Subject: [PATCH 0063/1586] docs/zh_CN: add damon api translation

Translate .../vm/damon/api.rst into Chinese.

Signed-off-by: Yanteng Si <siyanteng@loongson.cn>
Reviewed-by: Alex Shi <alexs@kernel.org>
Link: https://lore.kernel.org/r/3d81dbc79e0f982edd68fb9dfee6f0ccb47f7710.1642230669.git.siyanteng@loongson.cn
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../translations/zh_CN/vm/damon/api.rst       | 32 +++++++++++++++++++
 .../translations/zh_CN/vm/damon/index.rst     |  4 +--
 2 files changed, 33 insertions(+), 3 deletions(-)
 create mode 100644 Documentation/translations/zh_CN/vm/damon/api.rst

diff --git a/Documentation/translations/zh_CN/vm/damon/api.rst b/Documentation/translations/zh_CN/vm/damon/api.rst
new file mode 100644
index 0000000000000..21143eea4ebed
--- /dev/null
+++ b/Documentation/translations/zh_CN/vm/damon/api.rst
@@ -0,0 +1,32 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+:Original: Documentation/vm/damon/api.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:校译:
+
+
+=======
+API参考
+=======
+
+内核空间的程序可以使用下面的API来使用DAMON的每个功能。你所需要做的就是引用 ``damon.h`` ，
+它位于源代码树的include/linux/。
+
+结构体
+======
+
+该API在以下内核代码中:
+
+include/linux/damon.h
+
+
+函数
+====
+
+该API在以下内核代码中:
+
+mm/damon/core.c
diff --git a/Documentation/translations/zh_CN/vm/damon/index.rst b/Documentation/translations/zh_CN/vm/damon/index.rst
index 077db7e4326f0..84d36d90c9b0f 100644
--- a/Documentation/translations/zh_CN/vm/damon/index.rst
+++ b/Documentation/translations/zh_CN/vm/damon/index.rst
@@ -29,7 +29,5 @@ DAMON是Linux内核的一个数据访问监控框架子系统。DAMON的核心
 
    faq
    design
+   api
 
-TODOLIST:
-
-*   api
-- 
GitLab


From adeacecbd36cb820f65e93194baba17decdded32 Mon Sep 17 00:00:00 2001
From: Yanteng Si <siyanteng01@gmail.com>
Date: Thu, 27 Jan 2022 09:47:53 +0800
Subject: [PATCH 0064/1586] docs/zh_CN: add free_page_reporting translation

Translate .../vm/free_page_reporting.rst into Chinese.

Signed-off-by: Yanteng Si <siyanteng@loongson.cn>
Reviewed-by: Alex Shi <alexs@kernel.org>
Link: https://lore.kernel.org/r/4ef77c1be8319fc45b18e9f4c41986095d794562.1643246827.git.siyanteng@loongson.cn
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../zh_CN/vm/free_page_reporting.rst          | 38 +++++++++++++++++++
 Documentation/translations/zh_CN/vm/index.rst |  2 +-
 2 files changed, 39 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/translations/zh_CN/vm/free_page_reporting.rst

diff --git a/Documentation/translations/zh_CN/vm/free_page_reporting.rst b/Documentation/translations/zh_CN/vm/free_page_reporting.rst
new file mode 100644
index 0000000000000..31d6c34b956b4
--- /dev/null
+++ b/Documentation/translations/zh_CN/vm/free_page_reporting.rst
@@ -0,0 +1,38 @@
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/vm/_free_page_reporting.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:校译:
+
+==========
+空闲页报告
+==========
+
+空闲页报告是一个API，设备可以通过它来注册接收系统当前未使用的页面列表。这在虚拟
+化的情况下是很有用的，客户机能够使用这些数据来通知管理器它不再使用内存中的某些页
+面。
+
+对于驱动，通常是气球驱动要使用这个功能，它将分配和初始化一个page_reporting_dev_info
+结构体。它要填充的结构体中的字段是用于处理散点列表的 "report" 函数指针。它还必
+须保证每次调用该函数时能处理至少相当于PAGE_REPORTING_CAPACITY的散点列表条目。
+假设没有其他页面报告设备已经注册， 对page_reporting_register的调用将向报告框
+架注册页面报告接口。
+
+一旦注册，页面报告API将开始向驱动报告成批的页面。API将在接口被注册后2秒开始报告
+页面，并在任何足够高的页面被释放之后2秒继续报告。
+
+报告的页面将被存储在传递给报告函数的散列表中，最后一个条目的结束位被设置在条目
+nent-1中。 当页面被报告函数处理时，分配器将无法访问它们。一旦报告函数完成，这些
+页将被返回到它们所获得的自由区域。
+
+在移除使用空闲页报告的驱动之前，有必要调用page_reporting_unregister，以移除
+目前被空闲页报告使用的page_reporting_dev_info结构体。这样做将阻止进一步的报
+告通过该接口发出。如果另一个驱动或同一驱动被注册，它就有可能恢复前一个驱动在报告
+空闲页方面的工作。
+
+
+Alexander Duyck, 2019年12月04日
diff --git a/Documentation/translations/zh_CN/vm/index.rst b/Documentation/translations/zh_CN/vm/index.rst
index 432bc3ef1c18c..13df8321f2426 100644
--- a/Documentation/translations/zh_CN/vm/index.rst
+++ b/Documentation/translations/zh_CN/vm/index.rst
@@ -24,6 +24,7 @@ TODO:待引用文档集被翻译完毕后请及时修改此处）
    active_mm
    balance
    damon/index
+   free_page_reporting
 
 TODOLIST:
 * arch_pgtable_helpers
@@ -33,7 +34,6 @@ TODOLIST:
 * hmm
 * hwpoison
 * hugetlbfs_reserv
-* ksm
 * memory-model
 * mmu_notifier
 * numa
-- 
GitLab


From 4c97fdb06b9884da8682c869303b659a25c0b952 Mon Sep 17 00:00:00 2001
From: Yanteng Si <siyanteng01@gmail.com>
Date: Thu, 27 Jan 2022 09:47:54 +0800
Subject: [PATCH 0065/1586] docs/zh_CN: add highmem translation

Translate .../vm/highmem.rst into Chenese.

Signed-off-by: Yanteng Si <siyanteng@loongson.cn>
Reviewed-by: Alex Shi <alexs@kernel.org>
Link: https://lore.kernel.org/r/51e088d1e8659b9411534a5a3ad03d88c79a5297.1643246827.git.siyanteng@loongson.cn
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../translations/zh_CN/vm/highmem.rst         | 128 ++++++++++++++++++
 Documentation/translations/zh_CN/vm/index.rst |   2 +-
 2 files changed, 129 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/translations/zh_CN/vm/highmem.rst

diff --git a/Documentation/translations/zh_CN/vm/highmem.rst b/Documentation/translations/zh_CN/vm/highmem.rst
new file mode 100644
index 0000000000000..018838e58c3e7
--- /dev/null
+++ b/Documentation/translations/zh_CN/vm/highmem.rst
@@ -0,0 +1,128 @@
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/vm/highmem.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:校译:
+
+==========
+高内存处理
+==========
+
+作者: Peter Zijlstra <a.p.zijlstra@chello.nl>
+
+.. contents:: :local:
+
+高内存是什么？
+==============
+
+当物理内存的大小接近或超过虚拟内存的最大大小时，就会使用高内存（highmem）。在这一点上，内
+核不可能在任何时候都保持所有可用的物理内存的映射。这意味着内核需要开始使用它想访问的物理内
+存的临时映射。
+
+没有被永久映射覆盖的那部分（物理）内存就是我们所说的 "高内存"。对于这个边界的确切位置，有
+各种架构上的限制。
+
+例如，在i386架构中，我们选择将内核映射到每个进程的虚拟空间，这样我们就不必为内核的进入/退
+出付出全部的TLB作废代价。这意味着可用的虚拟内存空间（i386上为4GiB）必须在用户和内核空间之
+间进行划分。
+
+使用这种方法的架构的传统分配方式是3:1，3GiB用于用户空间，顶部的1GiB用于内核空间。::
+
+		+--------+ 0xffffffff
+		| Kernel |
+		+--------+ 0xc0000000
+		|        |
+		| User   |
+		|        |
+		+--------+ 0x00000000
+
+这意味着内核在任何时候最多可以映射1GiB的物理内存，但是由于我们需要虚拟地址空间来做其他事
+情--包括访问其余物理内存的临时映射--实际的直接映射通常会更少（通常在~896MiB左右）。
+
+其他有mm上下文标签的TLB的架构可以有独立的内核和用户映射。然而，一些硬件（如一些ARM）在使
+用mm上下文标签时，其虚拟空间有限。
+
+
+临时虚拟映射
+============
+
+内核包含几种创建临时映射的方法。:
+
+* vmap().  这可以用来将多个物理页长期映射到一个连续的虚拟空间。它需要synchronization
+  来解除映射。
+
+* kmap().  这允许对单个页面进行短期映射。它需要synchronization，但在一定程度上被摊销。
+  当以嵌套方式使用时，它也很容易出现死锁，因此不建议在新代码中使用它。
+
+* kmap_atomic().  这允许对单个页面进行非常短的时间映射。由于映射被限制在发布它的CPU上，
+  它表现得很好，但发布任务因此被要求留在该CPU上直到它完成，以免其他任务取代它的映射。
+
+  kmap_atomic() 也可以由中断上下文使用，因为它不睡眠，而且调用者可能在调用kunmap_atomic()
+  之后才睡眠。
+
+  可以假设k[un]map_atomic()不会失败。
+
+
+使用kmap_atomic
+===============
+
+何时何地使用 kmap_atomic() 是很直接的。当代码想要访问一个可能从高内存（见__GFP_HIGHMEM）
+分配的页面的内容时，例如在页缓存中的页面，就会使用它。该API有两个函数，它们的使用方式与
+下面类似::
+
+	/* 找到感兴趣的页面。 */
+	struct page *page = find_get_page(mapping, offset);
+
+	/* 获得对该页内容的访问权。 */
+	void *vaddr = kmap_atomic(page);
+
+	/* 对该页的内容做一些处理。 */
+	memset(vaddr, 0, PAGE_SIZE);
+
+	/* 解除该页面的映射。 */
+	kunmap_atomic(vaddr);
+
+注意，kunmap_atomic()调用的是kmap_atomic()调用的结果而不是参数。
+
+如果你需要映射两个页面，因为你想从一个页面复制到另一个页面，你需要保持kmap_atomic调用严
+格嵌套，如::
+
+	vaddr1 = kmap_atomic(page1);
+	vaddr2 = kmap_atomic(page2);
+
+	memcpy(vaddr1, vaddr2, PAGE_SIZE);
+
+	kunmap_atomic(vaddr2);
+	kunmap_atomic(vaddr1);
+
+
+临时映射的成本
+==============
+
+创建临时映射的代价可能相当高。体系架构必须操作内核的页表、数据TLB和/或MMU的寄存器。
+
+如果CONFIG_HIGHMEM没有被设置，那么内核会尝试用一点计算来创建映射，将页面结构地址转换成
+指向页面内容的指针，而不是去捣鼓映射。在这种情况下，解映射操作可能是一个空操作。
+
+如果CONFIG_MMU没有被设置，那么就不可能有临时映射和高内存。在这种情况下，也将使用计算方法。
+
+
+i386 PAE
+========
+
+在某些情况下，i386 架构将允许你在 32 位机器上安装多达 64GiB 的内存。但这有一些后果:
+
+* Linux需要为系统中的每个页面建立一个页帧结构，而且页帧需要驻在永久映射中，这意味着：
+
+* 你最多可以有896M/sizeof(struct page)页帧；由于页结构体是32字节的，所以最终会有
+  112G的页；然而，内核需要在内存中存储更多的页帧......
+
+* PAE使你的页表变大--这使系统变慢，因为更多的数据需要在TLB填充等方面被访问。一个好处
+  是，PAE有更多的PTE位，可以提供像NX和PAT这样的高级功能。
+
+一般的建议是，你不要在32位机器上使用超过8GiB的空间--尽管更多的空间可能对你和你的工作
+量有用，但你几乎是靠你自己--不要指望内核开发者真的会很关心事情的进展情况。
diff --git a/Documentation/translations/zh_CN/vm/index.rst b/Documentation/translations/zh_CN/vm/index.rst
index 13df8321f2426..2f9834eb9475c 100644
--- a/Documentation/translations/zh_CN/vm/index.rst
+++ b/Documentation/translations/zh_CN/vm/index.rst
@@ -25,12 +25,12 @@ TODO:待引用文档集被翻译完毕后请及时修改此处）
    balance
    damon/index
    free_page_reporting
+   highmem
 
 TODOLIST:
 * arch_pgtable_helpers
 * free_page_reporting
 * frontswap
-* highmem
 * hmm
 * hwpoison
 * hugetlbfs_reserv
-- 
GitLab


From 869f496e1aa6d2b9a8653d65aa7040970f76627a Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 27 Jan 2022 18:32:58 +0200
Subject: [PATCH 0066/1586] docs: process: submitting-patches: Clarify the
 Reported-by usage

It's unclear from "Submitting Patches" documentation that Reported-by
is not supposed to be used against new features. (It's more clear
in the section 5.4 "Patch formatting and changelogs" of the "A guide
to the Kernel Development Process", where it suggests that change
should fix something existing in the kernel. Clarify the Reported-by
usage in the "Submitting Patches".

Reported-by: Florian Eckert <fe@dev.tdt.de>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Link: https://lore.kernel.org/r/20220127163258.48482-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/process/submitting-patches.rst | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Documentation/process/submitting-patches.rst b/Documentation/process/submitting-patches.rst
index 31ea120ce531c..fb496b2ebfd38 100644
--- a/Documentation/process/submitting-patches.rst
+++ b/Documentation/process/submitting-patches.rst
@@ -495,7 +495,8 @@ Using Reported-by:, Tested-by:, Reviewed-by:, Suggested-by: and Fixes:
 The Reported-by tag gives credit to people who find bugs and report them and it
 hopefully inspires them to help us again in the future.  Please note that if
 the bug was reported in private, then ask for permission first before using the
-Reported-by tag.
+Reported-by tag. The tag is intended for bugs; please do not use it to credit
+feature requests.
 
 A Tested-by: tag indicates that the patch has been successfully tested (in
 some environment) by the person named.  This tag informs maintainers that
-- 
GitLab


From ecff30575b5ad0eda149aadad247b7f75411fd47 Mon Sep 17 00:00:00 2001
From: Casey Schaufler <casey@schaufler-ca.com>
Date: Thu, 27 Jan 2022 04:51:00 +0000
Subject: [PATCH 0067/1586] LSM: general protection fault in legacy_parse_param

The usual LSM hook "bail on fail" scheme doesn't work for cases where
a security module may return an error code indicating that it does not
recognize an input.  In this particular case Smack sees a mount option
that it recognizes, and returns 0. A call to a BPF hook follows, which
returns -ENOPARAM, which confuses the caller because Smack has processed
its data.

The SELinux hook incorrectly returns 1 on success. There was a time
when this was correct, however the current expectation is that it
return 0 on success. This is repaired.

Reported-by: syzbot+d1e3b1d92d25abf97943@syzkaller.appspotmail.com
Signed-off-by: Casey Schaufler <casey@schaufler-ca.com>
Acked-by: James Morris <jamorris@linux.microsoft.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 security/security.c      | 17 +++++++++++++++--
 security/selinux/hooks.c |  5 ++---
 2 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/security/security.c b/security/security.c
index 3d4eb474f35b0..e649c8691be27 100644
--- a/security/security.c
+++ b/security/security.c
@@ -884,9 +884,22 @@ int security_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc)
 	return call_int_hook(fs_context_dup, 0, fc, src_fc);
 }
 
-int security_fs_context_parse_param(struct fs_context *fc, struct fs_parameter *param)
+int security_fs_context_parse_param(struct fs_context *fc,
+				    struct fs_parameter *param)
 {
-	return call_int_hook(fs_context_parse_param, -ENOPARAM, fc, param);
+	struct security_hook_list *hp;
+	int trc;
+	int rc = -ENOPARAM;
+
+	hlist_for_each_entry(hp, &security_hook_heads.fs_context_parse_param,
+			     list) {
+		trc = hp->hook.fs_context_parse_param(fc, param);
+		if (trc == 0)
+			rc = 0;
+		else if (trc != -ENOPARAM)
+			return trc;
+	}
+	return rc;
 }
 
 int security_sb_alloc(struct super_block *sb)
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 221e642025f53..9e3658e9e7ca7 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -2859,10 +2859,9 @@ static int selinux_fs_context_parse_param(struct fs_context *fc,
 		return opt;
 
 	rc = selinux_add_opt(opt, param->string, &fc->security);
-	if (!rc) {
+	if (!rc)
 		param->string = NULL;
-		rc = 1;
-	}
+
 	return rc;
 }
 
-- 
GitLab


From 660ff26080f587052c338094a187e8ecc758d6e9 Mon Sep 17 00:00:00 2001
From: Kai Ye <yekai13@huawei.com>
Date: Tue, 4 Jan 2022 16:29:16 +0800
Subject: [PATCH 0068/1586] Documentation: use the tabs on all acc
 documentation

Use the tabs on all Hisilicon Accelerator documentation. including
hpre, sec, zip.

Signed-off-by: Kai Ye <yekai13@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 Documentation/ABI/testing/debugfs-hisi-hpre | 168 ++++++++++----------
 Documentation/ABI/testing/debugfs-hisi-sec  | 136 ++++++++--------
 Documentation/ABI/testing/debugfs-hisi-zip  | 136 ++++++++--------
 3 files changed, 220 insertions(+), 220 deletions(-)

diff --git a/Documentation/ABI/testing/debugfs-hisi-hpre b/Documentation/ABI/testing/debugfs-hisi-hpre
index b4be5f1db4b77..68d4dcb1538d9 100644
--- a/Documentation/ABI/testing/debugfs-hisi-hpre
+++ b/Documentation/ABI/testing/debugfs-hisi-hpre
@@ -1,140 +1,140 @@
-What:           /sys/kernel/debug/hisi_hpre/<bdf>/cluster[0-3]/regs
-Date:           Sep 2019
-Contact:        linux-crypto@vger.kernel.org
-Description:    Dump debug registers from the HPRE cluster.
+What:		/sys/kernel/debug/hisi_hpre/<bdf>/cluster[0-3]/regs
+Date:		Sep 2019
+Contact:	linux-crypto@vger.kernel.org
+Description:	Dump debug registers from the HPRE cluster.
 		Only available for PF.
 
-What:           /sys/kernel/debug/hisi_hpre/<bdf>/cluster[0-3]/cluster_ctrl
-Date:           Sep 2019
-Contact:        linux-crypto@vger.kernel.org
-Description:    Write the HPRE core selection in the cluster into this file,
+What:		/sys/kernel/debug/hisi_hpre/<bdf>/cluster[0-3]/cluster_ctrl
+Date:		Sep 2019
+Contact:	linux-crypto@vger.kernel.org
+Description:	Write the HPRE core selection in the cluster into this file,
 		and then we can read the debug information of the core.
 		Only available for PF.
 
-What:           /sys/kernel/debug/hisi_hpre/<bdf>/rdclr_en
-Date:           Sep 2019
-Contact:        linux-crypto@vger.kernel.org
-Description:    HPRE cores debug registers read clear control. 1 means enable
+What:		/sys/kernel/debug/hisi_hpre/<bdf>/rdclr_en
+Date:		Sep 2019
+Contact:	linux-crypto@vger.kernel.org
+Description:	HPRE cores debug registers read clear control. 1 means enable
 		register read clear, otherwise 0. Writing to this file has no
 		functional effect, only enable or disable counters clear after
 		reading of these registers.
 		Only available for PF.
 
-What:           /sys/kernel/debug/hisi_hpre/<bdf>/current_qm
-Date:           Sep 2019
-Contact:        linux-crypto@vger.kernel.org
-Description:    One HPRE controller has one PF and multiple VFs, each function
+What:		/sys/kernel/debug/hisi_hpre/<bdf>/current_qm
+Date:		Sep 2019
+Contact:	linux-crypto@vger.kernel.org
+Description:	One HPRE controller has one PF and multiple VFs, each function
 		has a QM. Select the QM which below qm refers to.
 		Only available for PF.
 
-What:           /sys/kernel/debug/hisi_hpre/<bdf>/regs
-Date:           Sep 2019
-Contact:        linux-crypto@vger.kernel.org
-Description:    Dump debug registers from the HPRE.
+What:		/sys/kernel/debug/hisi_hpre/<bdf>/regs
+Date:		Sep 2019
+Contact:	linux-crypto@vger.kernel.org
+Description:	Dump debug registers from the HPRE.
 		Only available for PF.
 
-What:           /sys/kernel/debug/hisi_hpre/<bdf>/qm/regs
-Date:           Sep 2019
-Contact:        linux-crypto@vger.kernel.org
-Description:    Dump debug registers from the QM.
+What:		/sys/kernel/debug/hisi_hpre/<bdf>/qm/regs
+Date:		Sep 2019
+Contact:	linux-crypto@vger.kernel.org
+Description:	Dump debug registers from the QM.
 		Available for PF and VF in host. VF in guest currently only
 		has one debug register.
 
-What:           /sys/kernel/debug/hisi_hpre/<bdf>/qm/current_q
-Date:           Sep 2019
-Contact:        linux-crypto@vger.kernel.org
-Description:    One QM may contain multiple queues. Select specific queue to
+What:		/sys/kernel/debug/hisi_hpre/<bdf>/qm/current_q
+Date:		Sep 2019
+Contact:	linux-crypto@vger.kernel.org
+Description:	One QM may contain multiple queues. Select specific queue to
 		show its debug registers in above regs.
 		Only available for PF.
 
-What:           /sys/kernel/debug/hisi_hpre/<bdf>/qm/clear_enable
-Date:           Sep 2019
-Contact:        linux-crypto@vger.kernel.org
-Description:    QM debug registers(regs) read clear control. 1 means enable
+What:		/sys/kernel/debug/hisi_hpre/<bdf>/qm/clear_enable
+Date:		Sep 2019
+Contact:	linux-crypto@vger.kernel.org
+Description:	QM debug registers(regs) read clear control. 1 means enable
 		register read clear, otherwise 0.
 		Writing to this file has no functional effect, only enable or
 		disable counters clear after reading of these registers.
 		Only available for PF.
 
-What:           /sys/kernel/debug/hisi_hpre/<bdf>/qm/err_irq
-Date:           Apr 2020
-Contact:        linux-crypto@vger.kernel.org
-Description:    Dump the number of invalid interrupts for
+What:		/sys/kernel/debug/hisi_hpre/<bdf>/qm/err_irq
+Date:		Apr 2020
+Contact:	linux-crypto@vger.kernel.org
+Description:	Dump the number of invalid interrupts for
 		QM task completion.
 		Available for both PF and VF, and take no other effect on HPRE.
 
-What:           /sys/kernel/debug/hisi_hpre/<bdf>/qm/aeq_irq
-Date:           Apr 2020
-Contact:        linux-crypto@vger.kernel.org
-Description:    Dump the number of QM async event queue interrupts.
+What:		/sys/kernel/debug/hisi_hpre/<bdf>/qm/aeq_irq
+Date:		Apr 2020
+Contact:	linux-crypto@vger.kernel.org
+Description:	Dump the number of QM async event queue interrupts.
 		Available for both PF and VF, and take no other effect on HPRE.
 
-What:           /sys/kernel/debug/hisi_hpre/<bdf>/qm/abnormal_irq
-Date:           Apr 2020
-Contact:        linux-crypto@vger.kernel.org
-Description:    Dump the number of interrupts for QM abnormal event.
+What:		/sys/kernel/debug/hisi_hpre/<bdf>/qm/abnormal_irq
+Date:		Apr 2020
+Contact:	linux-crypto@vger.kernel.org
+Description:	Dump the number of interrupts for QM abnormal event.
 		Available for both PF and VF, and take no other effect on HPRE.
 
-What:           /sys/kernel/debug/hisi_hpre/<bdf>/qm/create_qp_err
-Date:           Apr 2020
-Contact:        linux-crypto@vger.kernel.org
-Description:    Dump the number of queue allocation errors.
+What:		/sys/kernel/debug/hisi_hpre/<bdf>/qm/create_qp_err
+Date:		Apr 2020
+Contact:	linux-crypto@vger.kernel.org
+Description:	Dump the number of queue allocation errors.
 		Available for both PF and VF, and take no other effect on HPRE.
 
-What:           /sys/kernel/debug/hisi_hpre/<bdf>/qm/mb_err
-Date:           Apr 2020
-Contact:        linux-crypto@vger.kernel.org
-Description:    Dump the number of failed QM mailbox commands.
+What:		/sys/kernel/debug/hisi_hpre/<bdf>/qm/mb_err
+Date:		Apr 2020
+Contact:	linux-crypto@vger.kernel.org
+Description:	Dump the number of failed QM mailbox commands.
 		Available for both PF and VF, and take no other effect on HPRE.
 
-What:           /sys/kernel/debug/hisi_hpre/<bdf>/qm/status
-Date:           Apr 2020
-Contact:        linux-crypto@vger.kernel.org
-Description:    Dump the status of the QM.
+What:		/sys/kernel/debug/hisi_hpre/<bdf>/qm/status
+Date:		Apr 2020
+Contact:	linux-crypto@vger.kernel.org
+Description:	Dump the status of the QM.
 		Four states: initiated, started, stopped and closed.
 		Available for both PF and VF, and take no other effect on HPRE.
 
-What:           /sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/send_cnt
-Date:           Apr 2020
-Contact:        linux-crypto@vger.kernel.org
-Description:    Dump the total number of sent requests.
+What:		/sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/send_cnt
+Date:		Apr 2020
+Contact:	linux-crypto@vger.kernel.org
+Description:	Dump the total number of sent requests.
 		Available for both PF and VF, and take no other effect on HPRE.
 
-What:           /sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/recv_cnt
-Date:           Apr 2020
-Contact:        linux-crypto@vger.kernel.org
-Description:    Dump the total number of received requests.
+What:		/sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/recv_cnt
+Date:		Apr 2020
+Contact:	linux-crypto@vger.kernel.org
+Description:	Dump the total number of received requests.
 		Available for both PF and VF, and take no other effect on HPRE.
 
-What:           /sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/send_busy_cnt
-Date:           Apr 2020
-Contact:        linux-crypto@vger.kernel.org
-Description:    Dump the total number of requests sent
+What:		/sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/send_busy_cnt
+Date:		Apr 2020
+Contact:	linux-crypto@vger.kernel.org
+Description:	Dump the total number of requests sent
 		with returning busy.
 		Available for both PF and VF, and take no other effect on HPRE.
 
-What:           /sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/send_fail_cnt
-Date:           Apr 2020
-Contact:        linux-crypto@vger.kernel.org
-Description:    Dump the total number of completed but error requests.
+What:		/sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/send_fail_cnt
+Date:		Apr 2020
+Contact:	linux-crypto@vger.kernel.org
+Description:	Dump the total number of completed but error requests.
 		Available for both PF and VF, and take no other effect on HPRE.
 
-What:           /sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/invalid_req_cnt
-Date:           Apr 2020
-Contact:        linux-crypto@vger.kernel.org
-Description:    Dump the total number of invalid requests being received.
+What:		/sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/invalid_req_cnt
+Date:		Apr 2020
+Contact:	linux-crypto@vger.kernel.org
+Description:	Dump the total number of invalid requests being received.
 		Available for both PF and VF, and take no other effect on HPRE.
 
-What:           /sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/overtime_thrhld
-Date:           Apr 2020
-Contact:        linux-crypto@vger.kernel.org
-Description:    Set the threshold time for counting the request which is
+What:		/sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/overtime_thrhld
+Date:		Apr 2020
+Contact:	linux-crypto@vger.kernel.org
+Description:	Set the threshold time for counting the request which is
 		processed longer than the threshold.
 		0: disable(default), 1: 1 microsecond.
 		Available for both PF and VF, and take no other effect on HPRE.
 
-What:           /sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/over_thrhld_cnt
-Date:           Apr 2020
-Contact:        linux-crypto@vger.kernel.org
-Description:    Dump the total number of time out requests.
+What:		/sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/over_thrhld_cnt
+Date:		Apr 2020
+Contact:	linux-crypto@vger.kernel.org
+Description:	Dump the total number of time out requests.
 		Available for both PF and VF, and take no other effect on HPRE.
diff --git a/Documentation/ABI/testing/debugfs-hisi-sec b/Documentation/ABI/testing/debugfs-hisi-sec
index 85feb4408e0f6..58cfa354534c7 100644
--- a/Documentation/ABI/testing/debugfs-hisi-sec
+++ b/Documentation/ABI/testing/debugfs-hisi-sec
@@ -1,113 +1,113 @@
-What:           /sys/kernel/debug/hisi_sec2/<bdf>/clear_enable
-Date:           Oct 2019
-Contact:        linux-crypto@vger.kernel.org
-Description:    Enabling/disabling of clear action after reading
+What:		/sys/kernel/debug/hisi_sec2/<bdf>/clear_enable
+Date:		Oct 2019
+Contact:	linux-crypto@vger.kernel.org
+Description:	Enabling/disabling of clear action after reading
 		the SEC debug registers.
 		0: disable, 1: enable.
 		Only available for PF, and take no other effect on SEC.
 
-What:           /sys/kernel/debug/hisi_sec2/<bdf>/current_qm
-Date:           Oct 2019
-Contact:        linux-crypto@vger.kernel.org
-Description:    One SEC controller has one PF and multiple VFs, each function
+What:		/sys/kernel/debug/hisi_sec2/<bdf>/current_qm
+Date:		Oct 2019
+Contact:	linux-crypto@vger.kernel.org
+Description:	One SEC controller has one PF and multiple VFs, each function
 		has a QM. This file can be used to select the QM which below
 		qm refers to.
 		Only available for PF.
 
-What:           /sys/kernel/debug/hisi_sec2/<bdf>/qm/qm_regs
-Date:           Oct 2019
-Contact:        linux-crypto@vger.kernel.org
-Description:    Dump of QM related debug registers.
+What:		/sys/kernel/debug/hisi_sec2/<bdf>/qm/qm_regs
+Date:		Oct 2019
+Contact:	linux-crypto@vger.kernel.org
+Description:	Dump of QM related debug registers.
 		Available for PF and VF in host. VF in guest currently only
 		has one debug register.
 
-What:           /sys/kernel/debug/hisi_sec2/<bdf>/qm/current_q
-Date:           Oct 2019
-Contact:        linux-crypto@vger.kernel.org
-Description:    One QM of SEC may contain multiple queues. Select specific
+What:		/sys/kernel/debug/hisi_sec2/<bdf>/qm/current_q
+Date:		Oct 2019
+Contact:	linux-crypto@vger.kernel.org
+Description:	One QM of SEC may contain multiple queues. Select specific
 		queue to show its debug registers in above 'regs'.
 		Only available for PF.
 
-What:           /sys/kernel/debug/hisi_sec2/<bdf>/qm/clear_enable
-Date:           Oct 2019
-Contact:        linux-crypto@vger.kernel.org
-Description:    Enabling/disabling of clear action after reading
+What:		/sys/kernel/debug/hisi_sec2/<bdf>/qm/clear_enable
+Date:		Oct 2019
+Contact:	linux-crypto@vger.kernel.org
+Description:	Enabling/disabling of clear action after reading
 		the SEC's QM debug registers.
 		0: disable, 1: enable.
 		Only available for PF, and take no other effect on SEC.
 
-What:           /sys/kernel/debug/hisi_sec2/<bdf>/qm/err_irq
-Date:           Apr 2020
-Contact:        linux-crypto@vger.kernel.org
-Description:    Dump the number of invalid interrupts for
+What:		/sys/kernel/debug/hisi_sec2/<bdf>/qm/err_irq
+Date:		Apr 2020
+Contact:	linux-crypto@vger.kernel.org
+Description:	Dump the number of invalid interrupts for
 		QM task completion.
 		Available for both PF and VF, and take no other effect on SEC.
 
-What:           /sys/kernel/debug/hisi_sec2/<bdf>/qm/aeq_irq
-Date:           Apr 2020
-Contact:        linux-crypto@vger.kernel.org
-Description:    Dump the number of QM async event queue interrupts.
+What:		/sys/kernel/debug/hisi_sec2/<bdf>/qm/aeq_irq
+Date:		Apr 2020
+Contact:	linux-crypto@vger.kernel.org
+Description:	Dump the number of QM async event queue interrupts.
 		Available for both PF and VF, and take no other effect on SEC.
 
-What:           /sys/kernel/debug/hisi_sec2/<bdf>/qm/abnormal_irq
-Date:           Apr 2020
-Contact:        linux-crypto@vger.kernel.org
-Description:    Dump the number of interrupts for QM abnormal event.
+What:		/sys/kernel/debug/hisi_sec2/<bdf>/qm/abnormal_irq
+Date:		Apr 2020
+Contact:	linux-crypto@vger.kernel.org
+Description:	Dump the number of interrupts for QM abnormal event.
 		Available for both PF and VF, and take no other effect on SEC.
 
-What:           /sys/kernel/debug/hisi_sec2/<bdf>/qm/create_qp_err
-Date:           Apr 2020
-Contact:        linux-crypto@vger.kernel.org
-Description:    Dump the number of queue allocation errors.
+What:		/sys/kernel/debug/hisi_sec2/<bdf>/qm/create_qp_err
+Date:		Apr 2020
+Contact:	linux-crypto@vger.kernel.org
+Description:	Dump the number of queue allocation errors.
 		Available for both PF and VF, and take no other effect on SEC.
 
-What:           /sys/kernel/debug/hisi_sec2/<bdf>/qm/mb_err
-Date:           Apr 2020
-Contact:        linux-crypto@vger.kernel.org
-Description:    Dump the number of failed QM mailbox commands.
+What:		/sys/kernel/debug/hisi_sec2/<bdf>/qm/mb_err
+Date:		Apr 2020
+Contact:	linux-crypto@vger.kernel.org
+Description:	Dump the number of failed QM mailbox commands.
 		Available for both PF and VF, and take no other effect on SEC.
 
-What:           /sys/kernel/debug/hisi_sec2/<bdf>/qm/status
-Date:           Apr 2020
-Contact:        linux-crypto@vger.kernel.org
-Description:    Dump the status of the QM.
+What:		/sys/kernel/debug/hisi_sec2/<bdf>/qm/status
+Date:		Apr 2020
+Contact:	linux-crypto@vger.kernel.org
+Description:	Dump the status of the QM.
 		Four states: initiated, started, stopped and closed.
 		Available for both PF and VF, and take no other effect on SEC.
 
-What:           /sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/send_cnt
-Date:           Apr 2020
-Contact:        linux-crypto@vger.kernel.org
-Description:    Dump the total number of sent requests.
+What:		/sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/send_cnt
+Date:		Apr 2020
+Contact:	linux-crypto@vger.kernel.org
+Description:	Dump the total number of sent requests.
 		Available for both PF and VF, and take no other effect on SEC.
 
-What:           /sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/recv_cnt
-Date:           Apr 2020
-Contact:        linux-crypto@vger.kernel.org
-Description:    Dump the total number of received requests.
+What:		/sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/recv_cnt
+Date:		Apr 2020
+Contact:	linux-crypto@vger.kernel.org
+Description:	Dump the total number of received requests.
 		Available for both PF and VF, and take no other effect on SEC.
 
-What:           /sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/send_busy_cnt
-Date:           Apr 2020
-Contact:        linux-crypto@vger.kernel.org
-Description:    Dump the total number of requests sent with returning busy.
+What:		/sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/send_busy_cnt
+Date:		Apr 2020
+Contact:	linux-crypto@vger.kernel.org
+Description:	Dump the total number of requests sent with returning busy.
 		Available for both PF and VF, and take no other effect on SEC.
 
-What:           /sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/err_bd_cnt
-Date:           Apr 2020
-Contact:        linux-crypto@vger.kernel.org
-Description:    Dump the total number of BD type error requests
+What:		/sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/err_bd_cnt
+Date:		Apr 2020
+Contact:	linux-crypto@vger.kernel.org
+Description:	Dump the total number of BD type error requests
 		to be received.
 		Available for both PF and VF, and take no other effect on SEC.
 
-What:           /sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/invalid_req_cnt
-Date:           Apr 2020
-Contact:        linux-crypto@vger.kernel.org
-Description:    Dump the total number of invalid requests being received.
+What:		/sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/invalid_req_cnt
+Date:		Apr 2020
+Contact:	linux-crypto@vger.kernel.org
+Description:	Dump the total number of invalid requests being received.
 		Available for both PF and VF, and take no other effect on SEC.
 
-What:           /sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/done_flag_cnt
-Date:           Apr 2020
-Contact:        linux-crypto@vger.kernel.org
-Description:    Dump the total number of completed but marked error requests
+What:		/sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/done_flag_cnt
+Date:		Apr 2020
+Contact:	linux-crypto@vger.kernel.org
+Description:	Dump the total number of completed but marked error requests
 		to be received.
 		Available for both PF and VF, and take no other effect on SEC.
diff --git a/Documentation/ABI/testing/debugfs-hisi-zip b/Documentation/ABI/testing/debugfs-hisi-zip
index 3034a2bf99ca5..765e29203ef33 100644
--- a/Documentation/ABI/testing/debugfs-hisi-zip
+++ b/Documentation/ABI/testing/debugfs-hisi-zip
@@ -1,114 +1,114 @@
-What:           /sys/kernel/debug/hisi_zip/<bdf>/comp_core[01]/regs
-Date:           Nov 2018
-Contact:        linux-crypto@vger.kernel.org
-Description:    Dump of compression cores related debug registers.
+What:		/sys/kernel/debug/hisi_zip/<bdf>/comp_core[01]/regs
+Date:		Nov 2018
+Contact:	linux-crypto@vger.kernel.org
+Description:	Dump of compression cores related debug registers.
 		Only available for PF.
 
-What:           /sys/kernel/debug/hisi_zip/<bdf>/decomp_core[0-5]/regs
-Date:           Nov 2018
-Contact:        linux-crypto@vger.kernel.org
-Description:    Dump of decompression cores related debug registers.
+What:		/sys/kernel/debug/hisi_zip/<bdf>/decomp_core[0-5]/regs
+Date:		Nov 2018
+Contact:	linux-crypto@vger.kernel.org
+Description:	Dump of decompression cores related debug registers.
 		Only available for PF.
 
-What:           /sys/kernel/debug/hisi_zip/<bdf>/clear_enable
-Date:           Nov 2018
-Contact:        linux-crypto@vger.kernel.org
-Description:    Compression/decompression core debug registers read clear
+What:		/sys/kernel/debug/hisi_zip/<bdf>/clear_enable
+Date:		Nov 2018
+Contact:	linux-crypto@vger.kernel.org
+Description:	Compression/decompression core debug registers read clear
 		control. 1 means enable register read clear, otherwise 0.
 		Writing to this file has no functional effect, only enable or
 		disable counters clear after reading of these registers.
 		Only available for PF.
 
-What:           /sys/kernel/debug/hisi_zip/<bdf>/current_qm
-Date:           Nov 2018
-Contact:        linux-crypto@vger.kernel.org
-Description:    One ZIP controller has one PF and multiple VFs, each function
+What:		/sys/kernel/debug/hisi_zip/<bdf>/current_qm
+Date:		Nov 2018
+Contact:	linux-crypto@vger.kernel.org
+Description:	One ZIP controller has one PF and multiple VFs, each function
 		has a QM. Select the QM which below qm refers to.
 		Only available for PF.
 
-What:           /sys/kernel/debug/hisi_zip/<bdf>/qm/regs
-Date:           Nov 2018
-Contact:        linux-crypto@vger.kernel.org
-Description:    Dump of QM related debug registers.
+What:		/sys/kernel/debug/hisi_zip/<bdf>/qm/regs
+Date:		Nov 2018
+Contact:	linux-crypto@vger.kernel.org
+Description:	Dump of QM related debug registers.
 		Available for PF and VF in host. VF in guest currently only
 		has one debug register.
 
-What:           /sys/kernel/debug/hisi_zip/<bdf>/qm/current_q
-Date:           Nov 2018
-Contact:        linux-crypto@vger.kernel.org
-Description:    One QM may contain multiple queues. Select specific queue to
+What:		/sys/kernel/debug/hisi_zip/<bdf>/qm/current_q
+Date:		Nov 2018
+Contact:	linux-crypto@vger.kernel.org
+Description:	One QM may contain multiple queues. Select specific queue to
 		show its debug registers in above regs.
 		Only available for PF.
 
-What:           /sys/kernel/debug/hisi_zip/<bdf>/qm/clear_enable
-Date:           Nov 2018
-Contact:        linux-crypto@vger.kernel.org
-Description:    QM debug registers(regs) read clear control. 1 means enable
+What:		/sys/kernel/debug/hisi_zip/<bdf>/qm/clear_enable
+Date:		Nov 2018
+Contact:	linux-crypto@vger.kernel.org
+Description:	QM debug registers(regs) read clear control. 1 means enable
 		register read clear, otherwise 0.
 		Writing to this file has no functional effect, only enable or
 		disable counters clear after reading of these registers.
 		Only available for PF.
 
-What:           /sys/kernel/debug/hisi_zip/<bdf>/qm/err_irq
-Date:           Apr 2020
-Contact:        linux-crypto@vger.kernel.org
-Description:    Dump the number of invalid interrupts for
+What:		/sys/kernel/debug/hisi_zip/<bdf>/qm/err_irq
+Date:		Apr 2020
+Contact:	linux-crypto@vger.kernel.org
+Description:	Dump the number of invalid interrupts for
 		QM task completion.
 		Available for both PF and VF, and take no other effect on ZIP.
 
-What:           /sys/kernel/debug/hisi_zip/<bdf>/qm/aeq_irq
-Date:           Apr 2020
-Contact:        linux-crypto@vger.kernel.org
-Description:    Dump the number of QM async event queue interrupts.
+What:		/sys/kernel/debug/hisi_zip/<bdf>/qm/aeq_irq
+Date:		Apr 2020
+Contact:	linux-crypto@vger.kernel.org
+Description:	Dump the number of QM async event queue interrupts.
 		Available for both PF and VF, and take no other effect on ZIP.
 
-What:           /sys/kernel/debug/hisi_zip/<bdf>/qm/abnormal_irq
-Date:           Apr 2020
-Contact:        linux-crypto@vger.kernel.org
-Description:    Dump the number of interrupts for QM abnormal event.
+What:		/sys/kernel/debug/hisi_zip/<bdf>/qm/abnormal_irq
+Date:		Apr 2020
+Contact:	linux-crypto@vger.kernel.org
+Description:	Dump the number of interrupts for QM abnormal event.
 		Available for both PF and VF, and take no other effect on ZIP.
 
-What:           /sys/kernel/debug/hisi_zip/<bdf>/qm/create_qp_err
-Date:           Apr 2020
-Contact:        linux-crypto@vger.kernel.org
-Description:    Dump the number of queue allocation errors.
+What:		/sys/kernel/debug/hisi_zip/<bdf>/qm/create_qp_err
+Date:		Apr 2020
+Contact:	linux-crypto@vger.kernel.org
+Description:	Dump the number of queue allocation errors.
 		Available for both PF and VF, and take no other effect on ZIP.
 
-What:           /sys/kernel/debug/hisi_zip/<bdf>/qm/mb_err
-Date:           Apr 2020
-Contact:        linux-crypto@vger.kernel.org
-Description:    Dump the number of failed QM mailbox commands.
+What:		/sys/kernel/debug/hisi_zip/<bdf>/qm/mb_err
+Date:		Apr 2020
+Contact:	linux-crypto@vger.kernel.org
+Description:	Dump the number of failed QM mailbox commands.
 		Available for both PF and VF, and take no other effect on ZIP.
 
-What:           /sys/kernel/debug/hisi_zip/<bdf>/qm/status
-Date:           Apr 2020
-Contact:        linux-crypto@vger.kernel.org
-Description:    Dump the status of the QM.
+What:		/sys/kernel/debug/hisi_zip/<bdf>/qm/status
+Date:		Apr 2020
+Contact:	linux-crypto@vger.kernel.org
+Description:	Dump the status of the QM.
 		Four states: initiated, started, stopped and closed.
 		Available for both PF and VF, and take no other effect on ZIP.
 
-What:           /sys/kernel/debug/hisi_zip/<bdf>/zip_dfx/send_cnt
-Date:           Apr 2020
-Contact:        linux-crypto@vger.kernel.org
-Description:    Dump the total number of sent requests.
+What:		/sys/kernel/debug/hisi_zip/<bdf>/zip_dfx/send_cnt
+Date:		Apr 2020
+Contact:	linux-crypto@vger.kernel.org
+Description:	Dump the total number of sent requests.
 		Available for both PF and VF, and take no other effect on ZIP.
 
-What:           /sys/kernel/debug/hisi_zip/<bdf>/zip_dfx/recv_cnt
-Date:           Apr 2020
-Contact:        linux-crypto@vger.kernel.org
-Description:    Dump the total number of received requests.
+What:		/sys/kernel/debug/hisi_zip/<bdf>/zip_dfx/recv_cnt
+Date:		Apr 2020
+Contact:	linux-crypto@vger.kernel.org
+Description:	Dump the total number of received requests.
 		Available for both PF and VF, and take no other effect on ZIP.
 
-What:           /sys/kernel/debug/hisi_zip/<bdf>/zip_dfx/send_busy_cnt
-Date:           Apr 2020
-Contact:        linux-crypto@vger.kernel.org
-Description:    Dump the total number of requests received
+What:		/sys/kernel/debug/hisi_zip/<bdf>/zip_dfx/send_busy_cnt
+Date:		Apr 2020
+Contact:	linux-crypto@vger.kernel.org
+Description:	Dump the total number of requests received
 		with returning busy.
 		Available for both PF and VF, and take no other effect on ZIP.
 
-What:           /sys/kernel/debug/hisi_zip/<bdf>/zip_dfx/err_bd_cnt
-Date:           Apr 2020
-Contact:        linux-crypto@vger.kernel.org
-Description:    Dump the total number of BD type error requests
+What:		/sys/kernel/debug/hisi_zip/<bdf>/zip_dfx/err_bd_cnt
+Date:		Apr 2020
+Contact:	linux-crypto@vger.kernel.org
+Description:	Dump the total number of BD type error requests
 		to be received.
 		Available for both PF and VF, and take no other effect on ZIP.
-- 
GitLab


From 535761866ec36e85ef32b4d41e8757307506bd17 Mon Sep 17 00:00:00 2001
From: Kai Ye <yekai13@huawei.com>
Date: Tue, 4 Jan 2022 16:29:17 +0800
Subject: [PATCH 0069/1586] Documentation: update debugfs doc for Hisilicon SEC

Update documentation describing DebugFS for function's QoS limiting.

Signed-off-by: Kai Ye <yekai13@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 Documentation/ABI/testing/debugfs-hisi-sec | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/Documentation/ABI/testing/debugfs-hisi-sec b/Documentation/ABI/testing/debugfs-hisi-sec
index 58cfa354534c7..2bf84ced484b7 100644
--- a/Documentation/ABI/testing/debugfs-hisi-sec
+++ b/Documentation/ABI/testing/debugfs-hisi-sec
@@ -14,6 +14,16 @@ Description:	One SEC controller has one PF and multiple VFs, each function
 		qm refers to.
 		Only available for PF.
 
+What:		/sys/kernel/debug/hisi_sec2/<bdf>/alg_qos
+Date:		Jun 2021
+Contact:	linux-crypto@vger.kernel.org
+Description:	The <bdf> is related the function for PF and VF.
+		SEC driver supports to configure each function's QoS, the driver
+		supports to write <bdf> value to alg_qos in the host. Such as
+		"echo <bdf> value > alg_qos". The qos value is 1~1000, means
+		1/1000~1000/1000 of total QoS. The driver reading alg_qos to
+		get related QoS in the host and VM, Such as "cat alg_qos".
+
 What:		/sys/kernel/debug/hisi_sec2/<bdf>/qm/qm_regs
 Date:		Oct 2019
 Contact:	linux-crypto@vger.kernel.org
-- 
GitLab


From f6fb93ccc8c835efd15960181c768cd7c0d204a6 Mon Sep 17 00:00:00 2001
From: Kai Ye <yekai13@huawei.com>
Date: Tue, 4 Jan 2022 16:29:18 +0800
Subject: [PATCH 0070/1586] Documentation: update debugfs doc for Hisilicon ZIP

Update documentation describing DebugFS for function's QoS limiting.

Signed-off-by: Kai Ye <yekai13@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 Documentation/ABI/testing/debugfs-hisi-zip | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/Documentation/ABI/testing/debugfs-hisi-zip b/Documentation/ABI/testing/debugfs-hisi-zip
index 765e29203ef33..bf1258bc64959 100644
--- a/Documentation/ABI/testing/debugfs-hisi-zip
+++ b/Documentation/ABI/testing/debugfs-hisi-zip
@@ -26,6 +26,16 @@ Description:	One ZIP controller has one PF and multiple VFs, each function
 		has a QM. Select the QM which below qm refers to.
 		Only available for PF.
 
+What:		/sys/kernel/debug/hisi_zip/<bdf>/alg_qos
+Date:		Jun 2021
+Contact:	linux-crypto@vger.kernel.org
+Description:	The <bdf> is related the function for PF and VF.
+		ZIP driver supports to configure each function's QoS, the driver
+		supports to write <bdf> value to alg_qos in the host. Such as
+		"echo <bdf> value > alg_qos". The qos value is 1~1000, means
+		1/1000~1000/1000 of total QoS. The driver reading alg_qos to
+		get related QoS in the host and VM, Such as "cat alg_qos".
+
 What:		/sys/kernel/debug/hisi_zip/<bdf>/qm/regs
 Date:		Nov 2018
 Contact:	linux-crypto@vger.kernel.org
-- 
GitLab


From 68ce6126e11364dd5b3a5c5113b4285c56d8d249 Mon Sep 17 00:00:00 2001
From: Kai Ye <yekai13@huawei.com>
Date: Tue, 4 Jan 2022 16:29:19 +0800
Subject: [PATCH 0071/1586] Documentation: update debugfs doc for Hisilicon
 HPRE

Update documentation describing DebugFS for function's QoS limiting.

Signed-off-by: Kai Ye <yekai13@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 Documentation/ABI/testing/debugfs-hisi-hpre | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/Documentation/ABI/testing/debugfs-hisi-hpre b/Documentation/ABI/testing/debugfs-hisi-hpre
index 68d4dcb1538d9..396de7bc735d0 100644
--- a/Documentation/ABI/testing/debugfs-hisi-hpre
+++ b/Documentation/ABI/testing/debugfs-hisi-hpre
@@ -27,6 +27,16 @@ Description:	One HPRE controller has one PF and multiple VFs, each function
 		has a QM. Select the QM which below qm refers to.
 		Only available for PF.
 
+What:		/sys/kernel/debug/hisi_hpre/<bdf>/alg_qos
+Date:		Jun 2021
+Contact:	linux-crypto@vger.kernel.org
+Description:	The <bdf> is related the function for PF and VF.
+		HPRE driver supports to configure each function's QoS, the driver
+		supports to write <bdf> value to alg_qos in the host. Such as
+		"echo <bdf> value > alg_qos". The qos value is 1~1000, means
+		1/1000~1000/1000 of total QoS. The driver reading alg_qos to
+		get related QoS in the host and VM, Such as "cat alg_qos".
+
 What:		/sys/kernel/debug/hisi_hpre/<bdf>/regs
 Date:		Sep 2019
 Contact:	linux-crypto@vger.kernel.org
-- 
GitLab


From eb90686d5d10fef9cadd9c0eb30f3fee66d2b2a5 Mon Sep 17 00:00:00 2001
From: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Date: Fri, 7 Jan 2022 20:06:55 +0800
Subject: [PATCH 0072/1586] crypto: sm3 - create SM3 stand-alone library

Stand-alone implementation of the SM3 algorithm. It is designed
to have as little dependencies as possible. In other cases you
should generally use the hash APIs from include/crypto/hash.h.
Especially when hashing large amounts of data as those APIs may
be hw-accelerated. In the new SM3 stand-alone library,
sm3_transform() has also been optimized, instead of simply using
the code in sm3_generic.

Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Reviewed-by: Gilad Ben-Yossef <gilad@benyossef.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/crypto/sm3.h |  32 ++++++
 lib/crypto/Kconfig   |   3 +
 lib/crypto/Makefile  |   3 +
 lib/crypto/sm3.c     | 246 +++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 284 insertions(+)
 create mode 100644 lib/crypto/sm3.c

diff --git a/include/crypto/sm3.h b/include/crypto/sm3.h
index 42ea21289ba95..b5fb6d1bf2479 100644
--- a/include/crypto/sm3.h
+++ b/include/crypto/sm3.h
@@ -1,5 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Common values for SM3 algorithm
+ *
+ * Copyright (C) 2017 ARM Limited or its affiliates.
+ * Copyright (C) 2017 Gilad Ben-Yossef <gilad@benyossef.com>
+ * Copyright (C) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
  */
 
 #ifndef _CRYPTO_SM3_H
@@ -39,4 +44,31 @@ extern int crypto_sm3_final(struct shash_desc *desc, u8 *out);
 
 extern int crypto_sm3_finup(struct shash_desc *desc, const u8 *data,
 			     unsigned int len, u8 *hash);
+
+/*
+ * Stand-alone implementation of the SM3 algorithm. It is designed to
+ * have as little dependencies as possible so it can be used in the
+ * kexec_file purgatory. In other cases you should generally use the
+ * hash APIs from include/crypto/hash.h. Especially when hashing large
+ * amounts of data as those APIs may be hw-accelerated.
+ *
+ * For details see lib/crypto/sm3.c
+ */
+
+static inline void sm3_init(struct sm3_state *sctx)
+{
+	sctx->state[0] = SM3_IVA;
+	sctx->state[1] = SM3_IVB;
+	sctx->state[2] = SM3_IVC;
+	sctx->state[3] = SM3_IVD;
+	sctx->state[4] = SM3_IVE;
+	sctx->state[5] = SM3_IVF;
+	sctx->state[6] = SM3_IVG;
+	sctx->state[7] = SM3_IVH;
+	sctx->count = 0;
+}
+
+void sm3_update(struct sm3_state *sctx, const u8 *data, unsigned int len);
+void sm3_final(struct sm3_state *sctx, u8 *out);
+
 #endif
diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig
index e8e525650cf29..379a66d7f504c 100644
--- a/lib/crypto/Kconfig
+++ b/lib/crypto/Kconfig
@@ -123,6 +123,9 @@ config CRYPTO_LIB_CHACHA20POLY1305
 config CRYPTO_LIB_SHA256
 	tristate
 
+config CRYPTO_LIB_SM3
+	tristate
+
 config CRYPTO_LIB_SM4
 	tristate
 
diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile
index ed43a41f2dcc8..6c872d05d1e62 100644
--- a/lib/crypto/Makefile
+++ b/lib/crypto/Makefile
@@ -37,6 +37,9 @@ libpoly1305-y					+= poly1305.o
 obj-$(CONFIG_CRYPTO_LIB_SHA256)			+= libsha256.o
 libsha256-y					:= sha256.o
 
+obj-$(CONFIG_CRYPTO_LIB_SM3)			+= libsm3.o
+libsm3-y					:= sm3.o
+
 obj-$(CONFIG_CRYPTO_LIB_SM4)			+= libsm4.o
 libsm4-y					:= sm4.o
 
diff --git a/lib/crypto/sm3.c b/lib/crypto/sm3.c
new file mode 100644
index 0000000000000..d473e358a873a
--- /dev/null
+++ b/lib/crypto/sm3.c
@@ -0,0 +1,246 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * SM3 secure hash, as specified by OSCCA GM/T 0004-2012 SM3 and described
+ * at https://datatracker.ietf.org/doc/html/draft-sca-cfrg-sm3-02
+ *
+ * Copyright (C) 2017 ARM Limited or its affiliates.
+ * Copyright (C) 2017 Gilad Ben-Yossef <gilad@benyossef.com>
+ * Copyright (C) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
+ */
+
+#include <linux/module.h>
+#include <asm/unaligned.h>
+#include <crypto/sm3.h>
+
+static const u32 ____cacheline_aligned K[64] = {
+	0x79cc4519, 0xf3988a32, 0xe7311465, 0xce6228cb,
+	0x9cc45197, 0x3988a32f, 0x7311465e, 0xe6228cbc,
+	0xcc451979, 0x988a32f3, 0x311465e7, 0x6228cbce,
+	0xc451979c, 0x88a32f39, 0x11465e73, 0x228cbce6,
+	0x9d8a7a87, 0x3b14f50f, 0x7629ea1e, 0xec53d43c,
+	0xd8a7a879, 0xb14f50f3, 0x629ea1e7, 0xc53d43ce,
+	0x8a7a879d, 0x14f50f3b, 0x29ea1e76, 0x53d43cec,
+	0xa7a879d8, 0x4f50f3b1, 0x9ea1e762, 0x3d43cec5,
+	0x7a879d8a, 0xf50f3b14, 0xea1e7629, 0xd43cec53,
+	0xa879d8a7, 0x50f3b14f, 0xa1e7629e, 0x43cec53d,
+	0x879d8a7a, 0x0f3b14f5, 0x1e7629ea, 0x3cec53d4,
+	0x79d8a7a8, 0xf3b14f50, 0xe7629ea1, 0xcec53d43,
+	0x9d8a7a87, 0x3b14f50f, 0x7629ea1e, 0xec53d43c,
+	0xd8a7a879, 0xb14f50f3, 0x629ea1e7, 0xc53d43ce,
+	0x8a7a879d, 0x14f50f3b, 0x29ea1e76, 0x53d43cec,
+	0xa7a879d8, 0x4f50f3b1, 0x9ea1e762, 0x3d43cec5
+};
+
+/*
+ * Transform the message X which consists of 16 32-bit-words. See
+ * GM/T 004-2012 for details.
+ */
+#define R(i, a, b, c, d, e, f, g, h, t, w1, w2)			\
+	do {							\
+		ss1 = rol32((rol32((a), 12) + (e) + (t)), 7);	\
+		ss2 = ss1 ^ rol32((a), 12);			\
+		d += FF ## i(a, b, c) + ss2 + ((w1) ^ (w2));	\
+		h += GG ## i(e, f, g) + ss1 + (w1);		\
+		b = rol32((b), 9);				\
+		f = rol32((f), 19);				\
+		h = P0((h));					\
+	} while (0)
+
+#define R1(a, b, c, d, e, f, g, h, t, w1, w2) \
+	R(1, a, b, c, d, e, f, g, h, t, w1, w2)
+#define R2(a, b, c, d, e, f, g, h, t, w1, w2) \
+	R(2, a, b, c, d, e, f, g, h, t, w1, w2)
+
+#define FF1(x, y, z)  (x ^ y ^ z)
+#define FF2(x, y, z)  ((x & y) | (x & z) | (y & z))
+
+#define GG1(x, y, z)  FF1(x, y, z)
+#define GG2(x, y, z)  ((x & y) | (~x & z))
+
+/* Message expansion */
+#define P0(x) ((x) ^ rol32((x), 9) ^ rol32((x), 17))
+#define P1(x) ((x) ^ rol32((x), 15) ^ rol32((x), 23))
+#define I(i)  (W[i] = get_unaligned_be32(data + i * 4))
+#define W1(i) (W[i & 0x0f])
+#define W2(i) (W[i & 0x0f] =				\
+		P1(W[i & 0x0f]				\
+			^ W[(i-9) & 0x0f]		\
+			^ rol32(W[(i-3) & 0x0f], 15))	\
+		^ rol32(W[(i-13) & 0x0f], 7)		\
+		^ W[(i-6) & 0x0f])
+
+static void sm3_transform(struct sm3_state *sctx, u8 const *data, u32 W[16])
+{
+	u32 a, b, c, d, e, f, g, h, ss1, ss2;
+
+	a = sctx->state[0];
+	b = sctx->state[1];
+	c = sctx->state[2];
+	d = sctx->state[3];
+	e = sctx->state[4];
+	f = sctx->state[5];
+	g = sctx->state[6];
+	h = sctx->state[7];
+
+	R1(a, b, c, d, e, f, g, h, K[0], I(0), I(4));
+	R1(d, a, b, c, h, e, f, g, K[1], I(1), I(5));
+	R1(c, d, a, b, g, h, e, f, K[2], I(2), I(6));
+	R1(b, c, d, a, f, g, h, e, K[3], I(3), I(7));
+	R1(a, b, c, d, e, f, g, h, K[4], W1(4), I(8));
+	R1(d, a, b, c, h, e, f, g, K[5], W1(5), I(9));
+	R1(c, d, a, b, g, h, e, f, K[6], W1(6), I(10));
+	R1(b, c, d, a, f, g, h, e, K[7], W1(7), I(11));
+	R1(a, b, c, d, e, f, g, h, K[8], W1(8), I(12));
+	R1(d, a, b, c, h, e, f, g, K[9], W1(9), I(13));
+	R1(c, d, a, b, g, h, e, f, K[10], W1(10), I(14));
+	R1(b, c, d, a, f, g, h, e, K[11], W1(11), I(15));
+	R1(a, b, c, d, e, f, g, h, K[12], W1(12), W2(16));
+	R1(d, a, b, c, h, e, f, g, K[13], W1(13), W2(17));
+	R1(c, d, a, b, g, h, e, f, K[14], W1(14), W2(18));
+	R1(b, c, d, a, f, g, h, e, K[15], W1(15), W2(19));
+
+	R2(a, b, c, d, e, f, g, h, K[16], W1(16), W2(20));
+	R2(d, a, b, c, h, e, f, g, K[17], W1(17), W2(21));
+	R2(c, d, a, b, g, h, e, f, K[18], W1(18), W2(22));
+	R2(b, c, d, a, f, g, h, e, K[19], W1(19), W2(23));
+	R2(a, b, c, d, e, f, g, h, K[20], W1(20), W2(24));
+	R2(d, a, b, c, h, e, f, g, K[21], W1(21), W2(25));
+	R2(c, d, a, b, g, h, e, f, K[22], W1(22), W2(26));
+	R2(b, c, d, a, f, g, h, e, K[23], W1(23), W2(27));
+	R2(a, b, c, d, e, f, g, h, K[24], W1(24), W2(28));
+	R2(d, a, b, c, h, e, f, g, K[25], W1(25), W2(29));
+	R2(c, d, a, b, g, h, e, f, K[26], W1(26), W2(30));
+	R2(b, c, d, a, f, g, h, e, K[27], W1(27), W2(31));
+	R2(a, b, c, d, e, f, g, h, K[28], W1(28), W2(32));
+	R2(d, a, b, c, h, e, f, g, K[29], W1(29), W2(33));
+	R2(c, d, a, b, g, h, e, f, K[30], W1(30), W2(34));
+	R2(b, c, d, a, f, g, h, e, K[31], W1(31), W2(35));
+
+	R2(a, b, c, d, e, f, g, h, K[32], W1(32), W2(36));
+	R2(d, a, b, c, h, e, f, g, K[33], W1(33), W2(37));
+	R2(c, d, a, b, g, h, e, f, K[34], W1(34), W2(38));
+	R2(b, c, d, a, f, g, h, e, K[35], W1(35), W2(39));
+	R2(a, b, c, d, e, f, g, h, K[36], W1(36), W2(40));
+	R2(d, a, b, c, h, e, f, g, K[37], W1(37), W2(41));
+	R2(c, d, a, b, g, h, e, f, K[38], W1(38), W2(42));
+	R2(b, c, d, a, f, g, h, e, K[39], W1(39), W2(43));
+	R2(a, b, c, d, e, f, g, h, K[40], W1(40), W2(44));
+	R2(d, a, b, c, h, e, f, g, K[41], W1(41), W2(45));
+	R2(c, d, a, b, g, h, e, f, K[42], W1(42), W2(46));
+	R2(b, c, d, a, f, g, h, e, K[43], W1(43), W2(47));
+	R2(a, b, c, d, e, f, g, h, K[44], W1(44), W2(48));
+	R2(d, a, b, c, h, e, f, g, K[45], W1(45), W2(49));
+	R2(c, d, a, b, g, h, e, f, K[46], W1(46), W2(50));
+	R2(b, c, d, a, f, g, h, e, K[47], W1(47), W2(51));
+
+	R2(a, b, c, d, e, f, g, h, K[48], W1(48), W2(52));
+	R2(d, a, b, c, h, e, f, g, K[49], W1(49), W2(53));
+	R2(c, d, a, b, g, h, e, f, K[50], W1(50), W2(54));
+	R2(b, c, d, a, f, g, h, e, K[51], W1(51), W2(55));
+	R2(a, b, c, d, e, f, g, h, K[52], W1(52), W2(56));
+	R2(d, a, b, c, h, e, f, g, K[53], W1(53), W2(57));
+	R2(c, d, a, b, g, h, e, f, K[54], W1(54), W2(58));
+	R2(b, c, d, a, f, g, h, e, K[55], W1(55), W2(59));
+	R2(a, b, c, d, e, f, g, h, K[56], W1(56), W2(60));
+	R2(d, a, b, c, h, e, f, g, K[57], W1(57), W2(61));
+	R2(c, d, a, b, g, h, e, f, K[58], W1(58), W2(62));
+	R2(b, c, d, a, f, g, h, e, K[59], W1(59), W2(63));
+	R2(a, b, c, d, e, f, g, h, K[60], W1(60), W2(64));
+	R2(d, a, b, c, h, e, f, g, K[61], W1(61), W2(65));
+	R2(c, d, a, b, g, h, e, f, K[62], W1(62), W2(66));
+	R2(b, c, d, a, f, g, h, e, K[63], W1(63), W2(67));
+
+	sctx->state[0] ^= a;
+	sctx->state[1] ^= b;
+	sctx->state[2] ^= c;
+	sctx->state[3] ^= d;
+	sctx->state[4] ^= e;
+	sctx->state[5] ^= f;
+	sctx->state[6] ^= g;
+	sctx->state[7] ^= h;
+}
+#undef R
+#undef R1
+#undef R2
+#undef I
+#undef W1
+#undef W2
+
+static inline void sm3_block(struct sm3_state *sctx,
+		u8 const *data, int blocks, u32 W[16])
+{
+	while (blocks--) {
+		sm3_transform(sctx, data, W);
+		data += SM3_BLOCK_SIZE;
+	}
+}
+
+void sm3_update(struct sm3_state *sctx, const u8 *data, unsigned int len)
+{
+	unsigned int partial = sctx->count % SM3_BLOCK_SIZE;
+	u32 W[16];
+
+	sctx->count += len;
+
+	if ((partial + len) >= SM3_BLOCK_SIZE) {
+		int blocks;
+
+		if (partial) {
+			int p = SM3_BLOCK_SIZE - partial;
+
+			memcpy(sctx->buffer + partial, data, p);
+			data += p;
+			len -= p;
+
+			sm3_block(sctx, sctx->buffer, 1, W);
+		}
+
+		blocks = len / SM3_BLOCK_SIZE;
+		len %= SM3_BLOCK_SIZE;
+
+		if (blocks) {
+			sm3_block(sctx, data, blocks, W);
+			data += blocks * SM3_BLOCK_SIZE;
+		}
+
+		memzero_explicit(W, sizeof(W));
+
+		partial = 0;
+	}
+	if (len)
+		memcpy(sctx->buffer + partial, data, len);
+}
+EXPORT_SYMBOL_GPL(sm3_update);
+
+void sm3_final(struct sm3_state *sctx, u8 *out)
+{
+	const int bit_offset = SM3_BLOCK_SIZE - sizeof(u64);
+	__be64 *bits = (__be64 *)(sctx->buffer + bit_offset);
+	__be32 *digest = (__be32 *)out;
+	unsigned int partial = sctx->count % SM3_BLOCK_SIZE;
+	u32 W[16];
+	int i;
+
+	sctx->buffer[partial++] = 0x80;
+	if (partial > bit_offset) {
+		memset(sctx->buffer + partial, 0, SM3_BLOCK_SIZE - partial);
+		partial = 0;
+
+		sm3_block(sctx, sctx->buffer, 1, W);
+	}
+
+	memset(sctx->buffer + partial, 0, bit_offset - partial);
+	*bits = cpu_to_be64(sctx->count << 3);
+	sm3_block(sctx, sctx->buffer, 1, W);
+
+	for (i = 0; i < 8; i++)
+		put_unaligned_be32(sctx->state[i], digest++);
+
+	/* Zeroize sensitive information. */
+	memzero_explicit(W, sizeof(W));
+	memzero_explicit(sctx, sizeof(*sctx));
+}
+EXPORT_SYMBOL_GPL(sm3_final);
+
+MODULE_DESCRIPTION("Generic SM3 library");
+MODULE_LICENSE("GPL v2");
-- 
GitLab


From f3a03d319dbdbb206530ebfce977c334ee2f8765 Mon Sep 17 00:00:00 2001
From: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Date: Fri, 7 Jan 2022 20:06:56 +0800
Subject: [PATCH 0073/1586] crypto: arm64/sm3-ce - make dependent on sm3
 library

SM3 generic library is stand-alone implementation, sm3-ce can depend
on the SM3 library instead of sm3-generic.

Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm64/crypto/Kconfig       |  2 +-
 arch/arm64/crypto/sm3-ce-glue.c | 28 ++++++++++++++++++++--------
 2 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index addfa413650bd..2a965aa0188dd 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -45,7 +45,7 @@ config CRYPTO_SM3_ARM64_CE
 	tristate "SM3 digest algorithm (ARMv8.2 Crypto Extensions)"
 	depends on KERNEL_MODE_NEON
 	select CRYPTO_HASH
-	select CRYPTO_SM3
+	select CRYPTO_LIB_SM3
 
 config CRYPTO_SM4_ARM64_CE
 	tristate "SM4 symmetric cipher (ARMv8.2 Crypto Extensions)"
diff --git a/arch/arm64/crypto/sm3-ce-glue.c b/arch/arm64/crypto/sm3-ce-glue.c
index d71faca322f2a..ee98954ae8ca6 100644
--- a/arch/arm64/crypto/sm3-ce-glue.c
+++ b/arch/arm64/crypto/sm3-ce-glue.c
@@ -26,8 +26,10 @@ asmlinkage void sm3_ce_transform(struct sm3_state *sst, u8 const *src,
 static int sm3_ce_update(struct shash_desc *desc, const u8 *data,
 			 unsigned int len)
 {
-	if (!crypto_simd_usable())
-		return crypto_sm3_update(desc, data, len);
+	if (!crypto_simd_usable()) {
+		sm3_update(shash_desc_ctx(desc), data, len);
+		return 0;
+	}
 
 	kernel_neon_begin();
 	sm3_base_do_update(desc, data, len, sm3_ce_transform);
@@ -38,8 +40,10 @@ static int sm3_ce_update(struct shash_desc *desc, const u8 *data,
 
 static int sm3_ce_final(struct shash_desc *desc, u8 *out)
 {
-	if (!crypto_simd_usable())
-		return crypto_sm3_finup(desc, NULL, 0, out);
+	if (!crypto_simd_usable()) {
+		sm3_final(shash_desc_ctx(desc), out);
+		return 0;
+	}
 
 	kernel_neon_begin();
 	sm3_base_do_finalize(desc, sm3_ce_transform);
@@ -51,14 +55,22 @@ static int sm3_ce_final(struct shash_desc *desc, u8 *out)
 static int sm3_ce_finup(struct shash_desc *desc, const u8 *data,
 			unsigned int len, u8 *out)
 {
-	if (!crypto_simd_usable())
-		return crypto_sm3_finup(desc, data, len, out);
+	if (!crypto_simd_usable()) {
+		struct sm3_state *sctx = shash_desc_ctx(desc);
+
+		if (len)
+			sm3_update(sctx, data, len);
+		sm3_final(sctx, out);
+		return 0;
+	}
 
 	kernel_neon_begin();
-	sm3_base_do_update(desc, data, len, sm3_ce_transform);
+	if (len)
+		sm3_base_do_update(desc, data, len, sm3_ce_transform);
+	sm3_base_do_finalize(desc, sm3_ce_transform);
 	kernel_neon_end();
 
-	return sm3_ce_final(desc, out);
+	return sm3_base_finish(desc, out);
 }
 
 static struct shash_alg sm3_alg = {
-- 
GitLab


From 114004696bf23499ca834e784d91bd82de195d76 Mon Sep 17 00:00:00 2001
From: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Date: Fri, 7 Jan 2022 20:06:57 +0800
Subject: [PATCH 0074/1586] crypto: sm2 - make dependent on sm3 library

SM3 generic library is stand-alone implementation, it is necessary
for the calculation of sm2 z digest to depends on SM3 library
instead of sm3-generic.

Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/Kconfig |  2 +-
 crypto/sm2.c   | 38 +++++++++++++++++++-------------------
 2 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/crypto/Kconfig b/crypto/Kconfig
index 442765219c375..833da41860b98 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -267,7 +267,7 @@ config CRYPTO_ECRDSA
 
 config CRYPTO_SM2
 	tristate "SM2 algorithm"
-	select CRYPTO_SM3
+	select CRYPTO_LIB_SM3
 	select CRYPTO_AKCIPHER
 	select CRYPTO_MANAGER
 	select MPILIB
diff --git a/crypto/sm2.c b/crypto/sm2.c
index db8a4a265669d..ae3f77a660703 100644
--- a/crypto/sm2.c
+++ b/crypto/sm2.c
@@ -13,7 +13,7 @@
 #include <crypto/internal/akcipher.h>
 #include <crypto/akcipher.h>
 #include <crypto/hash.h>
-#include <crypto/sm3_base.h>
+#include <crypto/sm3.h>
 #include <crypto/rng.h>
 #include <crypto/sm2.h>
 #include "sm2signature.asn1.h"
@@ -213,7 +213,7 @@ int sm2_get_signature_s(void *context, size_t hdrlen, unsigned char tag,
 	return 0;
 }
 
-static int sm2_z_digest_update(struct shash_desc *desc,
+static int sm2_z_digest_update(struct sm3_state *sctx,
 			MPI m, unsigned int pbytes)
 {
 	static const unsigned char zero[32];
@@ -226,20 +226,20 @@ static int sm2_z_digest_update(struct shash_desc *desc,
 
 	if (inlen < pbytes) {
 		/* padding with zero */
-		crypto_sm3_update(desc, zero, pbytes - inlen);
-		crypto_sm3_update(desc, in, inlen);
+		sm3_update(sctx, zero, pbytes - inlen);
+		sm3_update(sctx, in, inlen);
 	} else if (inlen > pbytes) {
 		/* skip the starting zero */
-		crypto_sm3_update(desc, in + inlen - pbytes, pbytes);
+		sm3_update(sctx, in + inlen - pbytes, pbytes);
 	} else {
-		crypto_sm3_update(desc, in, inlen);
+		sm3_update(sctx, in, inlen);
 	}
 
 	kfree(in);
 	return 0;
 }
 
-static int sm2_z_digest_update_point(struct shash_desc *desc,
+static int sm2_z_digest_update_point(struct sm3_state *sctx,
 		MPI_POINT point, struct mpi_ec_ctx *ec, unsigned int pbytes)
 {
 	MPI x, y;
@@ -249,8 +249,8 @@ static int sm2_z_digest_update_point(struct shash_desc *desc,
 	y = mpi_new(0);
 
 	if (!mpi_ec_get_affine(x, y, point, ec) &&
-		!sm2_z_digest_update(desc, x, pbytes) &&
-		!sm2_z_digest_update(desc, y, pbytes))
+	    !sm2_z_digest_update(sctx, x, pbytes) &&
+	    !sm2_z_digest_update(sctx, y, pbytes))
 		ret = 0;
 
 	mpi_free(x);
@@ -265,7 +265,7 @@ int sm2_compute_z_digest(struct crypto_akcipher *tfm,
 	struct mpi_ec_ctx *ec = akcipher_tfm_ctx(tfm);
 	uint16_t bits_len;
 	unsigned char entl[2];
-	SHASH_DESC_ON_STACK(desc, NULL);
+	struct sm3_state sctx;
 	unsigned int pbytes;
 
 	if (id_len > (USHRT_MAX / 8) || !ec->Q)
@@ -278,17 +278,17 @@ int sm2_compute_z_digest(struct crypto_akcipher *tfm,
 	pbytes = MPI_NBYTES(ec->p);
 
 	/* ZA = H256(ENTLA | IDA | a | b | xG | yG | xA | yA) */
-	sm3_base_init(desc);
-	crypto_sm3_update(desc, entl, 2);
-	crypto_sm3_update(desc, id, id_len);
-
-	if (sm2_z_digest_update(desc, ec->a, pbytes) ||
-		sm2_z_digest_update(desc, ec->b, pbytes) ||
-		sm2_z_digest_update_point(desc, ec->G, ec, pbytes) ||
-		sm2_z_digest_update_point(desc, ec->Q, ec, pbytes))
+	sm3_init(&sctx);
+	sm3_update(&sctx, entl, 2);
+	sm3_update(&sctx, id, id_len);
+
+	if (sm2_z_digest_update(&sctx, ec->a, pbytes) ||
+	    sm2_z_digest_update(&sctx, ec->b, pbytes) ||
+	    sm2_z_digest_update_point(&sctx, ec->G, ec, pbytes) ||
+	    sm2_z_digest_update_point(&sctx, ec->Q, ec, pbytes))
 		return -EINVAL;
 
-	crypto_sm3_final(desc, dgst);
+	sm3_final(&sctx, dgst);
 	return 0;
 }
 EXPORT_SYMBOL(sm2_compute_z_digest);
-- 
GitLab


From b4784a45ea69577f21f89898c71127774a090a2a Mon Sep 17 00:00:00 2001
From: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Date: Fri, 7 Jan 2022 20:06:58 +0800
Subject: [PATCH 0075/1586] crypto: sm3 - make dependent on sm3 library

SM3 generic library is stand-alone implementation, it is necessary
making the sm3-generic implementation to depends on SM3 library.
The functions crypto_sm3_*() provided by sm3_generic is no longer
exported.

Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/Kconfig       |   1 +
 crypto/sm3_generic.c | 142 +++++--------------------------------------
 include/crypto/sm3.h |  10 ---
 3 files changed, 16 insertions(+), 137 deletions(-)

diff --git a/crypto/Kconfig b/crypto/Kconfig
index 833da41860b98..b07174195e984 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -999,6 +999,7 @@ config CRYPTO_SHA3
 config CRYPTO_SM3
 	tristate "SM3 digest algorithm"
 	select CRYPTO_HASH
+	select CRYPTO_LIB_SM3
 	help
 	  SM3 secure hash function as defined by OSCCA GM/T 0004-2012 SM3).
 	  It is part of the Chinese Commercial Cryptography suite.
diff --git a/crypto/sm3_generic.c b/crypto/sm3_generic.c
index 193c4584bd004..a215c1c37e730 100644
--- a/crypto/sm3_generic.c
+++ b/crypto/sm3_generic.c
@@ -5,6 +5,7 @@
  *
  * Copyright (C) 2017 ARM Limited or its affiliates.
  * Written by Gilad Ben-Yossef <gilad@benyossef.com>
+ * Copyright (C) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
  */
 
 #include <crypto/internal/hash.h>
@@ -26,143 +27,29 @@ const u8 sm3_zero_message_hash[SM3_DIGEST_SIZE] = {
 };
 EXPORT_SYMBOL_GPL(sm3_zero_message_hash);
 
-static inline u32 p0(u32 x)
-{
-	return x ^ rol32(x, 9) ^ rol32(x, 17);
-}
-
-static inline u32 p1(u32 x)
-{
-	return x ^ rol32(x, 15) ^ rol32(x, 23);
-}
-
-static inline u32 ff(unsigned int n, u32 a, u32 b, u32 c)
-{
-	return (n < 16) ? (a ^ b ^ c) : ((a & b) | (a & c) | (b & c));
-}
-
-static inline u32 gg(unsigned int n, u32 e, u32 f, u32 g)
-{
-	return (n < 16) ? (e ^ f ^ g) : ((e & f) | ((~e) & g));
-}
-
-static inline u32 t(unsigned int n)
-{
-	return (n < 16) ? SM3_T1 : SM3_T2;
-}
-
-static void sm3_expand(u32 *t, u32 *w, u32 *wt)
-{
-	int i;
-	unsigned int tmp;
-
-	/* load the input */
-	for (i = 0; i <= 15; i++)
-		w[i] = get_unaligned_be32((__u32 *)t + i);
-
-	for (i = 16; i <= 67; i++) {
-		tmp = w[i - 16] ^ w[i - 9] ^ rol32(w[i - 3], 15);
-		w[i] = p1(tmp) ^ (rol32(w[i - 13], 7)) ^ w[i - 6];
-	}
-
-	for (i = 0; i <= 63; i++)
-		wt[i] = w[i] ^ w[i + 4];
-}
-
-static void sm3_compress(u32 *w, u32 *wt, u32 *m)
-{
-	u32 ss1;
-	u32 ss2;
-	u32 tt1;
-	u32 tt2;
-	u32 a, b, c, d, e, f, g, h;
-	int i;
-
-	a = m[0];
-	b = m[1];
-	c = m[2];
-	d = m[3];
-	e = m[4];
-	f = m[5];
-	g = m[6];
-	h = m[7];
-
-	for (i = 0; i <= 63; i++) {
-
-		ss1 = rol32((rol32(a, 12) + e + rol32(t(i), i & 31)), 7);
-
-		ss2 = ss1 ^ rol32(a, 12);
-
-		tt1 = ff(i, a, b, c) + d + ss2 + *wt;
-		wt++;
-
-		tt2 = gg(i, e, f, g) + h + ss1 + *w;
-		w++;
-
-		d = c;
-		c = rol32(b, 9);
-		b = a;
-		a = tt1;
-		h = g;
-		g = rol32(f, 19);
-		f = e;
-		e = p0(tt2);
-	}
-
-	m[0] = a ^ m[0];
-	m[1] = b ^ m[1];
-	m[2] = c ^ m[2];
-	m[3] = d ^ m[3];
-	m[4] = e ^ m[4];
-	m[5] = f ^ m[5];
-	m[6] = g ^ m[6];
-	m[7] = h ^ m[7];
-
-	a = b = c = d = e = f = g = h = ss1 = ss2 = tt1 = tt2 = 0;
-}
-
-static void sm3_transform(struct sm3_state *sst, u8 const *src)
-{
-	unsigned int w[68];
-	unsigned int wt[64];
-
-	sm3_expand((u32 *)src, w, wt);
-	sm3_compress(w, wt, sst->state);
-
-	memzero_explicit(w, sizeof(w));
-	memzero_explicit(wt, sizeof(wt));
-}
-
-static void sm3_generic_block_fn(struct sm3_state *sst, u8 const *src,
-				    int blocks)
-{
-	while (blocks--) {
-		sm3_transform(sst, src);
-		src += SM3_BLOCK_SIZE;
-	}
-}
-
-int crypto_sm3_update(struct shash_desc *desc, const u8 *data,
+static int crypto_sm3_update(struct shash_desc *desc, const u8 *data,
 			  unsigned int len)
 {
-	return sm3_base_do_update(desc, data, len, sm3_generic_block_fn);
+	sm3_update(shash_desc_ctx(desc), data, len);
+	return 0;
 }
-EXPORT_SYMBOL(crypto_sm3_update);
 
-int crypto_sm3_final(struct shash_desc *desc, u8 *out)
+static int crypto_sm3_final(struct shash_desc *desc, u8 *out)
 {
-	sm3_base_do_finalize(desc, sm3_generic_block_fn);
-	return sm3_base_finish(desc, out);
+	sm3_final(shash_desc_ctx(desc), out);
+	return 0;
 }
-EXPORT_SYMBOL(crypto_sm3_final);
 
-int crypto_sm3_finup(struct shash_desc *desc, const u8 *data,
+static int crypto_sm3_finup(struct shash_desc *desc, const u8 *data,
 			unsigned int len, u8 *hash)
 {
-	sm3_base_do_update(desc, data, len, sm3_generic_block_fn);
-	return crypto_sm3_final(desc, hash);
+	struct sm3_state *sctx = shash_desc_ctx(desc);
+
+	if (len)
+		sm3_update(sctx, data, len);
+	sm3_final(sctx, hash);
+	return 0;
 }
-EXPORT_SYMBOL(crypto_sm3_finup);
 
 static struct shash_alg sm3_alg = {
 	.digestsize	=	SM3_DIGEST_SIZE,
@@ -174,6 +61,7 @@ static struct shash_alg sm3_alg = {
 	.base		=	{
 		.cra_name	 =	"sm3",
 		.cra_driver_name =	"sm3-generic",
+		.cra_priority	=	100,
 		.cra_blocksize	 =	SM3_BLOCK_SIZE,
 		.cra_module	 =	THIS_MODULE,
 	}
diff --git a/include/crypto/sm3.h b/include/crypto/sm3.h
index b5fb6d1bf2479..1f021ad0533ff 100644
--- a/include/crypto/sm3.h
+++ b/include/crypto/sm3.h
@@ -35,16 +35,6 @@ struct sm3_state {
 	u8 buffer[SM3_BLOCK_SIZE];
 };
 
-struct shash_desc;
-
-extern int crypto_sm3_update(struct shash_desc *desc, const u8 *data,
-			      unsigned int len);
-
-extern int crypto_sm3_final(struct shash_desc *desc, u8 *out);
-
-extern int crypto_sm3_finup(struct shash_desc *desc, const u8 *data,
-			     unsigned int len, u8 *hash);
-
 /*
  * Stand-alone implementation of the SM3 algorithm. It is designed to
  * have as little dependencies as possible so it can be used in the
-- 
GitLab


From 930ab34d906d9c44727c9dcfeafcfcd33e3639e7 Mon Sep 17 00:00:00 2001
From: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Date: Fri, 7 Jan 2022 20:06:59 +0800
Subject: [PATCH 0076/1586] crypto: x86/sm3 - add AVX assembly implementation

This patch adds AVX assembly accelerated implementation of SM3 secure
hash algorithm. From the benchmark data, compared to pure software
implementation sm3-generic, the performance increase is up to 38%.

The main algorithm implementation based on SM3 AES/BMI2 accelerated
work by libgcrypt at:
https://gnupg.org/software/libgcrypt/index.html

Benchmark on Intel i5-6200U 2.30GHz, performance data of two
implementations, pure software sm3-generic and sm3-avx acceleration.
The data comes from the 326 mode and 422 mode of tcrypt. The abscissas
are different lengths of per update. The data is tabulated and the
unit is Mb/s:

update-size |     16      64     256    1024    2048    4096    8192
------------+-------------------------------------------------------
sm3-generic | 105.97  129.60  182.12  189.62  188.06  193.66  194.88
sm3-avx     | 119.87  163.05  244.44  260.92  257.60  264.87  265.88

Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/Makefile         |   3 +
 arch/x86/crypto/sm3-avx-asm_64.S | 517 +++++++++++++++++++++++++++++++
 arch/x86/crypto/sm3_avx_glue.c   | 134 ++++++++
 crypto/Kconfig                   |  13 +
 4 files changed, 667 insertions(+)
 create mode 100644 arch/x86/crypto/sm3-avx-asm_64.S
 create mode 100644 arch/x86/crypto/sm3_avx_glue.c

diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index c3af959648e62..2831685adf6fb 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -90,6 +90,9 @@ nhpoly1305-avx2-y := nh-avx2-x86_64.o nhpoly1305-avx2-glue.o
 
 obj-$(CONFIG_CRYPTO_CURVE25519_X86) += curve25519-x86_64.o
 
+obj-$(CONFIG_CRYPTO_SM3_AVX_X86_64) += sm3-avx-x86_64.o
+sm3-avx-x86_64-y := sm3-avx-asm_64.o sm3_avx_glue.o
+
 obj-$(CONFIG_CRYPTO_SM4_AESNI_AVX_X86_64) += sm4-aesni-avx-x86_64.o
 sm4-aesni-avx-x86_64-y := sm4-aesni-avx-asm_64.o sm4_aesni_avx_glue.o
 
diff --git a/arch/x86/crypto/sm3-avx-asm_64.S b/arch/x86/crypto/sm3-avx-asm_64.S
new file mode 100644
index 0000000000000..71e6aae23e17c
--- /dev/null
+++ b/arch/x86/crypto/sm3-avx-asm_64.S
@@ -0,0 +1,517 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * SM3 AVX accelerated transform.
+ * specified in: https://datatracker.ietf.org/doc/html/draft-sca-cfrg-sm3-02
+ *
+ * Copyright (C) 2021 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ * Copyright (C) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
+ */
+
+/* Based on SM3 AES/BMI2 accelerated work by libgcrypt at:
+ *  https://gnupg.org/software/libgcrypt/index.html
+ */
+
+#include <linux/linkage.h>
+#include <asm/frame.h>
+
+/* Context structure */
+
+#define state_h0 0
+#define state_h1 4
+#define state_h2 8
+#define state_h3 12
+#define state_h4 16
+#define state_h5 20
+#define state_h6 24
+#define state_h7 28
+
+/* Constants */
+
+/* Round constant macros */
+
+#define K0   2043430169  /* 0x79cc4519 */
+#define K1   -208106958  /* 0xf3988a32 */
+#define K2   -416213915  /* 0xe7311465 */
+#define K3   -832427829  /* 0xce6228cb */
+#define K4  -1664855657  /* 0x9cc45197 */
+#define K5    965255983  /* 0x3988a32f */
+#define K6   1930511966  /* 0x7311465e */
+#define K7   -433943364  /* 0xe6228cbc */
+#define K8   -867886727  /* 0xcc451979 */
+#define K9  -1735773453  /* 0x988a32f3 */
+#define K10   823420391  /* 0x311465e7 */
+#define K11  1646840782  /* 0x6228cbce */
+#define K12 -1001285732  /* 0xc451979c */
+#define K13 -2002571463  /* 0x88a32f39 */
+#define K14   289824371  /* 0x11465e73 */
+#define K15   579648742  /* 0x228cbce6 */
+#define K16 -1651869049  /* 0x9d8a7a87 */
+#define K17   991229199  /* 0x3b14f50f */
+#define K18  1982458398  /* 0x7629ea1e */
+#define K19  -330050500  /* 0xec53d43c */
+#define K20  -660100999  /* 0xd8a7a879 */
+#define K21 -1320201997  /* 0xb14f50f3 */
+#define K22  1654563303  /* 0x629ea1e7 */
+#define K23  -985840690  /* 0xc53d43ce */
+#define K24 -1971681379  /* 0x8a7a879d */
+#define K25   351604539  /* 0x14f50f3b */
+#define K26   703209078  /* 0x29ea1e76 */
+#define K27  1406418156  /* 0x53d43cec */
+#define K28 -1482130984  /* 0xa7a879d8 */
+#define K29  1330705329  /* 0x4f50f3b1 */
+#define K30 -1633556638  /* 0x9ea1e762 */
+#define K31  1027854021  /* 0x3d43cec5 */
+#define K32  2055708042  /* 0x7a879d8a */
+#define K33  -183551212  /* 0xf50f3b14 */
+#define K34  -367102423  /* 0xea1e7629 */
+#define K35  -734204845  /* 0xd43cec53 */
+#define K36 -1468409689  /* 0xa879d8a7 */
+#define K37  1358147919  /* 0x50f3b14f */
+#define K38 -1578671458  /* 0xa1e7629e */
+#define K39  1137624381  /* 0x43cec53d */
+#define K40 -2019718534  /* 0x879d8a7a */
+#define K41   255530229  /* 0x0f3b14f5 */
+#define K42   511060458  /* 0x1e7629ea */
+#define K43  1022120916  /* 0x3cec53d4 */
+#define K44  2044241832  /* 0x79d8a7a8 */
+#define K45  -206483632  /* 0xf3b14f50 */
+#define K46  -412967263  /* 0xe7629ea1 */
+#define K47  -825934525  /* 0xcec53d43 */
+#define K48 -1651869049  /* 0x9d8a7a87 */
+#define K49   991229199  /* 0x3b14f50f */
+#define K50  1982458398  /* 0x7629ea1e */
+#define K51  -330050500  /* 0xec53d43c */
+#define K52  -660100999  /* 0xd8a7a879 */
+#define K53 -1320201997  /* 0xb14f50f3 */
+#define K54  1654563303  /* 0x629ea1e7 */
+#define K55  -985840690  /* 0xc53d43ce */
+#define K56 -1971681379  /* 0x8a7a879d */
+#define K57   351604539  /* 0x14f50f3b */
+#define K58   703209078  /* 0x29ea1e76 */
+#define K59  1406418156  /* 0x53d43cec */
+#define K60 -1482130984  /* 0xa7a879d8 */
+#define K61  1330705329  /* 0x4f50f3b1 */
+#define K62 -1633556638  /* 0x9ea1e762 */
+#define K63  1027854021  /* 0x3d43cec5 */
+
+/* Register macros */
+
+#define RSTATE %rdi
+#define RDATA  %rsi
+#define RNBLKS %rdx
+
+#define t0 %eax
+#define t1 %ebx
+#define t2 %ecx
+
+#define a %r8d
+#define b %r9d
+#define c %r10d
+#define d %r11d
+#define e %r12d
+#define f %r13d
+#define g %r14d
+#define h %r15d
+
+#define W0 %xmm0
+#define W1 %xmm1
+#define W2 %xmm2
+#define W3 %xmm3
+#define W4 %xmm4
+#define W5 %xmm5
+
+#define XTMP0 %xmm6
+#define XTMP1 %xmm7
+#define XTMP2 %xmm8
+#define XTMP3 %xmm9
+#define XTMP4 %xmm10
+#define XTMP5 %xmm11
+#define XTMP6 %xmm12
+
+#define BSWAP_REG %xmm15
+
+/* Stack structure */
+
+#define STACK_W_SIZE        (32 * 2 * 3)
+#define STACK_REG_SAVE_SIZE (64)
+
+#define STACK_W             (0)
+#define STACK_REG_SAVE      (STACK_W + STACK_W_SIZE)
+#define STACK_SIZE          (STACK_REG_SAVE + STACK_REG_SAVE_SIZE)
+
+/* Instruction helpers. */
+
+#define roll2(v, reg)		\
+	roll $(v), reg;
+
+#define roll3mov(v, src, dst)	\
+	movl src, dst;		\
+	roll $(v), dst;
+
+#define roll3(v, src, dst)	\
+	rorxl $(32-(v)), src, dst;
+
+#define addl2(a, out)		\
+	leal (a, out), out;
+
+/* Round function macros. */
+
+#define GG1(x, y, z, o, t)	\
+	movl x, o;		\
+	xorl y, o;		\
+	xorl z, o;
+
+#define FF1(x, y, z, o, t) GG1(x, y, z, o, t)
+
+#define GG2(x, y, z, o, t)	\
+	andnl z, x, o;		\
+	movl y, t;		\
+	andl x, t;		\
+	addl2(t, o);
+
+#define FF2(x, y, z, o, t)	\
+	movl y, o;		\
+	xorl x, o;		\
+	movl y, t;		\
+	andl x, t;		\
+	andl z, o;		\
+	xorl t, o;
+
+#define R(i, a, b, c, d, e, f, g, h, round, widx, wtype)		\
+	/* rol(a, 12) => t0 */						\
+	roll3mov(12, a, t0); /* rorxl here would reduce perf by 6% on zen3 */ \
+	/* rol (t0 + e + t), 7) => t1 */				\
+	leal K##round(t0, e, 1), t1;					\
+	roll2(7, t1);							\
+	/* h + w1 => h */						\
+	addl wtype##_W1_ADDR(round, widx), h;				\
+	/* h + t1 => h */						\
+	addl2(t1, h);							\
+	/* t1 ^ t0 => t0 */						\
+	xorl t1, t0;							\
+	/* w1w2 + d => d */						\
+	addl wtype##_W1W2_ADDR(round, widx), d;				\
+	/* FF##i(a,b,c) => t1 */					\
+	FF##i(a, b, c, t1, t2);						\
+	/* d + t1 => d */						\
+	addl2(t1, d);							\
+	/* GG#i(e,f,g) => t2 */						\
+	GG##i(e, f, g, t2, t1);						\
+	/* h + t2 => h */						\
+	addl2(t2, h);							\
+	/* rol (f, 19) => f */						\
+	roll2(19, f);							\
+	/* d + t0 => d */						\
+	addl2(t0, d);							\
+	/* rol (b, 9) => b */						\
+	roll2(9, b);							\
+	/* P0(h) => h */						\
+	roll3(9, h, t2);						\
+	roll3(17, h, t1);						\
+	xorl t2, h;							\
+	xorl t1, h;
+
+#define R1(a, b, c, d, e, f, g, h, round, widx, wtype) \
+	R(1, a, b, c, d, e, f, g, h, round, widx, wtype)
+
+#define R2(a, b, c, d, e, f, g, h, round, widx, wtype) \
+	R(2, a, b, c, d, e, f, g, h, round, widx, wtype)
+
+/* Input expansion macros. */
+
+/* Byte-swapped input address. */
+#define IW_W_ADDR(round, widx, offs) \
+	(STACK_W + ((round) / 4) * 64 + (offs) + ((widx) * 4))(%rsp)
+
+/* Expanded input address. */
+#define XW_W_ADDR(round, widx, offs) \
+	(STACK_W + ((((round) / 3) - 4) % 2) * 64 + (offs) + ((widx) * 4))(%rsp)
+
+/* Rounds 1-12, byte-swapped input block addresses. */
+#define IW_W1_ADDR(round, widx)   IW_W_ADDR(round, widx, 0)
+#define IW_W1W2_ADDR(round, widx) IW_W_ADDR(round, widx, 32)
+
+/* Rounds 1-12, expanded input block addresses. */
+#define XW_W1_ADDR(round, widx)   XW_W_ADDR(round, widx, 0)
+#define XW_W1W2_ADDR(round, widx) XW_W_ADDR(round, widx, 32)
+
+/* Input block loading. */
+#define LOAD_W_XMM_1()							\
+	vmovdqu 0*16(RDATA), XTMP0; /* XTMP0: w3, w2, w1, w0 */		\
+	vmovdqu 1*16(RDATA), XTMP1; /* XTMP1: w7, w6, w5, w4 */		\
+	vmovdqu 2*16(RDATA), XTMP2; /* XTMP2: w11, w10, w9, w8 */	\
+	vmovdqu 3*16(RDATA), XTMP3; /* XTMP3: w15, w14, w13, w12 */	\
+	vpshufb BSWAP_REG, XTMP0, XTMP0;				\
+	vpshufb BSWAP_REG, XTMP1, XTMP1;				\
+	vpshufb BSWAP_REG, XTMP2, XTMP2;				\
+	vpshufb BSWAP_REG, XTMP3, XTMP3;				\
+	vpxor XTMP0, XTMP1, XTMP4;					\
+	vpxor XTMP1, XTMP2, XTMP5;					\
+	vpxor XTMP2, XTMP3, XTMP6;					\
+	leaq 64(RDATA), RDATA;						\
+	vmovdqa XTMP0, IW_W1_ADDR(0, 0);				\
+	vmovdqa XTMP4, IW_W1W2_ADDR(0, 0);				\
+	vmovdqa XTMP1, IW_W1_ADDR(4, 0);				\
+	vmovdqa XTMP5, IW_W1W2_ADDR(4, 0);
+
+#define LOAD_W_XMM_2()				\
+	vmovdqa XTMP2, IW_W1_ADDR(8, 0);	\
+	vmovdqa XTMP6, IW_W1W2_ADDR(8, 0);
+
+#define LOAD_W_XMM_3()							\
+	vpshufd $0b00000000, XTMP0, W0; /* W0: xx, w0, xx, xx */	\
+	vpshufd $0b11111001, XTMP0, W1; /* W1: xx, w3, w2, w1 */	\
+	vmovdqa XTMP1, W2;              /* W2: xx, w6, w5, w4 */	\
+	vpalignr $12, XTMP1, XTMP2, W3; /* W3: xx, w9, w8, w7 */	\
+	vpalignr $8, XTMP2, XTMP3, W4;  /* W4: xx, w12, w11, w10 */	\
+	vpshufd $0b11111001, XTMP3, W5; /* W5: xx, w15, w14, w13 */
+
+/* Message scheduling. Note: 3 words per XMM register. */
+#define SCHED_W_0(round, w0, w1, w2, w3, w4, w5)			\
+	/* Load (w[i - 16]) => XTMP0 */					\
+	vpshufd $0b10111111, w0, XTMP0;					\
+	vpalignr $12, XTMP0, w1, XTMP0; /* XTMP0: xx, w2, w1, w0 */	\
+	/* Load (w[i - 13]) => XTMP1 */					\
+	vpshufd $0b10111111, w1, XTMP1;					\
+	vpalignr $12, XTMP1, w2, XTMP1;					\
+	/* w[i - 9] == w3 */						\
+	/* XMM3 ^ XTMP0 => XTMP0 */					\
+	vpxor w3, XTMP0, XTMP0;
+
+#define SCHED_W_1(round, w0, w1, w2, w3, w4, w5)	\
+	/* w[i - 3] == w5 */				\
+	/* rol(XMM5, 15) ^ XTMP0 => XTMP0 */		\
+	vpslld $15, w5, XTMP2;				\
+	vpsrld $(32-15), w5, XTMP3;			\
+	vpxor XTMP2, XTMP3, XTMP3;			\
+	vpxor XTMP3, XTMP0, XTMP0;			\
+	/* rol(XTMP1, 7) => XTMP1 */			\
+	vpslld $7, XTMP1, XTMP5;			\
+	vpsrld $(32-7), XTMP1, XTMP1;			\
+	vpxor XTMP5, XTMP1, XTMP1;			\
+	/* XMM4 ^ XTMP1 => XTMP1 */			\
+	vpxor w4, XTMP1, XTMP1;				\
+	/* w[i - 6] == XMM4 */				\
+	/* P1(XTMP0) ^ XTMP1 => XMM0 */			\
+	vpslld $15, XTMP0, XTMP5;			\
+	vpsrld $(32-15), XTMP0, XTMP6;			\
+	vpslld $23, XTMP0, XTMP2;			\
+	vpsrld $(32-23), XTMP0, XTMP3;			\
+	vpxor XTMP0, XTMP1, XTMP1;			\
+	vpxor XTMP6, XTMP5, XTMP5;			\
+	vpxor XTMP3, XTMP2, XTMP2;			\
+	vpxor XTMP2, XTMP5, XTMP5;			\
+	vpxor XTMP5, XTMP1, w0;
+
+#define SCHED_W_2(round, w0, w1, w2, w3, w4, w5)	\
+	/* W1 in XMM12 */				\
+	vpshufd $0b10111111, w4, XTMP4;			\
+	vpalignr $12, XTMP4, w5, XTMP4;			\
+	vmovdqa XTMP4, XW_W1_ADDR((round), 0);		\
+	/* W1 ^ W2 => XTMP1 */				\
+	vpxor w0, XTMP4, XTMP1;				\
+	vmovdqa XTMP1, XW_W1W2_ADDR((round), 0);
+
+
+.section	.rodata.cst16, "aM", @progbits, 16
+.align 16
+
+.Lbe32mask:
+	.long 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f
+
+.text
+
+/*
+ * Transform nblocks*64 bytes (nblocks*16 32-bit words) at DATA.
+ *
+ * void sm3_transform_avx(struct sm3_state *state,
+ *                        const u8 *data, int nblocks);
+ */
+.align 16
+SYM_FUNC_START(sm3_transform_avx)
+	/* input:
+	 *	%rdi: ctx, CTX
+	 *	%rsi: data (64*nblks bytes)
+	 *	%rdx: nblocks
+	 */
+	vzeroupper;
+
+	pushq %rbp;
+	movq %rsp, %rbp;
+
+	movq %rdx, RNBLKS;
+
+	subq $STACK_SIZE, %rsp;
+	andq $(~63), %rsp;
+
+	movq %rbx, (STACK_REG_SAVE + 0 * 8)(%rsp);
+	movq %r15, (STACK_REG_SAVE + 1 * 8)(%rsp);
+	movq %r14, (STACK_REG_SAVE + 2 * 8)(%rsp);
+	movq %r13, (STACK_REG_SAVE + 3 * 8)(%rsp);
+	movq %r12, (STACK_REG_SAVE + 4 * 8)(%rsp);
+
+	vmovdqa .Lbe32mask (%rip), BSWAP_REG;
+
+	/* Get the values of the chaining variables. */
+	movl state_h0(RSTATE), a;
+	movl state_h1(RSTATE), b;
+	movl state_h2(RSTATE), c;
+	movl state_h3(RSTATE), d;
+	movl state_h4(RSTATE), e;
+	movl state_h5(RSTATE), f;
+	movl state_h6(RSTATE), g;
+	movl state_h7(RSTATE), h;
+
+.align 16
+.Loop:
+	/* Load data part1. */
+	LOAD_W_XMM_1();
+
+	leaq -1(RNBLKS), RNBLKS;
+
+	/* Transform 0-3 + Load data part2. */
+	R1(a, b, c, d, e, f, g, h, 0, 0, IW); LOAD_W_XMM_2();
+	R1(d, a, b, c, h, e, f, g, 1, 1, IW);
+	R1(c, d, a, b, g, h, e, f, 2, 2, IW);
+	R1(b, c, d, a, f, g, h, e, 3, 3, IW); LOAD_W_XMM_3();
+
+	/* Transform 4-7 + Precalc 12-14. */
+	R1(a, b, c, d, e, f, g, h, 4, 0, IW);
+	R1(d, a, b, c, h, e, f, g, 5, 1, IW);
+	R1(c, d, a, b, g, h, e, f, 6, 2, IW); SCHED_W_0(12, W0, W1, W2, W3, W4, W5);
+	R1(b, c, d, a, f, g, h, e, 7, 3, IW); SCHED_W_1(12, W0, W1, W2, W3, W4, W5);
+
+	/* Transform 8-11 + Precalc 12-17. */
+	R1(a, b, c, d, e, f, g, h, 8, 0, IW); SCHED_W_2(12, W0, W1, W2, W3, W4, W5);
+	R1(d, a, b, c, h, e, f, g, 9, 1, IW); SCHED_W_0(15, W1, W2, W3, W4, W5, W0);
+	R1(c, d, a, b, g, h, e, f, 10, 2, IW); SCHED_W_1(15, W1, W2, W3, W4, W5, W0);
+	R1(b, c, d, a, f, g, h, e, 11, 3, IW); SCHED_W_2(15, W1, W2, W3, W4, W5, W0);
+
+	/* Transform 12-14 + Precalc 18-20 */
+	R1(a, b, c, d, e, f, g, h, 12, 0, XW); SCHED_W_0(18, W2, W3, W4, W5, W0, W1);
+	R1(d, a, b, c, h, e, f, g, 13, 1, XW); SCHED_W_1(18, W2, W3, W4, W5, W0, W1);
+	R1(c, d, a, b, g, h, e, f, 14, 2, XW); SCHED_W_2(18, W2, W3, W4, W5, W0, W1);
+
+	/* Transform 15-17 + Precalc 21-23 */
+	R1(b, c, d, a, f, g, h, e, 15, 0, XW); SCHED_W_0(21, W3, W4, W5, W0, W1, W2);
+	R2(a, b, c, d, e, f, g, h, 16, 1, XW); SCHED_W_1(21, W3, W4, W5, W0, W1, W2);
+	R2(d, a, b, c, h, e, f, g, 17, 2, XW); SCHED_W_2(21, W3, W4, W5, W0, W1, W2);
+
+	/* Transform 18-20 + Precalc 24-26 */
+	R2(c, d, a, b, g, h, e, f, 18, 0, XW); SCHED_W_0(24, W4, W5, W0, W1, W2, W3);
+	R2(b, c, d, a, f, g, h, e, 19, 1, XW); SCHED_W_1(24, W4, W5, W0, W1, W2, W3);
+	R2(a, b, c, d, e, f, g, h, 20, 2, XW); SCHED_W_2(24, W4, W5, W0, W1, W2, W3);
+
+	/* Transform 21-23 + Precalc 27-29 */
+	R2(d, a, b, c, h, e, f, g, 21, 0, XW); SCHED_W_0(27, W5, W0, W1, W2, W3, W4);
+	R2(c, d, a, b, g, h, e, f, 22, 1, XW); SCHED_W_1(27, W5, W0, W1, W2, W3, W4);
+	R2(b, c, d, a, f, g, h, e, 23, 2, XW); SCHED_W_2(27, W5, W0, W1, W2, W3, W4);
+
+	/* Transform 24-26 + Precalc 30-32 */
+	R2(a, b, c, d, e, f, g, h, 24, 0, XW); SCHED_W_0(30, W0, W1, W2, W3, W4, W5);
+	R2(d, a, b, c, h, e, f, g, 25, 1, XW); SCHED_W_1(30, W0, W1, W2, W3, W4, W5);
+	R2(c, d, a, b, g, h, e, f, 26, 2, XW); SCHED_W_2(30, W0, W1, W2, W3, W4, W5);
+
+	/* Transform 27-29 + Precalc 33-35 */
+	R2(b, c, d, a, f, g, h, e, 27, 0, XW); SCHED_W_0(33, W1, W2, W3, W4, W5, W0);
+	R2(a, b, c, d, e, f, g, h, 28, 1, XW); SCHED_W_1(33, W1, W2, W3, W4, W5, W0);
+	R2(d, a, b, c, h, e, f, g, 29, 2, XW); SCHED_W_2(33, W1, W2, W3, W4, W5, W0);
+
+	/* Transform 30-32 + Precalc 36-38 */
+	R2(c, d, a, b, g, h, e, f, 30, 0, XW); SCHED_W_0(36, W2, W3, W4, W5, W0, W1);
+	R2(b, c, d, a, f, g, h, e, 31, 1, XW); SCHED_W_1(36, W2, W3, W4, W5, W0, W1);
+	R2(a, b, c, d, e, f, g, h, 32, 2, XW); SCHED_W_2(36, W2, W3, W4, W5, W0, W1);
+
+	/* Transform 33-35 + Precalc 39-41 */
+	R2(d, a, b, c, h, e, f, g, 33, 0, XW); SCHED_W_0(39, W3, W4, W5, W0, W1, W2);
+	R2(c, d, a, b, g, h, e, f, 34, 1, XW); SCHED_W_1(39, W3, W4, W5, W0, W1, W2);
+	R2(b, c, d, a, f, g, h, e, 35, 2, XW); SCHED_W_2(39, W3, W4, W5, W0, W1, W2);
+
+	/* Transform 36-38 + Precalc 42-44 */
+	R2(a, b, c, d, e, f, g, h, 36, 0, XW); SCHED_W_0(42, W4, W5, W0, W1, W2, W3);
+	R2(d, a, b, c, h, e, f, g, 37, 1, XW); SCHED_W_1(42, W4, W5, W0, W1, W2, W3);
+	R2(c, d, a, b, g, h, e, f, 38, 2, XW); SCHED_W_2(42, W4, W5, W0, W1, W2, W3);
+
+	/* Transform 39-41 + Precalc 45-47 */
+	R2(b, c, d, a, f, g, h, e, 39, 0, XW); SCHED_W_0(45, W5, W0, W1, W2, W3, W4);
+	R2(a, b, c, d, e, f, g, h, 40, 1, XW); SCHED_W_1(45, W5, W0, W1, W2, W3, W4);
+	R2(d, a, b, c, h, e, f, g, 41, 2, XW); SCHED_W_2(45, W5, W0, W1, W2, W3, W4);
+
+	/* Transform 42-44 + Precalc 48-50 */
+	R2(c, d, a, b, g, h, e, f, 42, 0, XW); SCHED_W_0(48, W0, W1, W2, W3, W4, W5);
+	R2(b, c, d, a, f, g, h, e, 43, 1, XW); SCHED_W_1(48, W0, W1, W2, W3, W4, W5);
+	R2(a, b, c, d, e, f, g, h, 44, 2, XW); SCHED_W_2(48, W0, W1, W2, W3, W4, W5);
+
+	/* Transform 45-47 + Precalc 51-53 */
+	R2(d, a, b, c, h, e, f, g, 45, 0, XW); SCHED_W_0(51, W1, W2, W3, W4, W5, W0);
+	R2(c, d, a, b, g, h, e, f, 46, 1, XW); SCHED_W_1(51, W1, W2, W3, W4, W5, W0);
+	R2(b, c, d, a, f, g, h, e, 47, 2, XW); SCHED_W_2(51, W1, W2, W3, W4, W5, W0);
+
+	/* Transform 48-50 + Precalc 54-56 */
+	R2(a, b, c, d, e, f, g, h, 48, 0, XW); SCHED_W_0(54, W2, W3, W4, W5, W0, W1);
+	R2(d, a, b, c, h, e, f, g, 49, 1, XW); SCHED_W_1(54, W2, W3, W4, W5, W0, W1);
+	R2(c, d, a, b, g, h, e, f, 50, 2, XW); SCHED_W_2(54, W2, W3, W4, W5, W0, W1);
+
+	/* Transform 51-53 + Precalc 57-59 */
+	R2(b, c, d, a, f, g, h, e, 51, 0, XW); SCHED_W_0(57, W3, W4, W5, W0, W1, W2);
+	R2(a, b, c, d, e, f, g, h, 52, 1, XW); SCHED_W_1(57, W3, W4, W5, W0, W1, W2);
+	R2(d, a, b, c, h, e, f, g, 53, 2, XW); SCHED_W_2(57, W3, W4, W5, W0, W1, W2);
+
+	/* Transform 54-56 + Precalc 60-62 */
+	R2(c, d, a, b, g, h, e, f, 54, 0, XW); SCHED_W_0(60, W4, W5, W0, W1, W2, W3);
+	R2(b, c, d, a, f, g, h, e, 55, 1, XW); SCHED_W_1(60, W4, W5, W0, W1, W2, W3);
+	R2(a, b, c, d, e, f, g, h, 56, 2, XW); SCHED_W_2(60, W4, W5, W0, W1, W2, W3);
+
+	/* Transform 57-59 + Precalc 63 */
+	R2(d, a, b, c, h, e, f, g, 57, 0, XW); SCHED_W_0(63, W5, W0, W1, W2, W3, W4);
+	R2(c, d, a, b, g, h, e, f, 58, 1, XW);
+	R2(b, c, d, a, f, g, h, e, 59, 2, XW); SCHED_W_1(63, W5, W0, W1, W2, W3, W4);
+
+	/* Transform 60-62 + Precalc 63 */
+	R2(a, b, c, d, e, f, g, h, 60, 0, XW);
+	R2(d, a, b, c, h, e, f, g, 61, 1, XW); SCHED_W_2(63, W5, W0, W1, W2, W3, W4);
+	R2(c, d, a, b, g, h, e, f, 62, 2, XW);
+
+	/* Transform 63 */
+	R2(b, c, d, a, f, g, h, e, 63, 0, XW);
+
+	/* Update the chaining variables. */
+	xorl state_h0(RSTATE), a;
+	xorl state_h1(RSTATE), b;
+	xorl state_h2(RSTATE), c;
+	xorl state_h3(RSTATE), d;
+	movl a, state_h0(RSTATE);
+	movl b, state_h1(RSTATE);
+	movl c, state_h2(RSTATE);
+	movl d, state_h3(RSTATE);
+	xorl state_h4(RSTATE), e;
+	xorl state_h5(RSTATE), f;
+	xorl state_h6(RSTATE), g;
+	xorl state_h7(RSTATE), h;
+	movl e, state_h4(RSTATE);
+	movl f, state_h5(RSTATE);
+	movl g, state_h6(RSTATE);
+	movl h, state_h7(RSTATE);
+
+	cmpq $0, RNBLKS;
+	jne .Loop;
+
+	vzeroall;
+
+	movq (STACK_REG_SAVE + 0 * 8)(%rsp), %rbx;
+	movq (STACK_REG_SAVE + 1 * 8)(%rsp), %r15;
+	movq (STACK_REG_SAVE + 2 * 8)(%rsp), %r14;
+	movq (STACK_REG_SAVE + 3 * 8)(%rsp), %r13;
+	movq (STACK_REG_SAVE + 4 * 8)(%rsp), %r12;
+
+	vmovdqa %xmm0, IW_W1_ADDR(0, 0);
+	vmovdqa %xmm0, IW_W1W2_ADDR(0, 0);
+	vmovdqa %xmm0, IW_W1_ADDR(4, 0);
+	vmovdqa %xmm0, IW_W1W2_ADDR(4, 0);
+	vmovdqa %xmm0, IW_W1_ADDR(8, 0);
+	vmovdqa %xmm0, IW_W1W2_ADDR(8, 0);
+
+	movq %rbp, %rsp;
+	popq %rbp;
+	ret;
+SYM_FUNC_END(sm3_transform_avx)
diff --git a/arch/x86/crypto/sm3_avx_glue.c b/arch/x86/crypto/sm3_avx_glue.c
new file mode 100644
index 0000000000000..661b6f22ffcd8
--- /dev/null
+++ b/arch/x86/crypto/sm3_avx_glue.c
@@ -0,0 +1,134 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * SM3 Secure Hash Algorithm, AVX assembler accelerated.
+ * specified in: https://datatracker.ietf.org/doc/html/draft-sca-cfrg-sm3-02
+ *
+ * Copyright (C) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
+ */
+
+#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
+
+#include <crypto/internal/hash.h>
+#include <crypto/internal/simd.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <crypto/sm3.h>
+#include <crypto/sm3_base.h>
+#include <asm/simd.h>
+
+asmlinkage void sm3_transform_avx(struct sm3_state *state,
+			const u8 *data, int nblocks);
+
+static int sm3_avx_update(struct shash_desc *desc, const u8 *data,
+			 unsigned int len)
+{
+	struct sm3_state *sctx = shash_desc_ctx(desc);
+
+	if (!crypto_simd_usable() ||
+			(sctx->count % SM3_BLOCK_SIZE) + len < SM3_BLOCK_SIZE) {
+		sm3_update(sctx, data, len);
+		return 0;
+	}
+
+	/*
+	 * Make sure struct sm3_state begins directly with the SM3
+	 * 256-bit internal state, as this is what the asm functions expect.
+	 */
+	BUILD_BUG_ON(offsetof(struct sm3_state, state) != 0);
+
+	kernel_fpu_begin();
+	sm3_base_do_update(desc, data, len, sm3_transform_avx);
+	kernel_fpu_end();
+
+	return 0;
+}
+
+static int sm3_avx_finup(struct shash_desc *desc, const u8 *data,
+		      unsigned int len, u8 *out)
+{
+	if (!crypto_simd_usable()) {
+		struct sm3_state *sctx = shash_desc_ctx(desc);
+
+		if (len)
+			sm3_update(sctx, data, len);
+
+		sm3_final(sctx, out);
+		return 0;
+	}
+
+	kernel_fpu_begin();
+	if (len)
+		sm3_base_do_update(desc, data, len, sm3_transform_avx);
+	sm3_base_do_finalize(desc, sm3_transform_avx);
+	kernel_fpu_end();
+
+	return sm3_base_finish(desc, out);
+}
+
+static int sm3_avx_final(struct shash_desc *desc, u8 *out)
+{
+	if (!crypto_simd_usable()) {
+		sm3_final(shash_desc_ctx(desc), out);
+		return 0;
+	}
+
+	kernel_fpu_begin();
+	sm3_base_do_finalize(desc, sm3_transform_avx);
+	kernel_fpu_end();
+
+	return sm3_base_finish(desc, out);
+}
+
+static struct shash_alg sm3_avx_alg = {
+	.digestsize	=	SM3_DIGEST_SIZE,
+	.init		=	sm3_base_init,
+	.update		=	sm3_avx_update,
+	.final		=	sm3_avx_final,
+	.finup		=	sm3_avx_finup,
+	.descsize	=	sizeof(struct sm3_state),
+	.base		=	{
+		.cra_name	=	"sm3",
+		.cra_driver_name =	"sm3-avx",
+		.cra_priority	=	300,
+		.cra_blocksize	=	SM3_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+};
+
+static int __init sm3_avx_mod_init(void)
+{
+	const char *feature_name;
+
+	if (!boot_cpu_has(X86_FEATURE_AVX)) {
+		pr_info("AVX instruction are not detected.\n");
+		return -ENODEV;
+	}
+
+	if (!boot_cpu_has(X86_FEATURE_BMI2)) {
+		pr_info("BMI2 instruction are not detected.\n");
+		return -ENODEV;
+	}
+
+	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
+				&feature_name)) {
+		pr_info("CPU feature '%s' is not supported.\n", feature_name);
+		return -ENODEV;
+	}
+
+	return crypto_register_shash(&sm3_avx_alg);
+}
+
+static void __exit sm3_avx_mod_exit(void)
+{
+	crypto_unregister_shash(&sm3_avx_alg);
+}
+
+module_init(sm3_avx_mod_init);
+module_exit(sm3_avx_mod_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Tianjia Zhang <tianjia.zhang@linux.alibaba.com>");
+MODULE_DESCRIPTION("SM3 Secure Hash Algorithm, AVX assembler accelerated");
+MODULE_ALIAS_CRYPTO("sm3");
+MODULE_ALIAS_CRYPTO("sm3-avx");
diff --git a/crypto/Kconfig b/crypto/Kconfig
index b07174195e984..6dcc77e95caed 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -1008,6 +1008,19 @@ config CRYPTO_SM3
 	  http://www.oscca.gov.cn/UpFile/20101222141857786.pdf
 	  https://datatracker.ietf.org/doc/html/draft-shen-sm3-hash
 
+config CRYPTO_SM3_AVX_X86_64
+	tristate "SM3 digest algorithm (x86_64/AVX)"
+	depends on X86 && 64BIT
+	select CRYPTO_HASH
+	select CRYPTO_LIB_SM3
+	help
+	  SM3 secure hash function as defined by OSCCA GM/T 0004-2012 SM3).
+	  It is part of the Chinese Commercial Cryptography suite. This is
+	  SM3 optimized implementation using Advanced Vector Extensions (AVX)
+	  when available.
+
+	  If unsure, say N.
+
 config CRYPTO_STREEBOG
 	tristate "Streebog Hash Function"
 	select CRYPTO_HASH
-- 
GitLab


From ba2c149d0812cee653a186c9cfe451699b211c91 Mon Sep 17 00:00:00 2001
From: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Date: Fri, 7 Jan 2022 20:07:00 +0800
Subject: [PATCH 0077/1586] crypto: tcrypt - add asynchronous speed test for
 SM3

tcrypt supports testing of SM3 hash algorithms that use AVX
instruction acceleration.

In order to add the sm3 asynchronous test to the appropriate
position, shift the testcase sequence number of the multi buffer
backward and start from 450.

Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/tcrypt.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index 00149657a4bc1..82b5eef2246a2 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -2571,31 +2571,35 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
 		if (mode > 400 && mode < 500) break;
 		fallthrough;
 	case 422:
+		test_ahash_speed("sm3", sec, generic_hash_speed_template);
+		if (mode > 400 && mode < 500) break;
+		fallthrough;
+	case 450:
 		test_mb_ahash_speed("sha1", sec, generic_hash_speed_template,
 				    num_mb);
 		if (mode > 400 && mode < 500) break;
 		fallthrough;
-	case 423:
+	case 451:
 		test_mb_ahash_speed("sha256", sec, generic_hash_speed_template,
 				    num_mb);
 		if (mode > 400 && mode < 500) break;
 		fallthrough;
-	case 424:
+	case 452:
 		test_mb_ahash_speed("sha512", sec, generic_hash_speed_template,
 				    num_mb);
 		if (mode > 400 && mode < 500) break;
 		fallthrough;
-	case 425:
+	case 453:
 		test_mb_ahash_speed("sm3", sec, generic_hash_speed_template,
 				    num_mb);
 		if (mode > 400 && mode < 500) break;
 		fallthrough;
-	case 426:
+	case 454:
 		test_mb_ahash_speed("streebog256", sec,
 				    generic_hash_speed_template, num_mb);
 		if (mode > 400 && mode < 500) break;
 		fallthrough;
-	case 427:
+	case 455:
 		test_mb_ahash_speed("streebog512", sec,
 				    generic_hash_speed_template, num_mb);
 		if (mode > 400 && mode < 500) break;
-- 
GitLab


From 1e1ec11d3ec3134e05d4710f4dee5f9bd05e828d Mon Sep 17 00:00:00 2001
From: Peter Gonda <pgonda@google.com>
Date: Mon, 10 Jan 2022 13:18:37 -0800
Subject: [PATCH 0078/1586] crypto: ccp - Ensure psp_ret is always init'd in
 __sev_platform_init_locked()

Initialize psp_ret inside of __sev_platform_init_locked() because there
are many failure paths with PSP initialization that do not set
__sev_do_cmd_locked().

Fixes: e423b9d75e77: ("crypto: ccp - Move SEV_INIT retry for corrupted data")

Signed-off-by: Peter Gonda <pgonda@google.com>
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Brijesh Singh <brijesh.singh@amd.com>
Cc: Marc Orr <marcorr@google.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: John Allen <john.allen@amd.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: linux-crypto@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/ccp/sev-dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c
index 8fd774a10edc3..6ab93dfd478a9 100644
--- a/drivers/crypto/ccp/sev-dev.c
+++ b/drivers/crypto/ccp/sev-dev.c
@@ -413,7 +413,7 @@ static int __sev_platform_init_locked(int *error)
 {
 	struct psp_device *psp = psp_master;
 	struct sev_device *sev;
-	int rc, psp_ret;
+	int rc, psp_ret = -1;
 	int (*init_function)(int *error);
 
 	if (!psp || !psp->sev_data)
-- 
GitLab


From 844318dfd31f7c99f6cebbdba5d6f8392c4c115c Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Tue, 11 Jan 2022 10:18:06 +0300
Subject: [PATCH 0079/1586] crypto: qat - fix a signedness bug in
 get_service_enabled()

The "ret" variable needs to be signed or there is an error message which
will not be printed correctly.

Fixes: 0cec19c761e5 ("crypto: qat - add support for compression for 4xxx")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Acked-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c
index 6d10edc40aca0..68d39c833332e 100644
--- a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c
+++ b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c
@@ -52,7 +52,7 @@ static const char *const dev_cfg_services[] = {
 static int get_service_enabled(struct adf_accel_dev *accel_dev)
 {
 	char services[ADF_CFG_MAX_VAL_LEN_IN_BYTES] = {0};
-	u32 ret;
+	int ret;
 
 	ret = adf_cfg_get_param_value(accel_dev, ADF_GENERAL_SEC,
 				      ADF_SERVICES_ENABLED, services);
-- 
GitLab


From fa183433bf53ee092323005f05cb3491e4aaaa8b Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Wed, 26 Jan 2022 17:12:50 -0600
Subject: [PATCH 0080/1586] regulator: maxim,max8973: Drop Tegra specifics from
 example

There's no need to complicate examples with a platform specific macro.
It also complicates example parsing to figure out the number of interrupt
cells in examples (based on the bracketing).

Signed-off-by: Rob Herring <robh@kernel.org>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Link: https://lore.kernel.org/r/20220126231250.1635021-1-robh@kernel.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../devicetree/bindings/regulator/maxim,max8973.yaml         | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/Documentation/devicetree/bindings/regulator/maxim,max8973.yaml b/Documentation/devicetree/bindings/regulator/maxim,max8973.yaml
index 35c53e27f78cc..5898dcf10f067 100644
--- a/Documentation/devicetree/bindings/regulator/maxim,max8973.yaml
+++ b/Documentation/devicetree/bindings/regulator/maxim,max8973.yaml
@@ -113,7 +113,7 @@ examples:
     };
 
   - |
-    #include <dt-bindings/gpio/tegra-gpio.h>
+    #include <dt-bindings/gpio/gpio.h>
     #include <dt-bindings/interrupt-controller/irq.h>
 
     i2c {
@@ -123,8 +123,7 @@ examples:
         regulator@1b {
             compatible = "maxim,max77621";
             reg = <0x1b>;
-            interrupt-parent = <&gpio>;
-            interrupts = <TEGRA_GPIO(Y, 1) IRQ_TYPE_LEVEL_LOW>;
+            interrupts = <1 IRQ_TYPE_LEVEL_LOW>;
 
             regulator-always-on;
             regulator-boot-on;
-- 
GitLab


From 20dc69ca1023b7e4c4af3c3495aa5a91e1a9be39 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Thu, 27 Jan 2022 19:58:15 +0800
Subject: [PATCH 0081/1586] spi: Fix missing unlock on error in
 sp7021_spi_master_transfer_one()

Add the missing unlock before return from sp7021_spi_master_transfer_one()
in the error handling case.

Fixes: f62ca4e2a863 ("spi: Add spi driver for Sunplus SP7021")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20220127115815.3148950-1-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-sunplus-sp7021.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/spi/spi-sunplus-sp7021.c b/drivers/spi/spi-sunplus-sp7021.c
index 627b9c3024e96..cbbb1664017eb 100644
--- a/drivers/spi/spi-sunplus-sp7021.c
+++ b/drivers/spi/spi-sunplus-sp7021.c
@@ -351,6 +351,7 @@ static int sp7021_spi_master_transfer_one(struct spi_controller *ctlr, struct sp
 
 		if (!wait_for_completion_interruptible_timeout(&pspim->isr_done, timeout)) {
 			dev_err(&spi->dev, "wait_for_completion err\n");
+			mutex_unlock(&pspim->buf_lock);
 			return -ETIMEDOUT;
 		}
 
-- 
GitLab


From bef8c5fdf50b573351571e94525800c41d9830f2 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sat, 8 Jan 2022 13:07:54 +0100
Subject: [PATCH 0082/1586] spi: qcom: geni: Simplify DMA setting

As stated in [1], dma_set_mask() with a 64-bit mask will never fail if
dev->dma_mask is non-NULL.
So, if it fails, the 32 bits case will also fail for the same reason.

Simplify code and remove some dead code accordingly.

[1]: https://lkml.org/lkml/2021/6/7/398

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Link: https://lore.kernel.org/r/1b14e4ce91a33c16b2c655389c728071a9c9aa2e.1641643601.git.christophe.jaillet@wanadoo.fr
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-geni-qcom.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/spi/spi-geni-qcom.c b/drivers/spi/spi-geni-qcom.c
index f7d905d2a90f6..4e83cc5b445d8 100644
--- a/drivers/spi/spi-geni-qcom.c
+++ b/drivers/spi/spi-geni-qcom.c
@@ -898,11 +898,8 @@ static int spi_geni_probe(struct platform_device *pdev)
 		return irq;
 
 	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
-	if (ret) {
-		ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
-		if (ret)
-			return dev_err_probe(dev, ret, "could not set DMA mask\n");
-	}
+	if (ret)
+		return dev_err_probe(dev, ret, "could not set DMA mask\n");
 
 	base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(base))
-- 
GitLab


From 7291e7d686308b4a77f43225eaf1753cb20cc692 Mon Sep 17 00:00:00 2001
From: Dave Stevenson <dave.stevenson@raspberrypi.com>
Date: Mon, 24 Jan 2022 17:01:21 -0500
Subject: [PATCH 0083/1586] regulator: rpi-panel: Register with a unique
 backlight name

There's no reason why 2 Raspberry Pi DSI displays can't be
attached to a Pi Compute Module, so the backlight names need to
be unique.

Use the parent dev_name. It's not as readable, but is unique.

Signed-off-by: Dave Stevenson <dave.stevenson@raspberrypi.com>
Signed-off-by: Detlev Casanova <detlev.casanova@collabora.com>
Link: https://lore.kernel.org/r/20220124220129.158891-2-detlev.casanova@collabora.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/rpi-panel-attiny-regulator.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/regulator/rpi-panel-attiny-regulator.c b/drivers/regulator/rpi-panel-attiny-regulator.c
index ee46bfbf5eee7..370b9ae363dd2 100644
--- a/drivers/regulator/rpi-panel-attiny-regulator.c
+++ b/drivers/regulator/rpi-panel-attiny-regulator.c
@@ -181,8 +181,7 @@ static int attiny_i2c_probe(struct i2c_client *i2c,
 
 	props.type = BACKLIGHT_RAW;
 	props.max_brightness = 0xff;
-	bl = devm_backlight_device_register(&i2c->dev,
-					    "7inch-touchscreen-panel-bl",
+	bl = devm_backlight_device_register(&i2c->dev, dev_name(&i2c->dev),
 					    &i2c->dev, regmap, &attiny_bl,
 					    &props);
 	if (IS_ERR(bl))
-- 
GitLab


From 5665eee7a3800430e7dc3ef6f25722476b603186 Mon Sep 17 00:00:00 2001
From: Dave Stevenson <dave.stevenson@raspberrypi.com>
Date: Mon, 24 Jan 2022 17:01:22 -0500
Subject: [PATCH 0084/1586] regulator: rpi-panel: Handle I2C errors/timing to
 the Atmel

The Atmel is doing some things in the I2C ISR, during which
period it will not respond to further commands. This is
particularly true of the POWERON command.

Increase delays appropriately, and retry should I2C errors be
reported.

Signed-off-by: Dave Stevenson <dave.stevenson@raspberrypi.com>
Signed-off-by: Detlev Casanova <detlev.casanova@collabora.com>
Link: https://lore.kernel.org/r/20220124220129.158891-3-detlev.casanova@collabora.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../regulator/rpi-panel-attiny-regulator.c    | 56 +++++++++++++++----
 1 file changed, 46 insertions(+), 10 deletions(-)

diff --git a/drivers/regulator/rpi-panel-attiny-regulator.c b/drivers/regulator/rpi-panel-attiny-regulator.c
index 370b9ae363dd2..00fb69efcfa25 100644
--- a/drivers/regulator/rpi-panel-attiny-regulator.c
+++ b/drivers/regulator/rpi-panel-attiny-regulator.c
@@ -37,11 +37,24 @@ static const struct regmap_config attiny_regmap_config = {
 static int attiny_lcd_power_enable(struct regulator_dev *rdev)
 {
 	unsigned int data;
+	int ret, i;
 
 	regmap_write(rdev->regmap, REG_POWERON, 1);
+	msleep(80);
+
 	/* Wait for nPWRDWN to go low to indicate poweron is done. */
-	regmap_read_poll_timeout(rdev->regmap, REG_PORTB, data,
-					data & BIT(0), 10, 1000000);
+	for (i = 0; i < 20; i++) {
+		ret = regmap_read(rdev->regmap, REG_PORTB, &data);
+		if (!ret) {
+			if (data & BIT(0))
+				break;
+		}
+		usleep_range(10000, 12000);
+	}
+	usleep_range(10000, 12000);
+
+	if (ret)
+		pr_err("%s: regmap_read_poll_timeout failed %d\n", __func__, ret);
 
 	/* Default to the same orientation as the closed source
 	 * firmware used for the panel.  Runtime rotation
@@ -57,23 +70,34 @@ static int attiny_lcd_power_disable(struct regulator_dev *rdev)
 {
 	regmap_write(rdev->regmap, REG_PWM, 0);
 	regmap_write(rdev->regmap, REG_POWERON, 0);
-	udelay(1);
+	msleep(30);
 	return 0;
 }
 
 static int attiny_lcd_power_is_enabled(struct regulator_dev *rdev)
 {
 	unsigned int data;
-	int ret;
+	int ret, i;
 
-	ret = regmap_read(rdev->regmap, REG_POWERON, &data);
+	for (i = 0; i < 10; i++) {
+		ret = regmap_read(rdev->regmap, REG_POWERON, &data);
+		if (!ret)
+			break;
+		usleep_range(10000, 12000);
+	}
 	if (ret < 0)
 		return ret;
 
 	if (!(data & BIT(0)))
 		return 0;
 
-	ret = regmap_read(rdev->regmap, REG_PORTB, &data);
+	for (i = 0; i < 10; i++) {
+		ret = regmap_read(rdev->regmap, REG_PORTB, &data);
+		if (!ret)
+			break;
+		usleep_range(10000, 12000);
+	}
+
 	if (ret < 0)
 		return ret;
 
@@ -103,20 +127,32 @@ static int attiny_update_status(struct backlight_device *bl)
 {
 	struct regmap *regmap = bl_get_data(bl);
 	int brightness = bl->props.brightness;
+	int ret, i;
 
 	if (bl->props.power != FB_BLANK_UNBLANK ||
 	    bl->props.fb_blank != FB_BLANK_UNBLANK)
 		brightness = 0;
 
-	return regmap_write(regmap, REG_PWM, brightness);
+	for (i = 0; i < 10; i++) {
+		ret = regmap_write(regmap, REG_PWM, brightness);
+		if (!ret)
+			break;
+	}
+
+	return ret;
 }
 
 static int attiny_get_brightness(struct backlight_device *bl)
 {
 	struct regmap *regmap = bl_get_data(bl);
-	int ret, brightness;
+	int ret, brightness, i;
+
+	for (i = 0; i < 10; i++) {
+		ret = regmap_read(regmap, REG_PWM, &brightness);
+		if (!ret)
+			break;
+	}
 
-	ret = regmap_read(regmap, REG_PWM, &brightness);
 	if (ret)
 		return ret;
 
@@ -166,7 +202,7 @@ static int attiny_i2c_probe(struct i2c_client *i2c,
 	}
 
 	regmap_write(regmap, REG_POWERON, 0);
-	mdelay(1);
+	msleep(30);
 
 	config.dev = &i2c->dev;
 	config.regmap = regmap;
-- 
GitLab


From 00440bcd211a3cac686b730447c4efa3d4c84c2a Mon Sep 17 00:00:00 2001
From: Dave Stevenson <dave.stevenson@raspberrypi.com>
Date: Mon, 24 Jan 2022 17:01:23 -0500
Subject: [PATCH 0085/1586] regulator: rpi-panel: Serialise operations.

The driver was using the regmap lock to serialise the
individual accesses, but we really need to protect the
timings of enabling the regulators, including any communication
with the Atmel.

Use a mutex within the driver to control overall accesses to
the Atmel, instead of the regmap lock.

Signed-off-by: Dave Stevenson <dave.stevenson@raspberrypi.com>
Signed-off-by: Detlev Casanova <detlev.casanova@collabora.com>
Link: https://lore.kernel.org/r/20220124220129.158891-4-detlev.casanova@collabora.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../regulator/rpi-panel-attiny-regulator.c    | 91 ++++++++++++++++---
 1 file changed, 80 insertions(+), 11 deletions(-)

diff --git a/drivers/regulator/rpi-panel-attiny-regulator.c b/drivers/regulator/rpi-panel-attiny-regulator.c
index 00fb69efcfa25..a4af7adad2b5e 100644
--- a/drivers/regulator/rpi-panel-attiny-regulator.c
+++ b/drivers/regulator/rpi-panel-attiny-regulator.c
@@ -27,18 +27,28 @@
 #define REG_POWERON	0x85
 #define REG_PWM		0x86
 
+struct attiny_lcd {
+	/* lock to serialise overall accesses to the Atmel */
+	struct mutex	lock;
+	struct regmap	*regmap;
+};
+
 static const struct regmap_config attiny_regmap_config = {
 	.reg_bits = 8,
 	.val_bits = 8,
+	.disable_locking = 1,
 	.max_register = REG_PWM,
 	.cache_type = REGCACHE_NONE,
 };
 
 static int attiny_lcd_power_enable(struct regulator_dev *rdev)
 {
+	struct mutex *lock = rdev_get_drvdata(rdev);
 	unsigned int data;
 	int ret, i;
 
+	mutex_lock(lock);
+
 	regmap_write(rdev->regmap, REG_POWERON, 1);
 	msleep(80);
 
@@ -63,33 +73,49 @@ static int attiny_lcd_power_enable(struct regulator_dev *rdev)
 	 */
 	regmap_write(rdev->regmap, REG_PORTA, BIT(2));
 
+	mutex_unlock(lock);
+
 	return 0;
 }
 
 static int attiny_lcd_power_disable(struct regulator_dev *rdev)
 {
+	struct mutex *lock = rdev_get_drvdata(rdev);
+
+	mutex_lock(lock);
+
 	regmap_write(rdev->regmap, REG_PWM, 0);
 	regmap_write(rdev->regmap, REG_POWERON, 0);
 	msleep(30);
+
+	mutex_unlock(lock);
+
 	return 0;
 }
 
 static int attiny_lcd_power_is_enabled(struct regulator_dev *rdev)
 {
+	struct mutex *lock = rdev_get_drvdata(rdev);
 	unsigned int data;
 	int ret, i;
 
+	mutex_lock(lock);
+
 	for (i = 0; i < 10; i++) {
 		ret = regmap_read(rdev->regmap, REG_POWERON, &data);
 		if (!ret)
 			break;
 		usleep_range(10000, 12000);
 	}
-	if (ret < 0)
+	if (ret < 0) {
+		mutex_unlock(lock);
 		return ret;
+	}
 
-	if (!(data & BIT(0)))
+	if (!(data & BIT(0))) {
+		mutex_unlock(lock);
 		return 0;
+	}
 
 	for (i = 0; i < 10; i++) {
 		ret = regmap_read(rdev->regmap, REG_PORTB, &data);
@@ -98,6 +124,8 @@ static int attiny_lcd_power_is_enabled(struct regulator_dev *rdev)
 		usleep_range(10000, 12000);
 	}
 
+	mutex_unlock(lock);
+
 	if (ret < 0)
 		return ret;
 
@@ -125,10 +153,13 @@ static const struct regulator_desc attiny_regulator = {
 
 static int attiny_update_status(struct backlight_device *bl)
 {
-	struct regmap *regmap = bl_get_data(bl);
+	struct attiny_lcd *state = bl_get_data(bl);
+	struct regmap *regmap = state->regmap;
 	int brightness = bl->props.brightness;
 	int ret, i;
 
+	mutex_lock(&state->lock);
+
 	if (bl->props.power != FB_BLANK_UNBLANK ||
 	    bl->props.fb_blank != FB_BLANK_UNBLANK)
 		brightness = 0;
@@ -139,20 +170,27 @@ static int attiny_update_status(struct backlight_device *bl)
 			break;
 	}
 
+	mutex_unlock(&state->lock);
+
 	return ret;
 }
 
 static int attiny_get_brightness(struct backlight_device *bl)
 {
-	struct regmap *regmap = bl_get_data(bl);
+	struct attiny_lcd *state = bl_get_data(bl);
+	struct regmap *regmap = state->regmap;
 	int ret, brightness, i;
 
+	mutex_lock(&state->lock);
+
 	for (i = 0; i < 10; i++) {
 		ret = regmap_read(regmap, REG_PWM, &brightness);
 		if (!ret)
 			break;
 	}
 
+	mutex_unlock(&state->lock);
+
 	if (ret)
 		return ret;
 
@@ -174,22 +212,30 @@ static int attiny_i2c_probe(struct i2c_client *i2c,
 	struct regulator_config config = { };
 	struct backlight_device *bl;
 	struct regulator_dev *rdev;
+	struct attiny_lcd *state;
 	struct regmap *regmap;
 	unsigned int data;
 	int ret;
 
+	state = devm_kzalloc(&i2c->dev, sizeof(*state), GFP_KERNEL);
+	if (!state)
+		return -ENOMEM;
+
+	mutex_init(&state->lock);
+	i2c_set_clientdata(i2c, state);
+
 	regmap = devm_regmap_init_i2c(i2c, &attiny_regmap_config);
 	if (IS_ERR(regmap)) {
 		ret = PTR_ERR(regmap);
 		dev_err(&i2c->dev, "Failed to allocate register map: %d\n",
 			ret);
-		return ret;
+		goto error;
 	}
 
 	ret = regmap_read(regmap, REG_ID, &data);
 	if (ret < 0) {
 		dev_err(&i2c->dev, "Failed to read REG_ID reg: %d\n", ret);
-		return ret;
+		goto error;
 	}
 
 	switch (data) {
@@ -198,7 +244,8 @@ static int attiny_i2c_probe(struct i2c_client *i2c,
 		break;
 	default:
 		dev_err(&i2c->dev, "Unknown Atmel firmware revision: 0x%02x\n", data);
-		return -ENODEV;
+		ret = -ENODEV;
+		goto error;
 	}
 
 	regmap_write(regmap, REG_POWERON, 0);
@@ -208,23 +255,44 @@ static int attiny_i2c_probe(struct i2c_client *i2c,
 	config.regmap = regmap;
 	config.of_node = i2c->dev.of_node;
 	config.init_data = &attiny_regulator_default;
+	config.driver_data = &state->lock;
 
 	rdev = devm_regulator_register(&i2c->dev, &attiny_regulator, &config);
 	if (IS_ERR(rdev)) {
 		dev_err(&i2c->dev, "Failed to register ATTINY regulator\n");
-		return PTR_ERR(rdev);
+		ret = PTR_ERR(rdev);
+		goto error;
 	}
 
 	props.type = BACKLIGHT_RAW;
 	props.max_brightness = 0xff;
+
+	state->regmap = regmap;
+
 	bl = devm_backlight_device_register(&i2c->dev, dev_name(&i2c->dev),
-					    &i2c->dev, regmap, &attiny_bl,
+					    &i2c->dev, state, &attiny_bl,
 					    &props);
-	if (IS_ERR(bl))
-		return PTR_ERR(bl);
+	if (IS_ERR(bl)) {
+		ret = PTR_ERR(bl);
+		goto error;
+	}
 
 	bl->props.brightness = 0xff;
 
+	return 0;
+
+error:
+	mutex_destroy(&state->lock);
+
+	return ret;
+}
+
+static int attiny_i2c_remove(struct i2c_client *client)
+{
+	struct attiny_lcd *state = i2c_get_clientdata(client);
+
+	mutex_destroy(&state->lock);
+
 	return 0;
 }
 
@@ -240,6 +308,7 @@ static struct i2c_driver attiny_regulator_driver = {
 		.of_match_table = of_match_ptr(attiny_dt_ids),
 	},
 	.probe = attiny_i2c_probe,
+	.remove	= attiny_i2c_remove,
 };
 
 module_i2c_driver(attiny_regulator_driver);
-- 
GitLab


From 89339a2ae7608138dbcccda8db67a87870550cbe Mon Sep 17 00:00:00 2001
From: Dave Stevenson <dave.stevenson@raspberrypi.com>
Date: Mon, 24 Jan 2022 17:01:24 -0500
Subject: [PATCH 0086/1586] regulator: rpi-panel: Ensure the backlight is off
 during probe.

The initial state of the Atmel is not defined, so ensure the
backlight PWM is set to 0 by default.

Signed-off-by: Dave Stevenson <dave.stevenson@raspberrypi.com>
Signed-off-by: Detlev Casanova <detlev.casanova@collabora.com>
Link: https://lore.kernel.org/r/20220124220129.158891-5-detlev.casanova@collabora.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/rpi-panel-attiny-regulator.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/regulator/rpi-panel-attiny-regulator.c b/drivers/regulator/rpi-panel-attiny-regulator.c
index a4af7adad2b5e..b3629a1e0e502 100644
--- a/drivers/regulator/rpi-panel-attiny-regulator.c
+++ b/drivers/regulator/rpi-panel-attiny-regulator.c
@@ -250,6 +250,7 @@ static int attiny_i2c_probe(struct i2c_client *i2c,
 
 	regmap_write(regmap, REG_POWERON, 0);
 	msleep(30);
+	regmap_write(regmap, REG_PWM, 0);
 
 	config.dev = &i2c->dev;
 	config.regmap = regmap;
-- 
GitLab


From 4866e35e48e6bd2bef1c567b05105e7fb3493ff9 Mon Sep 17 00:00:00 2001
From: Dave Stevenson <dave.stevenson@raspberrypi.com>
Date: Mon, 24 Jan 2022 17:01:25 -0500
Subject: [PATCH 0087/1586] regulator: rpi-panel: Convert to drive lines
 directly

The Atmel was doing a load of automatic sequencing of
control lines, however it was combining the touch controller's
reset with the bridge/panel control.

Change to control the control signals directly rather than
through the automatic POWERON control.

Signed-off-by: Dave Stevenson <dave.stevenson@raspberrypi.com>
Signed-off-by: Detlev Casanova <detlev.casanova@collabora.com>
Link: https://lore.kernel.org/r/20220124220129.158891-6-detlev.casanova@collabora.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../regulator/rpi-panel-attiny-regulator.c    | 111 ++++++++++--------
 1 file changed, 60 insertions(+), 51 deletions(-)

diff --git a/drivers/regulator/rpi-panel-attiny-regulator.c b/drivers/regulator/rpi-panel-attiny-regulator.c
index b3629a1e0e502..995915ca4a9be 100644
--- a/drivers/regulator/rpi-panel-attiny-regulator.c
+++ b/drivers/regulator/rpi-panel-attiny-regulator.c
@@ -21,11 +21,28 @@
 /* I2C registers of the Atmel microcontroller. */
 #define REG_ID		0x80
 #define REG_PORTA	0x81
-#define REG_PORTA_HF	BIT(2)
-#define REG_PORTA_VF	BIT(3)
 #define REG_PORTB	0x82
+#define REG_PORTC	0x83
 #define REG_POWERON	0x85
 #define REG_PWM		0x86
+#define REG_ADDR_L	0x8c
+#define REG_ADDR_H	0x8d
+#define REG_WRITE_DATA_H	0x90
+#define REG_WRITE_DATA_L	0x91
+
+#define PA_LCD_DITHB		BIT(0)
+#define PA_LCD_MODE		BIT(1)
+#define PA_LCD_LR		BIT(2)
+#define PA_LCD_UD		BIT(3)
+
+#define PB_BRIDGE_PWRDNX_N	BIT(0)
+#define PB_LCD_VCC_N		BIT(1)
+#define PB_LCD_MAIN		BIT(7)
+
+#define PC_LED_EN		BIT(0)
+#define PC_RST_TP_N		BIT(1)
+#define PC_RST_LCD_N		BIT(2)
+#define PC_RST_BRIDGE_N		BIT(3)
 
 struct attiny_lcd {
 	/* lock to serialise overall accesses to the Atmel */
@@ -37,99 +54,91 @@ static const struct regmap_config attiny_regmap_config = {
 	.reg_bits = 8,
 	.val_bits = 8,
 	.disable_locking = 1,
-	.max_register = REG_PWM,
+	.max_register = REG_WRITE_DATA_L,
 	.cache_type = REGCACHE_NONE,
 };
 
 static int attiny_lcd_power_enable(struct regulator_dev *rdev)
 {
-	struct mutex *lock = rdev_get_drvdata(rdev);
-	unsigned int data;
-	int ret, i;
-
-	mutex_lock(lock);
-
-	regmap_write(rdev->regmap, REG_POWERON, 1);
-	msleep(80);
+	struct attiny_lcd *state = rdev_get_drvdata(rdev);
 
-	/* Wait for nPWRDWN to go low to indicate poweron is done. */
-	for (i = 0; i < 20; i++) {
-		ret = regmap_read(rdev->regmap, REG_PORTB, &data);
-		if (!ret) {
-			if (data & BIT(0))
-				break;
-		}
-		usleep_range(10000, 12000);
-	}
-	usleep_range(10000, 12000);
+	mutex_lock(&state->lock);
 
-	if (ret)
-		pr_err("%s: regmap_read_poll_timeout failed %d\n", __func__, ret);
+	/* Ensure bridge, and tp stay in reset */
+	regmap_write(rdev->regmap, REG_PORTC, 0);
+	usleep_range(5000, 10000);
 
 	/* Default to the same orientation as the closed source
 	 * firmware used for the panel.  Runtime rotation
 	 * configuration will be supported using VC4's plane
 	 * orientation bits.
 	 */
-	regmap_write(rdev->regmap, REG_PORTA, BIT(2));
+	regmap_write(rdev->regmap, REG_PORTA, PA_LCD_LR);
+	usleep_range(5000, 10000);
+	regmap_write(rdev->regmap, REG_PORTB, PB_LCD_MAIN);
+	usleep_range(5000, 10000);
+	/* Bring controllers out of reset */
+	regmap_write(rdev->regmap, REG_PORTC,
+		     PC_LED_EN | PC_RST_BRIDGE_N | PC_RST_LCD_N | PC_RST_TP_N);
+
+	msleep(80);
+
+	regmap_write(rdev->regmap, REG_ADDR_H, 0x04);
+	usleep_range(5000, 8000);
+	regmap_write(rdev->regmap, REG_ADDR_L, 0x7c);
+	usleep_range(5000, 8000);
+	regmap_write(rdev->regmap, REG_WRITE_DATA_H, 0x00);
+	usleep_range(5000, 8000);
+	regmap_write(rdev->regmap, REG_WRITE_DATA_L, 0x00);
+
+	msleep(100);
 
-	mutex_unlock(lock);
+	mutex_unlock(&state->lock);
 
 	return 0;
 }
 
 static int attiny_lcd_power_disable(struct regulator_dev *rdev)
 {
-	struct mutex *lock = rdev_get_drvdata(rdev);
+	struct attiny_lcd *state = rdev_get_drvdata(rdev);
 
-	mutex_lock(lock);
+	mutex_lock(&state->lock);
 
 	regmap_write(rdev->regmap, REG_PWM, 0);
-	regmap_write(rdev->regmap, REG_POWERON, 0);
+	usleep_range(5000, 10000);
+	regmap_write(rdev->regmap, REG_PORTA, 0);
+	usleep_range(5000, 10000);
+	regmap_write(rdev->regmap, REG_PORTB, PB_LCD_VCC_N);
+	usleep_range(5000, 10000);
+	regmap_write(rdev->regmap, REG_PORTC, 0);
 	msleep(30);
 
-	mutex_unlock(lock);
+	mutex_unlock(&state->lock);
 
 	return 0;
 }
 
 static int attiny_lcd_power_is_enabled(struct regulator_dev *rdev)
 {
-	struct mutex *lock = rdev_get_drvdata(rdev);
+	struct attiny_lcd *state = rdev_get_drvdata(rdev);
 	unsigned int data;
 	int ret, i;
 
-	mutex_lock(lock);
-
-	for (i = 0; i < 10; i++) {
-		ret = regmap_read(rdev->regmap, REG_POWERON, &data);
-		if (!ret)
-			break;
-		usleep_range(10000, 12000);
-	}
-	if (ret < 0) {
-		mutex_unlock(lock);
-		return ret;
-	}
-
-	if (!(data & BIT(0))) {
-		mutex_unlock(lock);
-		return 0;
-	}
+	mutex_lock(&state->lock);
 
 	for (i = 0; i < 10; i++) {
-		ret = regmap_read(rdev->regmap, REG_PORTB, &data);
+		ret = regmap_read(rdev->regmap, REG_PORTC, &data);
 		if (!ret)
 			break;
 		usleep_range(10000, 12000);
 	}
 
-	mutex_unlock(lock);
+	mutex_unlock(&state->lock);
 
 	if (ret < 0)
 		return ret;
 
-	return data & BIT(0);
+	return data & PC_RST_BRIDGE_N;
 }
 
 static const struct regulator_init_data attiny_regulator_default = {
@@ -256,7 +265,7 @@ static int attiny_i2c_probe(struct i2c_client *i2c,
 	config.regmap = regmap;
 	config.of_node = i2c->dev.of_node;
 	config.init_data = &attiny_regulator_default;
-	config.driver_data = &state->lock;
+	config.driver_data = state;
 
 	rdev = devm_regulator_register(&i2c->dev, &attiny_regulator, &config);
 	if (IS_ERR(rdev)) {
-- 
GitLab


From 8c518eb4039102445b1b7bd6626aba0fef65b753 Mon Sep 17 00:00:00 2001
From: Dave Stevenson <dave.stevenson@raspberrypi.com>
Date: Mon, 24 Jan 2022 17:01:26 -0500
Subject: [PATCH 0088/1586] regulator: rpi-panel: Add GPIO control for panel
 and touch resets

We need independent control of the resets for the panel&bridge,
vs the touch controller.

Expose the reset lines that are on the Atmel's port C via the GPIO
API so that they can be controlled appropriately.

Signed-off-by: Dave Stevenson <dave.stevenson@raspberrypi.com>
Signed-off-by: Detlev Casanova <detlev.casanova@collabora.com>
Link: https://lore.kernel.org/r/20220124220129.158891-7-detlev.casanova@collabora.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/Kconfig                     |   1 +
 .../regulator/rpi-panel-attiny-regulator.c    | 115 +++++++++++++++---
 2 files changed, 98 insertions(+), 18 deletions(-)

diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index 1c35fed20d341..22503e4f53272 100644
--- a/drivers/regulator/Kconfig
+++ b/drivers/regulator/Kconfig
@@ -984,6 +984,7 @@ config REGULATOR_RASPBERRYPI_TOUCHSCREEN_ATTINY
 	tristate "Raspberry Pi 7-inch touchscreen panel ATTINY regulator"
 	depends on BACKLIGHT_CLASS_DEVICE
 	depends on I2C
+	depends on OF_GPIO
 	select REGMAP_I2C
 	help
 	  This driver supports ATTINY regulator on the Raspberry Pi 7-inch
diff --git a/drivers/regulator/rpi-panel-attiny-regulator.c b/drivers/regulator/rpi-panel-attiny-regulator.c
index 995915ca4a9be..998233f14085e 100644
--- a/drivers/regulator/rpi-panel-attiny-regulator.c
+++ b/drivers/regulator/rpi-panel-attiny-regulator.c
@@ -8,6 +8,7 @@
 #include <linux/backlight.h>
 #include <linux/err.h>
 #include <linux/gpio.h>
+#include <linux/gpio/driver.h>
 #include <linux/i2c.h>
 #include <linux/init.h>
 #include <linux/interrupt.h>
@@ -44,10 +45,30 @@
 #define PC_RST_LCD_N		BIT(2)
 #define PC_RST_BRIDGE_N		BIT(3)
 
+enum gpio_signals {
+	RST_BRIDGE_N,	/* TC358762 bridge reset */
+	RST_TP_N,	/* Touch controller reset */
+	NUM_GPIO
+};
+
+struct gpio_signal_mappings {
+	unsigned int reg;
+	unsigned int mask;
+};
+
+static const struct gpio_signal_mappings mappings[NUM_GPIO] = {
+	[RST_BRIDGE_N] = { REG_PORTC, PC_RST_BRIDGE_N | PC_RST_LCD_N  },
+	[RST_TP_N] = { REG_PORTC, PC_RST_TP_N },
+};
+
 struct attiny_lcd {
 	/* lock to serialise overall accesses to the Atmel */
 	struct mutex	lock;
 	struct regmap	*regmap;
+	bool gpio_states[NUM_GPIO];
+	u8 port_states[3];
+
+	struct gpio_chip gc;
 };
 
 static const struct regmap_config attiny_regmap_config = {
@@ -58,6 +79,17 @@ static const struct regmap_config attiny_regmap_config = {
 	.cache_type = REGCACHE_NONE,
 };
 
+static int attiny_set_port_state(struct attiny_lcd *state, int reg, u8 val)
+{
+	state->port_states[reg - REG_PORTA] = val;
+	return regmap_write(state->regmap, reg, val);
+};
+
+static u8 attiny_get_port_state(struct attiny_lcd *state, int reg)
+{
+	return state->port_states[reg - REG_PORTA];
+};
+
 static int attiny_lcd_power_enable(struct regulator_dev *rdev)
 {
 	struct attiny_lcd *state = rdev_get_drvdata(rdev);
@@ -65,7 +97,7 @@ static int attiny_lcd_power_enable(struct regulator_dev *rdev)
 	mutex_lock(&state->lock);
 
 	/* Ensure bridge, and tp stay in reset */
-	regmap_write(rdev->regmap, REG_PORTC, 0);
+	attiny_set_port_state(state, REG_PORTC, 0);
 	usleep_range(5000, 10000);
 
 	/* Default to the same orientation as the closed source
@@ -73,26 +105,16 @@ static int attiny_lcd_power_enable(struct regulator_dev *rdev)
 	 * configuration will be supported using VC4's plane
 	 * orientation bits.
 	 */
-	regmap_write(rdev->regmap, REG_PORTA, PA_LCD_LR);
+	attiny_set_port_state(state, REG_PORTA, PA_LCD_LR);
 	usleep_range(5000, 10000);
-	regmap_write(rdev->regmap, REG_PORTB, PB_LCD_MAIN);
+	/* Main regulator on, and power to the panel (LCD_VCC_N) */
+	attiny_set_port_state(state, REG_PORTB, PB_LCD_MAIN);
 	usleep_range(5000, 10000);
 	/* Bring controllers out of reset */
-	regmap_write(rdev->regmap, REG_PORTC,
-		     PC_LED_EN | PC_RST_BRIDGE_N | PC_RST_LCD_N | PC_RST_TP_N);
+	attiny_set_port_state(state, REG_PORTC, PC_LED_EN);
 
 	msleep(80);
 
-	regmap_write(rdev->regmap, REG_ADDR_H, 0x04);
-	usleep_range(5000, 8000);
-	regmap_write(rdev->regmap, REG_ADDR_L, 0x7c);
-	usleep_range(5000, 8000);
-	regmap_write(rdev->regmap, REG_WRITE_DATA_H, 0x00);
-	usleep_range(5000, 8000);
-	regmap_write(rdev->regmap, REG_WRITE_DATA_L, 0x00);
-
-	msleep(100);
-
 	mutex_unlock(&state->lock);
 
 	return 0;
@@ -106,11 +128,12 @@ static int attiny_lcd_power_disable(struct regulator_dev *rdev)
 
 	regmap_write(rdev->regmap, REG_PWM, 0);
 	usleep_range(5000, 10000);
-	regmap_write(rdev->regmap, REG_PORTA, 0);
+
+	attiny_set_port_state(state, REG_PORTA, 0);
 	usleep_range(5000, 10000);
-	regmap_write(rdev->regmap, REG_PORTB, PB_LCD_VCC_N);
+	attiny_set_port_state(state, REG_PORTB, PB_LCD_VCC_N);
 	usleep_range(5000, 10000);
-	regmap_write(rdev->regmap, REG_PORTC, 0);
+	attiny_set_port_state(state, REG_PORTC, 0);
 	msleep(30);
 
 	mutex_unlock(&state->lock);
@@ -211,6 +234,45 @@ static const struct backlight_ops attiny_bl = {
 	.get_brightness	= attiny_get_brightness,
 };
 
+static int attiny_gpio_get_direction(struct gpio_chip *gc, unsigned int off)
+{
+	return GPIO_LINE_DIRECTION_OUT;
+}
+
+static void attiny_gpio_set(struct gpio_chip *gc, unsigned int off, int val)
+{
+	struct attiny_lcd *state = gpiochip_get_data(gc);
+	u8 last_val;
+
+	if (off >= NUM_GPIO)
+		return;
+
+	mutex_lock(&state->lock);
+
+	last_val = attiny_get_port_state(state, mappings[off].reg);
+	if (val)
+		last_val |= mappings[off].mask;
+	else
+		last_val &= ~mappings[off].mask;
+
+	attiny_set_port_state(state, mappings[off].reg, last_val);
+
+	if (off == RST_BRIDGE_N && val) {
+		usleep_range(5000, 8000);
+		regmap_write(state->regmap, REG_ADDR_H, 0x04);
+		usleep_range(5000, 8000);
+		regmap_write(state->regmap, REG_ADDR_L, 0x7c);
+		usleep_range(5000, 8000);
+		regmap_write(state->regmap, REG_WRITE_DATA_H, 0x00);
+		usleep_range(5000, 8000);
+		regmap_write(state->regmap, REG_WRITE_DATA_L, 0x00);
+
+		msleep(100);
+	}
+
+	mutex_unlock(&state->lock);
+}
+
 /*
  * I2C driver interface functions
  */
@@ -289,6 +351,23 @@ static int attiny_i2c_probe(struct i2c_client *i2c,
 
 	bl->props.brightness = 0xff;
 
+	state->gc.parent = &i2c->dev;
+	state->gc.label = i2c->name;
+	state->gc.owner = THIS_MODULE;
+	state->gc.of_node = i2c->dev.of_node;
+	state->gc.base = -1;
+	state->gc.ngpio = NUM_GPIO;
+
+	state->gc.set = attiny_gpio_set;
+	state->gc.get_direction = attiny_gpio_get_direction;
+	state->gc.can_sleep = true;
+
+	ret = devm_gpiochip_add_data(&i2c->dev, &state->gc, state);
+	if (ret) {
+		dev_err(&i2c->dev, "Failed to create gpiochip: %d\n", ret);
+		goto error;
+	}
+
 	return 0;
 
 error:
-- 
GitLab


From 1d746d448f421094a71ba634399d2ee61669513f Mon Sep 17 00:00:00 2001
From: Dave Stevenson <dave.stevenson@raspberrypi.com>
Date: Mon, 24 Jan 2022 17:01:27 -0500
Subject: [PATCH 0089/1586] regulator: rpi-panel: Remove get_brightness hook

The driver was implementing a get_brightness function that
tried to read back the PWM setting of the display to report
as the current brightness.
The controller on the display does not support that, therefore
we end up reporting a brightness of 0, and that confuses
systemd's backlight service.

Remove the hook so that the framework returns the current
brightness automatically.

Signed-off-by: Dave Stevenson <dave.stevenson@raspberrypi.com>
Signed-off-by: Detlev Casanova <detlev.casanova@collabora.com>
Link: https://lore.kernel.org/r/20220124220129.158891-8-detlev.casanova@collabora.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../regulator/rpi-panel-attiny-regulator.c    | 23 -------------------
 1 file changed, 23 deletions(-)

diff --git a/drivers/regulator/rpi-panel-attiny-regulator.c b/drivers/regulator/rpi-panel-attiny-regulator.c
index 998233f14085e..8090b9a485b5e 100644
--- a/drivers/regulator/rpi-panel-attiny-regulator.c
+++ b/drivers/regulator/rpi-panel-attiny-regulator.c
@@ -207,31 +207,8 @@ static int attiny_update_status(struct backlight_device *bl)
 	return ret;
 }
 
-static int attiny_get_brightness(struct backlight_device *bl)
-{
-	struct attiny_lcd *state = bl_get_data(bl);
-	struct regmap *regmap = state->regmap;
-	int ret, brightness, i;
-
-	mutex_lock(&state->lock);
-
-	for (i = 0; i < 10; i++) {
-		ret = regmap_read(regmap, REG_PWM, &brightness);
-		if (!ret)
-			break;
-	}
-
-	mutex_unlock(&state->lock);
-
-	if (ret)
-		return ret;
-
-	return brightness;
-}
-
 static const struct backlight_ops attiny_bl = {
 	.update_status	= attiny_update_status,
-	.get_brightness	= attiny_get_brightness,
 };
 
 static int attiny_gpio_get_direction(struct gpio_chip *gc, unsigned int off)
-- 
GitLab


From 5fa4e8ea649009566a1b080f836ce23d4ce0c416 Mon Sep 17 00:00:00 2001
From: Dave Stevenson <dave.stevenson@raspberrypi.com>
Date: Mon, 24 Jan 2022 17:01:28 -0500
Subject: [PATCH 0090/1586] regulator/rpi-panel-attiny: Use the regmap cache

The I2C to the Atmel is very fussy, and locks up easily on
Pi0-3 particularly on reads.

The LCD power status is controlled solely by this driver, so
rather than reading it back from the Atmel, use the regmap cache
to avoid reading values.

Signed-off-by: Dave Stevenson <dave.stevenson@raspberrypi.com>
Signed-off-by: Detlev Casanova <detlev.casanova@collabora.com>
Link: https://lore.kernel.org/r/20220124220129.158891-9-detlev.casanova@collabora.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/rpi-panel-attiny-regulator.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/regulator/rpi-panel-attiny-regulator.c b/drivers/regulator/rpi-panel-attiny-regulator.c
index 8090b9a485b5e..6e408a4b2c219 100644
--- a/drivers/regulator/rpi-panel-attiny-regulator.c
+++ b/drivers/regulator/rpi-panel-attiny-regulator.c
@@ -76,7 +76,7 @@ static const struct regmap_config attiny_regmap_config = {
 	.val_bits = 8,
 	.disable_locking = 1,
 	.max_register = REG_WRITE_DATA_L,
-	.cache_type = REGCACHE_NONE,
+	.cache_type = REGCACHE_RBTREE,
 };
 
 static int attiny_set_port_state(struct attiny_lcd *state, int reg, u8 val)
-- 
GitLab


From e4a7e3f741f797d93d97a153b0f6a862d19a1304 Mon Sep 17 00:00:00 2001
From: Dave Stevenson <dave.stevenson@raspberrypi.com>
Date: Mon, 24 Jan 2022 17:01:29 -0500
Subject: [PATCH 0091/1586] regulator/rpi-panel-attiny: Use two transactions
 for I2C read

The I2C to the Atmel is very fussy, and locks up easily on
Pi0-3 particularly on reads.
If running at 100kHz on Pi3, reading the ID register generally
locks up the Atmel, but splitting the register select write and
read into two transactions is reliable.

Signed-off-by: Dave Stevenson <dave.stevenson@raspberrypi.com>
Signed-off-by: Detlev Casanova <detlev.casanova@collabora.com>
Link: https://lore.kernel.org/r/20220124220129.158891-10-detlev.casanova@collabora.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../regulator/rpi-panel-attiny-regulator.c    | 35 ++++++++++++++++++-
 1 file changed, 34 insertions(+), 1 deletion(-)

diff --git a/drivers/regulator/rpi-panel-attiny-regulator.c b/drivers/regulator/rpi-panel-attiny-regulator.c
index 6e408a4b2c219..f7df0f4b2f874 100644
--- a/drivers/regulator/rpi-panel-attiny-regulator.c
+++ b/drivers/regulator/rpi-panel-attiny-regulator.c
@@ -250,6 +250,39 @@ static void attiny_gpio_set(struct gpio_chip *gc, unsigned int off, int val)
 	mutex_unlock(&state->lock);
 }
 
+static int attiny_i2c_read(struct i2c_client *client, u8 reg, unsigned int *buf)
+{
+	struct i2c_msg msgs[1];
+	u8 addr_buf[1] = { reg };
+	u8 data_buf[1] = { 0, };
+	int ret;
+
+	/* Write register address */
+	msgs[0].addr = client->addr;
+	msgs[0].flags = 0;
+	msgs[0].len = ARRAY_SIZE(addr_buf);
+	msgs[0].buf = addr_buf;
+
+	ret = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs));
+	if (ret != ARRAY_SIZE(msgs))
+		return -EIO;
+
+	usleep_range(5000, 10000);
+
+	/* Read data from register */
+	msgs[0].addr = client->addr;
+	msgs[0].flags = I2C_M_RD;
+	msgs[0].len = 1;
+	msgs[0].buf = data_buf;
+
+	ret = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs));
+	if (ret != ARRAY_SIZE(msgs))
+		return -EIO;
+
+	*buf = data_buf[0];
+	return 0;
+}
+
 /*
  * I2C driver interface functions
  */
@@ -280,7 +313,7 @@ static int attiny_i2c_probe(struct i2c_client *i2c,
 		goto error;
 	}
 
-	ret = regmap_read(regmap, REG_ID, &data);
+	ret = attiny_i2c_read(i2c, REG_ID, &data);
 	if (ret < 0) {
 		dev_err(&i2c->dev, "Failed to read REG_ID reg: %d\n", ret);
 		goto error;
-- 
GitLab


From 735efea69d36347c6c7a6bf6e13f032d09c63c6f Mon Sep 17 00:00:00 2001
From: Minghao Chi <chi.minghao@zte.com.cn>
Date: Wed, 12 Jan 2022 08:05:44 +0000
Subject: [PATCH 0092/1586] crypto: ccp - remove redundant ret variable

Return value from ccp_crypto_enqueue_request() directly instead
of taking this in another redundant variable.

Reported-by: Zeal Robot <zealci@zte.com.cn>
Signed-off-by: Minghao Chi <chi.minghao@zte.com.cn>
Signed-off-by: CGEL ZTE <cgel.zte@gmail.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/ccp/ccp-crypto-aes.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/crypto/ccp/ccp-crypto-aes.c b/drivers/crypto/ccp/ccp-crypto-aes.c
index e6dcd8cedd53e..bed331953ff94 100644
--- a/drivers/crypto/ccp/ccp-crypto-aes.c
+++ b/drivers/crypto/ccp/ccp-crypto-aes.c
@@ -69,7 +69,6 @@ static int ccp_aes_crypt(struct skcipher_request *req, bool encrypt)
 	struct ccp_aes_req_ctx *rctx = skcipher_request_ctx(req);
 	struct scatterlist *iv_sg = NULL;
 	unsigned int iv_len = 0;
-	int ret;
 
 	if (!ctx->u.aes.key_len)
 		return -EINVAL;
@@ -104,9 +103,7 @@ static int ccp_aes_crypt(struct skcipher_request *req, bool encrypt)
 	rctx->cmd.u.aes.src_len = req->cryptlen;
 	rctx->cmd.u.aes.dst = req->dst;
 
-	ret = ccp_crypto_enqueue_request(&req->base, &rctx->cmd);
-
-	return ret;
+	return ccp_crypto_enqueue_request(&req->base, &rctx->cmd);
 }
 
 static int ccp_aes_encrypt(struct skcipher_request *req)
-- 
GitLab


From ab7d88549e2f7ae116afd303f32e1950cb790a1d Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Wed, 12 Jan 2022 15:05:03 +0100
Subject: [PATCH 0093/1586] hwrng: cavium - HW_RANDOM_CAVIUM should depend on
 ARCH_THUNDER

The Cavium ThunderX Random Number Generator is only present on Cavium
ThunderX SoCs, and not available as an independent PCIe endpoint.  Hence
add a dependency on ARCH_THUNDER, to prevent asking the user about this
driver when configuring a kernel without Cavium Thunder SoC  support.

Fixes: cc2f1908c6b8f625 ("hwrng: cavium - Add Cavium HWRNG driver for ThunderX SoC.")
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig
index 9704963f9d500..a087156a58186 100644
--- a/drivers/char/hw_random/Kconfig
+++ b/drivers/char/hw_random/Kconfig
@@ -401,7 +401,7 @@ config HW_RANDOM_MESON
 
 config HW_RANDOM_CAVIUM
 	tristate "Cavium ThunderX Random Number Generator support"
-	depends on HW_RANDOM && PCI && ARM64
+	depends on HW_RANDOM && PCI && ARCH_THUNDER
 	default HW_RANDOM
 	help
 	  This driver provides kernel-side support for the Random Number
-- 
GitLab


From 8fc5f2ad896b6cc8b7631d546efcf8e358872f76 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 14 Jan 2022 17:40:30 +1100
Subject: [PATCH 0094/1586] crypto: testmgr - Move
 crypto_simd_disabled_for_test out

As testmgr is part of cryptomgr which was designed to be unloadable
as a module, it shouldn't export any symbols for other crypto
modules to use as that would prevent it from being unloaded.  All
its functionality is meant to be accessed through notifiers.

The symbol crypto_simd_disabled_for_test was added to testmgr
which caused it to be pinned as a module if its users were also
loaded.  This patch moves it out of testmgr and into crypto/algapi.c
so cryptomgr can again be unloaded and replaced on demand.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Reviewed-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/algapi.c  | 6 ++++++
 crypto/testmgr.c | 3 ---
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/crypto/algapi.c b/crypto/algapi.c
index a366cb3e8aa18..9f15e11f5d73b 100644
--- a/crypto/algapi.c
+++ b/crypto/algapi.c
@@ -6,6 +6,7 @@
  */
 
 #include <crypto/algapi.h>
+#include <crypto/internal/simd.h>
 #include <linux/err.h>
 #include <linux/errno.h>
 #include <linux/fips.h>
@@ -21,6 +22,11 @@
 
 static LIST_HEAD(crypto_template_list);
 
+#ifdef CONFIG_CRYPTO_MANAGER_EXTRA_TESTS
+DEFINE_PER_CPU(bool, crypto_simd_disabled_for_test);
+EXPORT_PER_CPU_SYMBOL_GPL(crypto_simd_disabled_for_test);
+#endif
+
 static inline void crypto_check_module_sig(struct module *mod)
 {
 	if (fips_enabled && mod && !module_sig_ok(mod))
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index 5831d4bbc64fa..3a5a3e5cb77bf 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -55,9 +55,6 @@ MODULE_PARM_DESC(noextratests, "disable expensive crypto self-tests");
 static unsigned int fuzz_iterations = 100;
 module_param(fuzz_iterations, uint, 0644);
 MODULE_PARM_DESC(fuzz_iterations, "number of fuzz test iterations");
-
-DEFINE_PER_CPU(bool, crypto_simd_disabled_for_test);
-EXPORT_PER_CPU_SYMBOL_GPL(crypto_simd_disabled_for_test);
 #endif
 
 #ifdef CONFIG_CRYPTO_MANAGER_DISABLE_TESTS
-- 
GitLab


From 90be188b65f728f82bacdddb43c7899f26da7b1c Mon Sep 17 00:00:00 2001
From: Nathan Huckleberry <nhuck@google.com>
Date: Fri, 14 Jan 2022 18:17:19 -0600
Subject: [PATCH 0095/1586] crypto: x86 - Convert to SPDX identifier

Use SPDX-License-Identifier instead of a verbose license text and
update external link.

Cc: James Guilford <james.guilford@intel.com>
Cc: Sean Gulley <sean.m.gulley@intel.com>
Cc: Chandramouli Narayanan <mouli@linux.intel.com>
Signed-off-by: Nathan Huckleberry <nhuck@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/aes_ctrby8_avx-x86_64.S | 63 ++++---------------------
 1 file changed, 10 insertions(+), 53 deletions(-)

diff --git a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S
index c799838242a69..43852ba6e19c7 100644
--- a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S
+++ b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S
@@ -1,65 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR BSD-3-Clause */
 /*
- *	Implement AES CTR mode by8 optimization with AVX instructions. (x86_64)
- *
- * This is AES128/192/256 CTR mode optimization implementation. It requires
- * the support of Intel(R) AESNI and AVX instructions.
- *
- * This work was inspired by the AES CTR mode optimization published
- * in Intel Optimized IPSEC Cryptograhpic library.
- * Additional information on it can be found at:
- *    http://downloadcenter.intel.com/Detail_Desc.aspx?agr=Y&DwnldID=22972
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
+ * AES CTR mode by8 optimization with AVX instructions. (x86_64)
  *
  * Copyright(c) 2014 Intel Corporation.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
  * Contact Information:
  * James Guilford <james.guilford@intel.com>
  * Sean Gulley <sean.m.gulley@intel.com>
  * Chandramouli Narayanan <mouli@linux.intel.com>
+ */
+/*
+ * This is AES128/192/256 CTR mode optimization implementation. It requires
+ * the support of Intel(R) AESNI and AVX instructions.
  *
- * BSD LICENSE
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * This work was inspired by the AES CTR mode optimization published
+ * in Intel Optimized IPSEC Cryptographic library.
+ * Additional information on it can be found at:
+ *    https://github.com/intel/intel-ipsec-mb
  */
 
 #include <linux/linkage.h>
-- 
GitLab


From 881fc7fba6c3e7d77d608b9a50b01a89d5e0c61b Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Sat, 15 Jan 2022 10:07:14 +0000
Subject: [PATCH 0096/1586] crypto: sun8i-ss - really disable hash on A80

When adding hashes support to sun8i-ss, I have added them only on A83T.
But I forgot that 0 is a valid algorithm ID, so hashes are enabled on A80 but
with an incorrect ID.
Anyway, even with correct IDs, hashes do not work on A80 and I cannot
find why.
So let's disable all of them on A80.

Fixes: d9b45418a917 ("crypto: sun8i-ss - support hash algorithms")
Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c
index 80e89066dbd1a..319fe3279a716 100644
--- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c
+++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c
@@ -30,6 +30,8 @@
 static const struct ss_variant ss_a80_variant = {
 	.alg_cipher = { SS_ALG_AES, SS_ALG_DES, SS_ALG_3DES,
 	},
+	.alg_hash = { SS_ID_NOTSUPP, SS_ID_NOTSUPP, SS_ID_NOTSUPP, SS_ID_NOTSUPP,
+	},
 	.op_mode = { SS_OP_ECB, SS_OP_CBC,
 	},
 	.ss_clks = {
-- 
GitLab


From a88592cc27efd4ed0ceba79016eb4a3ddb90e05e Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 18 Jan 2022 16:31:29 +1100
Subject: [PATCH 0097/1586] crypto: kdf - Select hmac in addition to sha256

In addition to sha256 we must also enable hmac for the kdf self-test
to work.

Reported-by: kernel test robot <oliver.sang@intel.com>
Fixes: 304b4acee2f0 ("crypto: kdf - select SHA-256 required...")
Fixes: 026a733e6659 ("crypto: kdf - add SP800-108 counter key...")
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/crypto/Kconfig b/crypto/Kconfig
index 6dcc77e95caed..fa1741bb568f9 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -1861,6 +1861,7 @@ config CRYPTO_JITTERENTROPY
 
 config CRYPTO_KDF800108_CTR
 	tristate
+	select CRYPTO_HMAC
 	select CRYPTO_SHA256
 
 config CRYPTO_USER_API
-- 
GitLab


From 163a4e7fa73c3a617217e559eb5a9b58f1878bc7 Mon Sep 17 00:00:00 2001
From: Kevin Bracey <kevin@bracey.fi>
Date: Tue, 18 Jan 2022 12:23:48 +0200
Subject: [PATCH 0098/1586] lib/crc32: remove unneeded casts

Casts were added in commit 8f243af42ade ("sections: fix const sections
for crc32 table") to cope with the tables not being const. They are no
longer required since commit f5e38b9284e1 ("lib: crc32: constify crc32
lookup table").

Signed-off-by: Kevin Bracey <kevin@bracey.fi>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 lib/crc32.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/lib/crc32.c b/lib/crc32.c
index 2a68dfd3b96c8..7f062a2639df4 100644
--- a/lib/crc32.c
+++ b/lib/crc32.c
@@ -194,13 +194,11 @@ u32 __pure __weak __crc32c_le(u32 crc, unsigned char const *p, size_t len)
 #else
 u32 __pure __weak crc32_le(u32 crc, unsigned char const *p, size_t len)
 {
-	return crc32_le_generic(crc, p, len,
-			(const u32 (*)[256])crc32table_le, CRC32_POLY_LE);
+	return crc32_le_generic(crc, p, len, crc32table_le, CRC32_POLY_LE);
 }
 u32 __pure __weak __crc32c_le(u32 crc, unsigned char const *p, size_t len)
 {
-	return crc32_le_generic(crc, p, len,
-			(const u32 (*)[256])crc32ctable_le, CRC32C_POLY_LE);
+	return crc32_le_generic(crc, p, len, crc32ctable_le, CRC32C_POLY_LE);
 }
 #endif
 EXPORT_SYMBOL(crc32_le);
@@ -339,8 +337,7 @@ u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len)
 #else
 u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len)
 {
-	return crc32_be_generic(crc, p, len,
-			(const u32 (*)[256])crc32table_be, CRC32_POLY_BE);
+	return crc32_be_generic(crc, p, len, crc32table_be, CRC32_POLY_BE);
 }
 #endif
 EXPORT_SYMBOL(crc32_be);
-- 
GitLab


From 5cb29be47d44d6090f1cdc21f439439dc43b471f Mon Sep 17 00:00:00 2001
From: Kevin Bracey <kevin@bracey.fi>
Date: Tue, 18 Jan 2022 12:23:49 +0200
Subject: [PATCH 0099/1586] lib/crc32: Make crc32_be weak for arch override

crc32_le and __crc32c_le can be overridden - extend this to crc32_be.

Signed-off-by: Kevin Bracey <kevin@bracey.fi>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 lib/crc32.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/lib/crc32.c b/lib/crc32.c
index 7f062a2639df4..5649847d0a8d0 100644
--- a/lib/crc32.c
+++ b/lib/crc32.c
@@ -206,6 +206,7 @@ EXPORT_SYMBOL(__crc32c_le);
 
 u32 __pure crc32_le_base(u32, unsigned char const *, size_t) __alias(crc32_le);
 u32 __pure __crc32c_le_base(u32, unsigned char const *, size_t) __alias(__crc32c_le);
+u32 __pure crc32_be_base(u32, unsigned char const *, size_t) __alias(crc32_be);
 
 /*
  * This multiplies the polynomials x and y modulo the given modulus.
@@ -330,12 +331,12 @@ static inline u32 __pure crc32_be_generic(u32 crc, unsigned char const *p,
 }
 
 #if CRC_BE_BITS == 1
-u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len)
+u32 __pure __weak crc32_be(u32 crc, unsigned char const *p, size_t len)
 {
 	return crc32_be_generic(crc, p, len, NULL, CRC32_POLY_BE);
 }
 #else
-u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len)
+u32 __pure __weak crc32_be(u32 crc, unsigned char const *p, size_t len)
 {
 	return crc32_be_generic(crc, p, len, crc32table_be, CRC32_POLY_BE);
 }
-- 
GitLab


From 1b3dce8b8ab30bb9f1401acefa08a47dc46f0813 Mon Sep 17 00:00:00 2001
From: Kevin Bracey <kevin@bracey.fi>
Date: Tue, 18 Jan 2022 12:23:50 +0200
Subject: [PATCH 0100/1586] lib/crc32test: correct printed bytes count

crc32c_le self test had a stray multiply by two inherited from
the crc32_le+crc32_be test loop.

Signed-off-by: Kevin Bracey <kevin@bracey.fi>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 lib/crc32test.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/crc32test.c b/lib/crc32test.c
index 61ddce2cff777..9b4af79412c4a 100644
--- a/lib/crc32test.c
+++ b/lib/crc32test.c
@@ -675,7 +675,7 @@ static int __init crc32c_test(void)
 
 	/* pre-warm the cache */
 	for (i = 0; i < 100; i++) {
-		bytes += 2*test[i].length;
+		bytes += test[i].length;
 
 		crc ^= __crc32c_le(test[i].crc, test_buf +
 		    test[i].start, test[i].length);
-- 
GitLab


From 5f2f5eaa3e373c3a07a4f3552fe13d9cde5e23e5 Mon Sep 17 00:00:00 2001
From: Kevin Bracey <kevin@bracey.fi>
Date: Tue, 18 Jan 2022 12:23:51 +0200
Subject: [PATCH 0101/1586] arm64: lib: accelerate crc32_be

It makes no sense to leave crc32_be using the generic code while we
only accelerate the little-endian ops.

Even though the big-endian form doesn't fit as smoothly into the arm64,
we can speed it up and avoid hitting the D cache.

Tested on Cortex-A53. Without acceleration:

    crc32: CRC_LE_BITS = 64, CRC_BE BITS = 64
    crc32: self tests passed, processed 225944 bytes in 192240 nsec
    crc32c: CRC_LE_BITS = 64
    crc32c: self tests passed, processed 112972 bytes in 21360 nsec

With acceleration:

    crc32: CRC_LE_BITS = 64, CRC_BE BITS = 64
    crc32: self tests passed, processed 225944 bytes in 53480 nsec
    crc32c: CRC_LE_BITS = 64
    crc32c: self tests passed, processed 112972 bytes in 21480 nsec

Signed-off-by: Kevin Bracey <kevin@bracey.fi>
Tested-by: Ard Biesheuvel <ardb@kernel.org>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm64/lib/crc32.S | 87 +++++++++++++++++++++++++++++++++++-------
 1 file changed, 73 insertions(+), 14 deletions(-)

diff --git a/arch/arm64/lib/crc32.S b/arch/arm64/lib/crc32.S
index 0f9e10ecda231..8340dccff46ff 100644
--- a/arch/arm64/lib/crc32.S
+++ b/arch/arm64/lib/crc32.S
@@ -11,7 +11,44 @@
 
 	.arch		armv8-a+crc
 
-	.macro		__crc32, c
+	.macro		byteorder, reg, be
+	.if		\be
+CPU_LE( rev		\reg, \reg	)
+	.else
+CPU_BE( rev		\reg, \reg	)
+	.endif
+	.endm
+
+	.macro		byteorder16, reg, be
+	.if		\be
+CPU_LE( rev16		\reg, \reg	)
+	.else
+CPU_BE( rev16		\reg, \reg	)
+	.endif
+	.endm
+
+	.macro		bitorder, reg, be
+	.if		\be
+	rbit		\reg, \reg
+	.endif
+	.endm
+
+	.macro		bitorder16, reg, be
+	.if		\be
+	rbit		\reg, \reg
+	lsr		\reg, \reg, #16
+	.endif
+	.endm
+
+	.macro		bitorder8, reg, be
+	.if		\be
+	rbit		\reg, \reg
+	lsr		\reg, \reg, #24
+	.endif
+	.endm
+
+	.macro		__crc32, c, be=0
+	bitorder	w0, \be
 	cmp		x2, #16
 	b.lt		8f			// less than 16 bytes
 
@@ -24,10 +61,14 @@
 	add		x8, x8, x1
 	add		x1, x1, x7
 	ldp		x5, x6, [x8]
-CPU_BE(	rev		x3, x3		)
-CPU_BE(	rev		x4, x4		)
-CPU_BE(	rev		x5, x5		)
-CPU_BE(	rev		x6, x6		)
+	byteorder	x3, \be
+	byteorder	x4, \be
+	byteorder	x5, \be
+	byteorder	x6, \be
+	bitorder	x3, \be
+	bitorder	x4, \be
+	bitorder	x5, \be
+	bitorder	x6, \be
 
 	tst		x7, #8
 	crc32\c\()x	w8, w0, x3
@@ -55,33 +96,43 @@ CPU_BE(	rev		x6, x6		)
 32:	ldp		x3, x4, [x1], #32
 	sub		x2, x2, #32
 	ldp		x5, x6, [x1, #-16]
-CPU_BE(	rev		x3, x3		)
-CPU_BE(	rev		x4, x4		)
-CPU_BE(	rev		x5, x5		)
-CPU_BE(	rev		x6, x6		)
+	byteorder	x3, \be
+	byteorder	x4, \be
+	byteorder	x5, \be
+	byteorder	x6, \be
+	bitorder	x3, \be
+	bitorder	x4, \be
+	bitorder	x5, \be
+	bitorder	x6, \be
 	crc32\c\()x	w0, w0, x3
 	crc32\c\()x	w0, w0, x4
 	crc32\c\()x	w0, w0, x5
 	crc32\c\()x	w0, w0, x6
 	cbnz		x2, 32b
-0:	ret
+0:	bitorder	w0, \be
+	ret
 
 8:	tbz		x2, #3, 4f
 	ldr		x3, [x1], #8
-CPU_BE(	rev		x3, x3		)
+	byteorder	x3, \be
+	bitorder	x3, \be
 	crc32\c\()x	w0, w0, x3
 4:	tbz		x2, #2, 2f
 	ldr		w3, [x1], #4
-CPU_BE(	rev		w3, w3		)
+	byteorder	w3, \be
+	bitorder	w3, \be
 	crc32\c\()w	w0, w0, w3
 2:	tbz		x2, #1, 1f
 	ldrh		w3, [x1], #2
-CPU_BE(	rev16		w3, w3		)
+	byteorder16	w3, \be
+	bitorder16	w3, \be
 	crc32\c\()h	w0, w0, w3
 1:	tbz		x2, #0, 0f
 	ldrb		w3, [x1]
+	bitorder8	w3, \be
 	crc32\c\()b	w0, w0, w3
-0:	ret
+0:	bitorder	w0, \be
+	ret
 	.endm
 
 	.align		5
@@ -99,3 +150,11 @@ alternative_if_not ARM64_HAS_CRC32
 alternative_else_nop_endif
 	__crc32		c
 SYM_FUNC_END(__crc32c_le)
+
+	.align		5
+SYM_FUNC_START(crc32_be)
+alternative_if_not ARM64_HAS_CRC32
+	b		crc32_be_base
+alternative_else_nop_endif
+	__crc32		be=1
+SYM_FUNC_END(crc32_be)
-- 
GitLab


From 642a7d49c249f04007e68c124a148847471dd476 Mon Sep 17 00:00:00 2001
From: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Date: Tue, 18 Jan 2022 10:35:15 +0000
Subject: [PATCH 0102/1586] crypto: qat - fix access to PFVF interrupt
 registers for GEN4

The logic that detects, enables and disables pfvf interrupts was
expecting a single CSR per VF. Instead, the source and mask register are
two registers with a bit per VF.
Due to this, the driver is reading and setting reserved CSRs and not
masking the correct source of interrupts.

Fix the access to the source and mask register for QAT GEN4 devices by
removing the outer loop in adf_gen4_get_vf2pf_sources(),
adf_gen4_enable_vf2pf_interrupts() and
adf_gen4_disable_vf2pf_interrupts() and changing the helper macros
ADF_4XXX_VM2PF_SOU and ADF_4XXX_VM2PF_MSK.

Fixes: a9dc0d966605 ("crypto: qat - add PFVF support to the GEN4 host driver")
Signed-off-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Co-developed-by: Siming Wan <siming.wan@intel.com>
Signed-off-by: Siming Wan <siming.wan@intel.com>
Reviewed-by: Xin Zeng <xin.zeng@intel.com>
Reviewed-by: Wojciech Ziemba <wojciech.ziemba@intel.com>
Reviewed-by: Marco Chiappero <marco.chiappero@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/qat/qat_common/adf_gen4_pfvf.c | 42 ++++---------------
 1 file changed, 9 insertions(+), 33 deletions(-)

diff --git a/drivers/crypto/qat/qat_common/adf_gen4_pfvf.c b/drivers/crypto/qat/qat_common/adf_gen4_pfvf.c
index 8efbedf63bc80..3b3ea849c5e53 100644
--- a/drivers/crypto/qat/qat_common/adf_gen4_pfvf.c
+++ b/drivers/crypto/qat/qat_common/adf_gen4_pfvf.c
@@ -9,15 +9,12 @@
 #include "adf_pfvf_pf_proto.h"
 #include "adf_pfvf_utils.h"
 
-#define ADF_4XXX_MAX_NUM_VFS		16
-
 #define ADF_4XXX_PF2VM_OFFSET(i)	(0x40B010 + ((i) * 0x20))
 #define ADF_4XXX_VM2PF_OFFSET(i)	(0x40B014 + ((i) * 0x20))
 
 /* VF2PF interrupt source registers */
-#define ADF_4XXX_VM2PF_SOU(i)		(0x41A180 + ((i) * 4))
-#define ADF_4XXX_VM2PF_MSK(i)		(0x41A1C0 + ((i) * 4))
-#define ADF_4XXX_VM2PF_INT_EN_MSK	BIT(0)
+#define ADF_4XXX_VM2PF_SOU		0x41A180
+#define ADF_4XXX_VM2PF_MSK		0x41A1C0
 
 #define ADF_PFVF_GEN4_MSGTYPE_SHIFT	2
 #define ADF_PFVF_GEN4_MSGTYPE_MASK	0x3F
@@ -41,51 +38,30 @@ static u32 adf_gen4_pf_get_vf2pf_offset(u32 i)
 
 static u32 adf_gen4_get_vf2pf_sources(void __iomem *pmisc_addr)
 {
-	int i;
 	u32 sou, mask;
-	int num_csrs = ADF_4XXX_MAX_NUM_VFS;
-	u32 vf_mask = 0;
 
-	for (i = 0; i < num_csrs; i++) {
-		sou = ADF_CSR_RD(pmisc_addr, ADF_4XXX_VM2PF_SOU(i));
-		mask = ADF_CSR_RD(pmisc_addr, ADF_4XXX_VM2PF_MSK(i));
-		sou &= ~mask;
-		vf_mask |= sou << i;
-	}
+	sou = ADF_CSR_RD(pmisc_addr, ADF_4XXX_VM2PF_SOU);
+	mask = ADF_CSR_RD(pmisc_addr, ADF_4XXX_VM2PF_MSK);
 
-	return vf_mask;
+	return sou &= ~mask;
 }
 
 static void adf_gen4_enable_vf2pf_interrupts(void __iomem *pmisc_addr,
 					     u32 vf_mask)
 {
-	int num_csrs = ADF_4XXX_MAX_NUM_VFS;
-	unsigned long mask = vf_mask;
 	unsigned int val;
-	int i;
-
-	for_each_set_bit(i, &mask, num_csrs) {
-		unsigned int offset = ADF_4XXX_VM2PF_MSK(i);
 
-		val = ADF_CSR_RD(pmisc_addr, offset) & ~ADF_4XXX_VM2PF_INT_EN_MSK;
-		ADF_CSR_WR(pmisc_addr, offset, val);
-	}
+	val = ADF_CSR_RD(pmisc_addr, ADF_4XXX_VM2PF_MSK) & ~vf_mask;
+	ADF_CSR_WR(pmisc_addr, ADF_4XXX_VM2PF_MSK, val);
 }
 
 static void adf_gen4_disable_vf2pf_interrupts(void __iomem *pmisc_addr,
 					      u32 vf_mask)
 {
-	int num_csrs = ADF_4XXX_MAX_NUM_VFS;
-	unsigned long mask = vf_mask;
 	unsigned int val;
-	int i;
-
-	for_each_set_bit(i, &mask, num_csrs) {
-		unsigned int offset = ADF_4XXX_VM2PF_MSK(i);
 
-		val = ADF_CSR_RD(pmisc_addr, offset) | ADF_4XXX_VM2PF_INT_EN_MSK;
-		ADF_CSR_WR(pmisc_addr, offset, val);
-	}
+	val = ADF_CSR_RD(pmisc_addr, ADF_4XXX_VM2PF_MSK) | vf_mask;
+	ADF_CSR_WR(pmisc_addr, ADF_4XXX_VM2PF_MSK, val);
 }
 
 static int adf_gen4_pfvf_send(struct adf_accel_dev *accel_dev,
-- 
GitLab


From 9b30430ea356f237945e52f8a3a42158877bd5a9 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Tue, 18 Jan 2022 16:13:02 -0800
Subject: [PATCH 0103/1586] crypto: rsa-pkcs1pad - only allow with rsa

The pkcs1pad template can be instantiated with an arbitrary akcipher
algorithm, which doesn't make sense; it is specifically an RSA padding
scheme.  Make it check that the underlying algorithm really is RSA.

Fixes: 3d5b1ecdea6f ("crypto: rsa - RSA padding algorithm")
Cc: <stable@vger.kernel.org> # v4.5+
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/rsa-pkcs1pad.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/crypto/rsa-pkcs1pad.c b/crypto/rsa-pkcs1pad.c
index 8ac3e73e8ea65..1b35457814258 100644
--- a/crypto/rsa-pkcs1pad.c
+++ b/crypto/rsa-pkcs1pad.c
@@ -621,6 +621,11 @@ static int pkcs1pad_create(struct crypto_template *tmpl, struct rtattr **tb)
 
 	rsa_alg = crypto_spawn_akcipher_alg(&ctx->spawn);
 
+	if (strcmp(rsa_alg->base.cra_name, "rsa") != 0) {
+		err = -EINVAL;
+		goto err_free_inst;
+	}
+
 	err = -ENAMETOOLONG;
 	hash_name = crypto_attr_alg_name(tb[2]);
 	if (IS_ERR(hash_name)) {
-- 
GitLab


From e316f7179be22912281ce6331d96d7c121fb2b17 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Tue, 18 Jan 2022 16:13:03 -0800
Subject: [PATCH 0104/1586] crypto: rsa-pkcs1pad - correctly get hash from
 source scatterlist

Commit c7381b012872 ("crypto: akcipher - new verify API for public key
algorithms") changed akcipher_alg::verify to take in both the signature
and the actual hash and do the signature verification, rather than just
return the hash expected by the signature as was the case before.  To do
this, it implemented a hack where the signature and hash are
concatenated with each other in one scatterlist.

Obviously, for this to work correctly, akcipher_alg::verify needs to
correctly extract the two items from the scatterlist it is given.
Unfortunately, it doesn't correctly extract the hash in the case where
the signature is longer than the RSA key size, as it assumes that the
signature's length is equal to the RSA key size.  This causes a prefix
of the hash, or even the entire hash, to be taken from the *signature*.

(Note, the case of a signature longer than the RSA key size should not
be allowed in the first place; a separate patch will fix that.)

It is unclear whether the resulting scheme has any useful security
properties.

Fix this by correctly extracting the hash from the scatterlist.

Fixes: c7381b012872 ("crypto: akcipher - new verify API for public key algorithms")
Cc: <stable@vger.kernel.org> # v5.2+
Reviewed-by: Vitaly Chikunov <vt@altlinux.org>
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/rsa-pkcs1pad.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crypto/rsa-pkcs1pad.c b/crypto/rsa-pkcs1pad.c
index 1b35457814258..7b223adebabf6 100644
--- a/crypto/rsa-pkcs1pad.c
+++ b/crypto/rsa-pkcs1pad.c
@@ -495,7 +495,7 @@ static int pkcs1pad_verify_complete(struct akcipher_request *req, int err)
 			   sg_nents_for_len(req->src,
 					    req->src_len + req->dst_len),
 			   req_ctx->out_buf + ctx->key_size,
-			   req->dst_len, ctx->key_size);
+			   req->dst_len, req->src_len);
 	/* Do the actual verification step. */
 	if (memcmp(req_ctx->out_buf + ctx->key_size, out_buf + pos,
 		   req->dst_len) != 0)
-- 
GitLab


From d3481accd974541e6a5d6a1fb588924a3519c36e Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Tue, 18 Jan 2022 16:13:04 -0800
Subject: [PATCH 0105/1586] crypto: rsa-pkcs1pad - restore signature length
 check

RSA PKCS#1 v1.5 signatures are required to be the same length as the RSA
key size.  RFC8017 specifically requires the verifier to check this
(https://datatracker.ietf.org/doc/html/rfc8017#section-8.2.2).

Commit a49de377e051 ("crypto: Add hash param to pkcs1pad") changed the
kernel to allow longer signatures, but didn't explain this part of the
change; it seems to be unrelated to the rest of the commit.

Revert this change, since it doesn't appear to be correct.

We can be pretty sure that no one is relying on overly-long signatures
(which would have to be front-padded with zeroes) being supported, given
that they would have been broken since commit c7381b012872
("crypto: akcipher - new verify API for public key algorithms").

Fixes: a49de377e051 ("crypto: Add hash param to pkcs1pad")
Cc: <stable@vger.kernel.org> # v4.6+
Cc: Tadeusz Struk <tadeusz.struk@linaro.org>
Suggested-by: Vitaly Chikunov <vt@altlinux.org>
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/rsa-pkcs1pad.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crypto/rsa-pkcs1pad.c b/crypto/rsa-pkcs1pad.c
index 7b223adebabf6..6b556ddeb3a00 100644
--- a/crypto/rsa-pkcs1pad.c
+++ b/crypto/rsa-pkcs1pad.c
@@ -538,7 +538,7 @@ static int pkcs1pad_verify(struct akcipher_request *req)
 
 	if (WARN_ON(req->dst) ||
 	    WARN_ON(!req->dst_len) ||
-	    !ctx->key_size || req->src_len < ctx->key_size)
+	    !ctx->key_size || req->src_len != ctx->key_size)
 		return -EINVAL;
 
 	req_ctx->out_buf = kmalloc(ctx->key_size + req->dst_len, GFP_KERNEL);
-- 
GitLab


From a24611ea356c7f3f0ec926da11b9482ac1f414fd Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Tue, 18 Jan 2022 16:13:05 -0800
Subject: [PATCH 0106/1586] crypto: rsa-pkcs1pad - fix buffer overread in
 pkcs1pad_verify_complete()

Before checking whether the expected digest_info is present, we need to
check that there are enough bytes remaining.

Fixes: a49de377e051 ("crypto: Add hash param to pkcs1pad")
Cc: <stable@vger.kernel.org> # v4.6+
Cc: Tadeusz Struk <tadeusz.struk@linaro.org>
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/rsa-pkcs1pad.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/crypto/rsa-pkcs1pad.c b/crypto/rsa-pkcs1pad.c
index 6b556ddeb3a00..9d804831c8b3f 100644
--- a/crypto/rsa-pkcs1pad.c
+++ b/crypto/rsa-pkcs1pad.c
@@ -476,6 +476,8 @@ static int pkcs1pad_verify_complete(struct akcipher_request *req, int err)
 	pos++;
 
 	if (digest_info) {
+		if (digest_info->size > dst_len - pos)
+			goto done;
 		if (crypto_memneq(out_buf + pos, digest_info->data,
 				  digest_info->size))
 			goto done;
-- 
GitLab


From c2a28fdb2f4e0b1676709426d80a986dd601200e Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Tue, 18 Jan 2022 16:13:06 -0800
Subject: [PATCH 0107/1586] crypto: rsa-pkcs1pad - use clearer variable names

The new convention for akcipher_alg::verify makes it unclear which
values are the lengths of the signature and digest.  Add local variables
to make it clearer what is going on.

Also rename the digest_size variable in pkcs1pad_sign(), as it is
actually the digest *info* size, not the digest size which is different.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/rsa-pkcs1pad.c | 31 ++++++++++++++++---------------
 1 file changed, 16 insertions(+), 15 deletions(-)

diff --git a/crypto/rsa-pkcs1pad.c b/crypto/rsa-pkcs1pad.c
index 9d804831c8b3f..3285e3af43e14 100644
--- a/crypto/rsa-pkcs1pad.c
+++ b/crypto/rsa-pkcs1pad.c
@@ -385,15 +385,15 @@ static int pkcs1pad_sign(struct akcipher_request *req)
 	struct pkcs1pad_inst_ctx *ictx = akcipher_instance_ctx(inst);
 	const struct rsa_asn1_template *digest_info = ictx->digest_info;
 	int err;
-	unsigned int ps_end, digest_size = 0;
+	unsigned int ps_end, digest_info_size = 0;
 
 	if (!ctx->key_size)
 		return -EINVAL;
 
 	if (digest_info)
-		digest_size = digest_info->size;
+		digest_info_size = digest_info->size;
 
-	if (req->src_len + digest_size > ctx->key_size - 11)
+	if (req->src_len + digest_info_size > ctx->key_size - 11)
 		return -EOVERFLOW;
 
 	if (req->dst_len < ctx->key_size) {
@@ -406,7 +406,7 @@ static int pkcs1pad_sign(struct akcipher_request *req)
 	if (!req_ctx->in_buf)
 		return -ENOMEM;
 
-	ps_end = ctx->key_size - digest_size - req->src_len - 2;
+	ps_end = ctx->key_size - digest_info_size - req->src_len - 2;
 	req_ctx->in_buf[0] = 0x01;
 	memset(req_ctx->in_buf + 1, 0xff, ps_end - 1);
 	req_ctx->in_buf[ps_end] = 0x00;
@@ -441,6 +441,8 @@ static int pkcs1pad_verify_complete(struct akcipher_request *req, int err)
 	struct akcipher_instance *inst = akcipher_alg_instance(tfm);
 	struct pkcs1pad_inst_ctx *ictx = akcipher_instance_ctx(inst);
 	const struct rsa_asn1_template *digest_info = ictx->digest_info;
+	const unsigned int sig_size = req->src_len;
+	const unsigned int digest_size = req->dst_len;
 	unsigned int dst_len;
 	unsigned int pos;
 	u8 *out_buf;
@@ -487,20 +489,19 @@ static int pkcs1pad_verify_complete(struct akcipher_request *req, int err)
 
 	err = 0;
 
-	if (req->dst_len != dst_len - pos) {
+	if (digest_size != dst_len - pos) {
 		err = -EKEYREJECTED;
 		req->dst_len = dst_len - pos;
 		goto done;
 	}
 	/* Extract appended digest. */
 	sg_pcopy_to_buffer(req->src,
-			   sg_nents_for_len(req->src,
-					    req->src_len + req->dst_len),
+			   sg_nents_for_len(req->src, sig_size + digest_size),
 			   req_ctx->out_buf + ctx->key_size,
-			   req->dst_len, req->src_len);
+			   digest_size, sig_size);
 	/* Do the actual verification step. */
 	if (memcmp(req_ctx->out_buf + ctx->key_size, out_buf + pos,
-		   req->dst_len) != 0)
+		   digest_size) != 0)
 		err = -EKEYREJECTED;
 done:
 	kfree_sensitive(req_ctx->out_buf);
@@ -536,14 +537,15 @@ static int pkcs1pad_verify(struct akcipher_request *req)
 	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
 	struct pkcs1pad_ctx *ctx = akcipher_tfm_ctx(tfm);
 	struct pkcs1pad_request *req_ctx = akcipher_request_ctx(req);
+	const unsigned int sig_size = req->src_len;
+	const unsigned int digest_size = req->dst_len;
 	int err;
 
-	if (WARN_ON(req->dst) ||
-	    WARN_ON(!req->dst_len) ||
-	    !ctx->key_size || req->src_len != ctx->key_size)
+	if (WARN_ON(req->dst) || WARN_ON(!digest_size) ||
+	    !ctx->key_size || sig_size != ctx->key_size)
 		return -EINVAL;
 
-	req_ctx->out_buf = kmalloc(ctx->key_size + req->dst_len, GFP_KERNEL);
+	req_ctx->out_buf = kmalloc(ctx->key_size + digest_size, GFP_KERNEL);
 	if (!req_ctx->out_buf)
 		return -ENOMEM;
 
@@ -556,8 +558,7 @@ static int pkcs1pad_verify(struct akcipher_request *req)
 
 	/* Reuse input buffer, output to a new buffer */
 	akcipher_request_set_crypt(&req_ctx->child_req, req->src,
-				   req_ctx->out_sg, req->src_len,
-				   ctx->key_size);
+				   req_ctx->out_sg, sig_size, ctx->key_size);
 
 	err = crypto_akcipher_encrypt(&req_ctx->child_req);
 	if (err != -EINPROGRESS && err != -EBUSY)
-- 
GitLab


From 66eae850333d639fc278d6f915c6fc01499ea893 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 19 Jan 2022 17:58:40 +1100
Subject: [PATCH 0108/1586] crypto: authenc - Fix sleep in atomic context in
 decrypt_tail

The function crypto_authenc_decrypt_tail discards its flags
argument and always relies on the flags from the original request
when starting its sub-request.

This is clearly wrong as it may cause the SLEEPABLE flag to be
set when it shouldn't.

Fixes: 92d95ba91772 ("crypto: authenc - Convert to new AEAD interface")
Reported-by: Corentin Labbe <clabbe.montjoie@gmail.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Tested-by: Corentin Labbe <clabbe.montjoie@gmail.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/authenc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crypto/authenc.c b/crypto/authenc.c
index 670bf1a01d00e..17f674a7cdff5 100644
--- a/crypto/authenc.c
+++ b/crypto/authenc.c
@@ -253,7 +253,7 @@ static int crypto_authenc_decrypt_tail(struct aead_request *req,
 		dst = scatterwalk_ffwd(areq_ctx->dst, req->dst, req->assoclen);
 
 	skcipher_request_set_tfm(skreq, ctx->enc);
-	skcipher_request_set_callback(skreq, aead_request_flags(req),
+	skcipher_request_set_callback(skreq, flags,
 				      req->base.complete, req->base.data);
 	skcipher_request_set_crypt(skreq, src, dst,
 				   req->cryptlen - authsize, req->iv);
-- 
GitLab


From 1c16dfbe6cd6b6ce04e2c3d0c1fa33d80b2547f9 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Wed, 19 Jan 2022 10:31:09 +0100
Subject: [PATCH 0109/1586] crypto: memneq - avoid implicit unaligned accesses

The C standard does not support dereferencing pointers that are not
aligned with respect to the pointed-to type, and doing so is technically
undefined behavior, even if the underlying hardware supports it.

This means that conditionally dereferencing such pointers based on
whether CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y is not the right thing
to do, and actually results in alignment faults on ARM, which are fixed
up on a slow path. Instead, we should use the unaligned accessors in
such cases: on architectures that don't care about alignment, they will
result in identical codegen whereas, e.g., codegen on ARM will avoid
doubleword loads and stores but use ordinary ones, which are able to
tolerate misalignment.

Link: https://lore.kernel.org/linux-crypto/CAHk-=wiKkdYLY0bv+nXrcJz3NH9mAqPAafX7PpW5EwVtxsEu7Q@mail.gmail.com/
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Reviewed-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/memneq.c | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/crypto/memneq.c b/crypto/memneq.c
index afed1bd16aee0..fb11608b1ec1d 100644
--- a/crypto/memneq.c
+++ b/crypto/memneq.c
@@ -60,6 +60,7 @@
  */
 
 #include <crypto/algapi.h>
+#include <asm/unaligned.h>
 
 #ifndef __HAVE_ARCH_CRYPTO_MEMNEQ
 
@@ -71,7 +72,8 @@ __crypto_memneq_generic(const void *a, const void *b, size_t size)
 
 #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
 	while (size >= sizeof(unsigned long)) {
-		neq |= *(unsigned long *)a ^ *(unsigned long *)b;
+		neq |= get_unaligned((unsigned long *)a) ^
+		       get_unaligned((unsigned long *)b);
 		OPTIMIZER_HIDE_VAR(neq);
 		a += sizeof(unsigned long);
 		b += sizeof(unsigned long);
@@ -95,18 +97,24 @@ static inline unsigned long __crypto_memneq_16(const void *a, const void *b)
 
 #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
 	if (sizeof(unsigned long) == 8) {
-		neq |= *(unsigned long *)(a)   ^ *(unsigned long *)(b);
+		neq |= get_unaligned((unsigned long *)a) ^
+		       get_unaligned((unsigned long *)b);
 		OPTIMIZER_HIDE_VAR(neq);
-		neq |= *(unsigned long *)(a+8) ^ *(unsigned long *)(b+8);
+		neq |= get_unaligned((unsigned long *)(a + 8)) ^
+		       get_unaligned((unsigned long *)(b + 8));
 		OPTIMIZER_HIDE_VAR(neq);
 	} else if (sizeof(unsigned int) == 4) {
-		neq |= *(unsigned int *)(a)    ^ *(unsigned int *)(b);
+		neq |= get_unaligned((unsigned int *)a) ^
+		       get_unaligned((unsigned int *)b);
 		OPTIMIZER_HIDE_VAR(neq);
-		neq |= *(unsigned int *)(a+4)  ^ *(unsigned int *)(b+4);
+		neq |= get_unaligned((unsigned int *)(a + 4)) ^
+		       get_unaligned((unsigned int *)(b + 4));
 		OPTIMIZER_HIDE_VAR(neq);
-		neq |= *(unsigned int *)(a+8)  ^ *(unsigned int *)(b+8);
+		neq |= get_unaligned((unsigned int *)(a + 8)) ^
+		       get_unaligned((unsigned int *)(b + 8));
 		OPTIMIZER_HIDE_VAR(neq);
-		neq |= *(unsigned int *)(a+12) ^ *(unsigned int *)(b+12);
+		neq |= get_unaligned((unsigned int *)(a + 12)) ^
+		       get_unaligned((unsigned int *)(b + 12));
 		OPTIMIZER_HIDE_VAR(neq);
 	} else
 #endif /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
-- 
GitLab


From 85872d1a6f38d133133784c8027d25d1c5328f4f Mon Sep 17 00:00:00 2001
From: Shijith Thotton <sthotton@marvell.com>
Date: Fri, 21 Jan 2022 16:42:32 +0530
Subject: [PATCH 0110/1586] crypto: octeontx2 - select CONFIG_NET_DEVLINK

OcteonTX2 CPT driver will fail to link without devlink support.

aarch64-linux-gnu-ld: otx2_cpt_devlink.o: in function `otx2_cpt_dl_egrp_delete':
otx2_cpt_devlink.c:18: undefined reference to `devlink_priv'
aarch64-linux-gnu-ld: otx2_cpt_devlink.o: in function `otx2_cpt_dl_egrp_create':
otx2_cpt_devlink.c:9: undefined reference to `devlink_priv'
aarch64-linux-gnu-ld: otx2_cpt_devlink.o: in function `otx2_cpt_dl_uc_info':
otx2_cpt_devlink.c:27: undefined reference to `devlink_priv'

Fixes: fed8f4d5f946 ("crypto: octeontx2 - parameters for custom engine groups")

Signed-off-by: Shijith Thotton <sthotton@marvell.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/marvell/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/crypto/marvell/Kconfig b/drivers/crypto/marvell/Kconfig
index 9125199f1702b..a48591af12d02 100644
--- a/drivers/crypto/marvell/Kconfig
+++ b/drivers/crypto/marvell/Kconfig
@@ -47,6 +47,7 @@ config CRYPTO_DEV_OCTEONTX2_CPT
 	select CRYPTO_SKCIPHER
 	select CRYPTO_HASH
 	select CRYPTO_AEAD
+	select NET_DEVLINK
 	help
 		This driver allows you to utilize the Marvell Cryptographic
 		Accelerator Unit(CPT) found in OcteonTX2 series of processors.
-- 
GitLab


From 60ef3dde0d6c644769e860d10443a95f1ca9a115 Mon Sep 17 00:00:00 2001
From: Kai Ye <yekai13@huawei.com>
Date: Sat, 22 Jan 2022 16:13:08 +0800
Subject: [PATCH 0111/1586] crypto: hisilicon/sec - fixup icv checking enabled
 on Kunpeng 930

Fixup icv(integrity check value) checking enabled wrong on
Kunpeng 930

Signed-off-by: Kai Ye <yekai13@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/sec2/sec_crypto.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c
index 6a45bd23b3635..bf93c98473653 100644
--- a/drivers/crypto/hisilicon/sec2/sec_crypto.c
+++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c
@@ -1614,7 +1614,7 @@ static void sec_auth_bd_fill_ex_v3(struct sec_auth_ctx *ctx, int dir,
 		sqe3->auth_mac_key |= cpu_to_le32((u32)SEC_AUTH_TYPE1);
 		sqe3->huk_iv_seq &= SEC_CIPHER_AUTH_V3;
 	} else {
-		sqe3->auth_mac_key |= cpu_to_le32((u32)SEC_AUTH_TYPE1);
+		sqe3->auth_mac_key |= cpu_to_le32((u32)SEC_AUTH_TYPE2);
 		sqe3->huk_iv_seq |= SEC_AUTH_CIPHER_V3;
 	}
 	sqe3->a_len_key = cpu_to_le32(c_req->c_len + aq->assoclen);
-- 
GitLab


From e764d81d58070e66e61fb1b972c81eb9d2ea971e Mon Sep 17 00:00:00 2001
From: Kai Ye <yekai13@huawei.com>
Date: Sat, 22 Jan 2022 16:13:09 +0800
Subject: [PATCH 0112/1586] crypto: hisilicon/sec - add some comments for soft
 fallback

Modify the print of information that might lead to user misunderstanding.
Currently only XTS mode need the fallback tfm when using 192bit key.
Others algs not need soft fallback tfm. So others algs can return
directly.

Signed-off-by: Kai Ye <yekai13@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/sec2/sec_crypto.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c
index bf93c98473653..48abd3871c0e4 100644
--- a/drivers/crypto/hisilicon/sec2/sec_crypto.c
+++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c
@@ -641,13 +641,15 @@ static int sec_skcipher_fbtfm_init(struct crypto_skcipher *tfm)
 	struct sec_cipher_ctx *c_ctx = &ctx->c_ctx;
 
 	c_ctx->fallback = false;
+
+	/* Currently, only XTS mode need fallback tfm when using 192bit key */
 	if (likely(strncmp(alg, "xts", SEC_XTS_NAME_SZ)))
 		return 0;
 
 	c_ctx->fbtfm = crypto_alloc_sync_skcipher(alg, 0,
 						  CRYPTO_ALG_NEED_FALLBACK);
 	if (IS_ERR(c_ctx->fbtfm)) {
-		pr_err("failed to alloc fallback tfm!\n");
+		pr_err("failed to alloc xts mode fallback tfm!\n");
 		return PTR_ERR(c_ctx->fbtfm);
 	}
 
@@ -808,7 +810,7 @@ static int sec_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key,
 	}
 
 	memcpy(c_ctx->c_key, key, keylen);
-	if (c_ctx->fallback) {
+	if (c_ctx->fallback && c_ctx->fbtfm) {
 		ret = crypto_sync_skcipher_setkey(c_ctx->fbtfm, key, keylen);
 		if (ret) {
 			dev_err(dev, "failed to set fallback skcipher key!\n");
@@ -2032,13 +2034,12 @@ static int sec_skcipher_soft_crypto(struct sec_ctx *ctx,
 				    struct skcipher_request *sreq, bool encrypt)
 {
 	struct sec_cipher_ctx *c_ctx = &ctx->c_ctx;
+	SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, c_ctx->fbtfm);
 	struct device *dev = ctx->dev;
 	int ret;
 
-	SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, c_ctx->fbtfm);
-
 	if (!c_ctx->fbtfm) {
-		dev_err(dev, "failed to check fallback tfm\n");
+		dev_err_ratelimited(dev, "the soft tfm isn't supported in the current system.\n");
 		return -EINVAL;
 	}
 
@@ -2256,7 +2257,6 @@ static int sec_aead_param_check(struct sec_ctx *ctx, struct sec_req *sreq)
 	if (ctx->sec->qm.ver == QM_HW_V2) {
 		if (unlikely(!req->cryptlen || (!sreq->c_req.encrypt &&
 		    req->cryptlen <= authsize))) {
-			dev_err(dev, "Kunpeng920 not support 0 length!\n");
 			ctx->a_ctx.fallback = true;
 			return -EINVAL;
 		}
-- 
GitLab


From 5e340558c5c7f64504ef1f4d31af152f26705261 Mon Sep 17 00:00:00 2001
From: Kai Ye <yekai13@huawei.com>
Date: Sat, 22 Jan 2022 16:13:10 +0800
Subject: [PATCH 0113/1586] crypto: hisilicon/sec - fix the max length of AAD
 for the CCM mode

Fix the maximum length of AAD for the CCM mode due to the hardware limited.

Signed-off-by: Kai Ye <yekai13@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/sec2/sec_crypto.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c
index 48abd3871c0e4..8305eee9a3d34 100644
--- a/drivers/crypto/hisilicon/sec2/sec_crypto.c
+++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c
@@ -63,6 +63,7 @@
 #define SEC_AUTH_CIPHER		0x1
 #define SEC_MAX_MAC_LEN		64
 #define SEC_MAX_AAD_LEN		65535
+#define SEC_MAX_CCM_AAD_LEN	65279
 #define SEC_TOTAL_MAC_SZ	(SEC_MAX_MAC_LEN * QM_Q_DEPTH)
 
 #define SEC_PBUF_SZ			512
@@ -2220,6 +2221,10 @@ static int sec_aead_spec_check(struct sec_ctx *ctx, struct sec_req *sreq)
 	}
 
 	if (c_mode == SEC_CMODE_CCM) {
+		if (unlikely(req->assoclen > SEC_MAX_CCM_AAD_LEN)) {
+			dev_err_ratelimited(dev, "CCM input aad parameter is too long!\n");
+			return -EINVAL;
+		}
 		ret = aead_iv_demension_check(req);
 		if (ret) {
 			dev_err(dev, "aead input iv param error!\n");
-- 
GitLab


From 566f060f5453e906dd4195c7e94f1fd25d2c99bb Mon Sep 17 00:00:00 2001
From: Kai Ye <yekai13@huawei.com>
Date: Sat, 22 Jan 2022 16:13:11 +0800
Subject: [PATCH 0114/1586] crypto: hisilicon/sec - fix the CTR mode BD
 configuration

The CTR counter is 32bit rollover default on the BD.
But the NIST standard is 128bit rollover. it cause the
testing failed, so need to fix the BD configuration.

Signed-off-by: Kai Ye <yekai13@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/sec2/sec_crypto.c | 6 ++++++
 drivers/crypto/hisilicon/sec2/sec_crypto.h | 6 ++++--
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c
index 8305eee9a3d34..7013272134b2b 100644
--- a/drivers/crypto/hisilicon/sec2/sec_crypto.c
+++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c
@@ -42,6 +42,8 @@
 #define SEC_DE_OFFSET_V3		9
 #define SEC_SCENE_OFFSET_V3	5
 #define SEC_CKEY_OFFSET_V3	13
+#define SEC_CTR_CNT_OFFSET	25
+#define SEC_CTR_CNT_ROLLOVER	2
 #define SEC_SRC_SGL_OFFSET_V3	11
 #define SEC_DST_SGL_OFFSET_V3	14
 #define SEC_CALG_OFFSET_V3	4
@@ -1303,6 +1305,10 @@ static int sec_skcipher_bd_fill_v3(struct sec_ctx *ctx, struct sec_req *req)
 		cipher = SEC_CIPHER_DEC;
 	sec_sqe3->c_icv_key |= cpu_to_le16(cipher);
 
+	/* Set the CTR counter mode is 128bit rollover */
+	sec_sqe3->auth_mac_key = cpu_to_le32((u32)SEC_CTR_CNT_ROLLOVER <<
+					SEC_CTR_CNT_OFFSET);
+
 	if (req->use_pbuf) {
 		bd_param |= SEC_PBUF << SEC_SRC_SGL_OFFSET_V3;
 		bd_param |= SEC_PBUF << SEC_DST_SGL_OFFSET_V3;
diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.h b/drivers/crypto/hisilicon/sec2/sec_crypto.h
index 9f71c358a6d35..5e039b50e9d4c 100644
--- a/drivers/crypto/hisilicon/sec2/sec_crypto.h
+++ b/drivers/crypto/hisilicon/sec2/sec_crypto.h
@@ -354,8 +354,10 @@ struct sec_sqe3 {
 	 * akey_len: 9~14 bits
 	 * a_alg: 15~20 bits
 	 * key_sel: 21~24 bits
-	 * updata_key: 25 bits
-	 * reserved: 26~31 bits
+	 * ctr_count_mode/sm4_xts: 25~26 bits
+	 * sva_prefetch: 27 bits
+	 * key_wrap_num: 28~30 bits
+	 * update_key: 31 bits
 	 */
 	__le32 auth_mac_key;
 	__le32 salt;
-- 
GitLab


From 498382593c7c90eb81111d315eeecba9508ddf58 Mon Sep 17 00:00:00 2001
From: Kai Ye <yekai13@huawei.com>
Date: Sat, 22 Jan 2022 16:13:12 +0800
Subject: [PATCH 0115/1586] crypto: hisilicon/sec - use the correct print
 format

Use the correct print format. Printing an unsigned int value should
use %u instead of %d.

Signed-off-by: Kai Ye <yekai13@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/sec2/sec_crypto.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c
index 7013272134b2b..8caba9fd1f19c 100644
--- a/drivers/crypto/hisilicon/sec2/sec_crypto.c
+++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c
@@ -240,7 +240,7 @@ static void sec_req_cb(struct hisi_qp *qp, void *resp)
 
 	if (unlikely(type != type_supported)) {
 		atomic64_inc(&dfx->err_bd_cnt);
-		pr_err("err bd type [%d]\n", type);
+		pr_err("err bd type [%u]\n", type);
 		return;
 	}
 
-- 
GitLab


From 05b3bade290d6c940701f97f3233c07cfe27205d Mon Sep 17 00:00:00 2001
From: Kai Ye <yekai13@huawei.com>
Date: Sat, 22 Jan 2022 17:30:42 +0800
Subject: [PATCH 0116/1586] crypto: hisilicon/qm - cleanup warning in
 qm_vf_read_qos

The kernel test rebot report this warning: Uninitialized variable: ret.
The code flow may return value of ret directly. This value is an
uninitialized variable, here is fix it.

Signed-off-by: Kai Ye <yekai13@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/qm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index c5b84a5ea3501..3b29c8993b8c7 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -4295,7 +4295,7 @@ static void qm_vf_get_qos(struct hisi_qm *qm, u32 fun_num)
 static int qm_vf_read_qos(struct hisi_qm *qm)
 {
 	int cnt = 0;
-	int ret;
+	int ret = -EINVAL;
 
 	/* reset mailbox qos val */
 	qm->mb_qos = 0;
-- 
GitLab


From 28e9b6d8199a3f124682b143800c2dacdc3d70dd Mon Sep 17 00:00:00 2001
From: Tomas Paukrt <tomaspaukrt@email.cz>
Date: Sat, 22 Jan 2022 18:07:53 +0100
Subject: [PATCH 0117/1586] crypto: mxs-dcp - Fix scatterlist processing

This patch fixes a bug in scatterlist processing that may cause incorrect AES block encryption/decryption.

Fixes: 2e6d793e1bf0 ("crypto: mxs-dcp - Use sg_mapping_iter to copy data")
Signed-off-by: Tomas Paukrt <tomaspaukrt@email.cz>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/mxs-dcp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/mxs-dcp.c b/drivers/crypto/mxs-dcp.c
index d19e5ffb5104b..d6f9e2fe863d7 100644
--- a/drivers/crypto/mxs-dcp.c
+++ b/drivers/crypto/mxs-dcp.c
@@ -331,7 +331,7 @@ static int mxs_dcp_aes_block_crypt(struct crypto_async_request *arq)
 		memset(key + AES_KEYSIZE_128, 0, AES_KEYSIZE_128);
 	}
 
-	for_each_sg(req->src, src, sg_nents(src), i) {
+	for_each_sg(req->src, src, sg_nents(req->src), i) {
 		src_buf = sg_virt(src);
 		len = sg_dma_len(src);
 		tlen += len;
-- 
GitLab


From 1c4cafd11599abdbc53a520f0b6e6799d037eae1 Mon Sep 17 00:00:00 2001
From: Yury Norov <yury.norov@gmail.com>
Date: Sun, 23 Jan 2022 10:38:52 -0800
Subject: [PATCH 0118/1586] padata: replace cpumask_weight with cpumask_empty
 in padata.c

padata_do_parallel() calls cpumask_weight() to check if any bit of a
given cpumask is set. We can do it more efficiently with cpumask_empty()
because cpumask_empty() stops traversing the cpumask as soon as it finds
first set bit, while cpumask_weight() counts all bits unconditionally.

Signed-off-by: Yury Norov <yury.norov@gmail.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 kernel/padata.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/padata.c b/kernel/padata.c
index 18d3a5c699d84..e5819bb8bd1dc 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -181,7 +181,7 @@ int padata_do_parallel(struct padata_shell *ps,
 		goto out;
 
 	if (!cpumask_test_cpu(*cb_cpu, pd->cpumask.cbcpu)) {
-		if (!cpumask_weight(pd->cpumask.cbcpu))
+		if (cpumask_empty(pd->cpumask.cbcpu))
 			goto out;
 
 		/* Select an alternate fallback CPU and notify the caller. */
-- 
GitLab


From 31455bbda2081af83f72bb4636348b12b82c37c1 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Tue, 25 Jan 2022 01:58:36 +0100
Subject: [PATCH 0119/1586] spi: pxa2xx_spi: Convert to use GPIO descriptors

This converts the PXA2xx SPI driver to use GPIO descriptors
exclusively to retrieve GPIO chip select lines.

The device tree and ACPI paths of the driver already use
descriptors, hence ->use_gpio_descriptors is already set and
this codepath is well tested.

Convert all the PXA boards providing chip select GPIOs as
platform data and drop the old GPIO chipselect handling in
favor of the core managing it exclusively.

Cc: Marek Vasut <marek.vasut@gmail.com>
Cc: Daniel Mack <daniel@zonque.org>
Cc: Haojian Zhuang <haojian.zhuang@gmail.com>
Cc: Robert Jarzmik <robert.jarzmik@free.fr>
Cc: linux-arm-kernel@lists.infradead.org
Acked-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20220125005836.494807-1-linus.walleij@linaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 Documentation/spi/pxa2xx.rst   |  3 --
 arch/arm/mach-pxa/corgi.c      | 26 ++++++--------
 arch/arm/mach-pxa/hx4700.c     | 10 +++++-
 arch/arm/mach-pxa/icontrol.c   | 26 +++++++++++---
 arch/arm/mach-pxa/littleton.c  | 10 +++++-
 arch/arm/mach-pxa/magician.c   | 12 +++++--
 arch/arm/mach-pxa/poodle.c     | 14 +++++---
 arch/arm/mach-pxa/spitz.c      | 26 ++++++--------
 arch/arm/mach-pxa/stargate2.c  | 20 +++++++++--
 arch/arm/mach-pxa/z2.c         | 20 +++++++++--
 drivers/spi/spi-pxa2xx.c       | 63 +---------------------------------
 include/linux/spi/pxa2xx_spi.h |  1 -
 12 files changed, 117 insertions(+), 114 deletions(-)

diff --git a/Documentation/spi/pxa2xx.rst b/Documentation/spi/pxa2xx.rst
index 6347580826bef..716f65d87d048 100644
--- a/Documentation/spi/pxa2xx.rst
+++ b/Documentation/spi/pxa2xx.rst
@@ -101,7 +101,6 @@ device. All fields are optional.
 	u8 rx_threshold;
 	u8 dma_burst_size;
 	u32 timeout;
-	int gpio_cs;
   };
 
 The "pxa2xx_spi_chip.tx_threshold" and "pxa2xx_spi_chip.rx_threshold" fields are
@@ -146,7 +145,6 @@ field. Below is a sample configuration using the PXA255 NSSP.
 	.rx_threshold = 8, /* SSP hardward FIFO threshold */
 	.dma_burst_size = 8, /* Byte wide transfers used so 8 byte bursts */
 	.timeout = 235, /* See Intel documentation */
-	.gpio_cs = 2, /* Use external chip select */
   };
 
   static struct pxa2xx_spi_chip cs8405a_chip_info = {
@@ -154,7 +152,6 @@ field. Below is a sample configuration using the PXA255 NSSP.
 	.rx_threshold = 8, /* SSP hardward FIFO threshold */
 	.dma_burst_size = 8, /* Byte wide transfers used so 8 byte bursts */
 	.timeout = 235, /* See Intel documentation */
-	.gpio_cs = 3, /* Use external chip select */
   };
 
   static struct spi_board_info streetracer_spi_board_info[] __initdata = {
diff --git a/arch/arm/mach-pxa/corgi.c b/arch/arm/mach-pxa/corgi.c
index 593c7f793da53..44659fbc37bab 100644
--- a/arch/arm/mach-pxa/corgi.c
+++ b/arch/arm/mach-pxa/corgi.c
@@ -530,6 +530,16 @@ static struct pxa2xx_spi_controller corgi_spi_info = {
 	.num_chipselect	= 3,
 };
 
+static struct gpiod_lookup_table corgi_spi_gpio_table = {
+	.dev_id = "pxa2xx-spi.1",
+	.table = {
+		GPIO_LOOKUP_IDX("gpio-pxa", CORGI_GPIO_ADS7846_CS, "cs", 0, GPIO_ACTIVE_LOW),
+		GPIO_LOOKUP_IDX("gpio-pxa", CORGI_GPIO_LCDCON_CS, "cs", 1, GPIO_ACTIVE_LOW),
+		GPIO_LOOKUP_IDX("gpio-pxa", CORGI_GPIO_MAX1111_CS, "cs", 2, GPIO_ACTIVE_LOW),
+		{ },
+	},
+};
+
 static void corgi_wait_for_hsync(void)
 {
 	while (gpio_get_value(CORGI_GPIO_HSYNC))
@@ -548,10 +558,6 @@ static struct ads7846_platform_data corgi_ads7846_info = {
 	.wait_for_sync		= corgi_wait_for_hsync,
 };
 
-static struct pxa2xx_spi_chip corgi_ads7846_chip = {
-	.gpio_cs	= CORGI_GPIO_ADS7846_CS,
-};
-
 static void corgi_bl_kick_battery(void)
 {
 	void (*kick_batt)(void);
@@ -580,14 +586,6 @@ static struct corgi_lcd_platform_data corgi_lcdcon_info = {
 	.kick_battery		= corgi_bl_kick_battery,
 };
 
-static struct pxa2xx_spi_chip corgi_lcdcon_chip = {
-	.gpio_cs	= CORGI_GPIO_LCDCON_CS,
-};
-
-static struct pxa2xx_spi_chip corgi_max1111_chip = {
-	.gpio_cs	= CORGI_GPIO_MAX1111_CS,
-};
-
 static struct spi_board_info corgi_spi_devices[] = {
 	{
 		.modalias	= "ads7846",
@@ -595,7 +593,6 @@ static struct spi_board_info corgi_spi_devices[] = {
 		.bus_num	= 1,
 		.chip_select	= 0,
 		.platform_data	= &corgi_ads7846_info,
-		.controller_data= &corgi_ads7846_chip,
 		.irq		= PXA_GPIO_TO_IRQ(CORGI_GPIO_TP_INT),
 	}, {
 		.modalias	= "corgi-lcd",
@@ -603,18 +600,17 @@ static struct spi_board_info corgi_spi_devices[] = {
 		.bus_num	= 1,
 		.chip_select	= 1,
 		.platform_data	= &corgi_lcdcon_info,
-		.controller_data= &corgi_lcdcon_chip,
 	}, {
 		.modalias	= "max1111",
 		.max_speed_hz	= 450000,
 		.bus_num	= 1,
 		.chip_select	= 2,
-		.controller_data= &corgi_max1111_chip,
 	},
 };
 
 static void __init corgi_init_spi(void)
 {
+	gpiod_add_lookup_table(&corgi_spi_gpio_table);
 	pxa2xx_set_spi_info(1, &corgi_spi_info);
 	gpiod_add_lookup_table(&corgi_lcdcon_gpio_table);
 	spi_register_board_info(ARRAY_AND_SIZE(corgi_spi_devices));
diff --git a/arch/arm/mach-pxa/hx4700.c b/arch/arm/mach-pxa/hx4700.c
index 1d4c5db54be29..e1870fbb19e7e 100644
--- a/arch/arm/mach-pxa/hx4700.c
+++ b/arch/arm/mach-pxa/hx4700.c
@@ -616,7 +616,6 @@ static struct pxa2xx_spi_chip tsc2046_chip = {
 	.tx_threshold = 1,
 	.rx_threshold = 2,
 	.timeout      = 64,
-	.gpio_cs      = GPIO88_HX4700_TSC2046_CS,
 };
 
 static struct spi_board_info tsc2046_board_info[] __initdata = {
@@ -635,6 +634,14 @@ static struct pxa2xx_spi_controller pxa_ssp2_master_info = {
 	.enable_dma     = 1,
 };
 
+static struct gpiod_lookup_table pxa_ssp2_gpio_table = {
+	.dev_id = "pxa2xx-spi.2",
+	.table = {
+		GPIO_LOOKUP_IDX("gpio-pxa", GPIO88_HX4700_TSC2046_CS, "cs", 0, GPIO_ACTIVE_LOW),
+		{ },
+	},
+};
+
 /*
  * External power
  */
@@ -896,6 +903,7 @@ static void __init hx4700_init(void)
 	pxa_set_i2c_info(NULL);
 	i2c_register_board_info(0, ARRAY_AND_SIZE(i2c_board_info));
 	i2c_register_board_info(1, ARRAY_AND_SIZE(pi2c_board_info));
+	gpiod_add_lookup_table(&pxa_ssp2_gpio_table);
 	pxa2xx_set_spi_info(2, &pxa_ssp2_master_info);
 	spi_register_board_info(ARRAY_AND_SIZE(tsc2046_board_info));
 
diff --git a/arch/arm/mach-pxa/icontrol.c b/arch/arm/mach-pxa/icontrol.c
index 04a12523cdee0..753fe166ab681 100644
--- a/arch/arm/mach-pxa/icontrol.c
+++ b/arch/arm/mach-pxa/icontrol.c
@@ -13,7 +13,7 @@
 #include <linux/irq.h>
 #include <linux/platform_device.h>
 #include <linux/property.h>
-#include <linux/gpio.h>
+#include <linux/gpio/machine.h>
 
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
@@ -42,7 +42,6 @@ static struct pxa2xx_spi_chip mcp251x_chip_info1 = {
 	.rx_threshold   = 128,
 	.dma_burst_size = 8,
 	.timeout        = 235,
-	.gpio_cs        = ICONTROL_MCP251x_nCS1
 };
 
 static struct pxa2xx_spi_chip mcp251x_chip_info2 = {
@@ -50,7 +49,6 @@ static struct pxa2xx_spi_chip mcp251x_chip_info2 = {
 	.rx_threshold   = 128,
 	.dma_burst_size = 8,
 	.timeout        = 235,
-	.gpio_cs        = ICONTROL_MCP251x_nCS2
 };
 
 static struct pxa2xx_spi_chip mcp251x_chip_info3 = {
@@ -58,7 +56,6 @@ static struct pxa2xx_spi_chip mcp251x_chip_info3 = {
 	.rx_threshold   = 128,
 	.dma_burst_size = 8,
 	.timeout        = 235,
-	.gpio_cs        = ICONTROL_MCP251x_nCS3
 };
 
 static struct pxa2xx_spi_chip mcp251x_chip_info4 = {
@@ -66,7 +63,6 @@ static struct pxa2xx_spi_chip mcp251x_chip_info4 = {
 	.rx_threshold   = 128,
 	.dma_burst_size = 8,
 	.timeout        = 235,
-	.gpio_cs        = ICONTROL_MCP251x_nCS4
 };
 
 static const struct property_entry mcp251x_properties[] = {
@@ -143,6 +139,24 @@ struct platform_device pxa_spi_ssp4 = {
 	}
 };
 
+static struct gpiod_lookup_table pxa_ssp3_gpio_table = {
+	.dev_id = "pxa2xx-spi.3",
+	.table = {
+		GPIO_LOOKUP_IDX("gpio-pxa", ICONTROL_MCP251x_nCS1, "cs", 0, GPIO_ACTIVE_LOW),
+		GPIO_LOOKUP_IDX("gpio-pxa", ICONTROL_MCP251x_nCS2, "cs", 1, GPIO_ACTIVE_LOW),
+		{ },
+	},
+};
+
+static struct gpiod_lookup_table pxa_ssp4_gpio_table = {
+	.dev_id = "pxa2xx-spi.4",
+	.table = {
+		GPIO_LOOKUP_IDX("gpio-pxa", ICONTROL_MCP251x_nCS3, "cs", 0, GPIO_ACTIVE_LOW),
+		GPIO_LOOKUP_IDX("gpio-pxa", ICONTROL_MCP251x_nCS4, "cs", 1, GPIO_ACTIVE_LOW),
+		{ },
+	},
+};
+
 static struct platform_device *icontrol_spi_devices[] __initdata = {
 	&pxa_spi_ssp3,
 	&pxa_spi_ssp4,
@@ -175,6 +189,8 @@ static mfp_cfg_t mfp_can_cfg[] __initdata = {
 static void __init icontrol_can_init(void)
 {
 	pxa3xx_mfp_config(ARRAY_AND_SIZE(mfp_can_cfg));
+	gpiod_add_lookup_table(&pxa_ssp3_gpio_table);
+	gpiod_add_lookup_table(&pxa_ssp4_gpio_table);
 	platform_add_devices(ARRAY_AND_SIZE(icontrol_spi_devices));
 	spi_register_board_info(ARRAY_AND_SIZE(mcp251x_board_info));
 }
diff --git a/arch/arm/mach-pxa/littleton.c b/arch/arm/mach-pxa/littleton.c
index 793f61375ee8c..73f5953b3bb6b 100644
--- a/arch/arm/mach-pxa/littleton.c
+++ b/arch/arm/mach-pxa/littleton.c
@@ -195,7 +195,6 @@ static struct pxa2xx_spi_controller littleton_spi_info = {
 static struct pxa2xx_spi_chip littleton_tdo24m_chip = {
 	.rx_threshold	= 1,
 	.tx_threshold	= 1,
-	.gpio_cs	= LITTLETON_GPIO_LCD_CS,
 };
 
 static struct spi_board_info littleton_spi_devices[] __initdata = {
@@ -208,8 +207,17 @@ static struct spi_board_info littleton_spi_devices[] __initdata = {
 	},
 };
 
+static struct gpiod_lookup_table littleton_spi_gpio_table = {
+	.dev_id = "pxa2xx-spi.2",
+	.table = {
+		GPIO_LOOKUP_IDX("gpio-pxa", LITTLETON_GPIO_LCD_CS, "cs", 0, GPIO_ACTIVE_LOW),
+		{ },
+	},
+};
+
 static void __init littleton_init_spi(void)
 {
+	gpiod_add_lookup_table(&littleton_spi_gpio_table);
 	pxa2xx_set_spi_info(2, &littleton_spi_info);
 	spi_register_board_info(ARRAY_AND_SIZE(littleton_spi_devices));
 }
diff --git a/arch/arm/mach-pxa/magician.c b/arch/arm/mach-pxa/magician.c
index cd9fa465b9b2a..200fd35168e05 100644
--- a/arch/arm/mach-pxa/magician.c
+++ b/arch/arm/mach-pxa/magician.c
@@ -938,8 +938,6 @@ struct pxa2xx_spi_chip tsc2046_chip_info = {
 	.tx_threshold	= 1,
 	.rx_threshold	= 2,
 	.timeout	= 64,
-	/* NOTICE must be GPIO, incompatibility with hw PXA SPI framing */
-	.gpio_cs	= GPIO14_MAGICIAN_TSC2046_CS,
 };
 
 static struct pxa2xx_spi_controller magician_spi_info = {
@@ -947,6 +945,15 @@ static struct pxa2xx_spi_controller magician_spi_info = {
 	.enable_dma	= 1,
 };
 
+static struct gpiod_lookup_table magician_spi_gpio_table = {
+	.dev_id = "pxa2xx-spi.2",
+	.table = {
+		/* NOTICE must be GPIO, incompatibility with hw PXA SPI framing */
+		GPIO_LOOKUP_IDX("gpio-pxa", GPIO14_MAGICIAN_TSC2046_CS, "cs", 0, GPIO_ACTIVE_LOW),
+		{ },
+	},
+};
+
 static struct spi_board_info ads7846_spi_board_info[] __initdata = {
 	{
 		.modalias		= "ads7846",
@@ -1031,6 +1038,7 @@ static void __init magician_init(void)
 	} else
 		pr_err("LCD detection: CPLD mapping failed\n");
 
+	gpiod_add_lookup_table(&magician_spi_gpio_table);
 	pxa2xx_set_spi_info(2, &magician_spi_info);
 	spi_register_board_info(ARRAY_AND_SIZE(ads7846_spi_board_info));
 
diff --git a/arch/arm/mach-pxa/poodle.c b/arch/arm/mach-pxa/poodle.c
index 3a4ecc3c8f8b6..58cfa434afdeb 100644
--- a/arch/arm/mach-pxa/poodle.c
+++ b/arch/arm/mach-pxa/poodle.c
@@ -197,6 +197,14 @@ static struct pxa2xx_spi_controller poodle_spi_info = {
 	.num_chipselect	= 1,
 };
 
+static struct gpiod_lookup_table poodle_spi_gpio_table = {
+	.dev_id = "pxa2xx-spi.1",
+	.table = {
+		GPIO_LOOKUP_IDX("gpio-pxa", POODLE_GPIO_TP_CS, "cs", 0, GPIO_ACTIVE_LOW),
+		{ },
+	},
+};
+
 static struct ads7846_platform_data poodle_ads7846_info = {
 	.model			= 7846,
 	.vref_delay_usecs	= 100,
@@ -205,23 +213,19 @@ static struct ads7846_platform_data poodle_ads7846_info = {
 	.gpio_pendown		= POODLE_GPIO_TP_INT,
 };
 
-static struct pxa2xx_spi_chip poodle_ads7846_chip = {
-	.gpio_cs		= POODLE_GPIO_TP_CS,
-};
-
 static struct spi_board_info poodle_spi_devices[] = {
 	{
 		.modalias	= "ads7846",
 		.max_speed_hz	= 10000,
 		.bus_num	= 1,
 		.platform_data	= &poodle_ads7846_info,
-		.controller_data= &poodle_ads7846_chip,
 		.irq		= PXA_GPIO_TO_IRQ(POODLE_GPIO_TP_INT),
 	},
 };
 
 static void __init poodle_init_spi(void)
 {
+	gpiod_add_lookup_table(&poodle_spi_gpio_table);
 	pxa2xx_set_spi_info(1, &poodle_spi_info);
 	spi_register_board_info(ARRAY_AND_SIZE(poodle_spi_devices));
 }
diff --git a/arch/arm/mach-pxa/spitz.c b/arch/arm/mach-pxa/spitz.c
index 371008e9bb029..a648e7094e84e 100644
--- a/arch/arm/mach-pxa/spitz.c
+++ b/arch/arm/mach-pxa/spitz.c
@@ -510,10 +510,6 @@ static struct ads7846_platform_data spitz_ads7846_info = {
 	.wait_for_sync		= spitz_ads7846_wait_for_hsync,
 };
 
-static struct pxa2xx_spi_chip spitz_ads7846_chip = {
-	.gpio_cs		= SPITZ_GPIO_ADS7846_CS,
-};
-
 static void spitz_bl_kick_battery(void)
 {
 	void (*kick_batt)(void);
@@ -555,14 +551,6 @@ static struct corgi_lcd_platform_data spitz_lcdcon_info = {
 	.kick_battery		= spitz_bl_kick_battery,
 };
 
-static struct pxa2xx_spi_chip spitz_lcdcon_chip = {
-	.gpio_cs	= SPITZ_GPIO_LCDCON_CS,
-};
-
-static struct pxa2xx_spi_chip spitz_max1111_chip = {
-	.gpio_cs	= SPITZ_GPIO_MAX1111_CS,
-};
-
 static struct spi_board_info spitz_spi_devices[] = {
 	{
 		.modalias		= "ads7846",
@@ -570,7 +558,6 @@ static struct spi_board_info spitz_spi_devices[] = {
 		.bus_num		= 2,
 		.chip_select		= 0,
 		.platform_data		= &spitz_ads7846_info,
-		.controller_data	= &spitz_ads7846_chip,
 		.irq			= PXA_GPIO_TO_IRQ(SPITZ_GPIO_TP_INT),
 	}, {
 		.modalias		= "corgi-lcd",
@@ -578,13 +565,11 @@ static struct spi_board_info spitz_spi_devices[] = {
 		.bus_num		= 2,
 		.chip_select		= 1,
 		.platform_data		= &spitz_lcdcon_info,
-		.controller_data	= &spitz_lcdcon_chip,
 	}, {
 		.modalias		= "max1111",
 		.max_speed_hz		= 450000,
 		.bus_num		= 2,
 		.chip_select		= 2,
-		.controller_data	= &spitz_max1111_chip,
 	},
 };
 
@@ -592,6 +577,16 @@ static struct pxa2xx_spi_controller spitz_spi_info = {
 	.num_chipselect	= 3,
 };
 
+static struct gpiod_lookup_table spitz_spi_gpio_table = {
+	.dev_id = "pxa2xx-spi.2",
+	.table = {
+		GPIO_LOOKUP_IDX("gpio-pxa", SPITZ_GPIO_ADS7846_CS, "cs", 0, GPIO_ACTIVE_LOW),
+		GPIO_LOOKUP_IDX("gpio-pxa", SPITZ_GPIO_LCDCON_CS, "cs", 1, GPIO_ACTIVE_LOW),
+		GPIO_LOOKUP_IDX("gpio-pxa", SPITZ_GPIO_MAX1111_CS, "cs", 2, GPIO_ACTIVE_LOW),
+		{ },
+	},
+};
+
 static void __init spitz_spi_init(void)
 {
 	if (machine_is_akita())
@@ -599,6 +594,7 @@ static void __init spitz_spi_init(void)
 	else
 		gpiod_add_lookup_table(&spitz_lcdcon_gpio_table);
 
+	gpiod_add_lookup_table(&spitz_spi_gpio_table);
 	pxa2xx_set_spi_info(2, &spitz_spi_info);
 	spi_register_board_info(ARRAY_AND_SIZE(spitz_spi_devices));
 }
diff --git a/arch/arm/mach-pxa/stargate2.c b/arch/arm/mach-pxa/stargate2.c
index 8ca02ec1d44ce..b43e2f4536a5a 100644
--- a/arch/arm/mach-pxa/stargate2.c
+++ b/arch/arm/mach-pxa/stargate2.c
@@ -346,6 +346,22 @@ static struct pxa2xx_spi_controller pxa_ssp_master_2_info = {
 	.num_chipselect = 1,
 };
 
+static struct gpiod_lookup_table pxa_ssp1_gpio_table = {
+	.dev_id = "pxa2xx-spi.1",
+	.table = {
+		GPIO_LOOKUP_IDX("gpio-pxa", 24, "cs", 0, GPIO_ACTIVE_LOW),
+		{ },
+	},
+};
+
+static struct gpiod_lookup_table pxa_ssp3_gpio_table = {
+	.dev_id = "pxa2xx-spi.3",
+	.table = {
+		GPIO_LOOKUP_IDX("gpio-pxa", 39, "cs", 0, GPIO_ACTIVE_LOW),
+		{ },
+	},
+};
+
 /* An upcoming kernel change will scrap SFRM usage so these
  * drivers have been moved to use GPIOs */
 static struct pxa2xx_spi_chip staccel_chip_info = {
@@ -353,7 +369,6 @@ static struct pxa2xx_spi_chip staccel_chip_info = {
 	.rx_threshold = 8,
 	.dma_burst_size = 8,
 	.timeout = 235,
-	.gpio_cs = 24,
 };
 
 static struct pxa2xx_spi_chip cc2420_info = {
@@ -361,7 +376,6 @@ static struct pxa2xx_spi_chip cc2420_info = {
 	.rx_threshold = 8,
 	.dma_burst_size = 8,
 	.timeout = 235,
-	.gpio_cs = 39,
 };
 
 static struct spi_board_info spi_board_info[] __initdata = {
@@ -410,6 +424,8 @@ static void __init imote2_stargate2_init(void)
 	pxa_set_btuart_info(NULL);
 	pxa_set_stuart_info(NULL);
 
+	gpiod_add_lookup_table(&pxa_ssp1_gpio_table);
+	gpiod_add_lookup_table(&pxa_ssp3_gpio_table);
 	pxa2xx_set_spi_info(1, &pxa_ssp_master_0_info);
 	pxa2xx_set_spi_info(2, &pxa_ssp_master_1_info);
 	pxa2xx_set_spi_info(3, &pxa_ssp_master_2_info);
diff --git a/arch/arm/mach-pxa/z2.c b/arch/arm/mach-pxa/z2.c
index 8e74fbb0a96e2..7eaeda2699270 100644
--- a/arch/arm/mach-pxa/z2.c
+++ b/arch/arm/mach-pxa/z2.c
@@ -570,7 +570,6 @@ static struct pxa2xx_spi_chip z2_lbs_chip_info = {
 	.rx_threshold	= 8,
 	.tx_threshold	= 8,
 	.timeout	= 1000,
-	.gpio_cs	= GPIO24_ZIPITZ2_WIFI_CS,
 };
 
 static struct libertas_spi_platform_data z2_lbs_pdata = {
@@ -584,7 +583,6 @@ static struct pxa2xx_spi_chip lms283_chip_info = {
 	.rx_threshold	= 1,
 	.tx_threshold	= 1,
 	.timeout	= 64,
-	.gpio_cs	= GPIO88_ZIPITZ2_LCD_CS,
 };
 
 static struct gpiod_lookup_table lms283_gpio_table = {
@@ -624,8 +622,26 @@ static struct pxa2xx_spi_controller pxa_ssp2_master_info = {
 	.num_chipselect	= 1,
 };
 
+static struct gpiod_lookup_table pxa_ssp1_gpio_table = {
+	.dev_id = "pxa2xx-spi.1",
+	.table = {
+		GPIO_LOOKUP_IDX("gpio-pxa", GPIO24_ZIPITZ2_WIFI_CS, "cs", 0, GPIO_ACTIVE_LOW),
+		{ },
+	},
+};
+
+static struct gpiod_lookup_table pxa_ssp2_gpio_table = {
+	.dev_id = "pxa2xx-spi.2",
+	.table = {
+		GPIO_LOOKUP_IDX("gpio-pxa", GPIO88_ZIPITZ2_LCD_CS, "cs", 0, GPIO_ACTIVE_LOW),
+		{ },
+	},
+};
+
 static void __init z2_spi_init(void)
 {
+	gpiod_add_lookup_table(&pxa_ssp1_gpio_table);
+	gpiod_add_lookup_table(&pxa_ssp2_gpio_table);
 	pxa2xx_set_spi_info(1, &pxa_ssp1_master_info);
 	pxa2xx_set_spi_info(2, &pxa_ssp2_master_info);
 	gpiod_add_lookup_table(&lms283_gpio_table);
diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c
index e88f86274eeb0..abb9f0ffd3773 100644
--- a/drivers/spi/spi-pxa2xx.c
+++ b/drivers/spi/spi-pxa2xx.c
@@ -13,7 +13,6 @@
 #include <linux/err.h>
 #include <linux/errno.h>
 #include <linux/gpio/consumer.h>
-#include <linux/gpio.h>
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/ioport.h>
@@ -1163,57 +1162,6 @@ static int pxa2xx_spi_unprepare_transfer(struct spi_controller *controller)
 	return 0;
 }
 
-static void cleanup_cs(struct spi_device *spi)
-{
-	if (!gpio_is_valid(spi->cs_gpio))
-		return;
-
-	gpio_free(spi->cs_gpio);
-	spi->cs_gpio = -ENOENT;
-}
-
-static int setup_cs(struct spi_device *spi, struct chip_data *chip,
-		    struct pxa2xx_spi_chip *chip_info)
-{
-	struct driver_data *drv_data = spi_controller_get_devdata(spi->controller);
-
-	if (chip == NULL)
-		return 0;
-
-	if (chip_info == NULL)
-		return 0;
-
-	if (drv_data->ssp_type == CE4100_SSP)
-		return 0;
-
-	/*
-	 * NOTE: setup() can be called multiple times, possibly with
-	 * different chip_info, release previously requested GPIO.
-	 */
-	cleanup_cs(spi);
-
-	if (gpio_is_valid(chip_info->gpio_cs)) {
-		int gpio = chip_info->gpio_cs;
-		int err;
-
-		err = gpio_request(gpio, "SPI_CS");
-		if (err) {
-			dev_err(&spi->dev, "failed to request chip select GPIO%d\n", gpio);
-			return err;
-		}
-
-		err = gpio_direction_output(gpio, !(spi->mode & SPI_CS_HIGH));
-		if (err) {
-			gpio_free(gpio);
-			return err;
-		}
-
-		spi->cs_gpio = gpio;
-	}
-
-	return 0;
-}
-
 static int setup(struct spi_device *spi)
 {
 	struct pxa2xx_spi_chip *chip_info;
@@ -1222,7 +1170,6 @@ static int setup(struct spi_device *spi)
 	struct driver_data *drv_data =
 		spi_controller_get_devdata(spi->controller);
 	uint tx_thres, tx_hi_thres, rx_thres;
-	int err;
 
 	switch (drv_data->ssp_type) {
 	case QUARK_X1000_SSP:
@@ -1365,21 +1312,13 @@ static int setup(struct spi_device *spi)
 
 	spi_set_ctldata(spi, chip);
 
-	if (drv_data->ssp_type == CE4100_SSP)
-		return 0;
-
-	err = setup_cs(spi, chip, chip_info);
-	if (err)
-		kfree(chip);
-
-	return err;
+	return 0;
 }
 
 static void cleanup(struct spi_device *spi)
 {
 	struct chip_data *chip = spi_get_ctldata(spi);
 
-	cleanup_cs(spi);
 	kfree(chip);
 }
 
diff --git a/include/linux/spi/pxa2xx_spi.h b/include/linux/spi/pxa2xx_spi.h
index ca74dce367065..4658e7801b42e 100644
--- a/include/linux/spi/pxa2xx_spi.h
+++ b/include/linux/spi/pxa2xx_spi.h
@@ -42,7 +42,6 @@ struct pxa2xx_spi_chip {
 	u8 rx_threshold;
 	u8 dma_burst_size;
 	u32 timeout;
-	int gpio_cs;
 };
 
 #if defined(CONFIG_ARCH_PXA) || defined(CONFIG_ARCH_MMP)
-- 
GitLab


From 1a5a87d541b442293dfcc2253b652bc7b7e02d09 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Sat, 22 Jan 2022 01:33:02 +0100
Subject: [PATCH 0120/1586] spi: mt65xx: Convert to GPIO descriptors

The MT65xx driver was already relying on the core to get some
GPIO line numbers so it can be (hopefully) trivially converted
to use descriptors instead.

Cc: Dafna Hirschfeld <dafna.hirschfeld@collabora.com>
Cc: Mason Zhang <Mason.Zhang@mediatek.com>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Peter Hess <peter.hess@ph-home.de>
Cc: Leilk Liu <leilk.liu@mediatek.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20220122003302.374304-1-linus.walleij@linaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-mt65xx.c | 23 ++++++-----------------
 1 file changed, 6 insertions(+), 17 deletions(-)

diff --git a/drivers/spi/spi-mt65xx.c b/drivers/spi/spi-mt65xx.c
index a15de10ee286a..4f49b2e93ca7b 100644
--- a/drivers/spi/spi-mt65xx.c
+++ b/drivers/spi/spi-mt65xx.c
@@ -12,7 +12,7 @@
 #include <linux/ioport.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_gpio.h>
+#include <linux/gpio/consumer.h>
 #include <linux/platform_device.h>
 #include <linux/platform_data/spi-mt65xx.h>
 #include <linux/pm_runtime.h>
@@ -605,8 +605,9 @@ static int mtk_spi_setup(struct spi_device *spi)
 	if (!spi->controller_data)
 		spi->controller_data = (void *)&mtk_default_chip_info;
 
-	if (mdata->dev_comp->need_pad_sel && gpio_is_valid(spi->cs_gpio))
-		gpio_direction_output(spi->cs_gpio, !(spi->mode & SPI_CS_HIGH));
+	if (mdata->dev_comp->need_pad_sel && spi->cs_gpiod)
+		/* CS de-asserted, gpiolib will handle inversion */
+		gpiod_direction_output(spi->cs_gpiod, 0);
 
 	return 0;
 }
@@ -730,6 +731,7 @@ static int mtk_spi_probe(struct platform_device *pdev)
 	master->can_dma = mtk_spi_can_dma;
 	master->setup = mtk_spi_setup;
 	master->set_cs_timing = mtk_spi_set_hw_cs_timing;
+	master->use_gpio_descriptors = true;
 
 	of_id = of_match_node(mtk_spi_of_match, pdev->dev.of_node);
 	if (!of_id) {
@@ -853,25 +855,12 @@ static int mtk_spi_probe(struct platform_device *pdev)
 			goto err_disable_runtime_pm;
 		}
 
-		if (!master->cs_gpios && master->num_chipselect > 1) {
+		if (!master->cs_gpiods && master->num_chipselect > 1) {
 			dev_err(&pdev->dev,
 				"cs_gpios not specified and num_chipselect > 1\n");
 			ret = -EINVAL;
 			goto err_disable_runtime_pm;
 		}
-
-		if (master->cs_gpios) {
-			for (i = 0; i < master->num_chipselect; i++) {
-				ret = devm_gpio_request(&pdev->dev,
-							master->cs_gpios[i],
-							dev_name(&pdev->dev));
-				if (ret) {
-					dev_err(&pdev->dev,
-						"can't get CS GPIO %i\n", i);
-					goto err_disable_runtime_pm;
-				}
-			}
-		}
 	}
 
 	if (mdata->dev_comp->dma_ext)
-- 
GitLab


From 2818824ced4be5abc22c450340d548702f166d9a Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Thu, 20 Jan 2022 01:26:00 +0100
Subject: [PATCH 0121/1586] spi: mpc512x-psc: Convert to use GPIO descriptors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This driver is already relying on the core to provide
valid GPIO numbers in spi->cs_gpio through
of_spi_get_gpio_numbers(), so we can switch to letting
the core use GPIO descriptors instead.

The driver was assigning a local function to the custom
chipselect callback, but I chose to just open code the
gpiod setting instead, this is easier to read.

The only platform that overrides the cs_control callback
is the mpc832x_rdb.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Cc: Anatolij Gustschin <agust@denx.de>
Cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20220120002600.216667-1-linus.walleij@linaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-mpc512x-psc.c | 46 ++++++++++++++---------------------
 1 file changed, 18 insertions(+), 28 deletions(-)

diff --git a/drivers/spi/spi-mpc512x-psc.c b/drivers/spi/spi-mpc512x-psc.c
index 78a9bca8cc689..8a488d8e4c1be 100644
--- a/drivers/spi/spi-mpc512x-psc.c
+++ b/drivers/spi/spi-mpc512x-psc.c
@@ -23,7 +23,6 @@
 #include <linux/clk.h>
 #include <linux/spi/spi.h>
 #include <linux/fsl_devices.h>
-#include <linux/gpio.h>
 #include <asm/mpc52xx_psc.h>
 
 enum {
@@ -128,17 +127,28 @@ static void mpc512x_psc_spi_activate_cs(struct spi_device *spi)
 	out_be32(psc_addr(mps, ccr), ccr);
 	mps->bits_per_word = cs->bits_per_word;
 
-	if (mps->cs_control && gpio_is_valid(spi->cs_gpio))
-		mps->cs_control(spi, (spi->mode & SPI_CS_HIGH) ? 1 : 0);
+	if (cs->gpiod) {
+		if (mps->cs_control)
+			/* boardfile override */
+			mps->cs_control(spi, (spi->mode & SPI_CS_HIGH) ? 1 : 0);
+		else
+			/* gpiolib will deal with the inversion */
+			gpiod_set_value(spi->cs_gpiod, 1);
+	}
 }
 
 static void mpc512x_psc_spi_deactivate_cs(struct spi_device *spi)
 {
 	struct mpc512x_psc_spi *mps = spi_master_get_devdata(spi->master);
 
-	if (mps->cs_control && gpio_is_valid(spi->cs_gpio))
-		mps->cs_control(spi, (spi->mode & SPI_CS_HIGH) ? 0 : 1);
-
+	if (spi->cs_gpiod) {
+		if (mps->cs_control)
+			/* boardfile override */
+			mps->cs_control(spi, (spi->mode & SPI_CS_HIGH) ? 0 : 1);
+		else
+			/* gpiolib will deal with the inversion */
+			gpiod_set_value(spi->cs_gpiod, 0);
+	}
 }
 
 /* extract and scale size field in txsz or rxsz */
@@ -373,18 +383,6 @@ static int mpc512x_psc_spi_setup(struct spi_device *spi)
 		if (!cs)
 			return -ENOMEM;
 
-		if (gpio_is_valid(spi->cs_gpio)) {
-			ret = gpio_request(spi->cs_gpio, dev_name(&spi->dev));
-			if (ret) {
-				dev_err(&spi->dev, "can't get CS gpio: %d\n",
-					ret);
-				kfree(cs);
-				return ret;
-			}
-			gpio_direction_output(spi->cs_gpio,
-					spi->mode & SPI_CS_HIGH ? 0 : 1);
-		}
-
 		spi->controller_state = cs;
 	}
 
@@ -396,8 +394,6 @@ static int mpc512x_psc_spi_setup(struct spi_device *spi)
 
 static void mpc512x_psc_spi_cleanup(struct spi_device *spi)
 {
-	if (gpio_is_valid(spi->cs_gpio))
-		gpio_free(spi->cs_gpio);
 	kfree(spi->controller_state);
 }
 
@@ -476,11 +472,6 @@ static irqreturn_t mpc512x_psc_spi_isr(int irq, void *dev_id)
 	return IRQ_NONE;
 }
 
-static void mpc512x_spi_cs_control(struct spi_device *spi, bool onoff)
-{
-	gpio_set_value(spi->cs_gpio, onoff);
-}
-
 static int mpc512x_psc_spi_do_probe(struct device *dev, u32 regaddr,
 					      u32 size, unsigned int irq)
 {
@@ -500,9 +491,7 @@ static int mpc512x_psc_spi_do_probe(struct device *dev, u32 regaddr,
 	mps->type = (int)of_device_get_match_data(dev);
 	mps->irq = irq;
 
-	if (pdata == NULL) {
-		mps->cs_control = mpc512x_spi_cs_control;
-	} else {
+	if (pdata) {
 		mps->cs_control = pdata->cs_control;
 		master->bus_num = pdata->bus_num;
 		master->num_chipselect = pdata->max_chipselect;
@@ -513,6 +502,7 @@ static int mpc512x_psc_spi_do_probe(struct device *dev, u32 regaddr,
 	master->prepare_transfer_hardware = mpc512x_psc_spi_prep_xfer_hw;
 	master->transfer_one_message = mpc512x_psc_spi_msg_xfer;
 	master->unprepare_transfer_hardware = mpc512x_psc_spi_unprep_xfer_hw;
+	master->use_gpio_descriptors = true;
 	master->cleanup = mpc512x_psc_spi_cleanup;
 	master->dev.of_node = dev->of_node;
 
-- 
GitLab


From 99407f11b5657cd66625c6b55a73d38b67803a8c Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Sat, 22 Jan 2022 01:48:46 +0100
Subject: [PATCH 0122/1586] spi: pic32: Convert to use GPIO descriptors

The driver already relies on the core looking up GPIO lines
from the core, so this is trivial to switch over to using
GPIO descriptors.

Cc: Purna Chandra Mandal <purna.mandal@microchip.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20220122004846.374930-1-linus.walleij@linaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-pic32.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/spi/spi-pic32.c b/drivers/spi/spi-pic32.c
index f86433b29260e..7e5c09a7d4890 100644
--- a/drivers/spi/spi-pic32.c
+++ b/drivers/spi/spi-pic32.c
@@ -591,18 +591,16 @@ static int pic32_spi_setup(struct spi_device *spi)
 	 * unreliable/erroneous SPI transactions.
 	 * To avoid that we will always handle /CS by toggling GPIO.
 	 */
-	if (!gpio_is_valid(spi->cs_gpio))
+	if (!spi->cs_gpiod)
 		return -EINVAL;
 
-	gpio_direction_output(spi->cs_gpio, !(spi->mode & SPI_CS_HIGH));
-
 	return 0;
 }
 
 static void pic32_spi_cleanup(struct spi_device *spi)
 {
-	/* de-activate cs-gpio */
-	gpio_direction_output(spi->cs_gpio, !(spi->mode & SPI_CS_HIGH));
+	/* de-activate cs-gpio, gpiolib will handle inversion */
+	gpiod_direction_output(spi->cs_gpiod, 0);
 }
 
 static int pic32_spi_dma_prep(struct pic32_spi *pic32s, struct device *dev)
@@ -784,6 +782,7 @@ static int pic32_spi_probe(struct platform_device *pdev)
 	master->unprepare_message	= pic32_spi_unprepare_message;
 	master->prepare_transfer_hardware	= pic32_spi_prepare_hardware;
 	master->unprepare_transfer_hardware	= pic32_spi_unprepare_hardware;
+	master->use_gpio_descriptors = true;
 
 	/* optional DMA support */
 	ret = pic32_spi_dma_prep(pic32s, &pdev->dev);
-- 
GitLab


From 6938e02f8658734209fc1f68dc3a6cd355f4f737 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=83=AD=E5=8A=9B=E8=B1=AA?= <lhjeff911@gmail.com>
Date: Fri, 28 Jan 2022 22:47:41 +0800
Subject: [PATCH 0123/1586] spi: sp7201: Fix compiler warnings

Fix compiler warming for kernel test

Fixes: f62ca4e2a863 ("spi: Add spi driver for Sunplus SP7021")
Signed-off-by: Li-hao Kuo <lhjeff911@gmail.com>
Link: https://lore.kernel.org/r/CAGcXWkzM6wbhNFLbYoijq7iS_76nYVod1ySFEDu-BRgnBokEQA@mail.gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-sunplus-sp7021.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/spi/spi-sunplus-sp7021.c b/drivers/spi/spi-sunplus-sp7021.c
index cbbb1664017eb..e5bdeb3eba458 100644
--- a/drivers/spi/spi-sunplus-sp7021.c
+++ b/drivers/spi/spi-sunplus-sp7021.c
@@ -124,7 +124,7 @@ static int sp7021_spi_slave_abort(struct spi_controller *ctlr)
 	return 0;
 }
 
-int sp7021_spi_slave_tx(struct spi_device *spi, struct spi_transfer *xfer)
+static int sp7021_spi_slave_tx(struct spi_device *spi, struct spi_transfer *xfer)
 {
 	struct sp7021_spi_ctlr *pspim = spi_controller_get_devdata(spi->controller);
 
@@ -142,7 +142,7 @@ int sp7021_spi_slave_tx(struct spi_device *spi, struct spi_transfer *xfer)
 	return 0;
 }
 
-int sp7021_spi_slave_rx(struct spi_device *spi, struct spi_transfer *xfer)
+static int sp7021_spi_slave_rx(struct spi_device *spi, struct spi_transfer *xfer)
 {
 	struct sp7021_spi_ctlr *pspim = spi_controller_get_devdata(spi->controller);
 	int ret = 0;
@@ -160,7 +160,7 @@ int sp7021_spi_slave_rx(struct spi_device *spi, struct spi_transfer *xfer)
 	return ret;
 }
 
-void sp7021_spi_master_rb(struct sp7021_spi_ctlr *pspim, unsigned int len)
+static void sp7021_spi_master_rb(struct sp7021_spi_ctlr *pspim, unsigned int len)
 {
 	int i;
 
@@ -171,7 +171,7 @@ void sp7021_spi_master_rb(struct sp7021_spi_ctlr *pspim, unsigned int len)
 	}
 }
 
-void sp7021_spi_master_wb(struct sp7021_spi_ctlr *pspim, unsigned int len)
+static void sp7021_spi_master_wb(struct sp7021_spi_ctlr *pspim, unsigned int len)
 {
 	int i;
 
@@ -558,6 +558,7 @@ static int __maybe_unused sp7021_spi_controller_resume(struct device *dev)
 	return clk_prepare_enable(pspim->spi_clk);
 }
 
+#ifdef CONFIG_PM
 static int sp7021_spi_runtime_suspend(struct device *dev)
 {
 	struct spi_controller *ctlr = dev_get_drvdata(dev);
@@ -573,6 +574,7 @@ static int sp7021_spi_runtime_resume(struct device *dev)
 
 	return reset_control_deassert(pspim->rstc);
 }
+#endif
 
 static const struct dev_pm_ops sp7021_spi_pm_ops = {
 	SET_RUNTIME_PM_OPS(sp7021_spi_runtime_suspend,
-- 
GitLab


From 7f99cb5e60392fc3494c610776e733b68784280c Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Thu, 6 Jan 2022 11:35:37 +0100
Subject: [PATCH 0124/1586] x86/CPU/AMD: Use default_groups in kobj_type

There are currently 2 ways to create a set of sysfs files for a
kobj_type, through the default_attrs field, and the default_groups
field. Move the AMD mce sysfs code to use default_groups field which has
been the preferred way since

  aa30f47cf666 ("kobject: Add support for default attribute groups to kobj_type")

so that the obsolete default_attrs field can be removed soon.

Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Borislav Petkov <bp@suse.de>
Tested-by: Yazen Ghannam <yazen.ghannam@amd.com>
Link: https://lore.kernel.org/r/20220106103537.3663852-1-gregkh@linuxfoundation.org
---
 arch/x86/kernel/cpu/mce/amd.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index 9f4b508886dde..1940d305db1c0 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -993,6 +993,7 @@ static struct attribute *default_attrs[] = {
 	NULL,	/* possibly interrupt_enable if supported, see below */
 	NULL,
 };
+ATTRIBUTE_GROUPS(default);
 
 #define to_block(k)	container_of(k, struct threshold_block, kobj)
 #define to_attr(a)	container_of(a, struct threshold_attr, attr)
@@ -1029,7 +1030,7 @@ static void threshold_block_release(struct kobject *kobj);
 
 static struct kobj_type threshold_ktype = {
 	.sysfs_ops		= &threshold_ops,
-	.default_attrs		= default_attrs,
+	.default_groups		= default_groups,
 	.release		= threshold_block_release,
 };
 
@@ -1101,10 +1102,10 @@ static int allocate_threshold_blocks(unsigned int cpu, struct threshold_bank *tb
 	b->threshold_limit	= THRESHOLD_MAX;
 
 	if (b->interrupt_capable) {
-		threshold_ktype.default_attrs[2] = &interrupt_enable.attr;
+		default_attrs[2] = &interrupt_enable.attr;
 		b->interrupt_enable = 1;
 	} else {
-		threshold_ktype.default_attrs[2] = NULL;
+		default_attrs[2] = NULL;
 	}
 
 	INIT_LIST_HEAD(&b->miscj);
-- 
GitLab


From 0dcab41d3487acadf64d0667398e032341bd9918 Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Mon, 31 Jan 2022 15:01:07 -0800
Subject: [PATCH 0125/1586] x86/cpu: Merge Intel and AMD ppin_init() functions

The code to decide whether a system supports the PPIN (Protected
Processor Inventory Number) MSR was cloned from the Intel
implementation. Apart from the X86_FEATURE bit and the MSR numbers it is
identical.

Merge the two functions into common x86 code, but use x86_match_cpu()
instead of the switch (c->x86_model) that was used by the old Intel
code.

No functional change.

Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lore.kernel.org/r/20220131230111.2004669-2-tony.luck@intel.com
---
 arch/x86/kernel/cpu/amd.c       | 30 -------------
 arch/x86/kernel/cpu/common.c    | 74 +++++++++++++++++++++++++++++++++
 arch/x86/kernel/cpu/mce/intel.c | 42 -------------------
 3 files changed, 74 insertions(+), 72 deletions(-)

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 4edb6f0f628c2..bad0fa4c17799 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -394,35 +394,6 @@ static void amd_detect_cmp(struct cpuinfo_x86 *c)
 	per_cpu(cpu_llc_id, cpu) = c->cpu_die_id = c->phys_proc_id;
 }
 
-static void amd_detect_ppin(struct cpuinfo_x86 *c)
-{
-	unsigned long long val;
-
-	if (!cpu_has(c, X86_FEATURE_AMD_PPIN))
-		return;
-
-	/* When PPIN is defined in CPUID, still need to check PPIN_CTL MSR */
-	if (rdmsrl_safe(MSR_AMD_PPIN_CTL, &val))
-		goto clear_ppin;
-
-	/* PPIN is locked in disabled mode, clear feature bit */
-	if ((val & 3UL) == 1UL)
-		goto clear_ppin;
-
-	/* If PPIN is disabled, try to enable it */
-	if (!(val & 2UL)) {
-		wrmsrl_safe(MSR_AMD_PPIN_CTL,  val | 2UL);
-		rdmsrl_safe(MSR_AMD_PPIN_CTL, &val);
-	}
-
-	/* If PPIN_EN bit is 1, return from here; otherwise fall through */
-	if (val & 2UL)
-		return;
-
-clear_ppin:
-	clear_cpu_cap(c, X86_FEATURE_AMD_PPIN);
-}
-
 u32 amd_get_nodes_per_socket(void)
 {
 	return nodes_per_socket;
@@ -947,7 +918,6 @@ static void init_amd(struct cpuinfo_x86 *c)
 	amd_detect_cmp(c);
 	amd_get_topology(c);
 	srat_detect_node(c);
-	amd_detect_ppin(c);
 
 	init_amd_cacheinfo(c);
 
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 7b8382c117889..b0bd8a6b5beb4 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -88,6 +88,78 @@ EXPORT_SYMBOL_GPL(get_llc_id);
 /* L2 cache ID of each logical CPU */
 DEFINE_PER_CPU_READ_MOSTLY(u16, cpu_l2c_id) = BAD_APICID;
 
+static struct ppin_info {
+	int	feature;
+	int	msr_ppin_ctl;
+} ppin_info[] = {
+	[X86_VENDOR_INTEL] = {
+		.feature = X86_FEATURE_INTEL_PPIN,
+		.msr_ppin_ctl = MSR_PPIN_CTL,
+	},
+	[X86_VENDOR_AMD] = {
+		.feature = X86_FEATURE_AMD_PPIN,
+		.msr_ppin_ctl = MSR_AMD_PPIN_CTL,
+	},
+};
+
+static const struct x86_cpu_id ppin_cpuids[] = {
+	X86_MATCH_FEATURE(X86_FEATURE_AMD_PPIN, &ppin_info[X86_VENDOR_AMD]),
+
+	/* Legacy models without CPUID enumeration */
+	X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, &ppin_info[X86_VENDOR_INTEL]),
+	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, &ppin_info[X86_VENDOR_INTEL]),
+	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, &ppin_info[X86_VENDOR_INTEL]),
+	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, &ppin_info[X86_VENDOR_INTEL]),
+	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &ppin_info[X86_VENDOR_INTEL]),
+	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &ppin_info[X86_VENDOR_INTEL]),
+	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &ppin_info[X86_VENDOR_INTEL]),
+	X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &ppin_info[X86_VENDOR_INTEL]),
+	X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &ppin_info[X86_VENDOR_INTEL]),
+	X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &ppin_info[X86_VENDOR_INTEL]),
+
+	{}
+};
+
+static void ppin_init(struct cpuinfo_x86 *c)
+{
+	const struct x86_cpu_id *id;
+	unsigned long long val;
+	struct ppin_info *info;
+
+	id = x86_match_cpu(ppin_cpuids);
+	if (!id)
+		return;
+
+	/*
+	 * Testing the presence of the MSR is not enough. Need to check
+	 * that the PPIN_CTL allows reading of the PPIN.
+	 */
+	info = (struct ppin_info *)id->driver_data;
+
+	if (rdmsrl_safe(info->msr_ppin_ctl, &val))
+		goto clear_ppin;
+
+	if ((val & 3UL) == 1UL) {
+		/* PPIN locked in disabled mode */
+		goto clear_ppin;
+	}
+
+	/* If PPIN is disabled, try to enable */
+	if (!(val & 2UL)) {
+		wrmsrl_safe(info->msr_ppin_ctl,  val | 2UL);
+		rdmsrl_safe(info->msr_ppin_ctl, &val);
+	}
+
+	/* Is the enable bit set? */
+	if (val & 2UL) {
+		set_cpu_cap(c, info->feature);
+		return;
+	}
+
+clear_ppin:
+	clear_cpu_cap(c, info->feature);
+}
+
 /* correctly size the local cpu masks */
 void __init setup_cpu_local_masks(void)
 {
@@ -1655,6 +1727,8 @@ static void identify_cpu(struct cpuinfo_x86 *c)
 			c->x86_capability[i] |= boot_cpu_data.x86_capability[i];
 	}
 
+	ppin_init(c);
+
 	/* Init Machine Check Exception if available. */
 	mcheck_cpu_init(c);
 
diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c
index baafbb37be678..95275a5e57e06 100644
--- a/arch/x86/kernel/cpu/mce/intel.c
+++ b/arch/x86/kernel/cpu/mce/intel.c
@@ -470,47 +470,6 @@ void intel_clear_lmce(void)
 	wrmsrl(MSR_IA32_MCG_EXT_CTL, val);
 }
 
-static void intel_ppin_init(struct cpuinfo_x86 *c)
-{
-	unsigned long long val;
-
-	/*
-	 * Even if testing the presence of the MSR would be enough, we don't
-	 * want to risk the situation where other models reuse this MSR for
-	 * other purposes.
-	 */
-	switch (c->x86_model) {
-	case INTEL_FAM6_IVYBRIDGE_X:
-	case INTEL_FAM6_HASWELL_X:
-	case INTEL_FAM6_BROADWELL_D:
-	case INTEL_FAM6_BROADWELL_X:
-	case INTEL_FAM6_SKYLAKE_X:
-	case INTEL_FAM6_ICELAKE_X:
-	case INTEL_FAM6_ICELAKE_D:
-	case INTEL_FAM6_SAPPHIRERAPIDS_X:
-	case INTEL_FAM6_XEON_PHI_KNL:
-	case INTEL_FAM6_XEON_PHI_KNM:
-
-		if (rdmsrl_safe(MSR_PPIN_CTL, &val))
-			return;
-
-		if ((val & 3UL) == 1UL) {
-			/* PPIN locked in disabled mode */
-			return;
-		}
-
-		/* If PPIN is disabled, try to enable */
-		if (!(val & 2UL)) {
-			wrmsrl_safe(MSR_PPIN_CTL,  val | 2UL);
-			rdmsrl_safe(MSR_PPIN_CTL, &val);
-		}
-
-		/* Is the enable bit set? */
-		if (val & 2UL)
-			set_cpu_cap(c, X86_FEATURE_INTEL_PPIN);
-	}
-}
-
 /*
  * Enable additional error logs from the integrated
  * memory controller on processors that support this.
@@ -535,7 +494,6 @@ void mce_intel_feature_init(struct cpuinfo_x86 *c)
 {
 	intel_init_cmci();
 	intel_init_lmce();
-	intel_ppin_init(c);
 	intel_imc_init(c);
 }
 
-- 
GitLab


From 00a2f23eef7d1fa6c2dfdc613857b84fbf5e2b3b Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Mon, 31 Jan 2022 15:01:08 -0800
Subject: [PATCH 0126/1586] x86/cpu: X86_FEATURE_INTEL_PPIN finally has a CPUID
 bit

After nine generations of adding to model specific list of CPUs that
support PPIN (Protected Processor Inventory Number) Intel allocated
a CPUID bit to enumerate the MSRs.

CPUID(EAX=7, ECX=1).EBX bit 0 enumerates presence of MSR_PPIN_CTL and
MSR_PPIN. Add it to the "scattered" CPUID bits and add an entry to the
ppin_cpuids[] x86_match_cpu() array to catch Intel CPUs that implement
it.

Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lore.kernel.org/r/20220131230111.2004669-3-tony.luck@intel.com
---
 arch/x86/kernel/cpu/common.c    | 1 +
 arch/x86/kernel/cpu/scattered.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index b0bd8a6b5beb4..0681c69a1f09c 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -104,6 +104,7 @@ static struct ppin_info {
 
 static const struct x86_cpu_id ppin_cpuids[] = {
 	X86_MATCH_FEATURE(X86_FEATURE_AMD_PPIN, &ppin_info[X86_VENDOR_AMD]),
+	X86_MATCH_FEATURE(X86_FEATURE_INTEL_PPIN, &ppin_info[X86_VENDOR_INTEL]),
 
 	/* Legacy models without CPUID enumeration */
 	X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, &ppin_info[X86_VENDOR_INTEL]),
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 21d1f062895a8..4143b1e4c5c6d 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -26,6 +26,7 @@ struct cpuid_bit {
 static const struct cpuid_bit cpuid_bits[] = {
 	{ X86_FEATURE_APERFMPERF,       CPUID_ECX,  0, 0x00000006, 0 },
 	{ X86_FEATURE_EPB,		CPUID_ECX,  3, 0x00000006, 0 },
+	{ X86_FEATURE_INTEL_PPIN,	CPUID_EBX,  0, 0x00000007, 1 },
 	{ X86_FEATURE_CQM_LLC,		CPUID_EDX,  1, 0x0000000f, 0 },
 	{ X86_FEATURE_CQM_OCCUP_LLC,	CPUID_EDX,  0, 0x0000000f, 1 },
 	{ X86_FEATURE_CQM_MBM_TOTAL,	CPUID_EDX,  1, 0x0000000f, 1 },
-- 
GitLab


From 822ccfade55b6be7977b364356fcf2d78d8a373a Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Mon, 31 Jan 2022 15:01:09 -0800
Subject: [PATCH 0127/1586] x86/cpu: Read/save PPIN MSR during initialization

Currently, the PPIN (Protected Processor Inventory Number) MSR is read
by every CPU that processes a machine check, CMCI, or just polls machine
check banks from a periodic timer. This is not a "fast" MSR, so this
adds to overhead of processing errors.

Add a new "ppin" field to the cpuinfo_x86 structure. Read and save the
PPIN during initialization. Use this copy in mce_setup() instead of
reading the MSR.

Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lore.kernel.org/r/20220131230111.2004669-4-tony.luck@intel.com
---
 arch/x86/include/asm/processor.h | 2 ++
 arch/x86/kernel/cpu/common.c     | 4 ++++
 arch/x86/kernel/cpu/mce/core.c   | 7 +------
 3 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 2c5f12ae7d042..a87e7c33d5ac1 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -119,6 +119,8 @@ struct cpuinfo_x86 {
 	int			x86_cache_mbm_width_offset;
 	int			x86_power;
 	unsigned long		loops_per_jiffy;
+	/* protected processor identification number */
+	u64			ppin;
 	/* cpuid returned max cores value: */
 	u16			x86_max_cores;
 	u16			apicid;
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 0681c69a1f09c..64deb7727d007 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -91,14 +91,17 @@ DEFINE_PER_CPU_READ_MOSTLY(u16, cpu_l2c_id) = BAD_APICID;
 static struct ppin_info {
 	int	feature;
 	int	msr_ppin_ctl;
+	int	msr_ppin;
 } ppin_info[] = {
 	[X86_VENDOR_INTEL] = {
 		.feature = X86_FEATURE_INTEL_PPIN,
 		.msr_ppin_ctl = MSR_PPIN_CTL,
+		.msr_ppin = MSR_PPIN
 	},
 	[X86_VENDOR_AMD] = {
 		.feature = X86_FEATURE_AMD_PPIN,
 		.msr_ppin_ctl = MSR_AMD_PPIN_CTL,
+		.msr_ppin = MSR_AMD_PPIN
 	},
 };
 
@@ -153,6 +156,7 @@ static void ppin_init(struct cpuinfo_x86 *c)
 
 	/* Is the enable bit set? */
 	if (val & 2UL) {
+		c->ppin = __rdmsr(info->msr_ppin);
 		set_cpu_cap(c, info->feature);
 		return;
 	}
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 5818b837fd4d4..4f1e825033ce9 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -138,12 +138,7 @@ void mce_setup(struct mce *m)
 	m->socketid = cpu_data(m->extcpu).phys_proc_id;
 	m->apicid = cpu_data(m->extcpu).initial_apicid;
 	m->mcgcap = __rdmsr(MSR_IA32_MCG_CAP);
-
-	if (this_cpu_has(X86_FEATURE_INTEL_PPIN))
-		m->ppin = __rdmsr(MSR_PPIN);
-	else if (this_cpu_has(X86_FEATURE_AMD_PPIN))
-		m->ppin = __rdmsr(MSR_AMD_PPIN);
-
+	m->ppin = cpu_data(m->extcpu).ppin;
 	m->microcode = boot_cpu_data.microcode;
 }
 
-- 
GitLab


From 182ecfaf757de234a5262f51c0e699bec7258a67 Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Mon, 31 Jan 2022 15:01:10 -0800
Subject: [PATCH 0128/1586] topology/sysfs: Add format parameter to macro
 defining "show" functions for proc

All the simple (non-mask and non-list files in
/sys/devices/system/cpu/cpu0/topology/ are currently printed as decimal
integers.

Refactor the macro that generates the "show" functions to take a format
parameter to allow future files to display in other formats.

No functional change.

Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://lore.kernel.org/r/20220131230111.2004669-5-tony.luck@intel.com
---
 drivers/base/topology.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/base/topology.c b/drivers/base/topology.c
index fc24e89f9592f..044f3664f8f22 100644
--- a/drivers/base/topology.c
+++ b/drivers/base/topology.c
@@ -14,11 +14,11 @@
 #include <linux/hardirq.h>
 #include <linux/topology.h>
 
-#define define_id_show_func(name)					\
+#define define_id_show_func(name, fmt)					\
 static ssize_t name##_show(struct device *dev,				\
 			   struct device_attribute *attr, char *buf)	\
 {									\
-	return sysfs_emit(buf, "%d\n", topology_##name(dev->id));	\
+	return sysfs_emit(buf, fmt "\n", topology_##name(dev->id));	\
 }
 
 #define define_siblings_read_func(name, mask)					\
@@ -42,20 +42,20 @@ static ssize_t name##_list_read(struct file *file, struct kobject *kobj,	\
 					off, count);				\
 }
 
-define_id_show_func(physical_package_id);
+define_id_show_func(physical_package_id, "%d");
 static DEVICE_ATTR_RO(physical_package_id);
 
 #ifdef TOPOLOGY_DIE_SYSFS
-define_id_show_func(die_id);
+define_id_show_func(die_id, "%d");
 static DEVICE_ATTR_RO(die_id);
 #endif
 
 #ifdef TOPOLOGY_CLUSTER_SYSFS
-define_id_show_func(cluster_id);
+define_id_show_func(cluster_id, "%d");
 static DEVICE_ATTR_RO(cluster_id);
 #endif
 
-define_id_show_func(core_id);
+define_id_show_func(core_id, "%d");
 static DEVICE_ATTR_RO(core_id);
 
 define_siblings_read_func(thread_siblings, sibling_cpumask);
@@ -87,7 +87,7 @@ static BIN_ATTR_RO(package_cpus, 0);
 static BIN_ATTR_RO(package_cpus_list, 0);
 
 #ifdef TOPOLOGY_BOOK_SYSFS
-define_id_show_func(book_id);
+define_id_show_func(book_id, "%d");
 static DEVICE_ATTR_RO(book_id);
 define_siblings_read_func(book_siblings, book_cpumask);
 static BIN_ATTR_RO(book_siblings, 0);
@@ -95,7 +95,7 @@ static BIN_ATTR_RO(book_siblings_list, 0);
 #endif
 
 #ifdef TOPOLOGY_DRAWER_SYSFS
-define_id_show_func(drawer_id);
+define_id_show_func(drawer_id, "%d");
 static DEVICE_ATTR_RO(drawer_id);
 define_siblings_read_func(drawer_siblings, drawer_cpumask);
 static BIN_ATTR_RO(drawer_siblings, 0);
-- 
GitLab


From ab28e944197fa78e6af7c4a0ffd6bba9a5bbacf0 Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Mon, 31 Jan 2022 15:01:11 -0800
Subject: [PATCH 0129/1586] topology/sysfs: Add PPIN in sysfs under cpu
 topology

PPIN is the Protected Processor Identification Number.
This is used to identify the socket as a Field Replaceable Unit (FRU).

Existing code only displays this when reporting errors. But this makes
it inconvenient for large clusters to use it for its intended purpose
of inventory control.

Add ppin to /sys/devices/system/cpu/cpu*/topology to make what
is already available using RDMSR more easily accessible. Make
the file read only for root in case there are still people
concerned about making a unique system "serial number" available.

Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://lore.kernel.org/r/20220131230111.2004669-6-tony.luck@intel.com
---
 Documentation/ABI/stable/sysfs-devices-system-cpu  | 4 ++++
 Documentation/ABI/testing/sysfs-devices-system-cpu | 6 ++++++
 arch/x86/include/asm/topology.h                    | 1 +
 drivers/base/topology.c                            | 4 ++++
 include/linux/topology.h                           | 3 +++
 5 files changed, 18 insertions(+)

diff --git a/Documentation/ABI/stable/sysfs-devices-system-cpu b/Documentation/ABI/stable/sysfs-devices-system-cpu
index 3965ce504484a..902392d7eddf0 100644
--- a/Documentation/ABI/stable/sysfs-devices-system-cpu
+++ b/Documentation/ABI/stable/sysfs-devices-system-cpu
@@ -86,6 +86,10 @@ What:           /sys/devices/system/cpu/cpuX/topology/die_cpus
 Description:    internal kernel map of CPUs within the same die.
 Values:         hexadecimal bitmask.
 
+What:           /sys/devices/system/cpu/cpuX/topology/ppin
+Description:    per-socket protected processor inventory number
+Values:         hexadecimal.
+
 What:           /sys/devices/system/cpu/cpuX/topology/die_cpus_list
 Description:    human-readable list of CPUs within the same die.
                 The format is like 0-3, 8-11, 14,17.
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index 61f5676a7429a..74962c2007902 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -73,6 +73,7 @@ What:		/sys/devices/system/cpu/cpuX/topology/core_id
 		/sys/devices/system/cpu/cpuX/topology/physical_package_id
 		/sys/devices/system/cpu/cpuX/topology/thread_siblings
 		/sys/devices/system/cpu/cpuX/topology/thread_siblings_list
+		/sys/devices/system/cpu/cpuX/topology/ppin
 Date:		December 2008
 Contact:	Linux kernel mailing list <linux-kernel@vger.kernel.org>
 Description:	CPU topology files that describe a logical CPU's relationship
@@ -103,6 +104,11 @@ Description:	CPU topology files that describe a logical CPU's relationship
 		thread_siblings_list: human-readable list of cpuX's hardware
 		threads within the same core as cpuX
 
+		ppin: human-readable Protected Processor Identification
+		Number of the socket the cpu# belongs to. There should be
+		one per physical_package_id. File is readable only to
+		admin.
+
 		See Documentation/admin-guide/cputopology.rst for more information.
 
 
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 2f0b6be8eaabc..43a89476a5222 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -110,6 +110,7 @@ extern const struct cpumask *cpu_clustergroup_mask(int cpu);
 #define topology_logical_die_id(cpu)		(cpu_data(cpu).logical_die_id)
 #define topology_die_id(cpu)			(cpu_data(cpu).cpu_die_id)
 #define topology_core_id(cpu)			(cpu_data(cpu).cpu_core_id)
+#define topology_ppin(cpu)			(cpu_data(cpu).ppin)
 
 extern unsigned int __max_die_per_package;
 
diff --git a/drivers/base/topology.c b/drivers/base/topology.c
index 044f3664f8f22..e9d1efcda89b3 100644
--- a/drivers/base/topology.c
+++ b/drivers/base/topology.c
@@ -58,6 +58,9 @@ static DEVICE_ATTR_RO(cluster_id);
 define_id_show_func(core_id, "%d");
 static DEVICE_ATTR_RO(core_id);
 
+define_id_show_func(ppin, "0x%llx");
+static DEVICE_ATTR_ADMIN_RO(ppin);
+
 define_siblings_read_func(thread_siblings, sibling_cpumask);
 static BIN_ATTR_RO(thread_siblings, 0);
 static BIN_ATTR_RO(thread_siblings_list, 0);
@@ -145,6 +148,7 @@ static struct attribute *default_attrs[] = {
 #ifdef TOPOLOGY_DRAWER_SYSFS
 	&dev_attr_drawer_id.attr,
 #endif
+	&dev_attr_ppin.attr,
 	NULL
 };
 
diff --git a/include/linux/topology.h b/include/linux/topology.h
index a6e201758ae9e..f19bc3626297a 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -211,6 +211,9 @@ static inline int cpu_to_mem(int cpu)
 #ifndef topology_drawer_id
 #define topology_drawer_id(cpu)			((void)(cpu), -1)
 #endif
+#ifndef topology_ppin
+#define topology_ppin(cpu)			((void)(cpu), 0ull)
+#endif
 #ifndef topology_sibling_cpumask
 #define topology_sibling_cpumask(cpu)		cpumask_of(cpu)
 #endif
-- 
GitLab


From 321599693213c81b2ce8530abb27e39528e969f9 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Tue, 1 Feb 2022 02:26:54 +0100
Subject: [PATCH 0130/1586] spi: st-ssc4: Covert to use GPIO descriptors

This switches the ST SSC SPI controller to use GPIO
descriptors from the core instead of GPIO numbers.
It is already using the core parsing of GPIO numbers
so the switch is pretty straight-forward.

Cc: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20220201012654.562578-1-linus.walleij@linaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-st-ssc4.c | 31 ++++---------------------------
 1 file changed, 4 insertions(+), 27 deletions(-)

diff --git a/drivers/spi/spi-st-ssc4.c b/drivers/spi/spi-st-ssc4.c
index 6c44dda9ee8c5..843be803696bc 100644
--- a/drivers/spi/spi-st-ssc4.c
+++ b/drivers/spi/spi-st-ssc4.c
@@ -17,7 +17,6 @@
 #include <linux/pinctrl/consumer.h>
 #include <linux/platform_device.h>
 #include <linux/of.h>
-#include <linux/of_gpio.h>
 #include <linux/of_irq.h>
 #include <linux/pm_runtime.h>
 #include <linux/spi/spi.h>
@@ -171,11 +170,6 @@ static int spi_st_transfer_one(struct spi_master *master,
 	return t->len;
 }
 
-static void spi_st_cleanup(struct spi_device *spi)
-{
-	gpio_free(spi->cs_gpio);
-}
-
 /* the spi->mode bits understood by this driver: */
 #define MODEBITS  (SPI_CPOL | SPI_CPHA | SPI_LSB_FIRST | SPI_LOOP | SPI_CS_HIGH)
 static int spi_st_setup(struct spi_device *spi)
@@ -183,29 +177,17 @@ static int spi_st_setup(struct spi_device *spi)
 	struct spi_st *spi_st = spi_master_get_devdata(spi->master);
 	u32 spi_st_clk, sscbrg, var;
 	u32 hz = spi->max_speed_hz;
-	int cs = spi->cs_gpio;
-	int ret;
 
 	if (!hz)  {
 		dev_err(&spi->dev, "max_speed_hz unspecified\n");
 		return -EINVAL;
 	}
 
-	if (!gpio_is_valid(cs)) {
-		dev_err(&spi->dev, "%d is not a valid gpio\n", cs);
+	if (!spi->cs_gpiod) {
+		dev_err(&spi->dev, "no valid gpio assigned\n");
 		return -EINVAL;
 	}
 
-	ret = gpio_request(cs, dev_name(&spi->dev));
-	if (ret) {
-		dev_err(&spi->dev, "could not request gpio:%d\n", cs);
-		return ret;
-	}
-
-	ret = gpio_direction_output(cs, spi->mode & SPI_CS_HIGH);
-	if (ret)
-		goto out_free_gpio;
-
 	spi_st_clk = clk_get_rate(spi_st->clk);
 
 	/* Set SSC_BRF */
@@ -213,8 +195,7 @@ static int spi_st_setup(struct spi_device *spi)
 	if (sscbrg < 0x07 || sscbrg > BIT(16)) {
 		dev_err(&spi->dev,
 			"baudrate %d outside valid range %d\n", sscbrg, hz);
-		ret = -EINVAL;
-		goto out_free_gpio;
+		return -EINVAL;
 	}
 
 	spi_st->baud = spi_st_clk / (2 * sscbrg);
@@ -263,10 +244,6 @@ static int spi_st_setup(struct spi_device *spi)
 	readl_relaxed(spi_st->base + SSC_RBUF);
 
 	return 0;
-
-out_free_gpio:
-	gpio_free(cs);
-	return ret;
 }
 
 /* Interrupt fired when TX shift register becomes empty */
@@ -309,11 +286,11 @@ static int spi_st_probe(struct platform_device *pdev)
 	master->dev.of_node		= np;
 	master->mode_bits		= MODEBITS;
 	master->setup			= spi_st_setup;
-	master->cleanup			= spi_st_cleanup;
 	master->transfer_one		= spi_st_transfer_one;
 	master->bits_per_word_mask	= SPI_BPW_MASK(8) | SPI_BPW_MASK(16);
 	master->auto_runtime_pm		= true;
 	master->bus_num			= pdev->id;
+	master->use_gpio_descriptors	= true;
 	spi_st				= spi_master_get_devdata(master);
 
 	spi_st->clk = devm_clk_get(&pdev->dev, "ssc");
-- 
GitLab


From b651d1da86aa525c5a5b2bd61f528353c28d589d Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Tue, 1 Feb 2022 02:29:56 +0100
Subject: [PATCH 0131/1586] spi: bcm2835aux: Convert to use GPIO descriptors

This one is pretty straight forward to switch over, the driver
already relies on inspecting cs_gpio just check cs_gpiod instead
and stop the special handling of requesting the GPIO and stuff
the core will take care of.

Cc: Lukas Wunner <lukas@wunner.de>
Cc: Martin Sperl <kernel@martin.sperl.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20220201012956.563272-1-linus.walleij@linaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-bcm2835aux.c | 21 ++++-----------------
 1 file changed, 4 insertions(+), 17 deletions(-)

diff --git a/drivers/spi/spi-bcm2835aux.c b/drivers/spi/spi-bcm2835aux.c
index 7d709a8c833bb..e285219223308 100644
--- a/drivers/spi/spi-bcm2835aux.c
+++ b/drivers/spi/spi-bcm2835aux.c
@@ -22,7 +22,6 @@
 #include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_device.h>
-#include <linux/of_gpio.h>
 #include <linux/of_irq.h>
 #include <linux/regmap.h>
 #include <linux/spi/spi.h>
@@ -445,25 +444,12 @@ static void bcm2835aux_spi_handle_err(struct spi_master *master,
 
 static int bcm2835aux_spi_setup(struct spi_device *spi)
 {
-	int ret;
-
 	/* sanity check for native cs */
 	if (spi->mode & SPI_NO_CS)
 		return 0;
-	if (gpio_is_valid(spi->cs_gpio)) {
-		/* with gpio-cs set the GPIO to the correct level
-		 * and as output (in case the dt has the gpio not configured
-		 * as output but native cs)
-		 */
-		ret = gpio_direction_output(spi->cs_gpio,
-					    (spi->mode & SPI_CS_HIGH) ? 0 : 1);
-		if (ret)
-			dev_err(&spi->dev,
-				"could not set gpio %i as output: %i\n",
-				spi->cs_gpio, ret);
-
-		return ret;
-	}
+
+	if (spi->cs_gpiod)
+		return 0;
 
 	/* for dt-backwards compatibility: only support native on CS0
 	 * known things not supported with broken native CS:
@@ -519,6 +505,7 @@ static int bcm2835aux_spi_probe(struct platform_device *pdev)
 	master->prepare_message = bcm2835aux_spi_prepare_message;
 	master->unprepare_message = bcm2835aux_spi_unprepare_message;
 	master->dev.of_node = pdev->dev.of_node;
+	master->use_gpio_descriptors = true;
 
 	bs = spi_master_get_devdata(master);
 
-- 
GitLab


From 941bffd7d7f5d6030a54184c5d81b0eb9116ca9a Mon Sep 17 00:00:00 2001
From: Stefan Binding <sbinding@opensource.cirrus.com>
Date: Fri, 21 Jan 2022 17:24:23 +0000
Subject: [PATCH 0132/1586] spi: Make spi_alloc_device and spi_add_device
 public again

This functions were previously made private since they
were not used. However, these functions will be needed
again.

Partial revert of commit da21fde0fdb3
("spi: Make several public functions private to spi.c")

Signed-off-by: Stefan Binding <sbinding@opensource.cirrus.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20220121172431.6876-2-sbinding@opensource.cirrus.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi.c       |  6 ++++--
 include/linux/spi/spi.h | 12 ++++++++++++
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 4599b121d7442..1eb84101c4ad8 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -532,7 +532,7 @@ static DEFINE_MUTEX(board_lock);
  *
  * Return: a pointer to the new device, or NULL.
  */
-static struct spi_device *spi_alloc_device(struct spi_controller *ctlr)
+struct spi_device *spi_alloc_device(struct spi_controller *ctlr)
 {
 	struct spi_device	*spi;
 
@@ -557,6 +557,7 @@ static struct spi_device *spi_alloc_device(struct spi_controller *ctlr)
 	device_initialize(&spi->dev);
 	return spi;
 }
+EXPORT_SYMBOL_GPL(spi_alloc_device);
 
 static void spi_dev_set_name(struct spi_device *spi)
 {
@@ -652,7 +653,7 @@ static int __spi_add_device(struct spi_device *spi)
  *
  * Return: 0 on success; negative errno on failure
  */
-static int spi_add_device(struct spi_device *spi)
+int spi_add_device(struct spi_device *spi)
 {
 	struct spi_controller *ctlr = spi->controller;
 	struct device *dev = ctlr->dev.parent;
@@ -673,6 +674,7 @@ static int spi_add_device(struct spi_device *spi)
 	mutex_unlock(&ctlr->add_lock);
 	return status;
 }
+EXPORT_SYMBOL_GPL(spi_add_device);
 
 static int spi_add_device_locked(struct spi_device *spi)
 {
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 7ab3fed7b8043..0346a3ff27fd8 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -1452,7 +1452,19 @@ spi_register_board_info(struct spi_board_info const *info, unsigned n)
  * use spi_new_device() to describe each device.  You can also call
  * spi_unregister_device() to start making that device vanish, but
  * normally that would be handled by spi_unregister_controller().
+ *
+ * You can also use spi_alloc_device() and spi_add_device() to use a two
+ * stage registration sequence for each spi_device. This gives the caller
+ * some more control over the spi_device structure before it is registered,
+ * but requires that caller to initialize fields that would otherwise
+ * be defined using the board info.
  */
+extern struct spi_device *
+spi_alloc_device(struct spi_controller *ctlr);
+
+extern int
+spi_add_device(struct spi_device *spi);
+
 extern struct spi_device *
 spi_new_device(struct spi_controller *, struct spi_board_info *);
 
-- 
GitLab


From 70dd264bc07aee4f89e65138db11e908701388dd Mon Sep 17 00:00:00 2001
From: Stefan Binding <sbinding@opensource.cirrus.com>
Date: Fri, 21 Jan 2022 17:24:24 +0000
Subject: [PATCH 0133/1586] spi: Create helper API to lookup ACPI info for spi
 device

This can then be used to find a spi resource inside an
ACPI node, and allocate a spi device.

Signed-off-by: Stefan Binding <sbinding@opensource.cirrus.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20220121172431.6876-3-sbinding@opensource.cirrus.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi.c       | 46 ++++++++++++++++++++++++++++++++---------
 include/linux/spi/spi.h |  6 ++++++
 2 files changed, 42 insertions(+), 10 deletions(-)

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 1eb84101c4ad8..13f4701f0694d 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -2410,8 +2410,18 @@ static int acpi_spi_add_resource(struct acpi_resource *ares, void *data)
 	return 1;
 }
 
-static acpi_status acpi_register_spi_device(struct spi_controller *ctlr,
-					    struct acpi_device *adev)
+/**
+ * acpi_spi_device_alloc - Allocate a spi device, and fill it in with ACPI information
+ * @ctlr: controller to which the spi device belongs
+ * @adev: ACPI Device for the spi device
+ *
+ * This should be used to allocate a new spi device from and ACPI Node.
+ * The caller is responsible for calling spi_add_device to register the spi device.
+ *
+ * Return: a pointer to the new device, or ERR_PTR on error.
+ */
+struct spi_device *acpi_spi_device_alloc(struct spi_controller *ctlr,
+					 struct acpi_device *adev)
 {
 	acpi_handle parent_handle = NULL;
 	struct list_head resource_list;
@@ -2419,10 +2429,6 @@ static acpi_status acpi_register_spi_device(struct spi_controller *ctlr,
 	struct spi_device *spi;
 	int ret;
 
-	if (acpi_bus_get_status(adev) || !adev->status.present ||
-	    acpi_device_enumerated(adev))
-		return AE_OK;
-
 	lookup.ctlr		= ctlr;
 	lookup.irq		= -1;
 
@@ -2433,7 +2439,7 @@ static acpi_status acpi_register_spi_device(struct spi_controller *ctlr,
 
 	if (ret < 0)
 		/* found SPI in _CRS but it points to another controller */
-		return AE_OK;
+		return ERR_PTR(-ENODEV);
 
 	if (!lookup.max_speed_hz &&
 	    ACPI_SUCCESS(acpi_get_parent(adev->handle, &parent_handle)) &&
@@ -2443,16 +2449,15 @@ static acpi_status acpi_register_spi_device(struct spi_controller *ctlr,
 	}
 
 	if (!lookup.max_speed_hz)
-		return AE_OK;
+		return ERR_PTR(-ENODEV);
 
 	spi = spi_alloc_device(ctlr);
 	if (!spi) {
 		dev_err(&ctlr->dev, "failed to allocate SPI device for %s\n",
 			dev_name(&adev->dev));
-		return AE_NO_MEMORY;
+		return ERR_PTR(-ENOMEM);
 	}
 
-
 	ACPI_COMPANION_SET(&spi->dev, adev);
 	spi->max_speed_hz	= lookup.max_speed_hz;
 	spi->mode		|= lookup.mode;
@@ -2460,6 +2465,27 @@ static acpi_status acpi_register_spi_device(struct spi_controller *ctlr,
 	spi->bits_per_word	= lookup.bits_per_word;
 	spi->chip_select	= lookup.chip_select;
 
+	return spi;
+}
+EXPORT_SYMBOL_GPL(acpi_spi_device_alloc);
+
+static acpi_status acpi_register_spi_device(struct spi_controller *ctlr,
+					    struct acpi_device *adev)
+{
+	struct spi_device *spi;
+
+	if (acpi_bus_get_status(adev) || !adev->status.present ||
+	    acpi_device_enumerated(adev))
+		return AE_OK;
+
+	spi = acpi_spi_device_alloc(ctlr, adev);
+	if (IS_ERR(spi)) {
+		if (PTR_ERR(spi) == -ENOMEM)
+			return AE_NO_MEMORY;
+		else
+			return AE_OK;
+	}
+
 	acpi_set_modalias(adev, acpi_device_hid(adev), spi->modalias,
 			  sizeof(spi->modalias));
 
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 0346a3ff27fd8..d159cef12f1a9 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -16,6 +16,7 @@
 #include <linux/gpio/consumer.h>
 
 #include <uapi/linux/spi/spi.h>
+#include <linux/acpi.h>
 
 struct dma_chan;
 struct software_node;
@@ -759,6 +760,11 @@ extern int devm_spi_register_controller(struct device *dev,
 					struct spi_controller *ctlr);
 extern void spi_unregister_controller(struct spi_controller *ctlr);
 
+#if IS_ENABLED(CONFIG_ACPI)
+extern struct spi_device *acpi_spi_device_alloc(struct spi_controller *ctlr,
+						struct acpi_device *adev);
+#endif
+
 /*
  * SPI resource management while processing a SPI message
  */
-- 
GitLab


From 92640f98a78c6a3ea1ca32143144241eceb129bd Mon Sep 17 00:00:00 2001
From: Stefan Binding <sbinding@opensource.cirrus.com>
Date: Fri, 21 Jan 2022 17:24:25 +0000
Subject: [PATCH 0134/1586] spi: Support selection of the index of the ACPI Spi
 Resource before alloc

If a node contains more than one SPI resource it may be necessary to
use an index to select which one you want to allocate a spi device for.

Signed-off-by: Stefan Binding <sbinding@opensource.cirrus.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20220121172431.6876-4-sbinding@opensource.cirrus.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi.c       | 51 +++++++++++++++++++++++++++++++++++------
 include/linux/spi/spi.h |  3 ++-
 2 files changed, 46 insertions(+), 8 deletions(-)

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 13f4701f0694d..06c0a308b38b3 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -2320,6 +2320,8 @@ struct acpi_spi_lookup {
 	int			irq;
 	u8			bits_per_word;
 	u8			chip_select;
+	int			n;
+	int			index;
 };
 
 static void acpi_spi_parse_apple_properties(struct acpi_device *dev,
@@ -2351,6 +2353,8 @@ static void acpi_spi_parse_apple_properties(struct acpi_device *dev,
 		lookup->mode |= SPI_CPHA;
 }
 
+static struct spi_controller *acpi_spi_find_controller_by_adev(struct acpi_device *adev);
+
 static int acpi_spi_add_resource(struct acpi_resource *ares, void *data)
 {
 	struct acpi_spi_lookup *lookup = data;
@@ -2364,14 +2368,35 @@ static int acpi_spi_add_resource(struct acpi_resource *ares, void *data)
 		sb = &ares->data.spi_serial_bus;
 		if (sb->type == ACPI_RESOURCE_SERIAL_TYPE_SPI) {
 
+			if (lookup->index != -1 && lookup->n++ != lookup->index)
+				return 1;
+
+			if (lookup->index == -1 && !ctlr)
+				return -ENODEV;
+
 			status = acpi_get_handle(NULL,
 						 sb->resource_source.string_ptr,
 						 &parent_handle);
 
-			if (ACPI_FAILURE(status) ||
-			    ACPI_HANDLE(ctlr->dev.parent) != parent_handle)
+			if (ACPI_FAILURE(status))
 				return -ENODEV;
 
+			if (ctlr) {
+				if (ACPI_HANDLE(ctlr->dev.parent) != parent_handle)
+					return -ENODEV;
+			} else {
+				struct acpi_device *adev;
+
+				if (acpi_bus_get_device(parent_handle, &adev))
+					return -ENODEV;
+
+				ctlr = acpi_spi_find_controller_by_adev(adev);
+				if (!ctlr)
+					return -ENODEV;
+
+				lookup->ctlr = ctlr;
+			}
+
 			/*
 			 * ACPI DeviceSelection numbering is handled by the
 			 * host controller driver in Windows and can vary
@@ -2414,14 +2439,21 @@ static int acpi_spi_add_resource(struct acpi_resource *ares, void *data)
  * acpi_spi_device_alloc - Allocate a spi device, and fill it in with ACPI information
  * @ctlr: controller to which the spi device belongs
  * @adev: ACPI Device for the spi device
+ * @index: Index of the spi resource inside the ACPI Node
  *
  * This should be used to allocate a new spi device from and ACPI Node.
  * The caller is responsible for calling spi_add_device to register the spi device.
  *
+ * If ctlr is set to NULL, the Controller for the spi device will be looked up
+ * using the resource.
+ * If index is set to -1, index is not used.
+ * Note: If index is -1, ctlr must be set.
+ *
  * Return: a pointer to the new device, or ERR_PTR on error.
  */
 struct spi_device *acpi_spi_device_alloc(struct spi_controller *ctlr,
-					 struct acpi_device *adev)
+					 struct acpi_device *adev,
+					 int index)
 {
 	acpi_handle parent_handle = NULL;
 	struct list_head resource_list;
@@ -2429,8 +2461,13 @@ struct spi_device *acpi_spi_device_alloc(struct spi_controller *ctlr,
 	struct spi_device *spi;
 	int ret;
 
+	if (!ctlr && index == -1)
+		return ERR_PTR(-EINVAL);
+
 	lookup.ctlr		= ctlr;
 	lookup.irq		= -1;
+	lookup.index		= index;
+	lookup.n		= 0;
 
 	INIT_LIST_HEAD(&resource_list);
 	ret = acpi_dev_get_resources(adev, &resource_list,
@@ -2443,7 +2480,7 @@ struct spi_device *acpi_spi_device_alloc(struct spi_controller *ctlr,
 
 	if (!lookup.max_speed_hz &&
 	    ACPI_SUCCESS(acpi_get_parent(adev->handle, &parent_handle)) &&
-	    ACPI_HANDLE(ctlr->dev.parent) == parent_handle) {
+	    ACPI_HANDLE(lookup.ctlr->dev.parent) == parent_handle) {
 		/* Apple does not use _CRS but nested devices for SPI slaves */
 		acpi_spi_parse_apple_properties(adev, &lookup);
 	}
@@ -2451,9 +2488,9 @@ struct spi_device *acpi_spi_device_alloc(struct spi_controller *ctlr,
 	if (!lookup.max_speed_hz)
 		return ERR_PTR(-ENODEV);
 
-	spi = spi_alloc_device(ctlr);
+	spi = spi_alloc_device(lookup.ctlr);
 	if (!spi) {
-		dev_err(&ctlr->dev, "failed to allocate SPI device for %s\n",
+		dev_err(&lookup.ctlr->dev, "failed to allocate SPI device for %s\n",
 			dev_name(&adev->dev));
 		return ERR_PTR(-ENOMEM);
 	}
@@ -2478,7 +2515,7 @@ static acpi_status acpi_register_spi_device(struct spi_controller *ctlr,
 	    acpi_device_enumerated(adev))
 		return AE_OK;
 
-	spi = acpi_spi_device_alloc(ctlr, adev);
+	spi = acpi_spi_device_alloc(ctlr, adev, -1);
 	if (IS_ERR(spi)) {
 		if (PTR_ERR(spi) == -ENOMEM)
 			return AE_NO_MEMORY;
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index d159cef12f1a9..e5bbb9cbd3d7a 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -762,7 +762,8 @@ extern void spi_unregister_controller(struct spi_controller *ctlr);
 
 #if IS_ENABLED(CONFIG_ACPI)
 extern struct spi_device *acpi_spi_device_alloc(struct spi_controller *ctlr,
-						struct acpi_device *adev);
+						struct acpi_device *adev,
+						int index);
 #endif
 
 /*
-- 
GitLab


From 113962301d2d9a5c11381d9c25ddea7af71be2ff Mon Sep 17 00:00:00 2001
From: Stefan Binding <sbinding@opensource.cirrus.com>
Date: Fri, 21 Jan 2022 17:24:26 +0000
Subject: [PATCH 0135/1586] spi: Add API to count spi acpi resources

Some ACPI nodes may have more than one Spi Resource.
To be able to handle these case, its necessary to have
a way of counting these resources.

Signed-off-by: Stefan Binding <sbinding@opensource.cirrus.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20220121172431.6876-5-sbinding@opensource.cirrus.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi.c       | 40 ++++++++++++++++++++++++++++++++++++++++
 include/linux/spi/spi.h |  1 +
 2 files changed, 41 insertions(+)

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 06c0a308b38b3..ec9f2ed579e34 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -2324,6 +2324,46 @@ struct acpi_spi_lookup {
 	int			index;
 };
 
+static int acpi_spi_count(struct acpi_resource *ares, void *data)
+{
+	struct acpi_resource_spi_serialbus *sb;
+	int *count = data;
+
+	if (ares->type != ACPI_RESOURCE_TYPE_SERIAL_BUS)
+		return 1;
+
+	sb = &ares->data.spi_serial_bus;
+	if (sb->type != ACPI_RESOURCE_SERIAL_TYPE_SPI)
+		return 1;
+
+	*count = *count + 1;
+
+	return 1;
+}
+
+/**
+ * acpi_spi_count_resources - Count the number of SpiSerialBus resources
+ * @adev:	ACPI device
+ *
+ * Returns the number of SpiSerialBus resources in the ACPI-device's
+ * resource-list; or a negative error code.
+ */
+int acpi_spi_count_resources(struct acpi_device *adev)
+{
+	LIST_HEAD(r);
+	int count = 0;
+	int ret;
+
+	ret = acpi_dev_get_resources(adev, &r, acpi_spi_count, &count);
+	if (ret < 0)
+		return ret;
+
+	acpi_dev_free_resource_list(&r);
+
+	return count;
+}
+EXPORT_SYMBOL_GPL(acpi_spi_count_resources);
+
 static void acpi_spi_parse_apple_properties(struct acpi_device *dev,
 					    struct acpi_spi_lookup *lookup)
 {
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index e5bbb9cbd3d7a..394b4241d9890 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -764,6 +764,7 @@ extern void spi_unregister_controller(struct spi_controller *ctlr);
 extern struct spi_device *acpi_spi_device_alloc(struct spi_controller *ctlr,
 						struct acpi_device *adev,
 						int index);
+int acpi_spi_count_resources(struct acpi_device *adev);
 #endif
 
 /*
-- 
GitLab


From e3dc1399506f894110667ee5c66a6a70f06f3348 Mon Sep 17 00:00:00 2001
From: Stefan Binding <sbinding@opensource.cirrus.com>
Date: Fri, 21 Jan 2022 17:24:23 +0000
Subject: [PATCH 0136/1586] spi: Make spi_alloc_device and spi_add_device
 public again

This functions were previously made private since they
were not used. However, these functions will be needed
again.

Partial revert of commit da21fde0fdb3
("spi: Make several public functions private to spi.c")

Signed-off-by: Stefan Binding <sbinding@opensource.cirrus.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20220121172431.6876-2-sbinding@opensource.cirrus.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi.c       |  6 ++++--
 include/linux/spi/spi.h | 12 ++++++++++++
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 4599b121d7442..1eb84101c4ad8 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -532,7 +532,7 @@ static DEFINE_MUTEX(board_lock);
  *
  * Return: a pointer to the new device, or NULL.
  */
-static struct spi_device *spi_alloc_device(struct spi_controller *ctlr)
+struct spi_device *spi_alloc_device(struct spi_controller *ctlr)
 {
 	struct spi_device	*spi;
 
@@ -557,6 +557,7 @@ static struct spi_device *spi_alloc_device(struct spi_controller *ctlr)
 	device_initialize(&spi->dev);
 	return spi;
 }
+EXPORT_SYMBOL_GPL(spi_alloc_device);
 
 static void spi_dev_set_name(struct spi_device *spi)
 {
@@ -652,7 +653,7 @@ static int __spi_add_device(struct spi_device *spi)
  *
  * Return: 0 on success; negative errno on failure
  */
-static int spi_add_device(struct spi_device *spi)
+int spi_add_device(struct spi_device *spi)
 {
 	struct spi_controller *ctlr = spi->controller;
 	struct device *dev = ctlr->dev.parent;
@@ -673,6 +674,7 @@ static int spi_add_device(struct spi_device *spi)
 	mutex_unlock(&ctlr->add_lock);
 	return status;
 }
+EXPORT_SYMBOL_GPL(spi_add_device);
 
 static int spi_add_device_locked(struct spi_device *spi)
 {
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 7ab3fed7b8043..0346a3ff27fd8 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -1452,7 +1452,19 @@ spi_register_board_info(struct spi_board_info const *info, unsigned n)
  * use spi_new_device() to describe each device.  You can also call
  * spi_unregister_device() to start making that device vanish, but
  * normally that would be handled by spi_unregister_controller().
+ *
+ * You can also use spi_alloc_device() and spi_add_device() to use a two
+ * stage registration sequence for each spi_device. This gives the caller
+ * some more control over the spi_device structure before it is registered,
+ * but requires that caller to initialize fields that would otherwise
+ * be defined using the board info.
  */
+extern struct spi_device *
+spi_alloc_device(struct spi_controller *ctlr);
+
+extern int
+spi_add_device(struct spi_device *spi);
+
 extern struct spi_device *
 spi_new_device(struct spi_controller *, struct spi_board_info *);
 
-- 
GitLab


From 000bee0ed70af79e610444096fb453430220960f Mon Sep 17 00:00:00 2001
From: Stefan Binding <sbinding@opensource.cirrus.com>
Date: Fri, 21 Jan 2022 17:24:24 +0000
Subject: [PATCH 0137/1586] spi: Create helper API to lookup ACPI info for spi
 device

This can then be used to find a spi resource inside an
ACPI node, and allocate a spi device.

Signed-off-by: Stefan Binding <sbinding@opensource.cirrus.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20220121172431.6876-3-sbinding@opensource.cirrus.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi.c       | 46 ++++++++++++++++++++++++++++++++---------
 include/linux/spi/spi.h |  6 ++++++
 2 files changed, 42 insertions(+), 10 deletions(-)

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 1eb84101c4ad8..13f4701f0694d 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -2410,8 +2410,18 @@ static int acpi_spi_add_resource(struct acpi_resource *ares, void *data)
 	return 1;
 }
 
-static acpi_status acpi_register_spi_device(struct spi_controller *ctlr,
-					    struct acpi_device *adev)
+/**
+ * acpi_spi_device_alloc - Allocate a spi device, and fill it in with ACPI information
+ * @ctlr: controller to which the spi device belongs
+ * @adev: ACPI Device for the spi device
+ *
+ * This should be used to allocate a new spi device from and ACPI Node.
+ * The caller is responsible for calling spi_add_device to register the spi device.
+ *
+ * Return: a pointer to the new device, or ERR_PTR on error.
+ */
+struct spi_device *acpi_spi_device_alloc(struct spi_controller *ctlr,
+					 struct acpi_device *adev)
 {
 	acpi_handle parent_handle = NULL;
 	struct list_head resource_list;
@@ -2419,10 +2429,6 @@ static acpi_status acpi_register_spi_device(struct spi_controller *ctlr,
 	struct spi_device *spi;
 	int ret;
 
-	if (acpi_bus_get_status(adev) || !adev->status.present ||
-	    acpi_device_enumerated(adev))
-		return AE_OK;
-
 	lookup.ctlr		= ctlr;
 	lookup.irq		= -1;
 
@@ -2433,7 +2439,7 @@ static acpi_status acpi_register_spi_device(struct spi_controller *ctlr,
 
 	if (ret < 0)
 		/* found SPI in _CRS but it points to another controller */
-		return AE_OK;
+		return ERR_PTR(-ENODEV);
 
 	if (!lookup.max_speed_hz &&
 	    ACPI_SUCCESS(acpi_get_parent(adev->handle, &parent_handle)) &&
@@ -2443,16 +2449,15 @@ static acpi_status acpi_register_spi_device(struct spi_controller *ctlr,
 	}
 
 	if (!lookup.max_speed_hz)
-		return AE_OK;
+		return ERR_PTR(-ENODEV);
 
 	spi = spi_alloc_device(ctlr);
 	if (!spi) {
 		dev_err(&ctlr->dev, "failed to allocate SPI device for %s\n",
 			dev_name(&adev->dev));
-		return AE_NO_MEMORY;
+		return ERR_PTR(-ENOMEM);
 	}
 
-
 	ACPI_COMPANION_SET(&spi->dev, adev);
 	spi->max_speed_hz	= lookup.max_speed_hz;
 	spi->mode		|= lookup.mode;
@@ -2460,6 +2465,27 @@ static acpi_status acpi_register_spi_device(struct spi_controller *ctlr,
 	spi->bits_per_word	= lookup.bits_per_word;
 	spi->chip_select	= lookup.chip_select;
 
+	return spi;
+}
+EXPORT_SYMBOL_GPL(acpi_spi_device_alloc);
+
+static acpi_status acpi_register_spi_device(struct spi_controller *ctlr,
+					    struct acpi_device *adev)
+{
+	struct spi_device *spi;
+
+	if (acpi_bus_get_status(adev) || !adev->status.present ||
+	    acpi_device_enumerated(adev))
+		return AE_OK;
+
+	spi = acpi_spi_device_alloc(ctlr, adev);
+	if (IS_ERR(spi)) {
+		if (PTR_ERR(spi) == -ENOMEM)
+			return AE_NO_MEMORY;
+		else
+			return AE_OK;
+	}
+
 	acpi_set_modalias(adev, acpi_device_hid(adev), spi->modalias,
 			  sizeof(spi->modalias));
 
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 0346a3ff27fd8..d159cef12f1a9 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -16,6 +16,7 @@
 #include <linux/gpio/consumer.h>
 
 #include <uapi/linux/spi/spi.h>
+#include <linux/acpi.h>
 
 struct dma_chan;
 struct software_node;
@@ -759,6 +760,11 @@ extern int devm_spi_register_controller(struct device *dev,
 					struct spi_controller *ctlr);
 extern void spi_unregister_controller(struct spi_controller *ctlr);
 
+#if IS_ENABLED(CONFIG_ACPI)
+extern struct spi_device *acpi_spi_device_alloc(struct spi_controller *ctlr,
+						struct acpi_device *adev);
+#endif
+
 /*
  * SPI resource management while processing a SPI message
  */
-- 
GitLab


From 87e59b36e5e26122efd55d77adb9fac827987db0 Mon Sep 17 00:00:00 2001
From: Stefan Binding <sbinding@opensource.cirrus.com>
Date: Fri, 21 Jan 2022 17:24:25 +0000
Subject: [PATCH 0138/1586] spi: Support selection of the index of the ACPI Spi
 Resource before alloc

If a node contains more than one SPI resource it may be necessary to
use an index to select which one you want to allocate a spi device for.

Signed-off-by: Stefan Binding <sbinding@opensource.cirrus.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20220121172431.6876-4-sbinding@opensource.cirrus.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi.c       | 51 +++++++++++++++++++++++++++++++++++------
 include/linux/spi/spi.h |  3 ++-
 2 files changed, 46 insertions(+), 8 deletions(-)

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 13f4701f0694d..06c0a308b38b3 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -2320,6 +2320,8 @@ struct acpi_spi_lookup {
 	int			irq;
 	u8			bits_per_word;
 	u8			chip_select;
+	int			n;
+	int			index;
 };
 
 static void acpi_spi_parse_apple_properties(struct acpi_device *dev,
@@ -2351,6 +2353,8 @@ static void acpi_spi_parse_apple_properties(struct acpi_device *dev,
 		lookup->mode |= SPI_CPHA;
 }
 
+static struct spi_controller *acpi_spi_find_controller_by_adev(struct acpi_device *adev);
+
 static int acpi_spi_add_resource(struct acpi_resource *ares, void *data)
 {
 	struct acpi_spi_lookup *lookup = data;
@@ -2364,14 +2368,35 @@ static int acpi_spi_add_resource(struct acpi_resource *ares, void *data)
 		sb = &ares->data.spi_serial_bus;
 		if (sb->type == ACPI_RESOURCE_SERIAL_TYPE_SPI) {
 
+			if (lookup->index != -1 && lookup->n++ != lookup->index)
+				return 1;
+
+			if (lookup->index == -1 && !ctlr)
+				return -ENODEV;
+
 			status = acpi_get_handle(NULL,
 						 sb->resource_source.string_ptr,
 						 &parent_handle);
 
-			if (ACPI_FAILURE(status) ||
-			    ACPI_HANDLE(ctlr->dev.parent) != parent_handle)
+			if (ACPI_FAILURE(status))
 				return -ENODEV;
 
+			if (ctlr) {
+				if (ACPI_HANDLE(ctlr->dev.parent) != parent_handle)
+					return -ENODEV;
+			} else {
+				struct acpi_device *adev;
+
+				if (acpi_bus_get_device(parent_handle, &adev))
+					return -ENODEV;
+
+				ctlr = acpi_spi_find_controller_by_adev(adev);
+				if (!ctlr)
+					return -ENODEV;
+
+				lookup->ctlr = ctlr;
+			}
+
 			/*
 			 * ACPI DeviceSelection numbering is handled by the
 			 * host controller driver in Windows and can vary
@@ -2414,14 +2439,21 @@ static int acpi_spi_add_resource(struct acpi_resource *ares, void *data)
  * acpi_spi_device_alloc - Allocate a spi device, and fill it in with ACPI information
  * @ctlr: controller to which the spi device belongs
  * @adev: ACPI Device for the spi device
+ * @index: Index of the spi resource inside the ACPI Node
  *
  * This should be used to allocate a new spi device from and ACPI Node.
  * The caller is responsible for calling spi_add_device to register the spi device.
  *
+ * If ctlr is set to NULL, the Controller for the spi device will be looked up
+ * using the resource.
+ * If index is set to -1, index is not used.
+ * Note: If index is -1, ctlr must be set.
+ *
  * Return: a pointer to the new device, or ERR_PTR on error.
  */
 struct spi_device *acpi_spi_device_alloc(struct spi_controller *ctlr,
-					 struct acpi_device *adev)
+					 struct acpi_device *adev,
+					 int index)
 {
 	acpi_handle parent_handle = NULL;
 	struct list_head resource_list;
@@ -2429,8 +2461,13 @@ struct spi_device *acpi_spi_device_alloc(struct spi_controller *ctlr,
 	struct spi_device *spi;
 	int ret;
 
+	if (!ctlr && index == -1)
+		return ERR_PTR(-EINVAL);
+
 	lookup.ctlr		= ctlr;
 	lookup.irq		= -1;
+	lookup.index		= index;
+	lookup.n		= 0;
 
 	INIT_LIST_HEAD(&resource_list);
 	ret = acpi_dev_get_resources(adev, &resource_list,
@@ -2443,7 +2480,7 @@ struct spi_device *acpi_spi_device_alloc(struct spi_controller *ctlr,
 
 	if (!lookup.max_speed_hz &&
 	    ACPI_SUCCESS(acpi_get_parent(adev->handle, &parent_handle)) &&
-	    ACPI_HANDLE(ctlr->dev.parent) == parent_handle) {
+	    ACPI_HANDLE(lookup.ctlr->dev.parent) == parent_handle) {
 		/* Apple does not use _CRS but nested devices for SPI slaves */
 		acpi_spi_parse_apple_properties(adev, &lookup);
 	}
@@ -2451,9 +2488,9 @@ struct spi_device *acpi_spi_device_alloc(struct spi_controller *ctlr,
 	if (!lookup.max_speed_hz)
 		return ERR_PTR(-ENODEV);
 
-	spi = spi_alloc_device(ctlr);
+	spi = spi_alloc_device(lookup.ctlr);
 	if (!spi) {
-		dev_err(&ctlr->dev, "failed to allocate SPI device for %s\n",
+		dev_err(&lookup.ctlr->dev, "failed to allocate SPI device for %s\n",
 			dev_name(&adev->dev));
 		return ERR_PTR(-ENOMEM);
 	}
@@ -2478,7 +2515,7 @@ static acpi_status acpi_register_spi_device(struct spi_controller *ctlr,
 	    acpi_device_enumerated(adev))
 		return AE_OK;
 
-	spi = acpi_spi_device_alloc(ctlr, adev);
+	spi = acpi_spi_device_alloc(ctlr, adev, -1);
 	if (IS_ERR(spi)) {
 		if (PTR_ERR(spi) == -ENOMEM)
 			return AE_NO_MEMORY;
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index d159cef12f1a9..e5bbb9cbd3d7a 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -762,7 +762,8 @@ extern void spi_unregister_controller(struct spi_controller *ctlr);
 
 #if IS_ENABLED(CONFIG_ACPI)
 extern struct spi_device *acpi_spi_device_alloc(struct spi_controller *ctlr,
-						struct acpi_device *adev);
+						struct acpi_device *adev,
+						int index);
 #endif
 
 /*
-- 
GitLab


From e612af7acef2459f1afd885f4107748995a05963 Mon Sep 17 00:00:00 2001
From: Stefan Binding <sbinding@opensource.cirrus.com>
Date: Fri, 21 Jan 2022 17:24:26 +0000
Subject: [PATCH 0139/1586] spi: Add API to count spi acpi resources

Some ACPI nodes may have more than one Spi Resource.
To be able to handle these case, its necessary to have
a way of counting these resources.

Signed-off-by: Stefan Binding <sbinding@opensource.cirrus.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20220121172431.6876-5-sbinding@opensource.cirrus.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi.c       | 40 ++++++++++++++++++++++++++++++++++++++++
 include/linux/spi/spi.h |  1 +
 2 files changed, 41 insertions(+)

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 06c0a308b38b3..ec9f2ed579e34 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -2324,6 +2324,46 @@ struct acpi_spi_lookup {
 	int			index;
 };
 
+static int acpi_spi_count(struct acpi_resource *ares, void *data)
+{
+	struct acpi_resource_spi_serialbus *sb;
+	int *count = data;
+
+	if (ares->type != ACPI_RESOURCE_TYPE_SERIAL_BUS)
+		return 1;
+
+	sb = &ares->data.spi_serial_bus;
+	if (sb->type != ACPI_RESOURCE_SERIAL_TYPE_SPI)
+		return 1;
+
+	*count = *count + 1;
+
+	return 1;
+}
+
+/**
+ * acpi_spi_count_resources - Count the number of SpiSerialBus resources
+ * @adev:	ACPI device
+ *
+ * Returns the number of SpiSerialBus resources in the ACPI-device's
+ * resource-list; or a negative error code.
+ */
+int acpi_spi_count_resources(struct acpi_device *adev)
+{
+	LIST_HEAD(r);
+	int count = 0;
+	int ret;
+
+	ret = acpi_dev_get_resources(adev, &r, acpi_spi_count, &count);
+	if (ret < 0)
+		return ret;
+
+	acpi_dev_free_resource_list(&r);
+
+	return count;
+}
+EXPORT_SYMBOL_GPL(acpi_spi_count_resources);
+
 static void acpi_spi_parse_apple_properties(struct acpi_device *dev,
 					    struct acpi_spi_lookup *lookup)
 {
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index e5bbb9cbd3d7a..394b4241d9890 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -764,6 +764,7 @@ extern void spi_unregister_controller(struct spi_controller *ctlr);
 extern struct spi_device *acpi_spi_device_alloc(struct spi_controller *ctlr,
 						struct acpi_device *adev,
 						int index);
+int acpi_spi_count_resources(struct acpi_device *adev);
 #endif
 
 /*
-- 
GitLab


From b8b87fd954b4b1bdd2d739c8f50bf685351a1a94 Mon Sep 17 00:00:00 2001
From: Scott Mayhew <smayhew@redhat.com>
Date: Mon, 31 Jan 2022 13:57:36 -0500
Subject: [PATCH 0140/1586] selinux: Fix selinux_sb_mnt_opts_compat()

selinux_sb_mnt_opts_compat() is called under the sb_lock spinlock and
shouldn't be performing any memory allocations.  Fix this by parsing the
sids at the same time we're chopping up the security mount options
string and then using the pre-parsed sids when doing the comparison.

Fixes: cc274ae7763d ("selinux: fix sleeping function called from invalid context")
Fixes: 69c4a42d72eb ("lsm,selinux: add new hook to compare new mount to an existing mount")
Signed-off-by: Scott Mayhew <smayhew@redhat.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 security/selinux/hooks.c | 75 ++++++++++++++++++++++------------------
 1 file changed, 41 insertions(+), 34 deletions(-)

diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 9e3658e9e7ca7..85282ccc748f3 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -341,6 +341,10 @@ static void inode_free_security(struct inode *inode)
 
 struct selinux_mnt_opts {
 	const char *fscontext, *context, *rootcontext, *defcontext;
+	u32 fscontext_sid;
+	u32 context_sid;
+	u32 rootcontext_sid;
+	u32 defcontext_sid;
 };
 
 static void selinux_free_mnt_opts(void *mnt_opts)
@@ -597,15 +601,14 @@ static int bad_option(struct superblock_security_struct *sbsec, char flag,
 	return 0;
 }
 
-static int parse_sid(struct super_block *sb, const char *s, u32 *sid,
-		     gfp_t gfp)
+static int parse_sid(struct super_block *sb, const char *s, u32 *sid)
 {
 	int rc = security_context_str_to_sid(&selinux_state, s,
-					     sid, gfp);
+					     sid, GFP_KERNEL);
 	if (rc)
 		pr_warn("SELinux: security_context_str_to_sid"
 		       "(%s) failed for (dev %s, type %s) errno=%d\n",
-		       s, sb->s_id, sb->s_type->name, rc);
+		       s, sb ? sb->s_id : "?", sb ? sb->s_type->name : "?", rc);
 	return rc;
 }
 
@@ -672,8 +675,7 @@ static int selinux_set_mnt_opts(struct super_block *sb,
 	 */
 	if (opts) {
 		if (opts->fscontext) {
-			rc = parse_sid(sb, opts->fscontext, &fscontext_sid,
-					GFP_KERNEL);
+			rc = parse_sid(sb, opts->fscontext, &fscontext_sid);
 			if (rc)
 				goto out;
 			if (bad_option(sbsec, FSCONTEXT_MNT, sbsec->sid,
@@ -682,8 +684,7 @@ static int selinux_set_mnt_opts(struct super_block *sb,
 			sbsec->flags |= FSCONTEXT_MNT;
 		}
 		if (opts->context) {
-			rc = parse_sid(sb, opts->context, &context_sid,
-					GFP_KERNEL);
+			rc = parse_sid(sb, opts->context, &context_sid);
 			if (rc)
 				goto out;
 			if (bad_option(sbsec, CONTEXT_MNT, sbsec->mntpoint_sid,
@@ -692,8 +693,7 @@ static int selinux_set_mnt_opts(struct super_block *sb,
 			sbsec->flags |= CONTEXT_MNT;
 		}
 		if (opts->rootcontext) {
-			rc = parse_sid(sb, opts->rootcontext, &rootcontext_sid,
-					GFP_KERNEL);
+			rc = parse_sid(sb, opts->rootcontext, &rootcontext_sid);
 			if (rc)
 				goto out;
 			if (bad_option(sbsec, ROOTCONTEXT_MNT, root_isec->sid,
@@ -702,8 +702,7 @@ static int selinux_set_mnt_opts(struct super_block *sb,
 			sbsec->flags |= ROOTCONTEXT_MNT;
 		}
 		if (opts->defcontext) {
-			rc = parse_sid(sb, opts->defcontext, &defcontext_sid,
-					GFP_KERNEL);
+			rc = parse_sid(sb, opts->defcontext, &defcontext_sid);
 			if (rc)
 				goto out;
 			if (bad_option(sbsec, DEFCONTEXT_MNT, sbsec->def_sid,
@@ -995,21 +994,29 @@ static int selinux_add_opt(int token, const char *s, void **mnt_opts)
 		if (opts->context || opts->defcontext)
 			goto err;
 		opts->context = s;
+		if (selinux_initialized(&selinux_state))
+			parse_sid(NULL, s, &opts->context_sid);
 		break;
 	case Opt_fscontext:
 		if (opts->fscontext)
 			goto err;
 		opts->fscontext = s;
+		if (selinux_initialized(&selinux_state))
+			parse_sid(NULL, s, &opts->fscontext_sid);
 		break;
 	case Opt_rootcontext:
 		if (opts->rootcontext)
 			goto err;
 		opts->rootcontext = s;
+		if (selinux_initialized(&selinux_state))
+			parse_sid(NULL, s, &opts->rootcontext_sid);
 		break;
 	case Opt_defcontext:
 		if (opts->context || opts->defcontext)
 			goto err;
 		opts->defcontext = s;
+		if (selinux_initialized(&selinux_state))
+			parse_sid(NULL, s, &opts->defcontext_sid);
 		break;
 	}
 
@@ -2647,8 +2654,6 @@ static int selinux_sb_mnt_opts_compat(struct super_block *sb, void *mnt_opts)
 {
 	struct selinux_mnt_opts *opts = mnt_opts;
 	struct superblock_security_struct *sbsec = selinux_superblock(sb);
-	u32 sid;
-	int rc;
 
 	/*
 	 * Superblock not initialized (i.e. no options) - reject if any
@@ -2665,34 +2670,36 @@ static int selinux_sb_mnt_opts_compat(struct super_block *sb, void *mnt_opts)
 		return (sbsec->flags & SE_MNTMASK) ? 1 : 0;
 
 	if (opts->fscontext) {
-		rc = parse_sid(sb, opts->fscontext, &sid, GFP_NOWAIT);
-		if (rc)
+		if (opts->fscontext_sid == SECSID_NULL)
 			return 1;
-		if (bad_option(sbsec, FSCONTEXT_MNT, sbsec->sid, sid))
+		else if (bad_option(sbsec, FSCONTEXT_MNT, sbsec->sid,
+				       opts->fscontext_sid))
 			return 1;
 	}
 	if (opts->context) {
-		rc = parse_sid(sb, opts->context, &sid, GFP_NOWAIT);
-		if (rc)
+		if (opts->context_sid == SECSID_NULL)
 			return 1;
-		if (bad_option(sbsec, CONTEXT_MNT, sbsec->mntpoint_sid, sid))
+		else if (bad_option(sbsec, CONTEXT_MNT, sbsec->mntpoint_sid,
+				       opts->context_sid))
 			return 1;
 	}
 	if (opts->rootcontext) {
-		struct inode_security_struct *root_isec;
-
-		root_isec = backing_inode_security(sb->s_root);
-		rc = parse_sid(sb, opts->rootcontext, &sid, GFP_NOWAIT);
-		if (rc)
-			return 1;
-		if (bad_option(sbsec, ROOTCONTEXT_MNT, root_isec->sid, sid))
+		if (opts->rootcontext_sid == SECSID_NULL)
 			return 1;
+		else {
+			struct inode_security_struct *root_isec;
+
+			root_isec = backing_inode_security(sb->s_root);
+			if (bad_option(sbsec, ROOTCONTEXT_MNT, root_isec->sid,
+				       opts->rootcontext_sid))
+				return 1;
+		}
 	}
 	if (opts->defcontext) {
-		rc = parse_sid(sb, opts->defcontext, &sid, GFP_NOWAIT);
-		if (rc)
+		if (opts->defcontext_sid == SECSID_NULL)
 			return 1;
-		if (bad_option(sbsec, DEFCONTEXT_MNT, sbsec->def_sid, sid))
+		else if (bad_option(sbsec, DEFCONTEXT_MNT, sbsec->def_sid,
+				       opts->defcontext_sid))
 			return 1;
 	}
 	return 0;
@@ -2712,14 +2719,14 @@ static int selinux_sb_remount(struct super_block *sb, void *mnt_opts)
 		return 0;
 
 	if (opts->fscontext) {
-		rc = parse_sid(sb, opts->fscontext, &sid, GFP_KERNEL);
+		rc = parse_sid(sb, opts->fscontext, &sid);
 		if (rc)
 			return rc;
 		if (bad_option(sbsec, FSCONTEXT_MNT, sbsec->sid, sid))
 			goto out_bad_option;
 	}
 	if (opts->context) {
-		rc = parse_sid(sb, opts->context, &sid, GFP_KERNEL);
+		rc = parse_sid(sb, opts->context, &sid);
 		if (rc)
 			return rc;
 		if (bad_option(sbsec, CONTEXT_MNT, sbsec->mntpoint_sid, sid))
@@ -2728,14 +2735,14 @@ static int selinux_sb_remount(struct super_block *sb, void *mnt_opts)
 	if (opts->rootcontext) {
 		struct inode_security_struct *root_isec;
 		root_isec = backing_inode_security(sb->s_root);
-		rc = parse_sid(sb, opts->rootcontext, &sid, GFP_KERNEL);
+		rc = parse_sid(sb, opts->rootcontext, &sid);
 		if (rc)
 			return rc;
 		if (bad_option(sbsec, ROOTCONTEXT_MNT, root_isec->sid, sid))
 			goto out_bad_option;
 	}
 	if (opts->defcontext) {
-		rc = parse_sid(sb, opts->defcontext, &sid, GFP_KERNEL);
+		rc = parse_sid(sb, opts->defcontext, &sid);
 		if (rc)
 			return rc;
 		if (bad_option(sbsec, DEFCONTEXT_MNT, sbsec->def_sid, sid))
-- 
GitLab


From 6bc1968c14e91e03c0851b9c5c5330d91305a853 Mon Sep 17 00:00:00 2001
From: Scott Mayhew <smayhew@redhat.com>
Date: Mon, 31 Jan 2022 13:57:37 -0500
Subject: [PATCH 0141/1586] selinux: try to use preparsed sid before calling
 parse_sid()

Avoid unnecessary parsing of sids that have already been parsed via
selinux_sb_eat_lsm_opts().

Signed-off-by: Scott Mayhew <smayhew@redhat.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 security/selinux/hooks.c | 88 +++++++++++++++++++++++++++-------------
 1 file changed, 59 insertions(+), 29 deletions(-)

diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 85282ccc748f3..b60481192b380 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -675,36 +675,48 @@ static int selinux_set_mnt_opts(struct super_block *sb,
 	 */
 	if (opts) {
 		if (opts->fscontext) {
-			rc = parse_sid(sb, opts->fscontext, &fscontext_sid);
-			if (rc)
-				goto out;
+			if (opts->fscontext_sid == SECSID_NULL) {
+				rc = parse_sid(sb, opts->fscontext, &fscontext_sid);
+				if (rc)
+					goto out;
+			} else
+				fscontext_sid = opts->fscontext_sid;
 			if (bad_option(sbsec, FSCONTEXT_MNT, sbsec->sid,
 					fscontext_sid))
 				goto out_double_mount;
 			sbsec->flags |= FSCONTEXT_MNT;
 		}
 		if (opts->context) {
-			rc = parse_sid(sb, opts->context, &context_sid);
-			if (rc)
-				goto out;
+			if (opts->context_sid == SECSID_NULL) {
+				rc = parse_sid(sb, opts->context, &context_sid);
+				if (rc)
+					goto out;
+			} else
+				context_sid = opts->context_sid;
 			if (bad_option(sbsec, CONTEXT_MNT, sbsec->mntpoint_sid,
 					context_sid))
 				goto out_double_mount;
 			sbsec->flags |= CONTEXT_MNT;
 		}
 		if (opts->rootcontext) {
-			rc = parse_sid(sb, opts->rootcontext, &rootcontext_sid);
-			if (rc)
-				goto out;
+			if (opts->rootcontext_sid == SECSID_NULL) {
+				rc = parse_sid(sb, opts->rootcontext, &rootcontext_sid);
+				if (rc)
+					goto out;
+			} else
+				rootcontext_sid = opts->rootcontext_sid;
 			if (bad_option(sbsec, ROOTCONTEXT_MNT, root_isec->sid,
 					rootcontext_sid))
 				goto out_double_mount;
 			sbsec->flags |= ROOTCONTEXT_MNT;
 		}
 		if (opts->defcontext) {
-			rc = parse_sid(sb, opts->defcontext, &defcontext_sid);
-			if (rc)
-				goto out;
+			if (opts->defcontext_sid == SECSID_NULL) {
+				rc = parse_sid(sb, opts->defcontext, &defcontext_sid);
+				if (rc)
+					goto out;
+			} else
+				defcontext_sid = opts->defcontext_sid;
 			if (bad_option(sbsec, DEFCONTEXT_MNT, sbsec->def_sid,
 					defcontext_sid))
 				goto out_double_mount;
@@ -2709,7 +2721,6 @@ static int selinux_sb_remount(struct super_block *sb, void *mnt_opts)
 {
 	struct selinux_mnt_opts *opts = mnt_opts;
 	struct superblock_security_struct *sbsec = selinux_superblock(sb);
-	u32 sid;
 	int rc;
 
 	if (!(sbsec->flags & SE_SBINITIALIZED))
@@ -2719,33 +2730,48 @@ static int selinux_sb_remount(struct super_block *sb, void *mnt_opts)
 		return 0;
 
 	if (opts->fscontext) {
-		rc = parse_sid(sb, opts->fscontext, &sid);
-		if (rc)
-			return rc;
-		if (bad_option(sbsec, FSCONTEXT_MNT, sbsec->sid, sid))
+		if (opts->fscontext_sid == SECSID_NULL) {
+			rc = parse_sid(sb, opts->fscontext,
+				       &opts->fscontext_sid);
+			if (rc)
+				return rc;
+		}
+		if (bad_option(sbsec, FSCONTEXT_MNT, sbsec->sid,
+			       opts->fscontext_sid))
 			goto out_bad_option;
 	}
 	if (opts->context) {
-		rc = parse_sid(sb, opts->context, &sid);
-		if (rc)
-			return rc;
-		if (bad_option(sbsec, CONTEXT_MNT, sbsec->mntpoint_sid, sid))
+		if (opts->context_sid == SECSID_NULL) {
+			rc = parse_sid(sb, opts->context, &opts->context_sid);
+			if (rc)
+				return rc;
+		}
+		if (bad_option(sbsec, CONTEXT_MNT, sbsec->mntpoint_sid,
+			       opts->context_sid))
 			goto out_bad_option;
 	}
 	if (opts->rootcontext) {
 		struct inode_security_struct *root_isec;
 		root_isec = backing_inode_security(sb->s_root);
-		rc = parse_sid(sb, opts->rootcontext, &sid);
-		if (rc)
-			return rc;
-		if (bad_option(sbsec, ROOTCONTEXT_MNT, root_isec->sid, sid))
+		if (opts->rootcontext_sid == SECSID_NULL) {
+			rc = parse_sid(sb, opts->rootcontext,
+				       &opts->rootcontext_sid);
+			if (rc)
+				return rc;
+		}
+		if (bad_option(sbsec, ROOTCONTEXT_MNT, root_isec->sid,
+			       opts->rootcontext_sid))
 			goto out_bad_option;
 	}
 	if (opts->defcontext) {
-		rc = parse_sid(sb, opts->defcontext, &sid);
-		if (rc)
-			return rc;
-		if (bad_option(sbsec, DEFCONTEXT_MNT, sbsec->def_sid, sid))
+		if (opts->defcontext_sid == SECSID_NULL) {
+			rc = parse_sid(sb, opts->defcontext,
+				       &opts->defcontext_sid);
+			if (rc)
+				return rc;
+		}
+		if (bad_option(sbsec, DEFCONTEXT_MNT, sbsec->def_sid,
+			       opts->defcontext_sid))
 			goto out_bad_option;
 	}
 	return 0;
@@ -2843,6 +2869,10 @@ static int selinux_fs_context_dup(struct fs_context *fc,
 		if (!opts->defcontext)
 			return -ENOMEM;
 	}
+	opts->fscontext_sid = src->fscontext_sid;
+	opts->context_sid = src->context_sid;
+	opts->rootcontext_sid = src->rootcontext_sid;
+	opts->defcontext_sid = src->defcontext_sid;
 	return 0;
 }
 
-- 
GitLab


From dc6cdced47ce1edbca79bdc5a3cd4e435b5358dd Mon Sep 17 00:00:00 2001
From: Tang Yizhou <tangyizhou@huawei.com>
Date: Sat, 25 Dec 2021 09:51:46 +0800
Subject: [PATCH 0142/1586] docs/zh_CN: Add rbtree Chinese translation

Translate core-api/rbtree.rst into Chinese.

Signed-off-by: Tang Yizhou <tangyizhou@huawei.com>
Reviewed-by: Alex Shi <alexs@kernel.org>
Link: https://lore.kernel.org/r/20211225015146.1535-1-tangyizhou@huawei.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../translations/zh_CN/core-api/index.rst     |   2 +-
 .../translations/zh_CN/core-api/rbtree.rst    | 391 ++++++++++++++++++
 2 files changed, 392 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/translations/zh_CN/core-api/rbtree.rst

diff --git a/Documentation/translations/zh_CN/core-api/index.rst b/Documentation/translations/zh_CN/core-api/index.rst
index d10191c45cf15..26d9913fc8b60 100644
--- a/Documentation/translations/zh_CN/core-api/index.rst
+++ b/Documentation/translations/zh_CN/core-api/index.rst
@@ -42,6 +42,7 @@
    kref
    assoc_array
    xarray
+   rbtree
 
 Todolist:
 
@@ -49,7 +50,6 @@ Todolist:
 
    idr
    circular-buffers
-   rbtree
    generic-radix-tree
    packing
    bus-virt-phys-mapping
diff --git a/Documentation/translations/zh_CN/core-api/rbtree.rst b/Documentation/translations/zh_CN/core-api/rbtree.rst
new file mode 100644
index 0000000000000..a3e1555cb974d
--- /dev/null
+++ b/Documentation/translations/zh_CN/core-api/rbtree.rst
@@ -0,0 +1,391 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/core-api/rbtree.rst
+
+:翻译:
+
+  唐艺舟 Tang Yizhou <tangyeechou@gmail.com>
+
+=========================
+Linux中的红黑树（rbtree）
+=========================
+
+
+:日期: 2007年1月18日
+:作者: Rob Landley <rob@landley.net>
+
+何为红黑树，它们有什么用？
+--------------------------
+
+红黑树是一种自平衡二叉搜索树，被用来存储可排序的键/值数据对。这与基数树（被用来高效
+存储稀疏数组，因此使用长整型下标来插入/访问/删除结点）和哈希表（没有保持排序因而无法
+容易地按序遍历，同时必须调节其大小和哈希函数，然而红黑树可以优雅地伸缩以便存储任意
+数量的键）不同。
+
+红黑树和AVL树类似，但在插入和删除时提供了更快的实时有界的最坏情况性能（分别最多两次
+旋转和三次旋转，来平衡树），查询时间轻微变慢（但时间复杂度仍然是O(log n)）。
+
+引用Linux每周新闻（Linux Weekly News）：
+
+    内核中有多处红黑树的使用案例。最后期限调度器和完全公平排队（CFQ）I/O调度器利用
+    红黑树跟踪请求；数据包CD/DVD驱动程序也是如此。高精度时钟代码使用一颗红黑树组织
+    未完成的定时器请求。ext3文件系统用红黑树跟踪目录项。虚拟内存区域（VMAs）、epoll
+    文件描述符、密码学密钥和在“分层令牌桶”调度器中的网络数据包都由红黑树跟踪。
+
+本文档涵盖了对Linux红黑树实现的使用方法。更多关于红黑树的性质和实现的信息，参见：
+
+  Linux每周新闻关于红黑树的文章
+    https://lwn.net/Articles/184495/
+
+  维基百科红黑树词条
+    https://en.wikipedia.org/wiki/Red-black_tree
+
+红黑树的Linux实现
+-----------------
+
+Linux的红黑树实现在文件“lib/rbtree.c”中。要使用它，需要“#include <linux/rbtree.h>”。
+
+Linux的红黑树实现对速度进行了优化，因此比传统的实现少一个间接层（有更好的缓存局部性）。
+每个rb_node结构体的实例嵌入在它管理的数据结构中，因此不需要靠指针来分离rb_node和它
+管理的数据结构。用户应该编写他们自己的树搜索和插入函数，来调用已提供的红黑树函数，
+而不是使用一个比较回调函数指针。加锁代码也留给红黑树的用户编写。
+
+创建一颗红黑树
+--------------
+
+红黑树中的数据结点是包含rb_node结构体成员的结构体::
+
+  struct mytype {
+  	struct rb_node node;
+  	char *keystring;
+  };
+
+当处理一个指向内嵌rb_node结构体的指针时，包住rb_node的结构体可用标准的container_of()
+宏访问。此外，个体成员可直接用rb_entry(node, type, member)访问。
+
+每颗红黑树的根是一个rb_root数据结构，它由以下方式初始化为空:
+
+  struct rb_root mytree = RB_ROOT;
+
+在一颗红黑树中搜索值
+--------------------
+
+为你的树写一个搜索函数是相当简单的：从树根开始，比较每个值，然后根据需要继续前往左边或
+右边的分支。
+
+示例::
+
+  struct mytype *my_search(struct rb_root *root, char *string)
+  {
+  	struct rb_node *node = root->rb_node;
+
+  	while (node) {
+  		struct mytype *data = container_of(node, struct mytype, node);
+		int result;
+
+		result = strcmp(string, data->keystring);
+
+		if (result < 0)
+  			node = node->rb_left;
+		else if (result > 0)
+  			node = node->rb_right;
+		else
+  			return data;
+	}
+	return NULL;
+  }
+
+在一颗红黑树中插入数据
+----------------------
+
+在树中插入数据的步骤包括：首先搜索插入新结点的位置，然后插入结点并对树再平衡
+（"recoloring"）。
+
+插入的搜索和上文的搜索不同，它要找到嫁接新结点的位置。新结点也需要一个指向它的父节点
+的链接，以达到再平衡的目的。
+
+示例::
+
+  int my_insert(struct rb_root *root, struct mytype *data)
+  {
+  	struct rb_node **new = &(root->rb_node), *parent = NULL;
+
+  	/* Figure out where to put new node */
+  	while (*new) {
+  		struct mytype *this = container_of(*new, struct mytype, node);
+  		int result = strcmp(data->keystring, this->keystring);
+
+		parent = *new;
+  		if (result < 0)
+  			new = &((*new)->rb_left);
+  		else if (result > 0)
+  			new = &((*new)->rb_right);
+  		else
+  			return FALSE;
+  	}
+
+  	/* Add new node and rebalance tree. */
+  	rb_link_node(&data->node, parent, new);
+  	rb_insert_color(&data->node, root);
+
+	return TRUE;
+  }
+
+在一颗红黑树中删除或替换已经存在的数据
+--------------------------------------
+
+若要从树中删除一个已经存在的结点，调用::
+
+  void rb_erase(struct rb_node *victim, struct rb_root *tree);
+
+示例::
+
+  struct mytype *data = mysearch(&mytree, "walrus");
+
+  if (data) {
+  	rb_erase(&data->node, &mytree);
+  	myfree(data);
+  }
+
+若要用一个新结点替换树中一个已经存在的键值相同的结点，调用::
+
+  void rb_replace_node(struct rb_node *old, struct rb_node *new,
+  			struct rb_root *tree);
+
+通过这种方式替换结点不会对树做重排序：如果新结点的键值和旧结点不同，红黑树可能被
+破坏。
+
+（按排序的顺序）遍历存储在红黑树中的元素
+----------------------------------------
+
+我们提供了四个函数，用于以排序的方式遍历一颗红黑树的内容。这些函数可以在任意红黑树
+上工作，并且不需要被修改或包装（除非加锁的目的）::
+
+  struct rb_node *rb_first(struct rb_root *tree);
+  struct rb_node *rb_last(struct rb_root *tree);
+  struct rb_node *rb_next(struct rb_node *node);
+  struct rb_node *rb_prev(struct rb_node *node);
+
+要开始迭代，需要使用一个指向树根的指针调用rb_first()或rb_last()，它将返回一个指向
+树中第一个或最后一个元素所包含的节点结构的指针。要继续的话，可以在当前结点上调用
+rb_next()或rb_prev()来获取下一个或上一个结点。当没有剩余的结点时，将返回NULL。
+
+迭代器函数返回一个指向被嵌入的rb_node结构体的指针，由此，包住rb_node的结构体可用
+标准的container_of()宏访问。此外，个体成员可直接用rb_entry(node, type, member)
+访问。
+
+示例::
+
+  struct rb_node *node;
+  for (node = rb_first(&mytree); node; node = rb_next(node))
+	printk("key=%s\n", rb_entry(node, struct mytype, node)->keystring);
+
+带缓存的红黑树
+--------------
+
+计算最左边（最小的）结点是二叉搜索树的一个相当常见的任务，例如用于遍历，或用户根据
+他们自己的逻辑依赖一个特定的顺序。为此，用户可以使用'struct rb_root_cached'来优化
+时间复杂度为O(logN)的rb_first()的调用，以简单地获取指针，避免了潜在的昂贵的树迭代。
+维护操作的额外运行时间开销可忽略，不过内存占用较大。
+
+和rb_root结构体类似，带缓存的红黑树由以下方式初始化为空::
+
+  struct rb_root_cached mytree = RB_ROOT_CACHED;
+
+带缓存的红黑树只是一个常规的rb_root，加上一个额外的指针来缓存最左边的节点。这使得
+rb_root_cached可以存在于rb_root存在的任何地方，并且只需增加几个接口来支持带缓存的
+树::
+
+  struct rb_node *rb_first_cached(struct rb_root_cached *tree);
+  void rb_insert_color_cached(struct rb_node *, struct rb_root_cached *, bool);
+  void rb_erase_cached(struct rb_node *node, struct rb_root_cached *);
+
+操作和删除也有对应的带缓存的树的调用::
+
+  void rb_insert_augmented_cached(struct rb_node *node, struct rb_root_cached *,
+				  bool, struct rb_augment_callbacks *);
+  void rb_erase_augmented_cached(struct rb_node *, struct rb_root_cached *,
+				 struct rb_augment_callbacks *);
+
+
+对增强型红黑树的支持
+--------------------
+
+增强型红黑树是一种在每个结点里存储了“一些”附加数据的红黑树，其中结点N的附加数据
+必须是以N为根的子树中所有结点的内容的函数。它是建立在红黑树基础设施之上的可选特性。
+想要使用这个特性的红黑树用户，插入和删除结点时必须调用增强型接口并提供增强型回调函数。
+
+实现增强型红黑树操作的C文件必须包含<linux/rbtree_augmented.h>而不是<linux/rbtree.h>。
+注意，linux/rbtree_augmented.h暴露了一些红黑树实现的细节而你不应依赖它们，请坚持
+使用文档记录的API，并且不要在头文件中包含<linux/rbtree_augmented.h>，以最小化你的
+用户意外地依赖这些实现细节的可能。
+
+插入时，用户必须更新通往被插入节点的路径上的增强信息，然后像往常一样调用rb_link_node()，
+然后是rb_augment_inserted()而不是平时的rb_insert_color()调用。如果
+rb_augment_inserted()再平衡了红黑树，它将回调至一个用户提供的函数来更新受影响的
+子树上的增强信息。
+
+删除一个结点时，用户必须调用rb_erase_augmented()而不是rb_erase()。
+rb_erase_augmented()回调至一个用户提供的函数来更新受影响的子树上的增强信息。
+
+在两种情况下，回调都是通过rb_augment_callbacks结构体提供的。必须定义3个回调：
+
+- 一个传播回调，它更新一个给定结点和它的祖先们的增强数据，直到一个给定的停止点
+  （如果是NULL，将更新一路更新到树根）。
+
+- 一个复制回调，它将一颗给定子树的增强数据复制到一个新指定的子树树根。
+
+- 一个树旋转回调，它将一颗给定的子树的增强值复制到新指定的子树树根上，并重新计算
+  先前的子树树根的增强值。
+
+rb_erase_augmented()编译后的代码可能会内联传播、复制回调，这将导致函数体积更大，
+因此每个增强型红黑树的用户应该只有一个rb_erase_augmented()的调用点，以限制编译后
+的代码大小。
+
+
+使用示例
+^^^^^^^^
+
+区间树是增强型红黑树的一个例子。参考Cormen，Leiserson，Rivest和Stein写的
+《算法导论》。区间树的更多细节：
+
+经典的红黑树只有一个键，它不能直接用来存储像[lo:hi]这样的区间范围，也不能快速查找
+与新的lo:hi重叠的部分，或者查找是否有与新的lo:hi完全匹配的部分。
+
+然而，红黑树可以被增强，以一种结构化的方式来存储这种区间范围，从而使高效的查找和
+精确匹配成为可能。
+
+这个存储在每个节点中的“额外信息”是其所有后代结点中的最大hi（max_hi）值。这个信息
+可以保持在每个结点上，只需查看一下该结点和它的直系子结点们。这将被用于时间复杂度
+为O(log n)的最低匹配查找（所有可能的匹配中最低的起始地址），就像这样::
+
+  struct interval_tree_node *
+  interval_tree_first_match(struct rb_root *root,
+			    unsigned long start, unsigned long last)
+  {
+	struct interval_tree_node *node;
+
+	if (!root->rb_node)
+		return NULL;
+	node = rb_entry(root->rb_node, struct interval_tree_node, rb);
+
+	while (true) {
+		if (node->rb.rb_left) {
+			struct interval_tree_node *left =
+				rb_entry(node->rb.rb_left,
+					 struct interval_tree_node, rb);
+			if (left->__subtree_last >= start) {
+				/*
+				 * Some nodes in left subtree satisfy Cond2.
+				 * Iterate to find the leftmost such node N.
+				 * If it also satisfies Cond1, that's the match
+				 * we are looking for. Otherwise, there is no
+				 * matching interval as nodes to the right of N
+				 * can't satisfy Cond1 either.
+				 */
+				node = left;
+				continue;
+			}
+		}
+		if (node->start <= last) {		/* Cond1 */
+			if (node->last >= start)	/* Cond2 */
+				return node;	/* node is leftmost match */
+			if (node->rb.rb_right) {
+				node = rb_entry(node->rb.rb_right,
+					struct interval_tree_node, rb);
+				if (node->__subtree_last >= start)
+					continue;
+			}
+		}
+		return NULL;	/* No match */
+	}
+  }
+
+插入/删除是通过以下增强型回调来定义的::
+
+  static inline unsigned long
+  compute_subtree_last(struct interval_tree_node *node)
+  {
+	unsigned long max = node->last, subtree_last;
+	if (node->rb.rb_left) {
+		subtree_last = rb_entry(node->rb.rb_left,
+			struct interval_tree_node, rb)->__subtree_last;
+		if (max < subtree_last)
+			max = subtree_last;
+	}
+	if (node->rb.rb_right) {
+		subtree_last = rb_entry(node->rb.rb_right,
+			struct interval_tree_node, rb)->__subtree_last;
+		if (max < subtree_last)
+			max = subtree_last;
+	}
+	return max;
+  }
+
+  static void augment_propagate(struct rb_node *rb, struct rb_node *stop)
+  {
+	while (rb != stop) {
+		struct interval_tree_node *node =
+			rb_entry(rb, struct interval_tree_node, rb);
+		unsigned long subtree_last = compute_subtree_last(node);
+		if (node->__subtree_last == subtree_last)
+			break;
+		node->__subtree_last = subtree_last;
+		rb = rb_parent(&node->rb);
+	}
+  }
+
+  static void augment_copy(struct rb_node *rb_old, struct rb_node *rb_new)
+  {
+	struct interval_tree_node *old =
+		rb_entry(rb_old, struct interval_tree_node, rb);
+	struct interval_tree_node *new =
+		rb_entry(rb_new, struct interval_tree_node, rb);
+
+	new->__subtree_last = old->__subtree_last;
+  }
+
+  static void augment_rotate(struct rb_node *rb_old, struct rb_node *rb_new)
+  {
+	struct interval_tree_node *old =
+		rb_entry(rb_old, struct interval_tree_node, rb);
+	struct interval_tree_node *new =
+		rb_entry(rb_new, struct interval_tree_node, rb);
+
+	new->__subtree_last = old->__subtree_last;
+	old->__subtree_last = compute_subtree_last(old);
+  }
+
+  static const struct rb_augment_callbacks augment_callbacks = {
+	augment_propagate, augment_copy, augment_rotate
+  };
+
+  void interval_tree_insert(struct interval_tree_node *node,
+			    struct rb_root *root)
+  {
+	struct rb_node **link = &root->rb_node, *rb_parent = NULL;
+	unsigned long start = node->start, last = node->last;
+	struct interval_tree_node *parent;
+
+	while (*link) {
+		rb_parent = *link;
+		parent = rb_entry(rb_parent, struct interval_tree_node, rb);
+		if (parent->__subtree_last < last)
+			parent->__subtree_last = last;
+		if (start < parent->start)
+			link = &parent->rb.rb_left;
+		else
+			link = &parent->rb.rb_right;
+	}
+
+	node->__subtree_last = last;
+	rb_link_node(&node->rb, rb_parent, link);
+	rb_insert_augmented(&node->rb, root, &augment_callbacks);
+  }
+
+  void interval_tree_remove(struct interval_tree_node *node,
+			    struct rb_root *root)
+  {
+	rb_erase_augmented(&node->rb, root, &augment_callbacks);
+  }
-- 
GitLab


From 5dbbc145d24ac68fc481fc6fe0e4ec3d7a4b8007 Mon Sep 17 00:00:00 2001
From: Tang Yizhou <tangyizhou@huawei.com>
Date: Tue, 21 Dec 2021 11:18:18 +0800
Subject: [PATCH 0143/1586] docs: scheduler: Fix outdated parameter of
 rebalance_domains

According to the function prototype of rebalance_domains(), its first
parameter is *rq* and the document need to be updated.

Signed-off-by: Tang Yizhou <tangyizhou@huawei.com>
Link: https://lore.kernel.org/r/20211221031818.23186-1-tangyizhou@huawei.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/scheduler/sched-domains.rst | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/Documentation/scheduler/sched-domains.rst b/Documentation/scheduler/sched-domains.rst
index 84dcdcd2911c6..e57ad28301bde 100644
--- a/Documentation/scheduler/sched-domains.rst
+++ b/Documentation/scheduler/sched-domains.rst
@@ -37,10 +37,10 @@ rebalancing event for the current runqueue has arrived. The actual load
 balancing workhorse, run_rebalance_domains()->rebalance_domains(), is then run
 in softirq context (SCHED_SOFTIRQ).
 
-The latter function takes two arguments: the current CPU and whether it was idle
-at the time the scheduler_tick() happened and iterates over all sched domains
-our CPU is on, starting from its base domain and going up the ->parent chain.
-While doing that, it checks to see if the current domain has exhausted its
+The latter function takes two arguments: the runqueue of current CPU and whether
+the CPU was idle at the time the scheduler_tick() happened and iterates over all
+sched domains our CPU is on, starting from its base domain and going up the ->parent
+chain. While doing that, it checks to see if the current domain has exhausted its
 rebalance interval. If so, it runs load_balance() on that domain. It then checks
 the parent sched_domain (if it exists), and the parent of the parent and so
 forth.
-- 
GitLab


From 0e326df069802dc48e4f095f889cb780e4beaba6 Mon Sep 17 00:00:00 2001
From: Paul Moore <paul@paul-moore.com>
Date: Thu, 27 Jan 2022 10:56:13 -0500
Subject: [PATCH 0144/1586] selinux: various sparse fixes

When running the SELinux code through sparse, there are a handful of
warnings.  This patch resolves some of these warnings caused by
"__rcu" mismatches.

 % make W=1 C=1 security/selinux/

Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 security/selinux/hooks.c   | 6 +++---
 security/selinux/ibpkey.c  | 2 +-
 security/selinux/netnode.c | 5 +++--
 security/selinux/netport.c | 2 +-
 4 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index b60481192b380..b24a1aeeedd43 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -2553,7 +2553,7 @@ static void selinux_bprm_committed_creds(struct linux_binprm *bprm)
 	if (rc) {
 		clear_itimer();
 
-		spin_lock_irq(&current->sighand->siglock);
+		spin_lock_irq(&unrcu_pointer(current->sighand)->siglock);
 		if (!fatal_signal_pending(current)) {
 			flush_sigqueue(&current->pending);
 			flush_sigqueue(&current->signal->shared_pending);
@@ -2561,13 +2561,13 @@ static void selinux_bprm_committed_creds(struct linux_binprm *bprm)
 			sigemptyset(&current->blocked);
 			recalc_sigpending();
 		}
-		spin_unlock_irq(&current->sighand->siglock);
+		spin_unlock_irq(&unrcu_pointer(current->sighand)->siglock);
 	}
 
 	/* Wake up the parent if it is waiting so that it can recheck
 	 * wait permission to the new task SID. */
 	read_lock(&tasklist_lock);
-	__wake_up_parent(current, current->real_parent);
+	__wake_up_parent(current, unrcu_pointer(current->real_parent));
 	read_unlock(&tasklist_lock);
 }
 
diff --git a/security/selinux/ibpkey.c b/security/selinux/ibpkey.c
index 20b3b22438205..5839ca7bb9c75 100644
--- a/security/selinux/ibpkey.c
+++ b/security/selinux/ibpkey.c
@@ -104,7 +104,7 @@ static void sel_ib_pkey_insert(struct sel_ib_pkey *pkey)
 
 		tail = list_entry(
 			rcu_dereference_protected(
-				sel_ib_pkey_hash[idx].list.prev,
+				list_tail_rcu(&sel_ib_pkey_hash[idx].list),
 				lockdep_is_held(&sel_ib_pkey_lock)),
 			struct sel_ib_pkey, list);
 		list_del_rcu(&tail->list);
diff --git a/security/selinux/netnode.c b/security/selinux/netnode.c
index 889552db0d31a..0ac7df9a93677 100644
--- a/security/selinux/netnode.c
+++ b/security/selinux/netnode.c
@@ -164,8 +164,9 @@ static void sel_netnode_insert(struct sel_netnode *node)
 	if (sel_netnode_hash[idx].size == SEL_NETNODE_HASH_BKT_LIMIT) {
 		struct sel_netnode *tail;
 		tail = list_entry(
-			rcu_dereference_protected(sel_netnode_hash[idx].list.prev,
-						  lockdep_is_held(&sel_netnode_lock)),
+			rcu_dereference_protected(
+				list_tail_rcu(&sel_netnode_hash[idx].list),
+				lockdep_is_held(&sel_netnode_lock)),
 			struct sel_netnode, list);
 		list_del_rcu(&tail->list);
 		kfree_rcu(tail, rcu);
diff --git a/security/selinux/netport.c b/security/selinux/netport.c
index 9ba09d11c0f5b..8eec6347cf012 100644
--- a/security/selinux/netport.c
+++ b/security/selinux/netport.c
@@ -113,7 +113,7 @@ static void sel_netport_insert(struct sel_netport *port)
 		struct sel_netport *tail;
 		tail = list_entry(
 			rcu_dereference_protected(
-				sel_netport_hash[idx].list.prev,
+				list_tail_rcu(&sel_netport_hash[idx].list),
 				lockdep_is_held(&sel_netport_lock)),
 			struct sel_netport, list);
 		list_del_rcu(&tail->list);
-- 
GitLab


From 4d266c247d56751c2c97e0c411212b59e90922fc Mon Sep 17 00:00:00 2001
From: Neeraj Upadhyay <quic_neeraju@quicinc.com>
Date: Mon, 13 Dec 2021 11:40:24 +0530
Subject: [PATCH 0145/1586] rcu/exp: Fix check for idle context in
 rcu_exp_handler

For PREEMPT_RCU, the rcu_exp_handler() function checks
whether the current CPU is in idle, by calling
rcu_dynticks_curr_cpu_in_eqs(). However, rcu_exp_handler()
is called in IPI handler context. So, it should be checking
the idle context using rcu_is_cpu_rrupt_from_idle(). Fix this
by using rcu_is_cpu_rrupt_from_idle() instead of
rcu_dynticks_curr_cpu_in_eqs(). Non-preempt configuration
already uses the correct check.

Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Neeraj Upadhyay <quic_neeraju@quicinc.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/tree_exp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index 237a79989abae..1568c8ef185b2 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -656,7 +656,7 @@ static void rcu_exp_handler(void *unused)
 	 */
 	if (!depth) {
 		if (!(preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK)) ||
-		    rcu_dynticks_curr_cpu_in_eqs()) {
+		    rcu_is_cpu_rrupt_from_idle()) {
 			rcu_report_exp_rdp(rdp);
 		} else {
 			WRITE_ONCE(rdp->cpu_no_qs.b.exp, true);
-- 
GitLab


From 63c564da11cbed96ec6cf0b5faf6af0b7e3624d2 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Mon, 13 Dec 2021 21:00:02 -0800
Subject: [PATCH 0146/1586] rcu: Mark ->expmask access in
 synchronize_rcu_expedited_wait()

This commit adds a READ_ONCE() to an access to the rcu_node structure's
->expmask field to prevent compiler mischief.  Detected by KCSAN.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/tree_exp.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index 1568c8ef185b2..60197ea24ceb9 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -502,7 +502,8 @@ static void synchronize_rcu_expedited_wait(void)
 		if (synchronize_rcu_expedited_wait_once(1))
 			return;
 		rcu_for_each_leaf_node(rnp) {
-			for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) {
+			mask = READ_ONCE(rnp->expmask);
+			for_each_leaf_node_cpu_mask(rnp, cpu, mask) {
 				rdp = per_cpu_ptr(&rcu_data, cpu);
 				if (rdp->rcu_forced_tick_exp)
 					continue;
-- 
GitLab


From a47f9f131dfe4f765e385fa90e13032eadb00bac Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Mon, 13 Dec 2021 11:05:07 -0800
Subject: [PATCH 0147/1586] rcu: Mark accesses to boost_starttime

The boost_starttime shared variable has conflicting unmarked C-language
accesses, which are dangerous at best.  This commit therefore adds
appropriate marking.  This was found by KCSAN.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/rcutorture.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 422f7e4cc08de..829ae0b7d3c04 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -997,7 +997,7 @@ static int rcu_torture_boost(void *arg)
 			goto checkwait;
 
 		/* Wait for the next test interval. */
-		oldstarttime = boost_starttime;
+		oldstarttime = READ_ONCE(boost_starttime);
 		while (time_before(jiffies, oldstarttime)) {
 			schedule_timeout_interruptible(oldstarttime - jiffies);
 			if (stutter_wait("rcu_torture_boost"))
@@ -1041,10 +1041,11 @@ static int rcu_torture_boost(void *arg)
 		 * interval.  Besides, we are running at RT priority,
 		 * so delays should be relatively rare.
 		 */
-		while (oldstarttime == boost_starttime && !kthread_should_stop()) {
+		while (oldstarttime == READ_ONCE(boost_starttime) && !kthread_should_stop()) {
 			if (mutex_trylock(&boost_mutex)) {
 				if (oldstarttime == boost_starttime) {
-					boost_starttime = jiffies + test_boost_interval * HZ;
+					WRITE_ONCE(boost_starttime,
+						   jiffies + test_boost_interval * HZ);
 					n_rcu_torture_boosts++;
 				}
 				mutex_unlock(&boost_mutex);
-- 
GitLab


From 02e3024175274ed4bf7912e7a1281b300cec76b5 Mon Sep 17 00:00:00 2001
From: Neeraj Upadhyay <quic_neeraju@quicinc.com>
Date: Sat, 11 Dec 2021 22:31:39 +0530
Subject: [PATCH 0148/1586] rcu/nocb: Handle concurrent nocb kthreads creation

When multiple CPUs in the same nocb gp/cb group concurrently
come online, they might try to concurrently create the same
rcuog kthread. Fix this by using nocb gp CPU's spawn mutex to
provide mutual exclusion for the rcuog kthread creation code.

[ paulmck: Whitespace fixes per kernel test robot feedback. ]

Acked-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Neeraj Upadhyay <quic_neeraju@quicinc.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/tree.h      | 2 ++
 kernel/rcu/tree_nocb.h | 7 ++++++-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 486fc901bd085..24dd4b0d805f1 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -203,6 +203,8 @@ struct rcu_data {
 	int nocb_defer_wakeup;		/* Defer wakeup of nocb_kthread. */
 	struct timer_list nocb_timer;	/* Enforce finite deferral. */
 	unsigned long nocb_gp_adv_time;	/* Last call_rcu() CB adv (jiffies). */
+	struct mutex nocb_gp_kthread_mutex; /* Exclusion for nocb gp kthread */
+					    /* spawning */
 
 	/* The following fields are used by call_rcu, hence own cacheline. */
 	raw_spinlock_t nocb_bypass_lock ____cacheline_internodealigned_in_smp;
diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
index eeafb546a7a09..1e40519d1a05a 100644
--- a/kernel/rcu/tree_nocb.h
+++ b/kernel/rcu/tree_nocb.h
@@ -1226,6 +1226,7 @@ static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
 	raw_spin_lock_init(&rdp->nocb_gp_lock);
 	timer_setup(&rdp->nocb_timer, do_nocb_deferred_wakeup_timer, 0);
 	rcu_cblist_init(&rdp->nocb_bypass);
+	mutex_init(&rdp->nocb_gp_kthread_mutex);
 }
 
 /*
@@ -1248,13 +1249,17 @@ static void rcu_spawn_cpu_nocb_kthread(int cpu)
 
 	/* If we didn't spawn the GP kthread first, reorganize! */
 	rdp_gp = rdp->nocb_gp_rdp;
+	mutex_lock(&rdp_gp->nocb_gp_kthread_mutex);
 	if (!rdp_gp->nocb_gp_kthread) {
 		t = kthread_run(rcu_nocb_gp_kthread, rdp_gp,
 				"rcuog/%d", rdp_gp->cpu);
-		if (WARN_ONCE(IS_ERR(t), "%s: Could not start rcuo GP kthread, OOM is now expected behavior\n", __func__))
+		if (WARN_ONCE(IS_ERR(t), "%s: Could not start rcuo GP kthread, OOM is now expected behavior\n", __func__)) {
+			mutex_unlock(&rdp_gp->nocb_gp_kthread_mutex);
 			return;
+		}
 		WRITE_ONCE(rdp_gp->nocb_gp_kthread, t);
 	}
+	mutex_unlock(&rdp_gp->nocb_gp_kthread_mutex);
 
 	/* Spawn the kthread for this CPU. */
 	t = kthread_run(rcu_nocb_cb_kthread, rdp,
-- 
GitLab


From eae9f147a4b02e132187a2d88a403b9ccc28212a Mon Sep 17 00:00:00 2001
From: Neeraj Upadhyay <quic_neeraju@quicinc.com>
Date: Mon, 13 Dec 2021 12:32:09 +0530
Subject: [PATCH 0149/1586] rcu: Remove unused rcu_state.boost

Signed-off-by: Neeraj Upadhyay <quic_neeraju@quicinc.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/tree.h        | 5 ++---
 kernel/rcu/tree_plugin.h | 2 --
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 24dd4b0d805f1..e9990945483f1 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -304,9 +304,8 @@ struct rcu_state {
 
 	/* The following fields are guarded by the root rcu_node's lock. */
 
-	u8	boost ____cacheline_internodealigned_in_smp;
-						/* Subject to priority boost. */
-	unsigned long gp_seq;			/* Grace-period sequence #. */
+	unsigned long gp_seq ____cacheline_internodealigned_in_smp;
+						/* Grace-period sequence #. */
 	unsigned long gp_max;			/* Maximum GP duration in */
 						/*  jiffies. */
 	struct task_struct *gp_kthread;		/* Task for grace periods. */
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index c5b45c2f68a15..109429e70a642 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -1175,8 +1175,6 @@ static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp)
 	if (rnp->boost_kthread_task || !rcu_scheduler_fully_active)
 		return;
 
-	rcu_state.boost = 1;
-
 	t = kthread_create(rcu_boost_kthread, (void *)rnp,
 			   "rcub/%d", rnp_index);
 	if (WARN_ON_ONCE(IS_ERR(t)))
-- 
GitLab


From 4b4399b2450de38916718ba9947e6cdb69c99c55 Mon Sep 17 00:00:00 2001
From: Zqiang <qiang1.zhang@intel.com>
Date: Wed, 29 Dec 2021 00:05:10 +0800
Subject: [PATCH 0150/1586] rcu: Create per-cpu rcuc kthreads only when
 rcutree.use_softirq=0

The per-CPU "rcuc" kthreads are used only by kernels booted with
rcutree.use_softirq=0, but they are nevertheless unconditionally created
by kernels built with CONFIG_RCU_BOOST=y.  This results in "rcuc"
kthreads being created that are never actually used.  This commit
therefore refrains from creating these kthreads unless the kernel
is actually booted with rcutree.use_softirq=0.

Acked-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Zqiang <qiang1.zhang@intel.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/tree.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index a4c25a6283b0b..4e5e37e5ee3c9 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2894,7 +2894,7 @@ static int __init rcu_spawn_core_kthreads(void)
 
 	for_each_possible_cpu(cpu)
 		per_cpu(rcu_data.rcu_cpu_has_work, cpu) = 0;
-	if (!IS_ENABLED(CONFIG_RCU_BOOST) && use_softirq)
+	if (use_softirq)
 		return 0;
 	WARN_ONCE(smpboot_register_percpu_thread(&rcu_cpu_thread_spec),
 		  "%s: Could not start rcuc kthread, OOM is now expected behavior\n", __func__);
-- 
GitLab


From c8db27dd0ea8071d2ea29a1a401c4ccc611ec6c1 Mon Sep 17 00:00:00 2001
From: Alison Chaiken <achaiken@aurora.tech>
Date: Tue, 11 Jan 2022 15:32:50 -0800
Subject: [PATCH 0151/1586] rcu: Move kthread_prio bounds-check to a separate
 function

Move the bounds-check of the kthread_prio cmdline parameter to a new
function in order to faciliate a different callsite.

Signed-off-by: Alison Chaiken <achaiken@aurora.tech>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/tree.c | 39 +++++++++++++++++++++++----------------
 1 file changed, 23 insertions(+), 16 deletions(-)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 4e5e37e5ee3c9..5bf0312f66760 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -4440,26 +4440,10 @@ static int rcu_pm_notify(struct notifier_block *self,
 static int __init rcu_spawn_gp_kthread(void)
 {
 	unsigned long flags;
-	int kthread_prio_in = kthread_prio;
 	struct rcu_node *rnp;
 	struct sched_param sp;
 	struct task_struct *t;
 
-	/* Force priority into range. */
-	if (IS_ENABLED(CONFIG_RCU_BOOST) && kthread_prio < 2
-	    && IS_BUILTIN(CONFIG_RCU_TORTURE_TEST))
-		kthread_prio = 2;
-	else if (IS_ENABLED(CONFIG_RCU_BOOST) && kthread_prio < 1)
-		kthread_prio = 1;
-	else if (kthread_prio < 0)
-		kthread_prio = 0;
-	else if (kthread_prio > 99)
-		kthread_prio = 99;
-
-	if (kthread_prio != kthread_prio_in)
-		pr_alert("rcu_spawn_gp_kthread(): Limited prio to %d from %d\n",
-			 kthread_prio, kthread_prio_in);
-
 	rcu_scheduler_fully_active = 1;
 	t = kthread_create(rcu_gp_kthread, NULL, "%s", rcu_state.name);
 	if (WARN_ONCE(IS_ERR(t), "%s: Could not start grace-period kthread, OOM is now expected behavior\n", __func__))
@@ -4584,6 +4568,28 @@ static void __init rcu_init_one(void)
 	}
 }
 
+/*
+ * Force priority from the kernel command-line into range.
+ */
+static void __init sanitize_kthread_prio(void)
+{
+	int kthread_prio_in = kthread_prio;
+
+	if (IS_ENABLED(CONFIG_RCU_BOOST) && kthread_prio < 2
+	    && IS_BUILTIN(CONFIG_RCU_TORTURE_TEST))
+		kthread_prio = 2;
+	else if (IS_ENABLED(CONFIG_RCU_BOOST) && kthread_prio < 1)
+		kthread_prio = 1;
+	else if (kthread_prio < 0)
+		kthread_prio = 0;
+	else if (kthread_prio > 99)
+		kthread_prio = 99;
+
+	if (kthread_prio != kthread_prio_in)
+		pr_alert("%s: Limited prio to %d from %d\n",
+			 __func__, kthread_prio, kthread_prio_in);
+}
+
 /*
  * Compute the rcu_node tree geometry from kernel parameters.  This cannot
  * replace the definitions in tree.h because those are needed to size
@@ -4744,6 +4750,7 @@ void __init rcu_init(void)
 
 	kfree_rcu_batch_init();
 	rcu_bootup_announce();
+	sanitize_kthread_prio();
 	rcu_init_geometry();
 	rcu_init_one();
 	if (dump_tree)
-- 
GitLab


From 54577e23fa0791599db1a3d86fc8e7a205d3da75 Mon Sep 17 00:00:00 2001
From: Alison Chaiken <achaiken@aurora.tech>
Date: Tue, 11 Jan 2022 15:32:51 -0800
Subject: [PATCH 0152/1586] rcu: Make priority of grace-period thread
 consistent

The priority of RCU grace period threads is set to kthread_prio when
they are launched from rcu_spawn_gp_kthread().  The same is not true
of rcu_spawn_one_nocb_kthread().  Accordingly, add priority elevation
to rcu_spawn_one_nocb_kthread().

Signed-off-by: Alison Chaiken <achaiken@aurora.tech>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/tree_nocb.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
index 1e40519d1a05a..ea889cbfc3b95 100644
--- a/kernel/rcu/tree_nocb.h
+++ b/kernel/rcu/tree_nocb.h
@@ -1239,6 +1239,7 @@ static void rcu_spawn_cpu_nocb_kthread(int cpu)
 	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
 	struct rcu_data *rdp_gp;
 	struct task_struct *t;
+	struct sched_param sp;
 
 	if (!rcu_scheduler_fully_active || !rcu_nocb_is_setup)
 		return;
@@ -1248,6 +1249,7 @@ static void rcu_spawn_cpu_nocb_kthread(int cpu)
 		return;
 
 	/* If we didn't spawn the GP kthread first, reorganize! */
+	sp.sched_priority = kthread_prio;
 	rdp_gp = rdp->nocb_gp_rdp;
 	mutex_lock(&rdp_gp->nocb_gp_kthread_mutex);
 	if (!rdp_gp->nocb_gp_kthread) {
@@ -1258,6 +1260,8 @@ static void rcu_spawn_cpu_nocb_kthread(int cpu)
 			return;
 		}
 		WRITE_ONCE(rdp_gp->nocb_gp_kthread, t);
+		if (kthread_prio)
+			sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
 	}
 	mutex_unlock(&rdp_gp->nocb_gp_kthread_mutex);
 
-- 
GitLab


From c8b16a65267e35ecc5621dbc81cbe7e5b0992fce Mon Sep 17 00:00:00 2001
From: Alison Chaiken <achaiken@aurora.tech>
Date: Tue, 11 Jan 2022 15:32:52 -0800
Subject: [PATCH 0153/1586] rcu: Elevate priority of offloaded callback threads

When CONFIG_PREEMPT_RT=y, the rcutree.kthread_prio command-line
parameter signals initialization code to boost the priority of rcuc
callbacks to the designated value.  With the additional
CONFIG_RCU_NOCB_CPU=y configuration and an additional rcu_nocbs
command-line parameter, the callbacks on the listed cores are
offloaded to new rcuop kthreads that are not pinned to the cores whose
post-grace-period work is performed.  While the rcuop kthreads perform
the same function as the rcuc kthreads they offload, the kthread_prio
parameter only boosts the priority of the rcuc kthreads.  Fix this
inconsistency by elevating rcuop kthreads to the same priority as the rcuc
kthreads.

Signed-off-by: Alison Chaiken <achaiken@aurora.tech>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/tree.c      | 2 +-
 kernel/rcu/tree_nocb.h | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 5bf0312f66760..9e4c5b281f003 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -153,7 +153,7 @@ static void sync_sched_exp_online_cleanup(int cpu);
 static void check_cb_ovld_locked(struct rcu_data *rdp, struct rcu_node *rnp);
 static bool rcu_rdp_is_offloaded(struct rcu_data *rdp);
 
-/* rcuc/rcub kthread realtime priority */
+/* rcuc/rcub/rcuop kthread realtime priority */
 static int kthread_prio = IS_ENABLED(CONFIG_RCU_BOOST) ? 1 : 0;
 module_param(kthread_prio, int, 0444);
 
diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
index ea889cbfc3b95..547c41437c767 100644
--- a/kernel/rcu/tree_nocb.h
+++ b/kernel/rcu/tree_nocb.h
@@ -1270,6 +1270,9 @@ static void rcu_spawn_cpu_nocb_kthread(int cpu)
 			"rcuo%c/%d", rcu_state.abbr, cpu);
 	if (WARN_ONCE(IS_ERR(t), "%s: Could not start rcuo CB kthread, OOM is now expected behavior\n", __func__))
 		return;
+
+	if (kthread_prio)
+		sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
 	WRITE_ONCE(rdp->nocb_cb_kthread, t);
 	WRITE_ONCE(rdp->nocb_gp_kthread, rdp_gp->nocb_gp_kthread);
 }
-- 
GitLab


From a469948b20ed9898f61e9906fd9b135ad853bb94 Mon Sep 17 00:00:00 2001
From: Alison Chaiken <achaiken@aurora.tech>
Date: Tue, 11 Jan 2022 15:32:53 -0800
Subject: [PATCH 0154/1586] rcu: Update documentation regarding kthread_prio
 cmdline parameter

Inform readers that the priority of RCU no-callback threads will also
be boosted.

Signed-off-by: Alison Chaiken <achaiken@aurora.tech>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 Documentation/admin-guide/kernel-parameters.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index f5a27f067db9e..8e2e65122b993 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4504,6 +4504,8 @@
 			(the least-favored priority).  Otherwise, when
 			RCU_BOOST is not set, valid values are 0-99 and
 			the default is zero (non-realtime operation).
+			When RCU_NOCB_CPU is set, also adjust the
+			priority of NOCB callback kthreads.
 
 	rcutree.rcu_nocb_gp_stride= [KNL]
 			Set the number of NOCB callback kthreads in
-- 
GitLab


From 10c535787436d62ea28156a4b91365fd89b5a432 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Fri, 21 Jan 2022 12:40:08 -0800
Subject: [PATCH 0155/1586] rcu: Don't deboost before reporting expedited
 quiescent state

Currently rcu_preempt_deferred_qs_irqrestore() releases rnp->boost_mtx
before reporting the expedited quiescent state.  Under heavy real-time
load, this can result in this function being preempted before the
quiescent state is reported, which can in turn prevent the expedited grace
period from completing.  Tim Murray reports that the resulting expedited
grace periods can take hundreds of milliseconds and even more than one
second, when they should normally complete in less than a millisecond.

This was fine given that there were no particular response-time
constraints for synchronize_rcu_expedited(), as it was designed
for throughput rather than latency.  However, some users now need
sub-100-millisecond response-time constratints.

This patch therefore follows Neeraj's suggestion (seconded by Tim and
by Uladzislau Rezki) of simply reversing the two operations.

Reported-by: Tim Murray <timmurray@google.com>
Reported-by: Joel Fernandes <joelaf@google.com>
Reported-by: Neeraj Upadhyay <quic_neeraju@quicinc.com>
Reviewed-by: Neeraj Upadhyay <quic_neeraju@quicinc.com>
Reviewed-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
Tested-by: Tim Murray <timmurray@google.com>
Cc: Todd Kjos <tkjos@google.com>
Cc: Sandeep Patil <sspatil@google.com>
Cc: <stable@vger.kernel.org> # 5.4.x
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/tree_plugin.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 109429e70a642..02ac057ba3f83 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -556,16 +556,16 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
 			raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 		}
 
-		/* Unboost if we were boosted. */
-		if (IS_ENABLED(CONFIG_RCU_BOOST) && drop_boost_mutex)
-			rt_mutex_futex_unlock(&rnp->boost_mtx.rtmutex);
-
 		/*
 		 * If this was the last task on the expedited lists,
 		 * then we need to report up the rcu_node hierarchy.
 		 */
 		if (!empty_exp && empty_exp_now)
 			rcu_report_exp_rnp(rnp, true);
+
+		/* Unboost if we were boosted. */
+		if (IS_ENABLED(CONFIG_RCU_BOOST) && drop_boost_mutex)
+			rt_mutex_futex_unlock(&rnp->boost_mtx.rtmutex);
 	} else {
 		local_irq_restore(flags);
 	}
-- 
GitLab


From c9515875850fefcc79492c5189fe8431e75ddec5 Mon Sep 17 00:00:00 2001
From: Zqiang <qiang1.zhang@intel.com>
Date: Tue, 25 Jan 2022 10:47:44 +0800
Subject: [PATCH 0156/1586] rcu: Add per-CPU rcuc task dumps to RCU CPU stall
 warnings

When the rcutree.use_softirq kernel boot parameter is set to zero, all
RCU_SOFTIRQ processing is carried out by the per-CPU rcuc kthreads.
If these kthreads are being starved, quiescent states will not be
reported, which in turn means that the grace period will not end, which
can in turn trigger RCU CPU stall warnings.  This commit therefore dumps
stack traces of stalled CPUs' rcuc kthreads, which can help identify
what is preventing those kthreads from running.

Suggested-by: Ammar Faizi <ammarfaizi2@gnuweeb.org>
Reviewed-by: Ammar Faizi <ammarfaizi2@gnuweeb.org>
Signed-off-by: Zqiang <qiang1.zhang@intel.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/tree.c        |  3 +++
 kernel/rcu/tree.h        |  1 +
 kernel/rcu/tree_plugin.h |  3 +++
 kernel/rcu/tree_stall.h  | 35 +++++++++++++++++++++++++++++++++++
 4 files changed, 42 insertions(+)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 9e4c5b281f003..bd9b2af247abd 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2850,10 +2850,12 @@ static void rcu_cpu_kthread(unsigned int cpu)
 {
 	unsigned int *statusp = this_cpu_ptr(&rcu_data.rcu_cpu_kthread_status);
 	char work, *workp = this_cpu_ptr(&rcu_data.rcu_cpu_has_work);
+	unsigned long *j = this_cpu_ptr(&rcu_data.rcuc_activity);
 	int spincnt;
 
 	trace_rcu_utilization(TPS("Start CPU kthread@rcu_run"));
 	for (spincnt = 0; spincnt < 10; spincnt++) {
+		WRITE_ONCE(*j, jiffies);
 		local_bh_disable();
 		*statusp = RCU_KTHREAD_RUNNING;
 		local_irq_disable();
@@ -2874,6 +2876,7 @@ static void rcu_cpu_kthread(unsigned int cpu)
 	schedule_timeout_idle(2);
 	trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
 	*statusp = RCU_KTHREAD_WAITING;
+	WRITE_ONCE(*j, jiffies);
 }
 
 static struct smp_hotplug_thread rcu_cpu_thread_spec = {
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index e9990945483f1..b84cc5742c317 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -239,6 +239,7 @@ struct rcu_data {
 					/* rcuc per-CPU kthread or NULL. */
 	unsigned int rcu_cpu_kthread_status;
 	char rcu_cpu_has_work;
+	unsigned long rcuc_activity;
 
 	/* 7) Diagnostic data, including RCU CPU stall warnings. */
 	unsigned int softirq_snap;	/* Snapshot of softirq activity. */
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 02ac057ba3f83..8167cab1bffc8 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -996,12 +996,15 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
  */
 static void rcu_cpu_kthread_setup(unsigned int cpu)
 {
+	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
 #ifdef CONFIG_RCU_BOOST
 	struct sched_param sp;
 
 	sp.sched_priority = kthread_prio;
 	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
 #endif /* #ifdef CONFIG_RCU_BOOST */
+
+	WRITE_ONCE(rdp->rcuc_activity, jiffies);
 }
 
 #ifdef CONFIG_RCU_BOOST
diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
index 21bebf7c9030b..0c5d8516516af 100644
--- a/kernel/rcu/tree_stall.h
+++ b/kernel/rcu/tree_stall.h
@@ -379,6 +379,15 @@ static bool rcu_is_gp_kthread_starving(unsigned long *jp)
 	return j > 2 * HZ;
 }
 
+static bool rcu_is_rcuc_kthread_starving(struct rcu_data *rdp, unsigned long *jp)
+{
+	unsigned long j = jiffies - READ_ONCE(rdp->rcuc_activity);
+
+	if (jp)
+		*jp = j;
+	return j > 2 * HZ;
+}
+
 /*
  * Print out diagnostic information for the specified stalled CPU.
  *
@@ -430,6 +439,29 @@ static void print_cpu_stall_info(int cpu)
 	       falsepositive ? " (false positive?)" : "");
 }
 
+static void rcuc_kthread_dump(struct rcu_data *rdp)
+{
+	int cpu;
+	unsigned long j;
+	struct task_struct *rcuc;
+
+	rcuc = rdp->rcu_cpu_kthread_task;
+	if (!rcuc)
+		return;
+
+	cpu = task_cpu(rcuc);
+	if (cpu_is_offline(cpu) || idle_cpu(cpu))
+		return;
+
+	if (!rcu_is_rcuc_kthread_starving(rdp, &j))
+		return;
+
+	pr_err("%s kthread starved for %ld jiffies\n", rcuc->comm, j);
+	sched_show_task(rcuc);
+	if (!trigger_single_cpu_backtrace(cpu))
+		dump_cpu_task(cpu);
+}
+
 /* Complain about starvation of grace-period kthread.  */
 static void rcu_check_gp_kthread_starvation(void)
 {
@@ -601,6 +633,9 @@ static void print_cpu_stall(unsigned long gps)
 	rcu_check_gp_kthread_expired_fqs_timer();
 	rcu_check_gp_kthread_starvation();
 
+	if (!use_softirq)
+		rcuc_kthread_dump(rdp);
+
 	rcu_dump_cpu_stacks();
 
 	raw_spin_lock_irqsave_rcu_node(rnp, flags);
-- 
GitLab


From 6f81bd6a4e305d15d9c2a6a350e2876a7a814d7e Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Mon, 6 Dec 2021 15:12:14 -0800
Subject: [PATCH 0157/1586] rcutorture: Print message before invoking
 ->cb_barrier()

The various ->cb_barrier() functions, for example, rcu_barrier(),
sometimes cause rcutorture hangs.  But currently, the last console
message is the unenlightening "Stopping rcu_torture_stats".  This commit
therefore prints a message of the form "rcu_torture_cleanup: Invoking
rcu_barrier+0x0/0x1e0()" to help point people in the right direction.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/rcutorture.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 422f7e4cc08de..00400aef58184 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -2905,8 +2905,10 @@ rcu_torture_cleanup(void)
 	int i;
 
 	if (torture_cleanup_begin()) {
-		if (cur_ops->cb_barrier != NULL)
+		if (cur_ops->cb_barrier != NULL) {
+			pr_info("%s: Invoking %pS().\n", __func__, cur_ops->cb_barrier);
 			cur_ops->cb_barrier();
+		}
 		return;
 	}
 	if (!cur_ops) {
@@ -2961,8 +2963,10 @@ rcu_torture_cleanup(void)
 	 * Wait for all RCU callbacks to fire, then do torture-type-specific
 	 * cleanup operations.
 	 */
-	if (cur_ops->cb_barrier != NULL)
+	if (cur_ops->cb_barrier != NULL) {
+		pr_info("%s: Invoking %pS().\n", __func__, cur_ops->cb_barrier);
 		cur_ops->cb_barrier();
+	}
 	if (cur_ops->cleanup != NULL)
 		cur_ops->cleanup();
 
-- 
GitLab


From 2b4a7f20f160e6440848c62a70ee5dc5237a2c8b Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 16 Dec 2021 12:23:31 -0800
Subject: [PATCH 0158/1586] torture: Distinguish kthread stopping and being
 asked to stop

Right now, if a given kthread (call it "kthread") realizes that it needs
to stop, "Stopping kthread" is written to the console.  When the cleanup
code decides that it is time to stop that kthread, "Stopping kthread
tasks" is written to the console.  These two events might happen in
either order, especially in the case of time-based torture-test shutdown.

But it is hard to distinguish these, especially for those unfamiliar with
the torture tests.  This commit therefore changes the first case from
"Stopping kthread" to "kthread is stopping" to make things more clear.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/torture.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/torture.c b/kernel/torture.c
index ef27a6c824514..f55d803f995d4 100644
--- a/kernel/torture.c
+++ b/kernel/torture.c
@@ -911,7 +911,7 @@ void torture_kthread_stopping(char *title)
 {
 	char buf[128];
 
-	snprintf(buf, sizeof(buf), "Stopping %s", title);
+	snprintf(buf, sizeof(buf), "%s is stopping", title);
 	VERBOSE_TOROUT_STRING(buf);
 	while (!kthread_should_stop()) {
 		torture_shutdown_absorb(title);
-- 
GitLab


From 05b724655bf1908abf531dd0bce455e55703a3a8 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 16 Dec 2021 15:36:02 -0800
Subject: [PATCH 0159/1586] rcutorture: Increase visibility of forward-progress
 hangs

This commit adds a few pr_alert() calls to rcutorture's forward-progress
testing in order to better diagnose shutdown-time hangs.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/rcutorture.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 00400aef58184..fefc3fa1a9c2a 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -2281,6 +2281,7 @@ static void rcu_torture_fwd_prog_nr(struct rcu_fwd *rfp,
 	unsigned long stopat;
 	static DEFINE_TORTURE_RANDOM(trs);
 
+	pr_alert("%s: Starting forward-progress test %d\n", __func__, rfp->rcu_fwd_id);
 	if (!cur_ops->sync)
 		return; // Cannot do need_resched() forward progress testing without ->sync.
 	if (cur_ops->call && cur_ops->cb_barrier) {
@@ -2325,6 +2326,7 @@ static void rcu_torture_fwd_prog_nr(struct rcu_fwd *rfp,
 	if (selfpropcb) {
 		WRITE_ONCE(fcs.stop, 1);
 		cur_ops->sync(); /* Wait for running CB to complete. */
+		pr_alert("%s: Waiting for CBs: %pS() %d\n", __func__, cur_ops->cb_barrier, rfp->rcu_fwd_id);
 		cur_ops->cb_barrier(); /* Wait for queued callbacks. */
 	}
 
@@ -2353,6 +2355,7 @@ static void rcu_torture_fwd_prog_cr(struct rcu_fwd *rfp)
 	unsigned long stopat;
 	unsigned long stoppedat;
 
+	pr_alert("%s: Starting forward-progress test %d\n", __func__, rfp->rcu_fwd_id);
 	if (READ_ONCE(rcu_fwd_emergency_stop))
 		return; /* Get out of the way quickly, no GP wait! */
 	if (!cur_ops->call)
@@ -2414,6 +2417,7 @@ static void rcu_torture_fwd_prog_cr(struct rcu_fwd *rfp)
 	n_launders_cb_snap = READ_ONCE(rfp->n_launders_cb);
 	cver = READ_ONCE(rcu_torture_current_version) - cver;
 	gps = rcutorture_seq_diff(cur_ops->get_gp_seq(), gps);
+	pr_alert("%s: Waiting for CBs: %pS() %d\n", __func__, cur_ops->cb_barrier, rfp->rcu_fwd_id);
 	cur_ops->cb_barrier(); /* Wait for callbacks to be invoked. */
 	(void)rcu_torture_fwd_prog_cbfree(rfp);
 
-- 
GitLab


From e22ef8df415d924428e35c9c112526306e684adc Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Fri, 17 Dec 2021 12:33:53 -0800
Subject: [PATCH 0160/1586] rcutorture: Make rcu_fwd_cb_nodelay be a counter

Back when only one rcutorture kthread could do forward-progress testing,
it was just fine for rcu_fwd_cb_nodelay to be a non-atomic bool.  It was
set at the start of forward-progress testing and cleared at the end.
But now that there are multiple threads, the value can be cleared while
one of the threads is still doing forward-progress testing.  This commit
therefore makes rcu_fwd_cb_nodelay be an atomic counter, replacing the
WRITE_ONCE() operations with atomic_inc() and atomic_dec().

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/rcutorture.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index fefc3fa1a9c2a..afe95c6948957 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -284,7 +284,7 @@ static atomic_t barrier_cbs_invoked;	/* Barrier callbacks invoked. */
 static wait_queue_head_t *barrier_cbs_wq; /* Coordinate barrier testing. */
 static DECLARE_WAIT_QUEUE_HEAD(barrier_wq);
 
-static bool rcu_fwd_cb_nodelay;		/* Short rcu_torture_delay() delays. */
+static atomic_t rcu_fwd_cb_nodelay;	/* Short rcu_torture_delay() delays. */
 
 /*
  * Allocate an element from the rcu_tortures pool.
@@ -387,7 +387,7 @@ rcu_read_delay(struct torture_random_state *rrsp, struct rt_read_seg *rtrsp)
 	 * period, and we want a long delay occasionally to trigger
 	 * force_quiescent_state. */
 
-	if (!READ_ONCE(rcu_fwd_cb_nodelay) &&
+	if (!atomic_read(&rcu_fwd_cb_nodelay) &&
 	    !(torture_random(rrsp) % (nrealreaders * 2000 * longdelay_ms))) {
 		started = cur_ops->get_gp_seq();
 		ts = rcu_trace_clock_local();
@@ -1276,7 +1276,7 @@ rcu_torture_writer(void *arg)
 		boot_ended = rcu_inkernel_boot_has_ended();
 		stutter_waited = stutter_wait("rcu_torture_writer");
 		if (stutter_waited &&
-		    !READ_ONCE(rcu_fwd_cb_nodelay) &&
+		    !atomic_read(&rcu_fwd_cb_nodelay) &&
 		    !cur_ops->slow_gps &&
 		    !torture_must_stop() &&
 		    boot_ended)
@@ -2290,7 +2290,7 @@ static void rcu_torture_fwd_prog_nr(struct rcu_fwd *rfp,
 	}
 
 	/* Tight loop containing cond_resched(). */
-	WRITE_ONCE(rcu_fwd_cb_nodelay, true);
+	atomic_inc(&rcu_fwd_cb_nodelay);
 	cur_ops->sync(); /* Later readers see above write. */
 	if  (selfpropcb) {
 		WRITE_ONCE(fcs.stop, 0);
@@ -2335,7 +2335,7 @@ static void rcu_torture_fwd_prog_nr(struct rcu_fwd *rfp,
 		destroy_rcu_head_on_stack(&fcs.rh);
 	}
 	schedule_timeout_uninterruptible(HZ / 10); /* Let kthreads recover. */
-	WRITE_ONCE(rcu_fwd_cb_nodelay, false);
+	atomic_dec(&rcu_fwd_cb_nodelay);
 }
 
 /* Carry out call_rcu() forward-progress testing. */
@@ -2362,7 +2362,7 @@ static void rcu_torture_fwd_prog_cr(struct rcu_fwd *rfp)
 		return; /* Can't do call_rcu() fwd prog without ->call. */
 
 	/* Loop continuously posting RCU callbacks. */
-	WRITE_ONCE(rcu_fwd_cb_nodelay, true);
+	atomic_inc(&rcu_fwd_cb_nodelay);
 	cur_ops->sync(); /* Later readers see above write. */
 	WRITE_ONCE(rfp->rcu_fwd_startat, jiffies);
 	stopat = rfp->rcu_fwd_startat + MAX_FWD_CB_JIFFIES;
@@ -2435,7 +2435,7 @@ static void rcu_torture_fwd_prog_cr(struct rcu_fwd *rfp)
 	}
 	schedule_timeout_uninterruptible(HZ); /* Let CBs drain. */
 	tick_dep_clear_task(current, TICK_DEP_BIT_RCU);
-	WRITE_ONCE(rcu_fwd_cb_nodelay, false);
+	atomic_dec(&rcu_fwd_cb_nodelay);
 }
 
 
-- 
GitLab


From 02b51a1cf47977d8772c7dcc363ef6a1e6e59f21 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Fri, 17 Dec 2021 15:05:05 -0800
Subject: [PATCH 0161/1586] rcutorture: Add end-of-test check to
 rcu_torture_fwd_prog() loop

The second and subsequent forward-progress kthreads loop waiting for
the first forward-progress kthread to start the next test interval.
Unfortunately, if the test ends while one of those kthreads is waiting,
the test will hang.  This hang occurs because that wait loop fails to
check for the end of the test.  This commit therefore adds an end-of-test
check to that wait loop.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/rcutorture.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index afe95c6948957..e99658efd97f4 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -2515,7 +2515,7 @@ static int rcu_torture_fwd_prog(void *args)
 			firsttime = false;
 			WRITE_ONCE(rcu_fwd_seq, rcu_fwd_seq + 1);
 		} else {
-			while (READ_ONCE(rcu_fwd_seq) == oldseq)
+			while (READ_ONCE(rcu_fwd_seq) == oldseq && !torture_must_stop())
 				schedule_timeout_interruptible(1);
 			oldseq = READ_ONCE(rcu_fwd_seq);
 		}
-- 
GitLab


From 89440d2dad0cc2a781290470cb90402ebba481fc Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Tue, 28 Dec 2021 15:59:38 -0800
Subject: [PATCH 0162/1586] rcutorture: Fix rcu_fwd_mutex deadlock

The rcu_torture_fwd_cb_hist() function acquires rcu_fwd_mutex, but is
invoked from rcutorture_oom_notify() function, which hold this same
mutex across this call.  This commit fixes the resulting deadlock.

Reported-by: kernel test robot <oliver.sang@intel.com>
Tested-by: Oliver Sang <oliver.sang@intel.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/rcutorture.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index e99658efd97f4..1c8f40b90f70b 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -2180,7 +2180,6 @@ static void rcu_torture_fwd_cb_hist(struct rcu_fwd *rfp)
 	for (i = ARRAY_SIZE(rfp->n_launders_hist) - 1; i > 0; i--)
 		if (rfp->n_launders_hist[i].n_launders > 0)
 			break;
-	mutex_lock(&rcu_fwd_mutex); // Serialize histograms.
 	pr_alert("%s: Callback-invocation histogram %d (duration %lu jiffies):",
 		 __func__, rfp->rcu_fwd_id, jiffies - rfp->rcu_fwd_startat);
 	gps_old = rfp->rcu_launder_gp_seq_start;
@@ -2193,7 +2192,6 @@ static void rcu_torture_fwd_cb_hist(struct rcu_fwd *rfp)
 		gps_old = gps;
 	}
 	pr_cont("\n");
-	mutex_unlock(&rcu_fwd_mutex);
 }
 
 /* Callback function for continuous-flood RCU callbacks. */
@@ -2431,7 +2429,9 @@ static void rcu_torture_fwd_prog_cr(struct rcu_fwd *rfp)
 			 n_launders, n_launders_sa,
 			 n_max_gps, n_max_cbs, cver, gps);
 		atomic_long_add(n_max_cbs, &rcu_fwd_max_cbs);
+		mutex_lock(&rcu_fwd_mutex); // Serialize histograms.
 		rcu_torture_fwd_cb_hist(rfp);
+		mutex_unlock(&rcu_fwd_mutex);
 	}
 	schedule_timeout_uninterruptible(HZ); /* Let CBs drain. */
 	tick_dep_clear_task(current, TICK_DEP_BIT_RCU);
-- 
GitLab


From 6b8646a9d37c6324cf994dbefb75f3eb20b109ab Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Mon, 3 Jan 2022 06:07:09 -0800
Subject: [PATCH 0163/1586] torture: Wake up kthreads after storing task_struct
 pointer

Currently, _torture_create_kthread() uses kthread_run() to create
torture-test kthreads, which means that the resulting task_struct
pointer is stored after the newly created kthread has been marked
runnable.  This in turn can cause spurious failure of checks for
code being run by a particular kthread.  This commit therefore changes
_torture_create_kthread() to use kthread_create(), then to do an explicit
wake_up_process() after the task_struct pointer has been stored.

Reported-by: Frederic Weisbecker <frederic@kernel.org>
Reviewed-by: Neeraj Upadhyay <quic_neeraju@quicinc.com>
Reviewed-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/torture.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/kernel/torture.c b/kernel/torture.c
index f55d803f995d4..789aeb0e1159c 100644
--- a/kernel/torture.c
+++ b/kernel/torture.c
@@ -931,12 +931,14 @@ int _torture_create_kthread(int (*fn)(void *arg), void *arg, char *s, char *m,
 	int ret = 0;
 
 	VERBOSE_TOROUT_STRING(m);
-	*tp = kthread_run(fn, arg, "%s", s);
+	*tp = kthread_create(fn, arg, "%s", s);
 	if (IS_ERR(*tp)) {
 		ret = PTR_ERR(*tp);
 		TOROUT_ERRSTRING(f);
 		*tp = NULL;
+		return ret;
 	}
+	wake_up_process(*tp);  // Process is sleeping, so ordering provided.
 	torture_shuffle_task_register(*tp);
 	return ret;
 }
-- 
GitLab


From 9c0f1c7fd7c6e1e0f5b84e20c577fbab62563d03 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 27 Jan 2022 20:29:10 -0800
Subject: [PATCH 0164/1586] rcutorture: Enable limited callback-flooding tests
 of SRCU

This commit allows up to 50,000 callbacks worth of callback-flooding
tests of SRCU.  The goal of this change is to exercise Tree SRCU's
ability to transition from SRCU_SIZE_SMALL to SRCU_SIZE_BIG triggered
by callback-queue-time lock contention.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/rcutorture.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 1c8f40b90f70b..b41db719085ef 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -674,6 +674,7 @@ static struct rcu_torture_ops srcu_ops = {
 	.call		= srcu_torture_call,
 	.cb_barrier	= srcu_torture_barrier,
 	.stats		= srcu_torture_stats,
+	.cbflood_max	= 50000,
 	.irq_capable	= 1,
 	.no_pi_lock	= IS_ENABLED(CONFIG_TINY_SRCU),
 	.name		= "srcu"
@@ -708,6 +709,7 @@ static struct rcu_torture_ops srcud_ops = {
 	.call		= srcu_torture_call,
 	.cb_barrier	= srcu_torture_barrier,
 	.stats		= srcu_torture_stats,
+	.cbflood_max	= 50000,
 	.irq_capable	= 1,
 	.no_pi_lock	= IS_ENABLED(CONFIG_TINY_SRCU),
 	.name		= "srcud"
-- 
GitLab


From bee6f2169935658fc405128a8b763ea49f50985c Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Wed, 1 Dec 2021 19:19:13 -0800
Subject: [PATCH 0165/1586] torture: Drop trailing ^M from console output

Console logs can sometimes have trailing control-M characters, which the
forward-progress evaluation code in kvm-recheck-rcu.sh passes through
to the user output.  Which does not cause a technical problem, but which
can look ugly.  This commit therefore strips the control-M characters.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
index 1c4c2c727dad1..43e1387234d11 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
@@ -25,7 +25,7 @@ stopstate="`grep 'End-test grace-period state: g' $i/console.log 2> /dev/null |
 	    tail -1 | sed -e 's/^\[[ 0-9.]*] //' |
 	    awk '{ print \"[\" $1 \" \" $5 \" \" $6 \" \" $7 \"]\"; }' |
 	    tr -d '\012\015'`"
-fwdprog="`grep 'rcu_torture_fwd_prog n_max_cbs: ' $i/console.log 2> /dev/null | sed -e 's/^\[[^]]*] //' | sort -k3nr | head -1 | awk '{ print $2 " " $3 }'`"
+fwdprog="`grep 'rcu_torture_fwd_prog n_max_cbs: ' $i/console.log 2> /dev/null | sed -e 's/^\[[^]]*] //' | sort -k3nr | head -1 | awk '{ print $2 " " $3 }' | tr -d '\015'`"
 if test -z "$ngps"
 then
 	echo "$configfile ------- " $stopstate
-- 
GitLab


From 010e5773b2050db260395a240e2f7adee3108603 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 2 Dec 2021 11:24:05 -0800
Subject: [PATCH 0166/1586] torture: Allow four-digit repetition numbers for
 --configs parameter

In a clear-cut case of "not thinking big enough", kvm.sh limits the
multipliers for torture-test scenarios to three digits.  Although this is
large enough for any single system that I have ever run rcutorture on,
it does become a problem when you want to use kvm-remote.sh to run as
many instances of TREE09 as fit on a set of 20 systems with 80 CPUs each.

Yes, one could simply say "--configs '800*TREE09 800*TREE09'", but this
commit removes the need for that sort of hacky workaround by permitting
four-digit repetition numbers, thus allowing "--configs '1600*TREE09'".

Five-digit repetition numbers remain off the menu.  Should they ever
really be needed, they can easily be added!

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/bin/kvm.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index 6de0c183db5b9..348ad177a5acc 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -280,7 +280,7 @@ configs_derep=
 for CF in $configs
 do
 	case $CF in
-	[0-9]\**|[0-9][0-9]\**|[0-9][0-9][0-9]\**)
+	[0-9]\**|[0-9][0-9]\**|[0-9][0-9][0-9]\**|[0-9][0-9][0-9][0-9]\**)
 		config_reps=`echo $CF | sed -e 's/\*.*$//'`
 		CF1=`echo $CF | sed -e 's/^[^*]*\*//'`
 		;;
-- 
GitLab


From 21fbc62576b67d6db66cae88710f82bd51a556e1 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Sat, 4 Dec 2021 13:53:24 -0800
Subject: [PATCH 0167/1586] torture: Output per-failed-run summary lines from
 torture.sh

Currently, torture.sh lists the failed runs, but it is up to the user
to work out what failed.  This is especially annoying for KCSAN runs,
where RCU's tighter definitions result in failures being reported for
other parts of the kernel.  This commit therefore outputs "Summary:"
lines for each failed run, allowing the user to more quickly identify
which failed runs need focused attention.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/bin/torture.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh
index eae88aacca2aa..894f589dd5625 100755
--- a/tools/testing/selftests/rcutorture/bin/torture.sh
+++ b/tools/testing/selftests/rcutorture/bin/torture.sh
@@ -414,7 +414,7 @@ nfailures=0
 echo FAILURES: | tee -a $T/log
 if test -s "$T/failures"
 then
-	cat "$T/failures" | tee -a $T/log
+	awk < "$T/failures" -v sq="'" '{ print "echo " sq $0 sq; print "grep Summary: " $2 "/log | sed -e " sq "s/^[^S]*/  /" sq; }' | sh | tee -a $T/log
 	nfailures="`wc -l "$T/failures" | awk '{ print $1 }'`"
 	ret=2
 fi
-- 
GitLab


From a711aaccf6b389b7fa7f3e35b4f99c437a36b9b2 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Sat, 4 Dec 2021 21:00:55 -0800
Subject: [PATCH 0168/1586] torture: Make kvm.sh summaries note runs having
 only KCSAN reports

Runs having only KCSAN reports will normally print a summary line
containing only a "Bugs:" entry.  However, these bugs might or might
not be KCSAN reports.  This commit therefore flags runs in which all the
"Bugs:" entries are KCSAN reports.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 .../selftests/rcutorture/bin/console-badness.sh        |  2 +-
 .../testing/selftests/rcutorture/bin/parse-console.sh  | 10 ++++++++++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/rcutorture/bin/console-badness.sh b/tools/testing/selftests/rcutorture/bin/console-badness.sh
index e6a132df61721..69f8a5958cefc 100755
--- a/tools/testing/selftests/rcutorture/bin/console-badness.sh
+++ b/tools/testing/selftests/rcutorture/bin/console-badness.sh
@@ -10,7 +10,7 @@
 #
 # Authors: Paul E. McKenney <paulmck@kernel.org>
 
-egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for|!!!' |
+egrep 'Badness|WARNING:|Warn|BUG|===========|BUG: KCSAN:|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for|!!!' |
 grep -v 'ODEBUG: ' |
 grep -v 'This means that this is a DEBUG kernel and it is' |
 grep -v 'Warning: unable to open an initial console' |
diff --git a/tools/testing/selftests/rcutorture/bin/parse-console.sh b/tools/testing/selftests/rcutorture/bin/parse-console.sh
index 9f624bd53c277..822eb037a0573 100755
--- a/tools/testing/selftests/rcutorture/bin/parse-console.sh
+++ b/tools/testing/selftests/rcutorture/bin/parse-console.sh
@@ -138,6 +138,16 @@ then
 	then
 		summary="$summary  Bugs: $n_bugs"
 	fi
+	n_kcsan=`egrep -c 'BUG: KCSAN: ' $file`
+	if test "$n_kcsan" -ne 0
+	then
+		if test "$n_bugs" = "$n_kcsan"
+		then
+			summary="$summary (all bugs kcsan)"
+		else
+			summary="$summary  KCSAN: $n_kcsan"
+		fi
+	fi
 	n_calltrace=`grep -c 'Call Trace:' $file`
 	if test "$n_calltrace" -ne 0
 	then
-- 
GitLab


From 9a32ed1cf6cdebff7ccd82b91f8021267ed0d9cb Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Mon, 6 Dec 2021 09:13:37 -0800
Subject: [PATCH 0169/1586] torture: Indicate which torture.sh runs' bugs are
 all KCSAN reports

This commit further improves torture.sh run summaries by indicating
which runs' "Bugs:" counts are all KCSAN reports, and further printing
an additional end-of-run summary line when all errors reported in all
runs were KCSAN reports.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/bin/torture.sh | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh
index 894f589dd5625..bddce72ea5ce4 100755
--- a/tools/testing/selftests/rcutorture/bin/torture.sh
+++ b/tools/testing/selftests/rcutorture/bin/torture.sh
@@ -414,8 +414,14 @@ nfailures=0
 echo FAILURES: | tee -a $T/log
 if test -s "$T/failures"
 then
-	awk < "$T/failures" -v sq="'" '{ print "echo " sq $0 sq; print "grep Summary: " $2 "/log | sed -e " sq "s/^[^S]*/  /" sq; }' | sh | tee -a $T/log
+	awk < "$T/failures" -v sq="'" '{ print "echo " sq $0 sq; print "grep Summary: " $2 "/log | sed -e " sq "s/^[^S]*/  /" sq; }' | sh | tee -a $T/log | tee "$T/failuresum"
 	nfailures="`wc -l "$T/failures" | awk '{ print $1 }'`"
+	grep "^  Summary: " "$T/failuresum" |
+		grep -v '^  Summary: Bugs: [0-9]* (all bugs kcsan)$' > "$T/nonkcsan"
+	if test -s "$T/nonkcsan"
+	then
+		nonkcsanbug="yes"
+	fi
 	ret=2
 fi
 if test "$do_kcsan" = "yes"
@@ -424,6 +430,10 @@ then
 fi
 echo Started at $startdate, ended at `date`, duration `get_starttime_duration $starttime`. | tee -a $T/log
 echo Summary: Successes: $nsuccesses Failures: $nfailures. | tee -a $T/log
+if test -z "$nonkcsanbug" && test -s "$T/failuresum"
+then
+	echo "  All bugs were KCSAN failures."
+fi
 tdir="`cat $T/successes $T/failures | head -1 | awk '{ print $NF }' | sed -e 's,/[^/]\+/*$,,'`"
 if test -n "$tdir" && test $compress_kasan_vmlinux -gt 0
 then
-- 
GitLab


From 99c80a96a512f32a234687343ff1c8e5e033976b Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Fri, 17 Dec 2021 16:14:31 -0800
Subject: [PATCH 0170/1586] torture: Compress KCSAN as well as KASAN vmlinux
 files

Compressing KASAN vmlinux files reduces torture.sh res file size from
about 100G to about 50G, which is good, but the KCSAN vmlinux files
are also large.  Compressing them reduces their size from about 700M to
about 100M (but of course your mileage may vary).  This commit therefore
compresses both KASAN and KCSAN vmlinux files.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 .../selftests/rcutorture/bin/torture.sh        | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh
index bddce72ea5ce4..a5f1c5fbefe4c 100755
--- a/tools/testing/selftests/rcutorture/bin/torture.sh
+++ b/tools/testing/selftests/rcutorture/bin/torture.sh
@@ -37,7 +37,7 @@ configs_scftorture=
 kcsan_kmake_args=
 
 # Default compression, duration, and apportionment.
-compress_kasan_vmlinux="`identify_qemu_vcpus`"
+compress_concurrency="`identify_qemu_vcpus`"
 duration_base=10
 duration_rcutorture_frac=7
 duration_locktorture_frac=1
@@ -67,7 +67,7 @@ function doyesno () {
 
 usage () {
 	echo "Usage: $scriptname optional arguments:"
-	echo "       --compress-kasan-vmlinux concurrency"
+	echo "       --compress-concurrency concurrency"
 	echo "       --configs-rcutorture \"config-file list w/ repeat factor (3*TINY01)\""
 	echo "       --configs-locktorture \"config-file list w/ repeat factor (10*LOCK01)\""
 	echo "       --configs-scftorture \"config-file list w/ repeat factor (2*CFLIST)\""
@@ -91,9 +91,9 @@ usage () {
 while test $# -gt 0
 do
 	case "$1" in
-	--compress-kasan-vmlinux)
-		checkarg --compress-kasan-vmlinux "(concurrency level)" $# "$2" '^[0-9][0-9]*$' '^error'
-		compress_kasan_vmlinux=$2
+	--compress-concurrency)
+		checkarg --compress-concurrency "(concurrency level)" $# "$2" '^[0-9][0-9]*$' '^error'
+		compress_concurrency=$2
 		shift
 		;;
 	--config-rcutorture|--configs-rcutorture)
@@ -435,11 +435,11 @@ then
 	echo "  All bugs were KCSAN failures."
 fi
 tdir="`cat $T/successes $T/failures | head -1 | awk '{ print $NF }' | sed -e 's,/[^/]\+/*$,,'`"
-if test -n "$tdir" && test $compress_kasan_vmlinux -gt 0
+if test -n "$tdir" && test $compress_concurrency -gt 0
 then
 	# KASAN vmlinux files can approach 1GB in size, so compress them.
-	echo Looking for KASAN files to compress: `date` > "$tdir/log-xz" 2>&1
-	find "$tdir" -type d -name '*-kasan' -print > $T/xz-todo
+	echo Looking for K[AC]SAN files to compress: `date` > "$tdir/log-xz" 2>&1
+	find "$tdir" -type d -name '*-k[ac]san' -print > $T/xz-todo
 	ncompresses=0
 	batchno=1
 	if test -s $T/xz-todo
@@ -457,7 +457,7 @@ then
 			do
 				xz "$j" >> "$tdir/log-xz" 2>&1 &
 				ncompresses=$((ncompresses+1))
-				if test $ncompresses -ge $compress_kasan_vmlinux
+				if test $ncompresses -ge $compress_concurrency
 				then
 					echo Waiting for batch $batchno of $ncompresses compressions `date` | tee -a "$tdir/log-xz" | tee -a $T/log
 					wait
-- 
GitLab


From 2bc9062e7f394b3c6cbe408a13523c0a4bd13d2a Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Mon, 20 Dec 2021 20:24:25 -0800
Subject: [PATCH 0171/1586] torture: Make kvm-remote.sh try multiple times to
 download tarball

This commit ups the retries for downloading the build-product tarball
to a given remote system from once to five times, the better to handle
transient network failures.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 .../selftests/rcutorture/bin/kvm-remote.sh      | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
index e09b1bc787084..29b068a55b466 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
@@ -155,18 +155,23 @@ do
 	echo Downloading tarball to $i `date` | tee -a "$oldrun/remote-log"
 	cat $T/binres.tgz | ssh $i "cd /tmp; tar -xzf -"
 	ret=$?
-	if test "$ret" -ne 0
-	then
-		echo Unable to download $T/binres.tgz to system $i, waiting and then retrying. | tee -a "$oldrun/remote-log"
+	tries=0
+	while test "$ret" -ne 0
+	do
+		echo Unable to download $T/binres.tgz to system $i, waiting and then retrying.  $tries prior retries. | tee -a "$oldrun/remote-log"
 		sleep 60
 		cat $T/binres.tgz | ssh $i "cd /tmp; tar -xzf -"
 		ret=$?
 		if test "$ret" -ne 0
 		then
-			echo Unable to download $T/binres.tgz to system $i, giving up. | tee -a "$oldrun/remote-log"
-			exit 10
+			if test "$tries" > 5
+			then
+				echo Unable to download $T/binres.tgz to system $i, giving up. | tee -a "$oldrun/remote-log"
+				exit 10
+			fi
 		fi
-	fi
+		tries=$((tries+1))
+	done
 done
 
 # Function to check for presence of a file on the specified system.
-- 
GitLab


From b376005eb3a8aedcde437d3d495418d63ce5451c Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Mon, 27 Dec 2021 21:21:35 -0800
Subject: [PATCH 0172/1586] torture: Print only one summary line per run

The torture.sh scripts currently duplicates the summary lines, getting
one during the run phase and one during the summary phase of each run.
This commit therefore removes the run phase from consideration so as to
get only one summary line per run.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/bin/torture.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh
index a5f1c5fbefe4c..d1cb60085d8f2 100755
--- a/tools/testing/selftests/rcutorture/bin/torture.sh
+++ b/tools/testing/selftests/rcutorture/bin/torture.sh
@@ -414,7 +414,7 @@ nfailures=0
 echo FAILURES: | tee -a $T/log
 if test -s "$T/failures"
 then
-	awk < "$T/failures" -v sq="'" '{ print "echo " sq $0 sq; print "grep Summary: " $2 "/log | sed -e " sq "s/^[^S]*/  /" sq; }' | sh | tee -a $T/log | tee "$T/failuresum"
+	awk < "$T/failures" -v sq="'" '{ print "echo " sq $0 sq; print "sed -e " sq "1,/^ --- .* Test summary:$/d" sq " " $2 "/log | grep Summary: | sed -e " sq "s/^[^S]*/  /" sq; }' | sh | tee -a $T/log | tee "$T/failuresum"
 	nfailures="`wc -l "$T/failures" | awk '{ print $1 }'`"
 	grep "^  Summary: " "$T/failuresum" |
 		grep -v '^  Summary: Bugs: [0-9]* (all bugs kcsan)$' > "$T/nonkcsan"
-- 
GitLab


From e31ccc1ddd2ad3e14f02a53cfa22e6b2b98c6dab Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Tue, 18 Jan 2022 15:40:49 -0800
Subject: [PATCH 0173/1586] torture: Make kvm-find-errors.sh notice missing
 vmlinux file

Currently, an obtuse compiler diagnostic can fool kvm-find-errors.sh
into believing that the build was successful.  This commit therefore
adds a check for a missing vmlinux file.  Note that in the case of
repeated torture-test scenarios ("--configs '2*TREE01'"), the vmlinux
file will only be present in the first directory, that is, in TREE01
but not TREE01.2.

Link: https://lore.kernel.org/lkml/36bd91e4-8eda-5677-7fde-40295932a640@molgen.mpg.de/
Reported-by: Paul Menzel <pmenzel@molgen.mpg.de>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
index 2e9e9e2eedb69..5f682fc892dd4 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
@@ -30,10 +30,16 @@ editor=${EDITOR-vi}
 files=
 for i in ${rundir}/*/Make.out
 do
+	scenariodir="`dirname $i`"
+	scenariobasedir="`echo ${scenariodir} | sed -e 's/\.[0-9]*$//'`"
 	if egrep -q "error:|warning:|^ld: .*undefined reference to" < $i
 	then
 		egrep "error:|warning:|^ld: .*undefined reference to" < $i > $i.diags
 		files="$files $i.diags $i"
+	elif ! test -f ${scenariobasedir}/vmlinux
+	then
+		echo No ${scenariobasedir}/vmlinux file > $i.diags
+		files="$files $i.diags $i"
 	fi
 done
 if test -n "$files"
-- 
GitLab


From a7d89cfb8e1269cb6d22453adba56b8d0218589f Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Tue, 25 Jan 2022 21:08:55 -0800
Subject: [PATCH 0174/1586] torture: Change KVM environment variable to
 RCUTORTURE

The torture-test scripting's long-standing use of KVM as the environment
variable tracking the pathname of the rcutorture directory now conflicts
with allmodconfig builds due to the virt/kvm/Makefile.kvm file's use
of this as a makefile variable.  This commit therefore changes the
torture-test scripting from KVM to RCUTORTURE, avoiding the name conflict.

Reported-by: Zhouyi Zhou <zhouzhouyi@gmail.com>
Tested-by: Zhouyi Zhou <zhouzhouyi@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 .../testing/selftests/rcutorture/bin/kvm-again.sh  |  4 ++--
 .../selftests/rcutorture/bin/kvm-check-branches.sh |  4 ++--
 .../selftests/rcutorture/bin/kvm-end-run-stats.sh  |  4 ++--
 .../testing/selftests/rcutorture/bin/kvm-remote.sh |  8 ++++----
 tools/testing/selftests/rcutorture/bin/kvm.sh      | 14 +++++++-------
 tools/testing/selftests/rcutorture/bin/torture.sh  |  4 ++--
 6 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/tools/testing/selftests/rcutorture/bin/kvm-again.sh b/tools/testing/selftests/rcutorture/bin/kvm-again.sh
index 5a0023d183dac..0941f1ddab658 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-again.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-again.sh
@@ -47,8 +47,8 @@ else
 	exit 1
 fi
 
-KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM
-PATH=${KVM}/bin:$PATH; export PATH
+RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE
+PATH=${RCUTORTURE}/bin:$PATH; export PATH
 . functions.sh
 
 dryrun=
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh b/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh
index 370406bbfeedc..f17000a2ccf1f 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh
@@ -49,8 +49,8 @@ fi
 mkdir $resdir/$ds
 echo Results directory: $resdir/$ds
 
-KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM
-PATH=${KVM}/bin:$PATH; export PATH
+RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE
+PATH=${RCUTORTURE}/bin:$PATH; export PATH
 . functions.sh
 echo Using all `identify_qemu_vcpus` CPUs.
 
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-end-run-stats.sh b/tools/testing/selftests/rcutorture/bin/kvm-end-run-stats.sh
index e4a00779b8c69..ee886b40a5d2c 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-end-run-stats.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-end-run-stats.sh
@@ -22,8 +22,8 @@ T=${TMPDIR-/tmp}/kvm-end-run-stats.sh.$$
 trap 'rm -rf $T' 0
 mkdir $T
 
-KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM
-PATH=${KVM}/bin:$PATH; export PATH
+RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE
+PATH=${RCUTORTURE}/bin:$PATH; export PATH
 . functions.sh
 default_starttime="`get_starttime`"
 starttime="${2-default_starttime}"
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
index 29b068a55b466..8c4c1e4792d02 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
@@ -19,8 +19,8 @@ then
 	exit 1
 fi
 
-KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM
-PATH=${KVM}/bin:$PATH; export PATH
+RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE
+PATH=${RCUTORTURE}/bin:$PATH; export PATH
 . functions.sh
 
 starttime="`get_starttime`"
@@ -108,8 +108,8 @@ else
 		cat $T/kvm-again.sh.out | tee -a "$oldrun/remote-log"
 		exit 2
 	fi
-	cp -a "$rundir" "$KVM/res/"
-	oldrun="$KVM/res/$ds"
+	cp -a "$rundir" "$RCUTORTURE/res/"
+	oldrun="$RCUTORTURE/res/$ds"
 fi
 echo | tee -a "$oldrun/remote-log"
 echo " ----" kvm-again.sh output: "(`date`)" | tee -a "$oldrun/remote-log"
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index 348ad177a5acc..55b2c15332827 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -25,15 +25,15 @@ LANG=en_US.UTF-8; export LANG
 
 dur=$((30*60))
 dryrun=""
-KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM
-PATH=${KVM}/bin:$PATH; export PATH
+RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE
+PATH=${RCUTORTURE}/bin:$PATH; export PATH
 . functions.sh
 
 TORTURE_ALLOTED_CPUS="`identify_qemu_vcpus`"
 TORTURE_DEFCONFIG=defconfig
 TORTURE_BOOT_IMAGE=""
 TORTURE_BUILDONLY=
-TORTURE_INITRD="$KVM/initrd"; export TORTURE_INITRD
+TORTURE_INITRD="$RCUTORTURE/initrd"; export TORTURE_INITRD
 TORTURE_KCONFIG_ARG=""
 TORTURE_KCONFIG_GDB_ARG=""
 TORTURE_BOOT_GDB_ARG=""
@@ -262,7 +262,7 @@ else
 	exit 1
 fi
 
-CONFIGFRAG=${KVM}/configs/${TORTURE_SUITE}; export CONFIGFRAG
+CONFIGFRAG=${RCUTORTURE}/configs/${TORTURE_SUITE}; export CONFIGFRAG
 
 defaultconfigs="`tr '\012' ' ' < $CONFIGFRAG/CFLIST`"
 if test -z "$configs"
@@ -272,7 +272,7 @@ fi
 
 if test -z "$resdir"
 then
-	resdir=$KVM/res
+	resdir=$RCUTORTURE/res
 fi
 
 # Create a file of test-name/#cpus pairs, sorted by decreasing #cpus.
@@ -386,7 +386,7 @@ END {
 # Generate a script to execute the tests in appropriate batches.
 cat << ___EOF___ > $T/script
 CONFIGFRAG="$CONFIGFRAG"; export CONFIGFRAG
-KVM="$KVM"; export KVM
+RCUTORTURE="$RCUTORTURE"; export RCUTORTURE
 PATH="$PATH"; export PATH
 TORTURE_ALLOTED_CPUS="$TORTURE_ALLOTED_CPUS"; export TORTURE_ALLOTED_CPUS
 TORTURE_BOOT_IMAGE="$TORTURE_BOOT_IMAGE"; export TORTURE_BOOT_IMAGE
@@ -569,7 +569,7 @@ ___EOF___
 awk < $T/cfgcpu.pack \
 	-v TORTURE_BUILDONLY="$TORTURE_BUILDONLY" \
 	-v CONFIGDIR="$CONFIGFRAG/" \
-	-v KVM="$KVM" \
+	-v RCUTORTURE="$RCUTORTURE" \
 	-v ncpus=$cpus \
 	-v jitter="$jitter" \
 	-v rd=$resdir/$ds/ \
diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh
index d1cb60085d8f2..e00e60efb2310 100755
--- a/tools/testing/selftests/rcutorture/bin/torture.sh
+++ b/tools/testing/selftests/rcutorture/bin/torture.sh
@@ -13,8 +13,8 @@
 scriptname=$0
 args="$*"
 
-KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM
-PATH=${KVM}/bin:$PATH; export PATH
+RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE
+PATH=${RCUTORTURE}/bin:$PATH; export PATH
 . functions.sh
 
 TORTURE_ALLOTED_CPUS="`identify_qemu_vcpus`"
-- 
GitLab


From e2b665f612ca2ddc61c3d54817a3a780aee6b251 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Tue, 1 Feb 2022 14:00:08 -0500
Subject: [PATCH 0175/1586] tools/memory-model: Explain syntactic and semantic
 dependencies
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Paul Heidekrüger pointed out that the Linux Kernel Memory Model
documentation doesn't mention the distinction between syntactic and
semantic dependencies.  This is an important difference, because the
compiler can easily break dependencies that are only syntactic, not
semantic.

This patch adds a few paragraphs to the LKMM documentation explaining
these issues and illustrating how they can matter.

Suggested-by: Paul Heidekrüger <paul.heidekrueger@in.tum.de>
Reviewed-by: Akira Yokosawa <akiyks@gmail.com>
Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 .../Documentation/explanation.txt             | 51 +++++++++++++++++++
 1 file changed, 51 insertions(+)

diff --git a/tools/memory-model/Documentation/explanation.txt b/tools/memory-model/Documentation/explanation.txt
index 394ee57d58f2f..ee819a402b698 100644
--- a/tools/memory-model/Documentation/explanation.txt
+++ b/tools/memory-model/Documentation/explanation.txt
@@ -485,6 +485,57 @@ have R ->po X.  It wouldn't make sense for a computation to depend
 somehow on a value that doesn't get loaded from shared memory until
 later in the code!
 
+Here's a trick question: When is a dependency not a dependency?  Answer:
+When it is purely syntactic rather than semantic.  We say a dependency
+between two accesses is purely syntactic if the second access doesn't
+actually depend on the result of the first.  Here is a trivial example:
+
+	r1 = READ_ONCE(x);
+	WRITE_ONCE(y, r1 * 0);
+
+There appears to be a data dependency from the load of x to the store
+of y, since the value to be stored is computed from the value that was
+loaded.  But in fact, the value stored does not really depend on
+anything since it will always be 0.  Thus the data dependency is only
+syntactic (it appears to exist in the code) but not semantic (the
+second access will always be the same, regardless of the value of the
+first access).  Given code like this, a compiler could simply discard
+the value returned by the load from x, which would certainly destroy
+any dependency.  (The compiler is not permitted to eliminate entirely
+the load generated for a READ_ONCE() -- that's one of the nice
+properties of READ_ONCE() -- but it is allowed to ignore the load's
+value.)
+
+It's natural to object that no one in their right mind would write
+code like the above.  However, macro expansions can easily give rise
+to this sort of thing, in ways that often are not apparent to the
+programmer.
+
+Another mechanism that can lead to purely syntactic dependencies is
+related to the notion of "undefined behavior".  Certain program
+behaviors are called "undefined" in the C language specification,
+which means that when they occur there are no guarantees at all about
+the outcome.  Consider the following example:
+
+	int a[1];
+	int i;
+
+	r1 = READ_ONCE(i);
+	r2 = READ_ONCE(a[r1]);
+
+Access beyond the end or before the beginning of an array is one kind
+of undefined behavior.  Therefore the compiler doesn't have to worry
+about what will happen if r1 is nonzero, and it can assume that r1
+will always be zero regardless of the value actually loaded from i.
+(If the assumption turns out to be wrong the resulting behavior will
+be undefined anyway, so the compiler doesn't care!)  Thus the value
+from the load can be discarded, breaking the address dependency.
+
+The LKMM is unaware that purely syntactic dependencies are different
+from semantic dependencies and therefore mistakenly predicts that the
+accesses in the two examples above will be ordered.  This is another
+example of how the compiler can undermine the memory model.  Be warned.
+
 
 THE READS-FROM RELATION: rf, rfi, and rfe
 -----------------------------------------
-- 
GitLab


From fc153c1c58cb8c3bb3b443b4d7dc3211ff5f65fc Mon Sep 17 00:00:00 2001
From: Waiman Long <longman@redhat.com>
Date: Sun, 5 Dec 2021 22:38:15 -0500
Subject: [PATCH 0176/1586] clocksource: Add a Kconfig option for
 WATCHDOG_MAX_SKEW

A watchdog maximum skew of 100us may still be too small for
some systems or archs. It may also be too small when some kernel
debug config options are enabled.  So add a new Kconfig option
CLOCKSOURCE_WATCHDOG_MAX_SKEW_US to allow kernel builders to have more
control on the threshold for marking clocksource as unstable.

Signed-off-by: Waiman Long <longman@redhat.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/time/Kconfig       | 9 +++++++++
 kernel/time/clocksource.c | 8 +++++++-
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index 04bfd62f5e5ca..27b7868b5c30d 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -181,5 +181,14 @@ config HIGH_RES_TIMERS
 	  hardware is not capable then this option only increases
 	  the size of the kernel image.
 
+config CLOCKSOURCE_WATCHDOG_MAX_SKEW_US
+	int "Clocksource watchdog maximum allowable skew (in μs)"
+	depends on CLOCKSOURCE_WATCHDOG
+	range 50 1000
+	default 100
+	help
+	  Specify the maximum amount of allowable watchdog skew in
+	  microseconds before reporting the clocksource to be unstable.
+
 endmenu
 endif
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 1cf73807b4503..95d7ca35bdf2c 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -107,7 +107,13 @@ static u64 suspend_start;
  * This delay could be due to SMIs, NMIs, or to VCPU preemptions.  Used as
  * a lower bound for cs->uncertainty_margin values when registering clocks.
  */
-#define WATCHDOG_MAX_SKEW (100 * NSEC_PER_USEC)
+#ifdef CONFIG_CLOCKSOURCE_WATCHDOG_MAX_SKEW_US
+#define MAX_SKEW_USEC	CONFIG_CLOCKSOURCE_WATCHDOG_MAX_SKEW_US
+#else
+#define MAX_SKEW_USEC	100
+#endif
+
+#define WATCHDOG_MAX_SKEW (MAX_SKEW_USEC * NSEC_PER_USEC)
 
 #ifdef CONFIG_CLOCKSOURCE_WATCHDOG
 static void clocksource_watchdog_work(struct work_struct *work);
-- 
GitLab


From fbdee71bb5d8d054e1bdb5af4c540f2cb86fe296 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 4 Jan 2022 08:16:47 +0100
Subject: [PATCH 0177/1586] block: deprecate autoloading based on dev_t

Make the legacy dev_t based autoloading optional and add a deprecation
warning.  This kind of autoloading has ceased to be useful about 20 years
ago.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220104071647.164918-1-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/Kconfig | 12 ++++++++++++
 block/bdev.c  |  9 ++++++---
 block/genhd.c |  6 ++++++
 3 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/block/Kconfig b/block/Kconfig
index d5d4197b7ed2d..205f8d01c6952 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -26,6 +26,18 @@ menuconfig BLOCK
 
 if BLOCK
 
+config BLOCK_LEGACY_AUTOLOAD
+	bool "Legacy autoloading support"
+	help
+	  Enable loading modules and creating block device instances based on
+	  accesses through their device special file.  This is a historic Linux
+	  feature and makes no sense in a udev world where device files are
+	  created on demand.
+
+	  Say N here unless booting or other functionality broke without it, in
+	  which case you should also send a report to your distribution and
+	  linux-block@vger.kernel.org.
+
 config BLK_RQ_ALLOC_TIME
 	bool
 
diff --git a/block/bdev.c b/block/bdev.c
index 102837a370517..c687726445660 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -733,12 +733,15 @@ struct block_device *blkdev_get_no_open(dev_t dev)
 	struct inode *inode;
 
 	inode = ilookup(blockdev_superblock, dev);
-	if (!inode) {
+	if (!inode && IS_ENABLED(CONFIG_BLOCK_LEGACY_AUTOLOAD)) {
 		blk_request_module(dev);
 		inode = ilookup(blockdev_superblock, dev);
-		if (!inode)
-			return NULL;
+		if (inode)
+			pr_warn_ratelimited(
+"block device autoloading is deprecated. It will be removed in Linux 5.19\n");
 	}
+	if (!inode)
+		return NULL;
 
 	/* switch from the inode reference to a device mode one: */
 	bdev = &BDEV_I(inode)->bdev;
diff --git a/block/genhd.c b/block/genhd.c
index 626c8406f21a6..6ae990ff02660 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -185,7 +185,9 @@ static struct blk_major_name {
 	struct blk_major_name *next;
 	int major;
 	char name[16];
+#ifdef CONFIG_BLOCK_LEGACY_AUTOLOAD
 	void (*probe)(dev_t devt);
+#endif
 } *major_names[BLKDEV_MAJOR_HASH_SIZE];
 static DEFINE_MUTEX(major_names_lock);
 static DEFINE_SPINLOCK(major_names_spinlock);
@@ -275,7 +277,9 @@ int __register_blkdev(unsigned int major, const char *name,
 	}
 
 	p->major = major;
+#ifdef CONFIG_BLOCK_LEGACY_AUTOLOAD
 	p->probe = probe;
+#endif
 	strlcpy(p->name, name, sizeof(p->name));
 	p->next = NULL;
 	index = major_to_index(major);
@@ -679,6 +683,7 @@ static ssize_t disk_badblocks_store(struct device *dev,
 	return badblocks_store(disk->bb, page, len, 0);
 }
 
+#ifdef CONFIG_BLOCK_LEGACY_AUTOLOAD
 void blk_request_module(dev_t devt)
 {
 	unsigned int major = MAJOR(devt);
@@ -698,6 +703,7 @@ void blk_request_module(dev_t devt)
 		/* Make old-style 2.4 aliases work */
 		request_module("block-major-%d", MAJOR(devt));
 }
+#endif /* CONFIG_BLOCK_LEGACY_AUTOLOAD */
 
 /*
  * print a full list of all partitions - intended for places where the root
-- 
GitLab


From 926597ffce0e3e2f785475df18e1636194209910 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 24 Jan 2022 10:39:11 +0100
Subject: [PATCH 0178/1586] block: move disk_{block,unblock,flush}_events to
 blk.h

No need to have these declarations in a public header.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Link: https://lore.kernel.org/r/20220124093913.742411-2-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk.h           | 3 +++
 include/linux/genhd.h | 3 ---
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/block/blk.h b/block/blk.h
index 8bd43b3ad33d5..2cba50d7e6cb1 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -445,6 +445,9 @@ int disk_alloc_events(struct gendisk *disk);
 void disk_add_events(struct gendisk *disk);
 void disk_del_events(struct gendisk *disk);
 void disk_release_events(struct gendisk *disk);
+void disk_block_events(struct gendisk *disk);
+void disk_unblock_events(struct gendisk *disk);
+void disk_flush_events(struct gendisk *disk, unsigned int mask);
 extern struct device_attribute dev_attr_events;
 extern struct device_attribute dev_attr_events_async;
 extern struct device_attribute dev_attr_events_poll_msecs;
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 6906a45bc761a..504f9a6674ace 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -185,9 +185,6 @@ static inline int bdev_read_only(struct block_device *bdev)
 	return bdev->bd_read_only || get_disk_ro(bdev->bd_disk);
 }
 
-extern void disk_block_events(struct gendisk *disk);
-extern void disk_unblock_events(struct gendisk *disk);
-extern void disk_flush_events(struct gendisk *disk, unsigned int mask);
 bool set_capacity_and_notify(struct gendisk *disk, sector_t size);
 bool disk_force_media_change(struct gendisk *disk, unsigned int events);
 
-- 
GitLab


From e7243285c0fc87054990fcde630583586ff8ed5f Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 24 Jan 2022 10:39:12 +0100
Subject: [PATCH 0179/1586] block: move blk_drop_partitions to blk.h

No need to have this declaration in a public header.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Link: https://lore.kernel.org/r/20220124093913.742411-3-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk.h           | 1 +
 include/linux/genhd.h | 1 -
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/blk.h b/block/blk.h
index 2cba50d7e6cb1..800c5ae387a0b 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -426,6 +426,7 @@ int bdev_add_partition(struct gendisk *disk, int partno, sector_t start,
 int bdev_del_partition(struct gendisk *disk, int partno);
 int bdev_resize_partition(struct gendisk *disk, int partno, sector_t start,
 		sector_t length);
+void blk_drop_partitions(struct gendisk *disk);
 
 int bio_add_hw_page(struct request_queue *q, struct bio *bio,
 		struct page *page, unsigned int len, unsigned int offset,
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 504f9a6674ace..aa4bd985dbe51 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -219,7 +219,6 @@ static inline u64 sb_bdev_nr_blocks(struct super_block *sb)
 }
 
 int bdev_disk_changed(struct gendisk *disk, bool invalidate);
-void blk_drop_partitions(struct gendisk *disk);
 
 struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id,
 		struct lock_class_key *lkclass);
-- 
GitLab


From 322cbb50de711814c42fb088f6d31901502c711a Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 24 Jan 2022 10:39:13 +0100
Subject: [PATCH 0180/1586] block: remove genhd.h

There is no good reason to keep genhd.h separate from the main blkdev.h
header that includes it.  So fold the contents of genhd.h into blkdev.h
and remove genhd.h entirely.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Link: https://lore.kernel.org/r/20220124093913.742411-4-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 Documentation/block/capability.rst  |   2 +-
 arch/m68k/atari/stdma.c             |   1 -
 arch/m68k/bvme6000/config.c         |   1 -
 arch/m68k/emu/nfblock.c             |   1 -
 arch/m68k/kernel/setup_mm.c         |   1 -
 arch/m68k/mvme147/config.c          |   1 -
 arch/m68k/mvme16x/config.c          |   1 -
 block/blk-cgroup.c                  |   1 -
 block/disk-events.c                 |   2 +-
 block/genhd.c                       |   1 -
 block/holder.c                      |   2 +-
 block/partitions/check.h            |   1 -
 block/partitions/core.c             |   1 -
 block/partitions/efi.h              |   1 -
 block/partitions/ldm.h              |   1 -
 block/sed-opal.c                    |   2 +-
 drivers/base/class.c                |   2 +-
 drivers/base/core.c                 |   2 +-
 drivers/base/devtmpfs.c             |   2 +-
 drivers/block/aoe/aoeblk.c          |   1 -
 drivers/block/aoe/aoecmd.c          |   1 -
 drivers/block/drbd/drbd_int.h       |   1 -
 drivers/block/mtip32xx/mtip32xx.c   |   1 -
 drivers/block/mtip32xx/mtip32xx.h   |   1 -
 drivers/block/rnbd/rnbd-srv-sysfs.c |   1 -
 drivers/block/sunvdc.c              |   1 -
 drivers/block/zram/zram_drv.c       |   1 -
 drivers/cdrom/gdrom.c               |   1 -
 drivers/char/random.c               |   2 +-
 drivers/md/bcache/super.c           |   1 -
 drivers/md/dm-core.h                |   1 -
 drivers/mtd/mtdswap.c               |   2 +-
 drivers/mtd/nand/raw/sharpsl.c      |   1 -
 drivers/nvdimm/blk.c                |   1 -
 drivers/nvdimm/btt.c                |   1 -
 drivers/nvdimm/btt_devs.c           |   1 -
 drivers/nvdimm/bus.c                |   1 -
 drivers/nvdimm/pfn_devs.c           |   1 -
 drivers/s390/block/dasd_int.h       |   1 -
 drivers/s390/block/scm_blk.c        |   1 -
 drivers/s390/block/scm_blk.h        |   1 -
 drivers/scsi/scsi_debug.c           |   1 -
 drivers/scsi/scsicam.c              |   1 -
 drivers/scsi/sd.c                   |   1 -
 drivers/scsi/sr.h                   |   1 -
 drivers/target/target_core_iblock.c |   1 -
 drivers/target/target_core_pscsi.c  |   1 -
 fs/btrfs/check-integrity.c          |   1 -
 fs/dax.c                            |   1 -
 fs/gfs2/sys.c                       |   2 +-
 fs/hfs/mdb.c                        |   2 +-
 fs/hfsplus/wrapper.c                |   1 -
 fs/ksmbd/vfs.c                      |   1 -
 fs/nfs/blocklayout/rpc_pipefs.c     |   1 -
 fs/nfsd/blocklayout.c               |   1 -
 include/linux/blkdev.h              | 273 +++++++++++++++++++++++++-
 include/linux/genhd.h               | 287 ----------------------------
 include/linux/part_stat.h           |   2 +-
 init/do_mounts.c                    |   1 -
 kernel/power/hibernate.c            |   1 -
 kernel/power/swap.c                 |   1 -
 security/integrity/ima/ima_policy.c |   1 -
 62 files changed, 282 insertions(+), 350 deletions(-)
 delete mode 100644 include/linux/genhd.h

diff --git a/Documentation/block/capability.rst b/Documentation/block/capability.rst
index 160a5148b915f..2ae7f064736ad 100644
--- a/Documentation/block/capability.rst
+++ b/Documentation/block/capability.rst
@@ -7,4 +7,4 @@ This file documents the sysfs file ``block/<disk>/capability``.
 ``capability`` is a bitfield, printed in hexadecimal, indicating which
 capabilities a specific block device supports:
 
-.. kernel-doc:: include/linux/genhd.h
+.. kernel-doc:: include/linux/blkdev.h
diff --git a/arch/m68k/atari/stdma.c b/arch/m68k/atari/stdma.c
index ba65f942d0c78..ce6818eff75ef 100644
--- a/arch/m68k/atari/stdma.c
+++ b/arch/m68k/atari/stdma.c
@@ -30,7 +30,6 @@
 
 #include <linux/types.h>
 #include <linux/kdev_t.h>
-#include <linux/genhd.h>
 #include <linux/sched.h>
 #include <linux/init.h>
 #include <linux/interrupt.h>
diff --git a/arch/m68k/bvme6000/config.c b/arch/m68k/bvme6000/config.c
index 0c6feafbbd110..0fe0f3e888fb3 100644
--- a/arch/m68k/bvme6000/config.c
+++ b/arch/m68k/bvme6000/config.c
@@ -23,7 +23,6 @@
 #include <linux/linkage.h>
 #include <linux/init.h>
 #include <linux/major.h>
-#include <linux/genhd.h>
 #include <linux/rtc.h>
 #include <linux/interrupt.h>
 #include <linux/bcd.h>
diff --git a/arch/m68k/emu/nfblock.c b/arch/m68k/emu/nfblock.c
index 9c57b245dc12a..267b02cc5655b 100644
--- a/arch/m68k/emu/nfblock.c
+++ b/arch/m68k/emu/nfblock.c
@@ -13,7 +13,6 @@
 #include <linux/kernel.h>
 #include <linux/errno.h>
 #include <linux/types.h>
-#include <linux/genhd.h>
 #include <linux/blkdev.h>
 #include <linux/hdreg.h>
 #include <linux/slab.h>
diff --git a/arch/m68k/kernel/setup_mm.c b/arch/m68k/kernel/setup_mm.c
index 49e573b943268..ee268055bdce3 100644
--- a/arch/m68k/kernel/setup_mm.c
+++ b/arch/m68k/kernel/setup_mm.c
@@ -16,7 +16,6 @@
 #include <linux/interrupt.h>
 #include <linux/fs.h>
 #include <linux/console.h>
-#include <linux/genhd.h>
 #include <linux/errno.h>
 #include <linux/string.h>
 #include <linux/init.h>
diff --git a/arch/m68k/mvme147/config.c b/arch/m68k/mvme147/config.c
index dfd6202fd403e..db1430dc411f4 100644
--- a/arch/m68k/mvme147/config.c
+++ b/arch/m68k/mvme147/config.c
@@ -22,7 +22,6 @@
 #include <linux/linkage.h>
 #include <linux/init.h>
 #include <linux/major.h>
-#include <linux/genhd.h>
 #include <linux/rtc.h>
 #include <linux/interrupt.h>
 
diff --git a/arch/m68k/mvme16x/config.c b/arch/m68k/mvme16x/config.c
index b4422c2dfbbf4..45a07ab3123ab 100644
--- a/arch/m68k/mvme16x/config.c
+++ b/arch/m68k/mvme16x/config.c
@@ -24,7 +24,6 @@
 #include <linux/linkage.h>
 #include <linux/init.h>
 #include <linux/major.h>
-#include <linux/genhd.h>
 #include <linux/rtc.h>
 #include <linux/interrupt.h>
 #include <linux/module.h>
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 650f7e27989f1..671debbae9413 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -23,7 +23,6 @@
 #include <linux/blkdev.h>
 #include <linux/backing-dev.h>
 #include <linux/slab.h>
-#include <linux/genhd.h>
 #include <linux/delay.h>
 #include <linux/atomic.h>
 #include <linux/ctype.h>
diff --git a/block/disk-events.c b/block/disk-events.c
index 8d5496e7592a5..aee25a7e1ab7d 100644
--- a/block/disk-events.c
+++ b/block/disk-events.c
@@ -4,7 +4,7 @@
  */
 #include <linux/export.h>
 #include <linux/moduleparam.h>
-#include <linux/genhd.h>
+#include <linux/blkdev.h>
 #include "blk.h"
 
 struct disk_events {
diff --git a/block/genhd.c b/block/genhd.c
index 6ae990ff02660..9589d1d59afab 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -8,7 +8,6 @@
 #include <linux/module.h>
 #include <linux/ctype.h>
 #include <linux/fs.h>
-#include <linux/genhd.h>
 #include <linux/kdev_t.h>
 #include <linux/kernel.h>
 #include <linux/blkdev.h>
diff --git a/block/holder.c b/block/holder.c
index 27cddce1b4461..8d750281a1cd9 100644
--- a/block/holder.c
+++ b/block/holder.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0-only
-#include <linux/genhd.h>
+#include <linux/blkdev.h>
 #include <linux/slab.h>
 
 struct bd_holder_disk {
diff --git a/block/partitions/check.h b/block/partitions/check.h
index d5b28e309d64d..4ffa2359b1a37 100644
--- a/block/partitions/check.h
+++ b/block/partitions/check.h
@@ -1,7 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #include <linux/pagemap.h>
 #include <linux/blkdev.h>
-#include <linux/genhd.h>
 #include "../blk.h"
 
 /*
diff --git a/block/partitions/core.c b/block/partitions/core.c
index c2a1635922b1c..2ef8dfa1e5c85 100644
--- a/block/partitions/core.c
+++ b/block/partitions/core.c
@@ -8,7 +8,6 @@
 #include <linux/major.h>
 #include <linux/slab.h>
 #include <linux/ctype.h>
-#include <linux/genhd.h>
 #include <linux/vmalloc.h>
 #include <linux/blktrace_api.h>
 #include <linux/raid/detect.h>
diff --git a/block/partitions/efi.h b/block/partitions/efi.h
index 8cc2b88d0aa85..84b9f36b9e479 100644
--- a/block/partitions/efi.h
+++ b/block/partitions/efi.h
@@ -13,7 +13,6 @@
 
 #include <linux/types.h>
 #include <linux/fs.h>
-#include <linux/genhd.h>
 #include <linux/kernel.h>
 #include <linux/major.h>
 #include <linux/string.h>
diff --git a/block/partitions/ldm.h b/block/partitions/ldm.h
index 8693704dcf5e9..0a747a0c782d5 100644
--- a/block/partitions/ldm.h
+++ b/block/partitions/ldm.h
@@ -14,7 +14,6 @@
 
 #include <linux/types.h>
 #include <linux/list.h>
-#include <linux/genhd.h>
 #include <linux/fs.h>
 #include <asm/unaligned.h>
 #include <asm/byteorder.h>
diff --git a/block/sed-opal.c b/block/sed-opal.c
index daafadbb88cae..9700197000f20 100644
--- a/block/sed-opal.c
+++ b/block/sed-opal.c
@@ -13,7 +13,7 @@
 #include <linux/device.h>
 #include <linux/kernel.h>
 #include <linux/list.h>
-#include <linux/genhd.h>
+#include <linux/blkdev.h>
 #include <linux/slab.h>
 #include <linux/uaccess.h>
 #include <uapi/linux/sed-opal.h>
diff --git a/drivers/base/class.c b/drivers/base/class.c
index 7476f393df977..8feb85e186e3b 100644
--- a/drivers/base/class.c
+++ b/drivers/base/class.c
@@ -16,7 +16,7 @@
 #include <linux/kdev_t.h>
 #include <linux/err.h>
 #include <linux/slab.h>
-#include <linux/genhd.h>
+#include <linux/blkdev.h>
 #include <linux/mutex.h>
 #include "base.h"
 
diff --git a/drivers/base/core.c b/drivers/base/core.c
index 7bb957b118611..3d6430eb0c6a1 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -21,7 +21,7 @@
 #include <linux/notifier.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
-#include <linux/genhd.h>
+#include <linux/blkdev.h>
 #include <linux/mutex.h>
 #include <linux/pm_runtime.h>
 #include <linux/netdevice.h>
diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c
index f41063ac1aee4..db5a03a0618ea 100644
--- a/drivers/base/devtmpfs.c
+++ b/drivers/base/devtmpfs.c
@@ -17,7 +17,7 @@
 #include <linux/syscalls.h>
 #include <linux/mount.h>
 #include <linux/device.h>
-#include <linux/genhd.h>
+#include <linux/blkdev.h>
 #include <linux/namei.h>
 #include <linux/fs.h>
 #include <linux/shmem_fs.h>
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index 52484bcdedb92..8a91fcac6f829 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -12,7 +12,6 @@
 #include <linux/ioctl.h>
 #include <linux/slab.h>
 #include <linux/ratelimit.h>
-#include <linux/genhd.h>
 #include <linux/netdevice.h>
 #include <linux/mutex.h>
 #include <linux/export.h>
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 6af111f568e4a..cc11f89a0928f 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -10,7 +10,6 @@
 #include <linux/blk-mq.h>
 #include <linux/skbuff.h>
 #include <linux/netdevice.h>
-#include <linux/genhd.h>
 #include <linux/moduleparam.h>
 #include <linux/workqueue.h>
 #include <linux/kthread.h>
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index f27d5b0f9a0bb..acb1ad3c06035 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -27,7 +27,6 @@
 #include <linux/major.h>
 #include <linux/blkdev.h>
 #include <linux/backing-dev.h>
-#include <linux/genhd.h>
 #include <linux/idr.h>
 #include <linux/dynamic_debug.h>
 #include <net/tcp.h>
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index e6005c2323281..cba956881d55c 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -19,7 +19,6 @@
 #include <linux/compat.h>
 #include <linux/fs.h>
 #include <linux/module.h>
-#include <linux/genhd.h>
 #include <linux/blkdev.h>
 #include <linux/blk-mq.h>
 #include <linux/bio.h>
diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h
index 88f4206310e4c..6816beb45352b 100644
--- a/drivers/block/mtip32xx/mtip32xx.h
+++ b/drivers/block/mtip32xx/mtip32xx.h
@@ -15,7 +15,6 @@
 #include <linux/rwsem.h>
 #include <linux/ata.h>
 #include <linux/interrupt.h>
-#include <linux/genhd.h>
 
 /* Offset of Subsystem Device ID in pci confoguration space */
 #define PCI_SUBSYSTEM_DEVICEID	0x2E
diff --git a/drivers/block/rnbd/rnbd-srv-sysfs.c b/drivers/block/rnbd/rnbd-srv-sysfs.c
index 4db98e0e76f0e..feaa76c5a3423 100644
--- a/drivers/block/rnbd/rnbd-srv-sysfs.c
+++ b/drivers/block/rnbd/rnbd-srv-sysfs.c
@@ -13,7 +13,6 @@
 #include <linux/kobject.h>
 #include <linux/sysfs.h>
 #include <linux/stat.h>
-#include <linux/genhd.h>
 #include <linux/list.h>
 #include <linux/moduleparam.h>
 #include <linux/device.h>
diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index 146d85d80e0e7..dd0a1a6fed296 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -9,7 +9,6 @@
 #include <linux/types.h>
 #include <linux/blk-mq.h>
 #include <linux/hdreg.h>
-#include <linux/genhd.h>
 #include <linux/cdrom.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index cb253d80d72b9..342dbcb3f2208 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -22,7 +22,6 @@
 #include <linux/blkdev.h>
 #include <linux/buffer_head.h>
 #include <linux/device.h>
-#include <linux/genhd.h>
 #include <linux/highmem.h>
 #include <linux/slab.h>
 #include <linux/backing-dev.h>
diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c
index faead41709bcd..8e78b37d0f6a4 100644
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c
@@ -15,7 +15,6 @@
 #include <linux/slab.h>
 #include <linux/dma-mapping.h>
 #include <linux/cdrom.h>
-#include <linux/genhd.h>
 #include <linux/bio.h>
 #include <linux/blk-mq.h>
 #include <linux/interrupt.h>
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 68613f0b68877..f206c87c62028 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -330,7 +330,7 @@
 #include <linux/poll.h>
 #include <linux/init.h>
 #include <linux/fs.h>
-#include <linux/genhd.h>
+#include <linux/blkdev.h>
 #include <linux/interrupt.h>
 #include <linux/mm.h>
 #include <linux/nodemask.h>
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 140f35dc0c457..c31a62b963f00 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -18,7 +18,6 @@
 #include <linux/blkdev.h>
 #include <linux/pagemap.h>
 #include <linux/debugfs.h>
-#include <linux/genhd.h>
 #include <linux/idr.h>
 #include <linux/kthread.h>
 #include <linux/workqueue.h>
diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h
index b855fef4f38a6..72d18c3fbf1f6 100644
--- a/drivers/md/dm-core.h
+++ b/drivers/md/dm-core.h
@@ -11,7 +11,6 @@
 
 #include <linux/kthread.h>
 #include <linux/ktime.h>
-#include <linux/genhd.h>
 #include <linux/blk-mq.h>
 #include <linux/blk-crypto-profile.h>
 
diff --git a/drivers/mtd/mtdswap.c b/drivers/mtd/mtdswap.c
index e86b04bc1d6bb..dc7f1532a37f7 100644
--- a/drivers/mtd/mtdswap.c
+++ b/drivers/mtd/mtdswap.c
@@ -19,7 +19,7 @@
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
-#include <linux/genhd.h>
+#include <linux/blkdev.h>
 #include <linux/swap.h>
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
diff --git a/drivers/mtd/nand/raw/sharpsl.c b/drivers/mtd/nand/raw/sharpsl.c
index 5612ee628425b..52ce5162538a4 100644
--- a/drivers/mtd/nand/raw/sharpsl.c
+++ b/drivers/mtd/nand/raw/sharpsl.c
@@ -6,7 +6,6 @@
  *  Based on Sharp's NAND driver sharp_sl.c
  */
 
-#include <linux/genhd.h>
 #include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/delay.h>
diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c
index 228c33b8d1d69..c1db43524d755 100644
--- a/drivers/nvdimm/blk.c
+++ b/drivers/nvdimm/blk.c
@@ -6,7 +6,6 @@
 
 #include <linux/blkdev.h>
 #include <linux/fs.h>
-#include <linux/genhd.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/nd.h>
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index da3f007a12115..cbd994f7f1fe6 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -11,7 +11,6 @@
 #include <linux/device.h>
 #include <linux/mutex.h>
 #include <linux/hdreg.h>
-#include <linux/genhd.h>
 #include <linux/sizes.h>
 #include <linux/ndctl.h>
 #include <linux/fs.h>
diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c
index 8b52e5144f084..e5a58520d3982 100644
--- a/drivers/nvdimm/btt_devs.c
+++ b/drivers/nvdimm/btt_devs.c
@@ -4,7 +4,6 @@
  */
 #include <linux/blkdev.h>
 #include <linux/device.h>
-#include <linux/genhd.h>
 #include <linux/sizes.h>
 #include <linux/slab.h>
 #include <linux/fs.h>
diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index 9dc7f3edd42b1..5bbe31b08581b 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -11,7 +11,6 @@
 #include <linux/blkdev.h>
 #include <linux/fcntl.h>
 #include <linux/async.h>
-#include <linux/genhd.h>
 #include <linux/ndctl.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
index 58eda16f5c534..c31e184bfa45e 100644
--- a/drivers/nvdimm/pfn_devs.c
+++ b/drivers/nvdimm/pfn_devs.c
@@ -5,7 +5,6 @@
 #include <linux/memremap.h>
 #include <linux/blkdev.h>
 #include <linux/device.h>
-#include <linux/genhd.h>
 #include <linux/sizes.h>
 #include <linux/slab.h>
 #include <linux/fs.h>
diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h
index 8b458010f88a1..3b7af00a7825f 100644
--- a/drivers/s390/block/dasd_int.h
+++ b/drivers/s390/block/dasd_int.h
@@ -47,7 +47,6 @@
 #include <linux/module.h>
 #include <linux/wait.h>
 #include <linux/blkdev.h>
-#include <linux/genhd.h>
 #include <linux/hdreg.h>
 #include <linux/interrupt.h>
 #include <linux/log2.h>
diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c
index 61ecdcb2cc6af..2a9c0ddcade59 100644
--- a/drivers/s390/block/scm_blk.c
+++ b/drivers/s390/block/scm_blk.c
@@ -15,7 +15,6 @@
 #include <linux/module.h>
 #include <linux/blkdev.h>
 #include <linux/blk-mq.h>
-#include <linux/genhd.h>
 #include <linux/slab.h>
 #include <linux/list.h>
 #include <asm/eadm.h>
diff --git a/drivers/s390/block/scm_blk.h b/drivers/s390/block/scm_blk.h
index a05a4297cfae2..af82b32147741 100644
--- a/drivers/s390/block/scm_blk.h
+++ b/drivers/s390/block/scm_blk.h
@@ -6,7 +6,6 @@
 #include <linux/spinlock.h>
 #include <linux/blkdev.h>
 #include <linux/blk-mq.h>
-#include <linux/genhd.h>
 #include <linux/list.h>
 
 #include <asm/debug.h>
diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index 2104973a35cd3..911cc72dd7acd 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -23,7 +23,6 @@
 #include <linux/slab.h>
 #include <linux/types.h>
 #include <linux/string.h>
-#include <linux/genhd.h>
 #include <linux/fs.h>
 #include <linux/init.h>
 #include <linux/proc_fs.h>
diff --git a/drivers/scsi/scsicam.c b/drivers/scsi/scsicam.c
index 0ffdb8f2995f7..acdc0aceca5ef 100644
--- a/drivers/scsi/scsicam.c
+++ b/drivers/scsi/scsicam.c
@@ -14,7 +14,6 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/fs.h>
-#include <linux/genhd.h>
 #include <linux/kernel.h>
 #include <linux/blkdev.h>
 #include <linux/pagemap.h>
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 62eb9921cc947..2d648d27bfd71 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -38,7 +38,6 @@
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/bio.h>
-#include <linux/genhd.h>
 #include <linux/hdreg.h>
 #include <linux/errno.h>
 #include <linux/idr.h>
diff --git a/drivers/scsi/sr.h b/drivers/scsi/sr.h
index 339c624e04d86..1609f02ed29ac 100644
--- a/drivers/scsi/sr.h
+++ b/drivers/scsi/sr.h
@@ -18,7 +18,6 @@
 #ifndef _SR_H
 #define _SR_H
 
-#include <linux/genhd.h>
 #include <linux/kref.h>
 #include <linux/mutex.h>
 
diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
index bf8ae4825a06e..6045678365a59 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c
@@ -20,7 +20,6 @@
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/bio.h>
-#include <linux/genhd.h>
 #include <linux/file.h>
 #include <linux/module.h>
 #include <linux/scatterlist.h>
diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c
index 807d06ecadee2..0fae71ac5cc8a 100644
--- a/drivers/target/target_core_pscsi.c
+++ b/drivers/target/target_core_pscsi.c
@@ -17,7 +17,6 @@
 #include <linux/blk_types.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
-#include <linux/genhd.h>
 #include <linux/cdrom.h>
 #include <linux/ratelimit.h>
 #include <linux/module.h>
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index 7e9f90fa0388b..abac86a758401 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -78,7 +78,6 @@
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/mutex.h>
-#include <linux/genhd.h>
 #include <linux/blkdev.h>
 #include <linux/mm.h>
 #include <linux/string.h>
diff --git a/fs/dax.c b/fs/dax.c
index cd03485867a74..ab0978739eaaa 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -11,7 +11,6 @@
 #include <linux/buffer_head.h>
 #include <linux/dax.h>
 #include <linux/fs.h>
-#include <linux/genhd.h>
 #include <linux/highmem.h>
 #include <linux/memcontrol.h>
 #include <linux/mm.h>
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index a6002b2d146d8..d87ea98cf5350 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -15,7 +15,7 @@
 #include <linux/kobject.h>
 #include <linux/uaccess.h>
 #include <linux/gfs2_ondisk.h>
-#include <linux/genhd.h>
+#include <linux/blkdev.h>
 
 #include "gfs2.h"
 #include "incore.h"
diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c
index 5beb826524354..8082eb01127cd 100644
--- a/fs/hfs/mdb.c
+++ b/fs/hfs/mdb.c
@@ -9,7 +9,7 @@
  */
 
 #include <linux/cdrom.h>
-#include <linux/genhd.h>
+#include <linux/blkdev.h>
 #include <linux/nls.h>
 #include <linux/slab.h>
 
diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c
index 51ae6f1eb4a55..4688cc7b36926 100644
--- a/fs/hfsplus/wrapper.c
+++ b/fs/hfsplus/wrapper.c
@@ -12,7 +12,6 @@
 #include <linux/fs.h>
 #include <linux/blkdev.h>
 #include <linux/cdrom.h>
-#include <linux/genhd.h>
 #include <asm/unaligned.h>
 
 #include "hfsplus_fs.h"
diff --git a/fs/ksmbd/vfs.c b/fs/ksmbd/vfs.c
index 19d36393974ca..9cebb6ba555b6 100644
--- a/fs/ksmbd/vfs.c
+++ b/fs/ksmbd/vfs.c
@@ -11,7 +11,6 @@
 #include <linux/writeback.h>
 #include <linux/xattr.h>
 #include <linux/falloc.h>
-#include <linux/genhd.h>
 #include <linux/fsnotify.h>
 #include <linux/dcache.h>
 #include <linux/slab.h>
diff --git a/fs/nfs/blocklayout/rpc_pipefs.c b/fs/nfs/blocklayout/rpc_pipefs.c
index ef9db135c649c..6c977288cc288 100644
--- a/fs/nfs/blocklayout/rpc_pipefs.c
+++ b/fs/nfs/blocklayout/rpc_pipefs.c
@@ -27,7 +27,6 @@
  */
 
 #include <linux/module.h>
-#include <linux/genhd.h>
 #include <linux/blkdev.h>
 
 #include "blocklayout.h"
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index e5c0982a381de..b6d01d51a7465 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -4,7 +4,6 @@
  */
 #include <linux/exportfs.h>
 #include <linux/iomap.h>
-#include <linux/genhd.h>
 #include <linux/slab.h>
 #include <linux/pr.h>
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index f35aea98bc351..99a4384bb8a56 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1,9 +1,13 @@
 /* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Portions Copyright (C) 1992 Drew Eckhardt
+ */
 #ifndef _LINUX_BLKDEV_H
 #define _LINUX_BLKDEV_H
 
-#include <linux/sched.h>
-#include <linux/genhd.h>
+#include <linux/types.h>
+#include <linux/blk_types.h>
+#include <linux/device.h>
 #include <linux/list.h>
 #include <linux/llist.h>
 #include <linux/minmax.h>
@@ -12,11 +16,15 @@
 #include <linux/wait.h>
 #include <linux/bio.h>
 #include <linux/gfp.h>
+#include <linux/kdev_t.h>
 #include <linux/rcupdate.h>
 #include <linux/percpu-refcount.h>
 #include <linux/blkzoned.h>
+#include <linux/sched.h>
 #include <linux/sbitmap.h>
 #include <linux/srcu.h>
+#include <linux/uuid.h>
+#include <linux/xarray.h>
 
 struct module;
 struct request_queue;
@@ -33,6 +41,10 @@ struct blk_queue_stats;
 struct blk_stat_callback;
 struct blk_crypto_profile;
 
+extern const struct device_type disk_type;
+extern struct device_type part_type;
+extern struct class block_class;
+
 /* Must be consistent with blk_mq_poll_stats_bkt() */
 #define BLK_MQ_POLL_STATS_BKTS 16
 
@@ -45,6 +57,144 @@ struct blk_crypto_profile;
  */
 #define BLKCG_MAX_POLS		6
 
+#define DISK_MAX_PARTS			256
+#define DISK_NAME_LEN			32
+
+#define PARTITION_META_INFO_VOLNAMELTH	64
+/*
+ * Enough for the string representation of any kind of UUID plus NULL.
+ * EFI UUID is 36 characters. MSDOS UUID is 11 characters.
+ */
+#define PARTITION_META_INFO_UUIDLTH	(UUID_STRING_LEN + 1)
+
+struct partition_meta_info {
+	char uuid[PARTITION_META_INFO_UUIDLTH];
+	u8 volname[PARTITION_META_INFO_VOLNAMELTH];
+};
+
+/**
+ * DOC: genhd capability flags
+ *
+ * ``GENHD_FL_REMOVABLE``: indicates that the block device gives access to
+ * removable media.  When set, the device remains present even when media is not
+ * inserted.  Shall not be set for devices which are removed entirely when the
+ * media is removed.
+ *
+ * ``GENHD_FL_HIDDEN``: the block device is hidden; it doesn't produce events,
+ * doesn't appear in sysfs, and can't be opened from userspace or using
+ * blkdev_get*. Used for the underlying components of multipath devices.
+ *
+ * ``GENHD_FL_NO_PART``: partition support is disabled.  The kernel will not
+ * scan for partitions from add_disk, and users can't add partitions manually.
+ *
+ */
+enum {
+	GENHD_FL_REMOVABLE			= 1 << 0,
+	GENHD_FL_HIDDEN				= 1 << 1,
+	GENHD_FL_NO_PART			= 1 << 2,
+};
+
+enum {
+	DISK_EVENT_MEDIA_CHANGE			= 1 << 0, /* media changed */
+	DISK_EVENT_EJECT_REQUEST		= 1 << 1, /* eject requested */
+};
+
+enum {
+	/* Poll even if events_poll_msecs is unset */
+	DISK_EVENT_FLAG_POLL			= 1 << 0,
+	/* Forward events to udev */
+	DISK_EVENT_FLAG_UEVENT			= 1 << 1,
+	/* Block event polling when open for exclusive write */
+	DISK_EVENT_FLAG_BLOCK_ON_EXCL_WRITE	= 1 << 2,
+};
+
+struct disk_events;
+struct badblocks;
+
+struct blk_integrity {
+	const struct blk_integrity_profile	*profile;
+	unsigned char				flags;
+	unsigned char				tuple_size;
+	unsigned char				interval_exp;
+	unsigned char				tag_size;
+};
+
+struct gendisk {
+	/*
+	 * major/first_minor/minors should not be set by any new driver, the
+	 * block core will take care of allocating them automatically.
+	 */
+	int major;
+	int first_minor;
+	int minors;
+
+	char disk_name[DISK_NAME_LEN];	/* name of major driver */
+
+	unsigned short events;		/* supported events */
+	unsigned short event_flags;	/* flags related to event processing */
+
+	struct xarray part_tbl;
+	struct block_device *part0;
+
+	const struct block_device_operations *fops;
+	struct request_queue *queue;
+	void *private_data;
+
+	int flags;
+	unsigned long state;
+#define GD_NEED_PART_SCAN		0
+#define GD_READ_ONLY			1
+#define GD_DEAD				2
+#define GD_NATIVE_CAPACITY		3
+
+	struct mutex open_mutex;	/* open/close mutex */
+	unsigned open_partitions;	/* number of open partitions */
+
+	struct backing_dev_info	*bdi;
+	struct kobject *slave_dir;
+#ifdef CONFIG_BLOCK_HOLDER_DEPRECATED
+	struct list_head slave_bdevs;
+#endif
+	struct timer_rand_state *random;
+	atomic_t sync_io;		/* RAID */
+	struct disk_events *ev;
+#ifdef  CONFIG_BLK_DEV_INTEGRITY
+	struct kobject integrity_kobj;
+#endif	/* CONFIG_BLK_DEV_INTEGRITY */
+#if IS_ENABLED(CONFIG_CDROM)
+	struct cdrom_device_info *cdi;
+#endif
+	int node_id;
+	struct badblocks *bb;
+	struct lockdep_map lockdep_map;
+	u64 diskseq;
+};
+
+static inline bool disk_live(struct gendisk *disk)
+{
+	return !inode_unhashed(disk->part0->bd_inode);
+}
+
+/*
+ * The gendisk is refcounted by the part0 block_device, and the bd_device
+ * therein is also used for device model presentation in sysfs.
+ */
+#define dev_to_disk(device) \
+	(dev_to_bdev(device)->bd_disk)
+#define disk_to_dev(disk) \
+	(&((disk)->part0->bd_device))
+
+#if IS_REACHABLE(CONFIG_CDROM)
+#define disk_to_cdi(disk)	((disk)->cdi)
+#else
+#define disk_to_cdi(disk)	NULL
+#endif
+
+static inline dev_t disk_devt(struct gendisk *disk)
+{
+	return MKDEV(disk->major, disk->first_minor);
+}
+
 static inline int blk_validate_block_size(unsigned long bsize)
 {
 	if (bsize < 512 || bsize > PAGE_SIZE || !is_power_of_2(bsize))
@@ -596,6 +746,118 @@ static inline unsigned int blk_queue_depth(struct request_queue *q)
 #define for_each_bio(_bio)		\
 	for (; _bio; _bio = _bio->bi_next)
 
+int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
+				 const struct attribute_group **groups);
+static inline int __must_check add_disk(struct gendisk *disk)
+{
+	return device_add_disk(NULL, disk, NULL);
+}
+void del_gendisk(struct gendisk *gp);
+void invalidate_disk(struct gendisk *disk);
+void set_disk_ro(struct gendisk *disk, bool read_only);
+void disk_uevent(struct gendisk *disk, enum kobject_action action);
+
+static inline int get_disk_ro(struct gendisk *disk)
+{
+	return disk->part0->bd_read_only ||
+		test_bit(GD_READ_ONLY, &disk->state);
+}
+
+static inline int bdev_read_only(struct block_device *bdev)
+{
+	return bdev->bd_read_only || get_disk_ro(bdev->bd_disk);
+}
+
+bool set_capacity_and_notify(struct gendisk *disk, sector_t size);
+bool disk_force_media_change(struct gendisk *disk, unsigned int events);
+
+void add_disk_randomness(struct gendisk *disk) __latent_entropy;
+void rand_initialize_disk(struct gendisk *disk);
+
+static inline sector_t get_start_sect(struct block_device *bdev)
+{
+	return bdev->bd_start_sect;
+}
+
+static inline sector_t bdev_nr_sectors(struct block_device *bdev)
+{
+	return bdev->bd_nr_sectors;
+}
+
+static inline loff_t bdev_nr_bytes(struct block_device *bdev)
+{
+	return (loff_t)bdev_nr_sectors(bdev) << SECTOR_SHIFT;
+}
+
+static inline sector_t get_capacity(struct gendisk *disk)
+{
+	return bdev_nr_sectors(disk->part0);
+}
+
+static inline u64 sb_bdev_nr_blocks(struct super_block *sb)
+{
+	return bdev_nr_sectors(sb->s_bdev) >>
+		(sb->s_blocksize_bits - SECTOR_SHIFT);
+}
+
+int bdev_disk_changed(struct gendisk *disk, bool invalidate);
+
+struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id,
+		struct lock_class_key *lkclass);
+void put_disk(struct gendisk *disk);
+struct gendisk *__blk_alloc_disk(int node, struct lock_class_key *lkclass);
+
+/**
+ * blk_alloc_disk - allocate a gendisk structure
+ * @node_id: numa node to allocate on
+ *
+ * Allocate and pre-initialize a gendisk structure for use with BIO based
+ * drivers.
+ *
+ * Context: can sleep
+ */
+#define blk_alloc_disk(node_id)						\
+({									\
+	static struct lock_class_key __key;				\
+									\
+	__blk_alloc_disk(node_id, &__key);				\
+})
+void blk_cleanup_disk(struct gendisk *disk);
+
+int __register_blkdev(unsigned int major, const char *name,
+		void (*probe)(dev_t devt));
+#define register_blkdev(major, name) \
+	__register_blkdev(major, name, NULL)
+void unregister_blkdev(unsigned int major, const char *name);
+
+bool bdev_check_media_change(struct block_device *bdev);
+int __invalidate_device(struct block_device *bdev, bool kill_dirty);
+void set_capacity(struct gendisk *disk, sector_t size);
+
+#ifdef CONFIG_BLOCK_HOLDER_DEPRECATED
+int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk);
+void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk);
+int bd_register_pending_holders(struct gendisk *disk);
+#else
+static inline int bd_link_disk_holder(struct block_device *bdev,
+				      struct gendisk *disk)
+{
+	return 0;
+}
+static inline void bd_unlink_disk_holder(struct block_device *bdev,
+					 struct gendisk *disk)
+{
+}
+static inline int bd_register_pending_holders(struct gendisk *disk)
+{
+	return 0;
+}
+#endif /* CONFIG_BLOCK_HOLDER_DEPRECATED */
+
+dev_t part_devt(struct gendisk *disk, u8 partno);
+void inc_diskseq(struct gendisk *disk);
+dev_t blk_lookup_devt(const char *name, int partno);
+void blk_request_module(dev_t devt);
 
 extern int blk_register_queue(struct gendisk *disk);
 extern void blk_unregister_queue(struct gendisk *disk);
@@ -1311,6 +1573,7 @@ void invalidate_bdev(struct block_device *bdev);
 int sync_blockdev(struct block_device *bdev);
 int sync_blockdev_nowait(struct block_device *bdev);
 void sync_bdevs(bool wait);
+void printk_all_partitions(void);
 #else
 static inline void invalidate_bdev(struct block_device *bdev)
 {
@@ -1326,7 +1589,11 @@ static inline int sync_blockdev_nowait(struct block_device *bdev)
 static inline void sync_bdevs(bool wait)
 {
 }
-#endif
+static inline void printk_all_partitions(void)
+{
+}
+#endif /* CONFIG_BLOCK */
+
 int fsync_bdev(struct block_device *bdev);
 
 int freeze_bdev(struct block_device *bdev);
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
deleted file mode 100644
index aa4bd985dbe51..0000000000000
--- a/include/linux/genhd.h
+++ /dev/null
@@ -1,287 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LINUX_GENHD_H
-#define _LINUX_GENHD_H
-
-/*
- * 	genhd.h Copyright (C) 1992 Drew Eckhardt
- *	Generic hard disk header file by  
- * 		Drew Eckhardt
- *
- *		<drew@colorado.edu>
- */
-
-#include <linux/types.h>
-#include <linux/kdev_t.h>
-#include <linux/uuid.h>
-#include <linux/blk_types.h>
-#include <linux/device.h>
-#include <linux/xarray.h>
-
-extern const struct device_type disk_type;
-extern struct device_type part_type;
-extern struct class block_class;
-
-#define DISK_MAX_PARTS			256
-#define DISK_NAME_LEN			32
-
-#define PARTITION_META_INFO_VOLNAMELTH	64
-/*
- * Enough for the string representation of any kind of UUID plus NULL.
- * EFI UUID is 36 characters. MSDOS UUID is 11 characters.
- */
-#define PARTITION_META_INFO_UUIDLTH	(UUID_STRING_LEN + 1)
-
-struct partition_meta_info {
-	char uuid[PARTITION_META_INFO_UUIDLTH];
-	u8 volname[PARTITION_META_INFO_VOLNAMELTH];
-};
-
-/**
- * DOC: genhd capability flags
- *
- * ``GENHD_FL_REMOVABLE``: indicates that the block device gives access to
- * removable media.  When set, the device remains present even when media is not
- * inserted.  Shall not be set for devices which are removed entirely when the
- * media is removed.
- *
- * ``GENHD_FL_HIDDEN``: the block device is hidden; it doesn't produce events,
- * doesn't appear in sysfs, and can't be opened from userspace or using
- * blkdev_get*. Used for the underlying components of multipath devices.
- *
- * ``GENHD_FL_NO_PART``: partition support is disabled.  The kernel will not
- * scan for partitions from add_disk, and users can't add partitions manually.
- *
- */
-enum {
-	GENHD_FL_REMOVABLE			= 1 << 0,
-	GENHD_FL_HIDDEN				= 1 << 1,
-	GENHD_FL_NO_PART			= 1 << 2,
-};
-
-enum {
-	DISK_EVENT_MEDIA_CHANGE			= 1 << 0, /* media changed */
-	DISK_EVENT_EJECT_REQUEST		= 1 << 1, /* eject requested */
-};
-
-enum {
-	/* Poll even if events_poll_msecs is unset */
-	DISK_EVENT_FLAG_POLL			= 1 << 0,
-	/* Forward events to udev */
-	DISK_EVENT_FLAG_UEVENT			= 1 << 1,
-	/* Block event polling when open for exclusive write */
-	DISK_EVENT_FLAG_BLOCK_ON_EXCL_WRITE	= 1 << 2,
-};
-
-struct disk_events;
-struct badblocks;
-
-struct blk_integrity {
-	const struct blk_integrity_profile	*profile;
-	unsigned char				flags;
-	unsigned char				tuple_size;
-	unsigned char				interval_exp;
-	unsigned char				tag_size;
-};
-
-struct gendisk {
-	/*
-	 * major/first_minor/minors should not be set by any new driver, the
-	 * block core will take care of allocating them automatically.
-	 */
-	int major;
-	int first_minor;
-	int minors;
-
-	char disk_name[DISK_NAME_LEN];	/* name of major driver */
-
-	unsigned short events;		/* supported events */
-	unsigned short event_flags;	/* flags related to event processing */
-
-	struct xarray part_tbl;
-	struct block_device *part0;
-
-	const struct block_device_operations *fops;
-	struct request_queue *queue;
-	void *private_data;
-
-	int flags;
-	unsigned long state;
-#define GD_NEED_PART_SCAN		0
-#define GD_READ_ONLY			1
-#define GD_DEAD				2
-#define GD_NATIVE_CAPACITY		3
-
-	struct mutex open_mutex;	/* open/close mutex */
-	unsigned open_partitions;	/* number of open partitions */
-
-	struct backing_dev_info	*bdi;
-	struct kobject *slave_dir;
-#ifdef CONFIG_BLOCK_HOLDER_DEPRECATED
-	struct list_head slave_bdevs;
-#endif
-	struct timer_rand_state *random;
-	atomic_t sync_io;		/* RAID */
-	struct disk_events *ev;
-#ifdef  CONFIG_BLK_DEV_INTEGRITY
-	struct kobject integrity_kobj;
-#endif	/* CONFIG_BLK_DEV_INTEGRITY */
-#if IS_ENABLED(CONFIG_CDROM)
-	struct cdrom_device_info *cdi;
-#endif
-	int node_id;
-	struct badblocks *bb;
-	struct lockdep_map lockdep_map;
-	u64 diskseq;
-};
-
-static inline bool disk_live(struct gendisk *disk)
-{
-	return !inode_unhashed(disk->part0->bd_inode);
-}
-
-/*
- * The gendisk is refcounted by the part0 block_device, and the bd_device
- * therein is also used for device model presentation in sysfs.
- */
-#define dev_to_disk(device) \
-	(dev_to_bdev(device)->bd_disk)
-#define disk_to_dev(disk) \
-	(&((disk)->part0->bd_device))
-
-#if IS_REACHABLE(CONFIG_CDROM)
-#define disk_to_cdi(disk)	((disk)->cdi)
-#else
-#define disk_to_cdi(disk)	NULL
-#endif
-
-static inline dev_t disk_devt(struct gendisk *disk)
-{
-	return MKDEV(disk->major, disk->first_minor);
-}
-
-void disk_uevent(struct gendisk *disk, enum kobject_action action);
-
-/* block/genhd.c */
-int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
-				 const struct attribute_group **groups);
-static inline int __must_check add_disk(struct gendisk *disk)
-{
-	return device_add_disk(NULL, disk, NULL);
-}
-extern void del_gendisk(struct gendisk *gp);
-
-void invalidate_disk(struct gendisk *disk);
-
-void set_disk_ro(struct gendisk *disk, bool read_only);
-
-static inline int get_disk_ro(struct gendisk *disk)
-{
-	return disk->part0->bd_read_only ||
-		test_bit(GD_READ_ONLY, &disk->state);
-}
-
-static inline int bdev_read_only(struct block_device *bdev)
-{
-	return bdev->bd_read_only || get_disk_ro(bdev->bd_disk);
-}
-
-bool set_capacity_and_notify(struct gendisk *disk, sector_t size);
-bool disk_force_media_change(struct gendisk *disk, unsigned int events);
-
-/* drivers/char/random.c */
-extern void add_disk_randomness(struct gendisk *disk) __latent_entropy;
-extern void rand_initialize_disk(struct gendisk *disk);
-
-static inline sector_t get_start_sect(struct block_device *bdev)
-{
-	return bdev->bd_start_sect;
-}
-
-static inline sector_t bdev_nr_sectors(struct block_device *bdev)
-{
-	return bdev->bd_nr_sectors;
-}
-
-static inline loff_t bdev_nr_bytes(struct block_device *bdev)
-{
-	return (loff_t)bdev_nr_sectors(bdev) << SECTOR_SHIFT;
-}
-
-static inline sector_t get_capacity(struct gendisk *disk)
-{
-	return bdev_nr_sectors(disk->part0);
-}
-
-static inline u64 sb_bdev_nr_blocks(struct super_block *sb)
-{
-	return bdev_nr_sectors(sb->s_bdev) >>
-		(sb->s_blocksize_bits - SECTOR_SHIFT);
-}
-
-int bdev_disk_changed(struct gendisk *disk, bool invalidate);
-
-struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id,
-		struct lock_class_key *lkclass);
-extern void put_disk(struct gendisk *disk);
-struct gendisk *__blk_alloc_disk(int node, struct lock_class_key *lkclass);
-
-/**
- * blk_alloc_disk - allocate a gendisk structure
- * @node_id: numa node to allocate on
- *
- * Allocate and pre-initialize a gendisk structure for use with BIO based
- * drivers.
- *
- * Context: can sleep
- */
-#define blk_alloc_disk(node_id)						\
-({									\
-	static struct lock_class_key __key;				\
-									\
-	__blk_alloc_disk(node_id, &__key);				\
-})
-void blk_cleanup_disk(struct gendisk *disk);
-
-int __register_blkdev(unsigned int major, const char *name,
-		void (*probe)(dev_t devt));
-#define register_blkdev(major, name) \
-	__register_blkdev(major, name, NULL)
-void unregister_blkdev(unsigned int major, const char *name);
-
-bool bdev_check_media_change(struct block_device *bdev);
-int __invalidate_device(struct block_device *bdev, bool kill_dirty);
-void set_capacity(struct gendisk *disk, sector_t size);
-
-#ifdef CONFIG_BLOCK_HOLDER_DEPRECATED
-int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk);
-void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk);
-int bd_register_pending_holders(struct gendisk *disk);
-#else
-static inline int bd_link_disk_holder(struct block_device *bdev,
-				      struct gendisk *disk)
-{
-	return 0;
-}
-static inline void bd_unlink_disk_holder(struct block_device *bdev,
-					 struct gendisk *disk)
-{
-}
-static inline int bd_register_pending_holders(struct gendisk *disk)
-{
-	return 0;
-}
-#endif /* CONFIG_BLOCK_HOLDER_DEPRECATED */
-
-dev_t part_devt(struct gendisk *disk, u8 partno);
-void inc_diskseq(struct gendisk *disk);
-dev_t blk_lookup_devt(const char *name, int partno);
-void blk_request_module(dev_t devt);
-#ifdef CONFIG_BLOCK
-void printk_all_partitions(void);
-#else /* CONFIG_BLOCK */
-static inline void printk_all_partitions(void)
-{
-}
-#endif /* CONFIG_BLOCK */
-
-#endif /* _LINUX_GENHD_H */
diff --git a/include/linux/part_stat.h b/include/linux/part_stat.h
index 6f7949b2fd8dc..abeba356bc3f5 100644
--- a/include/linux/part_stat.h
+++ b/include/linux/part_stat.h
@@ -2,7 +2,7 @@
 #ifndef _LINUX_PART_STAT_H
 #define _LINUX_PART_STAT_H
 
-#include <linux/genhd.h>
+#include <linux/blkdev.h>
 #include <asm/local.h>
 
 struct disk_stats {
diff --git a/init/do_mounts.c b/init/do_mounts.c
index 762b534978d95..7058e14ad5f70 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -8,7 +8,6 @@
 #include <linux/root_dev.h>
 #include <linux/security.h>
 #include <linux/delay.h>
-#include <linux/genhd.h>
 #include <linux/mount.h>
 #include <linux/device.h>
 #include <linux/init.h>
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index e6af502c2fd77..a94044197c4a6 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -28,7 +28,6 @@
 #include <linux/gfp.h>
 #include <linux/syscore_ops.h>
 #include <linux/ctype.h>
-#include <linux/genhd.h>
 #include <linux/ktime.h>
 #include <linux/security.h>
 #include <linux/secretmem.h>
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index ad10359030a4c..f1bd031295752 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -16,7 +16,6 @@
 #include <linux/file.h>
 #include <linux/delay.h>
 #include <linux/bitops.h>
-#include <linux/genhd.h>
 #include <linux/device.h>
 #include <linux/bio.h>
 #include <linux/blkdev.h>
diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c
index 320ca80aacab5..02882526ba9a3 100644
--- a/security/integrity/ima/ima_policy.c
+++ b/security/integrity/ima/ima_policy.c
@@ -16,7 +16,6 @@
 #include <linux/parser.h>
 #include <linux/slab.h>
 #include <linux/rculist.h>
-#include <linux/genhd.h>
 #include <linux/seq_file.h>
 #include <linux/ima.h>
 
-- 
GitLab


From d5f68a42da7a4516e7503c281a54a58727f07dc3 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 24 Jan 2022 10:10:49 +0100
Subject: [PATCH 0181/1586] fs: remove mpage_alloc

open code mpage_alloc in it's two callers and simplify the results
because of the context:

 - __mpage_writepage always passes GFP_NOFS and can thus always sleep and
    will never get a NULL return from bio_alloc at all.
 - do_mpage_readpage can only get a non-sleeping context for readahead
   which never sets PF_MEMALLOC and thus doesn't need the retry loop
   either.

Both cases will never have __GFP_HIGH set.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220124091107.642561-2-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/mpage.c | 35 ++++++-----------------------------
 1 file changed, 6 insertions(+), 29 deletions(-)

diff --git a/fs/mpage.c b/fs/mpage.c
index 87f5cfef6caa7..06e95d777e940 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -66,29 +66,6 @@ static struct bio *mpage_bio_submit(int op, int op_flags, struct bio *bio)
 	return NULL;
 }
 
-static struct bio *
-mpage_alloc(struct block_device *bdev,
-		sector_t first_sector, int nr_vecs,
-		gfp_t gfp_flags)
-{
-	struct bio *bio;
-
-	/* Restrict the given (page cache) mask for slab allocations */
-	gfp_flags &= GFP_KERNEL;
-	bio = bio_alloc(gfp_flags, nr_vecs);
-
-	if (bio == NULL && (current->flags & PF_MEMALLOC)) {
-		while (!bio && (nr_vecs /= 2))
-			bio = bio_alloc(gfp_flags, nr_vecs);
-	}
-
-	if (bio) {
-		bio_set_dev(bio, bdev);
-		bio->bi_iter.bi_sector = first_sector;
-	}
-	return bio;
-}
-
 /*
  * support function for mpage_readahead.  The fs supplied get_block might
  * return an up to date buffer.  This is used to map that buffer into
@@ -296,10 +273,11 @@ alloc_new:
 								page))
 				goto out;
 		}
-		args->bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9),
-					bio_max_segs(args->nr_pages), gfp);
+		args->bio = bio_alloc(gfp, bio_max_segs(args->nr_pages));
 		if (args->bio == NULL)
 			goto confused;
+		bio_set_dev(args->bio, bdev);
+		args->bio->bi_iter.bi_sector = blocks[0] << (blkbits - 9);
 	}
 
 	length = first_hole << blkbits;
@@ -608,10 +586,9 @@ alloc_new:
 								page, wbc))
 				goto out;
 		}
-		bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9),
-				BIO_MAX_VECS, GFP_NOFS|__GFP_HIGH);
-		if (bio == NULL)
-			goto confused;
+		bio = bio_alloc(GFP_NOFS, BIO_MAX_VECS);
+		bio_set_dev(bio, bdev);
+		bio->bi_iter.bi_sector = blocks[0] << (blkbits - 9);
 
 		wbc_init_bio(wbc, bio);
 		bio->bi_write_hint = inode->i_write_hint;
-- 
GitLab


From f0d911927b3c7cf5f9edb5941d0287144a602d0d Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 24 Jan 2022 10:10:50 +0100
Subject: [PATCH 0182/1586] nilfs2: remove nilfs_alloc_seg_bio

bio_alloc will never fail when it can sleep.  Remove the now simple
nilfs_alloc_seg_bio helper and open code it in the only caller.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220124091107.642561-3-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/nilfs2/segbuf.c | 31 ++++---------------------------
 1 file changed, 4 insertions(+), 27 deletions(-)

diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index 43287b0d3e9b6..53b7c6d21cdd8 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -371,29 +371,6 @@ static int nilfs_segbuf_submit_bio(struct nilfs_segment_buffer *segbuf,
 	return err;
 }
 
-/**
- * nilfs_alloc_seg_bio - allocate a new bio for writing log
- * @nilfs: nilfs object
- * @start: start block number of the bio
- * @nr_vecs: request size of page vector.
- *
- * Return Value: On success, pointer to the struct bio is returned.
- * On error, NULL is returned.
- */
-static struct bio *nilfs_alloc_seg_bio(struct the_nilfs *nilfs, sector_t start,
-				       int nr_vecs)
-{
-	struct bio *bio;
-
-	bio = bio_alloc(GFP_NOIO, nr_vecs);
-	if (likely(bio)) {
-		bio_set_dev(bio, nilfs->ns_bdev);
-		bio->bi_iter.bi_sector =
-			start << (nilfs->ns_blocksize_bits - 9);
-	}
-	return bio;
-}
-
 static void nilfs_segbuf_prepare_write(struct nilfs_segment_buffer *segbuf,
 				       struct nilfs_write_info *wi)
 {
@@ -414,10 +391,10 @@ static int nilfs_segbuf_submit_bh(struct nilfs_segment_buffer *segbuf,
 	BUG_ON(wi->nr_vecs <= 0);
  repeat:
 	if (!wi->bio) {
-		wi->bio = nilfs_alloc_seg_bio(wi->nilfs, wi->blocknr + wi->end,
-					      wi->nr_vecs);
-		if (unlikely(!wi->bio))
-			return -ENOMEM;
+		wi->bio = bio_alloc(GFP_NOIO, wi->nr_vecs);
+		bio_set_dev(wi->bio, wi->nilfs->ns_bdev);
+		wi->bio->bi_iter.bi_sector = (wi->blocknr + wi->end) <<
+			(wi->nilfs->ns_blocksize_bits - 9);
 	}
 
 	len = bio_add_page(wi->bio, bh->b_page, bh->b_size, bh_offset(bh));
-- 
GitLab


From 5d2ca2132f889bc2c90d6d07fc9fc129cfee8955 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 24 Jan 2022 10:10:51 +0100
Subject: [PATCH 0183/1586] nfs/blocklayout: remove bl_alloc_init_bio

bio_alloc will never fail when it can sleep.  Remove the now simple
bl_alloc_init_bio helper and open code it in the only caller.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220124091107.642561-4-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/nfs/blocklayout/blocklayout.c | 26 +++++---------------------
 1 file changed, 5 insertions(+), 21 deletions(-)

diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index fe860c5387476..38e063af7e98a 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -115,23 +115,6 @@ bl_submit_bio(struct bio *bio)
 	return NULL;
 }
 
-static struct bio *bl_alloc_init_bio(unsigned int npg,
-		struct block_device *bdev, sector_t disk_sector,
-		bio_end_io_t end_io, struct parallel_io *par)
-{
-	struct bio *bio;
-
-	npg = bio_max_segs(npg);
-	bio = bio_alloc(GFP_NOIO, npg);
-	if (bio) {
-		bio->bi_iter.bi_sector = disk_sector;
-		bio_set_dev(bio, bdev);
-		bio->bi_end_io = end_io;
-		bio->bi_private = par;
-	}
-	return bio;
-}
-
 static bool offset_in_map(u64 offset, struct pnfs_block_dev_map *map)
 {
 	return offset >= map->start && offset < map->start + map->len;
@@ -171,10 +154,11 @@ do_add_page_to_bio(struct bio *bio, int npg, int rw, sector_t isect,
 
 retry:
 	if (!bio) {
-		bio = bl_alloc_init_bio(npg, map->bdev,
-				disk_addr >> SECTOR_SHIFT, end_io, par);
-		if (!bio)
-			return ERR_PTR(-ENOMEM);
+		bio = bio_alloc(GFP_NOIO, bio_max_segs(npg));
+		bio->bi_iter.bi_sector = disk_addr >> SECTOR_SHIFT;
+		bio_set_dev(bio, map->bdev);
+		bio->bi_end_io = end_io;
+		bio->bi_private = par;
 		bio_set_op_attrs(bio, rw, 0);
 	}
 	if (bio_add_page(bio, page, *len, offset) < *len) {
-- 
GitLab


From 39146b6f66ba5c107d5c5758a17f290846165b4d Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 24 Jan 2022 10:10:52 +0100
Subject: [PATCH 0184/1586] ntfs3: remove ntfs_alloc_bio

bio_alloc will never fail if it is allowed to sleep, so there is no
need for this loop.  Also remove the __GFP_HIGH specifier as it doesn't
make sense here given that we'll always fall back to the mempool anyway.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220124091107.642561-5-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/ntfs3/fsntfs.c | 23 ++---------------------
 1 file changed, 2 insertions(+), 21 deletions(-)

diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c
index 4de9acb169689..4a255e21ecf5f 100644
--- a/fs/ntfs3/fsntfs.c
+++ b/fs/ntfs3/fsntfs.c
@@ -1443,17 +1443,6 @@ int ntfs_write_bh(struct ntfs_sb_info *sbi, struct NTFS_RECORD_HEADER *rhdr,
 	return err;
 }
 
-static inline struct bio *ntfs_alloc_bio(u32 nr_vecs)
-{
-	struct bio *bio = bio_alloc(GFP_NOFS | __GFP_HIGH, nr_vecs);
-
-	if (!bio && (current->flags & PF_MEMALLOC)) {
-		while (!bio && (nr_vecs /= 2))
-			bio = bio_alloc(GFP_NOFS | __GFP_HIGH, nr_vecs);
-	}
-	return bio;
-}
-
 /*
  * ntfs_bio_pages - Read/write pages from/to disk.
  */
@@ -1496,11 +1485,7 @@ int ntfs_bio_pages(struct ntfs_sb_info *sbi, const struct runs_tree *run,
 		lbo = ((u64)lcn << cluster_bits) + off;
 		len = ((u64)clen << cluster_bits) - off;
 new_bio:
-		new = ntfs_alloc_bio(nr_pages - page_idx);
-		if (!new) {
-			err = -ENOMEM;
-			goto out;
-		}
+		new = bio_alloc(GFP_NOFS, nr_pages - page_idx);
 		if (bio) {
 			bio_chain(bio, new);
 			submit_bio(bio);
@@ -1599,11 +1584,7 @@ int ntfs_bio_fill_1(struct ntfs_sb_info *sbi, const struct runs_tree *run)
 		lbo = (u64)lcn << cluster_bits;
 		len = (u64)clen << cluster_bits;
 new_bio:
-		new = ntfs_alloc_bio(BIO_MAX_VECS);
-		if (!new) {
-			err = -ENOMEM;
-			break;
-		}
+		new = bio_alloc(GFP_NOFS, BIO_MAX_VECS);
 		if (bio) {
 			bio_chain(bio, new);
 			submit_bio(bio);
-- 
GitLab


From 53db984e004c7116ce69e2f4a163664453336ae1 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 24 Jan 2022 10:10:53 +0100
Subject: [PATCH 0185/1586] dm: bio_alloc can't fail if it is allowed to sleep

Remove handling of NULL returns from sleeping bio_alloc calls given that
those can't fail.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220124091107.642561-6-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/md/dm-crypt.c          |  5 +----
 drivers/md/dm-log-writes.c     | 18 ------------------
 drivers/md/dm-thin.c           | 25 +++++++++----------------
 drivers/md/dm-zoned-metadata.c | 11 -----------
 drivers/md/dm.c                |  2 --
 5 files changed, 10 insertions(+), 51 deletions(-)

diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index d4ae31558826a..20abe3486aba1 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1673,9 +1673,6 @@ retry:
 		mutex_lock(&cc->bio_alloc_lock);
 
 	clone = bio_alloc_bioset(GFP_NOIO, nr_iovecs, &cc->bs);
-	if (!clone)
-		goto out;
-
 	clone_init(io, clone);
 
 	remaining_size = size;
@@ -1702,7 +1699,7 @@ retry:
 		bio_put(clone);
 		clone = NULL;
 	}
-out:
+
 	if (unlikely(gfp_mask & __GFP_DIRECT_RECLAIM))
 		mutex_unlock(&cc->bio_alloc_lock);
 
diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c
index 139b09b06eda9..25f5e8d2d417b 100644
--- a/drivers/md/dm-log-writes.c
+++ b/drivers/md/dm-log-writes.c
@@ -218,10 +218,6 @@ static int write_metadata(struct log_writes_c *lc, void *entry,
 	size_t ret;
 
 	bio = bio_alloc(GFP_KERNEL, 1);
-	if (!bio) {
-		DMERR("Couldn't alloc log bio");
-		goto error;
-	}
 	bio->bi_iter.bi_size = 0;
 	bio->bi_iter.bi_sector = sector;
 	bio_set_dev(bio, lc->logdev->bdev);
@@ -276,11 +272,6 @@ static int write_inline_data(struct log_writes_c *lc, void *entry,
 		atomic_inc(&lc->io_blocks);
 
 		bio = bio_alloc(GFP_KERNEL, bio_pages);
-		if (!bio) {
-			DMERR("Couldn't alloc inline data bio");
-			goto error;
-		}
-
 		bio->bi_iter.bi_size = 0;
 		bio->bi_iter.bi_sector = sector;
 		bio_set_dev(bio, lc->logdev->bdev);
@@ -322,7 +313,6 @@ static int write_inline_data(struct log_writes_c *lc, void *entry,
 error_bio:
 	bio_free_pages(bio);
 	bio_put(bio);
-error:
 	put_io_block(lc);
 	return -1;
 }
@@ -364,10 +354,6 @@ static int log_one_block(struct log_writes_c *lc,
 
 	atomic_inc(&lc->io_blocks);
 	bio = bio_alloc(GFP_KERNEL, bio_max_segs(block->vec_cnt));
-	if (!bio) {
-		DMERR("Couldn't alloc log bio");
-		goto error;
-	}
 	bio->bi_iter.bi_size = 0;
 	bio->bi_iter.bi_sector = sector;
 	bio_set_dev(bio, lc->logdev->bdev);
@@ -387,10 +373,6 @@ static int log_one_block(struct log_writes_c *lc,
 			submit_bio(bio);
 			bio = bio_alloc(GFP_KERNEL,
 					bio_max_segs(block->vec_cnt - i));
-			if (!bio) {
-				DMERR("Couldn't alloc log bio");
-				goto error;
-			}
 			bio->bi_iter.bi_size = 0;
 			bio->bi_iter.bi_sector = sector;
 			bio_set_dev(bio, lc->logdev->bdev);
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index ec119d2422d5d..76a9c2e9aeeea 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -1180,24 +1180,17 @@ static void process_prepared_discard_passdown_pt1(struct dm_thin_new_mapping *m)
 	}
 
 	discard_parent = bio_alloc(GFP_NOIO, 1);
-	if (!discard_parent) {
-		DMWARN("%s: unable to allocate top level discard bio for passdown. Skipping passdown.",
-		       dm_device_name(tc->pool->pool_md));
-		queue_passdown_pt2(m);
+	discard_parent->bi_end_io = passdown_endio;
+	discard_parent->bi_private = m;
 
-	} else {
-		discard_parent->bi_end_io = passdown_endio;
-		discard_parent->bi_private = m;
-
-		if (m->maybe_shared)
-			passdown_double_checking_shared_status(m, discard_parent);
-		else {
-			struct discard_op op;
+	if (m->maybe_shared)
+		passdown_double_checking_shared_status(m, discard_parent);
+	else {
+		struct discard_op op;
 
-			begin_discard(&op, tc, discard_parent);
-			r = issue_discard(&op, m->data_block, data_end);
-			end_discard(&op, r);
-		}
+		begin_discard(&op, tc, discard_parent);
+		r = issue_discard(&op, m->data_block, data_end);
+		end_discard(&op, r);
 	}
 }
 
diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c
index ee4626d085574..5718b83cc7182 100644
--- a/drivers/md/dm-zoned-metadata.c
+++ b/drivers/md/dm-zoned-metadata.c
@@ -551,10 +551,6 @@ static struct dmz_mblock *dmz_get_mblock_slow(struct dmz_metadata *zmd,
 		return ERR_PTR(-ENOMEM);
 
 	bio = bio_alloc(GFP_NOIO, 1);
-	if (!bio) {
-		dmz_free_mblock(zmd, mblk);
-		return ERR_PTR(-ENOMEM);
-	}
 
 	spin_lock(&zmd->mblk_lock);
 
@@ -726,10 +722,6 @@ static int dmz_write_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk,
 		return -EIO;
 
 	bio = bio_alloc(GFP_NOIO, 1);
-	if (!bio) {
-		set_bit(DMZ_META_ERROR, &mblk->state);
-		return -ENOMEM;
-	}
 
 	set_bit(DMZ_META_WRITING, &mblk->state);
 
@@ -760,9 +752,6 @@ static int dmz_rdwr_block(struct dmz_dev *dev, int op,
 		return -EIO;
 
 	bio = bio_alloc(GFP_NOIO, 1);
-	if (!bio)
-		return -ENOMEM;
-
 	bio->bi_iter.bi_sector = dmz_blk2sect(block);
 	bio_set_dev(bio, dev->bdev);
 	bio_set_op_attrs(bio, op, REQ_SYNC | REQ_META | REQ_PRIO);
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index dcbd6d201619d..e431f72c10bf4 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -520,8 +520,6 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio)
 	struct bio *clone;
 
 	clone = bio_alloc_bioset(GFP_NOIO, 0, &md->io_bs);
-	if (!clone)
-		return NULL;
 
 	tio = container_of(clone, struct dm_target_io, clone);
 	tio->inside_dm_io = true;
-- 
GitLab


From 3f868c09ea8f40f800c4c644c072d91c9eee0d71 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 24 Jan 2022 10:10:54 +0100
Subject: [PATCH 0186/1586] dm-crypt: remove clone_init

Just open code it next to the bio allocations, which saves a few lines
of code, prepares for future changes and allows to remove the duplicate
bi_opf assignment for the bio_clone_fast case in kcryptd_io_read.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220124091107.642561-7-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/md/dm-crypt.c | 21 ++++++++-------------
 1 file changed, 8 insertions(+), 13 deletions(-)

diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 20abe3486aba1..3c5ecd35d3483 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -234,7 +234,7 @@ static volatile unsigned long dm_crypt_pages_per_client;
 #define DM_CRYPT_MEMORY_PERCENT			2
 #define DM_CRYPT_MIN_PAGES_PER_CLIENT		(BIO_MAX_VECS * 16)
 
-static void clone_init(struct dm_crypt_io *, struct bio *);
+static void crypt_endio(struct bio *clone);
 static void kcryptd_queue_crypt(struct dm_crypt_io *io);
 static struct scatterlist *crypt_get_sg_data(struct crypt_config *cc,
 					     struct scatterlist *sg);
@@ -1673,7 +1673,10 @@ retry:
 		mutex_lock(&cc->bio_alloc_lock);
 
 	clone = bio_alloc_bioset(GFP_NOIO, nr_iovecs, &cc->bs);
-	clone_init(io, clone);
+	clone->bi_private = io;
+	clone->bi_end_io = crypt_endio;
+	bio_set_dev(clone, cc->dev->bdev);
+	clone->bi_opf = io->base_bio->bi_opf;
 
 	remaining_size = size;
 
@@ -1826,16 +1829,6 @@ static void crypt_endio(struct bio *clone)
 	crypt_dec_pending(io);
 }
 
-static void clone_init(struct dm_crypt_io *io, struct bio *clone)
-{
-	struct crypt_config *cc = io->cc;
-
-	clone->bi_private = io;
-	clone->bi_end_io  = crypt_endio;
-	bio_set_dev(clone, cc->dev->bdev);
-	clone->bi_opf	  = io->base_bio->bi_opf;
-}
-
 static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp)
 {
 	struct crypt_config *cc = io->cc;
@@ -1850,10 +1843,12 @@ static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp)
 	clone = bio_clone_fast(io->base_bio, gfp, &cc->bs);
 	if (!clone)
 		return 1;
+	clone->bi_private = io;
+	clone->bi_end_io = crypt_endio;
+	bio_set_dev(clone, cc->dev->bdev);
 
 	crypt_inc_pending(io);
 
-	clone_init(io, clone);
 	clone->bi_iter.bi_sector = cc->start + io->sector;
 
 	if (dm_crypt_integrity_io_alloc(io, clone)) {
-- 
GitLab


From eba33b8ef1b90d8996eceb0569c06a4f784ef2b5 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 24 Jan 2022 10:10:55 +0100
Subject: [PATCH 0187/1586] dm-snap: use blkdev_issue_flush instead of open
 coding it

Use blkdev_issue_flush, which uses an on-stack bio instead of an
opencoded version with a bio embedded into struct dm_snapshot.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220124091107.642561-8-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/md/dm-snap.c | 21 +--------------------
 1 file changed, 1 insertion(+), 20 deletions(-)

diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index dcf34c6b05ad3..0d336b5ec5714 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -141,11 +141,6 @@ struct dm_snapshot {
 	 * for them to be committed.
 	 */
 	struct bio_list bios_queued_during_merge;
-
-	/*
-	 * Flush data after merge.
-	 */
-	struct bio flush_bio;
 };
 
 /*
@@ -1127,17 +1122,6 @@ shut:
 
 static void error_bios(struct bio *bio);
 
-static int flush_data(struct dm_snapshot *s)
-{
-	struct bio *flush_bio = &s->flush_bio;
-
-	bio_reset(flush_bio);
-	bio_set_dev(flush_bio, s->origin->bdev);
-	flush_bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
-
-	return submit_bio_wait(flush_bio);
-}
-
 static void merge_callback(int read_err, unsigned long write_err, void *context)
 {
 	struct dm_snapshot *s = context;
@@ -1151,7 +1135,7 @@ static void merge_callback(int read_err, unsigned long write_err, void *context)
 		goto shut;
 	}
 
-	if (flush_data(s) < 0) {
+	if (blkdev_issue_flush(s->origin->bdev) < 0) {
 		DMERR("Flush after merge failed: shutting down merge");
 		goto shut;
 	}
@@ -1340,7 +1324,6 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	s->first_merging_chunk = 0;
 	s->num_merging_chunks = 0;
 	bio_list_init(&s->bios_queued_during_merge);
-	bio_init(&s->flush_bio, NULL, 0);
 
 	/* Allocate hash table for COW data */
 	if (init_hash_tables(s)) {
@@ -1528,8 +1511,6 @@ static void snapshot_dtr(struct dm_target *ti)
 
 	dm_exception_store_destroy(s->store);
 
-	bio_uninit(&s->flush_bio);
-
 	dm_put_device(ti, s->cow);
 
 	dm_put_device(ti, s->origin);
-- 
GitLab


From 28d7d128aad5cd2178b158900d58365d1fd3de94 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 24 Jan 2022 10:10:56 +0100
Subject: [PATCH 0188/1586] dm-thin: use blkdev_issue_flush instead of open
 coding it

Use blkdev_issue_flush, which uses an on-stack bio instead of an
opencoded version with a bio embedded into struct pool.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220124091107.642561-9-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/md/dm-thin.c | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 76a9c2e9aeeea..411a3f56ed90c 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -282,8 +282,6 @@ struct pool {
 	struct dm_bio_prison_cell **cell_sort_array;
 
 	mempool_t mapping_pool;
-
-	struct bio flush_bio;
 };
 
 static void metadata_operation_failed(struct pool *pool, const char *op, int r);
@@ -2906,7 +2904,6 @@ static void __pool_destroy(struct pool *pool)
 	if (pool->next_mapping)
 		mempool_free(pool->next_mapping, &pool->mapping_pool);
 	mempool_exit(&pool->mapping_pool);
-	bio_uninit(&pool->flush_bio);
 	dm_deferred_set_destroy(pool->shared_read_ds);
 	dm_deferred_set_destroy(pool->all_io_ds);
 	kfree(pool);
@@ -2987,7 +2984,6 @@ static struct pool *pool_create(struct mapped_device *pool_md,
 	pool->low_water_triggered = false;
 	pool->suspended = true;
 	pool->out_of_data_space = false;
-	bio_init(&pool->flush_bio, NULL, 0);
 
 	pool->shared_read_ds = dm_deferred_set_create();
 	if (!pool->shared_read_ds) {
@@ -3194,13 +3190,8 @@ static void metadata_low_callback(void *context)
 static int metadata_pre_commit_callback(void *context)
 {
 	struct pool *pool = context;
-	struct bio *flush_bio = &pool->flush_bio;
-
-	bio_reset(flush_bio);
-	bio_set_dev(flush_bio, pool->data_dev);
-	flush_bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
 
-	return submit_bio_wait(flush_bio);
+	return blkdev_issue_flush(pool->data_dev);
 }
 
 static sector_t get_dev_size(struct block_device *bdev)
-- 
GitLab


From 4b1dc86d1857f1007865cab759f2285280692eee Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 24 Jan 2022 10:10:57 +0100
Subject: [PATCH 0189/1586] drbd: bio_alloc can't fail if it is allow to sleep

Remove handling of NULL returns from sleeping bio_alloc calls given that
those can't fail.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220124091107.642561-10-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/drbd/drbd_receiver.c | 22 ++++------------------
 1 file changed, 4 insertions(+), 18 deletions(-)

diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 6df2539e215ba..fb59b263deeef 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -1281,14 +1281,13 @@ static void submit_one_flush(struct drbd_device *device, struct issue_flush_cont
 {
 	struct bio *bio = bio_alloc(GFP_NOIO, 0);
 	struct one_flush_context *octx = kmalloc(sizeof(*octx), GFP_NOIO);
-	if (!bio || !octx) {
-		drbd_warn(device, "Could not allocate a bio, CANNOT ISSUE FLUSH\n");
+
+	if (!octx) {
+		drbd_warn(device, "Could not allocate a octx, CANNOT ISSUE FLUSH\n");
 		/* FIXME: what else can I do now?  disconnecting or detaching
 		 * really does not help to improve the state of the world, either.
 		 */
-		kfree(octx);
-		if (bio)
-			bio_put(bio);
+		bio_put(bio);
 
 		ctx->error = -ENOMEM;
 		put_ldev(device);
@@ -1646,7 +1645,6 @@ int drbd_submit_peer_request(struct drbd_device *device,
 	unsigned data_size = peer_req->i.size;
 	unsigned n_bios = 0;
 	unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;
-	int err = -ENOMEM;
 
 	/* TRIM/DISCARD: for now, always use the helper function
 	 * blkdev_issue_zeroout(..., discard=true).
@@ -1688,10 +1686,6 @@ int drbd_submit_peer_request(struct drbd_device *device,
 	 */
 next_bio:
 	bio = bio_alloc(GFP_NOIO, nr_pages);
-	if (!bio) {
-		drbd_err(device, "submit_ee: Allocation of a bio failed (nr_pages=%u)\n", nr_pages);
-		goto fail;
-	}
 	/* > peer_req->i.sector, unless this is the first bio */
 	bio->bi_iter.bi_sector = sector;
 	bio_set_dev(bio, device->ldev->backing_bdev);
@@ -1726,14 +1720,6 @@ next_bio:
 		drbd_submit_bio_noacct(device, fault_type, bio);
 	} while (bios);
 	return 0;
-
-fail:
-	while (bios) {
-		bio = bios;
-		bios = bios->bi_next;
-		bio_put(bio);
-	}
-	return err;
 }
 
 static void drbd_remove_epoch_entry_interval(struct drbd_device *device,
-- 
GitLab


From 1fe0640ff94feae6d21417e2f4f2829b882274b1 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 24 Jan 2022 10:10:58 +0100
Subject: [PATCH 0190/1586] rnbd-srv: simplify bio mapping in process_rdma

The memory mapped in process_rdma is contiguous, so there is no need
to loop over bio_add_page.  Remove rnbd_bio_map_kern and just open code
the bio allocation and mapping in the caller.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jack Wang <jinpu.wang@ionons.com>
Tested-by: Jack Wang <jinpu.wang@ionos.com>
Link: https://lore.kernel.org/r/20220124091107.642561-11-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/rnbd/rnbd-srv-dev.c | 57 -------------------------------
 drivers/block/rnbd/rnbd-srv-dev.h |  5 ---
 drivers/block/rnbd/rnbd-srv.c     | 23 +++++++++----
 3 files changed, 16 insertions(+), 69 deletions(-)

diff --git a/drivers/block/rnbd/rnbd-srv-dev.c b/drivers/block/rnbd/rnbd-srv-dev.c
index b241a099aeae2..98d3e591a0885 100644
--- a/drivers/block/rnbd/rnbd-srv-dev.c
+++ b/drivers/block/rnbd/rnbd-srv-dev.c
@@ -44,60 +44,3 @@ void rnbd_dev_close(struct rnbd_dev *dev)
 	blkdev_put(dev->bdev, dev->blk_open_flags);
 	kfree(dev);
 }
-
-void rnbd_dev_bi_end_io(struct bio *bio)
-{
-	struct rnbd_dev_blk_io *io = bio->bi_private;
-
-	rnbd_endio(io->priv, blk_status_to_errno(bio->bi_status));
-	bio_put(bio);
-}
-
-/**
- *	rnbd_bio_map_kern	-	map kernel address into bio
- *	@data: pointer to buffer to map
- *	@bs: bio_set to use.
- *	@len: length in bytes
- *	@gfp_mask: allocation flags for bio allocation
- *
- *	Map the kernel address into a bio suitable for io to a block
- *	device. Returns an error pointer in case of error.
- */
-struct bio *rnbd_bio_map_kern(void *data, struct bio_set *bs,
-			      unsigned int len, gfp_t gfp_mask)
-{
-	unsigned long kaddr = (unsigned long)data;
-	unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	unsigned long start = kaddr >> PAGE_SHIFT;
-	const int nr_pages = end - start;
-	int offset, i;
-	struct bio *bio;
-
-	bio = bio_alloc_bioset(gfp_mask, nr_pages, bs);
-	if (!bio)
-		return ERR_PTR(-ENOMEM);
-
-	offset = offset_in_page(kaddr);
-	for (i = 0; i < nr_pages; i++) {
-		unsigned int bytes = PAGE_SIZE - offset;
-
-		if (len <= 0)
-			break;
-
-		if (bytes > len)
-			bytes = len;
-
-		if (bio_add_page(bio, virt_to_page(data), bytes,
-				    offset) < bytes) {
-			/* we don't support partial mappings */
-			bio_put(bio);
-			return ERR_PTR(-EINVAL);
-		}
-
-		data += bytes;
-		len -= bytes;
-		offset = 0;
-	}
-
-	return bio;
-}
diff --git a/drivers/block/rnbd/rnbd-srv-dev.h b/drivers/block/rnbd/rnbd-srv-dev.h
index 0eb23850afb95..1a14ece0be726 100644
--- a/drivers/block/rnbd/rnbd-srv-dev.h
+++ b/drivers/block/rnbd/rnbd-srv-dev.h
@@ -41,11 +41,6 @@ void rnbd_dev_close(struct rnbd_dev *dev);
 
 void rnbd_endio(void *priv, int error);
 
-void rnbd_dev_bi_end_io(struct bio *bio);
-
-struct bio *rnbd_bio_map_kern(void *data, struct bio_set *bs,
-			      unsigned int len, gfp_t gfp_mask);
-
 static inline int rnbd_dev_get_max_segs(const struct rnbd_dev *dev)
 {
 	return queue_max_segments(bdev_get_queue(dev->bdev));
diff --git a/drivers/block/rnbd/rnbd-srv.c b/drivers/block/rnbd/rnbd-srv.c
index 1ee808fc600cf..6d228af1dcc35 100644
--- a/drivers/block/rnbd/rnbd-srv.c
+++ b/drivers/block/rnbd/rnbd-srv.c
@@ -114,6 +114,14 @@ rnbd_get_sess_dev(int dev_id, struct rnbd_srv_session *srv_sess)
 	return sess_dev;
 }
 
+static void rnbd_dev_bi_end_io(struct bio *bio)
+{
+	struct rnbd_dev_blk_io *io = bio->bi_private;
+
+	rnbd_endio(io->priv, blk_status_to_errno(bio->bi_status));
+	bio_put(bio);
+}
+
 static int process_rdma(struct rnbd_srv_session *srv_sess,
 			struct rtrs_srv_op *id, void *data, u32 datalen,
 			const void *usr, size_t usrlen)
@@ -144,12 +152,12 @@ static int process_rdma(struct rnbd_srv_session *srv_sess,
 	priv->sess_dev = sess_dev;
 	priv->id = id;
 
-	/* Generate bio with pages pointing to the rdma buffer */
-	bio = rnbd_bio_map_kern(data, sess_dev->rnbd_dev->ibd_bio_set, datalen, GFP_KERNEL);
-	if (IS_ERR(bio)) {
-		err = PTR_ERR(bio);
-		rnbd_srv_err(sess_dev, "Failed to generate bio, err: %d\n", err);
-		goto sess_dev_put;
+	bio = bio_alloc_bioset(GFP_KERNEL, 1, sess_dev->rnbd_dev->ibd_bio_set);
+	if (bio_add_page(bio, virt_to_page(data), datalen,
+			offset_in_page(data)) != datalen) {
+		rnbd_srv_err(sess_dev, "Failed to map data to bio\n");
+		err = -EINVAL;
+		goto bio_put;
 	}
 
 	io = container_of(bio, struct rnbd_dev_blk_io, bio);
@@ -170,7 +178,8 @@ static int process_rdma(struct rnbd_srv_session *srv_sess,
 
 	return 0;
 
-sess_dev_put:
+bio_put:
+	bio_put(bio);
 	rnbd_put_sess_dev(sess_dev);
 err:
 	kfree(priv);
-- 
GitLab


From d7b78de2b1552e3e7ce3a069f075cc2729aa5c34 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 24 Jan 2022 10:10:59 +0100
Subject: [PATCH 0191/1586] rnbd-srv: remove struct rnbd_dev_blk_io

Only the priv field of rnbd_dev_blk_io is used, so store the value of
that in bio->bi_private directly and remove the entire bio_set overhead.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jack Wang <jinpu.wang@ionos.com>
Link: https://lore.kernel.org/r/20220124091107.642561-12-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/rnbd/rnbd-srv-dev.c |  4 +---
 drivers/block/rnbd/rnbd-srv-dev.h | 13 ++-----------
 drivers/block/rnbd/rnbd-srv.c     | 27 ++++-----------------------
 drivers/block/rnbd/rnbd-srv.h     |  1 -
 4 files changed, 7 insertions(+), 38 deletions(-)

diff --git a/drivers/block/rnbd/rnbd-srv-dev.c b/drivers/block/rnbd/rnbd-srv-dev.c
index 98d3e591a0885..c5d0a03911659 100644
--- a/drivers/block/rnbd/rnbd-srv-dev.c
+++ b/drivers/block/rnbd/rnbd-srv-dev.c
@@ -12,8 +12,7 @@
 #include "rnbd-srv-dev.h"
 #include "rnbd-log.h"
 
-struct rnbd_dev *rnbd_dev_open(const char *path, fmode_t flags,
-			       struct bio_set *bs)
+struct rnbd_dev *rnbd_dev_open(const char *path, fmode_t flags)
 {
 	struct rnbd_dev *dev;
 	int ret;
@@ -30,7 +29,6 @@ struct rnbd_dev *rnbd_dev_open(const char *path, fmode_t flags,
 
 	dev->blk_open_flags = flags;
 	bdevname(dev->bdev, dev->name);
-	dev->ibd_bio_set = bs;
 
 	return dev;
 
diff --git a/drivers/block/rnbd/rnbd-srv-dev.h b/drivers/block/rnbd/rnbd-srv-dev.h
index 1a14ece0be726..2c3df02b5e8ec 100644
--- a/drivers/block/rnbd/rnbd-srv-dev.h
+++ b/drivers/block/rnbd/rnbd-srv-dev.h
@@ -14,25 +14,16 @@
 
 struct rnbd_dev {
 	struct block_device	*bdev;
-	struct bio_set		*ibd_bio_set;
 	fmode_t			blk_open_flags;
 	char			name[BDEVNAME_SIZE];
 };
 
-struct rnbd_dev_blk_io {
-	struct rnbd_dev *dev;
-	void		 *priv;
-	/* have to be last member for front_pad usage of bioset_init */
-	struct bio	bio;
-};
-
 /**
  * rnbd_dev_open() - Open a device
+ * @path:	path to open
  * @flags:	open flags
- * @bs:		bio_set to use during block io,
  */
-struct rnbd_dev *rnbd_dev_open(const char *path, fmode_t flags,
-			       struct bio_set *bs);
+struct rnbd_dev *rnbd_dev_open(const char *path, fmode_t flags);
 
 /**
  * rnbd_dev_close() - Close a device
diff --git a/drivers/block/rnbd/rnbd-srv.c b/drivers/block/rnbd/rnbd-srv.c
index 6d228af1dcc35..ff9b389976078 100644
--- a/drivers/block/rnbd/rnbd-srv.c
+++ b/drivers/block/rnbd/rnbd-srv.c
@@ -116,9 +116,7 @@ rnbd_get_sess_dev(int dev_id, struct rnbd_srv_session *srv_sess)
 
 static void rnbd_dev_bi_end_io(struct bio *bio)
 {
-	struct rnbd_dev_blk_io *io = bio->bi_private;
-
-	rnbd_endio(io->priv, blk_status_to_errno(bio->bi_status));
+	rnbd_endio(bio->bi_private, blk_status_to_errno(bio->bi_status));
 	bio_put(bio);
 }
 
@@ -131,7 +129,6 @@ static int process_rdma(struct rnbd_srv_session *srv_sess,
 	struct rnbd_srv_sess_dev *sess_dev;
 	u32 dev_id;
 	int err;
-	struct rnbd_dev_blk_io *io;
 	struct bio *bio;
 	short prio;
 
@@ -152,7 +149,7 @@ static int process_rdma(struct rnbd_srv_session *srv_sess,
 	priv->sess_dev = sess_dev;
 	priv->id = id;
 
-	bio = bio_alloc_bioset(GFP_KERNEL, 1, sess_dev->rnbd_dev->ibd_bio_set);
+	bio = bio_alloc(GFP_KERNEL, 1);
 	if (bio_add_page(bio, virt_to_page(data), datalen,
 			offset_in_page(data)) != datalen) {
 		rnbd_srv_err(sess_dev, "Failed to map data to bio\n");
@@ -160,12 +157,8 @@ static int process_rdma(struct rnbd_srv_session *srv_sess,
 		goto bio_put;
 	}
 
-	io = container_of(bio, struct rnbd_dev_blk_io, bio);
-	io->dev = sess_dev->rnbd_dev;
-	io->priv = priv;
-
 	bio->bi_end_io = rnbd_dev_bi_end_io;
-	bio->bi_private = io;
+	bio->bi_private = priv;
 	bio->bi_opf = rnbd_to_bio_flags(le32_to_cpu(msg->rw));
 	bio->bi_iter.bi_sector = le64_to_cpu(msg->sector);
 	bio->bi_iter.bi_size = le32_to_cpu(msg->bi_size);
@@ -260,7 +253,6 @@ static void destroy_sess(struct rnbd_srv_session *srv_sess)
 
 out:
 	xa_destroy(&srv_sess->index_idr);
-	bioset_exit(&srv_sess->sess_bio_set);
 
 	pr_info("RTRS Session %s disconnected\n", srv_sess->sessname);
 
@@ -289,16 +281,6 @@ static int create_sess(struct rtrs_srv_sess *rtrs)
 		return -ENOMEM;
 
 	srv_sess->queue_depth = rtrs_srv_get_queue_depth(rtrs);
-	err = bioset_init(&srv_sess->sess_bio_set, srv_sess->queue_depth,
-			  offsetof(struct rnbd_dev_blk_io, bio),
-			  BIOSET_NEED_BVECS);
-	if (err) {
-		pr_err("Allocating srv_session for path %s failed\n",
-		       pathname);
-		kfree(srv_sess);
-		return err;
-	}
-
 	xa_init_flags(&srv_sess->index_idr, XA_FLAGS_ALLOC);
 	INIT_LIST_HEAD(&srv_sess->sess_dev_list);
 	mutex_init(&srv_sess->lock);
@@ -747,8 +729,7 @@ static int process_msg_open(struct rnbd_srv_session *srv_sess,
 		goto reject;
 	}
 
-	rnbd_dev = rnbd_dev_open(full_path, open_flags,
-				 &srv_sess->sess_bio_set);
+	rnbd_dev = rnbd_dev_open(full_path, open_flags);
 	if (IS_ERR(rnbd_dev)) {
 		pr_err("Opening device '%s' on session %s failed, failed to open the block device, err: %ld\n",
 		       full_path, srv_sess->sessname, PTR_ERR(rnbd_dev));
diff --git a/drivers/block/rnbd/rnbd-srv.h b/drivers/block/rnbd/rnbd-srv.h
index e5604bce123ab..be2ae486d407e 100644
--- a/drivers/block/rnbd/rnbd-srv.h
+++ b/drivers/block/rnbd/rnbd-srv.h
@@ -23,7 +23,6 @@ struct rnbd_srv_session {
 	struct rtrs_srv_sess	*rtrs;
 	char			sessname[NAME_MAX];
 	int			queue_depth;
-	struct bio_set		sess_bio_set;
 
 	struct xarray		index_idr;
 	/* List of struct rnbd_srv_sess_dev */
-- 
GitLab


From 7d8d0c658d48705fca35238a8ff601b7c5cbc0de Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 24 Jan 2022 10:11:00 +0100
Subject: [PATCH 0192/1586] xen-blkback: bio_alloc can't fail if it is allow to
 sleep

Remove handling of NULL returns from sleeping bio_alloc calls given that
those can't fail.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220124091107.642561-13-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/xen-blkback/blkback.c | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 14e452896d04c..6bb2ad7692065 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -1327,9 +1327,6 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
 				     seg[i].nsec << 9,
 				     seg[i].offset) == 0)) {
 			bio = bio_alloc(GFP_KERNEL, bio_max_segs(nseg - i));
-			if (unlikely(bio == NULL))
-				goto fail_put_bio;
-
 			biolist[nbio++] = bio;
 			bio_set_dev(bio, preq.bdev);
 			bio->bi_private = pending_req;
@@ -1346,9 +1343,6 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
 		BUG_ON(operation_flags != REQ_PREFLUSH);
 
 		bio = bio_alloc(GFP_KERNEL, 0);
-		if (unlikely(bio == NULL))
-			goto fail_put_bio;
-
 		biolist[nbio++] = bio;
 		bio_set_dev(bio, preq.bdev);
 		bio->bi_private = pending_req;
@@ -1381,14 +1375,6 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
 	free_req(ring, pending_req);
 	msleep(1); /* back off a bit */
 	return -EIO;
-
- fail_put_bio:
-	for (i = 0; i < nbio; i++)
-		bio_put(biolist[i]);
-	atomic_set(&pending_req->pendcnt, 1);
-	__end_block_io_op(pending_req, BLK_STS_RESOURCE);
-	msleep(1); /* back off a bit */
-	return -EIO;
 }
 
 
-- 
GitLab


From 3b005bf6acf009abd700e2c652c86e5c209cf63d Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 24 Jan 2022 10:11:01 +0100
Subject: [PATCH 0193/1586] block: move blk_next_bio to bio.c

Keep blk_next_bio next to the core bio infrastructure.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Link: https://lore.kernel.org/r/20220124091107.642561-14-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c     | 13 +++++++++++++
 block/blk-lib.c | 13 -------------
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/block/bio.c b/block/bio.c
index 4312a8085396b..1536579ed490a 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -344,6 +344,19 @@ void bio_chain(struct bio *bio, struct bio *parent)
 }
 EXPORT_SYMBOL(bio_chain);
 
+struct bio *blk_next_bio(struct bio *bio, unsigned int nr_pages, gfp_t gfp)
+{
+	struct bio *new = bio_alloc(gfp, nr_pages);
+
+	if (bio) {
+		bio_chain(bio, new);
+		submit_bio(bio);
+	}
+
+	return new;
+}
+EXPORT_SYMBOL_GPL(blk_next_bio);
+
 static void bio_alloc_rescue(struct work_struct *work)
 {
 	struct bio_set *bs = container_of(work, struct bio_set, rescue_work);
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 9f09beadcbe30..9245b300ef73e 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -10,19 +10,6 @@
 
 #include "blk.h"
 
-struct bio *blk_next_bio(struct bio *bio, unsigned int nr_pages, gfp_t gfp)
-{
-	struct bio *new = bio_alloc(gfp, nr_pages);
-
-	if (bio) {
-		bio_chain(bio, new);
-		submit_bio(bio);
-	}
-
-	return new;
-}
-EXPORT_SYMBOL_GPL(blk_next_bio);
-
 int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 		sector_t nr_sects, gfp_t gfp_mask, int flags,
 		struct bio **biop)
-- 
GitLab


From 0a3140ea0fae377c9eaa031b7db1670ae422ed47 Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <kch@nvidia.com>
Date: Mon, 24 Jan 2022 10:11:02 +0100
Subject: [PATCH 0194/1586] block: pass a block_device and opf to blk_next_bio

All callers need to set the block_device and operation, so lift that into
the common code.

Signed-off-by: Chaitanya Kulkarni <kch@nvidia.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220124091107.642561-15-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c               |  6 +++++-
 block/blk-lib.c           | 19 +++++--------------
 block/blk-zoned.c         |  9 +++------
 block/blk.h               |  2 --
 drivers/nvme/target/zns.c |  6 +++---
 include/linux/bio.h       |  3 ++-
 6 files changed, 18 insertions(+), 27 deletions(-)

diff --git a/block/bio.c b/block/bio.c
index 1536579ed490a..a0166f29a05c3 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -344,10 +344,14 @@ void bio_chain(struct bio *bio, struct bio *parent)
 }
 EXPORT_SYMBOL(bio_chain);
 
-struct bio *blk_next_bio(struct bio *bio, unsigned int nr_pages, gfp_t gfp)
+struct bio *blk_next_bio(struct bio *bio, struct block_device *bdev,
+		unsigned int nr_pages, unsigned int opf, gfp_t gfp)
 {
 	struct bio *new = bio_alloc(gfp, nr_pages);
 
+	bio_set_dev(new, bdev);
+	new->bi_opf = opf;
+
 	if (bio) {
 		bio_chain(bio, new);
 		submit_bio(bio);
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 9245b300ef73e..1b8ced45e4e55 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -82,11 +82,8 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 
 		WARN_ON_ONCE((req_sects << 9) > UINT_MAX);
 
-		bio = blk_next_bio(bio, 0, gfp_mask);
+		bio = blk_next_bio(bio, bdev, 0, op, gfp_mask);
 		bio->bi_iter.bi_sector = sector;
-		bio_set_dev(bio, bdev);
-		bio_set_op_attrs(bio, op, 0);
-
 		bio->bi_iter.bi_size = req_sects << 9;
 		sector += req_sects;
 		nr_sects -= req_sects;
@@ -176,14 +173,12 @@ static int __blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
 	max_write_same_sectors = bio_allowed_max_sectors(q);
 
 	while (nr_sects) {
-		bio = blk_next_bio(bio, 1, gfp_mask);
+		bio = blk_next_bio(bio, bdev, 1, REQ_OP_WRITE_SAME, gfp_mask);
 		bio->bi_iter.bi_sector = sector;
-		bio_set_dev(bio, bdev);
 		bio->bi_vcnt = 1;
 		bio->bi_io_vec->bv_page = page;
 		bio->bi_io_vec->bv_offset = 0;
 		bio->bi_io_vec->bv_len = bdev_logical_block_size(bdev);
-		bio_set_op_attrs(bio, REQ_OP_WRITE_SAME, 0);
 
 		if (nr_sects > max_write_same_sectors) {
 			bio->bi_iter.bi_size = max_write_same_sectors << 9;
@@ -252,10 +247,8 @@ static int __blkdev_issue_write_zeroes(struct block_device *bdev,
 		return -EOPNOTSUPP;
 
 	while (nr_sects) {
-		bio = blk_next_bio(bio, 0, gfp_mask);
+		bio = blk_next_bio(bio, bdev, 0, REQ_OP_WRITE_ZEROES, gfp_mask);
 		bio->bi_iter.bi_sector = sector;
-		bio_set_dev(bio, bdev);
-		bio->bi_opf = REQ_OP_WRITE_ZEROES;
 		if (flags & BLKDEV_ZERO_NOUNMAP)
 			bio->bi_opf |= REQ_NOUNMAP;
 
@@ -303,11 +296,9 @@ static int __blkdev_issue_zero_pages(struct block_device *bdev,
 		return -EPERM;
 
 	while (nr_sects != 0) {
-		bio = blk_next_bio(bio, __blkdev_sectors_to_bio_pages(nr_sects),
-				   gfp_mask);
+		bio = blk_next_bio(bio, bdev, __blkdev_sectors_to_bio_pages(nr_sects),
+				   REQ_OP_WRITE, gfp_mask);
 		bio->bi_iter.bi_sector = sector;
-		bio_set_dev(bio, bdev);
-		bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
 
 		while (nr_sects != 0) {
 			sz = min((sector_t) PAGE_SIZE, nr_sects << 9);
diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 774ecc598bee2..5ab755d792c81 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -215,9 +215,8 @@ static int blkdev_zone_reset_all_emulated(struct block_device *bdev,
 			continue;
 		}
 
-		bio = blk_next_bio(bio, 0, gfp_mask);
-		bio_set_dev(bio, bdev);
-		bio->bi_opf = REQ_OP_ZONE_RESET | REQ_SYNC;
+		bio = blk_next_bio(bio, bdev, 0, REQ_OP_ZONE_RESET | REQ_SYNC,
+				   gfp_mask);
 		bio->bi_iter.bi_sector = sector;
 		sector += zone_sectors;
 
@@ -306,9 +305,7 @@ int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op,
 	}
 
 	while (sector < end_sector) {
-		bio = blk_next_bio(bio, 0, gfp_mask);
-		bio_set_dev(bio, bdev);
-		bio->bi_opf = op | REQ_SYNC;
+		bio = blk_next_bio(bio, bdev, 0, op | REQ_SYNC, gfp_mask);
 		bio->bi_iter.bi_sector = sector;
 		sector += zone_sectors;
 
diff --git a/block/blk.h b/block/blk.h
index 800c5ae387a0b..abb663a2a147b 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -406,8 +406,6 @@ extern int blk_iolatency_init(struct request_queue *q);
 static inline int blk_iolatency_init(struct request_queue *q) { return 0; }
 #endif
 
-struct bio *blk_next_bio(struct bio *bio, unsigned int nr_pages, gfp_t gfp);
-
 #ifdef CONFIG_BLK_DEV_ZONED
 void blk_queue_free_zone_bitmaps(struct request_queue *q);
 void blk_queue_clear_zone_settings(struct request_queue *q);
diff --git a/drivers/nvme/target/zns.c b/drivers/nvme/target/zns.c
index 46bc30fe85d2b..247de74247fab 100644
--- a/drivers/nvme/target/zns.c
+++ b/drivers/nvme/target/zns.c
@@ -412,10 +412,10 @@ static u16 nvmet_bdev_zone_mgmt_emulate_all(struct nvmet_req *req)
 
 	while (sector < get_capacity(bdev->bd_disk)) {
 		if (test_bit(blk_queue_zone_no(q, sector), d.zbitmap)) {
-			bio = blk_next_bio(bio, 0, GFP_KERNEL);
-			bio->bi_opf = zsa_req_op(req->cmd->zms.zsa) | REQ_SYNC;
+			bio = blk_next_bio(bio, bdev, 0,
+				zsa_req_op(req->cmd->zms.zsa) | REQ_SYNC,
+				GFP_KERNEL);
 			bio->bi_iter.bi_sector = sector;
-			bio_set_dev(bio, bdev);
 			/* This may take a while, so be nice to others */
 			cond_resched();
 		}
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 117d7f248ac96..edeae54074ede 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -790,6 +790,7 @@ static inline void bio_set_polled(struct bio *bio, struct kiocb *kiocb)
 		bio->bi_opf |= REQ_NOWAIT;
 }
 
-struct bio *blk_next_bio(struct bio *bio, unsigned int nr_pages, gfp_t gfp);
+struct bio *blk_next_bio(struct bio *bio, struct block_device *bdev,
+		unsigned int nr_pages, unsigned int opf, gfp_t gfp);
 
 #endif /* __LINUX_BIO_H */
-- 
GitLab


From 609be1066731fea86436f5f91022f82e592ab456 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 24 Jan 2022 10:11:03 +0100
Subject: [PATCH 0195/1586] block: pass a block_device and opf to
 bio_alloc_bioset

Pass the block_device and operation that we plan to use this bio for to
bio_alloc_bioset to optimize the assigment.  NULL/0 can be passed, both
for the passthrough case on a raw request_queue and to temporarily avoid
refactoring some nasty code.

Also move the gfp_mask argument after the nr_vecs argument for a much
more logical calling convention matching what most of the kernel does.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Link: https://lore.kernel.org/r/20220124091107.642561-16-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c                         | 30 +++++++++++++++++------------
 block/bounce.c                      |  6 ++----
 drivers/block/drbd/drbd_actlog.c    |  5 ++---
 drivers/block/drbd/drbd_bitmap.c    |  7 +++----
 drivers/md/bcache/request.c         | 12 +++++-------
 drivers/md/dm-crypt.c               |  5 ++---
 drivers/md/dm-io.c                  |  5 ++---
 drivers/md/dm-writecache.c          |  7 ++++---
 drivers/md/dm.c                     |  5 +++--
 drivers/md/md.c                     | 16 +++++++--------
 drivers/md/raid1.c                  |  3 ++-
 drivers/md/raid10.c                 |  6 ++----
 drivers/md/raid5-cache.c            |  8 ++++----
 drivers/md/raid5-ppl.c              | 11 +++++------
 drivers/target/target_core_iblock.c |  6 ++----
 fs/btrfs/extent_io.c                |  2 +-
 fs/f2fs/data.c                      |  7 +++----
 fs/iomap/buffered-io.c              |  6 +++---
 include/linux/bio.h                 |  7 ++++---
 19 files changed, 75 insertions(+), 79 deletions(-)

diff --git a/block/bio.c b/block/bio.c
index a0166f29a05c3..9afc0c2aca6e4 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -417,8 +417,10 @@ static void punt_bios_to_rescuer(struct bio_set *bs)
 
 /**
  * bio_alloc_bioset - allocate a bio for I/O
+ * @bdev:	block device to allocate the bio for (can be %NULL)
+ * @nr_vecs:	number of bvecs to pre-allocate
+ * @opf:	operation and flags for bio
  * @gfp_mask:   the GFP_* mask given to the slab allocator
- * @nr_iovecs:	number of iovecs to pre-allocate
  * @bs:		the bio_set to allocate from.
  *
  * Allocate a bio from the mempools in @bs.
@@ -447,15 +449,16 @@ static void punt_bios_to_rescuer(struct bio_set *bs)
  *
  * Returns: Pointer to new bio on success, NULL on failure.
  */
-struct bio *bio_alloc_bioset(gfp_t gfp_mask, unsigned short nr_iovecs,
+struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs,
+			     unsigned int opf, gfp_t gfp_mask,
 			     struct bio_set *bs)
 {
 	gfp_t saved_gfp = gfp_mask;
 	struct bio *bio;
 	void *p;
 
-	/* should not use nobvec bioset for nr_iovecs > 0 */
-	if (WARN_ON_ONCE(!mempool_initialized(&bs->bvec_pool) && nr_iovecs > 0))
+	/* should not use nobvec bioset for nr_vecs > 0 */
+	if (WARN_ON_ONCE(!mempool_initialized(&bs->bvec_pool) && nr_vecs > 0))
 		return NULL;
 
 	/*
@@ -492,26 +495,29 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, unsigned short nr_iovecs,
 		return NULL;
 
 	bio = p + bs->front_pad;
-	if (nr_iovecs > BIO_INLINE_VECS) {
+	if (nr_vecs > BIO_INLINE_VECS) {
 		struct bio_vec *bvl = NULL;
 
-		bvl = bvec_alloc(&bs->bvec_pool, &nr_iovecs, gfp_mask);
+		bvl = bvec_alloc(&bs->bvec_pool, &nr_vecs, gfp_mask);
 		if (!bvl && gfp_mask != saved_gfp) {
 			punt_bios_to_rescuer(bs);
 			gfp_mask = saved_gfp;
-			bvl = bvec_alloc(&bs->bvec_pool, &nr_iovecs, gfp_mask);
+			bvl = bvec_alloc(&bs->bvec_pool, &nr_vecs, gfp_mask);
 		}
 		if (unlikely(!bvl))
 			goto err_free;
 
-		bio_init(bio, bvl, nr_iovecs);
-	} else if (nr_iovecs) {
+		bio_init(bio, bvl, nr_vecs);
+	} else if (nr_vecs) {
 		bio_init(bio, bio->bi_inline_vecs, BIO_INLINE_VECS);
 	} else {
 		bio_init(bio, NULL, 0);
 	}
 
 	bio->bi_pool = bs;
+	if (bdev)
+		bio_set_dev(bio, bdev);
+	bio->bi_opf = opf;
 	return bio;
 
 err_free:
@@ -767,7 +773,7 @@ struct bio *bio_clone_fast(struct bio *bio, gfp_t gfp_mask, struct bio_set *bs)
 {
 	struct bio *b;
 
-	b = bio_alloc_bioset(gfp_mask, 0, bs);
+	b = bio_alloc_bioset(NULL, 0, 0, gfp_mask, bs);
 	if (!b)
 		return NULL;
 
@@ -1743,7 +1749,7 @@ struct bio *bio_alloc_kiocb(struct kiocb *kiocb, unsigned short nr_vecs,
 	struct bio *bio;
 
 	if (!(kiocb->ki_flags & IOCB_ALLOC_CACHE) || nr_vecs > BIO_INLINE_VECS)
-		return bio_alloc_bioset(GFP_KERNEL, nr_vecs, bs);
+		return bio_alloc_bioset(NULL, nr_vecs, 0, GFP_KERNEL, bs);
 
 	cache = per_cpu_ptr(bs->cache, get_cpu());
 	if (cache->free_list) {
@@ -1757,7 +1763,7 @@ struct bio *bio_alloc_kiocb(struct kiocb *kiocb, unsigned short nr_vecs,
 		return bio;
 	}
 	put_cpu();
-	bio = bio_alloc_bioset(GFP_KERNEL, nr_vecs, bs);
+	bio = bio_alloc_bioset(NULL, nr_vecs, 0, GFP_KERNEL, bs);
 	bio_set_flag(bio, BIO_PERCPU_CACHE);
 	return bio;
 }
diff --git a/block/bounce.c b/block/bounce.c
index 7af1a72835b99..330ddde25b460 100644
--- a/block/bounce.c
+++ b/block/bounce.c
@@ -165,12 +165,10 @@ static struct bio *bounce_clone_bio(struct bio *bio_src)
 	 *    asking for trouble and would force extra work on
 	 *    __bio_clone_fast() anyways.
 	 */
-	bio = bio_alloc_bioset(GFP_NOIO, bio_segments(bio_src),
-			       &bounce_bio_set);
-	bio->bi_bdev		= bio_src->bi_bdev;
+	bio = bio_alloc_bioset(bio_src->bi_bdev, bio_segments(bio_src),
+			       bio_src->bi_opf, GFP_NOIO, &bounce_bio_set);
 	if (bio_flagged(bio_src, BIO_REMAPPED))
 		bio_set_flag(bio, BIO_REMAPPED);
-	bio->bi_opf		= bio_src->bi_opf;
 	bio->bi_ioprio		= bio_src->bi_ioprio;
 	bio->bi_write_hint	= bio_src->bi_write_hint;
 	bio->bi_iter.bi_sector	= bio_src->bi_iter.bi_sector;
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index 72cf7603d51fc..f5bcded3640da 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -138,15 +138,14 @@ static int _drbd_md_sync_page_io(struct drbd_device *device,
 		op_flags |= REQ_FUA | REQ_PREFLUSH;
 	op_flags |= REQ_SYNC;
 
-	bio = bio_alloc_bioset(GFP_NOIO, 1, &drbd_md_io_bio_set);
-	bio_set_dev(bio, bdev->md_bdev);
+	bio = bio_alloc_bioset(bdev->md_bdev, 1, op | op_flags, GFP_NOIO,
+			       &drbd_md_io_bio_set);
 	bio->bi_iter.bi_sector = sector;
 	err = -EIO;
 	if (bio_add_page(bio, device->md_io.page, size, 0) != size)
 		goto out;
 	bio->bi_private = device;
 	bio->bi_end_io = drbd_md_endio;
-	bio_set_op_attrs(bio, op, op_flags);
 
 	if (op != REQ_OP_WRITE && device->state.disk == D_DISKLESS && device->ldev == NULL)
 		/* special case, drbd_md_read() during drbd_adm_attach(): no get_ldev */
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index c1f816f896a89..df25eecf80af0 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -976,12 +976,13 @@ static void drbd_bm_endio(struct bio *bio)
 
 static void bm_page_io_async(struct drbd_bm_aio_ctx *ctx, int page_nr) __must_hold(local)
 {
-	struct bio *bio = bio_alloc_bioset(GFP_NOIO, 1, &drbd_md_io_bio_set);
 	struct drbd_device *device = ctx->device;
+	unsigned int op = (ctx->flags & BM_AIO_READ) ? REQ_OP_READ : REQ_OP_WRITE;
+	struct bio *bio = bio_alloc_bioset(device->ldev->md_bdev, 1, op,
+					   GFP_NOIO, &drbd_md_io_bio_set);
 	struct drbd_bitmap *b = device->bitmap;
 	struct page *page;
 	unsigned int len;
-	unsigned int op = (ctx->flags & BM_AIO_READ) ? REQ_OP_READ : REQ_OP_WRITE;
 
 	sector_t on_disk_sector =
 		device->ldev->md.md_offset + device->ldev->md.bm_offset;
@@ -1006,14 +1007,12 @@ static void bm_page_io_async(struct drbd_bm_aio_ctx *ctx, int page_nr) __must_ho
 		bm_store_page_idx(page, page_nr);
 	} else
 		page = b->bm_pages[page_nr];
-	bio_set_dev(bio, device->ldev->md_bdev);
 	bio->bi_iter.bi_sector = on_disk_sector;
 	/* bio_add_page of a single page to an empty bio will always succeed,
 	 * according to api.  Do we want to assert that? */
 	bio_add_page(bio, page, len, 0);
 	bio->bi_private = ctx;
 	bio->bi_end_io = drbd_bm_endio;
-	bio_set_op_attrs(bio, op, 0);
 
 	if (drbd_insert_fault(device, (op == REQ_OP_WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) {
 		bio_io_error(bio);
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index d15aae6c51c13..c4b7e434de8ac 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -913,14 +913,13 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
 	/* btree_search_recurse()'s btree iterator is no good anymore */
 	ret = miss == bio ? MAP_DONE : -EINTR;
 
-	cache_bio = bio_alloc_bioset(GFP_NOWAIT,
+	cache_bio = bio_alloc_bioset(miss->bi_bdev,
 			DIV_ROUND_UP(s->insert_bio_sectors, PAGE_SECTORS),
-			&dc->disk.bio_split);
+			0, GFP_NOWAIT, &dc->disk.bio_split);
 	if (!cache_bio)
 		goto out_submit;
 
 	cache_bio->bi_iter.bi_sector	= miss->bi_iter.bi_sector;
-	bio_copy_dev(cache_bio, miss);
 	cache_bio->bi_iter.bi_size	= s->insert_bio_sectors << 9;
 
 	cache_bio->bi_end_io	= backing_request_endio;
@@ -1025,16 +1024,15 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
 			 */
 			struct bio *flush;
 
-			flush = bio_alloc_bioset(GFP_NOIO, 0,
-						 &dc->disk.bio_split);
+			flush = bio_alloc_bioset(bio->bi_bdev, 0,
+						 REQ_OP_WRITE | REQ_PREFLUSH,
+						 GFP_NOIO, &dc->disk.bio_split);
 			if (!flush) {
 				s->iop.status = BLK_STS_RESOURCE;
 				goto insert_data;
 			}
-			bio_copy_dev(flush, bio);
 			flush->bi_end_io = backing_request_endio;
 			flush->bi_private = cl;
-			flush->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
 			/* I/O request sent to backing device */
 			closure_bio_submit(s->iop.c, flush, cl);
 		}
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 3c5ecd35d3483..f7e4435b7439a 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1672,11 +1672,10 @@ retry:
 	if (unlikely(gfp_mask & __GFP_DIRECT_RECLAIM))
 		mutex_lock(&cc->bio_alloc_lock);
 
-	clone = bio_alloc_bioset(GFP_NOIO, nr_iovecs, &cc->bs);
+	clone = bio_alloc_bioset(cc->dev->bdev, nr_iovecs, io->base_bio->bi_opf,
+				 GFP_NOIO, &cc->bs);
 	clone->bi_private = io;
 	clone->bi_end_io = crypt_endio;
-	bio_set_dev(clone, cc->dev->bdev);
-	clone->bi_opf = io->base_bio->bi_opf;
 
 	remaining_size = size;
 
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index 2d3cda0acacb6..23e038f8dc845 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -345,11 +345,10 @@ static void do_region(int op, int op_flags, unsigned region,
 						(PAGE_SIZE >> SECTOR_SHIFT)));
 		}
 
-		bio = bio_alloc_bioset(GFP_NOIO, num_bvecs, &io->client->bios);
+		bio = bio_alloc_bioset(where->bdev, num_bvecs, op | op_flags,
+				       GFP_NOIO, &io->client->bios);
 		bio->bi_iter.bi_sector = where->sector + (where->count - remaining);
-		bio_set_dev(bio, where->bdev);
 		bio->bi_end_io = endio;
-		bio_set_op_attrs(bio, op, op_flags);
 		store_io_and_region_in_bio(bio, io, region);
 
 		if (op == REQ_OP_DISCARD || op == REQ_OP_WRITE_ZEROES) {
diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index 4f31591d2d25e..5630b470ba429 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -1821,11 +1821,11 @@ static void __writecache_writeback_pmem(struct dm_writecache *wc, struct writeba
 
 		max_pages = e->wc_list_contiguous;
 
-		bio = bio_alloc_bioset(GFP_NOIO, max_pages, &wc->bio_set);
+		bio = bio_alloc_bioset(wc->dev->bdev, max_pages, REQ_OP_WRITE,
+				       GFP_NOIO, &wc->bio_set);
 		wb = container_of(bio, struct writeback_struct, bio);
 		wb->wc = wc;
 		bio->bi_end_io = writecache_writeback_endio;
-		bio_set_dev(bio, wc->dev->bdev);
 		bio->bi_iter.bi_sector = read_original_sector(wc, e);
 		if (max_pages <= WB_LIST_INLINE ||
 		    unlikely(!(wb->wc_list = kmalloc_array(max_pages, sizeof(struct wc_entry *),
@@ -1852,7 +1852,8 @@ static void __writecache_writeback_pmem(struct dm_writecache *wc, struct writeba
 			wb->wc_list[wb->wc_list_n++] = f;
 			e = f;
 		}
-		bio_set_op_attrs(bio, REQ_OP_WRITE, WC_MODE_FUA(wc) * REQ_FUA);
+		if (WC_MODE_FUA(wc))
+			bio->bi_opf |= REQ_FUA;
 		if (writecache_has_error(wc)) {
 			bio->bi_status = BLK_STS_IOERR;
 			bio_endio(bio);
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index e431f72c10bf4..069e29013b6be 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -519,7 +519,7 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio)
 	struct dm_target_io *tio;
 	struct bio *clone;
 
-	clone = bio_alloc_bioset(GFP_NOIO, 0, &md->io_bs);
+	clone = bio_alloc_bioset(NULL, 0, 0, GFP_NOIO, &md->io_bs);
 
 	tio = container_of(clone, struct dm_target_io, clone);
 	tio->inside_dm_io = true;
@@ -552,7 +552,8 @@ static struct dm_target_io *alloc_tio(struct clone_info *ci, struct dm_target *t
 		/* the dm_target_io embedded in ci->io is available */
 		tio = &ci->io->tio;
 	} else {
-		struct bio *clone = bio_alloc_bioset(gfp_mask, 0, &ci->io->md->bs);
+		struct bio *clone = bio_alloc_bioset(NULL, 0, 0, gfp_mask,
+						     &ci->io->md->bs);
 		if (!clone)
 			return NULL;
 
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 5881d05a76ebc..40fc1f7e65c5d 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -562,11 +562,11 @@ static void submit_flushes(struct work_struct *ws)
 			atomic_inc(&rdev->nr_pending);
 			atomic_inc(&rdev->nr_pending);
 			rcu_read_unlock();
-			bi = bio_alloc_bioset(GFP_NOIO, 0, &mddev->bio_set);
+			bi = bio_alloc_bioset(rdev->bdev, 0,
+					      REQ_OP_WRITE | REQ_PREFLUSH,
+					      GFP_NOIO, &mddev->bio_set);
 			bi->bi_end_io = md_end_flush;
 			bi->bi_private = rdev;
-			bio_set_dev(bi, rdev->bdev);
-			bi->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
 			atomic_inc(&mddev->flush_pending);
 			submit_bio(bi);
 			rcu_read_lock();
@@ -955,7 +955,6 @@ void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
 	 * If an error occurred, call md_error
 	 */
 	struct bio *bio;
-	int ff = 0;
 
 	if (!page)
 		return;
@@ -963,11 +962,13 @@ void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
 	if (test_bit(Faulty, &rdev->flags))
 		return;
 
-	bio = bio_alloc_bioset(GFP_NOIO, 1, &mddev->sync_set);
+	bio = bio_alloc_bioset(rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev,
+			       1,
+			       REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH | REQ_FUA,
+			       GFP_NOIO, &mddev->sync_set);
 
 	atomic_inc(&rdev->nr_pending);
 
-	bio_set_dev(bio, rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev);
 	bio->bi_iter.bi_sector = sector;
 	bio_add_page(bio, page, size, 0);
 	bio->bi_private = rdev;
@@ -976,8 +977,7 @@ void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
 	if (test_bit(MD_FAILFAST_SUPPORTED, &mddev->flags) &&
 	    test_bit(FailFast, &rdev->flags) &&
 	    !test_bit(LastDev, &rdev->flags))
-		ff = MD_FAILFAST;
-	bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH | REQ_FUA | ff;
+		bio->bi_opf |= MD_FAILFAST;
 
 	atomic_inc(&mddev->pending_writes);
 	submit_bio(bio);
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index e2d8acb1e9881..43276f8fdc815 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1126,7 +1126,8 @@ static void alloc_behind_master_bio(struct r1bio *r1_bio,
 	int i = 0;
 	struct bio *behind_bio = NULL;
 
-	behind_bio = bio_alloc_bioset(GFP_NOIO, vcnt, &r1_bio->mddev->bio_set);
+	behind_bio = bio_alloc_bioset(NULL, vcnt, 0, GFP_NOIO,
+				      &r1_bio->mddev->bio_set);
 	if (!behind_bio)
 		return;
 
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 2b969f70a31fb..cb7c58050708e 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -4892,14 +4892,12 @@ read_more:
 		return sectors_done;
 	}
 
-	read_bio = bio_alloc_bioset(GFP_KERNEL, RESYNC_PAGES, &mddev->bio_set);
-
-	bio_set_dev(read_bio, rdev->bdev);
+	read_bio = bio_alloc_bioset(rdev->bdev, RESYNC_PAGES, REQ_OP_READ,
+				    GFP_KERNEL, &mddev->bio_set);
 	read_bio->bi_iter.bi_sector = (r10_bio->devs[r10_bio->read_slot].addr
 			       + rdev->data_offset);
 	read_bio->bi_private = r10_bio;
 	read_bio->bi_end_io = end_reshape_read;
-	bio_set_op_attrs(read_bio, REQ_OP_READ, 0);
 	r10_bio->master_bio = read_bio;
 	r10_bio->read_slot = r10_bio->devs[r10_bio->read_slot].devnum;
 
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 0b5dcaabbc155..66313adf99875 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -735,10 +735,9 @@ static void r5l_submit_current_io(struct r5l_log *log)
 
 static struct bio *r5l_bio_alloc(struct r5l_log *log)
 {
-	struct bio *bio = bio_alloc_bioset(GFP_NOIO, BIO_MAX_VECS, &log->bs);
+	struct bio *bio = bio_alloc_bioset(log->rdev->bdev, BIO_MAX_VECS,
+					   REQ_OP_WRITE, GFP_NOIO, &log->bs);
 
-	bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
-	bio_set_dev(bio, log->rdev->bdev);
 	bio->bi_iter.bi_sector = log->rdev->data_offset + log->log_start;
 
 	return bio;
@@ -1634,7 +1633,8 @@ static int r5l_recovery_allocate_ra_pool(struct r5l_log *log,
 {
 	struct page *page;
 
-	ctx->ra_bio = bio_alloc_bioset(GFP_KERNEL, BIO_MAX_VECS, &log->bs);
+	ctx->ra_bio = bio_alloc_bioset(NULL, BIO_MAX_VECS, 0, GFP_KERNEL,
+				       &log->bs);
 	if (!ctx->ra_bio)
 		return -ENOMEM;
 
diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c
index 4ab417915d7f1..054d3bb252d48 100644
--- a/drivers/md/raid5-ppl.c
+++ b/drivers/md/raid5-ppl.c
@@ -496,11 +496,10 @@ static void ppl_submit_iounit(struct ppl_io_unit *io)
 		if (!bio_add_page(bio, sh->ppl_page, PAGE_SIZE, 0)) {
 			struct bio *prev = bio;
 
-			bio = bio_alloc_bioset(GFP_NOIO, BIO_MAX_VECS,
+			bio = bio_alloc_bioset(prev->bi_bdev, BIO_MAX_VECS,
+					       prev->bi_opf, GFP_NOIO,
 					       &ppl_conf->bs);
-			bio->bi_opf = prev->bi_opf;
 			bio->bi_write_hint = prev->bi_write_hint;
-			bio_copy_dev(bio, prev);
 			bio->bi_iter.bi_sector = bio_end_sector(prev);
 			bio_add_page(bio, sh->ppl_page, PAGE_SIZE, 0);
 
@@ -637,10 +636,10 @@ static void ppl_do_flush(struct ppl_io_unit *io)
 			struct bio *bio;
 			char b[BDEVNAME_SIZE];
 
-			bio = bio_alloc_bioset(GFP_NOIO, 0, &ppl_conf->flush_bs);
-			bio_set_dev(bio, bdev);
+			bio = bio_alloc_bioset(bdev, 0, GFP_NOIO,
+					       REQ_OP_WRITE | REQ_PREFLUSH,
+					       &ppl_conf->flush_bs);
 			bio->bi_private = io;
-			bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
 			bio->bi_end_io = ppl_flush_endio;
 
 			pr_debug("%s: dev: %s\n", __func__,
diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
index 6045678365a59..3c92ba3748192 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c
@@ -352,18 +352,16 @@ static struct bio *iblock_get_bio(struct se_cmd *cmd, sector_t lba, u32 sg_num,
 	 * Only allocate as many vector entries as the bio code allows us to,
 	 * we'll loop later on until we have handled the whole request.
 	 */
-	bio = bio_alloc_bioset(GFP_NOIO, bio_max_segs(sg_num),
-				&ib_dev->ibd_bio_set);
+	bio = bio_alloc_bioset(ib_dev->ibd_bd, bio_max_segs(sg_num), opf,
+			       GFP_NOIO, &ib_dev->ibd_bio_set);
 	if (!bio) {
 		pr_err("Unable to allocate memory for bio\n");
 		return NULL;
 	}
 
-	bio_set_dev(bio, ib_dev->ibd_bd);
 	bio->bi_private = cmd;
 	bio->bi_end_io = &iblock_bio_done;
 	bio->bi_iter.bi_sector = lba;
-	bio->bi_opf = opf;
 
 	return bio;
 }
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 409bad3928db3..421d921a05716 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3143,7 +3143,7 @@ struct bio *btrfs_bio_alloc(unsigned int nr_iovecs)
 	struct bio *bio;
 
 	ASSERT(0 < nr_iovecs && nr_iovecs <= BIO_MAX_VECS);
-	bio = bio_alloc_bioset(GFP_NOFS, nr_iovecs, &btrfs_bioset);
+	bio = bio_alloc_bioset(NULL, nr_iovecs, 0, GFP_NOFS, &btrfs_bioset);
 	btrfs_bio_init(btrfs_bio(bio));
 	return bio;
 }
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 8c417864c66ae..e71dde8de0db0 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -394,7 +394,7 @@ static struct bio *__bio_alloc(struct f2fs_io_info *fio, int npages)
 	struct f2fs_sb_info *sbi = fio->sbi;
 	struct bio *bio;
 
-	bio = bio_alloc_bioset(GFP_NOIO, npages, &f2fs_bioset);
+	bio = bio_alloc_bioset(NULL, npages, 0, GFP_NOIO, &f2fs_bioset);
 
 	f2fs_target_device(sbi, fio->new_blkaddr, bio);
 	if (is_read_io(fio->op)) {
@@ -985,8 +985,8 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
 	struct bio_post_read_ctx *ctx = NULL;
 	unsigned int post_read_steps = 0;
 
-	bio = bio_alloc_bioset(for_write ? GFP_NOIO : GFP_KERNEL,
-			       bio_max_segs(nr_pages), &f2fs_bioset);
+	bio = bio_alloc_bioset(NULL, bio_max_segs(nr_pages), REQ_OP_READ,
+			       for_write ? GFP_NOIO : GFP_KERNEL, &f2fs_bioset);
 	if (!bio)
 		return ERR_PTR(-ENOMEM);
 
@@ -994,7 +994,6 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
 
 	f2fs_target_device(sbi, blkaddr, bio);
 	bio->bi_end_io = f2fs_read_end_io;
-	bio_set_op_attrs(bio, REQ_OP_READ, op_flag);
 
 	if (fscrypt_inode_uses_fs_layer_crypto(inode))
 		post_read_steps |= STEP_DECRYPT;
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index c938bbad075e1..340d373cb1bf9 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -1196,10 +1196,10 @@ iomap_alloc_ioend(struct inode *inode, struct iomap_writepage_ctx *wpc,
 	struct iomap_ioend *ioend;
 	struct bio *bio;
 
-	bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_VECS, &iomap_ioend_bioset);
-	bio_set_dev(bio, wpc->iomap.bdev);
+	bio = bio_alloc_bioset(wpc->iomap.bdev, BIO_MAX_VECS,
+			       REQ_OP_WRITE | wbc_to_write_flags(wbc),
+			       GFP_NOFS, &iomap_ioend_bioset);
 	bio->bi_iter.bi_sector = sector;
-	bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
 	bio->bi_write_hint = inode->i_write_hint;
 	wbc_init_bio(wbc, bio);
 
diff --git a/include/linux/bio.h b/include/linux/bio.h
index edeae54074ede..2f63ae9a71e1a 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -405,8 +405,9 @@ extern void bioset_exit(struct bio_set *);
 extern int biovec_init_pool(mempool_t *pool, int pool_entries);
 extern int bioset_init_from_src(struct bio_set *bs, struct bio_set *src);
 
-struct bio *bio_alloc_bioset(gfp_t gfp, unsigned short nr_iovecs,
-		struct bio_set *bs);
+struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs,
+			     unsigned int opf, gfp_t gfp_mask,
+			     struct bio_set *bs);
 struct bio *bio_alloc_kiocb(struct kiocb *kiocb, unsigned short nr_vecs,
 		struct bio_set *bs);
 struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned short nr_iovecs);
@@ -419,7 +420,7 @@ extern struct bio_set fs_bio_set;
 
 static inline struct bio *bio_alloc(gfp_t gfp_mask, unsigned short nr_iovecs)
 {
-	return bio_alloc_bioset(gfp_mask, nr_iovecs, &fs_bio_set);
+	return bio_alloc_bioset(NULL, nr_iovecs, 0, gfp_mask, &fs_bio_set);
 }
 
 void submit_bio(struct bio *bio);
-- 
GitLab


From b77c88c2100ce6a5ec8126c13599b5a7f6663e32 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 24 Jan 2022 10:11:04 +0100
Subject: [PATCH 0196/1586] block: pass a block_device and opf to
 bio_alloc_kiocb

Pass the block_device and operation that we plan to use this bio for to
bio_alloc_kiocb to optimize the assigment.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Link: https://lore.kernel.org/r/20220124091107.642561-17-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c         | 12 ++++++++----
 block/fops.c        | 17 ++++++++---------
 include/linux/bio.h |  4 ++--
 3 files changed, 18 insertions(+), 15 deletions(-)

diff --git a/block/bio.c b/block/bio.c
index 9afc0c2aca6e4..6c3efb0fd12b1 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1731,7 +1731,9 @@ EXPORT_SYMBOL(bioset_init_from_src);
 /**
  * bio_alloc_kiocb - Allocate a bio from bio_set based on kiocb
  * @kiocb:	kiocb describing the IO
+ * @bdev:	block device to allocate the bio for (can be %NULL)
  * @nr_vecs:	number of iovecs to pre-allocate
+ * @opf:	operation and flags for bio
  * @bs:		bio_set to allocate from
  *
  * Description:
@@ -1742,14 +1744,14 @@ EXPORT_SYMBOL(bioset_init_from_src);
  *    MUST be done from process context, not hard/soft IRQ.
  *
  */
-struct bio *bio_alloc_kiocb(struct kiocb *kiocb, unsigned short nr_vecs,
-			    struct bio_set *bs)
+struct bio *bio_alloc_kiocb(struct kiocb *kiocb, struct block_device *bdev,
+		unsigned short nr_vecs, unsigned int opf, struct bio_set *bs)
 {
 	struct bio_alloc_cache *cache;
 	struct bio *bio;
 
 	if (!(kiocb->ki_flags & IOCB_ALLOC_CACHE) || nr_vecs > BIO_INLINE_VECS)
-		return bio_alloc_bioset(NULL, nr_vecs, 0, GFP_KERNEL, bs);
+		return bio_alloc_bioset(bdev, nr_vecs, opf, GFP_KERNEL, bs);
 
 	cache = per_cpu_ptr(bs->cache, get_cpu());
 	if (cache->free_list) {
@@ -1758,12 +1760,14 @@ struct bio *bio_alloc_kiocb(struct kiocb *kiocb, unsigned short nr_vecs,
 		cache->nr--;
 		put_cpu();
 		bio_init(bio, nr_vecs ? bio->bi_inline_vecs : NULL, nr_vecs);
+		bio_set_dev(bio, bdev);
+		bio->bi_opf = opf;
 		bio->bi_pool = bs;
 		bio_set_flag(bio, BIO_PERCPU_CACHE);
 		return bio;
 	}
 	put_cpu();
-	bio = bio_alloc_bioset(NULL, nr_vecs, 0, GFP_KERNEL, bs);
+	bio = bio_alloc_bioset(bdev, nr_vecs, opf, GFP_KERNEL, bs);
 	bio_set_flag(bio, BIO_PERCPU_CACHE);
 	return bio;
 }
diff --git a/block/fops.c b/block/fops.c
index 26bf15c770d21..3a62b8b912750 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -190,6 +190,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 	struct blkdev_dio *dio;
 	struct bio *bio;
 	bool is_read = (iov_iter_rw(iter) == READ), is_sync;
+	unsigned int opf = is_read ? REQ_OP_READ : dio_bio_write_op(iocb);
 	loff_t pos = iocb->ki_pos;
 	int ret = 0;
 
@@ -197,7 +198,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 	    (bdev_logical_block_size(bdev) - 1))
 		return -EINVAL;
 
-	bio = bio_alloc_kiocb(iocb, nr_pages, &blkdev_dio_pool);
+	bio = bio_alloc_kiocb(iocb, bdev, nr_pages, opf, &blkdev_dio_pool);
 
 	dio = container_of(bio, struct blkdev_dio, bio);
 	atomic_set(&dio->ref, 1);
@@ -223,7 +224,6 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 	blk_start_plug(&plug);
 
 	for (;;) {
-		bio_set_dev(bio, bdev);
 		bio->bi_iter.bi_sector = pos >> SECTOR_SHIFT;
 		bio->bi_write_hint = iocb->ki_hint;
 		bio->bi_private = dio;
@@ -238,11 +238,9 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 		}
 
 		if (is_read) {
-			bio->bi_opf = REQ_OP_READ;
 			if (dio->flags & DIO_SHOULD_DIRTY)
 				bio_set_pages_dirty(bio);
 		} else {
-			bio->bi_opf = dio_bio_write_op(iocb);
 			task_io_account_write(bio->bi_iter.bi_size);
 		}
 		if (iocb->ki_flags & IOCB_NOWAIT)
@@ -259,6 +257,8 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 		atomic_inc(&dio->ref);
 		submit_bio(bio);
 		bio = bio_alloc(GFP_KERNEL, nr_pages);
+		bio_set_dev(bio, bdev);
+		bio->bi_opf = opf;
 	}
 
 	blk_finish_plug(&plug);
@@ -311,6 +311,8 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
 					unsigned int nr_pages)
 {
 	struct block_device *bdev = iocb->ki_filp->private_data;
+	bool is_read = iov_iter_rw(iter) == READ;
+	unsigned int opf = is_read ? REQ_OP_READ : dio_bio_write_op(iocb);
 	struct blkdev_dio *dio;
 	struct bio *bio;
 	loff_t pos = iocb->ki_pos;
@@ -320,11 +322,10 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
 	    (bdev_logical_block_size(bdev) - 1))
 		return -EINVAL;
 
-	bio = bio_alloc_kiocb(iocb, nr_pages, &blkdev_dio_pool);
+	bio = bio_alloc_kiocb(iocb, bdev, nr_pages, opf, &blkdev_dio_pool);
 	dio = container_of(bio, struct blkdev_dio, bio);
 	dio->flags = 0;
 	dio->iocb = iocb;
-	bio_set_dev(bio, bdev);
 	bio->bi_iter.bi_sector = pos >> SECTOR_SHIFT;
 	bio->bi_write_hint = iocb->ki_hint;
 	bio->bi_end_io = blkdev_bio_end_io_async;
@@ -347,14 +348,12 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
 	}
 	dio->size = bio->bi_iter.bi_size;
 
-	if (iov_iter_rw(iter) == READ) {
-		bio->bi_opf = REQ_OP_READ;
+	if (is_read) {
 		if (iter_is_iovec(iter)) {
 			dio->flags |= DIO_SHOULD_DIRTY;
 			bio_set_pages_dirty(bio);
 		}
 	} else {
-		bio->bi_opf = dio_bio_write_op(iocb);
 		task_io_account_write(bio->bi_iter.bi_size);
 	}
 
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 2f63ae9a71e1a..5c5ada2ebb270 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -408,8 +408,8 @@ extern int bioset_init_from_src(struct bio_set *bs, struct bio_set *src);
 struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs,
 			     unsigned int opf, gfp_t gfp_mask,
 			     struct bio_set *bs);
-struct bio *bio_alloc_kiocb(struct kiocb *kiocb, unsigned short nr_vecs,
-		struct bio_set *bs);
+struct bio *bio_alloc_kiocb(struct kiocb *kiocb, struct block_device *bdev,
+		unsigned short nr_vecs, unsigned int opf, struct bio_set *bs);
 struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned short nr_iovecs);
 extern void bio_put(struct bio *);
 
-- 
GitLab


From 07888c665b405b1cd3577ddebfeb74f4717a84c4 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 24 Jan 2022 10:11:05 +0100
Subject: [PATCH 0197/1586] block: pass a block_device and opf to bio_alloc

Pass the block_device and operation that we plan to use this bio for to
bio_alloc to optimize the assignment.  NULL/0 can be passed, both for the
passthrough case on a raw request_queue and to temporarily avoid
refactoring some nasty code.

Also move the gfp_mask argument after the nr_vecs argument for a much
more logical calling convention matching what most of the kernel does.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Link: https://lore.kernel.org/r/20220124091107.642561-18-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c                         |  5 +----
 block/fops.c                        |  4 +---
 drivers/block/drbd/drbd_receiver.c  | 10 ++++------
 drivers/block/rnbd/rnbd-srv.c       |  5 ++---
 drivers/block/xen-blkback/blkback.c | 11 +++++------
 drivers/block/zram/zram_drv.c       | 11 ++++-------
 drivers/md/dm-log-writes.c          | 21 ++++++++-------------
 drivers/md/dm-thin.c                |  9 ++++-----
 drivers/md/dm-zoned-metadata.c      | 15 ++++++---------
 drivers/nvdimm/nd_virtio.c          |  6 +++---
 drivers/nvme/target/io-cmd-bdev.c   | 12 ++++++------
 drivers/nvme/target/passthru.c      |  5 +++--
 drivers/nvme/target/zns.c           |  6 +++---
 drivers/scsi/ufs/ufshpb.c           |  4 ++--
 drivers/target/target_core_iblock.c |  5 ++---
 fs/btrfs/disk-io.c                  |  6 +++---
 fs/buffer.c                         | 14 ++++++--------
 fs/crypto/bio.c                     | 13 +++++++------
 fs/direct-io.c                      |  5 +----
 fs/erofs/zdata.c                    |  5 ++---
 fs/ext4/page-io.c                   |  3 +--
 fs/ext4/readpage.c                  |  8 ++++----
 fs/gfs2/lops.c                      |  8 +++-----
 fs/gfs2/meta_io.c                   |  4 +---
 fs/gfs2/ops_fstype.c                |  4 +---
 fs/hfsplus/wrapper.c                |  4 +---
 fs/iomap/buffered-io.c              | 16 ++++++++--------
 fs/iomap/direct-io.c                |  8 ++------
 fs/jfs/jfs_logmgr.c                 | 11 ++---------
 fs/jfs/jfs_metapage.c               |  9 +++------
 fs/mpage.c                          |  7 +++----
 fs/nfs/blocklayout/blocklayout.c    |  4 +---
 fs/nilfs2/segbuf.c                  |  4 ++--
 fs/ntfs3/fsntfs.c                   |  8 ++------
 fs/ocfs2/cluster/heartbeat.c        |  4 +---
 fs/squashfs/block.c                 | 11 ++++++-----
 fs/xfs/xfs_bio_io.c                 | 10 ++++------
 fs/xfs/xfs_buf.c                    |  4 +---
 fs/zonefs/super.c                   |  5 ++---
 include/linux/bio.h                 |  5 +++--
 kernel/power/swap.c                 |  5 ++---
 mm/page_io.c                        | 10 ++++------
 42 files changed, 130 insertions(+), 194 deletions(-)

diff --git a/block/bio.c b/block/bio.c
index 6c3efb0fd12b1..b73c9babd5835 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -347,10 +347,7 @@ EXPORT_SYMBOL(bio_chain);
 struct bio *blk_next_bio(struct bio *bio, struct block_device *bdev,
 		unsigned int nr_pages, unsigned int opf, gfp_t gfp)
 {
-	struct bio *new = bio_alloc(gfp, nr_pages);
-
-	bio_set_dev(new, bdev);
-	new->bi_opf = opf;
+	struct bio *new = bio_alloc(bdev, nr_pages, opf, gfp);
 
 	if (bio) {
 		bio_chain(bio, new);
diff --git a/block/fops.c b/block/fops.c
index 3a62b8b912750..c683596847731 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -256,9 +256,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 		}
 		atomic_inc(&dio->ref);
 		submit_bio(bio);
-		bio = bio_alloc(GFP_KERNEL, nr_pages);
-		bio_set_dev(bio, bdev);
-		bio->bi_opf = opf;
+		bio = bio_alloc(bdev, nr_pages, opf, GFP_KERNEL);
 	}
 
 	blk_finish_plug(&plug);
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index fb59b263deeef..04e3ec12d8b49 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -1279,7 +1279,8 @@ static void one_flush_endio(struct bio *bio)
 
 static void submit_one_flush(struct drbd_device *device, struct issue_flush_context *ctx)
 {
-	struct bio *bio = bio_alloc(GFP_NOIO, 0);
+	struct bio *bio = bio_alloc(device->ldev->backing_bdev, 0,
+				    REQ_OP_FLUSH | REQ_PREFLUSH, GFP_NOIO);
 	struct one_flush_context *octx = kmalloc(sizeof(*octx), GFP_NOIO);
 
 	if (!octx) {
@@ -1297,10 +1298,8 @@ static void submit_one_flush(struct drbd_device *device, struct issue_flush_cont
 
 	octx->device = device;
 	octx->ctx = ctx;
-	bio_set_dev(bio, device->ldev->backing_bdev);
 	bio->bi_private = octx;
 	bio->bi_end_io = one_flush_endio;
-	bio->bi_opf = REQ_OP_FLUSH | REQ_PREFLUSH;
 
 	device->flush_jif = jiffies;
 	set_bit(FLUSH_PENDING, &device->flags);
@@ -1685,11 +1684,10 @@ int drbd_submit_peer_request(struct drbd_device *device,
 	 * generated bio, but a bio allocated on behalf of the peer.
 	 */
 next_bio:
-	bio = bio_alloc(GFP_NOIO, nr_pages);
+	bio = bio_alloc(device->ldev->backing_bdev, nr_pages, op | op_flags,
+			GFP_NOIO);
 	/* > peer_req->i.sector, unless this is the first bio */
 	bio->bi_iter.bi_sector = sector;
-	bio_set_dev(bio, device->ldev->backing_bdev);
-	bio_set_op_attrs(bio, op, op_flags);
 	bio->bi_private = peer_req;
 	bio->bi_end_io = drbd_peer_request_endio;
 
diff --git a/drivers/block/rnbd/rnbd-srv.c b/drivers/block/rnbd/rnbd-srv.c
index ff9b389976078..132e950685d59 100644
--- a/drivers/block/rnbd/rnbd-srv.c
+++ b/drivers/block/rnbd/rnbd-srv.c
@@ -149,7 +149,8 @@ static int process_rdma(struct rnbd_srv_session *srv_sess,
 	priv->sess_dev = sess_dev;
 	priv->id = id;
 
-	bio = bio_alloc(GFP_KERNEL, 1);
+	bio = bio_alloc(sess_dev->rnbd_dev->bdev, 1,
+			rnbd_to_bio_flags(le32_to_cpu(msg->rw)), GFP_KERNEL);
 	if (bio_add_page(bio, virt_to_page(data), datalen,
 			offset_in_page(data)) != datalen) {
 		rnbd_srv_err(sess_dev, "Failed to map data to bio\n");
@@ -159,13 +160,11 @@ static int process_rdma(struct rnbd_srv_session *srv_sess,
 
 	bio->bi_end_io = rnbd_dev_bi_end_io;
 	bio->bi_private = priv;
-	bio->bi_opf = rnbd_to_bio_flags(le32_to_cpu(msg->rw));
 	bio->bi_iter.bi_sector = le64_to_cpu(msg->sector);
 	bio->bi_iter.bi_size = le32_to_cpu(msg->bi_size);
 	prio = srv_sess->ver < RNBD_PROTO_VER_MAJOR ||
 	       usrlen < sizeof(*msg) ? 0 : le16_to_cpu(msg->prio);
 	bio_set_prio(bio, prio);
-	bio_set_dev(bio, sess_dev->rnbd_dev->bdev);
 
 	submit_bio(bio);
 
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 6bb2ad7692065..d1e26461a64ed 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -1326,13 +1326,13 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
 				     pages[i]->page,
 				     seg[i].nsec << 9,
 				     seg[i].offset) == 0)) {
-			bio = bio_alloc(GFP_KERNEL, bio_max_segs(nseg - i));
+			bio = bio_alloc(preq.bdev, bio_max_segs(nseg - i),
+					operation | operation_flags,
+					GFP_KERNEL);
 			biolist[nbio++] = bio;
-			bio_set_dev(bio, preq.bdev);
 			bio->bi_private = pending_req;
 			bio->bi_end_io  = end_block_io_op;
 			bio->bi_iter.bi_sector  = preq.sector_number;
-			bio_set_op_attrs(bio, operation, operation_flags);
 		}
 
 		preq.sector_number += seg[i].nsec;
@@ -1342,12 +1342,11 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
 	if (!bio) {
 		BUG_ON(operation_flags != REQ_PREFLUSH);
 
-		bio = bio_alloc(GFP_KERNEL, 0);
+		bio = bio_alloc(preq.bdev, 0, operation | operation_flags,
+				GFP_KERNEL);
 		biolist[nbio++] = bio;
-		bio_set_dev(bio, preq.bdev);
 		bio->bi_private = pending_req;
 		bio->bi_end_io  = end_block_io_op;
-		bio_set_op_attrs(bio, operation, operation_flags);
 	}
 
 	atomic_set(&pending_req->pendcnt, nbio);
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 342dbcb3f2208..f3fe0ea8aa80f 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -616,24 +616,21 @@ static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec,
 {
 	struct bio *bio;
 
-	bio = bio_alloc(GFP_NOIO, 1);
+	bio = bio_alloc(zram->bdev, 1, parent ? parent->bi_opf : REQ_OP_READ,
+			GFP_NOIO);
 	if (!bio)
 		return -ENOMEM;
 
 	bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
-	bio_set_dev(bio, zram->bdev);
 	if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len, bvec->bv_offset)) {
 		bio_put(bio);
 		return -EIO;
 	}
 
-	if (!parent) {
-		bio->bi_opf = REQ_OP_READ;
+	if (!parent)
 		bio->bi_end_io = zram_page_end_io;
-	} else {
-		bio->bi_opf = parent->bi_opf;
+	else
 		bio_chain(bio, parent);
-	}
 
 	submit_bio(bio);
 	return 1;
diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c
index 25f5e8d2d417b..c9d036d6bb2ee 100644
--- a/drivers/md/dm-log-writes.c
+++ b/drivers/md/dm-log-writes.c
@@ -217,14 +217,12 @@ static int write_metadata(struct log_writes_c *lc, void *entry,
 	void *ptr;
 	size_t ret;
 
-	bio = bio_alloc(GFP_KERNEL, 1);
+	bio = bio_alloc(lc->logdev->bdev, 1, REQ_OP_WRITE, GFP_KERNEL);
 	bio->bi_iter.bi_size = 0;
 	bio->bi_iter.bi_sector = sector;
-	bio_set_dev(bio, lc->logdev->bdev);
 	bio->bi_end_io = (sector == WRITE_LOG_SUPER_SECTOR) ?
 			  log_end_super : log_end_io;
 	bio->bi_private = lc;
-	bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
 
 	page = alloc_page(GFP_KERNEL);
 	if (!page) {
@@ -271,13 +269,12 @@ static int write_inline_data(struct log_writes_c *lc, void *entry,
 
 		atomic_inc(&lc->io_blocks);
 
-		bio = bio_alloc(GFP_KERNEL, bio_pages);
+		bio = bio_alloc(lc->logdev->bdev, bio_pages, REQ_OP_WRITE,
+				GFP_KERNEL);
 		bio->bi_iter.bi_size = 0;
 		bio->bi_iter.bi_sector = sector;
-		bio_set_dev(bio, lc->logdev->bdev);
 		bio->bi_end_io = log_end_io;
 		bio->bi_private = lc;
-		bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
 
 		for (i = 0; i < bio_pages; i++) {
 			pg_datalen = min_t(int, datalen, PAGE_SIZE);
@@ -353,13 +350,12 @@ static int log_one_block(struct log_writes_c *lc,
 		goto out;
 
 	atomic_inc(&lc->io_blocks);
-	bio = bio_alloc(GFP_KERNEL, bio_max_segs(block->vec_cnt));
+	bio = bio_alloc(lc->logdev->bdev, bio_max_segs(block->vec_cnt),
+			REQ_OP_WRITE, GFP_KERNEL);
 	bio->bi_iter.bi_size = 0;
 	bio->bi_iter.bi_sector = sector;
-	bio_set_dev(bio, lc->logdev->bdev);
 	bio->bi_end_io = log_end_io;
 	bio->bi_private = lc;
-	bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
 
 	for (i = 0; i < block->vec_cnt; i++) {
 		/*
@@ -371,14 +367,13 @@ static int log_one_block(struct log_writes_c *lc,
 		if (ret != block->vecs[i].bv_len) {
 			atomic_inc(&lc->io_blocks);
 			submit_bio(bio);
-			bio = bio_alloc(GFP_KERNEL,
-					bio_max_segs(block->vec_cnt - i));
+			bio = bio_alloc(lc->logdev->bdev,
+					bio_max_segs(block->vec_cnt - i),
+					REQ_OP_WRITE, GFP_KERNEL);
 			bio->bi_iter.bi_size = 0;
 			bio->bi_iter.bi_sector = sector;
-			bio_set_dev(bio, lc->logdev->bdev);
 			bio->bi_end_io = log_end_io;
 			bio->bi_private = lc;
-			bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
 
 			ret = bio_add_page(bio, block->vecs[i].bv_page,
 					   block->vecs[i].bv_len, 0);
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 411a3f56ed90c..f4234d615aa1b 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -1177,13 +1177,12 @@ static void process_prepared_discard_passdown_pt1(struct dm_thin_new_mapping *m)
 		return;
 	}
 
-	discard_parent = bio_alloc(GFP_NOIO, 1);
+	discard_parent = bio_alloc(NULL, 1, 0, GFP_NOIO);
 	discard_parent->bi_end_io = passdown_endio;
 	discard_parent->bi_private = m;
-
-	if (m->maybe_shared)
-		passdown_double_checking_shared_status(m, discard_parent);
-	else {
+ 	if (m->maybe_shared)
+ 		passdown_double_checking_shared_status(m, discard_parent);
+ 	else {
 		struct discard_op op;
 
 		begin_discard(&op, tc, discard_parent);
diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c
index 5718b83cc7182..e5f1eb27ce2e9 100644
--- a/drivers/md/dm-zoned-metadata.c
+++ b/drivers/md/dm-zoned-metadata.c
@@ -550,7 +550,8 @@ static struct dmz_mblock *dmz_get_mblock_slow(struct dmz_metadata *zmd,
 	if (!mblk)
 		return ERR_PTR(-ENOMEM);
 
-	bio = bio_alloc(GFP_NOIO, 1);
+	bio = bio_alloc(dev->bdev, 1, REQ_OP_READ | REQ_META | REQ_PRIO,
+			GFP_NOIO);
 
 	spin_lock(&zmd->mblk_lock);
 
@@ -574,10 +575,8 @@ static struct dmz_mblock *dmz_get_mblock_slow(struct dmz_metadata *zmd,
 
 	/* Submit read BIO */
 	bio->bi_iter.bi_sector = dmz_blk2sect(block);
-	bio_set_dev(bio, dev->bdev);
 	bio->bi_private = mblk;
 	bio->bi_end_io = dmz_mblock_bio_end_io;
-	bio_set_op_attrs(bio, REQ_OP_READ, REQ_META | REQ_PRIO);
 	bio_add_page(bio, mblk->page, DMZ_BLOCK_SIZE, 0);
 	submit_bio(bio);
 
@@ -721,15 +720,14 @@ static int dmz_write_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk,
 	if (dmz_bdev_is_dying(dev))
 		return -EIO;
 
-	bio = bio_alloc(GFP_NOIO, 1);
+	bio = bio_alloc(dev->bdev, 1, REQ_OP_WRITE | REQ_META | REQ_PRIO,
+			GFP_NOIO);
 
 	set_bit(DMZ_META_WRITING, &mblk->state);
 
 	bio->bi_iter.bi_sector = dmz_blk2sect(block);
-	bio_set_dev(bio, dev->bdev);
 	bio->bi_private = mblk;
 	bio->bi_end_io = dmz_mblock_bio_end_io;
-	bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_META | REQ_PRIO);
 	bio_add_page(bio, mblk->page, DMZ_BLOCK_SIZE, 0);
 	submit_bio(bio);
 
@@ -751,10 +749,9 @@ static int dmz_rdwr_block(struct dmz_dev *dev, int op,
 	if (dmz_bdev_is_dying(dev))
 		return -EIO;
 
-	bio = bio_alloc(GFP_NOIO, 1);
+	bio = bio_alloc(dev->bdev, 1, op | REQ_SYNC | REQ_META | REQ_PRIO,
+			GFP_NOIO);
 	bio->bi_iter.bi_sector = dmz_blk2sect(block);
-	bio_set_dev(bio, dev->bdev);
-	bio_set_op_attrs(bio, op, REQ_SYNC | REQ_META | REQ_PRIO);
 	bio_add_page(bio, page, DMZ_BLOCK_SIZE, 0);
 	ret = submit_bio_wait(bio);
 	bio_put(bio);
diff --git a/drivers/nvdimm/nd_virtio.c b/drivers/nvdimm/nd_virtio.c
index 10351d5b49fac..c6a648fd8744a 100644
--- a/drivers/nvdimm/nd_virtio.c
+++ b/drivers/nvdimm/nd_virtio.c
@@ -105,12 +105,12 @@ int async_pmem_flush(struct nd_region *nd_region, struct bio *bio)
 	 * parent bio. Otherwise directly call nd_region flush.
 	 */
 	if (bio && bio->bi_iter.bi_sector != -1) {
-		struct bio *child = bio_alloc(GFP_ATOMIC, 0);
+		struct bio *child = bio_alloc(bio->bi_bdev, 0, REQ_PREFLUSH,
+					      GFP_ATOMIC);
 
 		if (!child)
 			return -ENOMEM;
-		bio_copy_dev(child, bio);
-		child->bi_opf = REQ_PREFLUSH;
+		bio_clone_blkg_association(child, bio);
 		child->bi_iter.bi_sector = -1;
 		bio_chain(child, bio);
 		submit_bio(child);
diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c
index 70ca9dfc1771a..e092af3abc710 100644
--- a/drivers/nvme/target/io-cmd-bdev.c
+++ b/drivers/nvme/target/io-cmd-bdev.c
@@ -268,14 +268,15 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
 	if (nvmet_use_inline_bvec(req)) {
 		bio = &req->b.inline_bio;
 		bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
+		bio_set_dev(bio, req->ns->bdev);
+		bio->bi_opf = op;
 	} else {
-		bio = bio_alloc(GFP_KERNEL, bio_max_segs(sg_cnt));
+		bio = bio_alloc(req->ns->bdev, bio_max_segs(sg_cnt), op,
+				GFP_KERNEL);
 	}
-	bio_set_dev(bio, req->ns->bdev);
 	bio->bi_iter.bi_sector = sector;
 	bio->bi_private = req;
 	bio->bi_end_io = nvmet_bio_done;
-	bio->bi_opf = op;
 
 	blk_start_plug(&plug);
 	if (req->metadata_len)
@@ -296,10 +297,9 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
 				}
 			}
 
-			bio = bio_alloc(GFP_KERNEL, bio_max_segs(sg_cnt));
-			bio_set_dev(bio, req->ns->bdev);
+			bio = bio_alloc(req->ns->bdev, bio_max_segs(sg_cnt),
+					op, GFP_KERNEL);
 			bio->bi_iter.bi_sector = sector;
-			bio->bi_opf = op;
 
 			bio_chain(bio, prev);
 			submit_bio(prev);
diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c
index 9e5b89ae29dfe..38f72968c3fde 100644
--- a/drivers/nvme/target/passthru.c
+++ b/drivers/nvme/target/passthru.c
@@ -207,11 +207,12 @@ static int nvmet_passthru_map_sg(struct nvmet_req *req, struct request *rq)
 	if (nvmet_use_inline_bvec(req)) {
 		bio = &req->p.inline_bio;
 		bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
+		bio->bi_opf = req_op(rq);
 	} else {
-		bio = bio_alloc(GFP_KERNEL, bio_max_segs(req->sg_cnt));
+		bio = bio_alloc(NULL, bio_max_segs(req->sg_cnt), req_op(rq),
+				GFP_KERNEL);
 		bio->bi_end_io = bio_put;
 	}
-	bio->bi_opf = req_op(rq);
 
 	for_each_sg(req->sg, sg, req->sg_cnt, i) {
 		if (bio_add_pc_page(rq->q, bio, sg_page(sg), sg->length,
diff --git a/drivers/nvme/target/zns.c b/drivers/nvme/target/zns.c
index 247de74247fab..62c53e8f26d35 100644
--- a/drivers/nvme/target/zns.c
+++ b/drivers/nvme/target/zns.c
@@ -522,6 +522,7 @@ static void nvmet_bdev_zone_append_bio_done(struct bio *bio)
 void nvmet_bdev_execute_zone_append(struct nvmet_req *req)
 {
 	sector_t sect = nvmet_lba_to_sect(req->ns, req->cmd->rw.slba);
+	const unsigned int op = REQ_OP_ZONE_APPEND | REQ_SYNC | REQ_IDLE;
 	u16 status = NVME_SC_SUCCESS;
 	unsigned int total_len = 0;
 	struct scatterlist *sg;
@@ -552,13 +553,12 @@ void nvmet_bdev_execute_zone_append(struct nvmet_req *req)
 	if (nvmet_use_inline_bvec(req)) {
 		bio = &req->z.inline_bio;
 		bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
+		bio->bi_opf = op;
 	} else {
-		bio = bio_alloc(GFP_KERNEL, req->sg_cnt);
+		bio = bio_alloc(req->ns->bdev, req->sg_cnt, op, GFP_KERNEL);
 	}
 
-	bio->bi_opf = REQ_OP_ZONE_APPEND | REQ_SYNC | REQ_IDLE;
 	bio->bi_end_io = nvmet_bdev_zone_append_bio_done;
-	bio_set_dev(bio, req->ns->bdev);
 	bio->bi_iter.bi_sector = sect;
 	bio->bi_private = req;
 	if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA))
diff --git a/drivers/scsi/ufs/ufshpb.c b/drivers/scsi/ufs/ufshpb.c
index 2d36a0715fca6..8970068314ef2 100644
--- a/drivers/scsi/ufs/ufshpb.c
+++ b/drivers/scsi/ufs/ufshpb.c
@@ -494,7 +494,7 @@ static struct ufshpb_req *ufshpb_get_map_req(struct ufshpb_lu *hpb,
 	if (!map_req)
 		return NULL;
 
-	bio = bio_alloc(GFP_KERNEL, hpb->pages_per_srgn);
+	bio = bio_alloc(NULL, hpb->pages_per_srgn, 0, GFP_KERNEL);
 	if (!bio) {
 		ufshpb_put_req(hpb, map_req);
 		return NULL;
@@ -2050,7 +2050,7 @@ static int ufshpb_pre_req_mempool_init(struct ufshpb_lu *hpb)
 		INIT_LIST_HEAD(&pre_req->list_req);
 		pre_req->req = NULL;
 
-		pre_req->bio = bio_alloc(GFP_KERNEL, 1);
+		pre_req->bio = bio_alloc(NULL, 1, 0, GFP_KERNEL);
 		if (!pre_req->bio)
 			goto release_mem;
 
diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
index 3c92ba3748192..87ede165ddba4 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c
@@ -415,10 +415,9 @@ iblock_execute_sync_cache(struct se_cmd *cmd)
 	if (immed)
 		target_complete_cmd(cmd, SAM_STAT_GOOD);
 
-	bio = bio_alloc(GFP_KERNEL, 0);
+	bio = bio_alloc(ib_dev->ibd_bd, 0, REQ_OP_WRITE | REQ_PREFLUSH,
+			GFP_KERNEL);
 	bio->bi_end_io = iblock_end_io_flush;
-	bio_set_dev(bio, ib_dev->ibd_bd);
-	bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
 	if (!immed)
 		bio->bi_private = cmd;
 	submit_bio(bio);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 87a5addbedf6d..f45aa506f9a6f 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -4029,8 +4029,9 @@ static int write_dev_supers(struct btrfs_device *device,
 		 * to do I/O, so we don't lose the ability to do integrity
 		 * checking.
 		 */
-		bio = bio_alloc(GFP_NOFS, 1);
-		bio_set_dev(bio, device->bdev);
+		bio = bio_alloc(device->bdev, 1,
+				REQ_OP_WRITE | REQ_SYNC | REQ_META | REQ_PRIO,
+				GFP_NOFS);
 		bio->bi_iter.bi_sector = bytenr >> SECTOR_SHIFT;
 		bio->bi_private = device;
 		bio->bi_end_io = btrfs_end_super_write;
@@ -4042,7 +4043,6 @@ static int write_dev_supers(struct btrfs_device *device,
 		 * go down lazy and there's a short window where the on-disk
 		 * copies might still contain the older version.
 		 */
-		bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_META | REQ_PRIO;
 		if (i == 0 && !btrfs_test_opt(device->fs_info, NOBARRIER))
 			bio->bi_opf |= REQ_FUA;
 
diff --git a/fs/buffer.c b/fs/buffer.c
index 8e112b6bd3719..a17c386a142c7 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -3024,12 +3024,16 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
 	if (test_set_buffer_req(bh) && (op == REQ_OP_WRITE))
 		clear_buffer_write_io_error(bh);
 
-	bio = bio_alloc(GFP_NOIO, 1);
+	if (buffer_meta(bh))
+		op_flags |= REQ_META;
+	if (buffer_prio(bh))
+		op_flags |= REQ_PRIO;
+
+	bio = bio_alloc(bh->b_bdev, 1, op | op_flags, GFP_NOIO);
 
 	fscrypt_set_bio_crypt_ctx_bh(bio, bh, GFP_NOIO);
 
 	bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
-	bio_set_dev(bio, bh->b_bdev);
 	bio->bi_write_hint = write_hint;
 
 	bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
@@ -3038,12 +3042,6 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
 	bio->bi_end_io = end_bio_bh_io_sync;
 	bio->bi_private = bh;
 
-	if (buffer_meta(bh))
-		op_flags |= REQ_META;
-	if (buffer_prio(bh))
-		op_flags |= REQ_PRIO;
-	bio_set_op_attrs(bio, op, op_flags);
-
 	/* Take care of bh's that straddle the end of the device */
 	guard_bio_eod(bio);
 
diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c
index bfc2a5b74ed39..755e985a42e0b 100644
--- a/fs/crypto/bio.c
+++ b/fs/crypto/bio.c
@@ -54,7 +54,8 @@ static int fscrypt_zeroout_range_inline_crypt(const struct inode *inode,
 	int num_pages = 0;
 
 	/* This always succeeds since __GFP_DIRECT_RECLAIM is set. */
-	bio = bio_alloc(GFP_NOFS, BIO_MAX_VECS);
+	bio = bio_alloc(inode->i_sb->s_bdev, BIO_MAX_VECS, REQ_OP_WRITE,
+			GFP_NOFS);
 
 	while (len) {
 		unsigned int blocks_this_page = min(len, blocks_per_page);
@@ -62,10 +63,8 @@ static int fscrypt_zeroout_range_inline_crypt(const struct inode *inode,
 
 		if (num_pages == 0) {
 			fscrypt_set_bio_crypt_ctx(bio, inode, lblk, GFP_NOFS);
-			bio_set_dev(bio, inode->i_sb->s_bdev);
 			bio->bi_iter.bi_sector =
 					pblk << (blockbits - SECTOR_SHIFT);
-			bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
 		}
 		ret = bio_add_page(bio, ZERO_PAGE(0), bytes_this_page, 0);
 		if (WARN_ON(ret != bytes_this_page)) {
@@ -82,6 +81,8 @@ static int fscrypt_zeroout_range_inline_crypt(const struct inode *inode,
 			if (err)
 				goto out;
 			bio_reset(bio);
+			bio_set_dev(bio, inode->i_sb->s_bdev);
+			bio->bi_opf = REQ_OP_WRITE;
 			num_pages = 0;
 		}
 	}
@@ -150,12 +151,10 @@ int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk,
 		return -EINVAL;
 
 	/* This always succeeds since __GFP_DIRECT_RECLAIM is set. */
-	bio = bio_alloc(GFP_NOFS, nr_pages);
+	bio = bio_alloc(inode->i_sb->s_bdev, nr_pages, REQ_OP_WRITE, GFP_NOFS);
 
 	do {
-		bio_set_dev(bio, inode->i_sb->s_bdev);
 		bio->bi_iter.bi_sector = pblk << (blockbits - 9);
-		bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
 
 		i = 0;
 		offset = 0;
@@ -183,6 +182,8 @@ int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk,
 		if (err)
 			goto out;
 		bio_reset(bio);
+		bio_set_dev(bio, inode->i_sb->s_bdev);
+		bio->bi_opf = REQ_OP_WRITE;
 	} while (len != 0);
 	err = 0;
 out:
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 6544435580470..38bca4980a1ca 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -396,11 +396,8 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio,
 	 * bio_alloc() is guaranteed to return a bio when allowed to sleep and
 	 * we request a valid number of vectors.
 	 */
-	bio = bio_alloc(GFP_KERNEL, nr_vecs);
-
-	bio_set_dev(bio, bdev);
+	bio = bio_alloc(bdev, nr_vecs, dio->op | dio->op_flags, GFP_KERNEL);
 	bio->bi_iter.bi_sector = first_sector;
-	bio_set_op_attrs(bio, dio->op, dio->op_flags);
 	if (dio->is_async)
 		bio->bi_end_io = dio_bio_end_aio;
 	else
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 498b7666efe85..db7de2dbac739 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -1371,15 +1371,14 @@ submit_bio_retry:
 			}
 
 			if (!bio) {
-				bio = bio_alloc(GFP_NOIO, BIO_MAX_VECS);
+				bio = bio_alloc(mdev.m_bdev, BIO_MAX_VECS,
+						REQ_OP_READ, GFP_NOIO);
 				bio->bi_end_io = z_erofs_decompressqueue_endio;
 
-				bio_set_dev(bio, mdev.m_bdev);
 				last_bdev = mdev.m_bdev;
 				bio->bi_iter.bi_sector = (sector_t)cur <<
 					LOG_SECTORS_PER_BLOCK;
 				bio->bi_private = bi_private;
-				bio->bi_opf = REQ_OP_READ;
 				if (f->readahead)
 					bio->bi_opf |= REQ_RAHEAD;
 				++nr_bios;
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 1d370364230e8..1253982268730 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -398,10 +398,9 @@ static void io_submit_init_bio(struct ext4_io_submit *io,
 	 * bio_alloc will _always_ be able to allocate a bio if
 	 * __GFP_DIRECT_RECLAIM is set, see comments for bio_alloc_bioset().
 	 */
-	bio = bio_alloc(GFP_NOIO, BIO_MAX_VECS);
+	bio = bio_alloc(bh->b_bdev, BIO_MAX_VECS, 0, GFP_NOIO);
 	fscrypt_set_bio_crypt_ctx_bh(bio, bh, GFP_NOIO);
 	bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
-	bio_set_dev(bio, bh->b_bdev);
 	bio->bi_end_io = ext4_end_bio;
 	bio->bi_private = ext4_get_io_end(io->io_end);
 	io->io_bio = bio;
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index 4cd62f1d848c8..1aa26d6634fc9 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -365,15 +365,15 @@ int ext4_mpage_readpages(struct inode *inode,
 			 * bio_alloc will _always_ be able to allocate a bio if
 			 * __GFP_DIRECT_RECLAIM is set, see bio_alloc_bioset().
 			 */
-			bio = bio_alloc(GFP_KERNEL, bio_max_segs(nr_pages));
+			bio = bio_alloc(bdev, bio_max_segs(nr_pages),
+					REQ_OP_READ, GFP_KERNEL);
 			fscrypt_set_bio_crypt_ctx(bio, inode, next_block,
 						  GFP_KERNEL);
 			ext4_set_bio_post_read_ctx(bio, inode, page->index);
-			bio_set_dev(bio, bdev);
 			bio->bi_iter.bi_sector = blocks[0] << (blkbits - 9);
 			bio->bi_end_io = mpage_end_io;
-			bio_set_op_attrs(bio, REQ_OP_READ,
-						rac ? REQ_RAHEAD : 0);
+			if (rac)
+				bio->bi_opf |= REQ_RAHEAD;
 		}
 
 		length = first_hole << blkbits;
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index ca0bb3a73912a..4ae1eefae616d 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -265,10 +265,9 @@ static struct bio *gfs2_log_alloc_bio(struct gfs2_sbd *sdp, u64 blkno,
 				      bio_end_io_t *end_io)
 {
 	struct super_block *sb = sdp->sd_vfs;
-	struct bio *bio = bio_alloc(GFP_NOIO, BIO_MAX_VECS);
+	struct bio *bio = bio_alloc(sb->s_bdev, BIO_MAX_VECS, 0, GFP_NOIO);
 
 	bio->bi_iter.bi_sector = blkno << sdp->sd_fsb2bb_shift;
-	bio_set_dev(bio, sb->s_bdev);
 	bio->bi_end_io = end_io;
 	bio->bi_private = sdp;
 
@@ -489,10 +488,9 @@ static struct bio *gfs2_chain_bio(struct bio *prev, unsigned int nr_iovecs)
 {
 	struct bio *new;
 
-	new = bio_alloc(GFP_NOIO, nr_iovecs);
-	bio_copy_dev(new, prev);
+	new = bio_alloc(prev->bi_bdev, nr_iovecs, prev->bi_opf, GFP_NOIO);
+	bio_clone_blkg_association(new, prev);
 	new->bi_iter.bi_sector = bio_end_sector(prev);
-	new->bi_opf = prev->bi_opf;
 	new->bi_write_hint = prev->bi_write_hint;
 	bio_chain(new, prev);
 	submit_bio(prev);
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 72d30a682ecec..a580b90b75222 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -222,9 +222,8 @@ static void gfs2_submit_bhs(int op, int op_flags, struct buffer_head *bhs[],
 		struct buffer_head *bh = *bhs;
 		struct bio *bio;
 
-		bio = bio_alloc(GFP_NOIO, num);
+		bio = bio_alloc(bh->b_bdev, num, op | op_flags, GFP_NOIO);
 		bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
-		bio_set_dev(bio, bh->b_bdev);
 		while (num > 0) {
 			bh = *bhs;
 			if (!bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh))) {
@@ -235,7 +234,6 @@ static void gfs2_submit_bhs(int op, int op_flags, struct buffer_head *bhs[],
 			num--;
 		}
 		bio->bi_end_io = gfs2_meta_read_endio;
-		bio_set_op_attrs(bio, op, op_flags);
 		submit_bio(bio);
 	}
 }
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 7f8410d8fdc1d..c9b423c874a32 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -251,14 +251,12 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector, int silent)
 	ClearPageDirty(page);
 	lock_page(page);
 
-	bio = bio_alloc(GFP_NOFS, 1);
+	bio = bio_alloc(sb->s_bdev, 1, REQ_OP_READ | REQ_META, GFP_NOFS);
 	bio->bi_iter.bi_sector = sector * (sb->s_blocksize >> 9);
-	bio_set_dev(bio, sb->s_bdev);
 	bio_add_page(bio, page, PAGE_SIZE, 0);
 
 	bio->bi_end_io = end_bio_io_page;
 	bio->bi_private = page;
-	bio_set_op_attrs(bio, REQ_OP_READ, REQ_META);
 	submit_bio(bio);
 	wait_on_page_locked(page);
 	bio_put(bio);
diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c
index 4688cc7b36926..0b8ad6586df53 100644
--- a/fs/hfsplus/wrapper.c
+++ b/fs/hfsplus/wrapper.c
@@ -63,10 +63,8 @@ int hfsplus_submit_bio(struct super_block *sb, sector_t sector,
 	offset = start & (io_size - 1);
 	sector &= ~((io_size >> HFSPLUS_SECTOR_SHIFT) - 1);
 
-	bio = bio_alloc(GFP_NOIO, 1);
+	bio = bio_alloc(sb->s_bdev, 1, op | op_flags, GFP_NOIO);
 	bio->bi_iter.bi_sector = sector;
-	bio_set_dev(bio, sb->s_bdev);
-	bio_set_op_attrs(bio, op, op_flags);
 
 	if (op != WRITE && data)
 		*data = (u8 *)buf + offset;
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 340d373cb1bf9..70f3657a6ec06 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -290,19 +290,20 @@ static loff_t iomap_readpage_iter(const struct iomap_iter *iter,
 
 		if (ctx->rac) /* same as readahead_gfp_mask */
 			gfp |= __GFP_NORETRY | __GFP_NOWARN;
-		ctx->bio = bio_alloc(gfp, bio_max_segs(nr_vecs));
+		ctx->bio = bio_alloc(iomap->bdev, bio_max_segs(nr_vecs),
+				     REQ_OP_READ, gfp);
 		/*
 		 * If the bio_alloc fails, try it again for a single page to
 		 * avoid having to deal with partial page reads.  This emulates
 		 * what do_mpage_readpage does.
 		 */
-		if (!ctx->bio)
-			ctx->bio = bio_alloc(orig_gfp, 1);
-		ctx->bio->bi_opf = REQ_OP_READ;
+		if (!ctx->bio) {
+			ctx->bio = bio_alloc(iomap->bdev, 1, REQ_OP_READ,
+					     orig_gfp);
+		}
 		if (ctx->rac)
 			ctx->bio->bi_opf |= REQ_RAHEAD;
 		ctx->bio->bi_iter.bi_sector = sector;
-		bio_set_dev(ctx->bio, iomap->bdev);
 		ctx->bio->bi_end_io = iomap_read_end_io;
 		bio_add_folio(ctx->bio, folio, plen, poff);
 	}
@@ -1226,10 +1227,9 @@ iomap_chain_bio(struct bio *prev)
 {
 	struct bio *new;
 
-	new = bio_alloc(GFP_NOFS, BIO_MAX_VECS);
-	bio_copy_dev(new, prev);/* also copies over blkcg information */
+	new = bio_alloc(prev->bi_bdev, BIO_MAX_VECS, prev->bi_opf, GFP_NOFS);
+	bio_clone_blkg_association(new, prev);
 	new->bi_iter.bi_sector = bio_end_sector(prev);
-	new->bi_opf = prev->bi_opf;
 	new->bi_write_hint = prev->bi_write_hint;
 
 	bio_chain(prev, new);
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index 03ea367df19a4..e2ba13645ef28 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -183,15 +183,13 @@ static void iomap_dio_zero(const struct iomap_iter *iter, struct iomap_dio *dio,
 	int flags = REQ_SYNC | REQ_IDLE;
 	struct bio *bio;
 
-	bio = bio_alloc(GFP_KERNEL, 1);
-	bio_set_dev(bio, iter->iomap.bdev);
+	bio = bio_alloc(iter->iomap.bdev, 1, REQ_OP_WRITE | flags, GFP_KERNEL);
 	bio->bi_iter.bi_sector = iomap_sector(&iter->iomap, pos);
 	bio->bi_private = dio;
 	bio->bi_end_io = iomap_dio_bio_end_io;
 
 	get_page(page);
 	__bio_add_page(bio, page, len, 0);
-	bio_set_op_attrs(bio, REQ_OP_WRITE, flags);
 	iomap_dio_submit_bio(iter, dio, bio, pos);
 }
 
@@ -309,14 +307,12 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
 			goto out;
 		}
 
-		bio = bio_alloc(GFP_KERNEL, nr_pages);
-		bio_set_dev(bio, iomap->bdev);
+		bio = bio_alloc(iomap->bdev, nr_pages, bio_opf, GFP_KERNEL);
 		bio->bi_iter.bi_sector = iomap_sector(iomap, pos);
 		bio->bi_write_hint = dio->iocb->ki_hint;
 		bio->bi_ioprio = dio->iocb->ki_ioprio;
 		bio->bi_private = dio;
 		bio->bi_end_io = iomap_dio_bio_end_io;
-		bio->bi_opf = bio_opf;
 
 		ret = bio_iov_iter_get_pages(bio, dio->submit.iter);
 		if (unlikely(ret)) {
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index 78fd136ac13b9..997c81fcea349 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -1980,17 +1980,13 @@ static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp)
 
 	bp->l_flag |= lbmREAD;
 
-	bio = bio_alloc(GFP_NOFS, 1);
-
+	bio = bio_alloc(log->bdev, 1, REQ_OP_READ, GFP_NOFS);
 	bio->bi_iter.bi_sector = bp->l_blkno << (log->l2bsize - 9);
-	bio_set_dev(bio, log->bdev);
-
 	bio_add_page(bio, bp->l_page, LOGPSIZE, bp->l_offset);
 	BUG_ON(bio->bi_iter.bi_size != LOGPSIZE);
 
 	bio->bi_end_io = lbmIODone;
 	bio->bi_private = bp;
-	bio->bi_opf = REQ_OP_READ;
 	/*check if journaling to disk has been disabled*/
 	if (log->no_integrity) {
 		bio->bi_iter.bi_size = 0;
@@ -2125,16 +2121,13 @@ static void lbmStartIO(struct lbuf * bp)
 
 	jfs_info("lbmStartIO");
 
-	bio = bio_alloc(GFP_NOFS, 1);
+	bio = bio_alloc(log->bdev, 1, REQ_OP_WRITE | REQ_SYNC, GFP_NOFS);
 	bio->bi_iter.bi_sector = bp->l_blkno << (log->l2bsize - 9);
-	bio_set_dev(bio, log->bdev);
-
 	bio_add_page(bio, bp->l_page, LOGPSIZE, bp->l_offset);
 	BUG_ON(bio->bi_iter.bi_size != LOGPSIZE);
 
 	bio->bi_end_io = lbmIODone;
 	bio->bi_private = bp;
-	bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
 
 	/* check if journaling to disk has been disabled */
 	if (log->no_integrity) {
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 104ae698443ed..fde1a9cf902e8 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -417,12 +417,10 @@ static int metapage_writepage(struct page *page, struct writeback_control *wbc)
 		}
 		len = min(xlen, (int)JFS_SBI(inode->i_sb)->nbperpage);
 
-		bio = bio_alloc(GFP_NOFS, 1);
-		bio_set_dev(bio, inode->i_sb->s_bdev);
+		bio = bio_alloc(inode->i_sb->s_bdev, 1, REQ_OP_WRITE, GFP_NOFS);
 		bio->bi_iter.bi_sector = pblock << (inode->i_blkbits - 9);
 		bio->bi_end_io = metapage_write_end_io;
 		bio->bi_private = page;
-		bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
 
 		/* Don't call bio_add_page yet, we may add to this vec */
 		bio_offset = offset;
@@ -497,13 +495,12 @@ static int metapage_readpage(struct file *fp, struct page *page)
 			if (bio)
 				submit_bio(bio);
 
-			bio = bio_alloc(GFP_NOFS, 1);
-			bio_set_dev(bio, inode->i_sb->s_bdev);
+			bio = bio_alloc(inode->i_sb->s_bdev, 1, REQ_OP_READ,
+					GFP_NOFS);
 			bio->bi_iter.bi_sector =
 				pblock << (inode->i_blkbits - 9);
 			bio->bi_end_io = metapage_read_end_io;
 			bio->bi_private = page;
-			bio_set_op_attrs(bio, REQ_OP_READ, 0);
 			len = xlen << inode->i_blkbits;
 			offset = block_offset << inode->i_blkbits;
 			if (bio_add_page(bio, page, len, offset) < len)
diff --git a/fs/mpage.c b/fs/mpage.c
index 06e95d777e940..dbfc02e23d97f 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -273,10 +273,10 @@ alloc_new:
 								page))
 				goto out;
 		}
-		args->bio = bio_alloc(gfp, bio_max_segs(args->nr_pages));
+		args->bio = bio_alloc(bdev, bio_max_segs(args->nr_pages), 0,
+				      gfp);
 		if (args->bio == NULL)
 			goto confused;
-		bio_set_dev(args->bio, bdev);
 		args->bio->bi_iter.bi_sector = blocks[0] << (blkbits - 9);
 	}
 
@@ -586,8 +586,7 @@ alloc_new:
 								page, wbc))
 				goto out;
 		}
-		bio = bio_alloc(GFP_NOFS, BIO_MAX_VECS);
-		bio_set_dev(bio, bdev);
+		bio = bio_alloc(bdev, BIO_MAX_VECS, 0, GFP_NOFS);
 		bio->bi_iter.bi_sector = blocks[0] << (blkbits - 9);
 
 		wbc_init_bio(wbc, bio);
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 38e063af7e98a..79a8b451791f5 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -154,12 +154,10 @@ do_add_page_to_bio(struct bio *bio, int npg, int rw, sector_t isect,
 
 retry:
 	if (!bio) {
-		bio = bio_alloc(GFP_NOIO, bio_max_segs(npg));
+		bio = bio_alloc(map->bdev, bio_max_segs(npg), rw, GFP_NOIO);
 		bio->bi_iter.bi_sector = disk_addr >> SECTOR_SHIFT;
-		bio_set_dev(bio, map->bdev);
 		bio->bi_end_io = end_io;
 		bio->bi_private = par;
-		bio_set_op_attrs(bio, rw, 0);
 	}
 	if (bio_add_page(bio, page, *len, offset) < *len) {
 		bio = bl_submit_bio(bio);
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index 53b7c6d21cdd8..4f71faacd8253 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -391,8 +391,8 @@ static int nilfs_segbuf_submit_bh(struct nilfs_segment_buffer *segbuf,
 	BUG_ON(wi->nr_vecs <= 0);
  repeat:
 	if (!wi->bio) {
-		wi->bio = bio_alloc(GFP_NOIO, wi->nr_vecs);
-		bio_set_dev(wi->bio, wi->nilfs->ns_bdev);
+		wi->bio = bio_alloc(wi->nilfs->ns_bdev, wi->nr_vecs, 0,
+				    GFP_NOIO);
 		wi->bio->bi_iter.bi_sector = (wi->blocknr + wi->end) <<
 			(wi->nilfs->ns_blocksize_bits - 9);
 	}
diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c
index 4a255e21ecf5f..0660a07c5a96e 100644
--- a/fs/ntfs3/fsntfs.c
+++ b/fs/ntfs3/fsntfs.c
@@ -1485,15 +1485,13 @@ int ntfs_bio_pages(struct ntfs_sb_info *sbi, const struct runs_tree *run,
 		lbo = ((u64)lcn << cluster_bits) + off;
 		len = ((u64)clen << cluster_bits) - off;
 new_bio:
-		new = bio_alloc(GFP_NOFS, nr_pages - page_idx);
+		new = bio_alloc(bdev, nr_pages - page_idx, op, GFP_NOFS);
 		if (bio) {
 			bio_chain(bio, new);
 			submit_bio(bio);
 		}
 		bio = new;
-		bio_set_dev(bio, bdev);
 		bio->bi_iter.bi_sector = lbo >> 9;
-		bio->bi_opf = op;
 
 		while (len) {
 			off = vbo & (PAGE_SIZE - 1);
@@ -1584,14 +1582,12 @@ int ntfs_bio_fill_1(struct ntfs_sb_info *sbi, const struct runs_tree *run)
 		lbo = (u64)lcn << cluster_bits;
 		len = (u64)clen << cluster_bits;
 new_bio:
-		new = bio_alloc(GFP_NOFS, BIO_MAX_VECS);
+		new = bio_alloc(bdev, BIO_MAX_VECS, REQ_OP_WRITE, GFP_NOFS);
 		if (bio) {
 			bio_chain(bio, new);
 			submit_bio(bio);
 		}
 		bio = new;
-		bio_set_dev(bio, bdev);
-		bio->bi_opf = REQ_OP_WRITE;
 		bio->bi_iter.bi_sector = lbo >> 9;
 
 		for (;;) {
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index a17be1618bf70..ea0e70c0fce09 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -518,7 +518,7 @@ static struct bio *o2hb_setup_one_bio(struct o2hb_region *reg,
 	 * GFP_KERNEL that the local node can get fenced. It would be
 	 * nicest if we could pre-allocate these bios and avoid this
 	 * all together. */
-	bio = bio_alloc(GFP_ATOMIC, 16);
+	bio = bio_alloc(reg->hr_bdev, 16, op | op_flags, GFP_ATOMIC);
 	if (!bio) {
 		mlog(ML_ERROR, "Could not alloc slots BIO!\n");
 		bio = ERR_PTR(-ENOMEM);
@@ -527,10 +527,8 @@ static struct bio *o2hb_setup_one_bio(struct o2hb_region *reg,
 
 	/* Must put everything in 512 byte sectors for the bio... */
 	bio->bi_iter.bi_sector = (reg->hr_start_block + cs) << (bits - 9);
-	bio_set_dev(bio, reg->hr_bdev);
 	bio->bi_private = wc;
 	bio->bi_end_io = o2hb_bio_end_io;
-	bio_set_op_attrs(bio, op, op_flags);
 
 	vec_start = (cs << bits) % PAGE_SIZE;
 	while(cs < max_slots) {
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index 2db8bcf7ff859..622c844f6d118 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -86,16 +86,17 @@ static int squashfs_bio_read(struct super_block *sb, u64 index, int length,
 	int error, i;
 	struct bio *bio;
 
-	if (page_count <= BIO_MAX_VECS)
-		bio = bio_alloc(GFP_NOIO, page_count);
-	else
+	if (page_count <= BIO_MAX_VECS) {
+		bio = bio_alloc(sb->s_bdev, page_count, REQ_OP_READ, GFP_NOIO);
+	} else {
 		bio = bio_kmalloc(GFP_NOIO, page_count);
+		bio_set_dev(bio, sb->s_bdev);
+		bio->bi_opf = REQ_OP_READ;
+	}
 
 	if (!bio)
 		return -ENOMEM;
 
-	bio_set_dev(bio, sb->s_bdev);
-	bio->bi_opf = READ;
 	bio->bi_iter.bi_sector = block * (msblk->devblksize >> SECTOR_SHIFT);
 
 	for (i = 0; i < page_count; ++i) {
diff --git a/fs/xfs/xfs_bio_io.c b/fs/xfs/xfs_bio_io.c
index 667e297f59b16..eff4a9f21dcff 100644
--- a/fs/xfs/xfs_bio_io.c
+++ b/fs/xfs/xfs_bio_io.c
@@ -61,10 +61,9 @@ xfs_rw_bdev(
 	if (is_vmalloc && op == REQ_OP_WRITE)
 		flush_kernel_vmap_range(data, count);
 
-	bio = bio_alloc(GFP_KERNEL, bio_max_vecs(left));
-	bio_set_dev(bio, bdev);
+	bio = bio_alloc(bdev, bio_max_vecs(left), op | REQ_META | REQ_SYNC,
+			GFP_KERNEL);
 	bio->bi_iter.bi_sector = sector;
-	bio->bi_opf = op | REQ_META | REQ_SYNC;
 
 	do {
 		struct page	*page = kmem_to_page(data);
@@ -74,10 +73,9 @@ xfs_rw_bdev(
 		while (bio_add_page(bio, page, len, off) != len) {
 			struct bio	*prev = bio;
 
-			bio = bio_alloc(GFP_KERNEL, bio_max_vecs(left));
-			bio_copy_dev(bio, prev);
+			bio = bio_alloc(prev->bi_bdev, bio_max_vecs(left),
+					prev->bi_opf, GFP_KERNEL);
 			bio->bi_iter.bi_sector = bio_end_sector(prev);
-			bio->bi_opf = prev->bi_opf;
 			bio_chain(prev, bio);
 
 			submit_bio(prev);
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index b45e0d50a4052..ae87fd95b17e2 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1440,12 +1440,10 @@ next_chunk:
 	atomic_inc(&bp->b_io_remaining);
 	nr_pages = bio_max_segs(total_nr_pages);
 
-	bio = bio_alloc(GFP_NOIO, nr_pages);
-	bio_set_dev(bio, bp->b_target->bt_bdev);
+	bio = bio_alloc(bp->b_target->bt_bdev, nr_pages, op, GFP_NOIO);
 	bio->bi_iter.bi_sector = sector;
 	bio->bi_end_io = xfs_buf_bio_end_io;
 	bio->bi_private = bp;
-	bio->bi_opf = op;
 
 	for (; size && nr_pages; nr_pages--, page_index++) {
 		int	rbytes, nbytes = PAGE_SIZE - offset;
diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
index b76dfb310ab65..c0fc2c326dcee 100644
--- a/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@ -692,12 +692,11 @@ static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from)
 	if (!nr_pages)
 		return 0;
 
-	bio = bio_alloc(GFP_NOFS, nr_pages);
-	bio_set_dev(bio, bdev);
+	bio = bio_alloc(bdev, nr_pages,
+			REQ_OP_ZONE_APPEND | REQ_SYNC | REQ_IDLE, GFP_NOFS);
 	bio->bi_iter.bi_sector = zi->i_zsector;
 	bio->bi_write_hint = iocb->ki_hint;
 	bio->bi_ioprio = iocb->ki_ioprio;
-	bio->bi_opf = REQ_OP_ZONE_APPEND | REQ_SYNC | REQ_IDLE;
 	if (iocb->ki_flags & IOCB_DSYNC)
 		bio->bi_opf |= REQ_FUA;
 
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 5c5ada2ebb270..be6ac92913d48 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -418,9 +418,10 @@ extern struct bio *bio_clone_fast(struct bio *, gfp_t, struct bio_set *);
 
 extern struct bio_set fs_bio_set;
 
-static inline struct bio *bio_alloc(gfp_t gfp_mask, unsigned short nr_iovecs)
+static inline struct bio *bio_alloc(struct block_device *bdev,
+		unsigned short nr_vecs, unsigned int opf, gfp_t gfp_mask)
 {
-	return bio_alloc_bioset(NULL, nr_iovecs, 0, gfp_mask, &fs_bio_set);
+	return bio_alloc_bioset(bdev, nr_vecs, opf, gfp_mask, &fs_bio_set);
 }
 
 void submit_bio(struct bio *bio);
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index f1bd031295752..6c4f983cbacc5 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -276,10 +276,9 @@ static int hib_submit_io(int op, int op_flags, pgoff_t page_off, void *addr,
 	struct bio *bio;
 	int error = 0;
 
-	bio = bio_alloc(GFP_NOIO | __GFP_HIGH, 1);
+	bio = bio_alloc(hib_resume_bdev, 1, op | op_flags,
+			GFP_NOIO | __GFP_HIGH);
 	bio->bi_iter.bi_sector = page_off * (PAGE_SIZE >> 9);
-	bio_set_dev(bio, hib_resume_bdev);
-	bio_set_op_attrs(bio, op, op_flags);
 
 	if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
 		pr_err("Adding page to bio failed at %llu\n",
diff --git a/mm/page_io.c b/mm/page_io.c
index 0bf8e40f4e573..61c792f916fac 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -338,10 +338,10 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
 		return 0;
 	}
 
-	bio = bio_alloc(GFP_NOIO, 1);
-	bio_set_dev(bio, sis->bdev);
+	bio = bio_alloc(sis->bdev, 1,
+			REQ_OP_WRITE | REQ_SWAP | wbc_to_write_flags(wbc),
+			GFP_NOIO);
 	bio->bi_iter.bi_sector = swap_page_sector(page);
-	bio->bi_opf = REQ_OP_WRITE | REQ_SWAP | wbc_to_write_flags(wbc);
 	bio->bi_end_io = end_write_func;
 	bio_add_page(bio, page, thp_size(page), 0);
 
@@ -403,9 +403,7 @@ int swap_readpage(struct page *page, bool synchronous)
 	}
 
 	ret = 0;
-	bio = bio_alloc(GFP_KERNEL, 1);
-	bio_set_dev(bio, sis->bdev);
-	bio->bi_opf = REQ_OP_READ;
+	bio = bio_alloc(sis->bdev, 1, REQ_OP_READ, GFP_KERNEL);
 	bio->bi_iter.bi_sector = swap_page_sector(page);
 	bio->bi_end_io = end_swap_bio_read;
 	bio_add_page(bio, page, thp_size(page), 0);
-- 
GitLab


From 49add4966d79244013fce35f95c6833fae82b8b1 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 24 Jan 2022 10:11:06 +0100
Subject: [PATCH 0198/1586] block: pass a block_device and opf to bio_init

Pass the block_device that we plan to use this bio for and the
operation to bio_init to optimize the assignment.  A NULL block_device
can be passed, both for the passthrough case on a raw request_queue and
to temporarily avoid refactoring some nasty code.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Link: https://lore.kernel.org/r/20220124091107.642561-19-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c                       | 27 +++++++++++++--------------
 block/blk-flush.c                 |  4 +---
 block/blk-zoned.c                 |  5 +----
 block/fops.c                      | 18 +++++++++---------
 drivers/block/floppy.c            |  4 +---
 drivers/block/zram/zram_drv.c     |  5 ++---
 drivers/md/bcache/io.c            |  3 ++-
 drivers/md/bcache/journal.c       |  4 +---
 drivers/md/bcache/movinggc.c      |  4 ++--
 drivers/md/bcache/request.c       |  2 +-
 drivers/md/bcache/super.c         |  8 +++-----
 drivers/md/bcache/writeback.c     |  4 ++--
 drivers/md/dm.c                   |  5 ++---
 drivers/md/md-multipath.c         |  2 +-
 drivers/md/md.c                   |  8 +++-----
 drivers/md/raid5-cache.c          |  2 +-
 drivers/md/raid5-ppl.c            |  2 +-
 drivers/md/raid5.c                |  4 ++--
 drivers/nvme/target/io-cmd-bdev.c | 10 ++++------
 drivers/nvme/target/passthru.c    |  4 ++--
 drivers/nvme/target/zns.c         |  4 ++--
 fs/iomap/buffered-io.c            |  4 +---
 fs/xfs/xfs_bio_io.c               |  4 +---
 fs/xfs/xfs_log.c                  | 14 +++++++-------
 fs/zonefs/super.c                 |  4 +---
 include/linux/bio.h               |  4 ++--
 26 files changed, 68 insertions(+), 91 deletions(-)

diff --git a/block/bio.c b/block/bio.c
index b73c9babd5835..b2133d86e885e 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -249,12 +249,12 @@ static void bio_free(struct bio *bio)
  * they must remember to pair any call to bio_init() with bio_uninit()
  * when IO has completed, or when the bio is released.
  */
-void bio_init(struct bio *bio, struct bio_vec *table,
-	      unsigned short max_vecs)
+void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table,
+	      unsigned short max_vecs, unsigned int opf)
 {
 	bio->bi_next = NULL;
-	bio->bi_bdev = NULL;
-	bio->bi_opf = 0;
+	bio->bi_bdev = bdev;
+	bio->bi_opf = opf;
 	bio->bi_flags = 0;
 	bio->bi_ioprio = 0;
 	bio->bi_write_hint = 0;
@@ -268,6 +268,8 @@ void bio_init(struct bio *bio, struct bio_vec *table,
 #ifdef CONFIG_BLK_CGROUP
 	bio->bi_blkg = NULL;
 	bio->bi_issue.value = 0;
+	if (bdev)
+		bio_associate_blkg(bio);
 #ifdef CONFIG_BLK_CGROUP_IOCOST
 	bio->bi_iocost_cost = 0;
 #endif
@@ -504,17 +506,14 @@ struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs,
 		if (unlikely(!bvl))
 			goto err_free;
 
-		bio_init(bio, bvl, nr_vecs);
+		bio_init(bio, bdev, bvl, nr_vecs, opf);
 	} else if (nr_vecs) {
-		bio_init(bio, bio->bi_inline_vecs, BIO_INLINE_VECS);
+		bio_init(bio, bdev, bio->bi_inline_vecs, BIO_INLINE_VECS, opf);
 	} else {
-		bio_init(bio, NULL, 0);
+		bio_init(bio, bdev, NULL, 0, opf);
 	}
 
 	bio->bi_pool = bs;
-	if (bdev)
-		bio_set_dev(bio, bdev);
-	bio->bi_opf = opf;
 	return bio;
 
 err_free:
@@ -542,7 +541,8 @@ struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned short nr_iovecs)
 	bio = kmalloc(struct_size(bio, bi_inline_vecs, nr_iovecs), gfp_mask);
 	if (unlikely(!bio))
 		return NULL;
-	bio_init(bio, nr_iovecs ? bio->bi_inline_vecs : NULL, nr_iovecs);
+	bio_init(bio, NULL, nr_iovecs ? bio->bi_inline_vecs : NULL, nr_iovecs,
+		 0);
 	bio->bi_pool = NULL;
 	return bio;
 }
@@ -1756,9 +1756,8 @@ struct bio *bio_alloc_kiocb(struct kiocb *kiocb, struct block_device *bdev,
 		cache->free_list = bio->bi_next;
 		cache->nr--;
 		put_cpu();
-		bio_init(bio, nr_vecs ? bio->bi_inline_vecs : NULL, nr_vecs);
-		bio_set_dev(bio, bdev);
-		bio->bi_opf = opf;
+		bio_init(bio, bdev, nr_vecs ? bio->bi_inline_vecs : NULL,
+			 nr_vecs, opf);
 		bio->bi_pool = bs;
 		bio_set_flag(bio, BIO_PERCPU_CACHE);
 		return bio;
diff --git a/block/blk-flush.c b/block/blk-flush.c
index e4df894189ced..c689687248706 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -460,9 +460,7 @@ int blkdev_issue_flush(struct block_device *bdev)
 {
 	struct bio bio;
 
-	bio_init(&bio, NULL, 0);
-	bio_set_dev(&bio, bdev);
-	bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
+	bio_init(&bio, bdev, NULL, 0, REQ_OP_WRITE | REQ_PREFLUSH);
 	return submit_bio_wait(&bio);
 }
 EXPORT_SYMBOL(blkdev_issue_flush);
diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 5ab755d792c81..602bef54c8134 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -238,10 +238,7 @@ static int blkdev_zone_reset_all(struct block_device *bdev, gfp_t gfp_mask)
 {
 	struct bio bio;
 
-	bio_init(&bio, NULL, 0);
-	bio_set_dev(&bio, bdev);
-	bio.bi_opf = REQ_OP_ZONE_RESET_ALL | REQ_SYNC;
-
+	bio_init(&bio, bdev, NULL, 0, REQ_OP_ZONE_RESET_ALL | REQ_SYNC);
 	return submit_bio_wait(&bio);
 }
 
diff --git a/block/fops.c b/block/fops.c
index c683596847731..3696665e586a8 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -75,8 +75,13 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
 			return -ENOMEM;
 	}
 
-	bio_init(&bio, vecs, nr_pages);
-	bio_set_dev(&bio, bdev);
+	if (iov_iter_rw(iter) == READ) {
+		bio_init(&bio, bdev, vecs, nr_pages, REQ_OP_READ);
+		if (iter_is_iovec(iter))
+			should_dirty = true;
+	} else {
+		bio_init(&bio, bdev, vecs, nr_pages, dio_bio_write_op(iocb));
+	}
 	bio.bi_iter.bi_sector = pos >> SECTOR_SHIFT;
 	bio.bi_write_hint = iocb->ki_hint;
 	bio.bi_private = current;
@@ -88,14 +93,9 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
 		goto out;
 	ret = bio.bi_iter.bi_size;
 
-	if (iov_iter_rw(iter) == READ) {
-		bio.bi_opf = REQ_OP_READ;
-		if (iter_is_iovec(iter))
-			should_dirty = true;
-	} else {
-		bio.bi_opf = dio_bio_write_op(iocb);
+	if (iov_iter_rw(iter) == WRITE)
 		task_io_account_write(ret);
-	}
+
 	if (iocb->ki_flags & IOCB_NOWAIT)
 		bio.bi_opf |= REQ_NOWAIT;
 	if (iocb->ki_flags & IOCB_HIPRI)
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index e611411a934ce..19c2d0327e157 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -4129,15 +4129,13 @@ static int __floppy_read_block_0(struct block_device *bdev, int drive)
 
 	cbdata.drive = drive;
 
-	bio_init(&bio, &bio_vec, 1);
-	bio_set_dev(&bio, bdev);
+	bio_init(&bio, bdev, &bio_vec, 1, REQ_OP_READ);
 	bio_add_page(&bio, page, block_size(bdev), 0);
 
 	bio.bi_iter.bi_sector = 0;
 	bio.bi_flags |= (1 << BIO_QUIET);
 	bio.bi_private = &cbdata;
 	bio.bi_end_io = floppy_rb0_cb;
-	bio_set_op_attrs(&bio, REQ_OP_READ, 0);
 
 	init_completion(&cbdata.complete);
 
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index f3fe0ea8aa80f..a3a5e1e713268 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -743,10 +743,9 @@ static ssize_t writeback_store(struct device *dev,
 			continue;
 		}
 
-		bio_init(&bio, &bio_vec, 1);
-		bio_set_dev(&bio, zram->bdev);
+		bio_init(&bio, zram->bdev, &bio_vec, 1,
+			 REQ_OP_WRITE | REQ_SYNC);
 		bio.bi_iter.bi_sector = blk_idx * (PAGE_SIZE >> 9);
-		bio.bi_opf = REQ_OP_WRITE | REQ_SYNC;
 
 		bio_add_page(&bio, bvec.bv_page, bvec.bv_len,
 				bvec.bv_offset);
diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
index 9c6f9ec55b724..020712c5203fd 100644
--- a/drivers/md/bcache/io.c
+++ b/drivers/md/bcache/io.c
@@ -26,7 +26,8 @@ struct bio *bch_bbio_alloc(struct cache_set *c)
 	struct bbio *b = mempool_alloc(&c->bio_meta, GFP_NOIO);
 	struct bio *bio = &b->bio;
 
-	bio_init(bio, bio->bi_inline_vecs, meta_bucket_pages(&c->cache->sb));
+	bio_init(bio, NULL, bio->bi_inline_vecs,
+		 meta_bucket_pages(&c->cache->sb), 0);
 
 	return bio;
 }
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index 61bd79babf7ae..6d26c5b06e2b6 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -611,11 +611,9 @@ static void do_journal_discard(struct cache *ca)
 
 		atomic_set(&ja->discard_in_flight, DISCARD_IN_FLIGHT);
 
-		bio_init(bio, bio->bi_inline_vecs, 1);
-		bio_set_op_attrs(bio, REQ_OP_DISCARD, 0);
+		bio_init(bio, ca->bdev, bio->bi_inline_vecs, 1, REQ_OP_DISCARD);
 		bio->bi_iter.bi_sector	= bucket_to_sector(ca->set,
 						ca->sb.d[ja->discard_idx]);
-		bio_set_dev(bio, ca->bdev);
 		bio->bi_iter.bi_size	= bucket_bytes(ca);
 		bio->bi_end_io		= journal_discard_endio;
 
diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c
index b9c3d27ec093a..99499d1f6e666 100644
--- a/drivers/md/bcache/movinggc.c
+++ b/drivers/md/bcache/movinggc.c
@@ -79,8 +79,8 @@ static void moving_init(struct moving_io *io)
 {
 	struct bio *bio = &io->bio.bio;
 
-	bio_init(bio, bio->bi_inline_vecs,
-		 DIV_ROUND_UP(KEY_SIZE(&io->w->key), PAGE_SECTORS));
+	bio_init(bio, NULL, bio->bi_inline_vecs,
+		 DIV_ROUND_UP(KEY_SIZE(&io->w->key), PAGE_SECTORS), 0);
 	bio_get(bio);
 	bio_set_prio(bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));
 
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index c4b7e434de8ac..d4b98ebffd948 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -685,7 +685,7 @@ static void do_bio_hook(struct search *s,
 {
 	struct bio *bio = &s->bio.bio;
 
-	bio_init(bio, NULL, 0);
+	bio_init(bio, NULL, NULL, 0, 0);
 	__bio_clone_fast(bio, orig_bio);
 	/*
 	 * bi_end_io can be set separately somewhere else, e.g. the
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index c31a62b963f00..bf3de149d3c9f 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -342,8 +342,7 @@ void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent)
 	down(&dc->sb_write_mutex);
 	closure_init(cl, parent);
 
-	bio_init(bio, dc->sb_bv, 1);
-	bio_set_dev(bio, dc->bdev);
+	bio_init(bio, dc->bdev, dc->sb_bv, 1, 0);
 	bio->bi_end_io	= write_bdev_super_endio;
 	bio->bi_private = dc;
 
@@ -386,8 +385,7 @@ void bcache_write_super(struct cache_set *c)
 	if (ca->sb.version < version)
 		ca->sb.version = version;
 
-	bio_init(bio, ca->sb_bv, 1);
-	bio_set_dev(bio, ca->bdev);
+	bio_init(bio, ca->bdev, ca->sb_bv, 1, 0);
 	bio->bi_end_io	= write_super_endio;
 	bio->bi_private = ca;
 
@@ -2239,7 +2237,7 @@ static int cache_alloc(struct cache *ca)
 	__module_get(THIS_MODULE);
 	kobject_init(&ca->kobj, &bch_cache_ktype);
 
-	bio_init(&ca->journal.bio, ca->journal.bio.bi_inline_vecs, 8);
+	bio_init(&ca->journal.bio, NULL, ca->journal.bio.bi_inline_vecs, 8, 0);
 
 	/*
 	 * when ca->sb.njournal_buckets is not zero, journal exists,
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index c7560f66dca88..d42301e6309d4 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -292,8 +292,8 @@ static void dirty_init(struct keybuf_key *w)
 	struct dirty_io *io = w->private;
 	struct bio *bio = &io->bio;
 
-	bio_init(bio, bio->bi_inline_vecs,
-		 DIV_ROUND_UP(KEY_SIZE(&w->key), PAGE_SECTORS));
+	bio_init(bio, NULL, bio->bi_inline_vecs,
+		 DIV_ROUND_UP(KEY_SIZE(&w->key), PAGE_SECTORS), 0);
 	if (!io->dc->writeback_percent)
 		bio_set_prio(bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));
 
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 069e29013b6be..fa596b654c99c 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1303,9 +1303,8 @@ static int __send_empty_flush(struct clone_info *ci)
 	 * need to reference it after submit. It's just used as
 	 * the basis for the clone(s).
 	 */
-	bio_init(&flush_bio, NULL, 0);
-	flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC;
-	bio_set_dev(&flush_bio, ci->io->md->disk->part0);
+	bio_init(&flush_bio, ci->io->md->disk->part0, NULL, 0,
+		 REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC);
 
 	ci->bio = &flush_bio;
 	ci->sector_count = 0;
diff --git a/drivers/md/md-multipath.c b/drivers/md/md-multipath.c
index e7d6486f090ff..5e15940634d85 100644
--- a/drivers/md/md-multipath.c
+++ b/drivers/md/md-multipath.c
@@ -121,7 +121,7 @@ static bool multipath_make_request(struct mddev *mddev, struct bio * bio)
 	}
 	multipath = conf->multipaths + mp_bh->path;
 
-	bio_init(&mp_bh->bio, NULL, 0);
+	bio_init(&mp_bh->bio, NULL, NULL, 0, 0);
 	__bio_clone_fast(&mp_bh->bio, bio);
 
 	mp_bh->bio.bi_iter.bi_sector += multipath->rdev->data_offset;
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 40fc1f7e65c5d..0a89f072dae0d 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -998,13 +998,11 @@ int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
 	struct bio bio;
 	struct bio_vec bvec;
 
-	bio_init(&bio, &bvec, 1);
-
 	if (metadata_op && rdev->meta_bdev)
-		bio_set_dev(&bio, rdev->meta_bdev);
+		bio_init(&bio, rdev->meta_bdev, &bvec, 1, op | op_flags);
 	else
-		bio_set_dev(&bio, rdev->bdev);
-	bio.bi_opf = op | op_flags;
+		bio_init(&bio, rdev->bdev, &bvec, 1, op | op_flags);
+
 	if (metadata_op)
 		bio.bi_iter.bi_sector = sector + rdev->sb_start;
 	else if (rdev->mddev->reshape_position != MaxSector &&
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 66313adf99875..98b9ca11c28d8 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -3108,7 +3108,7 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
 	INIT_LIST_HEAD(&log->io_end_ios);
 	INIT_LIST_HEAD(&log->flushing_ios);
 	INIT_LIST_HEAD(&log->finished_ios);
-	bio_init(&log->flush_bio, NULL, 0);
+	bio_init(&log->flush_bio, NULL, NULL, 0, 0);
 
 	log->io_kc = KMEM_CACHE(r5l_io_unit, 0);
 	if (!log->io_kc)
diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c
index 054d3bb252d48..3446797fa0aca 100644
--- a/drivers/md/raid5-ppl.c
+++ b/drivers/md/raid5-ppl.c
@@ -250,7 +250,7 @@ static struct ppl_io_unit *ppl_new_iounit(struct ppl_log *log,
 	INIT_LIST_HEAD(&io->stripe_list);
 	atomic_set(&io->pending_stripes, 0);
 	atomic_set(&io->pending_flushes, 0);
-	bio_init(&io->bio, io->biovec, PPL_IO_INLINE_BVECS);
+	bio_init(&io->bio, NULL, io->biovec, PPL_IO_INLINE_BVECS, 0);
 
 	pplhdr = page_address(io->header_page);
 	clear_page(pplhdr);
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index ffe720c73b0a5..a9dcc5bc9c329 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2310,8 +2310,8 @@ static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp,
 		for (i = 0; i < disks; i++) {
 			struct r5dev *dev = &sh->dev[i];
 
-			bio_init(&dev->req, &dev->vec, 1);
-			bio_init(&dev->rreq, &dev->rvec, 1);
+			bio_init(&dev->req, NULL, &dev->vec, 1, 0);
+			bio_init(&dev->rreq, NULL, &dev->rvec, 1, 0);
 		}
 
 		if (raid5_has_ppl(conf)) {
diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c
index e092af3abc710..95c2bbb0b2f5f 100644
--- a/drivers/nvme/target/io-cmd-bdev.c
+++ b/drivers/nvme/target/io-cmd-bdev.c
@@ -267,9 +267,8 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
 
 	if (nvmet_use_inline_bvec(req)) {
 		bio = &req->b.inline_bio;
-		bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
-		bio_set_dev(bio, req->ns->bdev);
-		bio->bi_opf = op;
+		bio_init(bio, req->ns->bdev, req->inline_bvec,
+			 ARRAY_SIZE(req->inline_bvec), op);
 	} else {
 		bio = bio_alloc(req->ns->bdev, bio_max_segs(sg_cnt), op,
 				GFP_KERNEL);
@@ -328,11 +327,10 @@ static void nvmet_bdev_execute_flush(struct nvmet_req *req)
 	if (!nvmet_check_transfer_len(req, 0))
 		return;
 
-	bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
-	bio_set_dev(bio, req->ns->bdev);
+	bio_init(bio, req->ns->bdev, req->inline_bvec,
+		 ARRAY_SIZE(req->inline_bvec), REQ_OP_WRITE | REQ_PREFLUSH);
 	bio->bi_private = req;
 	bio->bi_end_io = nvmet_bio_done;
-	bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
 
 	submit_bio(bio);
 }
diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c
index 38f72968c3fde..a810bf569fff8 100644
--- a/drivers/nvme/target/passthru.c
+++ b/drivers/nvme/target/passthru.c
@@ -206,8 +206,8 @@ static int nvmet_passthru_map_sg(struct nvmet_req *req, struct request *rq)
 
 	if (nvmet_use_inline_bvec(req)) {
 		bio = &req->p.inline_bio;
-		bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
-		bio->bi_opf = req_op(rq);
+		bio_init(bio, NULL, req->inline_bvec,
+			 ARRAY_SIZE(req->inline_bvec), req_op(rq));
 	} else {
 		bio = bio_alloc(NULL, bio_max_segs(req->sg_cnt), req_op(rq),
 				GFP_KERNEL);
diff --git a/drivers/nvme/target/zns.c b/drivers/nvme/target/zns.c
index 62c53e8f26d35..3e421217a7ade 100644
--- a/drivers/nvme/target/zns.c
+++ b/drivers/nvme/target/zns.c
@@ -552,8 +552,8 @@ void nvmet_bdev_execute_zone_append(struct nvmet_req *req)
 
 	if (nvmet_use_inline_bvec(req)) {
 		bio = &req->z.inline_bio;
-		bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
-		bio->bi_opf = op;
+		bio_init(bio, req->ns->bdev, req->inline_bvec,
+			 ARRAY_SIZE(req->inline_bvec), op);
 	} else {
 		bio = bio_alloc(req->ns->bdev, req->sg_cnt, op, GFP_KERNEL);
 	}
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 70f3657a6ec06..491534e908615 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -549,10 +549,8 @@ static int iomap_read_folio_sync(loff_t block_start, struct folio *folio,
 	struct bio_vec bvec;
 	struct bio bio;
 
-	bio_init(&bio, &bvec, 1);
-	bio.bi_opf = REQ_OP_READ;
+	bio_init(&bio, iomap->bdev, &bvec, 1, REQ_OP_READ);
 	bio.bi_iter.bi_sector = iomap_sector(iomap, block_start);
-	bio_set_dev(&bio, iomap->bdev);
 	bio_add_folio(&bio, folio, plen, poff);
 	return submit_bio_wait(&bio);
 }
diff --git a/fs/xfs/xfs_bio_io.c b/fs/xfs/xfs_bio_io.c
index eff4a9f21dcff..32fa02945f739 100644
--- a/fs/xfs/xfs_bio_io.c
+++ b/fs/xfs/xfs_bio_io.c
@@ -36,9 +36,7 @@ xfs_flush_bdev_async(
 		return;
 	}
 
-	bio_init(bio, NULL, 0);
-	bio_set_dev(bio, bdev);
-	bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC;
+	bio_init(bio, bdev, NULL, 0, REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC);
 	bio->bi_private = done;
 	bio->bi_end_io = xfs_flush_bdev_async_endio;
 
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 89fec9a18c349..16f9edbda4eb3 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1883,19 +1883,19 @@ xlog_write_iclog(
 		return;
 	}
 
-	bio_init(&iclog->ic_bio, iclog->ic_bvec, howmany(count, PAGE_SIZE));
-	bio_set_dev(&iclog->ic_bio, log->l_targ->bt_bdev);
-	iclog->ic_bio.bi_iter.bi_sector = log->l_logBBstart + bno;
-	iclog->ic_bio.bi_end_io = xlog_bio_end_io;
-	iclog->ic_bio.bi_private = iclog;
-
 	/*
 	 * We use REQ_SYNC | REQ_IDLE here to tell the block layer the are more
 	 * IOs coming immediately after this one. This prevents the block layer
 	 * writeback throttle from throttling log writes behind background
 	 * metadata writeback and causing priority inversions.
 	 */
-	iclog->ic_bio.bi_opf = REQ_OP_WRITE | REQ_META | REQ_SYNC | REQ_IDLE;
+	bio_init(&iclog->ic_bio, log->l_targ->bt_bdev, iclog->ic_bvec,
+		 howmany(count, PAGE_SIZE),
+		 REQ_OP_WRITE | REQ_META | REQ_SYNC | REQ_IDLE);
+	iclog->ic_bio.bi_iter.bi_sector = log->l_logBBstart + bno;
+	iclog->ic_bio.bi_end_io = xlog_bio_end_io;
+	iclog->ic_bio.bi_private = iclog;
+
 	if (iclog->ic_flags & XLOG_ICL_NEED_FLUSH) {
 		iclog->ic_bio.bi_opf |= REQ_PREFLUSH;
 		/*
diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
index c0fc2c326dcee..d331b52592a0a 100644
--- a/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@ -1540,10 +1540,8 @@ static int zonefs_read_super(struct super_block *sb)
 	if (!page)
 		return -ENOMEM;
 
-	bio_init(&bio, &bio_vec, 1);
+	bio_init(&bio, sb->s_bdev, &bio_vec, 1, REQ_OP_READ);
 	bio.bi_iter.bi_sector = 0;
-	bio.bi_opf = REQ_OP_READ;
-	bio_set_dev(&bio, sb->s_bdev);
 	bio_add_page(&bio, page, PAGE_SIZE, 0);
 
 	ret = submit_bio_wait(&bio);
diff --git a/include/linux/bio.h b/include/linux/bio.h
index be6ac92913d48..41bedf727f59c 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -456,8 +456,8 @@ static inline int bio_iov_vecs_to_alloc(struct iov_iter *iter, int max_segs)
 struct request_queue;
 
 extern int submit_bio_wait(struct bio *bio);
-extern void bio_init(struct bio *bio, struct bio_vec *table,
-		     unsigned short max_vecs);
+void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table,
+	      unsigned short max_vecs, unsigned int opf);
 extern void bio_uninit(struct bio *);
 extern void bio_reset(struct bio *);
 void bio_chain(struct bio *, struct bio *);
-- 
GitLab


From a7c50c940477bae89fb2b4f51bd969a2d95d7512 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 24 Jan 2022 10:11:07 +0100
Subject: [PATCH 0199/1586] block: pass a block_device and opf to bio_reset

Pass the block_device that we plan to use this bio for and the
operation to bio_reset to optimize the assigment.  A NULL block_device
can be passed, both for the passthrough case on a raw request_queue and
to temporarily avoid refactoring some nasty code.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Link: https://lore.kernel.org/r/20220124091107.642561-20-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c                 |  6 +++++-
 drivers/block/pktcdvd.c     |  8 ++------
 drivers/md/bcache/journal.c | 12 ++++--------
 drivers/md/bcache/request.c |  4 ++--
 drivers/md/raid1.c          |  5 ++---
 drivers/md/raid10.c         |  8 +++-----
 drivers/md/raid5-cache.c    |  9 +++------
 drivers/md/raid5.c          |  8 ++++----
 fs/btrfs/disk-io.c          |  4 +---
 fs/crypto/bio.c             |  8 ++------
 include/linux/bio.h         |  9 +--------
 11 files changed, 29 insertions(+), 52 deletions(-)

diff --git a/block/bio.c b/block/bio.c
index b2133d86e885e..03cefe81950f2 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -295,6 +295,8 @@ EXPORT_SYMBOL(bio_init);
 /**
  * bio_reset - reinitialize a bio
  * @bio:	bio to reset
+ * @bdev:	block device to use the bio for
+ * @opf:	operation and flags for bio
  *
  * Description:
  *   After calling bio_reset(), @bio will be in the same state as a freshly
@@ -302,11 +304,13 @@ EXPORT_SYMBOL(bio_init);
  *   preserved are the ones that are initialized by bio_alloc_bioset(). See
  *   comment in struct bio.
  */
-void bio_reset(struct bio *bio)
+void bio_reset(struct bio *bio, struct block_device *bdev, unsigned int opf)
 {
 	bio_uninit(bio);
 	memset(bio, 0, BIO_RESET_BYTES);
 	atomic_set(&bio->__bi_remaining, 1);
+	bio->bi_bdev = bdev;
+	bio->bi_opf = opf;
 }
 EXPORT_SYMBOL(bio_reset);
 
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 2b6b70a39e760..3aa5954429462 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -1020,9 +1020,8 @@ static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt)
 			continue;
 
 		bio = pkt->r_bios[f];
-		bio_reset(bio);
+		bio_reset(bio, pd->bdev, REQ_OP_READ);
 		bio->bi_iter.bi_sector = pkt->sector + f * (CD_FRAMESIZE >> 9);
-		bio_set_dev(bio, pd->bdev);
 		bio->bi_end_io = pkt_end_io_read;
 		bio->bi_private = pkt;
 
@@ -1034,7 +1033,6 @@ static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt)
 			BUG();
 
 		atomic_inc(&pkt->io_wait);
-		bio_set_op_attrs(bio, REQ_OP_READ, 0);
 		pkt_queue_bio(pd, bio);
 		frames_read++;
 	}
@@ -1235,9 +1233,8 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
 {
 	int f;
 
-	bio_reset(pkt->w_bio);
+	bio_reset(pkt->w_bio, pd->bdev, REQ_OP_WRITE);
 	pkt->w_bio->bi_iter.bi_sector = pkt->sector;
-	bio_set_dev(pkt->w_bio, pd->bdev);
 	pkt->w_bio->bi_end_io = pkt_end_io_packet_write;
 	pkt->w_bio->bi_private = pkt;
 
@@ -1270,7 +1267,6 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
 
 	/* Start the write request */
 	atomic_set(&pkt->io_wait, 1);
-	bio_set_op_attrs(pkt->w_bio, REQ_OP_WRITE, 0);
 	pkt_queue_bio(pd, pkt->w_bio);
 }
 
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index 6d26c5b06e2b6..7c2ca52ca3e43 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -53,14 +53,12 @@ static int journal_read_bucket(struct cache *ca, struct list_head *list,
 reread:		left = ca->sb.bucket_size - offset;
 		len = min_t(unsigned int, left, PAGE_SECTORS << JSET_BITS);
 
-		bio_reset(bio);
+		bio_reset(bio, ca->bdev, REQ_OP_READ);
 		bio->bi_iter.bi_sector	= bucket + offset;
-		bio_set_dev(bio, ca->bdev);
 		bio->bi_iter.bi_size	= len << 9;
 
 		bio->bi_end_io	= journal_read_endio;
 		bio->bi_private = &cl;
-		bio_set_op_attrs(bio, REQ_OP_READ, 0);
 		bch_bio_map(bio, data);
 
 		closure_bio_submit(ca->set, bio, &cl);
@@ -771,16 +769,14 @@ static void journal_write_unlocked(struct closure *cl)
 
 		atomic_long_add(sectors, &ca->meta_sectors_written);
 
-		bio_reset(bio);
+		bio_reset(bio, ca->bdev, REQ_OP_WRITE | 
+			  REQ_SYNC | REQ_META | REQ_PREFLUSH | REQ_FUA);
+		bch_bio_map(bio, w->data);
 		bio->bi_iter.bi_sector	= PTR_OFFSET(k, i);
-		bio_set_dev(bio, ca->bdev);
 		bio->bi_iter.bi_size = sectors << 9;
 
 		bio->bi_end_io	= journal_write_endio;
 		bio->bi_private = w;
-		bio_set_op_attrs(bio, REQ_OP_WRITE,
-				 REQ_SYNC|REQ_META|REQ_PREFLUSH|REQ_FUA);
-		bch_bio_map(bio, w->data);
 
 		trace_bcache_journal_write(bio, w->data->keys);
 		bio_list_add(&list, bio);
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index d4b98ebffd948..7ba59d08ed870 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -831,11 +831,11 @@ static void cached_dev_read_done(struct closure *cl)
 	 */
 
 	if (s->iop.bio) {
-		bio_reset(s->iop.bio);
+		bio_reset(s->iop.bio, s->cache_miss->bi_bdev, REQ_OP_READ);
 		s->iop.bio->bi_iter.bi_sector =
 			s->cache_miss->bi_iter.bi_sector;
-		bio_copy_dev(s->iop.bio, s->cache_miss);
 		s->iop.bio->bi_iter.bi_size = s->insert_bio_sectors << 9;
+		bio_clone_blkg_association(s->iop.bio, s->cache_miss);
 		bch_bio_map(s->iop.bio, NULL);
 
 		bio_copy_data(s->cache_miss, s->iop.bio);
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 43276f8fdc815..e7710fb5befb4 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -2166,11 +2166,10 @@ static void process_checks(struct r1bio *r1_bio)
 			continue;
 		/* fixup the bio for reuse, but preserve errno */
 		status = b->bi_status;
-		bio_reset(b);
+		bio_reset(b, conf->mirrors[i].rdev->bdev, REQ_OP_READ);
 		b->bi_status = status;
 		b->bi_iter.bi_sector = r1_bio->sector +
 			conf->mirrors[i].rdev->data_offset;
-		bio_set_dev(b, conf->mirrors[i].rdev->bdev);
 		b->bi_end_io = end_sync_read;
 		rp->raid_bio = r1_bio;
 		b->bi_private = rp;
@@ -2651,7 +2650,7 @@ static struct r1bio *raid1_alloc_init_r1buf(struct r1conf *conf)
 	for (i = conf->poolinfo->raid_disks; i--; ) {
 		bio = r1bio->bios[i];
 		rps = bio->bi_private;
-		bio_reset(bio);
+		bio_reset(bio, NULL, 0);
 		bio->bi_private = rps;
 	}
 	r1bio->master_bio = NULL;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index cb7c58050708e..da07bcbc06d08 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -2422,7 +2422,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
 		 * bi_vecs, as the read request might have corrupted these
 		 */
 		rp = get_resync_pages(tbio);
-		bio_reset(tbio);
+		bio_reset(tbio, conf->mirrors[d].rdev->bdev, REQ_OP_WRITE);
 
 		md_bio_reset_resync_pages(tbio, rp, fbio->bi_iter.bi_size);
 
@@ -2430,7 +2430,6 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
 		tbio->bi_private = rp;
 		tbio->bi_iter.bi_sector = r10_bio->devs[i].addr;
 		tbio->bi_end_io = end_sync_write;
-		bio_set_op_attrs(tbio, REQ_OP_WRITE, 0);
 
 		bio_copy_data(tbio, fbio);
 
@@ -2441,7 +2440,6 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
 		if (test_bit(FailFast, &conf->mirrors[d].rdev->flags))
 			tbio->bi_opf |= MD_FAILFAST;
 		tbio->bi_iter.bi_sector += conf->mirrors[d].rdev->data_offset;
-		bio_set_dev(tbio, conf->mirrors[d].rdev->bdev);
 		submit_bio_noacct(tbio);
 	}
 
@@ -3160,12 +3158,12 @@ static struct r10bio *raid10_alloc_init_r10buf(struct r10conf *conf)
 	for (i = 0; i < nalloc; i++) {
 		bio = r10bio->devs[i].bio;
 		rp = bio->bi_private;
-		bio_reset(bio);
+		bio_reset(bio, NULL, 0);
 		bio->bi_private = rp;
 		bio = r10bio->devs[i].repl_bio;
 		if (bio) {
 			rp = bio->bi_private;
-			bio_reset(bio);
+			bio_reset(bio, NULL, 0);
 			bio->bi_private = rp;
 		}
 	}
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 98b9ca11c28d8..86e2bb89d9c7b 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -1301,10 +1301,9 @@ void r5l_flush_stripe_to_raid(struct r5l_log *log)
 
 	if (!do_flush)
 		return;
-	bio_reset(&log->flush_bio);
-	bio_set_dev(&log->flush_bio, log->rdev->bdev);
+	bio_reset(&log->flush_bio, log->rdev->bdev,
+		  REQ_OP_WRITE | REQ_PREFLUSH);
 	log->flush_bio.bi_end_io = r5l_log_flush_endio;
-	log->flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
 	submit_bio(&log->flush_bio);
 }
 
@@ -1678,9 +1677,7 @@ static int r5l_recovery_fetch_ra_pool(struct r5l_log *log,
 				      struct r5l_recovery_ctx *ctx,
 				      sector_t offset)
 {
-	bio_reset(ctx->ra_bio);
-	bio_set_dev(ctx->ra_bio, log->rdev->bdev);
-	bio_set_op_attrs(ctx->ra_bio, REQ_OP_READ, 0);
+	bio_reset(ctx->ra_bio, log->rdev->bdev, REQ_OP_READ);
 	ctx->ra_bio->bi_iter.bi_sector = log->rdev->data_offset + offset;
 
 	ctx->valid_pages = 0;
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index a9dcc5bc9c329..7c119208a2143 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2677,7 +2677,7 @@ static void raid5_end_read_request(struct bio * bi)
 		(unsigned long long)sh->sector, i, atomic_read(&sh->count),
 		bi->bi_status);
 	if (i == disks) {
-		bio_reset(bi);
+		bio_reset(bi, NULL, 0);
 		BUG();
 		return;
 	}
@@ -2785,7 +2785,7 @@ static void raid5_end_read_request(struct bio * bi)
 		}
 	}
 	rdev_dec_pending(rdev, conf->mddev);
-	bio_reset(bi);
+	bio_reset(bi, NULL, 0);
 	clear_bit(R5_LOCKED, &sh->dev[i].flags);
 	set_bit(STRIPE_HANDLE, &sh->state);
 	raid5_release_stripe(sh);
@@ -2823,7 +2823,7 @@ static void raid5_end_write_request(struct bio *bi)
 		(unsigned long long)sh->sector, i, atomic_read(&sh->count),
 		bi->bi_status);
 	if (i == disks) {
-		bio_reset(bi);
+		bio_reset(bi, NULL, 0);
 		BUG();
 		return;
 	}
@@ -2860,7 +2860,7 @@ static void raid5_end_write_request(struct bio *bi)
 	if (sh->batch_head && bi->bi_status && !replacement)
 		set_bit(STRIPE_BATCH_ERR, &sh->batch_head->state);
 
-	bio_reset(bi);
+	bio_reset(bi, NULL, 0);
 	if (!test_and_clear_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags))
 		clear_bit(R5_LOCKED, &sh->dev[i].flags);
 	set_bit(STRIPE_HANDLE, &sh->state);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index f45aa506f9a6f..505ba21230b1f 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -4154,10 +4154,8 @@ static void write_dev_flush(struct btrfs_device *device)
 		return;
 #endif
 
-	bio_reset(bio);
+	bio_reset(bio, device->bdev, REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH);
 	bio->bi_end_io = btrfs_end_empty_barrier;
-	bio_set_dev(bio, device->bdev);
-	bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH;
 	init_completion(&device->flush_wait);
 	bio->bi_private = &device->flush_wait;
 
diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c
index 755e985a42e0b..2217fe5ece6f9 100644
--- a/fs/crypto/bio.c
+++ b/fs/crypto/bio.c
@@ -80,9 +80,7 @@ static int fscrypt_zeroout_range_inline_crypt(const struct inode *inode,
 			err = submit_bio_wait(bio);
 			if (err)
 				goto out;
-			bio_reset(bio);
-			bio_set_dev(bio, inode->i_sb->s_bdev);
-			bio->bi_opf = REQ_OP_WRITE;
+			bio_reset(bio, inode->i_sb->s_bdev, REQ_OP_WRITE);
 			num_pages = 0;
 		}
 	}
@@ -181,9 +179,7 @@ int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk,
 		err = submit_bio_wait(bio);
 		if (err)
 			goto out;
-		bio_reset(bio);
-		bio_set_dev(bio, inode->i_sb->s_bdev);
-		bio->bi_opf = REQ_OP_WRITE;
+		bio_reset(bio, inode->i_sb->s_bdev, REQ_OP_WRITE);
 	} while (len != 0);
 	err = 0;
 out:
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 41bedf727f59c..18cfe5bb41ea8 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -459,7 +459,7 @@ extern int submit_bio_wait(struct bio *bio);
 void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table,
 	      unsigned short max_vecs, unsigned int opf);
 extern void bio_uninit(struct bio *);
-extern void bio_reset(struct bio *);
+void bio_reset(struct bio *bio, struct block_device *bdev, unsigned int opf);
 void bio_chain(struct bio *, struct bio *);
 
 int bio_add_page(struct bio *, struct page *, unsigned len, unsigned off);
@@ -517,13 +517,6 @@ static inline void bio_set_dev(struct bio *bio, struct block_device *bdev)
 	bio_associate_blkg(bio);
 }
 
-static inline void bio_copy_dev(struct bio *dst, struct bio *src)
-{
-	bio_clear_flag(dst, BIO_REMAPPED);
-	dst->bi_bdev = src->bi_bdev;
-	bio_clone_blkg_association(dst, src);
-}
-
 /*
  * BIO list management for use by remapping drivers (e.g. DM or MD) and loop.
  *
-- 
GitLab


From b1f866b013e6e5583f2f0bf4a61d13eddb9a1799 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 27 Jan 2022 08:05:48 +0100
Subject: [PATCH 0200/1586] block: remove blk_needs_flush_plug

blk_needs_flush_plug fails to account for the cb_list, which needs
flushing as well.  Remove it and just check if there is a plug instead
of poking into the internals of the plug structure.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220127070549.1377856-1-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/fs-writeback.c      |  2 +-
 include/linux/blkdev.h | 13 -------------
 kernel/exit.c          |  2 +-
 kernel/sched/core.c    |  2 +-
 4 files changed, 3 insertions(+), 16 deletions(-)

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index f8d7fe6db989e..f4ce38f6fc31c 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -2301,7 +2301,7 @@ void wakeup_flusher_threads(enum wb_reason reason)
 	/*
 	 * If we are expecting writeback progress we must submit plugged IO.
 	 */
-	if (blk_needs_flush_plug(current))
+	if (current->plug)
 		blk_flush_plug(current->plug, true);
 
 	rcu_read_lock();
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 99a4384bb8a56..f902a1c2fac0e 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1055,14 +1055,6 @@ extern void blk_finish_plug(struct blk_plug *);
 
 void blk_flush_plug(struct blk_plug *plug, bool from_schedule);
 
-static inline bool blk_needs_flush_plug(struct task_struct *tsk)
-{
-	struct blk_plug *plug = tsk->plug;
-
-	return plug &&
-		 (plug->mq_list || !list_empty(&plug->cb_list));
-}
-
 int blkdev_issue_flush(struct block_device *bdev);
 long nr_blockdev_pages(void);
 #else /* CONFIG_BLOCK */
@@ -1086,11 +1078,6 @@ static inline void blk_flush_plug(struct blk_plug *plug, bool async)
 {
 }
 
-static inline bool blk_needs_flush_plug(struct task_struct *tsk)
-{
-	return false;
-}
-
 static inline int blkdev_issue_flush(struct block_device *bdev)
 {
 	return 0;
diff --git a/kernel/exit.c b/kernel/exit.c
index b00a25bb4ab93..11fc6c9df9f28 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -735,7 +735,7 @@ void __noreturn do_exit(long code)
 	struct task_struct *tsk = current;
 	int group_dead;
 
-	WARN_ON(blk_needs_flush_plug(tsk));
+	WARN_ON(tsk->plug);
 
 	/*
 	 * If do_dead is called because this processes oopsed, it's possible
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 848eaa0efe0ea..3487bb92d1f2a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6344,7 +6344,7 @@ static inline void sched_submit_work(struct task_struct *tsk)
 	 * If we are going to sleep and we have plugged IO queued,
 	 * make sure to submit it to avoid deadlocks.
 	 */
-	if (blk_needs_flush_plug(tsk))
+	if (tsk->plug)
 		blk_flush_plug(tsk->plug, true);
 }
 
-- 
GitLab


From aa8dcccaf32bfdc09f2aff089d5d60c37da5b7b5 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 27 Jan 2022 08:05:49 +0100
Subject: [PATCH 0201/1586] block: check that there is a plug in blk_flush_plug

Rename blk_flush_plug to __blk_flush_plug and add a wrapper that includes
the NULL check instead of open coding that check everywhere.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Link: https://lore.kernel.org/r/20220127070549.1377856-2-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c       | 7 +++----
 fs/fs-writeback.c      | 6 ++----
 include/linux/blkdev.h | 7 ++++++-
 kernel/sched/core.c    | 7 ++-----
 4 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index d93e3bb9a769b..61f6a0dc4511a 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -991,8 +991,7 @@ int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags)
 	    !test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
 		return 0;
 
-	if (current->plug)
-		blk_flush_plug(current->plug, false);
+	blk_flush_plug(current->plug, false);
 
 	if (blk_queue_enter(q, BLK_MQ_REQ_NOWAIT))
 		return 0;
@@ -1274,7 +1273,7 @@ struct blk_plug_cb *blk_check_plugged(blk_plug_cb_fn unplug, void *data,
 }
 EXPORT_SYMBOL(blk_check_plugged);
 
-void blk_flush_plug(struct blk_plug *plug, bool from_schedule)
+void __blk_flush_plug(struct blk_plug *plug, bool from_schedule)
 {
 	if (!list_empty(&plug->cb_list))
 		flush_plug_callbacks(plug, from_schedule);
@@ -1303,7 +1302,7 @@ void blk_flush_plug(struct blk_plug *plug, bool from_schedule)
 void blk_finish_plug(struct blk_plug *plug)
 {
 	if (plug == current->plug) {
-		blk_flush_plug(plug, false);
+		__blk_flush_plug(plug, false);
 		current->plug = NULL;
 	}
 }
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index f4ce38f6fc31c..33d54c9fbefc0 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1903,8 +1903,7 @@ static long writeback_sb_inodes(struct super_block *sb,
 			 * unplug, so get our IOs out the door before we
 			 * give up the CPU.
 			 */
-			if (current->plug)
-				blk_flush_plug(current->plug, false);
+			blk_flush_plug(current->plug, false);
 			cond_resched();
 		}
 
@@ -2301,8 +2300,7 @@ void wakeup_flusher_threads(enum wb_reason reason)
 	/*
 	 * If we are expecting writeback progress we must submit plugged IO.
 	 */
-	if (current->plug)
-		blk_flush_plug(current->plug, true);
+	blk_flush_plug(current->plug, true);
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(bdi, &bdi_list, bdi_list)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index f902a1c2fac0e..654163d3b9036 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1053,7 +1053,12 @@ extern void blk_start_plug(struct blk_plug *);
 extern void blk_start_plug_nr_ios(struct blk_plug *, unsigned short);
 extern void blk_finish_plug(struct blk_plug *);
 
-void blk_flush_plug(struct blk_plug *plug, bool from_schedule);
+void __blk_flush_plug(struct blk_plug *plug, bool from_schedule);
+static inline void blk_flush_plug(struct blk_plug *plug, bool async)
+{
+	if (plug)
+		__blk_flush_plug(plug, async);
+}
 
 int blkdev_issue_flush(struct block_device *bdev);
 long nr_blockdev_pages(void);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 3487bb92d1f2a..46152982e4001 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6344,8 +6344,7 @@ static inline void sched_submit_work(struct task_struct *tsk)
 	 * If we are going to sleep and we have plugged IO queued,
 	 * make sure to submit it to avoid deadlocks.
 	 */
-	if (tsk->plug)
-		blk_flush_plug(tsk->plug, true);
+	blk_flush_plug(tsk->plug, true);
 }
 
 static void sched_update_worker(struct task_struct *tsk)
@@ -8371,9 +8370,7 @@ int io_schedule_prepare(void)
 	int old_iowait = current->in_iowait;
 
 	current->in_iowait = 1;
-	if (current->plug)
-		blk_flush_plug(current->plug, true);
-
+	blk_flush_plug(current->plug, true);
 	return old_iowait;
 }
 
-- 
GitLab


From b42c1fc3d55e077d36718ad9800d89100b2aff81 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 27 Jan 2022 07:41:25 +0100
Subject: [PATCH 0202/1586] block: fix the kerneldoc for bio_end_io_acct

Document the actually existing parameter name.

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220127064125.1314347-1-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 654163d3b9036..3bfc75a2a4509 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1520,7 +1520,7 @@ void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time,
 /**
  * bio_end_io_acct - end I/O accounting for bio based drivers
  * @bio:	bio to end account for
- * @start:	start time returned by bio_start_io_acct()
+ * @start_time:	start time returned by bio_start_io_acct()
  */
 static inline void bio_end_io_acct(struct bio *bio, unsigned long start_time)
 {
-- 
GitLab


From 3767c902719befe44067717fa893a9ef8cb8747d Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 27 Jan 2022 07:42:21 +0100
Subject: [PATCH 0203/1586] MAINTAINERS: add bio.h to the block section

bio.h is part of the block layer, so list it in the MAINTAINERS file
as such.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220127064221.1314477-1-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index f41088418aae2..f88a16ba8b475 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3436,6 +3436,7 @@ F:	Documentation/ABI/stable/sysfs-block
 F:	Documentation/block/
 F:	block/
 F:	drivers/block/
+F:	include/linux/bio.h
 F:	include/linux/blk*
 F:	kernel/trace/blktrace.c
 F:	lib/sbitmap.c
-- 
GitLab


From 455a844d6345639be682977b3f0126be446b702e Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Fri, 28 Jan 2022 12:34:54 +0800
Subject: [PATCH 0204/1586] block: fix boolreturn.cocci warning

Return statements in functions returning bool should use true/false
instead of 1/0.

./block/bio.c:1081:9-10: WARNING: return of 0/1 in function
'bio_add_folio' with return type bool.

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Link: https://lore.kernel.org/r/20220128043454.68927-1-jiapeng.chong@linux.alibaba.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/bio.c b/block/bio.c
index 03cefe81950f2..2e19ca600fcdb 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1078,7 +1078,7 @@ bool bio_add_folio(struct bio *bio, struct folio *folio, size_t len,
 		   size_t off)
 {
 	if (len > UINT_MAX || off > UINT_MAX)
-		return 0;
+		return false;
 	return bio_add_page(bio, &folio->page, len, off) > 0;
 }
 
-- 
GitLab


From 365ab499153cdb2007d54e7e62bcbf2c67f7ab8f Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Fri, 28 Jan 2022 17:09:22 +0300
Subject: [PATCH 0205/1586] fs/ntfs3: remove unnecessary NULL check

This code triggers a Smatch warning:

    fs/ntfs3/fsntfs.c:1606 ntfs_bio_fill_1()
    warn: variable dereferenced before check 'bio' (see line 1591)

The "bio" pointer cannot be NULL so there is no need to check.
Originally there was more extensive NULL checking but it was removed
because bio_alloc() will never fail if it is allowed to sleep.

Remove this check as well.

Fixes: 39146b6f66ba ("ntfs3: remove ntfs_alloc_bio")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220128140922.GA29766@kili
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/ntfs3/fsntfs.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c
index 0660a07c5a96e..3de5700a9b833 100644
--- a/fs/ntfs3/fsntfs.c
+++ b/fs/ntfs3/fsntfs.c
@@ -1603,11 +1603,10 @@ new_bio:
 		}
 	} while (run_get_entry(run, ++run_idx, NULL, &lcn, &clen));
 
-	if (bio) {
-		if (!err)
-			err = submit_bio_wait(bio);
-		bio_put(bio);
-	}
+	if (!err)
+		err = submit_bio_wait(bio);
+	bio_put(bio);
+
 	blk_finish_plug(&plug);
 out:
 	unlock_page(fill);
-- 
GitLab


From 7030c428fae100c339436f5cb6f9e7c0574097ad Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 1 Feb 2022 21:05:59 +0100
Subject: [PATCH 0206/1586] spi: Replace acpi_bus_get_device()

Replace acpi_bus_get_device() that is going to be dropped with
acpi_fetch_acpi_dev().

No intentional functional impact.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Link: https://lore.kernel.org/r/2231987.ElGaqSPkdT@kreacher
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index ec9f2ed579e34..c98c55f44ee6c 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -2585,10 +2585,10 @@ static acpi_status acpi_register_spi_device(struct spi_controller *ctlr,
 static acpi_status acpi_spi_add_device(acpi_handle handle, u32 level,
 				       void *data, void **return_value)
 {
+	struct acpi_device *adev = acpi_fetch_acpi_dev(handle);
 	struct spi_controller *ctlr = data;
-	struct acpi_device *adev;
 
-	if (acpi_bus_get_device(handle, &adev))
+	if (!adev)
 		return AE_OK;
 
 	return acpi_register_spi_device(ctlr, adev);
-- 
GitLab


From 8d37f2710f022837635d9f97db3ac8c853e86979 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Wed, 2 Feb 2022 00:45:35 +0100
Subject: [PATCH 0207/1586] spi: mpc512x-psc: Fix compile errors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

My patch created compilation bugs in the MPC512x-PSC driver.
Fix them up.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Cc: Anatolij Gustschin <agust@denx.de>
Cc: linuxppc-dev@lists.ozlabs.org
Reported-by: kernel test robot <lkp@intel.com>
Fixes: 2818824ced4b (" spi: mpc512x-psc: Convert to use GPIO descriptors")
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20220201234535.569973-1-linus.walleij@linaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-mpc512x-psc.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/spi/spi-mpc512x-psc.c b/drivers/spi/spi-mpc512x-psc.c
index 8a488d8e4c1be..03630359ce70d 100644
--- a/drivers/spi/spi-mpc512x-psc.c
+++ b/drivers/spi/spi-mpc512x-psc.c
@@ -127,7 +127,7 @@ static void mpc512x_psc_spi_activate_cs(struct spi_device *spi)
 	out_be32(psc_addr(mps, ccr), ccr);
 	mps->bits_per_word = cs->bits_per_word;
 
-	if (cs->gpiod) {
+	if (spi->cs_gpiod) {
 		if (mps->cs_control)
 			/* boardfile override */
 			mps->cs_control(spi, (spi->mode & SPI_CS_HIGH) ? 1 : 0);
@@ -373,7 +373,6 @@ static int mpc512x_psc_spi_unprep_xfer_hw(struct spi_master *master)
 static int mpc512x_psc_spi_setup(struct spi_device *spi)
 {
 	struct mpc512x_psc_spi_cs *cs = spi->controller_state;
-	int ret;
 
 	if (spi->bits_per_word % 8)
 		return -EINVAL;
-- 
GitLab


From fe13889c390e14205e064d7e159e61eb5da4b1c3 Mon Sep 17 00:00:00 2001
From: Changbin Du <changbin.du@gmail.com>
Date: Fri, 28 Jan 2022 19:07:27 +0800
Subject: [PATCH 0208/1586] genirq, softirq: Use in_hardirq() instead of
 in_irq()

Replace the obsolete and ambiguos macro in_irq() with the new macro
in_hardirq().

Signed-off-by: Changbin Du <changbin.du@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20220128110727.5110-1-changbin.du@gmail.com
---
 kernel/irq/irqdesc.c |  4 ++--
 kernel/softirq.c     | 10 +++++-----
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 2267e6527db3c..6167d32e27da2 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -640,7 +640,7 @@ int handle_irq_desc(struct irq_desc *desc)
 		return -EINVAL;
 
 	data = irq_desc_get_irq_data(desc);
-	if (WARN_ON_ONCE(!in_irq() && handle_enforce_irqctx(data)))
+	if (WARN_ON_ONCE(!in_hardirq() && handle_enforce_irqctx(data)))
 		return -EPERM;
 
 	generic_handle_irq_desc(desc);
@@ -676,7 +676,7 @@ EXPORT_SYMBOL_GPL(generic_handle_irq);
  */
 int generic_handle_domain_irq(struct irq_domain *domain, unsigned int hwirq)
 {
-	WARN_ON_ONCE(!in_irq());
+	WARN_ON_ONCE(!in_hardirq());
 	return handle_irq_desc(irq_resolve_mapping(domain, hwirq));
 }
 EXPORT_SYMBOL_GPL(generic_handle_domain_irq);
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 41f470929e991..fac801815554a 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -222,7 +222,7 @@ void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
 	u32 pending;
 	int curcnt;
 
-	WARN_ON_ONCE(in_irq());
+	WARN_ON_ONCE(in_hardirq());
 	lockdep_assert_irqs_enabled();
 
 	local_irq_save(flags);
@@ -305,7 +305,7 @@ void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
 {
 	unsigned long flags;
 
-	WARN_ON_ONCE(in_irq());
+	WARN_ON_ONCE(in_hardirq());
 
 	raw_local_irq_save(flags);
 	/*
@@ -352,14 +352,14 @@ static void __local_bh_enable(unsigned int cnt)
  */
 void _local_bh_enable(void)
 {
-	WARN_ON_ONCE(in_irq());
+	WARN_ON_ONCE(in_hardirq());
 	__local_bh_enable(SOFTIRQ_DISABLE_OFFSET);
 }
 EXPORT_SYMBOL(_local_bh_enable);
 
 void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
 {
-	WARN_ON_ONCE(in_irq());
+	WARN_ON_ONCE(in_hardirq());
 	lockdep_assert_irqs_enabled();
 #ifdef CONFIG_TRACE_IRQFLAGS
 	local_irq_disable();
@@ -618,7 +618,7 @@ static inline void tick_irq_exit(void)
 
 	/* Make sure that timer wheel updates are propagated */
 	if ((idle_cpu(cpu) && !need_resched()) || tick_nohz_full_cpu(cpu)) {
-		if (!in_irq())
+		if (!in_hardirq())
 			tick_nohz_irq_exit();
 	}
 #endif
-- 
GitLab


From d2d8e896485a52554cea486816c171dc7240792e Mon Sep 17 00:00:00 2001
From: Konstantin Vyshetsky <vkon@google.com>
Date: Mon, 13 Dec 2021 17:12:03 -0800
Subject: [PATCH 0209/1586] f2fs: move discard parameters into
 discard_cmd_control

This patch unifies parameters related to how often discard is issued and
how many requests go out at the same time by placing them in
discard_cmd_control. The move will allow the parameters to be modified
in the future without relying on hard-coded values.

Signed-off-by: Konstantin Vyshetsky <vkon@google.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/f2fs.h    |  4 ++++
 fs/f2fs/segment.c | 22 +++++++++++++---------
 2 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 8178a9152e496..63c90416364bc 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -398,6 +398,10 @@ struct discard_cmd_control {
 	struct mutex cmd_lock;
 	unsigned int nr_discards;		/* # of discards in the list */
 	unsigned int max_discards;		/* max. discards to be issued */
+	unsigned int max_discard_request;	/* max. discard request per round */
+	unsigned int min_discard_issue_time;	/* min. interval between discard issue */
+	unsigned int mid_discard_issue_time;	/* mid. interval between discard issue */
+	unsigned int max_discard_issue_time;	/* max. interval between discard issue */
 	unsigned int discard_granularity;	/* discard granularity */
 	unsigned int undiscard_blks;		/* # of undiscard blocks */
 	unsigned int next_pos;			/* next discard position */
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 216538b573310..56211e201d51b 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1156,14 +1156,14 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi,
 	dpolicy->ordered = false;
 	dpolicy->granularity = granularity;
 
-	dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
+	dpolicy->max_requests = dcc->max_discard_request;
 	dpolicy->io_aware_gran = MAX_PLIST_NUM;
 	dpolicy->timeout = false;
 
 	if (discard_type == DPOLICY_BG) {
-		dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
-		dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
-		dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
+		dpolicy->min_interval = dcc->min_discard_issue_time;
+		dpolicy->mid_interval = dcc->mid_discard_issue_time;
+		dpolicy->max_interval = dcc->max_discard_issue_time;
 		dpolicy->io_aware = true;
 		dpolicy->sync = false;
 		dpolicy->ordered = true;
@@ -1171,12 +1171,12 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi,
 			dpolicy->granularity = 1;
 			if (atomic_read(&dcc->discard_cmd_cnt))
 				dpolicy->max_interval =
-					DEF_MIN_DISCARD_ISSUE_TIME;
+					dcc->min_discard_issue_time;
 		}
 	} else if (discard_type == DPOLICY_FORCE) {
-		dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
-		dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
-		dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
+		dpolicy->min_interval = dcc->min_discard_issue_time;
+		dpolicy->mid_interval = dcc->mid_discard_issue_time;
+		dpolicy->max_interval = dcc->max_discard_issue_time;
 		dpolicy->io_aware = false;
 	} else if (discard_type == DPOLICY_FSTRIM) {
 		dpolicy->io_aware = false;
@@ -1781,7 +1781,7 @@ static int issue_discard_thread(void *data)
 	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
 	wait_queue_head_t *q = &dcc->discard_wait_queue;
 	struct discard_policy dpolicy;
-	unsigned int wait_ms = DEF_MIN_DISCARD_ISSUE_TIME;
+	unsigned int wait_ms = dcc->min_discard_issue_time;
 	int issued;
 
 	set_freezable();
@@ -2180,6 +2180,10 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
 	atomic_set(&dcc->discard_cmd_cnt, 0);
 	dcc->nr_discards = 0;
 	dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg;
+	dcc->max_discard_request = DEF_MAX_DISCARD_REQUEST;
+	dcc->min_discard_issue_time = DEF_MIN_DISCARD_ISSUE_TIME;
+	dcc->mid_discard_issue_time = DEF_MID_DISCARD_ISSUE_TIME;
+	dcc->max_discard_issue_time = DEF_MAX_DISCARD_ISSUE_TIME;
 	dcc->undiscard_blks = 0;
 	dcc->next_pos = 0;
 	dcc->root = RB_ROOT_CACHED;
-- 
GitLab


From b2e4a2b300e5e2042e8d92ec16fc124222b7ecc9 Mon Sep 17 00:00:00 2001
From: Konstantin Vyshetsky <vkon@google.com>
Date: Mon, 13 Dec 2021 17:12:43 -0800
Subject: [PATCH 0210/1586] f2fs: expose discard related parameters in sysfs

This patch exposes max_discard_request, min_discard_issue_time,
mid_discard_issue_time, and max_discard_issue_time in sysfs. This will
allow the user to fine tune discard operations.

Signed-off-by: Konstantin Vyshetsky <vkon@google.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 Documentation/ABI/testing/sysfs-fs-f2fs | 27 +++++++++++++++++++++++++
 fs/f2fs/sysfs.c                         |  8 ++++++++
 2 files changed, 35 insertions(+)

diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index 2416b03ff2837..87d3884c90ea6 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -98,6 +98,33 @@ Description:	Controls the issue rate of discard commands that consist of small
 		checkpoint is triggered, and issued during the checkpoint.
 		By default, it is disabled with 0.
 
+What:		/sys/fs/f2fs/<disk>/max_discard_request
+Date:		December 2021
+Contact:	"Konstantin Vyshetsky" <vkon@google.com>
+Description:	Controls the number of discards a thread will issue at a time.
+		Higher number will allow the discard thread to finish its work
+		faster, at the cost of higher latency for incomming I/O.
+
+What:		/sys/fs/f2fs/<disk>/min_discard_issue_time
+Date:		December 2021
+Contact:	"Konstantin Vyshetsky" <vkon@google.com>
+Description:	Controls the interval the discard thread will wait between
+		issuing discard requests when there are discards to be issued and
+		no I/O aware interruptions occur.
+
+What:		/sys/fs/f2fs/<disk>/mid_discard_issue_time
+Date:		December 2021
+Contact:	"Konstantin Vyshetsky" <vkon@google.com>
+Description:	Controls the interval the discard thread will wait between
+		issuing discard requests when there are discards to be issued and
+		an I/O aware interruption occurs.
+
+What:		/sys/fs/f2fs/<disk>/max_discard_issue_time
+Date:		December 2021
+Contact:	"Konstantin Vyshetsky" <vkon@google.com>
+Description:	Controls the interval the discard thread will wait when there are
+		no discard operations to be issued.
+
 What:		/sys/fs/f2fs/<disk>/discard_granularity
 Date:		July 2017
 Contact:	"Chao Yu" <yuchao0@huawei.com>
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index 2bccdaedfb000..281bc0133ee6f 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -716,6 +716,10 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_idle, gc_mode);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_urgent, gc_mode);
 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments);
 F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_small_discards, max_discards);
+F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_discard_request, max_discard_request);
+F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, min_discard_issue_time, min_discard_issue_time);
+F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, mid_discard_issue_time, mid_discard_issue_time);
+F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_discard_issue_time, max_discard_issue_time);
 F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, discard_granularity, discard_granularity);
 F2FS_RW_ATTR(RESERVED_BLOCKS, f2fs_sb_info, reserved_blocks, reserved_blocks);
 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
@@ -832,6 +836,10 @@ static struct attribute *f2fs_attrs[] = {
 	ATTR_LIST(reclaim_segments),
 	ATTR_LIST(main_blkaddr),
 	ATTR_LIST(max_small_discards),
+	ATTR_LIST(max_discard_request),
+	ATTR_LIST(min_discard_issue_time),
+	ATTR_LIST(mid_discard_issue_time),
+	ATTR_LIST(max_discard_issue_time),
 	ATTR_LIST(discard_granularity),
 	ATTR_LIST(pending_discard),
 	ATTR_LIST(batched_trim_sections),
-- 
GitLab


From 4e28b22225e3a8b7f1bd3e093301802a2238071a Mon Sep 17 00:00:00 2001
From: David Heidelberg <david@ixit.cz>
Date: Wed, 2 Feb 2022 11:47:15 +0100
Subject: [PATCH 0211/1586] spi: dt-bindings: add mising description type to
 reg property

Added missing description type.

Fixes warning:
Documentation/devicetree/bindings/spi/spi-sunplus-sp7021.yaml: properties:reg:items: 'anyOf' conditional failed

Fixes: a708078eeb99 ("spi: Add Sunplus SP7021 schema")

Signed-off-by: David Heidelberg <david@ixit.cz>
Link: https://lore.kernel.org/r/20220202104715.27839-1-david@ixit.cz
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 Documentation/devicetree/bindings/spi/spi-sunplus-sp7021.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/devicetree/bindings/spi/spi-sunplus-sp7021.yaml b/Documentation/devicetree/bindings/spi/spi-sunplus-sp7021.yaml
index 24382cdda6455..38589fdbc80db 100644
--- a/Documentation/devicetree/bindings/spi/spi-sunplus-sp7021.yaml
+++ b/Documentation/devicetree/bindings/spi/spi-sunplus-sp7021.yaml
@@ -20,8 +20,8 @@ properties:
 
   reg:
     items:
-      - the SPI master registers
-      - the SPI slave registers
+      - description: the SPI master registers
+      - description: the SPI slave registers
 
   reg-names:
     items:
-- 
GitLab


From 4a960e8941bd59fe20f8f774de371f40f222a0c7 Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Date: Thu, 27 Jan 2022 11:34:48 -0800
Subject: [PATCH 0212/1586] x86/Documentation: Describe the Intel Hardware
 Feedback Interface

Start a documentation file to describe the purpose and operation of Intel's
Hardware Feedback Interface. Describe how this interface is used in Linux
to relay performance and energy efficiency updates to userspace.

Reviewed-by: Len Brown <len.brown@intel.com>
Suggested-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/x86/index.rst     |  1 +
 Documentation/x86/intel-hfi.rst | 72 +++++++++++++++++++++++++++++++++
 2 files changed, 73 insertions(+)
 create mode 100644 Documentation/x86/intel-hfi.rst

diff --git a/Documentation/x86/index.rst b/Documentation/x86/index.rst
index f498f1d36cd3c..982c8af853b9f 100644
--- a/Documentation/x86/index.rst
+++ b/Documentation/x86/index.rst
@@ -21,6 +21,7 @@ x86-specific Documentation
    tlb
    mtrr
    pat
+   intel-hfi
    intel-iommu
    intel_txt
    amd-memory-encryption
diff --git a/Documentation/x86/intel-hfi.rst b/Documentation/x86/intel-hfi.rst
new file mode 100644
index 0000000000000..49dea58ea4fb2
--- /dev/null
+++ b/Documentation/x86/intel-hfi.rst
@@ -0,0 +1,72 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============================================================
+Hardware-Feedback Interface for scheduling on Intel Hardware
+============================================================
+
+Overview
+--------
+
+Intel has described the Hardware Feedback Interface (HFI) in the Intel 64 and
+IA-32 Architectures Software Developer's Manual (Intel SDM) Volume 3 Section
+14.6 [1]_.
+
+The HFI gives the operating system a performance and energy efficiency
+capability data for each CPU in the system. Linux can use the information from
+the HFI to influence task placement decisions.
+
+The Hardware Feedback Interface
+-------------------------------
+
+The Hardware Feedback Interface provides to the operating system information
+about the performance and energy efficiency of each CPU in the system. Each
+capability is given as a unit-less quantity in the range [0-255]. Higher values
+indicate higher capability. Energy efficiency and performance are reported in
+separate capabilities. Even though on some systems these two metrics may be
+related, they are specified as independent capabilities in the Intel SDM.
+
+These capabilities may change at runtime as a result of changes in the
+operating conditions of the system or the action of external factors. The rate
+at which these capabilities are updated is specific to each processor model. On
+some models, capabilities are set at boot time and never change. On others,
+capabilities may change every tens of milliseconds. For instance, a remote
+mechanism may be used to lower Thermal Design Power. Such change can be
+reflected in the HFI. Likewise, if the system needs to be throttled due to
+excessive heat, the HFI may reflect reduced performance on specific CPUs.
+
+The kernel or a userspace policy daemon can use these capabilities to modify
+task placement decisions. For instance, if either the performance or energy
+capabilities of a given logical processor becomes zero, it is an indication that
+the hardware recommends to the operating system to not schedule any tasks on
+that processor for performance or energy efficiency reasons, respectively.
+
+Implementation details for Linux
+--------------------------------
+
+The infrastructure to handle thermal event interrupts has two parts. In the
+Local Vector Table of a CPU's local APIC, there exists a register for the
+Thermal Monitor Register. This register controls how interrupts are delivered
+to a CPU when the thermal monitor generates and interrupt. Further details
+can be found in the Intel SDM Vol. 3 Section 10.5 [1]_.
+
+The thermal monitor may generate interrupts per CPU or per package. The HFI
+generates package-level interrupts. This monitor is configured and initialized
+via a set of machine-specific registers. Specifically, the HFI interrupt and
+status are controlled via designated bits in the IA32_PACKAGE_THERM_INTERRUPT
+and IA32_PACKAGE_THERM_STATUS registers, respectively. There exists one HFI
+table per package. Further details can be found in the Intel SDM Vol. 3
+Section 14.9 [1]_.
+
+The hardware issues an HFI interrupt after updating the HFI table and is ready
+for the operating system to consume it. CPUs receive such interrupt via the
+thermal entry in the Local APIC's Local Vector Table.
+
+When servicing such interrupt, the HFI driver parses the updated table and
+relays the update to userspace using the thermal notification framework. Given
+that there may be many HFI updates every second, the updates relayed to
+userspace are throttled at a rate of CONFIG_HZ jiffies.
+
+References
+----------
+
+.. [1] https://www.intel.com/sdm
-- 
GitLab


From 7b8f40b3de75c971a4e5f9308b06deb59118dbac Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Date: Thu, 27 Jan 2022 11:34:49 -0800
Subject: [PATCH 0213/1586] x86/cpu: Add definitions for the Intel Hardware
 Feedback Interface

Add the CPUID feature bit and the model-specific registers needed to
identify and configure the Intel Hardware Feedback Interface.

Acked-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 arch/x86/include/asm/cpufeatures.h | 1 +
 arch/x86/include/asm/msr-index.h   | 6 ++++++
 2 files changed, 7 insertions(+)

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 6db4e2932b3d8..b39f510128f3a 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -330,6 +330,7 @@
 #define X86_FEATURE_HWP_ACT_WINDOW	(14*32+ 9) /* HWP Activity Window */
 #define X86_FEATURE_HWP_EPP		(14*32+10) /* HWP Energy Perf. Preference */
 #define X86_FEATURE_HWP_PKG_REQ		(14*32+11) /* HWP Package Level Request */
+#define X86_FEATURE_HFI			(14*32+19) /* Hardware Feedback Interface */
 
 /* AMD SVM Feature Identification, CPUID level 0x8000000a (EDX), word 15 */
 #define X86_FEATURE_NPT			(15*32+ 0) /* Nested Page Table support */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 3faf0f97edb1b..89e0a1d5ee36d 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -704,12 +704,14 @@
 
 #define PACKAGE_THERM_STATUS_PROCHOT		(1 << 0)
 #define PACKAGE_THERM_STATUS_POWER_LIMIT	(1 << 10)
+#define PACKAGE_THERM_STATUS_HFI_UPDATED	(1 << 26)
 
 #define MSR_IA32_PACKAGE_THERM_INTERRUPT	0x000001b2
 
 #define PACKAGE_THERM_INT_HIGH_ENABLE		(1 << 0)
 #define PACKAGE_THERM_INT_LOW_ENABLE		(1 << 1)
 #define PACKAGE_THERM_INT_PLN_ENABLE		(1 << 24)
+#define PACKAGE_THERM_INT_HFI_ENABLE		(1 << 25)
 
 /* Thermal Thresholds Support */
 #define THERM_INT_THRESHOLD0_ENABLE    (1 << 15)
@@ -958,4 +960,8 @@
 #define MSR_VM_IGNNE                    0xc0010115
 #define MSR_VM_HSAVE_PA                 0xc0010117
 
+/* Hardware Feedback Interface */
+#define MSR_IA32_HW_FEEDBACK_PTR        0x17d0
+#define MSR_IA32_HW_FEEDBACK_CONFIG     0x17d1
+
 #endif /* _ASM_X86_MSR_INDEX_H */
-- 
GitLab


From 1cb19cabeb0e187b6c244d0da73d27f7432c40dc Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Date: Thu, 27 Jan 2022 11:34:50 -0800
Subject: [PATCH 0214/1586] thermal: intel: hfi: Minimally initialize the
 Hardware Feedback Interface

The Intel Hardware Feedback Interface provides guidance to the operating
system about the performance and energy efficiency capabilities of each
CPU in the system. Capabilities are numbers between 0 and 255 where a
higher number represents a higher capability. For each CPU, energy
efficiency and performance are reported as separate capabilities.

Hardware computes these capabilities based on the operating conditions of
the system such as power and thermal limits. These capabilities are shared
with the operating system in a table resident in memory. Each package in
the system has its own HFI instance. Every logical CPU in the package is
represented in the table. More than one logical CPUs may be represented in
a single table entry. When the hardware updates the table, it generates a
package-level thermal interrupt.

The size and format of the HFI table depend on the supported features and
can only be determined at runtime. To minimally initialize the HFI, parse
its features and allocate one instance per package of a data structure with
the necessary parameters to read and navigate a local copy (i.e., owned by
the driver) of individual HFI tables.

A subsequent changeset will provide per-CPU initialization and interrupt
handling.

Reviewed-by: Len Brown <len.brown@intel.com>
Co-developed by: Aubrey Li <aubrey.li@linux.intel.com>
Signed-off-by: Aubrey Li <aubrey.li@linux.intel.com>
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/thermal/intel/Kconfig       |  12 ++
 drivers/thermal/intel/Makefile      |   1 +
 drivers/thermal/intel/intel_hfi.c   | 181 ++++++++++++++++++++++++++++
 drivers/thermal/intel/intel_hfi.h   |  11 ++
 drivers/thermal/intel/therm_throt.c |   3 +
 5 files changed, 208 insertions(+)
 create mode 100644 drivers/thermal/intel/intel_hfi.c
 create mode 100644 drivers/thermal/intel/intel_hfi.h

diff --git a/drivers/thermal/intel/Kconfig b/drivers/thermal/intel/Kconfig
index c83ea5d04a1da..e9d2925227d4f 100644
--- a/drivers/thermal/intel/Kconfig
+++ b/drivers/thermal/intel/Kconfig
@@ -99,3 +99,15 @@ config INTEL_MENLOW
 	  Intel Menlow platform.
 
 	  If unsure, say N.
+
+config INTEL_HFI_THERMAL
+	bool "Intel Hardware Feedback Interface"
+	depends on CPU_SUP_INTEL
+	depends on X86_THERMAL_VECTOR
+	help
+	  Select this option to enable the Hardware Feedback Interface. If
+	  selected, hardware provides guidance to the operating system on
+	  the performance and energy efficiency capabilities of each CPU.
+	  These capabilities may change as a result of changes in the operating
+	  conditions of the system such power and thermal limits. If selected,
+	  the kernel relays updates in CPUs' capabilities to userspace.
diff --git a/drivers/thermal/intel/Makefile b/drivers/thermal/intel/Makefile
index 960b56268b4a5..9a8d8054f3166 100644
--- a/drivers/thermal/intel/Makefile
+++ b/drivers/thermal/intel/Makefile
@@ -13,3 +13,4 @@ obj-$(CONFIG_INTEL_PCH_THERMAL)	+= intel_pch_thermal.o
 obj-$(CONFIG_INTEL_TCC_COOLING)	+= intel_tcc_cooling.o
 obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o
 obj-$(CONFIG_INTEL_MENLOW)	+= intel_menlow.o
+obj-$(CONFIG_INTEL_HFI_THERMAL) += intel_hfi.o
diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
new file mode 100644
index 0000000000000..969f4a1509730
--- /dev/null
+++ b/drivers/thermal/intel/intel_hfi.c
@@ -0,0 +1,181 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Hardware Feedback Interface Driver
+ *
+ * Copyright (c) 2021, Intel Corporation.
+ *
+ * Authors: Aubrey Li <aubrey.li@linux.intel.com>
+ *          Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+ *
+ *
+ * The Hardware Feedback Interface provides a performance and energy efficiency
+ * capability information for each CPU in the system. Depending on the processor
+ * model, hardware may periodically update these capabilities as a result of
+ * changes in the operating conditions (e.g., power limits or thermal
+ * constraints). On other processor models, there is a single HFI update
+ * at boot.
+ *
+ * This file provides functionality to process HFI updates and relay these
+ * updates to userspace.
+ */
+
+#define pr_fmt(fmt)  "intel-hfi: " fmt
+
+#include <linux/bitops.h>
+#include <linux/cpufeature.h>
+#include <linux/math.h>
+#include <linux/printk.h>
+#include <linux/processor.h>
+#include <linux/slab.h>
+#include <linux/topology.h>
+
+#include "intel_hfi.h"
+
+/* CPUID detection and enumeration definitions for HFI */
+
+#define CPUID_HFI_LEAF 6
+
+union hfi_capabilities {
+	struct {
+		u8	performance:1;
+		u8	energy_efficiency:1;
+		u8	__reserved:6;
+	} split;
+	u8 bits;
+};
+
+union cpuid6_edx {
+	struct {
+		union hfi_capabilities	capabilities;
+		u32			table_pages:4;
+		u32			__reserved:4;
+		s32			index:16;
+	} split;
+	u32 full;
+};
+
+/**
+ * struct hfi_cpu_data - HFI capabilities per CPU
+ * @perf_cap:		Performance capability
+ * @ee_cap:		Energy efficiency capability
+ *
+ * Capabilities of a logical processor in the HFI table. These capabilities are
+ * unitless.
+ */
+struct hfi_cpu_data {
+	u8	perf_cap;
+	u8	ee_cap;
+} __packed;
+
+/**
+ * struct hfi_hdr - Header of the HFI table
+ * @perf_updated:	Hardware updated performance capabilities
+ * @ee_updated:		Hardware updated energy efficiency capabilities
+ *
+ * Properties of the data in an HFI table.
+ */
+struct hfi_hdr {
+	u8	perf_updated;
+	u8	ee_updated;
+} __packed;
+
+/**
+ * struct hfi_instance - Representation of an HFI instance (i.e., a table)
+ * @local_table:	Base of the local copy of the HFI table
+ * @timestamp:		Timestamp of the last update of the local table.
+ *			Located at the base of the local table.
+ * @hdr:		Base address of the header of the local table
+ * @data:		Base address of the data of the local table
+ *
+ * A set of parameters to parse and navigate a specific HFI table.
+ */
+struct hfi_instance {
+	union {
+		void			*local_table;
+		u64			*timestamp;
+	};
+	void			*hdr;
+	void			*data;
+};
+
+/**
+ * struct hfi_features - Supported HFI features
+ * @nr_table_pages:	Size of the HFI table in 4KB pages
+ * @cpu_stride:		Stride size to locate the capability data of a logical
+ *			processor within the table (i.e., row stride)
+ * @hdr_size:		Size of the table header
+ *
+ * Parameters and supported features that are common to all HFI instances
+ */
+struct hfi_features {
+	unsigned int	nr_table_pages;
+	unsigned int	cpu_stride;
+	unsigned int	hdr_size;
+};
+
+static int max_hfi_instances;
+static struct hfi_instance *hfi_instances;
+
+static struct hfi_features hfi_features;
+
+static __init int hfi_parse_features(void)
+{
+	unsigned int nr_capabilities;
+	union cpuid6_edx edx;
+
+	if (!boot_cpu_has(X86_FEATURE_HFI))
+		return -ENODEV;
+
+	/*
+	 * If we are here we know that CPUID_HFI_LEAF exists. Parse the
+	 * supported capabilities and the size of the HFI table.
+	 */
+	edx.full = cpuid_edx(CPUID_HFI_LEAF);
+
+	if (!edx.split.capabilities.split.performance) {
+		pr_debug("Performance reporting not supported! Not using HFI\n");
+		return -ENODEV;
+	}
+
+	/*
+	 * The number of supported capabilities determines the number of
+	 * columns in the HFI table. Exclude the reserved bits.
+	 */
+	edx.split.capabilities.split.__reserved = 0;
+	nr_capabilities = hweight8(edx.split.capabilities.bits);
+
+	/* The number of 4KB pages required by the table */
+	hfi_features.nr_table_pages = edx.split.table_pages + 1;
+
+	/*
+	 * The header contains change indications for each supported feature.
+	 * The size of the table header is rounded up to be a multiple of 8
+	 * bytes.
+	 */
+	hfi_features.hdr_size = DIV_ROUND_UP(nr_capabilities, 8) * 8;
+
+	/*
+	 * Data of each logical processor is also rounded up to be a multiple
+	 * of 8 bytes.
+	 */
+	hfi_features.cpu_stride = DIV_ROUND_UP(nr_capabilities, 8) * 8;
+
+	return 0;
+}
+
+void __init intel_hfi_init(void)
+{
+	if (hfi_parse_features())
+		return;
+
+	/* There is one HFI instance per die/package. */
+	max_hfi_instances = topology_max_packages() *
+			    topology_max_die_per_package();
+
+	/*
+	 * This allocation may fail. CPU hotplug callbacks must check
+	 * for a null pointer.
+	 */
+	hfi_instances = kcalloc(max_hfi_instances, sizeof(*hfi_instances),
+				GFP_KERNEL);
+}
diff --git a/drivers/thermal/intel/intel_hfi.h b/drivers/thermal/intel/intel_hfi.h
new file mode 100644
index 0000000000000..05f748b48a4e4
--- /dev/null
+++ b/drivers/thermal/intel/intel_hfi.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _INTEL_HFI_H
+#define _INTEL_HFI_H
+
+#if defined(CONFIG_INTEL_HFI_THERMAL)
+void __init intel_hfi_init(void);
+#else
+static inline void intel_hfi_init(void) { }
+#endif /* CONFIG_INTEL_HFI_THERMAL */
+
+#endif /* _INTEL_HFI_H */
diff --git a/drivers/thermal/intel/therm_throt.c b/drivers/thermal/intel/therm_throt.c
index dab7e8fb10598..ac408714d52b8 100644
--- a/drivers/thermal/intel/therm_throt.c
+++ b/drivers/thermal/intel/therm_throt.c
@@ -32,6 +32,7 @@
 #include <asm/irq.h>
 #include <asm/msr.h>
 
+#include "intel_hfi.h"
 #include "thermal_interrupt.h"
 
 /* How long to wait between reporting thermal events */
@@ -509,6 +510,8 @@ static __init int thermal_throttle_init_device(void)
 	if (!atomic_read(&therm_throt_en))
 		return 0;
 
+	intel_hfi_init();
+
 	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/therm:online",
 				thermal_throttle_online,
 				thermal_throttle_offline);
-- 
GitLab


From 2d74e6319abe278981e79166b6c2d0c3ed39b1ae Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Date: Thu, 27 Jan 2022 11:34:51 -0800
Subject: [PATCH 0215/1586] thermal: intel: hfi: Handle CPU hotplug events

All CPUs in a package are represented in an HFI table. There exists an
HFI table per package. Thus, CPUs in a package need to coordinate to
initialize and access the table. Do such coordination during CPU hotplug.
Use the first CPU to come online in a package to initialize the HFI
instance and the data structure representing it. Other CPUs in the same
package need only to register or unregister themselves in that data
structure.

The HFI depends on both the package-level thermal management and the local
APIC thermal local vector. Thus, to ensure that a CPU coming online has an
associated HFI instance when the hardware issues an HFI event, enable the
HFI only after having enabled the local APIC thermal vector. The thermal
throttle driver takes care of the needed package-level initialization.

Reviewed-by: Len Brown <len.brown@intel.com>
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/thermal/intel/intel_hfi.c   | 205 ++++++++++++++++++++++++++++
 drivers/thermal/intel/intel_hfi.h   |   4 +
 drivers/thermal/intel/therm_throt.c |   9 ++
 3 files changed, 218 insertions(+)

diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
index 969f4a1509730..d36858474f746 100644
--- a/drivers/thermal/intel/intel_hfi.c
+++ b/drivers/thermal/intel/intel_hfi.c
@@ -23,14 +23,24 @@
 
 #include <linux/bitops.h>
 #include <linux/cpufeature.h>
+#include <linux/cpumask.h>
+#include <linux/gfp.h>
+#include <linux/io.h>
 #include <linux/math.h>
+#include <linux/mutex.h>
+#include <linux/percpu-defs.h>
 #include <linux/printk.h>
 #include <linux/processor.h>
 #include <linux/slab.h>
 #include <linux/topology.h>
 
+#include <asm/msr.h>
+
 #include "intel_hfi.h"
 
+/* Hardware Feedback Interface MSR configuration bits */
+#define HW_FEEDBACK_PTR_VALID_BIT		BIT(0)
+
 /* CPUID detection and enumeration definitions for HFI */
 
 #define CPUID_HFI_LEAF 6
@@ -86,6 +96,8 @@ struct hfi_hdr {
  *			Located at the base of the local table.
  * @hdr:		Base address of the header of the local table
  * @data:		Base address of the data of the local table
+ * @cpus:		CPUs represented in this HFI table instance
+ * @hw_table:		Pointer to the HFI table of this instance
  *
  * A set of parameters to parse and navigate a specific HFI table.
  */
@@ -96,6 +108,8 @@ struct hfi_instance {
 	};
 	void			*hdr;
 	void			*data;
+	cpumask_var_t		cpus;
+	void			*hw_table;
 };
 
 /**
@@ -113,10 +127,179 @@ struct hfi_features {
 	unsigned int	hdr_size;
 };
 
+/**
+ * struct hfi_cpu_info - Per-CPU attributes to consume HFI data
+ * @index:		Row of this CPU in its HFI table
+ * @hfi_instance:	Attributes of the HFI table to which this CPU belongs
+ *
+ * Parameters to link a logical processor to an HFI table and a row within it.
+ */
+struct hfi_cpu_info {
+	s16			index;
+	struct hfi_instance	*hfi_instance;
+};
+
+static DEFINE_PER_CPU(struct hfi_cpu_info, hfi_cpu_info) = { .index = -1 };
+
 static int max_hfi_instances;
 static struct hfi_instance *hfi_instances;
 
 static struct hfi_features hfi_features;
+static DEFINE_MUTEX(hfi_instance_lock);
+
+static void init_hfi_cpu_index(struct hfi_cpu_info *info)
+{
+	union cpuid6_edx edx;
+
+	/* Do not re-read @cpu's index if it has already been initialized. */
+	if (info->index > -1)
+		return;
+
+	edx.full = cpuid_edx(CPUID_HFI_LEAF);
+	info->index = edx.split.index;
+}
+
+/*
+ * The format of the HFI table depends on the number of capabilities that the
+ * hardware supports. Keep a data structure to navigate the table.
+ */
+static void init_hfi_instance(struct hfi_instance *hfi_instance)
+{
+	/* The HFI header is below the time-stamp. */
+	hfi_instance->hdr = hfi_instance->local_table +
+			    sizeof(*hfi_instance->timestamp);
+
+	/* The HFI data starts below the header. */
+	hfi_instance->data = hfi_instance->hdr + hfi_features.hdr_size;
+}
+
+/**
+ * intel_hfi_online() - Enable HFI on @cpu
+ * @cpu:	CPU in which the HFI will be enabled
+ *
+ * Enable the HFI to be used in @cpu. The HFI is enabled at the die/package
+ * level. The first CPU in the die/package to come online does the full HFI
+ * initialization. Subsequent CPUs will just link themselves to the HFI
+ * instance of their die/package.
+ *
+ * This function is called before enabling the thermal vector in the local APIC
+ * in order to ensure that @cpu has an associated HFI instance when it receives
+ * an HFI event.
+ */
+void intel_hfi_online(unsigned int cpu)
+{
+	struct hfi_instance *hfi_instance;
+	struct hfi_cpu_info *info;
+	phys_addr_t hw_table_pa;
+	u64 msr_val;
+	u16 die_id;
+
+	/* Nothing to do if hfi_instances are missing. */
+	if (!hfi_instances)
+		return;
+
+	/*
+	 * Link @cpu to the HFI instance of its package/die. It does not
+	 * matter whether the instance has been initialized.
+	 */
+	info = &per_cpu(hfi_cpu_info, cpu);
+	die_id = topology_logical_die_id(cpu);
+	hfi_instance = info->hfi_instance;
+	if (!hfi_instance) {
+		if (die_id < 0 || die_id >= max_hfi_instances)
+			return;
+
+		hfi_instance = &hfi_instances[die_id];
+		info->hfi_instance = hfi_instance;
+	}
+
+	init_hfi_cpu_index(info);
+
+	/*
+	 * Now check if the HFI instance of the package/die of @cpu has been
+	 * initialized (by checking its header). In such case, all we have to
+	 * do is to add @cpu to this instance's cpumask.
+	 */
+	mutex_lock(&hfi_instance_lock);
+	if (hfi_instance->hdr) {
+		cpumask_set_cpu(cpu, hfi_instance->cpus);
+		goto unlock;
+	}
+
+	/*
+	 * Hardware is programmed with the physical address of the first page
+	 * frame of the table. Hence, the allocated memory must be page-aligned.
+	 */
+	hfi_instance->hw_table = alloc_pages_exact(hfi_features.nr_table_pages,
+						   GFP_KERNEL | __GFP_ZERO);
+	if (!hfi_instance->hw_table)
+		goto unlock;
+
+	hw_table_pa = virt_to_phys(hfi_instance->hw_table);
+
+	/*
+	 * Allocate memory to keep a local copy of the table that
+	 * hardware generates.
+	 */
+	hfi_instance->local_table = kzalloc(hfi_features.nr_table_pages << PAGE_SHIFT,
+					    GFP_KERNEL);
+	if (!hfi_instance->local_table)
+		goto free_hw_table;
+
+	/*
+	 * Program the address of the feedback table of this die/package. On
+	 * some processors, hardware remembers the old address of the HFI table
+	 * even after having been reprogrammed and re-enabled. Thus, do not free
+	 * the pages allocated for the table or reprogram the hardware with a
+	 * new base address. Namely, program the hardware only once.
+	 */
+	msr_val = hw_table_pa | HW_FEEDBACK_PTR_VALID_BIT;
+	wrmsrl(MSR_IA32_HW_FEEDBACK_PTR, msr_val);
+
+	init_hfi_instance(hfi_instance);
+
+	cpumask_set_cpu(cpu, hfi_instance->cpus);
+
+unlock:
+	mutex_unlock(&hfi_instance_lock);
+	return;
+
+free_hw_table:
+	free_pages_exact(hfi_instance->hw_table, hfi_features.nr_table_pages);
+	goto unlock;
+}
+
+/**
+ * intel_hfi_offline() - Disable HFI on @cpu
+ * @cpu:	CPU in which the HFI will be disabled
+ *
+ * Remove @cpu from those covered by its HFI instance.
+ *
+ * On some processors, hardware remembers previous programming settings even
+ * after being reprogrammed. Thus, keep HFI enabled even if all CPUs in the
+ * die/package of @cpu are offline. See note in intel_hfi_online().
+ */
+void intel_hfi_offline(unsigned int cpu)
+{
+	struct hfi_cpu_info *info = &per_cpu(hfi_cpu_info, cpu);
+	struct hfi_instance *hfi_instance;
+
+	/*
+	 * Check if @cpu as an associated, initialized (i.e., with a non-NULL
+	 * header). Also, HFI instances are only initialized if X86_FEATURE_HFI
+	 * is present.
+	 */
+	hfi_instance = info->hfi_instance;
+	if (!hfi_instance)
+		return;
+
+	if (!hfi_instance->hdr)
+		return;
+
+	mutex_lock(&hfi_instance_lock);
+	cpumask_clear_cpu(cpu, hfi_instance->cpus);
+	mutex_unlock(&hfi_instance_lock);
+}
 
 static __init int hfi_parse_features(void)
 {
@@ -165,6 +348,9 @@ static __init int hfi_parse_features(void)
 
 void __init intel_hfi_init(void)
 {
+	struct hfi_instance *hfi_instance;
+	int i, j;
+
 	if (hfi_parse_features())
 		return;
 
@@ -178,4 +364,23 @@ void __init intel_hfi_init(void)
 	 */
 	hfi_instances = kcalloc(max_hfi_instances, sizeof(*hfi_instances),
 				GFP_KERNEL);
+	if (!hfi_instances)
+		return;
+
+	for (i = 0; i < max_hfi_instances; i++) {
+		hfi_instance = &hfi_instances[i];
+		if (!zalloc_cpumask_var(&hfi_instance->cpus, GFP_KERNEL))
+			goto err_nomem;
+	}
+
+	return;
+
+err_nomem:
+	for (j = 0; j < i; ++j) {
+		hfi_instance = &hfi_instances[j];
+		free_cpumask_var(hfi_instance->cpus);
+	}
+
+	kfree(hfi_instances);
+	hfi_instances = NULL;
 }
diff --git a/drivers/thermal/intel/intel_hfi.h b/drivers/thermal/intel/intel_hfi.h
index 05f748b48a4e4..56c6b2d752027 100644
--- a/drivers/thermal/intel/intel_hfi.h
+++ b/drivers/thermal/intel/intel_hfi.h
@@ -4,8 +4,12 @@
 
 #if defined(CONFIG_INTEL_HFI_THERMAL)
 void __init intel_hfi_init(void);
+void intel_hfi_online(unsigned int cpu);
+void intel_hfi_offline(unsigned int cpu);
 #else
 static inline void intel_hfi_init(void) { }
+static inline void intel_hfi_online(unsigned int cpu) { }
+static inline void intel_hfi_offline(unsigned int cpu) { }
 #endif /* CONFIG_INTEL_HFI_THERMAL */
 
 #endif /* _INTEL_HFI_H */
diff --git a/drivers/thermal/intel/therm_throt.c b/drivers/thermal/intel/therm_throt.c
index ac408714d52b8..8571e57a013a3 100644
--- a/drivers/thermal/intel/therm_throt.c
+++ b/drivers/thermal/intel/therm_throt.c
@@ -476,6 +476,13 @@ static int thermal_throttle_online(unsigned int cpu)
 	INIT_DELAYED_WORK(&state->package_throttle.therm_work, throttle_active_work);
 	INIT_DELAYED_WORK(&state->core_throttle.therm_work, throttle_active_work);
 
+	/*
+	 * The first CPU coming online will enable the HFI. Usually this causes
+	 * hardware to issue an HFI thermal interrupt. Such interrupt will reach
+	 * the CPU once we enable the thermal vector in the local APIC.
+	 */
+	intel_hfi_online(cpu);
+
 	/* Unmask the thermal vector after the above workqueues are initialized. */
 	l = apic_read(APIC_LVTTHMR);
 	apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
@@ -493,6 +500,8 @@ static int thermal_throttle_offline(unsigned int cpu)
 	l = apic_read(APIC_LVTTHMR);
 	apic_write(APIC_LVTTHMR, l | APIC_LVT_MASKED);
 
+	intel_hfi_offline(cpu);
+
 	cancel_delayed_work_sync(&state->package_throttle.therm_work);
 	cancel_delayed_work_sync(&state->core_throttle.therm_work);
 
-- 
GitLab


From ab09b0744a9944cbdc0ac9a5cb00bef72adf79d5 Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Date: Thu, 27 Jan 2022 11:34:52 -0800
Subject: [PATCH 0216/1586] thermal: intel: hfi: Enable notification interrupt

When hardware wants to inform the operating system about updates in the HFI
table, it issues a package-level thermal event interrupt. For this,
hardware has new interrupt and status bits in the IA32_PACKAGE_THERM_
INTERRUPT and IA32_PACKAGE_THERM_STATUS registers. The existing thermal
throttle driver already handles thermal event interrupts: it initializes
the thermal vector of the local APIC as well as per-CPU and package-level
interrupt reporting. It also provides routines to service such interrupts.
Extend its functionality to also handle HFI interrupts.

The frequency of the thermal HFI interrupt is specific to each processor
model. On some processors, a single interrupt happens as soon as the HFI is
enabled and hardware will never update HFI capabilities afterwards. On
other processors, thermal and power constraints may cause thermal HFI
interrupts every tens of milliseconds.

To not overwhelm consumers of the HFI data, use delayed work to throttle
the rate at which HFI updates are processed. Use a dedicated workqueue to
not overload system_wq if hardware issues many HFI updates.

Reviewed-by: Len Brown <len.brown@intel.com>
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/thermal/intel/intel_hfi.c   | 110 ++++++++++++++++++++++++++++
 drivers/thermal/intel/intel_hfi.h   |   2 +
 drivers/thermal/intel/therm_throt.c |  10 +++
 3 files changed, 122 insertions(+)

diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
index d36858474f746..7a2b750eecc45 100644
--- a/drivers/thermal/intel/intel_hfi.c
+++ b/drivers/thermal/intel/intel_hfi.c
@@ -26,20 +26,28 @@
 #include <linux/cpumask.h>
 #include <linux/gfp.h>
 #include <linux/io.h>
+#include <linux/kernel.h>
 #include <linux/math.h>
 #include <linux/mutex.h>
 #include <linux/percpu-defs.h>
 #include <linux/printk.h>
 #include <linux/processor.h>
 #include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
 #include <linux/topology.h>
+#include <linux/workqueue.h>
 
 #include <asm/msr.h>
 
 #include "intel_hfi.h"
 
+#define THERM_STATUS_CLEAR_PKG_MASK (BIT(1) | BIT(3) | BIT(5) | BIT(7) | \
+				     BIT(9) | BIT(11) | BIT(26))
+
 /* Hardware Feedback Interface MSR configuration bits */
 #define HW_FEEDBACK_PTR_VALID_BIT		BIT(0)
+#define HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT	BIT(0)
 
 /* CPUID detection and enumeration definitions for HFI */
 
@@ -98,6 +106,9 @@ struct hfi_hdr {
  * @data:		Base address of the data of the local table
  * @cpus:		CPUs represented in this HFI table instance
  * @hw_table:		Pointer to the HFI table of this instance
+ * @update_work:	Delayed work to process HFI updates
+ * @table_lock:		Lock to protect acceses to the table of this instance
+ * @event_lock:		Lock to process HFI interrupts
  *
  * A set of parameters to parse and navigate a specific HFI table.
  */
@@ -110,6 +121,9 @@ struct hfi_instance {
 	void			*data;
 	cpumask_var_t		cpus;
 	void			*hw_table;
+	struct delayed_work	update_work;
+	raw_spinlock_t		table_lock;
+	raw_spinlock_t		event_lock;
 };
 
 /**
@@ -147,6 +161,86 @@ static struct hfi_instance *hfi_instances;
 static struct hfi_features hfi_features;
 static DEFINE_MUTEX(hfi_instance_lock);
 
+static struct workqueue_struct *hfi_updates_wq;
+#define HFI_UPDATE_INTERVAL		HZ
+
+static void hfi_update_work_fn(struct work_struct *work)
+{
+	struct hfi_instance *hfi_instance;
+
+	hfi_instance = container_of(to_delayed_work(work), struct hfi_instance,
+				    update_work);
+	if (!hfi_instance)
+		return;
+
+	/* TODO: Consume update here. */
+}
+
+void intel_hfi_process_event(__u64 pkg_therm_status_msr_val)
+{
+	struct hfi_instance *hfi_instance;
+	int cpu = smp_processor_id();
+	struct hfi_cpu_info *info;
+	u64 new_timestamp;
+
+	if (!pkg_therm_status_msr_val)
+		return;
+
+	info = &per_cpu(hfi_cpu_info, cpu);
+	if (!info)
+		return;
+
+	/*
+	 * A CPU is linked to its HFI instance before the thermal vector in the
+	 * local APIC is unmasked. Hence, info->hfi_instance cannot be NULL
+	 * when receiving an HFI event.
+	 */
+	hfi_instance = info->hfi_instance;
+	if (unlikely(!hfi_instance)) {
+		pr_debug("Received event on CPU %d but instance was null", cpu);
+		return;
+	}
+
+	/*
+	 * On most systems, all CPUs in the package receive a package-level
+	 * thermal interrupt when there is an HFI update. It is sufficient to
+	 * let a single CPU to acknowledge the update and queue work to
+	 * process it. The remaining CPUs can resume their work.
+	 */
+	if (!raw_spin_trylock(&hfi_instance->event_lock))
+		return;
+
+	/* Skip duplicated updates. */
+	new_timestamp = *(u64 *)hfi_instance->hw_table;
+	if (*hfi_instance->timestamp == new_timestamp) {
+		raw_spin_unlock(&hfi_instance->event_lock);
+		return;
+	}
+
+	raw_spin_lock(&hfi_instance->table_lock);
+
+	/*
+	 * Copy the updated table into our local copy. This includes the new
+	 * timestamp.
+	 */
+	memcpy(hfi_instance->local_table, hfi_instance->hw_table,
+	       hfi_features.nr_table_pages << PAGE_SHIFT);
+
+	raw_spin_unlock(&hfi_instance->table_lock);
+	raw_spin_unlock(&hfi_instance->event_lock);
+
+	/*
+	 * Let hardware know that we are done reading the HFI table and it is
+	 * free to update it again.
+	 */
+	pkg_therm_status_msr_val &= THERM_STATUS_CLEAR_PKG_MASK &
+				    ~PACKAGE_THERM_STATUS_HFI_UPDATED;
+	wrmsrl(MSR_IA32_PACKAGE_THERM_STATUS, pkg_therm_status_msr_val);
+
+	queue_delayed_work(hfi_updates_wq, &hfi_instance->update_work,
+			   HFI_UPDATE_INTERVAL);
+}
+
 static void init_hfi_cpu_index(struct hfi_cpu_info *info)
 {
 	union cpuid6_edx edx;
@@ -258,8 +352,20 @@ void intel_hfi_online(unsigned int cpu)
 
 	init_hfi_instance(hfi_instance);
 
+	INIT_DELAYED_WORK(&hfi_instance->update_work, hfi_update_work_fn);
+	raw_spin_lock_init(&hfi_instance->table_lock);
+	raw_spin_lock_init(&hfi_instance->event_lock);
+
 	cpumask_set_cpu(cpu, hfi_instance->cpus);
 
+	/*
+	 * Enable the hardware feedback interface and never disable it. See
+	 * comment on programming the address of the table.
+	 */
+	rdmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
+	msr_val |= HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT;
+	wrmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
+
 unlock:
 	mutex_unlock(&hfi_instance_lock);
 	return;
@@ -373,6 +479,10 @@ void __init intel_hfi_init(void)
 			goto err_nomem;
 	}
 
+	hfi_updates_wq = create_singlethread_workqueue("hfi-updates");
+	if (!hfi_updates_wq)
+		goto err_nomem;
+
 	return;
 
 err_nomem:
diff --git a/drivers/thermal/intel/intel_hfi.h b/drivers/thermal/intel/intel_hfi.h
index 56c6b2d752027..325aa78b745cf 100644
--- a/drivers/thermal/intel/intel_hfi.h
+++ b/drivers/thermal/intel/intel_hfi.h
@@ -6,10 +6,12 @@
 void __init intel_hfi_init(void);
 void intel_hfi_online(unsigned int cpu);
 void intel_hfi_offline(unsigned int cpu);
+void intel_hfi_process_event(__u64 pkg_therm_status_msr_val);
 #else
 static inline void intel_hfi_init(void) { }
 static inline void intel_hfi_online(unsigned int cpu) { }
 static inline void intel_hfi_offline(unsigned int cpu) { }
+static inline void intel_hfi_process_event(__u64 pkg_therm_status_msr_val) { }
 #endif /* CONFIG_INTEL_HFI_THERMAL */
 
 #endif /* _INTEL_HFI_H */
diff --git a/drivers/thermal/intel/therm_throt.c b/drivers/thermal/intel/therm_throt.c
index 8571e57a013a3..8352083b87c7c 100644
--- a/drivers/thermal/intel/therm_throt.c
+++ b/drivers/thermal/intel/therm_throt.c
@@ -620,6 +620,10 @@ void intel_thermal_interrupt(void)
 					PACKAGE_THERM_STATUS_POWER_LIMIT,
 					POWER_LIMIT_EVENT,
 					PACKAGE_LEVEL);
+
+		if (this_cpu_has(X86_FEATURE_HFI))
+			intel_hfi_process_event(msr_val &
+						PACKAGE_THERM_STATUS_HFI_UPDATED);
 	}
 }
 
@@ -729,6 +733,12 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
 			wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
 			      l | (PACKAGE_THERM_INT_LOW_ENABLE
 				| PACKAGE_THERM_INT_HIGH_ENABLE), h);
+
+		if (cpu_has(c, X86_FEATURE_HFI)) {
+			rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
+			wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
+			      l | PACKAGE_THERM_INT_HFI_ENABLE, h);
+		}
 	}
 
 	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
-- 
GitLab


From e4b1eb24ce5a696ef7229f9926ff34d7502f0582 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Thu, 27 Jan 2022 11:34:53 -0800
Subject: [PATCH 0217/1586] thermal: netlink: Add a new event to notify CPU
 capabilities change

Add a new netlink event to notify change in CPU capabilities in terms of
performance and efficiency.

Firmware may change CPU capabilities as a result of thermal events in the
system or to account for changes in the TDP (thermal design power) level.

This notification type will allow user space to avoid running workloads
on certain CPUs or proactively adjust power limits to avoid future events.

The netlink message consists of a nested attribute
(THERMAL_GENL_ATTR_CPU_CAPABILITY) with three attributes:

 * THERMAL_GENL_ATTR_CPU_CAPABILITY_ID (type u32):
   -- logical CPU number
 * THERMAL_GENL_ATTR_CPU_CAPABILITY_PERFORMANCE (type u32):
   -- Scaled performance from 0-1023
 * THERMAL_GENL_ATTR_CPU_CAPABILITY_EFFICIENCY (type u32):
   -- Scaled efficiency from 0-1023

Reviewed-by: Len Brown <len.brown@intel.com>
Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/thermal/thermal_netlink.c | 53 +++++++++++++++++++++++++++++++
 drivers/thermal/thermal_netlink.h | 14 ++++++++
 include/uapi/linux/thermal.h      |  6 +++-
 3 files changed, 72 insertions(+), 1 deletion(-)

diff --git a/drivers/thermal/thermal_netlink.c b/drivers/thermal/thermal_netlink.c
index a16dd4d5d710e..7c97a091680ef 100644
--- a/drivers/thermal/thermal_netlink.c
+++ b/drivers/thermal/thermal_netlink.c
@@ -43,6 +43,11 @@ static const struct nla_policy thermal_genl_policy[THERMAL_GENL_ATTR_MAX + 1] =
 	[THERMAL_GENL_ATTR_CDEV_MAX_STATE]	= { .type = NLA_U32 },
 	[THERMAL_GENL_ATTR_CDEV_NAME]		= { .type = NLA_STRING,
 						    .len = THERMAL_NAME_LENGTH },
+	/* CPU capabilities */
+	[THERMAL_GENL_ATTR_CPU_CAPABILITY]		= { .type = NLA_NESTED },
+	[THERMAL_GENL_ATTR_CPU_CAPABILITY_ID]		= { .type = NLA_U32 },
+	[THERMAL_GENL_ATTR_CPU_CAPABILITY_PERFORMANCE]	= { .type = NLA_U32 },
+	[THERMAL_GENL_ATTR_CPU_CAPABILITY_EFFICIENCY]	= { .type = NLA_U32 },
 };
 
 struct param {
@@ -58,6 +63,8 @@ struct param {
 	int temp;
 	int cdev_state;
 	int cdev_max_state;
+	struct thermal_genl_cpu_caps *cpu_capabilities;
+	int cpu_capabilities_count;
 };
 
 typedef int (*cb_t)(struct param *);
@@ -190,6 +197,42 @@ static int thermal_genl_event_gov_change(struct param *p)
 	return 0;
 }
 
+static int thermal_genl_event_cpu_capability_change(struct param *p)
+{
+	struct thermal_genl_cpu_caps *cpu_cap = p->cpu_capabilities;
+	struct sk_buff *msg = p->msg;
+	struct nlattr *start_cap;
+	int i;
+
+	start_cap = nla_nest_start(msg, THERMAL_GENL_ATTR_CPU_CAPABILITY);
+	if (!start_cap)
+		return -EMSGSIZE;
+
+	for (i = 0; i < p->cpu_capabilities_count; ++i) {
+		if (nla_put_u32(msg, THERMAL_GENL_ATTR_CPU_CAPABILITY_ID,
+				cpu_cap->cpu))
+			goto out_cancel_nest;
+
+		if (nla_put_u32(msg, THERMAL_GENL_ATTR_CPU_CAPABILITY_PERFORMANCE,
+				cpu_cap->performance))
+			goto out_cancel_nest;
+
+		if (nla_put_u32(msg, THERMAL_GENL_ATTR_CPU_CAPABILITY_EFFICIENCY,
+				cpu_cap->efficiency))
+			goto out_cancel_nest;
+
+		++cpu_cap;
+	}
+
+	nla_nest_end(msg, start_cap);
+
+	return 0;
+out_cancel_nest:
+	nla_nest_cancel(msg, start_cap);
+
+	return -EMSGSIZE;
+}
+
 int thermal_genl_event_tz_delete(struct param *p)
 	__attribute__((alias("thermal_genl_event_tz")));
 
@@ -219,6 +262,7 @@ static cb_t event_cb[] = {
 	[THERMAL_GENL_EVENT_CDEV_DELETE]	= thermal_genl_event_cdev_delete,
 	[THERMAL_GENL_EVENT_CDEV_STATE_UPDATE]	= thermal_genl_event_cdev_state_update,
 	[THERMAL_GENL_EVENT_TZ_GOV_CHANGE]	= thermal_genl_event_gov_change,
+	[THERMAL_GENL_EVENT_CPU_CAPABILITY_CHANGE] = thermal_genl_event_cpu_capability_change,
 };
 
 /*
@@ -356,6 +400,15 @@ int thermal_notify_tz_gov_change(int tz_id, const char *name)
 	return thermal_genl_send_event(THERMAL_GENL_EVENT_TZ_GOV_CHANGE, &p);
 }
 
+int thermal_genl_cpu_capability_event(int count,
+				      struct thermal_genl_cpu_caps *caps)
+{
+	struct param p = { .cpu_capabilities_count = count, .cpu_capabilities = caps };
+
+	return thermal_genl_send_event(THERMAL_GENL_EVENT_CPU_CAPABILITY_CHANGE, &p);
+}
+EXPORT_SYMBOL_GPL(thermal_genl_cpu_capability_event);
+
 /*************************** Command encoding ********************************/
 
 static int __thermal_genl_cmd_tz_get_id(struct thermal_zone_device *tz,
diff --git a/drivers/thermal/thermal_netlink.h b/drivers/thermal/thermal_netlink.h
index e554f76291f41..04d1adbbc0124 100644
--- a/drivers/thermal/thermal_netlink.h
+++ b/drivers/thermal/thermal_netlink.h
@@ -4,6 +4,12 @@
  *  Author: Daniel Lezcano <daniel.lezcano@linaro.org>
  */
 
+struct thermal_genl_cpu_caps {
+	int cpu;
+	int performance;
+	int efficiency;
+};
+
 /* Netlink notification function */
 #ifdef CONFIG_THERMAL_NETLINK
 int __init thermal_netlink_init(void);
@@ -23,6 +29,8 @@ int thermal_notify_cdev_add(int cdev_id, const char *name, int max_state);
 int thermal_notify_cdev_delete(int cdev_id);
 int thermal_notify_tz_gov_change(int tz_id, const char *name);
 int thermal_genl_sampling_temp(int id, int temp);
+int thermal_genl_cpu_capability_event(int count,
+				      struct thermal_genl_cpu_caps *caps);
 #else
 static inline int thermal_netlink_init(void)
 {
@@ -101,4 +109,10 @@ static inline int thermal_genl_sampling_temp(int id, int temp)
 {
 	return 0;
 }
+
+static inline int thermal_genl_cpu_capability_event(int count, struct cpu_capability *caps)
+{
+	return 0;
+}
+
 #endif /* CONFIG_THERMAL_NETLINK */
diff --git a/include/uapi/linux/thermal.h b/include/uapi/linux/thermal.h
index 9aa2fedfa309b..fc78bf3aead79 100644
--- a/include/uapi/linux/thermal.h
+++ b/include/uapi/linux/thermal.h
@@ -44,7 +44,10 @@ enum thermal_genl_attr {
 	THERMAL_GENL_ATTR_CDEV_MAX_STATE,
 	THERMAL_GENL_ATTR_CDEV_NAME,
 	THERMAL_GENL_ATTR_GOV_NAME,
-
+	THERMAL_GENL_ATTR_CPU_CAPABILITY,
+	THERMAL_GENL_ATTR_CPU_CAPABILITY_ID,
+	THERMAL_GENL_ATTR_CPU_CAPABILITY_PERFORMANCE,
+	THERMAL_GENL_ATTR_CPU_CAPABILITY_EFFICIENCY,
 	__THERMAL_GENL_ATTR_MAX,
 };
 #define THERMAL_GENL_ATTR_MAX (__THERMAL_GENL_ATTR_MAX - 1)
@@ -71,6 +74,7 @@ enum thermal_genl_event {
 	THERMAL_GENL_EVENT_CDEV_DELETE,		/* Cdev unbound */
 	THERMAL_GENL_EVENT_CDEV_STATE_UPDATE,	/* Cdev state updated */
 	THERMAL_GENL_EVENT_TZ_GOV_CHANGE,	/* Governor policy changed  */
+	THERMAL_GENL_EVENT_CPU_CAPABILITY_CHANGE,	/* CPU capability changed */
 	__THERMAL_GENL_EVENT_MAX,
 };
 #define THERMAL_GENL_EVENT_MAX (__THERMAL_GENL_EVENT_MAX - 1)
-- 
GitLab


From bd30cdfd9bd73b68e4977ce7c5540aa7b14c25cd Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Thu, 27 Jan 2022 11:34:54 -0800
Subject: [PATCH 0218/1586] thermal: intel: hfi: Notify user space for HFI
 events

When the hardware issues an HFI event, relay a notification to user space.
This allows user space to respond by reading performance and efficiency of
each CPU and take appropriate action.

For example, when the performance and efficiency of a CPU is 0, user space
can either offline the CPU or inject idle. Also, if user space notices a
downward trend in performance, it may proactively adjust power limits to
avoid future situations in which performance drops to 0.

To avoid excessive notifications, the rate is limited by one HZ per event.
To limit the netlink message size, send parameters for up to 16 CPUs in a
single message. If there are more than 16 CPUs, issue as many messages as
needed to notify the status of all CPUs.

In the HFI specification, both performance and efficiency capabilities are
defined in the [0, 255] range. The existing implementations of HFI hardware
do not scale the maximum values to 255. Since userspace cares about
capability values that are either 0 or show a downward/upward trend, this
fact does not matter much. Relative changes in capabilities are enough. To
comply with the thermal netlink ABI, scale both performance and efficiency
capabilities to the [0, 1023] interval.

Reviewed-by: Len Brown <len.brown@intel.com>
Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/thermal/intel/Kconfig     |  1 +
 drivers/thermal/intel/intel_hfi.c | 75 ++++++++++++++++++++++++++++++-
 2 files changed, 75 insertions(+), 1 deletion(-)

diff --git a/drivers/thermal/intel/Kconfig b/drivers/thermal/intel/Kconfig
index e9d2925227d4f..6cf3fe36a4ae8 100644
--- a/drivers/thermal/intel/Kconfig
+++ b/drivers/thermal/intel/Kconfig
@@ -104,6 +104,7 @@ config INTEL_HFI_THERMAL
 	bool "Intel Hardware Feedback Interface"
 	depends on CPU_SUP_INTEL
 	depends on X86_THERMAL_VECTOR
+	select THERMAL_NETLINK
 	help
 	  Select this option to enable the Hardware Feedback Interface. If
 	  selected, hardware provides guidance to the operating system on
diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
index 7a2b750eecc45..730fd121df6ec 100644
--- a/drivers/thermal/intel/intel_hfi.c
+++ b/drivers/thermal/intel/intel_hfi.c
@@ -40,6 +40,7 @@
 
 #include <asm/msr.h>
 
+#include "../thermal_core.h"
 #include "intel_hfi.h"
 
 #define THERM_STATUS_CLEAR_PKG_MASK (BIT(1) | BIT(3) | BIT(5) | BIT(7) | \
@@ -163,6 +164,78 @@ static DEFINE_MUTEX(hfi_instance_lock);
 
 static struct workqueue_struct *hfi_updates_wq;
 #define HFI_UPDATE_INTERVAL		HZ
+#define HFI_MAX_THERM_NOTIFY_COUNT	16
+
+static void get_hfi_caps(struct hfi_instance *hfi_instance,
+			 struct thermal_genl_cpu_caps *cpu_caps)
+{
+	int cpu, i = 0;
+
+	raw_spin_lock_irq(&hfi_instance->table_lock);
+	for_each_cpu(cpu, hfi_instance->cpus) {
+		struct hfi_cpu_data *caps;
+		s16 index;
+
+		index = per_cpu(hfi_cpu_info, cpu).index;
+		caps = hfi_instance->data + index * hfi_features.cpu_stride;
+		cpu_caps[i].cpu = cpu;
+
+		/*
+		 * Scale performance and energy efficiency to
+		 * the [0, 1023] interval that thermal netlink uses.
+		 */
+		cpu_caps[i].performance = caps->perf_cap << 2;
+		cpu_caps[i].efficiency = caps->ee_cap << 2;
+
+		++i;
+	}
+	raw_spin_unlock_irq(&hfi_instance->table_lock);
+}
+
+/*
+ * Call update_capabilities() when there are changes in the HFI table.
+ */
+static void update_capabilities(struct hfi_instance *hfi_instance)
+{
+	struct thermal_genl_cpu_caps *cpu_caps;
+	int i = 0, cpu_count;
+
+	/* CPUs may come online/offline while processing an HFI update. */
+	mutex_lock(&hfi_instance_lock);
+
+	cpu_count = cpumask_weight(hfi_instance->cpus);
+
+	/* No CPUs to report in this hfi_instance. */
+	if (!cpu_count)
+		goto out;
+
+	cpu_caps = kcalloc(cpu_count, sizeof(*cpu_caps), GFP_KERNEL);
+	if (!cpu_caps)
+		goto out;
+
+	get_hfi_caps(hfi_instance, cpu_caps);
+
+	if (cpu_count < HFI_MAX_THERM_NOTIFY_COUNT)
+		goto last_cmd;
+
+	/* Process complete chunks of HFI_MAX_THERM_NOTIFY_COUNT capabilities. */
+	for (i = 0;
+	     (i + HFI_MAX_THERM_NOTIFY_COUNT) <= cpu_count;
+	     i += HFI_MAX_THERM_NOTIFY_COUNT)
+		thermal_genl_cpu_capability_event(HFI_MAX_THERM_NOTIFY_COUNT,
+						  &cpu_caps[i]);
+
+	cpu_count = cpu_count - i;
+
+last_cmd:
+	/* Process the remaining capabilities if any. */
+	if (cpu_count)
+		thermal_genl_cpu_capability_event(cpu_count, &cpu_caps[i]);
+
+	kfree(cpu_caps);
+out:
+	mutex_unlock(&hfi_instance_lock);
+}
 
 static void hfi_update_work_fn(struct work_struct *work)
 {
@@ -173,7 +246,7 @@ static void hfi_update_work_fn(struct work_struct *work)
 	if (!hfi_instance)
 		return;
 
-	/* TODO: Consume update here. */
+	update_capabilities(hfi_instance);
 }
 
 void intel_hfi_process_event(__u64 pkg_therm_status_msr_val)
-- 
GitLab


From 2651bf680bc2ad9a078b7222b0873145ab4ece07 Mon Sep 17 00:00:00 2001
From: Song Liu <song@kernel.org>
Date: Thu, 3 Feb 2022 11:28:25 -0800
Subject: [PATCH 0219/1586] block: introduce BLK_STS_OFFLINE

Currently, drivers reports BLK_STS_IOERR for devices that are not full
online or being removed. This behavior could cause confusion for users,
as they are not really I/O errors from the device.

Solve this issue with a new state BLK_STS_OFFLINE, which reports "device
offline error" in dmesg instead of "I/O error".

EIO is intentionally kept to not change user visible return value.

Signed-off-by: Song Liu <song@kernel.org>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Link: https://lore.kernel.org/r/20220203192827.1370270-2-song@kernel.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c          | 1 +
 include/linux/blk_types.h | 7 +++++++
 2 files changed, 8 insertions(+)

diff --git a/block/blk-core.c b/block/blk-core.c
index 61f6a0dc4511a..24035dd2eef17 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -164,6 +164,7 @@ static const struct {
 	[BLK_STS_RESOURCE]	= { -ENOMEM,	"kernel resource" },
 	[BLK_STS_DEV_RESOURCE]	= { -EBUSY,	"device resource" },
 	[BLK_STS_AGAIN]		= { -EAGAIN,	"nonblocking retry" },
+	[BLK_STS_OFFLINE]	= { -EIO,	"device offline" },
 
 	/* device mapper special case, should not leak out: */
 	[BLK_STS_DM_REQUEUE]	= { -EREMCHG, "dm internal retry" },
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index fe065c394fff6..5561e58d158ac 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -153,6 +153,13 @@ typedef u8 __bitwise blk_status_t;
  */
 #define BLK_STS_ZONE_ACTIVE_RESOURCE	((__force blk_status_t)16)
 
+/*
+ * BLK_STS_OFFLINE is returned from the driver when the target device is offline
+ * or is being taken offline. This could help differentiate the case where a
+ * device is intentionally being shut down from a real I/O error.
+ */
+#define BLK_STS_OFFLINE		((__force blk_status_t)17)
+
 /**
  * blk_path_error - returns true if error may be path related
  * @error: status the request was completed with
-- 
GitLab


From 7d32c027a21ef7aa0a400763397644d44b3576a9 Mon Sep 17 00:00:00 2001
From: Song Liu <song@kernel.org>
Date: Thu, 3 Feb 2022 11:28:26 -0800
Subject: [PATCH 0220/1586] block: return -ENODEV for BLK_STS_OFFLINE

Change the user visible return value for BLK_STS_OFFLINE to -ENODEV, which
is more descriptive than existing -EIO.

Signed-off-by: Song Liu <song@kernel.org>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Link: https://lore.kernel.org/r/20220203192827.1370270-3-song@kernel.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 24035dd2eef17..be8812f5489d4 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -164,7 +164,7 @@ static const struct {
 	[BLK_STS_RESOURCE]	= { -ENOMEM,	"kernel resource" },
 	[BLK_STS_DEV_RESOURCE]	= { -EBUSY,	"device resource" },
 	[BLK_STS_AGAIN]		= { -EAGAIN,	"nonblocking retry" },
-	[BLK_STS_OFFLINE]	= { -EIO,	"device offline" },
+	[BLK_STS_OFFLINE]	= { -ENODEV,	"device offline" },
 
 	/* device mapper special case, should not leak out: */
 	[BLK_STS_DM_REQUEUE]	= { -EREMCHG, "dm internal retry" },
-- 
GitLab


From 9574d43479e16352e75bc875c9952ed8e129c9b2 Mon Sep 17 00:00:00 2001
From: Song Liu <song@kernel.org>
Date: Thu, 3 Feb 2022 11:28:27 -0800
Subject: [PATCH 0221/1586] scsi: use BLK_STS_OFFLINE for not fully online
 devices

The new error message for such case looks like

[  172.809565] device offline error, dev sda, sector 3138208 ...

which will not be confused with regular I/O error (BLK_STS_IOERR).

Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Song Liu <song@kernel.org>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Link: https://lore.kernel.org/r/20220203192827.1370270-4-song@kernel.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/scsi/scsi_lib.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 0a70aa763a961..e30bc51578e93 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1276,7 +1276,7 @@ scsi_device_state_check(struct scsi_device *sdev, struct request *req)
 		 * power management commands.
 		 */
 		if (req && !(req->rq_flags & RQF_PM))
-			return BLK_STS_IOERR;
+			return BLK_STS_OFFLINE;
 		return BLK_STS_OK;
 	}
 }
-- 
GitLab


From 6d18762ed5cd549fde74fd0e05d4d87bac5a3beb Mon Sep 17 00:00:00 2001
From: Chao Yu <chao@kernel.org>
Date: Fri, 4 Feb 2022 11:21:14 +0800
Subject: [PATCH 0222/1586] f2fs: fix to unlock page correctly in error path of
 is_alive()

As Pavel Machek reported in below link [1]:

After commit 77900c45ee5c ("f2fs: fix to do sanity check in is_alive()"),
node page should be unlock via calling f2fs_put_page() in the error path
of is_alive(), otherwise, f2fs may hang when it tries to lock the node
page, fix it.

[1] https://lore.kernel.org/stable/20220124203637.GA19321@duo.ucw.cz/

Fixes: 77900c45ee5c ("f2fs: fix to do sanity check in is_alive()")
Cc: <stable@vger.kernel.org>
Reported-by: Pavel Machek <pavel@denx.de>
Signed-off-by: Pavel Machek <pavel@denx.de>
Signed-off-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/gc.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 0a6b0a8ae97ee..2d53ef121e76e 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -1038,8 +1038,10 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
 		set_sbi_flag(sbi, SBI_NEED_FSCK);
 	}
 
-	if (f2fs_check_nid_range(sbi, dni->ino))
+	if (f2fs_check_nid_range(sbi, dni->ino)) {
+		f2fs_put_page(node_page, 1);
 		return false;
+	}
 
 	*nofs = ofs_of_node(node_page);
 	source_blkaddr = data_blkaddr(NULL, node_page, ofs_in_node);
-- 
GitLab


From 430f163b01888dc26696365d9c1053ba9d6c7d92 Mon Sep 17 00:00:00 2001
From: Chao Yu <chao@kernel.org>
Date: Fri, 4 Feb 2022 08:34:10 +0800
Subject: [PATCH 0223/1586] f2fs: adjust readahead block number during recovery

In a fragmented image, entries in dnode block list may locate in
incontiguous physical block address space, however, in recovery flow,
we will always readahead BIO_MAX_VECS size blocks, so in such case,
current readahead policy is low efficient, let's adjust readahead
window size dynamically based on consecutiveness of dnode blocks.

Signed-off-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/checkpoint.c |  8 ++++++--
 fs/f2fs/f2fs.h       |  6 +++++-
 fs/f2fs/recovery.c   | 27 ++++++++++++++++++++++++---
 3 files changed, 35 insertions(+), 6 deletions(-)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index deeda95688f03..a13b6b4af220a 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -282,18 +282,22 @@ out:
 	return blkno - start;
 }
 
-void f2fs_ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index)
+void f2fs_ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index,
+							unsigned int ra_blocks)
 {
 	struct page *page;
 	bool readahead = false;
 
+	if (ra_blocks == RECOVERY_MIN_RA_BLOCKS)
+		return;
+
 	page = find_get_page(META_MAPPING(sbi), index);
 	if (!page || !PageUptodate(page))
 		readahead = true;
 	f2fs_put_page(page, 0);
 
 	if (readahead)
-		f2fs_ra_meta_pages(sbi, index, BIO_MAX_VECS, META_POR, true);
+		f2fs_ra_meta_pages(sbi, index, ra_blocks, META_POR, true);
 }
 
 static int __f2fs_write_meta_page(struct page *page,
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 63c90416364bc..51c1392708e6e 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -590,6 +590,9 @@ enum {
 /* number of extent info in extent cache we try to shrink */
 #define EXTENT_CACHE_SHRINK_NUMBER	128
 
+#define RECOVERY_MAX_RA_BLOCKS		BIO_MAX_VECS
+#define RECOVERY_MIN_RA_BLOCKS		1
+
 struct rb_entry {
 	struct rb_node rb_node;		/* rb node located in rb-tree */
 	union {
@@ -3651,7 +3654,8 @@ bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
 					block_t blkaddr, int type);
 int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
 			int type, bool sync);
-void f2fs_ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index);
+void f2fs_ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index,
+							unsigned int ra_blocks);
 long f2fs_sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
 			long nr_to_write, enum iostat_type io_type);
 void f2fs_add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type);
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 10d152cfa58d1..2af503f75b4fa 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -343,6 +343,19 @@ static int recover_inode(struct inode *inode, struct page *page)
 	return 0;
 }
 
+static unsigned int adjust_por_ra_blocks(struct f2fs_sb_info *sbi,
+				unsigned int ra_blocks, unsigned int blkaddr,
+				unsigned int next_blkaddr)
+{
+	if (blkaddr + 1 == next_blkaddr)
+		ra_blocks = min_t(unsigned int, RECOVERY_MAX_RA_BLOCKS,
+							ra_blocks * 2);
+	else if (next_blkaddr % sbi->blocks_per_seg)
+		ra_blocks = max_t(unsigned int, RECOVERY_MIN_RA_BLOCKS,
+							ra_blocks / 2);
+	return ra_blocks;
+}
+
 static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
 				bool check_only)
 {
@@ -350,6 +363,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
 	struct page *page = NULL;
 	block_t blkaddr;
 	unsigned int loop_cnt = 0;
+	unsigned int ra_blocks = RECOVERY_MAX_RA_BLOCKS;
 	unsigned int free_blocks = MAIN_SEGS(sbi) * sbi->blocks_per_seg -
 						valid_user_blocks(sbi);
 	int err = 0;
@@ -424,11 +438,14 @@ next:
 			break;
 		}
 
+		ra_blocks = adjust_por_ra_blocks(sbi, ra_blocks, blkaddr,
+						next_blkaddr_of_node(page));
+
 		/* check next segment */
 		blkaddr = next_blkaddr_of_node(page);
 		f2fs_put_page(page, 1);
 
-		f2fs_ra_meta_pages_cond(sbi, blkaddr);
+		f2fs_ra_meta_pages_cond(sbi, blkaddr, ra_blocks);
 	}
 	return err;
 }
@@ -704,6 +721,7 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
 	struct page *page = NULL;
 	int err = 0;
 	block_t blkaddr;
+	unsigned int ra_blocks = RECOVERY_MAX_RA_BLOCKS;
 
 	/* get node pages in the current segment */
 	curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
@@ -715,8 +733,6 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
 		if (!f2fs_is_valid_blkaddr(sbi, blkaddr, META_POR))
 			break;
 
-		f2fs_ra_meta_pages_cond(sbi, blkaddr);
-
 		page = f2fs_get_tmp_page(sbi, blkaddr);
 		if (IS_ERR(page)) {
 			err = PTR_ERR(page);
@@ -759,9 +775,14 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
 		if (entry->blkaddr == blkaddr)
 			list_move_tail(&entry->list, tmp_inode_list);
 next:
+		ra_blocks = adjust_por_ra_blocks(sbi, ra_blocks, blkaddr,
+						next_blkaddr_of_node(page));
+
 		/* check next segment */
 		blkaddr = next_blkaddr_of_node(page);
 		f2fs_put_page(page, 1);
+
+		f2fs_ra_meta_pages_cond(sbi, blkaddr, ra_blocks);
 	}
 	if (!err)
 		f2fs_allocate_new_segments(sbi);
-- 
GitLab


From 78e3437450be5236c4949e377c9b848bbcd4fcb0 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 4 Feb 2022 08:19:34 +0100
Subject: [PATCH 0224/1586] block: call bio_associate_blkg from bio_reset

Call bio_associate_blkg just like bio_set_dev did in the callers before
the conversion to set the block device in bio_reset.

Fixes: a7c50c940477 ("block: pass a block_device and opf to bio_reset")
Reported-by: syzbot+2b3f18414c37b42dcc94@syzkaller.appspotmail.com
Tested-by: syzbot+2b3f18414c37b42dcc94@syzkaller.appspotmail.com
Signed-off-by: Christoph Hellwig <hch@lst.de>
Tested-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Link: https://lore.kernel.org/r/20220204071934.168469-1-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/block/bio.c b/block/bio.c
index 2e19ca600fcdb..d2f3c10350364 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -310,6 +310,8 @@ void bio_reset(struct bio *bio, struct block_device *bdev, unsigned int opf)
 	memset(bio, 0, BIO_RESET_BYTES);
 	atomic_set(&bio->__bi_remaining, 1);
 	bio->bi_bdev = bdev;
+	if (bio->bi_bdev)
+		bio_associate_blkg(bio);
 	bio->bi_opf = opf;
 }
 EXPORT_SYMBOL(bio_reset);
-- 
GitLab


From c347a787e34cba0e5a80a04082dacaf259105605 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 2 Feb 2022 17:00:57 +0100
Subject: [PATCH 0225/1586] drbd: set ->bi_bdev in drbd_req_new

Make sure the newly allocated bio has the correct bi_bdev set from the
start.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Mike Snitzer <snitzer@redhat.com>
Link: https://lore.kernel.org/r/20220202160109.108149-2-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/drbd/drbd_req.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 3235532ae0778..8d44e96c4c4ef 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -31,6 +31,7 @@ static struct drbd_request *drbd_req_new(struct drbd_device *device, struct bio
 	memset(req, 0, sizeof(*req));
 
 	req->private_bio = bio_clone_fast(bio_src, GFP_NOIO, &drbd_io_bio_set);
+	bio_set_dev(req->private_bio, device->ldev->backing_bdev);
 	req->private_bio->bi_private = req;
 	req->private_bio->bi_end_io = drbd_request_endio;
 
@@ -1151,8 +1152,6 @@ drbd_submit_req_private_bio(struct drbd_request *req)
 	else
 		type = DRBD_FAULT_DT_RD;
 
-	bio_set_dev(bio, device->ldev->backing_bdev);
-
 	/* State may have changed since we grabbed our reference on the
 	 * ->ldev member. Double check, and short-circuit to endio.
 	 * In case the last activity log transaction failed to get on
-- 
GitLab


From 6c23f0bd7f16d88c774db37b30c5da82811c41be Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 2 Feb 2022 17:00:58 +0100
Subject: [PATCH 0226/1586] dm: add a clone_to_tio helper

Add a helper to stop open coding the container_of operations to get
from the clone bio to the tio structure.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Mike Snitzer <snitzer@redhat.com>
Link: https://lore.kernel.org/r/20220202160109.108149-3-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/md/dm.c | 34 +++++++++++++++-------------------
 1 file changed, 15 insertions(+), 19 deletions(-)

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index fa596b654c99c..5543e18f3c3bc 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -79,10 +79,14 @@ struct clone_info {
 #define DM_IO_BIO_OFFSET \
 	(offsetof(struct dm_target_io, clone) + offsetof(struct dm_io, tio))
 
+static inline struct dm_target_io *clone_to_tio(struct bio *clone)
+{
+	return container_of(clone, struct dm_target_io, clone);
+}
+
 void *dm_per_bio_data(struct bio *bio, size_t data_size)
 {
-	struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
-	if (!tio->inside_dm_io)
+	if (!clone_to_tio(bio)->inside_dm_io)
 		return (char *)bio - DM_TARGET_IO_BIO_OFFSET - data_size;
 	return (char *)bio - DM_IO_BIO_OFFSET - data_size;
 }
@@ -477,10 +481,7 @@ out:
 
 u64 dm_start_time_ns_from_clone(struct bio *bio)
 {
-	struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
-	struct dm_io *io = tio->io;
-
-	return jiffies_to_nsecs(io->start_time);
+	return jiffies_to_nsecs(clone_to_tio(bio)->io->start_time);
 }
 EXPORT_SYMBOL_GPL(dm_start_time_ns_from_clone);
 
@@ -521,7 +522,7 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio)
 
 	clone = bio_alloc_bioset(NULL, 0, 0, GFP_NOIO, &md->io_bs);
 
-	tio = container_of(clone, struct dm_target_io, clone);
+	tio = clone_to_tio(clone);
 	tio->inside_dm_io = true;
 	tio->io = NULL;
 
@@ -557,7 +558,7 @@ static struct dm_target_io *alloc_tio(struct clone_info *ci, struct dm_target *t
 		if (!clone)
 			return NULL;
 
-		tio = container_of(clone, struct dm_target_io, clone);
+		tio = clone_to_tio(clone);
 		tio->inside_dm_io = false;
 	}
 
@@ -878,7 +879,7 @@ static bool swap_bios_limit(struct dm_target *ti, struct bio *bio)
 static void clone_endio(struct bio *bio)
 {
 	blk_status_t error = bio->bi_status;
-	struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
+	struct dm_target_io *tio = clone_to_tio(bio);
 	struct dm_io *io = tio->io;
 	struct mapped_device *md = tio->io->md;
 	dm_endio_fn endio = tio->ti->type->end_io;
@@ -1084,7 +1085,7 @@ static int dm_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
  */
 void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors)
 {
-	struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
+	struct dm_target_io *tio = clone_to_tio(bio);
 	unsigned bi_size = bio->bi_iter.bi_size >> SECTOR_SHIFT;
 
 	BUG_ON(bio->bi_opf & REQ_PREFLUSH);
@@ -1257,10 +1258,8 @@ static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci,
 		if (bio_nr == num_bios)
 			return;
 
-		while ((bio = bio_list_pop(blist))) {
-			tio = container_of(bio, struct dm_target_io, clone);
-			free_tio(tio);
-		}
+		while ((bio = bio_list_pop(blist)))
+			free_tio(clone_to_tio(bio));
 	}
 }
 
@@ -1282,14 +1281,11 @@ static void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti,
 {
 	struct bio_list blist = BIO_EMPTY_LIST;
 	struct bio *bio;
-	struct dm_target_io *tio;
 
 	alloc_multiple_bios(&blist, ci, ti, num_bios);
 
-	while ((bio = bio_list_pop(&blist))) {
-		tio = container_of(bio, struct dm_target_io, clone);
-		__clone_and_map_simple_bio(ci, tio, len);
-	}
+	while ((bio = bio_list_pop(&blist)))
+		__clone_and_map_simple_bio(ci, clone_to_tio(bio), len);
 }
 
 static int __send_empty_flush(struct clone_info *ci)
-- 
GitLab


From b1bee79237ce0ab43ef7fe66aa6e5c4783165012 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 2 Feb 2022 17:00:59 +0100
Subject: [PATCH 0227/1586] dm: fold clone_bio into __clone_and_map_data_bio

Fold clone_bio into its only caller to prepare for refactoring.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Mike Snitzer <snitzer@redhat.com>
Link: https://lore.kernel.org/r/20220202160109.108149-4-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/md/dm.c | 43 +++++++++++++++++--------------------------
 1 file changed, 17 insertions(+), 26 deletions(-)

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 5543e18f3c3bc..9384d250a3e4e 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1190,17 +1190,22 @@ static void bio_setup_sector(struct bio *bio, sector_t sector, unsigned len)
 /*
  * Creates a bio that consists of range of complete bvecs.
  */
-static int clone_bio(struct dm_target_io *tio, struct bio *bio,
-		     sector_t sector, unsigned len)
+static int __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti,
+				    sector_t sector, unsigned *len)
 {
-	struct bio *clone = &tio->clone;
+	struct bio *bio = ci->bio, *clone;
+	struct dm_target_io *tio;
 	int r;
 
+	tio = alloc_tio(ci, ti, 0, GFP_NOIO);
+	tio->len_ptr = len;
+
+	clone = &tio->clone;
 	__bio_clone_fast(clone, bio);
 
 	r = bio_crypt_clone(clone, bio, GFP_NOIO);
 	if (r < 0)
-		return r;
+		goto free_tio;
 
 	if (bio_integrity(bio)) {
 		if (unlikely(!dm_target_has_integrity(tio->ti->type) &&
@@ -1208,21 +1213,26 @@ static int clone_bio(struct dm_target_io *tio, struct bio *bio,
 			DMWARN("%s: the target %s doesn't support integrity data.",
 				dm_device_name(tio->io->md),
 				tio->ti->type->name);
-			return -EIO;
+			r = -EIO;
+			goto free_tio;
 		}
 
 		r = bio_integrity_clone(clone, bio, GFP_NOIO);
 		if (r < 0)
-			return r;
+			goto free_tio;
 	}
 
 	bio_advance(clone, to_bytes(sector - clone->bi_iter.bi_sector));
-	clone->bi_iter.bi_size = to_bytes(len);
+	clone->bi_iter.bi_size = to_bytes(*len);
 
 	if (bio_integrity(bio))
 		bio_integrity_trim(clone);
 
+	__map_bio(tio);
 	return 0;
+free_tio:
+	free_tio(tio);
+	return r;
 }
 
 static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci,
@@ -1313,25 +1323,6 @@ static int __send_empty_flush(struct clone_info *ci)
 	return 0;
 }
 
-static int __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti,
-				    sector_t sector, unsigned *len)
-{
-	struct bio *bio = ci->bio;
-	struct dm_target_io *tio;
-	int r;
-
-	tio = alloc_tio(ci, ti, 0, GFP_NOIO);
-	tio->len_ptr = len;
-	r = clone_bio(tio, bio, sector, *len);
-	if (r < 0) {
-		free_tio(tio);
-		return r;
-	}
-	__map_bio(tio);
-
-	return 0;
-}
-
 static int __send_changing_extent_only(struct clone_info *ci, struct dm_target *ti,
 				       unsigned num_bios)
 {
-- 
GitLab


From 8eabf5d0a7bd9226d6cc25402dde67f372aae838 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 2 Feb 2022 17:01:00 +0100
Subject: [PATCH 0228/1586] dm: fold __send_duplicate_bios into
 __clone_and_map_simple_bio

Fold __send_duplicate_bios into its only caller to prepare for
refactoring.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Mike Snitzer <snitzer@redhat.com>
Link: https://lore.kernel.org/r/20220202160109.108149-5-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/md/dm.c | 27 +++++++++++----------------
 1 file changed, 11 insertions(+), 16 deletions(-)

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 9384d250a3e4e..2527b287ead0f 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1273,29 +1273,24 @@ static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci,
 	}
 }
 
-static void __clone_and_map_simple_bio(struct clone_info *ci,
-					   struct dm_target_io *tio, unsigned *len)
-{
-	struct bio *clone = &tio->clone;
-
-	tio->len_ptr = len;
-
-	__bio_clone_fast(clone, ci->bio);
-	if (len)
-		bio_setup_sector(clone, ci->sector, *len);
-	__map_bio(tio);
-}
-
 static void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti,
 				  unsigned num_bios, unsigned *len)
 {
 	struct bio_list blist = BIO_EMPTY_LIST;
-	struct bio *bio;
+	struct bio *clone;
 
 	alloc_multiple_bios(&blist, ci, ti, num_bios);
 
-	while ((bio = bio_list_pop(&blist)))
-		__clone_and_map_simple_bio(ci, clone_to_tio(bio), len);
+	while ((clone = bio_list_pop(&blist))) {
+		struct dm_target_io *tio = clone_to_tio(clone);
+
+		tio->len_ptr = len;
+
+		__bio_clone_fast(clone, ci->bio);
+		if (len)
+			bio_setup_sector(clone, ci->sector, *len);
+		__map_bio(tio);
+	}
 }
 
 static int __send_empty_flush(struct clone_info *ci)
-- 
GitLab


From dc8e2021da71f6b2d5971f98ee3e528cf30c409c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 2 Feb 2022 17:01:01 +0100
Subject: [PATCH 0229/1586] dm: move cloning the bio into alloc_tio

Move the call to __bio_clone_fast and the assignment of ->len_ptr from
the callers into alloc_tio to prepare for changes to the bio clone API.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Mike Snitzer <snitzer@redhat.com>
Link: https://lore.kernel.org/r/20220202160109.108149-6-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/md/dm.c | 22 ++++++++++------------
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 2527b287ead0f..90341b7fa5809 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -545,7 +545,7 @@ static void free_io(struct mapped_device *md, struct dm_io *io)
 }
 
 static struct dm_target_io *alloc_tio(struct clone_info *ci, struct dm_target *ti,
-				      unsigned target_bio_nr, gfp_t gfp_mask)
+		unsigned target_bio_nr, unsigned *len, gfp_t gfp_mask)
 {
 	struct dm_target_io *tio;
 
@@ -561,11 +561,13 @@ static struct dm_target_io *alloc_tio(struct clone_info *ci, struct dm_target *t
 		tio = clone_to_tio(clone);
 		tio->inside_dm_io = false;
 	}
+	__bio_clone_fast(&tio->clone, ci->bio);
 
 	tio->magic = DM_TIO_MAGIC;
 	tio->io = ci->io;
 	tio->ti = ti;
 	tio->target_bio_nr = target_bio_nr;
+	tio->len_ptr = len;
 
 	return tio;
 }
@@ -1197,11 +1199,8 @@ static int __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti,
 	struct dm_target_io *tio;
 	int r;
 
-	tio = alloc_tio(ci, ti, 0, GFP_NOIO);
-	tio->len_ptr = len;
-
+	tio = alloc_tio(ci, ti, 0, len, GFP_NOIO);
 	clone = &tio->clone;
-	__bio_clone_fast(clone, bio);
 
 	r = bio_crypt_clone(clone, bio, GFP_NOIO);
 	if (r < 0)
@@ -1236,7 +1235,8 @@ free_tio:
 }
 
 static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci,
-				struct dm_target *ti, unsigned num_bios)
+				struct dm_target *ti, unsigned num_bios,
+				unsigned *len)
 {
 	struct dm_target_io *tio;
 	int try;
@@ -1245,7 +1245,7 @@ static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci,
 		return;
 
 	if (num_bios == 1) {
-		tio = alloc_tio(ci, ti, 0, GFP_NOIO);
+		tio = alloc_tio(ci, ti, 0, len, GFP_NOIO);
 		bio_list_add(blist, &tio->clone);
 		return;
 	}
@@ -1257,7 +1257,8 @@ static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci,
 		if (try)
 			mutex_lock(&ci->io->md->table_devices_lock);
 		for (bio_nr = 0; bio_nr < num_bios; bio_nr++) {
-			tio = alloc_tio(ci, ti, bio_nr, try ? GFP_NOIO : GFP_NOWAIT);
+			tio = alloc_tio(ci, ti, bio_nr, len,
+					try ? GFP_NOIO : GFP_NOWAIT);
 			if (!tio)
 				break;
 
@@ -1279,14 +1280,11 @@ static void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti,
 	struct bio_list blist = BIO_EMPTY_LIST;
 	struct bio *clone;
 
-	alloc_multiple_bios(&blist, ci, ti, num_bios);
+	alloc_multiple_bios(&blist, ci, ti, num_bios, len);
 
 	while ((clone = bio_list_pop(&blist))) {
 		struct dm_target_io *tio = clone_to_tio(clone);
 
-		tio->len_ptr = len;
-
-		__bio_clone_fast(clone, ci->bio);
 		if (len)
 			bio_setup_sector(clone, ci->sector, *len);
 		__map_bio(tio);
-- 
GitLab


From 1561b396106d759fdf5f9a71b412e068f74d2cc9 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 2 Feb 2022 17:01:02 +0100
Subject: [PATCH 0230/1586] dm: pass the bio instead of tio to __map_bio

This simplifies the callers a bit.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Mike Snitzer <snitzer@redhat.com>
Link: https://lore.kernel.org/r/20220202160109.108149-7-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/md/dm.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 90341b7fa5809..a43d280e9bc54 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1117,11 +1117,11 @@ static noinline void __set_swap_bios_limit(struct mapped_device *md, int latch)
 	mutex_unlock(&md->swap_bios_lock);
 }
 
-static void __map_bio(struct dm_target_io *tio)
+static void __map_bio(struct bio *clone)
 {
+	struct dm_target_io *tio = clone_to_tio(clone);
 	int r;
 	sector_t sector;
-	struct bio *clone = &tio->clone;
 	struct dm_io *io = tio->io;
 	struct dm_target *ti = tio->ti;
 
@@ -1227,7 +1227,7 @@ static int __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti,
 	if (bio_integrity(bio))
 		bio_integrity_trim(clone);
 
-	__map_bio(tio);
+	__map_bio(clone);
 	return 0;
 free_tio:
 	free_tio(tio);
@@ -1283,11 +1283,9 @@ static void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti,
 	alloc_multiple_bios(&blist, ci, ti, num_bios, len);
 
 	while ((clone = bio_list_pop(&blist))) {
-		struct dm_target_io *tio = clone_to_tio(clone);
-
 		if (len)
 			bio_setup_sector(clone, ci->sector, *len);
-		__map_bio(tio);
+		__map_bio(clone);
 	}
 }
 
-- 
GitLab


From 1d1068cecff70cb8e48c7cb0ba27cc3fd906eb31 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 2 Feb 2022 17:01:03 +0100
Subject: [PATCH 0231/1586] dm: retun the clone bio from alloc_tio

Return the clone bio embedded into the tio as that is what the callers
actually want.  Similar for the free side.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Mike Snitzer <snitzer@redhat.com>
Link: https://lore.kernel.org/r/20220202160109.108149-8-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/md/dm.c | 39 +++++++++++++++++++--------------------
 1 file changed, 19 insertions(+), 20 deletions(-)

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index a43d280e9bc54..c05b6ff1bb957 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -544,7 +544,7 @@ static void free_io(struct mapped_device *md, struct dm_io *io)
 	bio_put(&io->tio.clone);
 }
 
-static struct dm_target_io *alloc_tio(struct clone_info *ci, struct dm_target *ti,
+static struct bio *alloc_tio(struct clone_info *ci, struct dm_target *ti,
 		unsigned target_bio_nr, unsigned *len, gfp_t gfp_mask)
 {
 	struct dm_target_io *tio;
@@ -569,14 +569,14 @@ static struct dm_target_io *alloc_tio(struct clone_info *ci, struct dm_target *t
 	tio->target_bio_nr = target_bio_nr;
 	tio->len_ptr = len;
 
-	return tio;
+	return &tio->clone;
 }
 
-static void free_tio(struct dm_target_io *tio)
+static void free_tio(struct bio *clone)
 {
-	if (tio->inside_dm_io)
+	if (clone_to_tio(clone)->inside_dm_io)
 		return;
-	bio_put(&tio->clone);
+	bio_put(clone);
 }
 
 /*
@@ -932,7 +932,7 @@ static void clone_endio(struct bio *bio)
 		up(&md->swap_bios_semaphore);
 	}
 
-	free_tio(tio);
+	free_tio(bio);
 	dm_io_dec_pending(io, error);
 }
 
@@ -1166,7 +1166,7 @@ static void __map_bio(struct bio *clone)
 			struct mapped_device *md = io->md;
 			up(&md->swap_bios_semaphore);
 		}
-		free_tio(tio);
+		free_tio(clone);
 		dm_io_dec_pending(io, BLK_STS_IOERR);
 		break;
 	case DM_MAPIO_REQUEUE:
@@ -1174,7 +1174,7 @@ static void __map_bio(struct bio *clone)
 			struct mapped_device *md = io->md;
 			up(&md->swap_bios_semaphore);
 		}
-		free_tio(tio);
+		free_tio(clone);
 		dm_io_dec_pending(io, BLK_STS_DM_REQUEUE);
 		break;
 	default:
@@ -1196,17 +1196,17 @@ static int __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti,
 				    sector_t sector, unsigned *len)
 {
 	struct bio *bio = ci->bio, *clone;
-	struct dm_target_io *tio;
 	int r;
 
-	tio = alloc_tio(ci, ti, 0, len, GFP_NOIO);
-	clone = &tio->clone;
+	clone = alloc_tio(ci, ti, 0, len, GFP_NOIO);
 
 	r = bio_crypt_clone(clone, bio, GFP_NOIO);
 	if (r < 0)
 		goto free_tio;
 
 	if (bio_integrity(bio)) {
+		struct dm_target_io *tio = clone_to_tio(clone);
+
 		if (unlikely(!dm_target_has_integrity(tio->ti->type) &&
 			     !dm_target_passes_integrity(tio->ti->type))) {
 			DMWARN("%s: the target %s doesn't support integrity data.",
@@ -1230,7 +1230,7 @@ static int __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti,
 	__map_bio(clone);
 	return 0;
 free_tio:
-	free_tio(tio);
+	free_tio(clone);
 	return r;
 }
 
@@ -1238,31 +1238,30 @@ static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci,
 				struct dm_target *ti, unsigned num_bios,
 				unsigned *len)
 {
-	struct dm_target_io *tio;
+	struct bio *bio;
 	int try;
 
 	if (!num_bios)
 		return;
 
 	if (num_bios == 1) {
-		tio = alloc_tio(ci, ti, 0, len, GFP_NOIO);
-		bio_list_add(blist, &tio->clone);
+		bio = alloc_tio(ci, ti, 0, len, GFP_NOIO);
+		bio_list_add(blist, bio);
 		return;
 	}
 
 	for (try = 0; try < 2; try++) {
 		int bio_nr;
-		struct bio *bio;
 
 		if (try)
 			mutex_lock(&ci->io->md->table_devices_lock);
 		for (bio_nr = 0; bio_nr < num_bios; bio_nr++) {
-			tio = alloc_tio(ci, ti, bio_nr, len,
+			bio = alloc_tio(ci, ti, bio_nr, len,
 					try ? GFP_NOIO : GFP_NOWAIT);
-			if (!tio)
+			if (!bio)
 				break;
 
-			bio_list_add(blist, &tio->clone);
+			bio_list_add(blist, bio);
 		}
 		if (try)
 			mutex_unlock(&ci->io->md->table_devices_lock);
@@ -1270,7 +1269,7 @@ static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci,
 			return;
 
 		while ((bio = bio_list_pop(blist)))
-			free_tio(clone_to_tio(bio));
+			free_tio(bio);
 	}
 }
 
-- 
GitLab


From 891fced644a7529bfd4b1436b2341527ce8f68ad Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 2 Feb 2022 17:01:04 +0100
Subject: [PATCH 0232/1586] dm: simplify the single bio fast path in
 __send_duplicate_bios

Most targets just need a single flush bio.  Open code that case in
__send_duplicate_bios without the need to add the bio to a list.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Mike Snitzer <snitzer@redhat.com>
Link: https://lore.kernel.org/r/20220202160109.108149-9-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/md/dm.c | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index c05b6ff1bb957..78df75f57288b 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1241,15 +1241,6 @@ static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci,
 	struct bio *bio;
 	int try;
 
-	if (!num_bios)
-		return;
-
-	if (num_bios == 1) {
-		bio = alloc_tio(ci, ti, 0, len, GFP_NOIO);
-		bio_list_add(blist, bio);
-		return;
-	}
-
 	for (try = 0; try < 2; try++) {
 		int bio_nr;
 
@@ -1279,12 +1270,23 @@ static void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti,
 	struct bio_list blist = BIO_EMPTY_LIST;
 	struct bio *clone;
 
-	alloc_multiple_bios(&blist, ci, ti, num_bios, len);
-
-	while ((clone = bio_list_pop(&blist))) {
+	switch (num_bios) {
+	case 0:
+		break;
+	case 1:
+		clone = alloc_tio(ci, ti, 0, len, GFP_NOIO);
 		if (len)
 			bio_setup_sector(clone, ci->sector, *len);
 		__map_bio(clone);
+		break;
+	default:
+		alloc_multiple_bios(&blist, ci, ti, num_bios, len);
+		while ((clone = bio_list_pop(&blist))) {
+			if (len)
+				bio_setup_sector(clone, ci->sector, *len);
+			__map_bio(clone);
+		}
+		break;
 	}
 }
 
-- 
GitLab


From 3c4b455ef8acdacd0e5ecd33428d4f32f861637a Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 2 Feb 2022 17:01:05 +0100
Subject: [PATCH 0233/1586] dm-cache: remove __remap_to_origin_clear_discard

Fold __remap_to_origin_clear_discard into the two callers to prepare
for bio cloning refactoring.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Mike Snitzer <snitzer@redhat.com>
Link: https://lore.kernel.org/r/20220202160109.108149-10-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/md/dm-cache-target.c | 24 ++++++++----------------
 1 file changed, 8 insertions(+), 16 deletions(-)

diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 447d030036d18..1c37fe028e531 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -744,21 +744,14 @@ static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio)
 	spin_unlock_irq(&cache->lock);
 }
 
-static void __remap_to_origin_clear_discard(struct cache *cache, struct bio *bio,
-					    dm_oblock_t oblock, bool bio_has_pbd)
-{
-	if (bio_has_pbd)
-		check_if_tick_bio_needed(cache, bio);
-	remap_to_origin(cache, bio);
-	if (bio_data_dir(bio) == WRITE)
-		clear_discard(cache, oblock_to_dblock(cache, oblock));
-}
-
 static void remap_to_origin_clear_discard(struct cache *cache, struct bio *bio,
 					  dm_oblock_t oblock)
 {
 	// FIXME: check_if_tick_bio_needed() is called way too much through this interface
-	__remap_to_origin_clear_discard(cache, bio, oblock, true);
+	check_if_tick_bio_needed(cache, bio);
+	remap_to_origin(cache, bio);
+	if (bio_data_dir(bio) == WRITE)
+		clear_discard(cache, oblock_to_dblock(cache, oblock));
 }
 
 static void remap_to_cache_dirty(struct cache *cache, struct bio *bio,
@@ -831,11 +824,10 @@ static void remap_to_origin_and_cache(struct cache *cache, struct bio *bio,
 	BUG_ON(!origin_bio);
 
 	bio_chain(origin_bio, bio);
-	/*
-	 * Passing false to __remap_to_origin_clear_discard() skips
-	 * all code that might use per_bio_data (since clone doesn't have it)
-	 */
-	__remap_to_origin_clear_discard(cache, origin_bio, oblock, false);
+
+	remap_to_origin(cache, origin_bio);
+	if (bio_data_dir(origin_bio) == WRITE)
+		clear_discard(cache, oblock_to_dblock(cache, oblock));
 	submit_bio(origin_bio);
 
 	remap_to_cache(cache, bio, cblock);
-- 
GitLab


From 56b4b5abcdab6daf71c5536fca2772f178590e06 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 2 Feb 2022 17:01:06 +0100
Subject: [PATCH 0234/1586] block: clone crypto and integrity data in
 __bio_clone_fast

__bio_clone_fast should also clone integrity and crypto data, as a clone
without those is incomplete.  Right now the only caller that can actually
support crypto and integrity data (dm) does it manually for the one
callchain that supports these, but we better do it properly in the core.

Note that all callers except for the above mentioned one also don't need
to handle failure at all, given that the integrity and crypto clones are
based on mempool allocations that won't fail for sleeping allocations.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Mike Snitzer <snitzer@redhat.com>
Link: https://lore.kernel.org/r/20220202160109.108149-11-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio-integrity.c       |  1 -
 block/bio.c                 | 26 +++++++++++++-------------
 block/blk-crypto.c          |  1 -
 drivers/md/bcache/request.c |  2 +-
 drivers/md/dm.c             | 33 ++++++---------------------------
 drivers/md/md-multipath.c   |  2 +-
 include/linux/bio.h         |  2 +-
 7 files changed, 22 insertions(+), 45 deletions(-)

diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index d251147154592..bd54532200650 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -420,7 +420,6 @@ int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
 
 	return 0;
 }
-EXPORT_SYMBOL(bio_integrity_clone);
 
 int bioset_integrity_create(struct bio_set *bs, int pool_size)
 {
diff --git a/block/bio.c b/block/bio.c
index d2f3c10350364..2a921875bb420 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -732,6 +732,7 @@ EXPORT_SYMBOL(bio_put);
  * 	__bio_clone_fast - clone a bio that shares the original bio's biovec
  * 	@bio: destination bio
  * 	@bio_src: bio to clone
+ *	@gfp: allocation flags
  *
  *	Clone a &bio. Caller will own the returned bio, but not
  *	the actual data it points to. Reference count of returned
@@ -739,7 +740,7 @@ EXPORT_SYMBOL(bio_put);
  *
  * 	Caller must ensure that @bio_src is not freed before @bio.
  */
-void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
+int __bio_clone_fast(struct bio *bio, struct bio *bio_src, gfp_t gfp)
 {
 	WARN_ON_ONCE(bio->bi_pool && bio->bi_max_vecs);
 
@@ -761,6 +762,13 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
 
 	bio_clone_blkg_association(bio, bio_src);
 	blkcg_bio_issue_init(bio);
+
+	if (bio_crypt_clone(bio, bio_src, gfp) < 0)
+		return -ENOMEM;
+	if (bio_integrity(bio_src) &&
+	    bio_integrity_clone(bio, bio_src, gfp) < 0)
+		return -ENOMEM;
+	return 0;
 }
 EXPORT_SYMBOL(__bio_clone_fast);
 
@@ -780,20 +788,12 @@ struct bio *bio_clone_fast(struct bio *bio, gfp_t gfp_mask, struct bio_set *bs)
 	if (!b)
 		return NULL;
 
-	__bio_clone_fast(b, bio);
-
-	if (bio_crypt_clone(b, bio, gfp_mask) < 0)
-		goto err_put;
-
-	if (bio_integrity(bio) &&
-	    bio_integrity_clone(b, bio, gfp_mask) < 0)
-		goto err_put;
+	if (__bio_clone_fast(b, bio, gfp_mask < 0)) {
+		bio_put(b);
+		return NULL;
+	}
 
 	return b;
-
-err_put:
-	bio_put(b);
-	return NULL;
 }
 EXPORT_SYMBOL(bio_clone_fast);
 
diff --git a/block/blk-crypto.c b/block/blk-crypto.c
index ec9efeeeca918..773dae4c329ba 100644
--- a/block/blk-crypto.c
+++ b/block/blk-crypto.c
@@ -111,7 +111,6 @@ int __bio_crypt_clone(struct bio *dst, struct bio *src, gfp_t gfp_mask)
 	*dst->bi_crypt_context = *src->bi_crypt_context;
 	return 0;
 }
-EXPORT_SYMBOL_GPL(__bio_crypt_clone);
 
 /* Increments @dun by @inc, treating @dun as a multi-limb integer. */
 void bio_crypt_dun_increment(u64 dun[BLK_CRYPTO_DUN_ARRAY_SIZE],
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 7ba59d08ed870..574b02b94f1a4 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -686,7 +686,7 @@ static void do_bio_hook(struct search *s,
 	struct bio *bio = &s->bio.bio;
 
 	bio_init(bio, NULL, NULL, 0, 0);
-	__bio_clone_fast(bio, orig_bio);
+	__bio_clone_fast(bio, orig_bio, GFP_NOIO);
 	/*
 	 * bi_end_io can be set separately somewhere else, e.g. the
 	 * variants in,
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 78df75f57288b..0f8796159379e 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -561,7 +561,12 @@ static struct bio *alloc_tio(struct clone_info *ci, struct dm_target *ti,
 		tio = clone_to_tio(clone);
 		tio->inside_dm_io = false;
 	}
-	__bio_clone_fast(&tio->clone, ci->bio);
+
+	if (__bio_clone_fast(&tio->clone, ci->bio, gfp_mask) < 0) {
+		if (ci->io->tio.io)
+			bio_put(&tio->clone);
+		return NULL;
+	}
 
 	tio->magic = DM_TIO_MAGIC;
 	tio->io = ci->io;
@@ -1196,31 +1201,8 @@ static int __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti,
 				    sector_t sector, unsigned *len)
 {
 	struct bio *bio = ci->bio, *clone;
-	int r;
 
 	clone = alloc_tio(ci, ti, 0, len, GFP_NOIO);
-
-	r = bio_crypt_clone(clone, bio, GFP_NOIO);
-	if (r < 0)
-		goto free_tio;
-
-	if (bio_integrity(bio)) {
-		struct dm_target_io *tio = clone_to_tio(clone);
-
-		if (unlikely(!dm_target_has_integrity(tio->ti->type) &&
-			     !dm_target_passes_integrity(tio->ti->type))) {
-			DMWARN("%s: the target %s doesn't support integrity data.",
-				dm_device_name(tio->io->md),
-				tio->ti->type->name);
-			r = -EIO;
-			goto free_tio;
-		}
-
-		r = bio_integrity_clone(clone, bio, GFP_NOIO);
-		if (r < 0)
-			goto free_tio;
-	}
-
 	bio_advance(clone, to_bytes(sector - clone->bi_iter.bi_sector));
 	clone->bi_iter.bi_size = to_bytes(*len);
 
@@ -1229,9 +1211,6 @@ static int __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti,
 
 	__map_bio(clone);
 	return 0;
-free_tio:
-	free_tio(clone);
-	return r;
 }
 
 static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci,
diff --git a/drivers/md/md-multipath.c b/drivers/md/md-multipath.c
index 5e15940634d85..010c759c741ad 100644
--- a/drivers/md/md-multipath.c
+++ b/drivers/md/md-multipath.c
@@ -122,7 +122,7 @@ static bool multipath_make_request(struct mddev *mddev, struct bio * bio)
 	multipath = conf->multipaths + mp_bh->path;
 
 	bio_init(&mp_bh->bio, NULL, NULL, 0, 0);
-	__bio_clone_fast(&mp_bh->bio, bio);
+	__bio_clone_fast(&mp_bh->bio, bio, GFP_NOIO);
 
 	mp_bh->bio.bi_iter.bi_sector += multipath->rdev->data_offset;
 	bio_set_dev(&mp_bh->bio, multipath->rdev->bdev);
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 18cfe5bb41ea8..b814361c957b0 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -413,7 +413,7 @@ struct bio *bio_alloc_kiocb(struct kiocb *kiocb, struct block_device *bdev,
 struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned short nr_iovecs);
 extern void bio_put(struct bio *);
 
-extern void __bio_clone_fast(struct bio *, struct bio *);
+int __bio_clone_fast(struct bio *bio, struct bio *bio_src, gfp_t gfp);
 extern struct bio *bio_clone_fast(struct bio *, gfp_t, struct bio_set *);
 
 extern struct bio_set fs_bio_set;
-- 
GitLab


From 92986f6b4c8a2c24d3a36b80140624f80fd93de4 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 2 Feb 2022 17:01:07 +0100
Subject: [PATCH 0235/1586] dm: use bio_clone_fast in alloc_io/alloc_tio

Replace open coded bio_clone_fast implementations with the actual helper.
Note that the bio allocated as part of the dm_io structure in alloc_io
will only actually be used later in alloc_tio, making this earlier
cloning of the information safe.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Mike Snitzer <snitzer@redhat.com>
Link: https://lore.kernel.org/r/20220202160109.108149-12-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/md/dm.c | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 0f8796159379e..862564a5df74b 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -520,7 +520,7 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio)
 	struct dm_target_io *tio;
 	struct bio *clone;
 
-	clone = bio_alloc_bioset(NULL, 0, 0, GFP_NOIO, &md->io_bs);
+	clone = bio_clone_fast(bio, GFP_NOIO, &md->io_bs);
 
 	tio = clone_to_tio(clone);
 	tio->inside_dm_io = true;
@@ -553,8 +553,8 @@ static struct bio *alloc_tio(struct clone_info *ci, struct dm_target *ti,
 		/* the dm_target_io embedded in ci->io is available */
 		tio = &ci->io->tio;
 	} else {
-		struct bio *clone = bio_alloc_bioset(NULL, 0, 0, gfp_mask,
-						     &ci->io->md->bs);
+		struct bio *clone = bio_clone_fast(ci->bio, gfp_mask,
+						   &ci->io->md->bs);
 		if (!clone)
 			return NULL;
 
@@ -562,12 +562,6 @@ static struct bio *alloc_tio(struct clone_info *ci, struct dm_target *ti,
 		tio->inside_dm_io = false;
 	}
 
-	if (__bio_clone_fast(&tio->clone, ci->bio, gfp_mask) < 0) {
-		if (ci->io->tio.io)
-			bio_put(&tio->clone);
-		return NULL;
-	}
-
 	tio->magic = DM_TIO_MAGIC;
 	tio->io = ci->io;
 	tio->ti = ti;
-- 
GitLab


From a0e8de798dd6710a69d69ec57b246a0e34c4a695 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 2 Feb 2022 17:01:08 +0100
Subject: [PATCH 0236/1586] block: initialize the target bio in
 __bio_clone_fast

All callers of __bio_clone_fast initialize the bio first.  Move that
initialization into __bio_clone_fast instead.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Mike Snitzer <snitzer@redhat.com>
Link: https://lore.kernel.org/r/20220202160109.108149-13-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c                 | 75 ++++++++++++++++++++-----------------
 drivers/md/bcache/request.c |  1 -
 drivers/md/md-multipath.c   |  1 -
 3 files changed, 40 insertions(+), 37 deletions(-)

diff --git a/block/bio.c b/block/bio.c
index 2a921875bb420..74f66e22ef630 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -728,37 +728,16 @@ void bio_put(struct bio *bio)
 }
 EXPORT_SYMBOL(bio_put);
 
-/**
- * 	__bio_clone_fast - clone a bio that shares the original bio's biovec
- * 	@bio: destination bio
- * 	@bio_src: bio to clone
- *	@gfp: allocation flags
- *
- *	Clone a &bio. Caller will own the returned bio, but not
- *	the actual data it points to. Reference count of returned
- * 	bio will be one.
- *
- * 	Caller must ensure that @bio_src is not freed before @bio.
- */
-int __bio_clone_fast(struct bio *bio, struct bio *bio_src, gfp_t gfp)
+static int __bio_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp)
 {
-	WARN_ON_ONCE(bio->bi_pool && bio->bi_max_vecs);
-
-	/*
-	 * most users will be overriding ->bi_bdev with a new target,
-	 * so we don't set nor calculate new physical/hw segment counts here
-	 */
-	bio->bi_bdev = bio_src->bi_bdev;
 	bio_set_flag(bio, BIO_CLONED);
 	if (bio_flagged(bio_src, BIO_THROTTLED))
 		bio_set_flag(bio, BIO_THROTTLED);
 	if (bio_flagged(bio_src, BIO_REMAPPED))
 		bio_set_flag(bio, BIO_REMAPPED);
-	bio->bi_opf = bio_src->bi_opf;
 	bio->bi_ioprio = bio_src->bi_ioprio;
 	bio->bi_write_hint = bio_src->bi_write_hint;
 	bio->bi_iter = bio_src->bi_iter;
-	bio->bi_io_vec = bio_src->bi_io_vec;
 
 	bio_clone_blkg_association(bio, bio_src);
 	blkcg_bio_issue_init(bio);
@@ -770,33 +749,59 @@ int __bio_clone_fast(struct bio *bio, struct bio *bio_src, gfp_t gfp)
 		return -ENOMEM;
 	return 0;
 }
-EXPORT_SYMBOL(__bio_clone_fast);
 
 /**
- *	bio_clone_fast - clone a bio that shares the original bio's biovec
- *	@bio: bio to clone
- *	@gfp_mask: allocation priority
- *	@bs: bio_set to allocate from
+ * bio_clone_fast - clone a bio that shares the original bio's biovec
+ * @bio_src: bio to clone from
+ * @gfp: allocation priority
+ * @bs: bio_set to allocate from
+ *
+ * Allocate a new bio that is a clone of @bio_src. The caller owns the returned
+ * bio, but not the actual data it points to.
  *
- * 	Like __bio_clone_fast, only also allocates the returned bio
+ * The caller must ensure that the return bio is not freed before @bio_src.
  */
-struct bio *bio_clone_fast(struct bio *bio, gfp_t gfp_mask, struct bio_set *bs)
+struct bio *bio_clone_fast(struct bio *bio_src, gfp_t gfp, struct bio_set *bs)
 {
-	struct bio *b;
+	struct bio *bio;
 
-	b = bio_alloc_bioset(NULL, 0, 0, gfp_mask, bs);
-	if (!b)
+	bio = bio_alloc_bioset(bio_src->bi_bdev, 0, bio_src->bi_opf, gfp, bs);
+	if (!bio)
 		return NULL;
 
-	if (__bio_clone_fast(b, bio, gfp_mask < 0)) {
-		bio_put(b);
+	if (__bio_clone(bio, bio_src, gfp) < 0) {
+		bio_put(bio);
 		return NULL;
 	}
+	bio->bi_io_vec = bio_src->bi_io_vec;
 
-	return b;
+	return bio;
 }
 EXPORT_SYMBOL(bio_clone_fast);
 
+/**
+ * __bio_clone_fast - clone a bio that shares the original bio's biovec
+ * @bio: bio to clone into
+ * @bio_src: bio to clone from
+ * @gfp: allocation priority
+ *
+ * Initialize a new bio in caller provided memory that is a clone of @bio_src.
+ * The caller owns the returned bio, but not the actual data it points to.
+ *
+ * The caller must ensure that @bio_src is not freed before @bio.
+ */
+int __bio_clone_fast(struct bio *bio, struct bio *bio_src, gfp_t gfp)
+{
+	int ret;
+
+	bio_init(bio, bio_src->bi_bdev, bio_src->bi_io_vec, 0, bio_src->bi_opf);
+	ret = __bio_clone(bio, bio_src, gfp);
+	if (ret)
+		bio_uninit(bio);
+	return ret;
+}
+EXPORT_SYMBOL(__bio_clone_fast);
+
 const char *bio_devname(struct bio *bio, char *buf)
 {
 	return bdevname(bio->bi_bdev, buf);
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 574b02b94f1a4..d2cb853bf9173 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -685,7 +685,6 @@ static void do_bio_hook(struct search *s,
 {
 	struct bio *bio = &s->bio.bio;
 
-	bio_init(bio, NULL, NULL, 0, 0);
 	__bio_clone_fast(bio, orig_bio, GFP_NOIO);
 	/*
 	 * bi_end_io can be set separately somewhere else, e.g. the
diff --git a/drivers/md/md-multipath.c b/drivers/md/md-multipath.c
index 010c759c741ad..483a5500f83cd 100644
--- a/drivers/md/md-multipath.c
+++ b/drivers/md/md-multipath.c
@@ -121,7 +121,6 @@ static bool multipath_make_request(struct mddev *mddev, struct bio * bio)
 	}
 	multipath = conf->multipaths + mp_bh->path;
 
-	bio_init(&mp_bh->bio, NULL, NULL, 0, 0);
 	__bio_clone_fast(&mp_bh->bio, bio, GFP_NOIO);
 
 	mp_bh->bio.bi_iter.bi_sector += multipath->rdev->data_offset;
-- 
GitLab


From abfc426d1b2fb2176df59851a64223b58ddae7e7 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 2 Feb 2022 17:01:09 +0100
Subject: [PATCH 0237/1586] block: pass a block_device to bio_clone_fast

Pass a block_device to bio_clone_fast and __bio_clone_fast and give
the functions more suitable names.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Mike Snitzer <snitzer@redhat.com>
Link: https://lore.kernel.org/r/20220202160109.108149-14-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 Documentation/block/biodoc.rst   |  5 -----
 block/bio.c                      | 31 +++++++++++++++++------------
 block/blk-mq.c                   |  4 ++--
 block/bounce.c                   |  3 +--
 drivers/block/drbd/drbd_req.c    |  4 ++--
 drivers/block/drbd/drbd_worker.c |  4 ++--
 drivers/block/pktcdvd.c          |  4 ++--
 drivers/md/bcache/request.c      |  5 +++--
 drivers/md/dm-cache-target.c     |  4 ++--
 drivers/md/dm-crypt.c            | 11 +++++------
 drivers/md/dm-zoned-target.c     |  3 +--
 drivers/md/dm.c                  |  6 +++---
 drivers/md/md-faulty.c           |  4 ++--
 drivers/md/md-multipath.c        |  3 +--
 drivers/md/md.c                  |  5 +++--
 drivers/md/raid1.c               | 34 ++++++++++++++++----------------
 drivers/md/raid10.c              | 16 +++++++--------
 drivers/md/raid5.c               |  4 ++--
 fs/btrfs/extent_io.c             |  4 ++--
 include/linux/bio.h              |  6 ++++--
 20 files changed, 80 insertions(+), 80 deletions(-)

diff --git a/Documentation/block/biodoc.rst b/Documentation/block/biodoc.rst
index 2098477851a4b..4fbc367e62f95 100644
--- a/Documentation/block/biodoc.rst
+++ b/Documentation/block/biodoc.rst
@@ -663,11 +663,6 @@ to i/o submission, if the bio fields are likely to be accessed after the
 i/o is issued (since the bio may otherwise get freed in case i/o completion
 happens in the meantime).
 
-The bio_clone_fast() routine may be used to duplicate a bio, where the clone
-shares the bio_vec_list with the original bio (i.e. both point to the
-same bio_vec_list). This would typically be used for splitting i/o requests
-in lvm or md.
-
 3.2 Generic bio helper Routines
 -------------------------------
 
diff --git a/block/bio.c b/block/bio.c
index 74f66e22ef630..18d34b33351b8 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -733,7 +733,8 @@ static int __bio_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp)
 	bio_set_flag(bio, BIO_CLONED);
 	if (bio_flagged(bio_src, BIO_THROTTLED))
 		bio_set_flag(bio, BIO_THROTTLED);
-	if (bio_flagged(bio_src, BIO_REMAPPED))
+	if (bio->bi_bdev == bio_src->bi_bdev &&
+	    bio_flagged(bio_src, BIO_REMAPPED))
 		bio_set_flag(bio, BIO_REMAPPED);
 	bio->bi_ioprio = bio_src->bi_ioprio;
 	bio->bi_write_hint = bio_src->bi_write_hint;
@@ -751,7 +752,8 @@ static int __bio_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp)
 }
 
 /**
- * bio_clone_fast - clone a bio that shares the original bio's biovec
+ * bio_alloc_clone - clone a bio that shares the original bio's biovec
+ * @bdev: block_device to clone onto
  * @bio_src: bio to clone from
  * @gfp: allocation priority
  * @bs: bio_set to allocate from
@@ -761,11 +763,12 @@ static int __bio_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp)
  *
  * The caller must ensure that the return bio is not freed before @bio_src.
  */
-struct bio *bio_clone_fast(struct bio *bio_src, gfp_t gfp, struct bio_set *bs)
+struct bio *bio_alloc_clone(struct block_device *bdev, struct bio *bio_src,
+		gfp_t gfp, struct bio_set *bs)
 {
 	struct bio *bio;
 
-	bio = bio_alloc_bioset(bio_src->bi_bdev, 0, bio_src->bi_opf, gfp, bs);
+	bio = bio_alloc_bioset(bdev, 0, bio_src->bi_opf, gfp, bs);
 	if (!bio)
 		return NULL;
 
@@ -777,10 +780,11 @@ struct bio *bio_clone_fast(struct bio *bio_src, gfp_t gfp, struct bio_set *bs)
 
 	return bio;
 }
-EXPORT_SYMBOL(bio_clone_fast);
+EXPORT_SYMBOL(bio_alloc_clone);
 
 /**
- * __bio_clone_fast - clone a bio that shares the original bio's biovec
+ * bio_init_clone - clone a bio that shares the original bio's biovec
+ * @bdev: block_device to clone onto
  * @bio: bio to clone into
  * @bio_src: bio to clone from
  * @gfp: allocation priority
@@ -790,17 +794,18 @@ EXPORT_SYMBOL(bio_clone_fast);
  *
  * The caller must ensure that @bio_src is not freed before @bio.
  */
-int __bio_clone_fast(struct bio *bio, struct bio *bio_src, gfp_t gfp)
+int bio_init_clone(struct block_device *bdev, struct bio *bio,
+		struct bio *bio_src, gfp_t gfp)
 {
 	int ret;
 
-	bio_init(bio, bio_src->bi_bdev, bio_src->bi_io_vec, 0, bio_src->bi_opf);
+	bio_init(bio, bdev, bio_src->bi_io_vec, 0, bio_src->bi_opf);
 	ret = __bio_clone(bio, bio_src, gfp);
 	if (ret)
 		bio_uninit(bio);
 	return ret;
 }
-EXPORT_SYMBOL(__bio_clone_fast);
+EXPORT_SYMBOL(bio_init_clone);
 
 const char *bio_devname(struct bio *bio, char *buf)
 {
@@ -1572,7 +1577,7 @@ struct bio *bio_split(struct bio *bio, int sectors,
 	if (WARN_ON_ONCE(bio_op(bio) == REQ_OP_ZONE_APPEND))
 		return NULL;
 
-	split = bio_clone_fast(bio, gfp, bs);
+	split = bio_alloc_clone(bio->bi_bdev, bio, gfp, bs);
 	if (!split)
 		return NULL;
 
@@ -1667,9 +1672,9 @@ EXPORT_SYMBOL(bioset_exit);
  *    Note that the bio must be embedded at the END of that structure always,
  *    or things will break badly.
  *    If %BIOSET_NEED_BVECS is set in @flags, a separate pool will be allocated
- *    for allocating iovecs.  This pool is not needed e.g. for bio_clone_fast().
- *    If %BIOSET_NEED_RESCUER is set, a workqueue is created which can be used to
- *    dispatch queued requests when the mempool runs out of space.
+ *    for allocating iovecs.  This pool is not needed e.g. for bio_init_clone().
+ *    If %BIOSET_NEED_RESCUER is set, a workqueue is created which can be used
+ *    to dispatch queued requests when the mempool runs out of space.
  *
  */
 int bioset_init(struct bio_set *bs,
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 1adfe4824ef5e..4b868e792ba4a 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2975,10 +2975,10 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
 		bs = &fs_bio_set;
 
 	__rq_for_each_bio(bio_src, rq_src) {
-		bio = bio_clone_fast(bio_src, gfp_mask, bs);
+		bio = bio_alloc_clone(rq->q->disk->part0, bio_src, gfp_mask,
+				      bs);
 		if (!bio)
 			goto free_and_out;
-		bio->bi_bdev = rq->q->disk->part0;
 
 		if (bio_ctr && bio_ctr(bio, bio_src, data))
 			goto free_and_out;
diff --git a/block/bounce.c b/block/bounce.c
index 330ddde25b460..3fd3bc6fd5dbb 100644
--- a/block/bounce.c
+++ b/block/bounce.c
@@ -162,8 +162,7 @@ static struct bio *bounce_clone_bio(struct bio *bio_src)
 	 *    that does not own the bio - reason being drivers don't use it for
 	 *    iterating over the biovec anymore, so expecting it to be kept up
 	 *    to date (i.e. for clones that share the parent biovec) is just
-	 *    asking for trouble and would force extra work on
-	 *    __bio_clone_fast() anyways.
+	 *    asking for trouble and would force extra work.
 	 */
 	bio = bio_alloc_bioset(bio_src->bi_bdev, bio_segments(bio_src),
 			       bio_src->bi_opf, GFP_NOIO, &bounce_bio_set);
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 8d44e96c4c4ef..c00ae8619519e 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -30,8 +30,8 @@ static struct drbd_request *drbd_req_new(struct drbd_device *device, struct bio
 		return NULL;
 	memset(req, 0, sizeof(*req));
 
-	req->private_bio = bio_clone_fast(bio_src, GFP_NOIO, &drbd_io_bio_set);
-	bio_set_dev(req->private_bio, device->ldev->backing_bdev);
+	req->private_bio = bio_alloc_clone(device->ldev->backing_bdev, bio_src,
+					   GFP_NOIO, &drbd_io_bio_set);
 	req->private_bio->bi_private = req;
 	req->private_bio->bi_end_io = drbd_request_endio;
 
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 64563bfdf0da0..a5e04b38006b6 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -1523,9 +1523,9 @@ int w_restart_disk_io(struct drbd_work *w, int cancel)
 	if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG)
 		drbd_al_begin_io(device, &req->i);
 
-	req->private_bio = bio_clone_fast(req->master_bio, GFP_NOIO,
+	req->private_bio = bio_alloc_clone(device->ldev->backing_bdev,
+					   req->master_bio, GFP_NOIO,
 					  &drbd_io_bio_set);
-	bio_set_dev(req->private_bio, device->ldev->backing_bdev);
 	req->private_bio->bi_private = req;
 	req->private_bio->bi_end_io = drbd_request_endio;
 	submit_bio_noacct(req->private_bio);
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 3aa5954429462..be749c686feb7 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -2294,12 +2294,12 @@ static void pkt_end_io_read_cloned(struct bio *bio)
 
 static void pkt_make_request_read(struct pktcdvd_device *pd, struct bio *bio)
 {
-	struct bio *cloned_bio = bio_clone_fast(bio, GFP_NOIO, &pkt_bio_set);
+	struct bio *cloned_bio =
+		bio_alloc_clone(pd->bdev, bio, GFP_NOIO, &pkt_bio_set);
 	struct packet_stacked_data *psd = mempool_alloc(&psd_pool, GFP_NOIO);
 
 	psd->pd = pd;
 	psd->bio = bio;
-	bio_set_dev(cloned_bio, pd->bdev);
 	cloned_bio->bi_private = psd;
 	cloned_bio->bi_end_io = pkt_end_io_read_cloned;
 	pd->stats.secs_r += bio_sectors(bio);
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index d2cb853bf9173..6869e010475a3 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -685,7 +685,7 @@ static void do_bio_hook(struct search *s,
 {
 	struct bio *bio = &s->bio.bio;
 
-	__bio_clone_fast(bio, orig_bio, GFP_NOIO);
+	bio_init_clone(bio->bi_bdev, bio, orig_bio, GFP_NOIO);
 	/*
 	 * bi_end_io can be set separately somewhere else, e.g. the
 	 * variants in,
@@ -1036,7 +1036,8 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
 			closure_bio_submit(s->iop.c, flush, cl);
 		}
 	} else {
-		s->iop.bio = bio_clone_fast(bio, GFP_NOIO, &dc->disk.bio_split);
+		s->iop.bio = bio_alloc_clone(bio->bi_bdev, bio, GFP_NOIO,
+					     &dc->disk.bio_split);
 		/* I/O request sent to backing device */
 		bio->bi_end_io = backing_request_endio;
 		closure_bio_submit(s->iop.c, bio, cl);
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 1c37fe028e531..89fdfb49d564e 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -819,13 +819,13 @@ static void issue_op(struct bio *bio, void *context)
 static void remap_to_origin_and_cache(struct cache *cache, struct bio *bio,
 				      dm_oblock_t oblock, dm_cblock_t cblock)
 {
-	struct bio *origin_bio = bio_clone_fast(bio, GFP_NOIO, &cache->bs);
+	struct bio *origin_bio = bio_alloc_clone(cache->origin_dev->bdev, bio,
+						 GFP_NOIO, &cache->bs);
 
 	BUG_ON(!origin_bio);
 
 	bio_chain(origin_bio, bio);
 
-	remap_to_origin(cache, origin_bio);
 	if (bio_data_dir(origin_bio) == WRITE)
 		clear_discard(cache, oblock_to_dblock(cache, oblock));
 	submit_bio(origin_bio);
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index f7e4435b7439a..a5006cb6ee8ad 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1834,17 +1834,16 @@ static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp)
 	struct bio *clone;
 
 	/*
-	 * We need the original biovec array in order to decrypt
-	 * the whole bio data *afterwards* -- thanks to immutable
-	 * biovecs we don't need to worry about the block layer
-	 * modifying the biovec array; so leverage bio_clone_fast().
+	 * We need the original biovec array in order to decrypt the whole bio
+	 * data *afterwards* -- thanks to immutable biovecs we don't need to
+	 * worry about the block layer modifying the biovec array; so leverage
+	 * bio_alloc_clone().
 	 */
-	clone = bio_clone_fast(io->base_bio, gfp, &cc->bs);
+	clone = bio_alloc_clone(cc->dev->bdev, io->base_bio, gfp, &cc->bs);
 	if (!clone)
 		return 1;
 	clone->bi_private = io;
 	clone->bi_end_io = crypt_endio;
-	bio_set_dev(clone, cc->dev->bdev);
 
 	crypt_inc_pending(io);
 
diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c
index 166c4e9d99c97..a3f6d3ef38174 100644
--- a/drivers/md/dm-zoned-target.c
+++ b/drivers/md/dm-zoned-target.c
@@ -125,11 +125,10 @@ static int dmz_submit_bio(struct dmz_target *dmz, struct dm_zone *zone,
 	if (dev->flags & DMZ_BDEV_DYING)
 		return -EIO;
 
-	clone = bio_clone_fast(bio, GFP_NOIO, &dmz->bio_set);
+	clone = bio_alloc_clone(dev->bdev, bio, GFP_NOIO, &dmz->bio_set);
 	if (!clone)
 		return -ENOMEM;
 
-	bio_set_dev(clone, dev->bdev);
 	bioctx->dev = dev;
 	clone->bi_iter.bi_sector =
 		dmz_start_sect(dmz->metadata, zone) + dmz_blk2sect(chunk_block);
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 862564a5df74b..ab9cc91931f99 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -520,7 +520,7 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio)
 	struct dm_target_io *tio;
 	struct bio *clone;
 
-	clone = bio_clone_fast(bio, GFP_NOIO, &md->io_bs);
+	clone = bio_alloc_clone(bio->bi_bdev, bio, GFP_NOIO, &md->io_bs);
 
 	tio = clone_to_tio(clone);
 	tio->inside_dm_io = true;
@@ -553,8 +553,8 @@ static struct bio *alloc_tio(struct clone_info *ci, struct dm_target *ti,
 		/* the dm_target_io embedded in ci->io is available */
 		tio = &ci->io->tio;
 	} else {
-		struct bio *clone = bio_clone_fast(ci->bio, gfp_mask,
-						   &ci->io->md->bs);
+		struct bio *clone = bio_alloc_clone(ci->bio->bi_bdev, ci->bio,
+						    gfp_mask, &ci->io->md->bs);
 		if (!clone)
 			return NULL;
 
diff --git a/drivers/md/md-faulty.c b/drivers/md/md-faulty.c
index c0dc6f2ef4a3d..50ad818978a43 100644
--- a/drivers/md/md-faulty.c
+++ b/drivers/md/md-faulty.c
@@ -205,9 +205,9 @@ static bool faulty_make_request(struct mddev *mddev, struct bio *bio)
 		}
 	}
 	if (failit) {
-		struct bio *b = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set);
+		struct bio *b = bio_alloc_clone(conf->rdev->bdev, bio, GFP_NOIO,
+						&mddev->bio_set);
 
-		bio_set_dev(b, conf->rdev->bdev);
 		b->bi_private = bio;
 		b->bi_end_io = faulty_fail;
 		bio = b;
diff --git a/drivers/md/md-multipath.c b/drivers/md/md-multipath.c
index 483a5500f83cd..97fb948e3e741 100644
--- a/drivers/md/md-multipath.c
+++ b/drivers/md/md-multipath.c
@@ -121,10 +121,9 @@ static bool multipath_make_request(struct mddev *mddev, struct bio * bio)
 	}
 	multipath = conf->multipaths + mp_bh->path;
 
-	__bio_clone_fast(&mp_bh->bio, bio, GFP_NOIO);
+	bio_init_clone(multipath->rdev->bdev, &mp_bh->bio, bio, GFP_NOIO);
 
 	mp_bh->bio.bi_iter.bi_sector += multipath->rdev->data_offset;
-	bio_set_dev(&mp_bh->bio, multipath->rdev->bdev);
 	mp_bh->bio.bi_opf |= REQ_FAILFAST_TRANSPORT;
 	mp_bh->bio.bi_end_io = multipath_end_request;
 	mp_bh->bio.bi_private = mp_bh;
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 0a89f072dae0d..f88a9e948f3eb 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -8634,13 +8634,14 @@ static void md_end_io_acct(struct bio *bio)
  */
 void md_account_bio(struct mddev *mddev, struct bio **bio)
 {
+	struct block_device *bdev = (*bio)->bi_bdev;
 	struct md_io_acct *md_io_acct;
 	struct bio *clone;
 
-	if (!blk_queue_io_stat((*bio)->bi_bdev->bd_disk->queue))
+	if (!blk_queue_io_stat(bdev->bd_disk->queue))
 		return;
 
-	clone = bio_clone_fast(*bio, GFP_NOIO, &mddev->io_acct_set);
+	clone = bio_alloc_clone(bdev, *bio, GFP_NOIO, &mddev->io_acct_set);
 	md_io_acct = container_of(clone, struct md_io_acct, bio_clone);
 	md_io_acct->orig_bio = *bio;
 	md_io_acct->start_time = bio_start_io_acct(*bio);
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index e7710fb5befb4..c3288d46948de 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1320,13 +1320,13 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
 	if (!r1bio_existed && blk_queue_io_stat(bio->bi_bdev->bd_disk->queue))
 		r1_bio->start_time = bio_start_io_acct(bio);
 
-	read_bio = bio_clone_fast(bio, gfp, &mddev->bio_set);
+	read_bio = bio_alloc_clone(mirror->rdev->bdev, bio, gfp,
+				   &mddev->bio_set);
 
 	r1_bio->bios[rdisk] = read_bio;
 
 	read_bio->bi_iter.bi_sector = r1_bio->sector +
 		mirror->rdev->data_offset;
-	bio_set_dev(read_bio, mirror->rdev->bdev);
 	read_bio->bi_end_io = raid1_end_read_request;
 	bio_set_op_attrs(read_bio, op, do_sync);
 	if (test_bit(FailFast, &mirror->rdev->flags) &&
@@ -1546,24 +1546,25 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
 			first_clone = 0;
 		}
 
-		if (r1_bio->behind_master_bio)
-			mbio = bio_clone_fast(r1_bio->behind_master_bio,
-					      GFP_NOIO, &mddev->bio_set);
-		else
-			mbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set);
-
 		if (r1_bio->behind_master_bio) {
+			mbio = bio_alloc_clone(rdev->bdev,
+					       r1_bio->behind_master_bio,
+					       GFP_NOIO, &mddev->bio_set);
 			if (test_bit(CollisionCheck, &rdev->flags))
 				wait_for_serialization(rdev, r1_bio);
 			if (test_bit(WriteMostly, &rdev->flags))
 				atomic_inc(&r1_bio->behind_remaining);
-		} else if (mddev->serialize_policy)
-			wait_for_serialization(rdev, r1_bio);
+		} else {
+			mbio = bio_alloc_clone(rdev->bdev, bio, GFP_NOIO,
+					       &mddev->bio_set);
+
+			if (mddev->serialize_policy)
+				wait_for_serialization(rdev, r1_bio);
+		}
 
 		r1_bio->bios[i] = mbio;
 
 		mbio->bi_iter.bi_sector	= (r1_bio->sector + rdev->data_offset);
-		bio_set_dev(mbio, rdev->bdev);
 		mbio->bi_end_io	= raid1_end_write_request;
 		mbio->bi_opf = bio_op(bio) | (bio->bi_opf & (REQ_SYNC | REQ_FUA));
 		if (test_bit(FailFast, &rdev->flags) &&
@@ -2416,12 +2417,12 @@ static int narrow_write_error(struct r1bio *r1_bio, int i)
 		/* Write at 'sector' for 'sectors'*/
 
 		if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
-			wbio = bio_clone_fast(r1_bio->behind_master_bio,
-					      GFP_NOIO,
-					      &mddev->bio_set);
+			wbio = bio_alloc_clone(rdev->bdev,
+					       r1_bio->behind_master_bio,
+					       GFP_NOIO, &mddev->bio_set);
 		} else {
-			wbio = bio_clone_fast(r1_bio->master_bio, GFP_NOIO,
-					      &mddev->bio_set);
+			wbio = bio_alloc_clone(rdev->bdev, r1_bio->master_bio,
+					       GFP_NOIO, &mddev->bio_set);
 		}
 
 		bio_set_op_attrs(wbio, REQ_OP_WRITE, 0);
@@ -2430,7 +2431,6 @@ static int narrow_write_error(struct r1bio *r1_bio, int i)
 
 		bio_trim(wbio, sector - r1_bio->sector, sectors);
 		wbio->bi_iter.bi_sector += rdev->data_offset;
-		bio_set_dev(wbio, rdev->bdev);
 
 		if (submit_bio_wait(wbio) < 0)
 			/* failure! */
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index da07bcbc06d08..5dd2e17e1d0ea 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1208,14 +1208,13 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
 
 	if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue))
 		r10_bio->start_time = bio_start_io_acct(bio);
-	read_bio = bio_clone_fast(bio, gfp, &mddev->bio_set);
+	read_bio = bio_alloc_clone(rdev->bdev, bio, gfp, &mddev->bio_set);
 
 	r10_bio->devs[slot].bio = read_bio;
 	r10_bio->devs[slot].rdev = rdev;
 
 	read_bio->bi_iter.bi_sector = r10_bio->devs[slot].addr +
 		choose_data_offset(r10_bio, rdev);
-	bio_set_dev(read_bio, rdev->bdev);
 	read_bio->bi_end_io = raid10_end_read_request;
 	bio_set_op_attrs(read_bio, op, do_sync);
 	if (test_bit(FailFast, &rdev->flags) &&
@@ -1255,7 +1254,7 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
 	} else
 		rdev = conf->mirrors[devnum].rdev;
 
-	mbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set);
+	mbio = bio_alloc_clone(rdev->bdev, bio, GFP_NOIO, &mddev->bio_set);
 	if (replacement)
 		r10_bio->devs[n_copy].repl_bio = mbio;
 	else
@@ -1263,7 +1262,6 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
 
 	mbio->bi_iter.bi_sector	= (r10_bio->devs[n_copy].addr +
 				   choose_data_offset(r10_bio, rdev));
-	bio_set_dev(mbio, rdev->bdev);
 	mbio->bi_end_io	= raid10_end_write_request;
 	bio_set_op_attrs(mbio, op, do_sync | do_fua);
 	if (!replacement && test_bit(FailFast,
@@ -1812,7 +1810,8 @@ retry_discard:
 		 */
 		if (r10_bio->devs[disk].bio) {
 			struct md_rdev *rdev = conf->mirrors[disk].rdev;
-			mbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set);
+			mbio = bio_alloc_clone(bio->bi_bdev, bio, GFP_NOIO,
+					       &mddev->bio_set);
 			mbio->bi_end_io = raid10_end_discard_request;
 			mbio->bi_private = r10_bio;
 			r10_bio->devs[disk].bio = mbio;
@@ -1825,7 +1824,8 @@ retry_discard:
 		}
 		if (r10_bio->devs[disk].repl_bio) {
 			struct md_rdev *rrdev = conf->mirrors[disk].replacement;
-			rbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set);
+			rbio = bio_alloc_clone(bio->bi_bdev, bio, GFP_NOIO,
+					       &mddev->bio_set);
 			rbio->bi_end_io = raid10_end_discard_request;
 			rbio->bi_private = r10_bio;
 			r10_bio->devs[disk].repl_bio = rbio;
@@ -2892,12 +2892,12 @@ static int narrow_write_error(struct r10bio *r10_bio, int i)
 		if (sectors > sect_to_write)
 			sectors = sect_to_write;
 		/* Write at 'sector' for 'sectors' */
-		wbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set);
+		wbio = bio_alloc_clone(rdev->bdev, bio, GFP_NOIO,
+				       &mddev->bio_set);
 		bio_trim(wbio, sector - bio->bi_iter.bi_sector, sectors);
 		wsector = r10_bio->devs[i].addr + (sector - r10_bio->sector);
 		wbio->bi_iter.bi_sector = wsector +
 				   choose_data_offset(r10_bio, rdev);
-		bio_set_dev(wbio, rdev->bdev);
 		bio_set_op_attrs(wbio, REQ_OP_WRITE, 0);
 
 		if (submit_bio_wait(wbio) < 0)
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 7c119208a2143..8891aaba65964 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -5438,14 +5438,14 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
 		return 0;
 	}
 
-	align_bio = bio_clone_fast(raid_bio, GFP_NOIO, &mddev->io_acct_set);
+	align_bio = bio_alloc_clone(rdev->bdev, raid_bio, GFP_NOIO,
+				    &mddev->io_acct_set);
 	md_io_acct = container_of(align_bio, struct md_io_acct, bio_clone);
 	raid_bio->bi_next = (void *)rdev;
 	if (blk_queue_io_stat(raid_bio->bi_bdev->bd_disk->queue))
 		md_io_acct->start_time = bio_start_io_acct(raid_bio);
 	md_io_acct->orig_bio = raid_bio;
 
-	bio_set_dev(align_bio, rdev->bdev);
 	align_bio->bi_end_io = raid5_align_endio;
 	align_bio->bi_private = md_io_acct;
 	align_bio->bi_iter.bi_sector = sector;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 421d921a05716..dee86911a4bef 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3154,7 +3154,7 @@ struct bio *btrfs_bio_clone(struct bio *bio)
 	struct bio *new;
 
 	/* Bio allocation backed by a bioset does not fail */
-	new = bio_clone_fast(bio, GFP_NOFS, &btrfs_bioset);
+	new = bio_alloc_clone(bio->bi_bdev, bio, GFP_NOFS, &btrfs_bioset);
 	bbio = btrfs_bio(new);
 	btrfs_bio_init(bbio);
 	bbio->iter = bio->bi_iter;
@@ -3169,7 +3169,7 @@ struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size)
 	ASSERT(offset <= UINT_MAX && size <= UINT_MAX);
 
 	/* this will never fail when it's backed by a bioset */
-	bio = bio_clone_fast(orig, GFP_NOFS, &btrfs_bioset);
+	bio = bio_alloc_clone(orig->bi_bdev, orig, GFP_NOFS, &btrfs_bioset);
 	ASSERT(bio);
 
 	bbio = btrfs_bio(bio);
diff --git a/include/linux/bio.h b/include/linux/bio.h
index b814361c957b0..7523aba4ddf7c 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -413,8 +413,10 @@ struct bio *bio_alloc_kiocb(struct kiocb *kiocb, struct block_device *bdev,
 struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned short nr_iovecs);
 extern void bio_put(struct bio *);
 
-int __bio_clone_fast(struct bio *bio, struct bio *bio_src, gfp_t gfp);
-extern struct bio *bio_clone_fast(struct bio *, gfp_t, struct bio_set *);
+struct bio *bio_alloc_clone(struct block_device *bdev, struct bio *bio_src,
+		gfp_t gfp, struct bio_set *bs);
+int bio_init_clone(struct block_device *bdev, struct bio *bio,
+		struct bio *bio_src, gfp_t gfp);
 
 extern struct bio_set fs_bio_set;
 
-- 
GitLab


From b9794a822281944ef3de5b1812a94cbdb8134320 Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <daniel.lezcano@linaro.org>
Date: Fri, 28 Jan 2022 17:35:33 +0100
Subject: [PATCH 0238/1586] powercap/drivers/dtpm: Convert the init table
 section to a simple array

The init table section is freed after the system booted. However the
next changes will make per module the DTPM description, so the table
won't be accessible when the module is loaded.

In order to fix that, we should move the table to the data section
where there are very few entries and that makes strange to add it
there.

The main goal of the table was to keep self-encapsulated code and we
can keep it almost as it by using an array instead.

Suggested-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Link: https://lore.kernel.org/r/20220128163537.212248-2-daniel.lezcano@linaro.org
---
 drivers/powercap/dtpm.c           |  2 ++
 drivers/powercap/dtpm_cpu.c       |  5 ++++-
 drivers/powercap/dtpm_subsys.h    | 18 ++++++++++++++++++
 include/asm-generic/vmlinux.lds.h | 11 -----------
 include/linux/dtpm.h              | 24 +++---------------------
 5 files changed, 27 insertions(+), 33 deletions(-)
 create mode 100644 drivers/powercap/dtpm_subsys.h

diff --git a/drivers/powercap/dtpm.c b/drivers/powercap/dtpm.c
index 8cb45f2d3d78d..0e5c93443c707 100644
--- a/drivers/powercap/dtpm.c
+++ b/drivers/powercap/dtpm.c
@@ -24,6 +24,8 @@
 #include <linux/slab.h>
 #include <linux/mutex.h>
 
+#include "dtpm_subsys.h"
+
 #define DTPM_POWER_LIMIT_FLAG 0
 
 static const char *constraint_name[] = {
diff --git a/drivers/powercap/dtpm_cpu.c b/drivers/powercap/dtpm_cpu.c
index b740866b228d9..5763e0ce2af56 100644
--- a/drivers/powercap/dtpm_cpu.c
+++ b/drivers/powercap/dtpm_cpu.c
@@ -269,4 +269,7 @@ static int __init dtpm_cpu_init(void)
 	return 0;
 }
 
-DTPM_DECLARE(dtpm_cpu, dtpm_cpu_init);
+struct dtpm_subsys_ops dtpm_cpu_ops = {
+	.name = KBUILD_MODNAME,
+	.init = dtpm_cpu_init,
+};
diff --git a/drivers/powercap/dtpm_subsys.h b/drivers/powercap/dtpm_subsys.h
new file mode 100644
index 0000000000000..2a3a2055f60ed
--- /dev/null
+++ b/drivers/powercap/dtpm_subsys.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2022 Linaro Ltd
+ *
+ * Author: Daniel Lezcano <daniel.lezcano@linaro.org>
+ */
+#ifndef ___DTPM_SUBSYS_H__
+#define ___DTPM_SUBSYS_H__
+
+extern struct dtpm_subsys_ops dtpm_cpu_ops;
+
+struct dtpm_subsys_ops *dtpm_subsys[] = {
+#ifdef CONFIG_DTPM_CPU
+	&dtpm_cpu_ops,
+#endif
+};
+
+#endif
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 42f3866bca697..2a10db2f0bc5c 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -321,16 +321,6 @@
 #define THERMAL_TABLE(name)
 #endif
 
-#ifdef CONFIG_DTPM
-#define DTPM_TABLE()							\
-	. = ALIGN(8);							\
-	__dtpm_table = .;						\
-	KEEP(*(__dtpm_table))						\
-	__dtpm_table_end = .;
-#else
-#define DTPM_TABLE()
-#endif
-
 #define KERNEL_DTB()							\
 	STRUCT_ALIGN();							\
 	__dtb_start = .;						\
@@ -723,7 +713,6 @@
 	ACPI_PROBE_TABLE(irqchip)					\
 	ACPI_PROBE_TABLE(timer)						\
 	THERMAL_TABLE(governor)						\
-	DTPM_TABLE()							\
 	EARLYCON_TABLE()						\
 	LSM_TABLE()							\
 	EARLY_LSM_TABLE()						\
diff --git a/include/linux/dtpm.h b/include/linux/dtpm.h
index d37e5d06a357d..506048158a506 100644
--- a/include/linux/dtpm.h
+++ b/include/linux/dtpm.h
@@ -32,29 +32,11 @@ struct dtpm_ops {
 	void (*release)(struct dtpm *);
 };
 
-typedef int (*dtpm_init_t)(void);
-
-struct dtpm_descr {
-	dtpm_init_t init;
+struct dtpm_subsys_ops {
+	const char *name;
+	int (*init)(void);
 };
 
-/* Init section thermal table */
-extern struct dtpm_descr __dtpm_table[];
-extern struct dtpm_descr __dtpm_table_end[];
-
-#define DTPM_TABLE_ENTRY(name, __init)				\
-	static struct dtpm_descr __dtpm_table_entry_##name	\
-	__used __section("__dtpm_table") = {			\
-		.init = __init,					\
-	}
-
-#define DTPM_DECLARE(name, init)	DTPM_TABLE_ENTRY(name, init)
-
-#define for_each_dtpm_table(__dtpm)	\
-	for (__dtpm = __dtpm_table;	\
-	     __dtpm < __dtpm_table_end;	\
-	     __dtpm++)
-
 static inline struct dtpm *to_dtpm(struct powercap_zone *zone)
 {
 	return container_of(zone, struct dtpm, zone);
-- 
GitLab


From 3759ec678e8944dc2ea70cab77a300408f78ae27 Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <daniel.lezcano@linaro.org>
Date: Fri, 28 Jan 2022 17:35:34 +0100
Subject: [PATCH 0239/1586] powercap/drivers/dtpm: Add hierarchy creation

The DTPM framework is available but without a way to configure it.

This change provides a way to create a hierarchy of DTPM node where
the power consumption reflects the sum of the children's power
consumption.

It is up to the platform to specify an array of dtpm nodes where each
element has a pointer to its parent, except the top most one. The type
of the node gives the indication of which initialization callback to
call. At this time, we can create a virtual node, where its purpose is
to be a parent in the hierarchy, and a DT node where the name
describes its path.

In order to ensure a nice self-encapsulation, the DTPM subsys array
contains a couple of initialization functions, one to setup the DTPM
backend and one to initialize it up. With this approach, the DTPM
framework has a very few material to export.

Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Link: https://lore.kernel.org/r/20220128163537.212248-3-daniel.lezcano@linaro.org
---
 drivers/powercap/Kconfig |   1 +
 drivers/powercap/dtpm.c  | 190 ++++++++++++++++++++++++++++++++++++++-
 include/linux/dtpm.h     |  15 ++++
 3 files changed, 203 insertions(+), 3 deletions(-)

diff --git a/drivers/powercap/Kconfig b/drivers/powercap/Kconfig
index 8242e8c5ed77e..b1ca339957e3d 100644
--- a/drivers/powercap/Kconfig
+++ b/drivers/powercap/Kconfig
@@ -46,6 +46,7 @@ config IDLE_INJECT
 
 config DTPM
 	bool "Power capping for Dynamic Thermal Power Management (EXPERIMENTAL)"
+	depends on OF
 	help
 	  This enables support for the power capping for the dynamic
 	  thermal power management userspace engine.
diff --git a/drivers/powercap/dtpm.c b/drivers/powercap/dtpm.c
index 0e5c93443c707..414826a1509b6 100644
--- a/drivers/powercap/dtpm.c
+++ b/drivers/powercap/dtpm.c
@@ -23,6 +23,7 @@
 #include <linux/powercap.h>
 #include <linux/slab.h>
 #include <linux/mutex.h>
+#include <linux/of.h>
 
 #include "dtpm_subsys.h"
 
@@ -463,14 +464,197 @@ int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent)
 	return 0;
 }
 
-static int __init init_dtpm(void)
+static struct dtpm *dtpm_setup_virtual(const struct dtpm_node *hierarchy,
+				       struct dtpm *parent)
 {
+	struct dtpm *dtpm;
+	int ret;
+
+	dtpm = kzalloc(sizeof(*dtpm), GFP_KERNEL);
+	if (!dtpm)
+		return ERR_PTR(-ENOMEM);
+	dtpm_init(dtpm, NULL);
+
+	ret = dtpm_register(hierarchy->name, dtpm, parent);
+	if (ret) {
+		pr_err("Failed to register dtpm node '%s': %d\n",
+		       hierarchy->name, ret);
+		kfree(dtpm);
+		return ERR_PTR(ret);
+	}
+
+	return dtpm;
+}
+
+static struct dtpm *dtpm_setup_dt(const struct dtpm_node *hierarchy,
+				  struct dtpm *parent)
+{
+	struct device_node *np;
+	int i, ret;
+
+	np = of_find_node_by_path(hierarchy->name);
+	if (!np) {
+		pr_err("Failed to find '%s'\n", hierarchy->name);
+		return ERR_PTR(-ENXIO);
+	}
+
+	for (i = 0; i < ARRAY_SIZE(dtpm_subsys); i++) {
+
+		if (!dtpm_subsys[i]->setup)
+			continue;
+
+		ret = dtpm_subsys[i]->setup(parent, np);
+		if (ret) {
+			pr_err("Failed to setup '%s': %d\n", dtpm_subsys[i]->name, ret);
+			of_node_put(np);
+			return ERR_PTR(ret);
+		}
+	}
+
+	of_node_put(np);
+
+	/*
+	 * By returning a NULL pointer, we let know the caller there
+	 * is no child for us as we are a leaf of the tree
+	 */
+	return NULL;
+}
+
+typedef struct dtpm * (*dtpm_node_callback_t)(const struct dtpm_node *, struct dtpm *);
+
+dtpm_node_callback_t dtpm_node_callback[] = {
+	[DTPM_NODE_VIRTUAL] = dtpm_setup_virtual,
+	[DTPM_NODE_DT] = dtpm_setup_dt,
+};
+
+static int dtpm_for_each_child(const struct dtpm_node *hierarchy,
+			       const struct dtpm_node *it, struct dtpm *parent)
+{
+	struct dtpm *dtpm;
+	int i, ret;
+
+	for (i = 0; hierarchy[i].name; i++) {
+
+		if (hierarchy[i].parent != it)
+			continue;
+
+		dtpm = dtpm_node_callback[hierarchy[i].type](&hierarchy[i], parent);
+
+		/*
+		 * A NULL pointer means there is no children, hence we
+		 * continue without going deeper in the recursivity.
+		 */
+		if (!dtpm)
+			continue;
+
+		/*
+		 * There are multiple reasons why the callback could
+		 * fail. The generic glue is abstracting the backend
+		 * and therefore it is not possible to report back or
+		 * take a decision based on the error.  In any case,
+		 * if this call fails, it is not critical in the
+		 * hierarchy creation, we can assume the underlying
+		 * service is not found, so we continue without this
+		 * branch in the tree but with a warning to log the
+		 * information the node was not created.
+		 */
+		if (IS_ERR(dtpm)) {
+			pr_warn("Failed to create '%s' in the hierarchy\n",
+				hierarchy[i].name);
+			continue;
+		}
+
+		ret = dtpm_for_each_child(hierarchy, &hierarchy[i], dtpm);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/**
+ * dtpm_create_hierarchy - Create the dtpm hierarchy
+ * @hierarchy: An array of struct dtpm_node describing the hierarchy
+ *
+ * The function is called by the platform specific code with the
+ * description of the different node in the hierarchy. It creates the
+ * tree in the sysfs filesystem under the powercap dtpm entry.
+ *
+ * The expected tree has the format:
+ *
+ * struct dtpm_node hierarchy[] = {
+ *	[0] { .name = "topmost", type =  DTPM_NODE_VIRTUAL },
+ *	[1] { .name = "package", .type = DTPM_NODE_VIRTUAL, .parent = &hierarchy[0] },
+ *	[2] { .name = "/cpus/cpu0", .type = DTPM_NODE_DT, .parent = &hierarchy[1] },
+ *	[3] { .name = "/cpus/cpu1", .type = DTPM_NODE_DT, .parent = &hierarchy[1] },
+ *	[4] { .name = "/cpus/cpu2", .type = DTPM_NODE_DT, .parent = &hierarchy[1] },
+ *	[5] { .name = "/cpus/cpu3", .type = DTPM_NODE_DT, .parent = &hierarchy[1] },
+ *	[6] { }
+ * };
+ *
+ * The last element is always an empty one and marks the end of the
+ * array.
+ *
+ * Return: zero on success, a negative value in case of error. Errors
+ * are reported back from the underlying functions.
+ */
+int dtpm_create_hierarchy(struct of_device_id *dtpm_match_table)
+{
+	const struct of_device_id *match;
+	const struct dtpm_node *hierarchy;
+	struct device_node *np;
+	int i, ret;
+
+	if (pct)
+		return -EBUSY;
+
 	pct = powercap_register_control_type(NULL, "dtpm", NULL);
 	if (IS_ERR(pct)) {
 		pr_err("Failed to register control type\n");
-		return PTR_ERR(pct);
+		ret = PTR_ERR(pct);
+		goto out_pct;
+	}
+
+	ret = -ENODEV;
+	np = of_find_node_by_path("/");
+	if (!np)
+		goto out_err;
+
+	match = of_match_node(dtpm_match_table, np);
+
+	of_node_put(np);
+
+	if (!match)
+		goto out_err;
+
+	hierarchy = match->data;
+	if (!hierarchy) {
+		ret = -EFAULT;
+		goto out_err;
+	}
+
+	ret = dtpm_for_each_child(hierarchy, NULL, NULL);
+	if (ret)
+		goto out_err;
+	
+	for (i = 0; i < ARRAY_SIZE(dtpm_subsys); i++) {
+
+		if (!dtpm_subsys[i]->init)
+			continue;
+
+		ret = dtpm_subsys[i]->init();
+		if (ret)
+			pr_info("Failed to initialze '%s': %d",
+				dtpm_subsys[i]->name, ret);
 	}
 
 	return 0;
+
+out_err:
+	powercap_unregister_control_type(pct);
+out_pct:
+	pct = NULL;
+	
+	return ret;
 }
-late_initcall(init_dtpm);
+EXPORT_SYMBOL_GPL(dtpm_create_hierarchy);
diff --git a/include/linux/dtpm.h b/include/linux/dtpm.h
index 506048158a506..f7a25c70dd4c0 100644
--- a/include/linux/dtpm.h
+++ b/include/linux/dtpm.h
@@ -32,9 +32,23 @@ struct dtpm_ops {
 	void (*release)(struct dtpm *);
 };
 
+struct device_node;
+
 struct dtpm_subsys_ops {
 	const char *name;
 	int (*init)(void);
+	int (*setup)(struct dtpm *, struct device_node *);
+};
+
+enum DTPM_NODE_TYPE {
+	DTPM_NODE_VIRTUAL = 0,
+	DTPM_NODE_DT,
+};
+
+struct dtpm_node {
+	enum DTPM_NODE_TYPE type;
+	const char *name;
+	struct dtpm_node *parent;
 };
 
 static inline struct dtpm *to_dtpm(struct powercap_zone *zone)
@@ -52,4 +66,5 @@ void dtpm_unregister(struct dtpm *dtpm);
 
 int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent);
 
+int dtpm_create_hierarchy(struct of_device_id *dtpm_match_table);
 #endif
-- 
GitLab


From 73dbcb6e37bf0c43bac8c15fe5bcab2bec2367fb Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <daniel.lezcano@linaro.org>
Date: Fri, 28 Jan 2022 17:35:35 +0100
Subject: [PATCH 0240/1586] powercap/drivers/dtpm: Add CPU DT initialization
 support

Based on the previous DT changes in the core code, use the 'setup'
callback to initialize the CPU DTPM backend.

Code is reorganized to stick to the DTPM table description. No
functional changes.

Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Link: https://lore.kernel.org/r/20220128163537.212248-4-daniel.lezcano@linaro.org
---
 drivers/powercap/dtpm_cpu.c | 36 ++++++++++++++++++++++++++++++------
 1 file changed, 30 insertions(+), 6 deletions(-)

diff --git a/drivers/powercap/dtpm_cpu.c b/drivers/powercap/dtpm_cpu.c
index 5763e0ce2af56..eed5ad688d467 100644
--- a/drivers/powercap/dtpm_cpu.c
+++ b/drivers/powercap/dtpm_cpu.c
@@ -21,6 +21,7 @@
 #include <linux/cpuhotplug.h>
 #include <linux/dtpm.h>
 #include <linux/energy_model.h>
+#include <linux/of.h>
 #include <linux/pm_qos.h>
 #include <linux/slab.h>
 #include <linux/units.h>
@@ -176,6 +177,17 @@ static int cpuhp_dtpm_cpu_offline(unsigned int cpu)
 }
 
 static int cpuhp_dtpm_cpu_online(unsigned int cpu)
+{
+	struct dtpm_cpu *dtpm_cpu;
+
+	dtpm_cpu = per_cpu(dtpm_per_cpu, cpu);
+	if (dtpm_cpu)
+		return dtpm_update_power(&dtpm_cpu->dtpm);
+
+	return 0;
+}
+
+static int __dtpm_cpu_setup(int cpu, struct dtpm *parent)
 {
 	struct dtpm_cpu *dtpm_cpu;
 	struct cpufreq_policy *policy;
@@ -183,6 +195,10 @@ static int cpuhp_dtpm_cpu_online(unsigned int cpu)
 	char name[CPUFREQ_NAME_LEN];
 	int ret = -ENOMEM;
 
+	dtpm_cpu = per_cpu(dtpm_per_cpu, cpu);
+	if (dtpm_cpu)
+		return 0;
+
 	policy = cpufreq_cpu_get(cpu);
 	if (!policy)
 		return 0;
@@ -191,10 +207,6 @@ static int cpuhp_dtpm_cpu_online(unsigned int cpu)
 	if (!pd)
 		return -EINVAL;
 
-	dtpm_cpu = per_cpu(dtpm_per_cpu, cpu);
-	if (dtpm_cpu)
-		return dtpm_update_power(&dtpm_cpu->dtpm);
-
 	dtpm_cpu = kzalloc(sizeof(*dtpm_cpu), GFP_KERNEL);
 	if (!dtpm_cpu)
 		return -ENOMEM;
@@ -207,7 +219,7 @@ static int cpuhp_dtpm_cpu_online(unsigned int cpu)
 
 	snprintf(name, sizeof(name), "cpu%d-cpufreq", dtpm_cpu->cpu);
 
-	ret = dtpm_register(name, &dtpm_cpu->dtpm, NULL);
+	ret = dtpm_register(name, &dtpm_cpu->dtpm, parent);
 	if (ret)
 		goto out_kfree_dtpm_cpu;
 
@@ -231,7 +243,18 @@ out_kfree_dtpm_cpu:
 	return ret;
 }
 
-static int __init dtpm_cpu_init(void)
+static int dtpm_cpu_setup(struct dtpm *dtpm, struct device_node *np)
+{
+	int cpu;
+
+	cpu = of_cpu_node_to_id(np);
+	if (cpu < 0)
+		return 0;
+
+	return __dtpm_cpu_setup(cpu, dtpm);
+}
+
+static int dtpm_cpu_init(void)
 {
 	int ret;
 
@@ -272,4 +295,5 @@ static int __init dtpm_cpu_init(void)
 struct dtpm_subsys_ops dtpm_cpu_ops = {
 	.name = KBUILD_MODNAME,
 	.init = dtpm_cpu_init,
+	.setup = dtpm_cpu_setup,
 };
-- 
GitLab


From e446556173170e675a7a321e76ce5fa3587de724 Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <daniel.lezcano@linaro.org>
Date: Fri, 28 Jan 2022 17:35:36 +0100
Subject: [PATCH 0241/1586] powercap/drivers/dtpm: Add dtpm devfreq with energy
 model support

Currently the dtpm supports the CPUs via cpufreq and the energy
model. This change provides the same for the device which supports
devfreq.

Each device supporting devfreq and having an energy model can be added
to the hierarchy.

The concept is the same as the cpufreq DTPM support: the QoS is used
to aggregate the requests and the energy model gives the value of the
instantaneous power consumption ponderated by the load of the device.

Cc: Chanwoo Choi <cwchoi00@gmail.com>
Cc: Lukasz Luba <lukasz.luba@arm.com>
Cc: Kyungmin Park <kyungmin.park@samsung.com>
Cc: MyungJoo Ham <myungjoo.ham@samsung.com>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Link: https://lore.kernel.org/r/20220128163537.212248-5-daniel.lezcano@linaro.org
---
 drivers/powercap/Kconfig        |   7 ++
 drivers/powercap/Makefile       |   1 +
 drivers/powercap/dtpm_devfreq.c | 203 ++++++++++++++++++++++++++++++++
 drivers/powercap/dtpm_subsys.h  |   4 +
 4 files changed, 215 insertions(+)
 create mode 100644 drivers/powercap/dtpm_devfreq.c

diff --git a/drivers/powercap/Kconfig b/drivers/powercap/Kconfig
index b1ca339957e3d..515e3ceb3393a 100644
--- a/drivers/powercap/Kconfig
+++ b/drivers/powercap/Kconfig
@@ -57,4 +57,11 @@ config DTPM_CPU
 	help
 	  This enables support for CPU power limitation based on
 	  energy model.
+
+config DTPM_DEVFREQ
+	bool "Add device power capping based on the energy model"
+	depends on DTPM && ENERGY_MODEL
+	help
+	  This enables support for device power limitation based on
+	  energy model.
 endif
diff --git a/drivers/powercap/Makefile b/drivers/powercap/Makefile
index fabcf388a8d39..494617cdad885 100644
--- a/drivers/powercap/Makefile
+++ b/drivers/powercap/Makefile
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_DTPM) += dtpm.o
 obj-$(CONFIG_DTPM_CPU) += dtpm_cpu.o
+obj-$(CONFIG_DTPM_DEVFREQ) += dtpm_devfreq.o
 obj-$(CONFIG_POWERCAP)	+= powercap_sys.o
 obj-$(CONFIG_INTEL_RAPL_CORE) += intel_rapl_common.o
 obj-$(CONFIG_INTEL_RAPL) += intel_rapl_msr.o
diff --git a/drivers/powercap/dtpm_devfreq.c b/drivers/powercap/dtpm_devfreq.c
new file mode 100644
index 0000000000000..91276761a31d9
--- /dev/null
+++ b/drivers/powercap/dtpm_devfreq.c
@@ -0,0 +1,203 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2021 Linaro Limited
+ *
+ * Author: Daniel Lezcano <daniel.lezcano@linaro.org>
+ *
+ * The devfreq device combined with the energy model and the load can
+ * give an estimation of the power consumption as well as limiting the
+ * power.
+ *
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/cpumask.h>
+#include <linux/devfreq.h>
+#include <linux/dtpm.h>
+#include <linux/energy_model.h>
+#include <linux/of.h>
+#include <linux/pm_qos.h>
+#include <linux/slab.h>
+#include <linux/units.h>
+
+struct dtpm_devfreq {
+	struct dtpm dtpm;
+	struct dev_pm_qos_request qos_req;
+	struct devfreq *devfreq;
+};
+
+static struct dtpm_devfreq *to_dtpm_devfreq(struct dtpm *dtpm)
+{
+	return container_of(dtpm, struct dtpm_devfreq, dtpm);
+}
+
+static int update_pd_power_uw(struct dtpm *dtpm)
+{
+	struct dtpm_devfreq *dtpm_devfreq = to_dtpm_devfreq(dtpm);
+	struct devfreq *devfreq = dtpm_devfreq->devfreq;
+	struct device *dev = devfreq->dev.parent;
+	struct em_perf_domain *pd = em_pd_get(dev);
+
+	dtpm->power_min = pd->table[0].power;
+	dtpm->power_min *= MICROWATT_PER_MILLIWATT;
+
+	dtpm->power_max = pd->table[pd->nr_perf_states - 1].power;
+	dtpm->power_max *= MICROWATT_PER_MILLIWATT;
+
+	return 0;
+}
+
+static u64 set_pd_power_limit(struct dtpm *dtpm, u64 power_limit)
+{
+	struct dtpm_devfreq *dtpm_devfreq = to_dtpm_devfreq(dtpm);
+	struct devfreq *devfreq = dtpm_devfreq->devfreq;
+	struct device *dev = devfreq->dev.parent;
+	struct em_perf_domain *pd = em_pd_get(dev);
+	unsigned long freq;
+	u64 power;
+	int i;
+
+	for (i = 0; i < pd->nr_perf_states; i++) {
+
+		power = pd->table[i].power * MICROWATT_PER_MILLIWATT;
+		if (power > power_limit)
+			break;
+	}
+
+	freq = pd->table[i - 1].frequency;
+
+	dev_pm_qos_update_request(&dtpm_devfreq->qos_req, freq);
+
+	power_limit = pd->table[i - 1].power * MICROWATT_PER_MILLIWATT;
+
+	return power_limit;
+}
+
+static void _normalize_load(struct devfreq_dev_status *status)
+{
+	if (status->total_time > 0xfffff) {
+		status->total_time >>= 10;
+		status->busy_time >>= 10;
+	}
+
+	status->busy_time <<= 10;
+	status->busy_time /= status->total_time ? : 1;
+
+	status->busy_time = status->busy_time ? : 1;
+	status->total_time = 1024;
+}
+
+static u64 get_pd_power_uw(struct dtpm *dtpm)
+{
+	struct dtpm_devfreq *dtpm_devfreq = to_dtpm_devfreq(dtpm);
+	struct devfreq *devfreq = dtpm_devfreq->devfreq;
+	struct device *dev = devfreq->dev.parent;
+	struct em_perf_domain *pd = em_pd_get(dev);
+	struct devfreq_dev_status status;
+	unsigned long freq;
+	u64 power;
+	int i;
+
+	mutex_lock(&devfreq->lock);
+	status = devfreq->last_status;
+	mutex_unlock(&devfreq->lock);
+
+	freq = DIV_ROUND_UP(status.current_frequency, HZ_PER_KHZ);
+	_normalize_load(&status);
+
+	for (i = 0; i < pd->nr_perf_states; i++) {
+
+		if (pd->table[i].frequency < freq)
+			continue;
+
+		power = pd->table[i].power * MICROWATT_PER_MILLIWATT;
+		power *= status.busy_time;
+		power >>= 10;
+
+		return power;
+	}
+
+	return 0;
+}
+
+static void pd_release(struct dtpm *dtpm)
+{
+	struct dtpm_devfreq *dtpm_devfreq = to_dtpm_devfreq(dtpm);
+
+	if (dev_pm_qos_request_active(&dtpm_devfreq->qos_req))
+		dev_pm_qos_remove_request(&dtpm_devfreq->qos_req);
+
+	kfree(dtpm_devfreq);
+}
+
+static struct dtpm_ops dtpm_ops = {
+	.set_power_uw = set_pd_power_limit,
+	.get_power_uw = get_pd_power_uw,
+	.update_power_uw = update_pd_power_uw,
+	.release = pd_release,
+};
+
+static int __dtpm_devfreq_setup(struct devfreq *devfreq, struct dtpm *parent)
+{
+	struct device *dev = devfreq->dev.parent;
+	struct dtpm_devfreq *dtpm_devfreq;
+	struct em_perf_domain *pd;
+	int ret = -ENOMEM;
+
+	pd = em_pd_get(dev);
+	if (!pd) {
+		ret = dev_pm_opp_of_register_em(dev, NULL);
+		if (ret) {
+			pr_err("No energy model available for '%s'\n", dev_name(dev));
+			return -EINVAL;
+		}
+	}
+
+	dtpm_devfreq = kzalloc(sizeof(*dtpm_devfreq), GFP_KERNEL);
+	if (!dtpm_devfreq)
+		return -ENOMEM;
+
+	dtpm_init(&dtpm_devfreq->dtpm, &dtpm_ops);
+
+	dtpm_devfreq->devfreq = devfreq;
+
+	ret = dtpm_register(dev_name(dev), &dtpm_devfreq->dtpm, parent);
+	if (ret) {
+		pr_err("Failed to register '%s': %d\n", dev_name(dev), ret);
+		kfree(dtpm_devfreq);
+		return ret;
+	}
+
+	ret = dev_pm_qos_add_request(dev, &dtpm_devfreq->qos_req,
+				     DEV_PM_QOS_MAX_FREQUENCY,
+				     PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE);
+	if (ret) {
+		pr_err("Failed to add QoS request: %d\n", ret);
+		goto out_dtpm_unregister;
+	}
+
+	dtpm_update_power(&dtpm_devfreq->dtpm);
+
+	return 0;
+
+out_dtpm_unregister:
+	dtpm_unregister(&dtpm_devfreq->dtpm);
+
+	return ret;
+}
+
+static int dtpm_devfreq_setup(struct dtpm *dtpm, struct device_node *np)
+{
+	struct devfreq *devfreq;
+
+	devfreq = devfreq_get_devfreq_by_node(np);
+	if (IS_ERR(devfreq))
+		return 0;
+
+	return __dtpm_devfreq_setup(devfreq, dtpm);
+}
+
+struct dtpm_subsys_ops dtpm_devfreq_ops = {
+	.name = KBUILD_MODNAME,
+	.setup = dtpm_devfreq_setup,
+};
diff --git a/drivers/powercap/dtpm_subsys.h b/drivers/powercap/dtpm_subsys.h
index 2a3a2055f60ed..db1712938a969 100644
--- a/drivers/powercap/dtpm_subsys.h
+++ b/drivers/powercap/dtpm_subsys.h
@@ -8,11 +8,15 @@
 #define ___DTPM_SUBSYS_H__
 
 extern struct dtpm_subsys_ops dtpm_cpu_ops;
+extern struct dtpm_subsys_ops dtpm_devfreq_ops;
 
 struct dtpm_subsys_ops *dtpm_subsys[] = {
 #ifdef CONFIG_DTPM_CPU
 	&dtpm_cpu_ops,
 #endif
+#ifdef CONFIG_DTPM_DEVFREQ
+	&dtpm_devfreq_ops,
+#endif
 };
 
 #endif
-- 
GitLab


From b9d6c47a2be8d273ecc063afda6e9fd66a35116d Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <daniel.lezcano@linaro.org>
Date: Fri, 28 Jan 2022 17:35:37 +0100
Subject: [PATCH 0242/1586] rockchip/soc/drivers: Add DTPM description for
 rk3399

The DTPM framework does support now the hierarchy description.

The platform specific code can call the hierarchy creation function
with an array of struct dtpm_node pointing to their parent.

This patch provides a description of the big / Little CPUs and the
GPU and tie them together under a virtual 'package' name. Only rk3399 is
described now.

The description could be extended in the future with the memory
controller with devfreq.

The description is always a module and it describes the soft
dependencies. The userspace has to load the softdeps module in the
right order.

Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Reviewed-by; Heiko Stuebner <heiko@sntech.de>
Link: https://lore.kernel.org/r/20220128163537.212248-6-daniel.lezcano@linaro.org
---
 drivers/soc/rockchip/Kconfig  |  8 +++++
 drivers/soc/rockchip/Makefile |  1 +
 drivers/soc/rockchip/dtpm.c   | 59 +++++++++++++++++++++++++++++++++++
 3 files changed, 68 insertions(+)
 create mode 100644 drivers/soc/rockchip/dtpm.c

diff --git a/drivers/soc/rockchip/Kconfig b/drivers/soc/rockchip/Kconfig
index 25eb2c1e31bb2..156ac0e0c8fe5 100644
--- a/drivers/soc/rockchip/Kconfig
+++ b/drivers/soc/rockchip/Kconfig
@@ -34,4 +34,12 @@ config ROCKCHIP_PM_DOMAINS
 
           If unsure, say N.
 
+config ROCKCHIP_DTPM
+	tristate "Rockchip DTPM hierarchy"
+	depends on DTPM && m
+	help
+	 Describe the hierarchy for the Dynamic Thermal Power
+	 Management tree on this platform. That will create all the
+	 power capping capable devices.
+
 endif
diff --git a/drivers/soc/rockchip/Makefile b/drivers/soc/rockchip/Makefile
index 875032f7344ea..05f31a4e743c9 100644
--- a/drivers/soc/rockchip/Makefile
+++ b/drivers/soc/rockchip/Makefile
@@ -5,3 +5,4 @@
 obj-$(CONFIG_ROCKCHIP_GRF) += grf.o
 obj-$(CONFIG_ROCKCHIP_IODOMAIN) += io-domain.o
 obj-$(CONFIG_ROCKCHIP_PM_DOMAINS) += pm_domains.o
+obj-$(CONFIG_ROCKCHIP_DTPM) += dtpm.o
diff --git a/drivers/soc/rockchip/dtpm.c b/drivers/soc/rockchip/dtpm.c
new file mode 100644
index 0000000000000..ebebb748488b7
--- /dev/null
+++ b/drivers/soc/rockchip/dtpm.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2021 Linaro Limited
+ *
+ * Author: Daniel Lezcano <daniel.lezcano@linaro.org>
+ *
+ * DTPM hierarchy description
+ */
+#include <linux/dtpm.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+
+static struct dtpm_node __initdata rk3399_hierarchy[] = {
+	[0]{ .name = "rk3399",
+	     .type = DTPM_NODE_VIRTUAL },
+	[1]{ .name = "package",
+	     .type = DTPM_NODE_VIRTUAL,
+	     .parent = &rk3399_hierarchy[0] },
+	[2]{ .name = "/cpus/cpu@0",
+	     .type = DTPM_NODE_DT,
+	     .parent = &rk3399_hierarchy[1] },
+	[3]{ .name = "/cpus/cpu@1",
+	     .type = DTPM_NODE_DT,
+	     .parent = &rk3399_hierarchy[1] },
+	[4]{ .name = "/cpus/cpu@2",
+	     .type = DTPM_NODE_DT,
+	     .parent = &rk3399_hierarchy[1] },
+	[5]{ .name = "/cpus/cpu@3",
+	     .type = DTPM_NODE_DT,
+	     .parent = &rk3399_hierarchy[1] },
+	[6]{ .name = "/cpus/cpu@100",
+	     .type = DTPM_NODE_DT,
+	     .parent = &rk3399_hierarchy[1] },
+	[7]{ .name = "/cpus/cpu@101",
+	     .type = DTPM_NODE_DT,
+	     .parent = &rk3399_hierarchy[1] },
+	[8]{ .name = "/gpu@ff9a0000",
+	     .type = DTPM_NODE_DT,
+	     .parent = &rk3399_hierarchy[1] },
+	[9]{ /* sentinel */ }
+};
+
+static struct of_device_id __initdata rockchip_dtpm_match_table[] = {
+        { .compatible = "rockchip,rk3399", .data = rk3399_hierarchy },
+        {},
+};
+
+static int __init rockchip_dtpm_init(void)
+{
+	return dtpm_create_hierarchy(rockchip_dtpm_match_table);
+}
+module_init(rockchip_dtpm_init);
+
+MODULE_SOFTDEP("pre: panfrost cpufreq-dt");
+MODULE_DESCRIPTION("Rockchip DTPM driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:dtpm");
+MODULE_AUTHOR("Daniel Lezcano <daniel.lezcano@kernel.org");
-- 
GitLab


From 38b16d6cfe54c820848bcfc999bc5e8a7da1cefb Mon Sep 17 00:00:00 2001
From: Jiasheng Jiang <jiasheng@iscas.ac.cn>
Date: Wed, 26 Jan 2022 09:48:53 +0800
Subject: [PATCH 0243/1586] thermal: int340x: Check for NULL after calling
 kmemdup()

As the potential failure of the allocation, kmemdup() may return NULL.

Then, 'bin_attr_data_vault.private' will be NULL, but
'bin_attr_data_vault.size' is not 0, which is not consistent.

Therefore, it is better to check the return value of kmemdup() to
avoid the confusion.

Fixes: 0ba13c763aac ("thermal/int340x_thermal: Export GDDV")
Signed-off-by: Jiasheng Jiang <jiasheng@iscas.ac.cn>
[ rjw: Subject and changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/thermal/intel/int340x_thermal/int3400_thermal.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c
index 72acb1f618497..82de8bcd368fc 100644
--- a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c
+++ b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c
@@ -464,6 +464,11 @@ static void int3400_setup_gddv(struct int3400_thermal_priv *priv)
 	priv->data_vault = kmemdup(obj->package.elements[0].buffer.pointer,
 				   obj->package.elements[0].buffer.length,
 				   GFP_KERNEL);
+	if (!priv->data_vault) {
+		kfree(buffer.pointer);
+		return;
+	}
+
 	bin_attr_data_vault.private = priv->data_vault;
 	bin_attr_data_vault.size = obj->package.elements[0].buffer.length;
 	kfree(buffer.pointer);
-- 
GitLab


From ae26508651272695a3ab353f75ab9a8daf3da324 Mon Sep 17 00:00:00 2001
From: Kevin Hao <haokexin@gmail.com>
Date: Sun, 23 Jan 2022 20:45:06 +0800
Subject: [PATCH 0244/1586] cpufreq: Move to_gov_attr_set() to cpufreq.h

So it can be reused by other codes.

Signed-off-by: Kevin Hao <haokexin@gmail.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq_governor_attr_set.c | 5 -----
 include/linux/cpufreq.h                     | 5 +++++
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/cpufreq/cpufreq_governor_attr_set.c b/drivers/cpufreq/cpufreq_governor_attr_set.c
index a6f365b9cc1ad..771770ea0ed0b 100644
--- a/drivers/cpufreq/cpufreq_governor_attr_set.c
+++ b/drivers/cpufreq/cpufreq_governor_attr_set.c
@@ -8,11 +8,6 @@
 
 #include "cpufreq_governor.h"
 
-static inline struct gov_attr_set *to_gov_attr_set(struct kobject *kobj)
-{
-	return container_of(kobj, struct gov_attr_set, kobj);
-}
-
 static inline struct governor_attr *to_gov_attr(struct attribute *attr)
 {
 	return container_of(attr, struct governor_attr, attr);
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 1ab29e61b078e..f0dfc0b260ecb 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -658,6 +658,11 @@ struct gov_attr_set {
 /* sysfs ops for cpufreq governors */
 extern const struct sysfs_ops governor_sysfs_ops;
 
+static inline struct gov_attr_set *to_gov_attr_set(struct kobject *kobj)
+{
+	return container_of(kobj, struct gov_attr_set, kobj);
+}
+
 void gov_attr_set_init(struct gov_attr_set *attr_set, struct list_head *list_node);
 void gov_attr_set_get(struct gov_attr_set *attr_set, struct list_head *list_node);
 unsigned int gov_attr_set_put(struct gov_attr_set *attr_set, struct list_head *list_node);
-- 
GitLab


From 53725c4cbd4567423ff6143c5d10300e53ecf52a Mon Sep 17 00:00:00 2001
From: Kevin Hao <haokexin@gmail.com>
Date: Sun, 23 Jan 2022 20:45:07 +0800
Subject: [PATCH 0245/1586] cpufreq: schedutil: Use to_gov_attr_set() to get
 the gov_attr_set

The to_gov_attr_set() has been moved to the cpufreq.h, so use it to get
the gov_attr_set.

Signed-off-by: Kevin Hao <haokexin@gmail.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 kernel/sched/cpufreq_schedutil.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 26778884d9ab1..cffcd08f4ec8f 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -539,7 +539,7 @@ ATTRIBUTE_GROUPS(sugov);
 
 static void sugov_tunables_free(struct kobject *kobj)
 {
-	struct gov_attr_set *attr_set = container_of(kobj, struct gov_attr_set, kobj);
+	struct gov_attr_set *attr_set = to_gov_attr_set(kobj);
 
 	kfree(to_sugov_tunables(attr_set));
 }
-- 
GitLab


From a11cda8e2f184e35e42bbb96a27272b3c5e801d2 Mon Sep 17 00:00:00 2001
From: Yury Norov <yury.norov@gmail.com>
Date: Sun, 23 Jan 2022 10:38:34 -0800
Subject: [PATCH 0246/1586] thermal: intel_powerclamp: don't use
 bitmap_weight() in end_power_clamp()

Don't call bitmap_weight() if the following code can get by
without it.

Signed-off-by: Yury Norov <yury.norov@gmail.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/thermal/intel/intel_powerclamp.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c
index 14256421d98c2..c841ab37e7c6d 100644
--- a/drivers/thermal/intel/intel_powerclamp.c
+++ b/drivers/thermal/intel/intel_powerclamp.c
@@ -556,12 +556,9 @@ static void end_power_clamp(void)
 	 * stop faster.
 	 */
 	clamping = false;
-	if (bitmap_weight(cpu_clamping_mask, num_possible_cpus())) {
-		for_each_set_bit(i, cpu_clamping_mask, num_possible_cpus()) {
-			pr_debug("clamping worker for cpu %d alive, destroy\n",
-				 i);
-			stop_power_clamp_worker(i);
-		}
+	for_each_set_bit(i, cpu_clamping_mask, num_possible_cpus()) {
+		pr_debug("clamping worker for cpu %d alive, destroy\n", i);
+		stop_power_clamp_worker(i);
 	}
 }
 
-- 
GitLab


From 7ddf5e37631ac7a96920f0f8aa3c8c4c289aaa25 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 26 Jan 2022 20:43:59 +0100
Subject: [PATCH 0247/1586] cpufreq: longhaul: Replace acpi_bus_get_device()

Replace acpi_bus_get_device() that is going to be dropped with
acpi_fetch_acpi_dev().

No intentional functional impact.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/longhaul.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/cpufreq/longhaul.c b/drivers/cpufreq/longhaul.c
index c538a153ee828..3e000e1a75c6c 100644
--- a/drivers/cpufreq/longhaul.c
+++ b/drivers/cpufreq/longhaul.c
@@ -668,9 +668,9 @@ static acpi_status longhaul_walk_callback(acpi_handle obj_handle,
 					  u32 nesting_level,
 					  void *context, void **return_value)
 {
-	struct acpi_device *d;
+	struct acpi_device *d = acpi_fetch_acpi_dev(obj_handle);
 
-	if (acpi_bus_get_device(obj_handle, &d))
+	if (!d)
 		return 0;
 
 	*return_value = acpi_driver_data(d);
-- 
GitLab


From 098c874e20be2a4cee3021aa9b3485ed5e1f4d5b Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 26 Jan 2022 20:45:17 +0100
Subject: [PATCH 0248/1586] thermal: Replace acpi_bus_get_device()

Replace acpi_bus_get_device() that is going to be dropped with
acpi_fetch_acpi_dev().

No intentional functional impact.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 .../intel/int340x_thermal/acpi_thermal_rel.c  | 23 ++++++-------------
 1 file changed, 7 insertions(+), 16 deletions(-)

diff --git a/drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.c b/drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.c
index e90690a234c40..01b80331eab66 100644
--- a/drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.c
+++ b/drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.c
@@ -72,7 +72,6 @@ int acpi_parse_trt(acpi_handle handle, int *trt_count, struct trt **trtp,
 	int i;
 	int nr_bad_entries = 0;
 	struct trt *trts;
-	struct acpi_device *adev;
 	union acpi_object *p;
 	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
 	struct acpi_buffer element = { 0, NULL };
@@ -112,12 +111,10 @@ int acpi_parse_trt(acpi_handle handle, int *trt_count, struct trt **trtp,
 		if (!create_dev)
 			continue;
 
-		result = acpi_bus_get_device(trt->source, &adev);
-		if (result)
+		if (!acpi_fetch_acpi_dev(trt->source))
 			pr_warn("Failed to get source ACPI device\n");
 
-		result = acpi_bus_get_device(trt->target, &adev);
-		if (result)
+		if (!acpi_fetch_acpi_dev(trt->target))
 			pr_warn("Failed to get target ACPI device\n");
 	}
 
@@ -149,7 +146,6 @@ int acpi_parse_art(acpi_handle handle, int *art_count, struct art **artp,
 	int i;
 	int nr_bad_entries = 0;
 	struct art *arts;
-	struct acpi_device *adev;
 	union acpi_object *p;
 	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
 	struct acpi_buffer element = { 0, NULL };
@@ -191,16 +187,11 @@ int acpi_parse_art(acpi_handle handle, int *art_count, struct art **artp,
 		if (!create_dev)
 			continue;
 
-		if (art->source) {
-			result = acpi_bus_get_device(art->source, &adev);
-			if (result)
-				pr_warn("Failed to get source ACPI device\n");
-		}
-		if (art->target) {
-			result = acpi_bus_get_device(art->target, &adev);
-			if (result)
-				pr_warn("Failed to get target ACPI device\n");
-		}
+		if (!acpi_fetch_acpi_dev(art->source))
+			pr_warn("Failed to get source ACPI device\n");
+
+		if (!acpi_fetch_acpi_dev(art->target))
+			pr_warn("Failed to get target ACPI device\n");
 	}
 
 	*artp = arts;
-- 
GitLab


From ed945296bb90ce79a7ff331885dd37eafa38b551 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 26 Jan 2022 20:51:27 +0100
Subject: [PATCH 0249/1586] PNP: Replace acpi_bus_get_device()

Replace acpi_bus_get_device() that is going to be dropped with
acpi_fetch_acpi_dev().

No intentional functional impact.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/pnp/pnpacpi/core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/pnp/pnpacpi/core.c b/drivers/pnp/pnpacpi/core.c
index afaf30a3622c0..38928ff7472b0 100644
--- a/drivers/pnp/pnpacpi/core.c
+++ b/drivers/pnp/pnpacpi/core.c
@@ -287,9 +287,9 @@ static acpi_status __init pnpacpi_add_device_handler(acpi_handle handle,
 						     u32 lvl, void *context,
 						     void **rv)
 {
-	struct acpi_device *device;
+	struct acpi_device *device = acpi_fetch_acpi_dev(handle);
 
-	if (acpi_bus_get_device(handle, &device))
+	if (!device)
 		return AE_CTRL_DEPTH;
 	if (acpi_is_pnp_device(device))
 		pnpacpi_add_device(device);
-- 
GitLab


From 70f4169ab421b277caf7429e84f468d8c47aa00a Mon Sep 17 00:00:00 2001
From: Ondrej Mosnacek <omosnace@redhat.com>
Date: Wed, 2 Feb 2022 13:55:29 +0100
Subject: [PATCH 0250/1586] selinux: parse contexts for mount options early

Commit b8b87fd954b4 ("selinux: Fix selinux_sb_mnt_opts_compat()")
started to parse mount options into SIDs in selinux_add_opt() if policy
has already been loaded. Since it's extremely unlikely that anyone would
depend on the ability to set SELinux contexts on fs_context before
loading the policy and then mounting that context after simplify the
logic by always parsing the options early.

Note that the multi-step mounting is only possible with the new
fscontext mount API and wasn't possible before its introduction.

Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 security/selinux/hooks.c | 202 ++++++++++-----------------------------
 1 file changed, 53 insertions(+), 149 deletions(-)

diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index b24a1aeeedd43..ab32303e66181 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -340,7 +340,6 @@ static void inode_free_security(struct inode *inode)
 }
 
 struct selinux_mnt_opts {
-	const char *fscontext, *context, *rootcontext, *defcontext;
 	u32 fscontext_sid;
 	u32 context_sid;
 	u32 rootcontext_sid;
@@ -349,12 +348,7 @@ struct selinux_mnt_opts {
 
 static void selinux_free_mnt_opts(void *mnt_opts)
 {
-	struct selinux_mnt_opts *opts = mnt_opts;
-	kfree(opts->fscontext);
-	kfree(opts->context);
-	kfree(opts->rootcontext);
-	kfree(opts->defcontext);
-	kfree(opts);
+	kfree(mnt_opts);
 }
 
 enum {
@@ -601,17 +595,6 @@ static int bad_option(struct superblock_security_struct *sbsec, char flag,
 	return 0;
 }
 
-static int parse_sid(struct super_block *sb, const char *s, u32 *sid)
-{
-	int rc = security_context_str_to_sid(&selinux_state, s,
-					     sid, GFP_KERNEL);
-	if (rc)
-		pr_warn("SELinux: security_context_str_to_sid"
-		       "(%s) failed for (dev %s, type %s) errno=%d\n",
-		       s, sb ? sb->s_id : "?", sb ? sb->s_type->name : "?", rc);
-	return rc;
-}
-
 /*
  * Allow filesystems with binary mount data to explicitly set mount point
  * labeling information.
@@ -674,49 +657,29 @@ static int selinux_set_mnt_opts(struct super_block *sb,
 	 * than once with different security options.
 	 */
 	if (opts) {
-		if (opts->fscontext) {
-			if (opts->fscontext_sid == SECSID_NULL) {
-				rc = parse_sid(sb, opts->fscontext, &fscontext_sid);
-				if (rc)
-					goto out;
-			} else
-				fscontext_sid = opts->fscontext_sid;
+		if (opts->fscontext_sid) {
+			fscontext_sid = opts->fscontext_sid;
 			if (bad_option(sbsec, FSCONTEXT_MNT, sbsec->sid,
 					fscontext_sid))
 				goto out_double_mount;
 			sbsec->flags |= FSCONTEXT_MNT;
 		}
-		if (opts->context) {
-			if (opts->context_sid == SECSID_NULL) {
-				rc = parse_sid(sb, opts->context, &context_sid);
-				if (rc)
-					goto out;
-			} else
-				context_sid = opts->context_sid;
+		if (opts->context_sid) {
+			context_sid = opts->context_sid;
 			if (bad_option(sbsec, CONTEXT_MNT, sbsec->mntpoint_sid,
 					context_sid))
 				goto out_double_mount;
 			sbsec->flags |= CONTEXT_MNT;
 		}
-		if (opts->rootcontext) {
-			if (opts->rootcontext_sid == SECSID_NULL) {
-				rc = parse_sid(sb, opts->rootcontext, &rootcontext_sid);
-				if (rc)
-					goto out;
-			} else
-				rootcontext_sid = opts->rootcontext_sid;
+		if (opts->rootcontext_sid) {
+			rootcontext_sid = opts->rootcontext_sid;
 			if (bad_option(sbsec, ROOTCONTEXT_MNT, root_isec->sid,
 					rootcontext_sid))
 				goto out_double_mount;
 			sbsec->flags |= ROOTCONTEXT_MNT;
 		}
-		if (opts->defcontext) {
-			if (opts->defcontext_sid == SECSID_NULL) {
-				rc = parse_sid(sb, opts->defcontext, &defcontext_sid);
-				if (rc)
-					goto out;
-			} else
-				defcontext_sid = opts->defcontext_sid;
+		if (opts->defcontext_sid) {
+			defcontext_sid = opts->defcontext_sid;
 			if (bad_option(sbsec, DEFCONTEXT_MNT, sbsec->def_sid,
 					defcontext_sid))
 				goto out_double_mount;
@@ -986,6 +949,8 @@ static int selinux_add_opt(int token, const char *s, void **mnt_opts)
 {
 	struct selinux_mnt_opts *opts = *mnt_opts;
 	bool is_alloc_opts = false;
+	u32 *dst_sid;
+	int rc;
 
 	if (token == Opt_seclabel)
 		/* eaten and completely ignored */
@@ -993,6 +958,11 @@ static int selinux_add_opt(int token, const char *s, void **mnt_opts)
 	if (!s)
 		return -ENOMEM;
 
+	if (!selinux_initialized(&selinux_state)) {
+		pr_warn("SELinux: Unable to set superblock options before the security server is initialized\n");
+		return -EINVAL;
+	}
+
 	if (!opts) {
 		opts = kzalloc(sizeof(*opts), GFP_KERNEL);
 		if (!opts)
@@ -1003,36 +973,34 @@ static int selinux_add_opt(int token, const char *s, void **mnt_opts)
 
 	switch (token) {
 	case Opt_context:
-		if (opts->context || opts->defcontext)
+		if (opts->context_sid || opts->defcontext_sid)
 			goto err;
-		opts->context = s;
-		if (selinux_initialized(&selinux_state))
-			parse_sid(NULL, s, &opts->context_sid);
+		dst_sid = &opts->context_sid;
 		break;
 	case Opt_fscontext:
-		if (opts->fscontext)
+		if (opts->fscontext_sid)
 			goto err;
-		opts->fscontext = s;
-		if (selinux_initialized(&selinux_state))
-			parse_sid(NULL, s, &opts->fscontext_sid);
+		dst_sid = &opts->fscontext_sid;
 		break;
 	case Opt_rootcontext:
-		if (opts->rootcontext)
+		if (opts->rootcontext_sid)
 			goto err;
-		opts->rootcontext = s;
-		if (selinux_initialized(&selinux_state))
-			parse_sid(NULL, s, &opts->rootcontext_sid);
+		dst_sid = &opts->rootcontext_sid;
 		break;
 	case Opt_defcontext:
-		if (opts->context || opts->defcontext)
+		if (opts->context_sid || opts->defcontext_sid)
 			goto err;
-		opts->defcontext = s;
-		if (selinux_initialized(&selinux_state))
-			parse_sid(NULL, s, &opts->defcontext_sid);
+		dst_sid = &opts->defcontext_sid;
 		break;
+	default:
+		WARN_ON(1);
+		return -EINVAL;
 	}
-
-	return 0;
+	rc = security_context_str_to_sid(&selinux_state, s, dst_sid, GFP_KERNEL);
+	if (rc)
+		pr_warn("SELinux: security_context_str_to_sid (%s) failed with errno=%d\n",
+			s, rc);
+	return rc;
 
 err:
 	if (is_alloc_opts) {
@@ -2681,37 +2649,27 @@ static int selinux_sb_mnt_opts_compat(struct super_block *sb, void *mnt_opts)
 	if (!opts)
 		return (sbsec->flags & SE_MNTMASK) ? 1 : 0;
 
-	if (opts->fscontext) {
-		if (opts->fscontext_sid == SECSID_NULL)
-			return 1;
-		else if (bad_option(sbsec, FSCONTEXT_MNT, sbsec->sid,
-				       opts->fscontext_sid))
+	if (opts->fscontext_sid) {
+		if (bad_option(sbsec, FSCONTEXT_MNT, sbsec->sid,
+			       opts->fscontext_sid))
 			return 1;
 	}
-	if (opts->context) {
-		if (opts->context_sid == SECSID_NULL)
-			return 1;
-		else if (bad_option(sbsec, CONTEXT_MNT, sbsec->mntpoint_sid,
-				       opts->context_sid))
+	if (opts->context_sid) {
+		if (bad_option(sbsec, CONTEXT_MNT, sbsec->mntpoint_sid,
+			       opts->context_sid))
 			return 1;
 	}
-	if (opts->rootcontext) {
-		if (opts->rootcontext_sid == SECSID_NULL)
-			return 1;
-		else {
-			struct inode_security_struct *root_isec;
+	if (opts->rootcontext_sid) {
+		struct inode_security_struct *root_isec;
 
-			root_isec = backing_inode_security(sb->s_root);
-			if (bad_option(sbsec, ROOTCONTEXT_MNT, root_isec->sid,
-				       opts->rootcontext_sid))
-				return 1;
-		}
-	}
-	if (opts->defcontext) {
-		if (opts->defcontext_sid == SECSID_NULL)
+		root_isec = backing_inode_security(sb->s_root);
+		if (bad_option(sbsec, ROOTCONTEXT_MNT, root_isec->sid,
+			       opts->rootcontext_sid))
 			return 1;
-		else if (bad_option(sbsec, DEFCONTEXT_MNT, sbsec->def_sid,
-				       opts->defcontext_sid))
+	}
+	if (opts->defcontext_sid) {
+		if (bad_option(sbsec, DEFCONTEXT_MNT, sbsec->def_sid,
+			       opts->defcontext_sid))
 			return 1;
 	}
 	return 0;
@@ -2721,7 +2679,6 @@ static int selinux_sb_remount(struct super_block *sb, void *mnt_opts)
 {
 	struct selinux_mnt_opts *opts = mnt_opts;
 	struct superblock_security_struct *sbsec = selinux_superblock(sb);
-	int rc;
 
 	if (!(sbsec->flags & SE_SBINITIALIZED))
 		return 0;
@@ -2729,47 +2686,24 @@ static int selinux_sb_remount(struct super_block *sb, void *mnt_opts)
 	if (!opts)
 		return 0;
 
-	if (opts->fscontext) {
-		if (opts->fscontext_sid == SECSID_NULL) {
-			rc = parse_sid(sb, opts->fscontext,
-				       &opts->fscontext_sid);
-			if (rc)
-				return rc;
-		}
+	if (opts->fscontext_sid) {
 		if (bad_option(sbsec, FSCONTEXT_MNT, sbsec->sid,
 			       opts->fscontext_sid))
 			goto out_bad_option;
 	}
-	if (opts->context) {
-		if (opts->context_sid == SECSID_NULL) {
-			rc = parse_sid(sb, opts->context, &opts->context_sid);
-			if (rc)
-				return rc;
-		}
+	if (opts->context_sid) {
 		if (bad_option(sbsec, CONTEXT_MNT, sbsec->mntpoint_sid,
 			       opts->context_sid))
 			goto out_bad_option;
 	}
-	if (opts->rootcontext) {
+	if (opts->rootcontext_sid) {
 		struct inode_security_struct *root_isec;
 		root_isec = backing_inode_security(sb->s_root);
-		if (opts->rootcontext_sid == SECSID_NULL) {
-			rc = parse_sid(sb, opts->rootcontext,
-				       &opts->rootcontext_sid);
-			if (rc)
-				return rc;
-		}
 		if (bad_option(sbsec, ROOTCONTEXT_MNT, root_isec->sid,
 			       opts->rootcontext_sid))
 			goto out_bad_option;
 	}
-	if (opts->defcontext) {
-		if (opts->defcontext_sid == SECSID_NULL) {
-			rc = parse_sid(sb, opts->defcontext,
-				       &opts->defcontext_sid);
-			if (rc)
-				return rc;
-		}
+	if (opts->defcontext_sid) {
 		if (bad_option(sbsec, DEFCONTEXT_MNT, sbsec->def_sid,
 			       opts->defcontext_sid))
 			goto out_bad_option;
@@ -2838,42 +2772,12 @@ static int selinux_fs_context_dup(struct fs_context *fc,
 				  struct fs_context *src_fc)
 {
 	const struct selinux_mnt_opts *src = src_fc->security;
-	struct selinux_mnt_opts *opts;
 
 	if (!src)
 		return 0;
 
-	fc->security = kzalloc(sizeof(struct selinux_mnt_opts), GFP_KERNEL);
-	if (!fc->security)
-		return -ENOMEM;
-
-	opts = fc->security;
-
-	if (src->fscontext) {
-		opts->fscontext = kstrdup(src->fscontext, GFP_KERNEL);
-		if (!opts->fscontext)
-			return -ENOMEM;
-	}
-	if (src->context) {
-		opts->context = kstrdup(src->context, GFP_KERNEL);
-		if (!opts->context)
-			return -ENOMEM;
-	}
-	if (src->rootcontext) {
-		opts->rootcontext = kstrdup(src->rootcontext, GFP_KERNEL);
-		if (!opts->rootcontext)
-			return -ENOMEM;
-	}
-	if (src->defcontext) {
-		opts->defcontext = kstrdup(src->defcontext, GFP_KERNEL);
-		if (!opts->defcontext)
-			return -ENOMEM;
-	}
-	opts->fscontext_sid = src->fscontext_sid;
-	opts->context_sid = src->context_sid;
-	opts->rootcontext_sid = src->rootcontext_sid;
-	opts->defcontext_sid = src->defcontext_sid;
-	return 0;
+	fc->security = kmemdup(src, sizeof(*src), GFP_KERNEL);
+	return fc->security ? 0 : -ENOMEM;
 }
 
 static const struct fs_parameter_spec selinux_fs_parameters[] = {
-- 
GitLab


From 783dedf41b79ac7a3a68b51cf6f88cbfd6dc3292 Mon Sep 17 00:00:00 2001
From: Robert Kiraly <me@boldcoder.com>
Date: Thu, 27 Jan 2022 02:15:23 -0800
Subject: [PATCH 0251/1586] ACPI: tables: Add CEDT signature to the list of
 known tables

Add ACPI_SIG_CEDT to table_sigs[] in  "drivers/acpi/tables.c".

Signed-off-by: Robert Kiraly <me@boldcoder.com>
[ rjw: Rebase, new subject and changelog ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/tables.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c
index 0741a4933f622..2d7ed7126faac 100644
--- a/drivers/acpi/tables.c
+++ b/drivers/acpi/tables.c
@@ -545,7 +545,7 @@ static const char table_sigs[][ACPI_NAMESEG_SIZE] __initconst = {
 	ACPI_SIG_WDDT, ACPI_SIG_WDRT, ACPI_SIG_DSDT, ACPI_SIG_FADT,
 	ACPI_SIG_PSDT, ACPI_SIG_RSDT, ACPI_SIG_XSDT, ACPI_SIG_SSDT,
 	ACPI_SIG_IORT, ACPI_SIG_NFIT, ACPI_SIG_HMAT, ACPI_SIG_PPTT,
-	ACPI_SIG_NHLT, ACPI_SIG_AEST };
+	ACPI_SIG_NHLT, ACPI_SIG_AEST, ACPI_SIG_CEDT };
 
 #define ACPI_HEADER_SIZE sizeof(struct acpi_table_header)
 
-- 
GitLab


From 26a03981318d2e0f12456a1041fe3bf8af5416cf Mon Sep 17 00:00:00 2001
From: Dominik Brodowski <linux@dominikbrodowski.net>
Date: Mon, 24 Jan 2022 21:29:46 +0100
Subject: [PATCH 0252/1586] hwrng: core - explicit ordering of initcalls

hw-random device drivers depend on the hw-random core being
initialized. Make this ordering explicit, also for the case
these drivers are built-in. As the core itself depends on
misc_register() which is set up at subsys_initcall time,
advance the initialization of the core (only) to the
fs_initcall() level.

Cc: Matt Mackall <mpm@selenic.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index a3db27916256d..e860e044b19e3 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -638,7 +638,7 @@ static void __exit hwrng_modexit(void)
 	unregister_miscdev();
 }
 
-module_init(hwrng_modinit);
+fs_initcall(hwrng_modinit); /* depends on misc_register() */
 module_exit(hwrng_modexit);
 
 MODULE_DESCRIPTION("H/W Random Number Generator (RNG) driver");
-- 
GitLab


From 6ff6304497c900a0284e3aa89ad86f0ed01f86d8 Mon Sep 17 00:00:00 2001
From: Dominik Brodowski <linux@dominikbrodowski.net>
Date: Mon, 24 Jan 2022 21:29:47 +0100
Subject: [PATCH 0253/1586] hwrng: core - read() callback must be called for
 size of 32 or more bytes

According to <linux/hw_random.h>, the @max parameter of the ->read
callback "is a multiple of 4 and >= 32 bytes". That promise was not
kept by add_early_randomness(), which only asked for 16 bytes. As
rng_buffer_size() is at least 32, we can simply ask for 32 bytes.

Cc: Matt Mackall <mpm@selenic.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/core.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index e860e044b19e3..c2d260b5dd921 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -64,10 +64,9 @@ static size_t rng_buffer_size(void)
 static void add_early_randomness(struct hwrng *rng)
 {
 	int bytes_read;
-	size_t size = min_t(size_t, 16, rng_buffer_size());
 
 	mutex_lock(&reading_mutex);
-	bytes_read = rng_get_data(rng, rng_buffer, size, 0);
+	bytes_read = rng_get_data(rng, rng_buffer, 32, 0);
 	mutex_unlock(&reading_mutex);
 	if (bytes_read > 0)
 		add_device_randomness(rng_buffer, bytes_read);
-- 
GitLab


From c05ac449442dd214e262057a3a49c368e224c384 Mon Sep 17 00:00:00 2001
From: Dominik Brodowski <linux@dominikbrodowski.net>
Date: Mon, 24 Jan 2022 21:29:48 +0100
Subject: [PATCH 0254/1586] hwrng: core - use rng_fillbuf in
 add_early_randomness()

Using rng_buffer in add_early_randomness() may race with rng_dev_read().
Use rng_fillbuf instead, as it is otherwise only used within the kernel
by hwrng_fillfn() and therefore never exposed to userspace.

Cc: Matt Mackall <mpm@selenic.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index c2d260b5dd921..89891ac87af00 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -66,10 +66,10 @@ static void add_early_randomness(struct hwrng *rng)
 	int bytes_read;
 
 	mutex_lock(&reading_mutex);
-	bytes_read = rng_get_data(rng, rng_buffer, 32, 0);
+	bytes_read = rng_get_data(rng, rng_fillbuf, 32, 0);
 	mutex_unlock(&reading_mutex);
 	if (bytes_read > 0)
-		add_device_randomness(rng_buffer, bytes_read);
+		add_device_randomness(rng_fillbuf, bytes_read);
 }
 
 static inline void cleanup_rng(struct kref *kref)
-- 
GitLab


From f41aa47c8bb40bcbad51fc368fe15f96406897cb Mon Sep 17 00:00:00 2001
From: Dominik Brodowski <linux@dominikbrodowski.net>
Date: Mon, 24 Jan 2022 21:29:49 +0100
Subject: [PATCH 0255/1586] hwrng: core - only set cur_rng_set_by_user if it is
 working

In case the user-specified rng device is not working, it is not used;
therefore cur_rng_set_by_user must not be set to 1.

Cc: Matt Mackall <mpm@selenic.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/core.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index 89891ac87af00..9405fcdace384 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -335,8 +335,9 @@ static ssize_t rng_current_store(struct device *dev,
 	} else {
 		list_for_each_entry(rng, &rng_list, list) {
 			if (sysfs_streq(rng->name, buf)) {
-				cur_rng_set_by_user = 1;
 				err = set_current_rng(rng);
+				if (!err)
+					cur_rng_set_by_user = 1;
 				break;
 			}
 		}
-- 
GitLab


From f4f7c153a61782f3fb259d0f39aab91444e555d9 Mon Sep 17 00:00:00 2001
From: Dominik Brodowski <linux@dominikbrodowski.net>
Date: Mon, 24 Jan 2022 21:29:50 +0100
Subject: [PATCH 0256/1586] hwrng: core - break out of hwrng_fillfn if current
 rng is not trusted

For two reasons, current_quality may become zero within the rngd
kernel thread: (1) The user lowers current_quality to 0 by writing
to the sysfs module parameter file (note that increasing the quality
from zero is without effect at the moment), or (2) there are two or
more hwrng devices registered, and those which provide quality>0 are
unregistered, but one with quality==0 remains.

If current_quality is 0, the randomness is not trusted and cannot help
to increase the entropy count. That will lead to continuous calls to
the hwrngd thread and continuous stirring of the input pool with
untrusted bits.

Cc: Matt Mackall <mpm@selenic.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/core.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index 9405fcdace384..bc9f95cbac921 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -429,6 +429,9 @@ static int hwrng_fillfn(void *unused)
 	while (!kthread_should_stop()) {
 		struct hwrng *rng;
 
+		if (!current_quality)
+			break;
+
 		rng = get_current_rng();
 		if (IS_ERR(rng) || !rng)
 			break;
-- 
GitLab


From bd9305b0cb69bfe98885a63a9e6231ae92e822e2 Mon Sep 17 00:00:00 2001
From: Srujana Challa <schalla@marvell.com>
Date: Tue, 25 Jan 2022 23:56:22 +0530
Subject: [PATCH 0257/1586] crypto: octeontx2 - CN10K CPT to RNM workaround

When software sets CPT_AF_CTL[RNM_REQ_EN]=1 and RNM in not producing
entropy(i.e., RNM_ENTROPY_STATUS[NORMAL_CNT] < 0x40), the first cycle of
the response may be lost due to a conditional clocking issue. Due to
this, the subsequent random number stream will be corrupted. So, this
patch adds support to ensure RNM_ENTROPY_STATUS[NORMAL_CNT] = 0x40
before writing CPT_AF_CTL[RNM_REQ_EN] = 1, as a workaround.

Signed-off-by: Srujana Challa <schalla@marvell.com>
Signed-off-by: Shijith Thotton <sthotton@marvell.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 .../marvell/octeontx2/otx2_cptpf_ucode.c      | 43 ++++++++++++++++++-
 1 file changed, 42 insertions(+), 1 deletion(-)

diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c
index 4c8ebdf671ca8..addc760501a95 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c
@@ -1076,6 +1076,39 @@ static void delete_engine_grps(struct pci_dev *pdev,
 		delete_engine_group(&pdev->dev, &eng_grps->grp[i]);
 }
 
+#define PCI_DEVID_CN10K_RNM 0xA098
+#define RNM_ENTROPY_STATUS  0x8
+
+static void rnm_to_cpt_errata_fixup(struct device *dev)
+{
+	struct pci_dev *pdev;
+	void __iomem *base;
+	int timeout = 5000;
+
+	pdev = pci_get_device(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_CN10K_RNM, NULL);
+	if (!pdev)
+		return;
+
+	base = pci_ioremap_bar(pdev, 0);
+	if (!base)
+		goto put_pdev;
+
+	while ((readq(base + RNM_ENTROPY_STATUS) & 0x7F) != 0x40) {
+		cpu_relax();
+		udelay(1);
+		timeout--;
+		if (!timeout) {
+			dev_warn(dev, "RNM is not producing entropy\n");
+			break;
+		}
+	}
+
+	iounmap(base);
+
+put_pdev:
+	pci_dev_put(pdev);
+}
+
 int otx2_cpt_get_eng_grp(struct otx2_cpt_eng_grps *eng_grps, int eng_type)
 {
 
@@ -1189,9 +1222,17 @@ int otx2_cpt_create_eng_grps(struct otx2_cptpf_dev *cptpf,
 
 	if (is_dev_otx2(pdev))
 		goto unlock;
+
+	/*
+	 * Ensure RNM_ENTROPY_STATUS[NORMAL_CNT] = 0x40 before writing
+	 * CPT_AF_CTL[RNM_REQ_EN] = 1 as a workaround for HW errata.
+	 */
+	rnm_to_cpt_errata_fixup(&pdev->dev);
+
 	/*
 	 * Configure engine group mask to allow context prefetching
-	 * for the groups.
+	 * for the groups and enable random number request, to enable
+	 * CPT to request random numbers from RNM.
 	 */
 	otx2_cpt_write_af_reg(&cptpf->afpf_mbox, pdev, CPT_AF_CTL,
 			      OTX2_CPT_ALL_ENG_GRPS_MASK << 3 | BIT_ULL(16),
-- 
GitLab


From 9eef6e972a32bc2454a22e8f0e8d4e7f55ff6613 Mon Sep 17 00:00:00 2001
From: Srujana Challa <schalla@marvell.com>
Date: Tue, 25 Jan 2022 23:56:23 +0530
Subject: [PATCH 0258/1586] crypto: octeontx2 - disable DMA black hole on an
 DMA fault

When CPT_AF_DIAG[FLT_DIS] = 0 and a CPT engine access to
LLC/DRAM encounters a fault/poison, a rare case may result
in unpredictable data being delivered to a CPT engine.
So, this patch adds code to set FLT_DIS as a workaround.

Signed-off-by: Srujana Challa <schalla@marvell.com>
Signed-off-by: Shijith Thotton <sthotton@marvell.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c | 13 +++++++++++++
 drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c |  1 +
 2 files changed, 14 insertions(+)

diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c
index addc760501a95..ede84abf2988c 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c
@@ -1144,6 +1144,7 @@ int otx2_cpt_create_eng_grps(struct otx2_cptpf_dev *cptpf,
 	struct otx2_cpt_engines engs[OTX2_CPT_MAX_ETYPES_PER_GRP] = { {0} };
 	struct pci_dev *pdev = cptpf->pdev;
 	struct fw_info_t fw_info;
+	u64 reg_val;
 	int ret = 0;
 
 	mutex_lock(&eng_grps->lock);
@@ -1244,6 +1245,18 @@ int otx2_cpt_create_eng_grps(struct otx2_cptpf_dev *cptpf,
 	 */
 	otx2_cpt_write_af_reg(&cptpf->afpf_mbox, pdev, CPT_AF_CTX_FLUSH_TIMER,
 			      CTX_FLUSH_TIMER_CNT, BLKADDR_CPT0);
+
+	/*
+	 * Set CPT_AF_DIAG[FLT_DIS], as a workaround for HW errata, when
+	 * CPT_AF_DIAG[FLT_DIS] = 0 and a CPT engine access to LLC/DRAM
+	 * encounters a fault/poison, a rare case may result in
+	 * unpredictable data being delivered to a CPT engine.
+	 */
+	otx2_cpt_read_af_reg(&cptpf->afpf_mbox, pdev, CPT_AF_DIAG, &reg_val,
+			     BLKADDR_CPT0);
+	otx2_cpt_write_af_reg(&cptpf->afpf_mbox, pdev, CPT_AF_DIAG,
+			      reg_val | BIT_ULL(24), BLKADDR_CPT0);
+
 	mutex_unlock(&eng_grps->lock);
 	return 0;
 
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c
index a73a8017e0ee9..a79201a9a6f03 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c
@@ -605,6 +605,7 @@ static bool is_valid_offset(struct rvu *rvu, struct cpt_rd_wr_reg_msg *req)
 	} else if (!(req->hdr.pcifunc & RVU_PFVF_FUNC_MASK)) {
 		/* Registers that can be accessed from PF */
 		switch (offset) {
+		case CPT_AF_DIAG:
 		case CPT_AF_CTL:
 		case CPT_AF_PF_FUNC:
 		case CPT_AF_BLK_RST:
-- 
GitLab


From e236ab0d43622a8a5a8ff06630fd467b444a9db9 Mon Sep 17 00:00:00 2001
From: Srujana Challa <schalla@marvell.com>
Date: Tue, 25 Jan 2022 23:56:24 +0530
Subject: [PATCH 0259/1586] crypto: octeontx2 - increase CPT HW instruction
 queue length

LDWB is getting incorrectly used in HW when
CPT_AF_LF()_PTR_CTL[IQB_LDWB]=1 and CPT instruction queue has less than
320 free entries. So, increase HW instruction queue size by 320 and give
320 entries less for SW/NIX RX as a SW workaround.

Signed-off-by: Srujana Challa <schalla@marvell.com>
Signed-off-by: Shijith Thotton <sthotton@marvell.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/marvell/octeontx2/otx2_cptlf.h | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptlf.h b/drivers/crypto/marvell/octeontx2/otx2_cptlf.h
index b691b6c1d5c45..4fcaf61a70e36 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptlf.h
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptlf.h
@@ -26,12 +26,22 @@
  */
 #define OTX2_CPT_INST_QLEN_MSGS	((OTX2_CPT_SIZE_DIV40 - 1) * 40)
 
+/*
+ * LDWB is getting incorrectly used when IQB_LDWB = 1 and CPT instruction
+ * queue has less than 320 free entries. So, increase HW instruction queue
+ * size by 320 and give 320 entries less for SW/NIX RX as a workaround.
+ */
+#define OTX2_CPT_INST_QLEN_EXTRA_BYTES  (320 * OTX2_CPT_INST_SIZE)
+#define OTX2_CPT_EXTRA_SIZE_DIV40       (320/40)
+
 /* CPT instruction queue length in bytes */
-#define OTX2_CPT_INST_QLEN_BYTES (OTX2_CPT_SIZE_DIV40 * 40 * \
-				  OTX2_CPT_INST_SIZE)
+#define OTX2_CPT_INST_QLEN_BYTES                                               \
+		((OTX2_CPT_SIZE_DIV40 * 40 * OTX2_CPT_INST_SIZE) +             \
+		OTX2_CPT_INST_QLEN_EXTRA_BYTES)
 
 /* CPT instruction group queue length in bytes */
-#define OTX2_CPT_INST_GRP_QLEN_BYTES (OTX2_CPT_SIZE_DIV40 * 16)
+#define OTX2_CPT_INST_GRP_QLEN_BYTES                                           \
+		((OTX2_CPT_SIZE_DIV40 + OTX2_CPT_EXTRA_SIZE_DIV40) * 16)
 
 /* CPT FC length in bytes */
 #define OTX2_CPT_Q_FC_LEN 128
@@ -179,7 +189,8 @@ static inline void otx2_cptlf_do_set_iqueue_size(struct otx2_cptlf_info *lf)
 {
 	union otx2_cptx_lf_q_size lf_q_size = { .u = 0x0 };
 
-	lf_q_size.s.size_div40 = OTX2_CPT_SIZE_DIV40;
+	lf_q_size.s.size_div40 = OTX2_CPT_SIZE_DIV40 +
+				 OTX2_CPT_EXTRA_SIZE_DIV40;
 	otx2_cpt_write64(lf->lfs->reg_base, BLKADDR_CPT0, lf->slot,
 			 OTX2_CPT_LF_Q_SIZE, lf_q_size.u);
 }
-- 
GitLab


From 8daa399edeed4cfa792ccea12beda50d445ab6a0 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Thu, 27 Jan 2022 10:52:11 +0100
Subject: [PATCH 0260/1586] crypto: arm64/aes-neon-ctr - improve handling of
 single tail block

Instead of falling back to C code to do a memcpy of the output of the
last block, handle this in the asm code directly if possible, which is
the case if the entire input is longer than 16 bytes.

Cc: Nathan Huckleberry <nhuck@google.com>
Cc: Eric Biggers <ebiggers@google.com>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm64/crypto/aes-glue.c  | 21 +++++++--------------
 arch/arm64/crypto/aes-modes.S | 18 +++++++++++++-----
 2 files changed, 20 insertions(+), 19 deletions(-)

diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
index 30b7cc6a70790..7d66f8babb1d6 100644
--- a/arch/arm64/crypto/aes-glue.c
+++ b/arch/arm64/crypto/aes-glue.c
@@ -24,7 +24,6 @@
 #ifdef USE_V8_CRYPTO_EXTENSIONS
 #define MODE			"ce"
 #define PRIO			300
-#define STRIDE			5
 #define aes_expandkey		ce_aes_expandkey
 #define aes_ecb_encrypt		ce_aes_ecb_encrypt
 #define aes_ecb_decrypt		ce_aes_ecb_decrypt
@@ -42,7 +41,6 @@ MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions");
 #else
 #define MODE			"neon"
 #define PRIO			200
-#define STRIDE			4
 #define aes_ecb_encrypt		neon_aes_ecb_encrypt
 #define aes_ecb_decrypt		neon_aes_ecb_decrypt
 #define aes_cbc_encrypt		neon_aes_cbc_encrypt
@@ -89,7 +87,7 @@ asmlinkage void aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
 				int rounds, int bytes, u8 const iv[]);
 
 asmlinkage void aes_ctr_encrypt(u8 out[], u8 const in[], u32 const rk[],
-				int rounds, int bytes, u8 ctr[], u8 finalbuf[]);
+				int rounds, int bytes, u8 ctr[]);
 
 asmlinkage void aes_xts_encrypt(u8 out[], u8 const in[], u32 const rk1[],
 				int rounds, int bytes, u32 const rk2[], u8 iv[],
@@ -458,26 +456,21 @@ static int __maybe_unused ctr_encrypt(struct skcipher_request *req)
 		unsigned int nbytes = walk.nbytes;
 		u8 *dst = walk.dst.virt.addr;
 		u8 buf[AES_BLOCK_SIZE];
-		unsigned int tail;
 
 		if (unlikely(nbytes < AES_BLOCK_SIZE))
-			src = memcpy(buf, src, nbytes);
+			src = dst = memcpy(buf + sizeof(buf) - nbytes,
+					   src, nbytes);
 		else if (nbytes < walk.total)
 			nbytes &= ~(AES_BLOCK_SIZE - 1);
 
 		kernel_neon_begin();
 		aes_ctr_encrypt(dst, src, ctx->key_enc, rounds, nbytes,
-				walk.iv, buf);
+				walk.iv);
 		kernel_neon_end();
 
-		tail = nbytes % (STRIDE * AES_BLOCK_SIZE);
-		if (tail > 0 && tail < AES_BLOCK_SIZE)
-			/*
-			 * The final partial block could not be returned using
-			 * an overlapping store, so it was passed via buf[]
-			 * instead.
-			 */
-			memcpy(dst + nbytes - tail, buf, tail);
+		if (unlikely(nbytes < AES_BLOCK_SIZE))
+			memcpy(walk.dst.virt.addr,
+			       buf + sizeof(buf) - nbytes, nbytes);
 
 		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
 	}
diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S
index ff01f0167ba2c..dc35eb0245c55 100644
--- a/arch/arm64/crypto/aes-modes.S
+++ b/arch/arm64/crypto/aes-modes.S
@@ -321,7 +321,7 @@ AES_FUNC_END(aes_cbc_cts_decrypt)
 
 	/*
 	 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
-	 *		   int bytes, u8 ctr[], u8 finalbuf[])
+	 *		   int bytes, u8 ctr[])
 	 */
 
 AES_FUNC_START(aes_ctr_encrypt)
@@ -414,8 +414,8 @@ ST5(	st1		{v4.16b}, [x0], #16		)
 .Lctrtail:
 	/* XOR up to MAX_STRIDE * 16 - 1 bytes of in/output with v0 ... v3/v4 */
 	mov		x16, #16
-	ands		x13, x4, #0xf
-	csel		x13, x13, x16, ne
+	ands		x6, x4, #0xf
+	csel		x13, x6, x16, ne
 
 ST5(	cmp		w4, #64 - (MAX_STRIDE << 4)	)
 ST5(	csel		x14, x16, xzr, gt		)
@@ -424,10 +424,10 @@ ST5(	csel		x14, x16, xzr, gt		)
 	cmp		w4, #32 - (MAX_STRIDE << 4)
 	csel		x16, x16, xzr, gt
 	cmp		w4, #16 - (MAX_STRIDE << 4)
-	ble		.Lctrtail1x
 
 	adr_l		x12, .Lcts_permute_table
 	add		x12, x12, x13
+	ble		.Lctrtail1x
 
 ST5(	ld1		{v5.16b}, [x1], x14		)
 	ld1		{v6.16b}, [x1], x15
@@ -462,11 +462,19 @@ ST5(	st1		{v5.16b}, [x0], x14		)
 	b		.Lctrout
 
 .Lctrtail1x:
-	csel		x0, x0, x6, eq		// use finalbuf if less than a full block
+	sub		x7, x6, #16
+	csel		x6, x6, x7, eq
+	add		x1, x1, x6
+	add		x0, x0, x6
 	ld1		{v5.16b}, [x1]
+	ld1		{v6.16b}, [x0]
 ST5(	mov		v3.16b, v4.16b			)
 	encrypt_block	v3, w3, x2, x8, w7
+	ld1		{v10.16b-v11.16b}, [x12]
+	tbl		v3.16b, {v3.16b}, v10.16b
+	sshr		v11.16b, v11.16b, #7
 	eor		v5.16b, v5.16b, v3.16b
+	bif		v5.16b, v6.16b, v11.16b
 	st1		{v5.16b}, [x0]
 	b		.Lctrout
 AES_FUNC_END(aes_ctr_encrypt)
-- 
GitLab


From c8bf850e991a1838964ed8d8426f96cf8d3fbab5 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Thu, 27 Jan 2022 12:35:43 +0100
Subject: [PATCH 0261/1586] crypto: arm/aes-neonbs-ctr - deal with
 non-multiples of AES block size

Instead of falling back to C code to deal with the final bit of input
that is not a round multiple of the block size, handle this in the asm
code, permitting us to use overlapping loads and stores for performance,
and implement the 16-byte wide XOR using a single NEON instruction.

Since NEON loads and stores have a natural width of 16 bytes, we need to
handle inputs of less than 16 bytes in a special way, but this rarely
occurs in practice so it does not impact performance. All other input
sizes can be consumed directly by the NEON asm code, although it should
be noted that the core AES transform can still only process 128 bytes (8
AES blocks) at a time.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm/crypto/aes-neonbs-core.S | 105 ++++++++++++++++++------------
 arch/arm/crypto/aes-neonbs-glue.c |  35 ++++------
 2 files changed, 77 insertions(+), 63 deletions(-)

diff --git a/arch/arm/crypto/aes-neonbs-core.S b/arch/arm/crypto/aes-neonbs-core.S
index 7d0cc7f226a50..7b61032f29fad 100644
--- a/arch/arm/crypto/aes-neonbs-core.S
+++ b/arch/arm/crypto/aes-neonbs-core.S
@@ -758,29 +758,24 @@ ENTRY(aesbs_cbc_decrypt)
 ENDPROC(aesbs_cbc_decrypt)
 
 	.macro		next_ctr, q
-	vmov.32		\q\()h[1], r10
+	vmov		\q\()h, r9, r10
 	adds		r10, r10, #1
-	vmov.32		\q\()h[0], r9
 	adcs		r9, r9, #0
-	vmov.32		\q\()l[1], r8
+	vmov		\q\()l, r7, r8
 	adcs		r8, r8, #0
-	vmov.32		\q\()l[0], r7
 	adc		r7, r7, #0
 	vrev32.8	\q, \q
 	.endm
 
 	/*
 	 * aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
-	 *		     int rounds, int blocks, u8 ctr[], u8 final[])
+	 *		     int rounds, int bytes, u8 ctr[])
 	 */
 ENTRY(aesbs_ctr_encrypt)
 	mov		ip, sp
 	push		{r4-r10, lr}
 
-	ldm		ip, {r5-r7}		// load args 4-6
-	teq		r7, #0
-	addne		r5, r5, #1		// one extra block if final != 0
-
+	ldm		ip, {r5, r6}		// load args 4-5
 	vld1.8		{q0}, [r6]		// load counter
 	vrev32.8	q1, q0
 	vmov		r9, r10, d3
@@ -792,20 +787,19 @@ ENTRY(aesbs_ctr_encrypt)
 	adc		r7, r7, #0
 
 99:	vmov		q1, q0
+	sub		lr, r5, #1
 	vmov		q2, q0
+	adr		ip, 0f
 	vmov		q3, q0
+	and		lr, lr, #112
 	vmov		q4, q0
+	cmp		r5, #112
 	vmov		q5, q0
+	sub		ip, ip, lr, lsl #1
 	vmov		q6, q0
+	add		ip, ip, lr, lsr #2
 	vmov		q7, q0
-
-	adr		ip, 0f
-	sub		lr, r5, #1
-	and		lr, lr, #7
-	cmp		r5, #8
-	sub		ip, ip, lr, lsl #5
-	sub		ip, ip, lr, lsl #2
-	movlt		pc, ip			// computed goto if blocks < 8
+	movle		pc, ip			// computed goto if bytes < 112
 
 	next_ctr	q1
 	next_ctr	q2
@@ -820,12 +814,14 @@ ENTRY(aesbs_ctr_encrypt)
 	bl		aesbs_encrypt8
 
 	adr		ip, 1f
-	and		lr, r5, #7
-	cmp		r5, #8
-	movgt		r4, #0
-	ldrle		r4, [sp, #40]		// load final in the last round
-	sub		ip, ip, lr, lsl #2
-	movlt		pc, ip			// computed goto if blocks < 8
+	sub		lr, r5, #1
+	cmp		r5, #128
+	bic		lr, lr, #15
+	ands		r4, r5, #15		// preserves C flag
+	teqcs		r5, r5			// set Z flag if not last iteration
+	sub		ip, ip, lr, lsr #2
+	rsb		r4, r4, #16
+	movcc		pc, ip			// computed goto if bytes < 128
 
 	vld1.8		{q8}, [r1]!
 	vld1.8		{q9}, [r1]!
@@ -834,46 +830,70 @@ ENTRY(aesbs_ctr_encrypt)
 	vld1.8		{q12}, [r1]!
 	vld1.8		{q13}, [r1]!
 	vld1.8		{q14}, [r1]!
-	teq		r4, #0			// skip last block if 'final'
-1:	bne		2f
+1:	subne		r1, r1, r4
 	vld1.8		{q15}, [r1]!
 
-2:	adr		ip, 3f
-	cmp		r5, #8
-	sub		ip, ip, lr, lsl #3
-	movlt		pc, ip			// computed goto if blocks < 8
+	add		ip, ip, #2f - 1b
 
 	veor		q0, q0, q8
-	vst1.8		{q0}, [r0]!
 	veor		q1, q1, q9
-	vst1.8		{q1}, [r0]!
 	veor		q4, q4, q10
-	vst1.8		{q4}, [r0]!
 	veor		q6, q6, q11
-	vst1.8		{q6}, [r0]!
 	veor		q3, q3, q12
-	vst1.8		{q3}, [r0]!
 	veor		q7, q7, q13
-	vst1.8		{q7}, [r0]!
 	veor		q2, q2, q14
+	bne		3f
+	veor		q5, q5, q15
+
+	movcc		pc, ip			// computed goto if bytes < 128
+
+	vst1.8		{q0}, [r0]!
+	vst1.8		{q1}, [r0]!
+	vst1.8		{q4}, [r0]!
+	vst1.8		{q6}, [r0]!
+	vst1.8		{q3}, [r0]!
+	vst1.8		{q7}, [r0]!
 	vst1.8		{q2}, [r0]!
-	teq		r4, #0			// skip last block if 'final'
-	W(bne)		5f
-3:	veor		q5, q5, q15
+2:	subne		r0, r0, r4
 	vst1.8		{q5}, [r0]!
 
-4:	next_ctr	q0
+	next_ctr	q0
 
-	subs		r5, r5, #8
+	subs		r5, r5, #128
 	bgt		99b
 
 	vst1.8		{q0}, [r6]
 	pop		{r4-r10, pc}
 
-5:	vst1.8		{q5}, [r4]
-	b		4b
+3:	adr		lr, .Lpermute_table + 16
+	cmp		r5, #16			// Z flag remains cleared
+	sub		lr, lr, r4
+	vld1.8		{q8-q9}, [lr]
+	vtbl.8		d16, {q5}, d16
+	vtbl.8		d17, {q5}, d17
+	veor		q5, q8, q15
+	bcc		4f			// have to reload prev if R5 < 16
+	vtbx.8		d10, {q2}, d18
+	vtbx.8		d11, {q2}, d19
+	mov		pc, ip			// branch back to VST sequence
+
+4:	sub		r0, r0, r4
+	vshr.s8		q9, q9, #7		// create mask for VBIF
+	vld1.8		{q8}, [r0]		// reload
+	vbif		q5, q8, q9
+	vst1.8		{q5}, [r0]
+	pop		{r4-r10, pc}
 ENDPROC(aesbs_ctr_encrypt)
 
+	.align		6
+.Lpermute_table:
+	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+	.byte		0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
+	.byte		0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
+	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+
 	.macro		next_tweak, out, in, const, tmp
 	vshr.s64	\tmp, \in, #63
 	vand		\tmp, \tmp, \const
@@ -888,6 +908,7 @@ ENDPROC(aesbs_ctr_encrypt)
 	 * aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 	 *		     int blocks, u8 iv[], int reorder_last_tweak)
 	 */
+	.align		6
 __xts_prepare8:
 	vld1.8		{q14}, [r7]		// load iv
 	vmov.i32	d30, #0x87		// compose tweak mask vector
diff --git a/arch/arm/crypto/aes-neonbs-glue.c b/arch/arm/crypto/aes-neonbs-glue.c
index 5c6cd3c63cbc1..f00f042ef3570 100644
--- a/arch/arm/crypto/aes-neonbs-glue.c
+++ b/arch/arm/crypto/aes-neonbs-glue.c
@@ -37,7 +37,7 @@ asmlinkage void aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[],
 				  int rounds, int blocks, u8 iv[]);
 
 asmlinkage void aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
-				  int rounds, int blocks, u8 ctr[], u8 final[]);
+				  int rounds, int blocks, u8 ctr[]);
 
 asmlinkage void aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[],
 				  int rounds, int blocks, u8 iv[], int);
@@ -243,32 +243,25 @@ static int ctr_encrypt(struct skcipher_request *req)
 	err = skcipher_walk_virt(&walk, req, false);
 
 	while (walk.nbytes > 0) {
-		unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
-		u8 *final = (walk.total % AES_BLOCK_SIZE) ? buf : NULL;
+		const u8 *src = walk.src.virt.addr;
+		u8 *dst = walk.dst.virt.addr;
+		int bytes = walk.nbytes;
 
-		if (walk.nbytes < walk.total) {
-			blocks = round_down(blocks,
-					    walk.stride / AES_BLOCK_SIZE);
-			final = NULL;
-		}
+		if (unlikely(bytes < AES_BLOCK_SIZE))
+			src = dst = memcpy(buf + sizeof(buf) - bytes,
+					   src, bytes);
+		else if (walk.nbytes < walk.total)
+			bytes &= ~(8 * AES_BLOCK_SIZE - 1);
 
 		kernel_neon_begin();
-		aesbs_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
-				  ctx->rk, ctx->rounds, blocks, walk.iv, final);
+		aesbs_ctr_encrypt(dst, src, ctx->rk, ctx->rounds, bytes, walk.iv);
 		kernel_neon_end();
 
-		if (final) {
-			u8 *dst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
-			u8 *src = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
+		if (unlikely(bytes < AES_BLOCK_SIZE))
+			memcpy(walk.dst.virt.addr,
+			       buf + sizeof(buf) - bytes, bytes);
 
-			crypto_xor_cpy(dst, src, final,
-				       walk.total % AES_BLOCK_SIZE);
-
-			err = skcipher_walk_done(&walk, 0);
-			break;
-		}
-		err = skcipher_walk_done(&walk,
-					 walk.nbytes - blocks * AES_BLOCK_SIZE);
+		err = skcipher_walk_done(&walk, walk.nbytes - bytes);
 	}
 
 	return err;
-- 
GitLab


From fc074e130051015e39245a4241956ff122e2f465 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Thu, 27 Jan 2022 12:35:44 +0100
Subject: [PATCH 0262/1586] crypto: arm64/aes-neonbs-ctr - fallback to plain
 NEON for final chunk

Instead of processing the entire input with the 8-way bit sliced
algorithm, which is sub-optimal for inputs that are not a multiple of
128 bytes in size, invoke the plain NEON version of CTR for the
remainder of the input after processing the bulk using 128 byte strides.

This allows us to greatly simplify the asm code that implements CTR, and
get rid of all the branches and special code paths. It also gains us a
couple of percent of performance.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm64/crypto/aes-glue.c        |   1 +
 arch/arm64/crypto/aes-neonbs-core.S | 132 ++++++----------------------
 arch/arm64/crypto/aes-neonbs-glue.c |  64 ++++++--------
 3 files changed, 55 insertions(+), 142 deletions(-)

diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
index 7d66f8babb1d6..561dd23325711 100644
--- a/arch/arm64/crypto/aes-glue.c
+++ b/arch/arm64/crypto/aes-glue.c
@@ -976,6 +976,7 @@ module_cpu_feature_match(AES, aes_init);
 module_init(aes_init);
 EXPORT_SYMBOL(neon_aes_ecb_encrypt);
 EXPORT_SYMBOL(neon_aes_cbc_encrypt);
+EXPORT_SYMBOL(neon_aes_ctr_encrypt);
 EXPORT_SYMBOL(neon_aes_xts_encrypt);
 EXPORT_SYMBOL(neon_aes_xts_decrypt);
 #endif
diff --git a/arch/arm64/crypto/aes-neonbs-core.S b/arch/arm64/crypto/aes-neonbs-core.S
index a3405b8c344b5..f2761481181d4 100644
--- a/arch/arm64/crypto/aes-neonbs-core.S
+++ b/arch/arm64/crypto/aes-neonbs-core.S
@@ -869,133 +869,51 @@ SYM_FUNC_END(aesbs_xts_decrypt)
 
 	/*
 	 * aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
-	 *		     int rounds, int blocks, u8 iv[], u8 final[])
+	 *		     int rounds, int blocks, u8 iv[])
 	 */
 SYM_FUNC_START(aesbs_ctr_encrypt)
-	frame_push	8
+	stp		x29, x30, [sp, #-16]!
+	mov		x29, sp
 
-	mov		x19, x0
-	mov		x20, x1
-	mov		x21, x2
-	mov		x22, x3
-	mov		x23, x4
-	mov		x24, x5
-	mov		x25, x6
-
-	cmp		x25, #0
-	cset		x26, ne
-	add		x23, x23, x26		// do one extra block if final
-
-	ldp		x7, x8, [x24]
-	ld1		{v0.16b}, [x24]
+	ldp		x7, x8, [x5]
+	ld1		{v0.16b}, [x5]
 CPU_LE(	rev		x7, x7		)
 CPU_LE(	rev		x8, x8		)
 	adds		x8, x8, #1
 	adc		x7, x7, xzr
 
-99:	mov		x9, #1
-	lsl		x9, x9, x23
-	subs		w23, w23, #8
-	csel		x23, x23, xzr, pl
-	csel		x9, x9, xzr, le
-
-	tbnz		x9, #1, 0f
-	next_ctr	v1
-	tbnz		x9, #2, 0f
+0:	next_ctr	v1
 	next_ctr	v2
-	tbnz		x9, #3, 0f
 	next_ctr	v3
-	tbnz		x9, #4, 0f
 	next_ctr	v4
-	tbnz		x9, #5, 0f
 	next_ctr	v5
-	tbnz		x9, #6, 0f
 	next_ctr	v6
-	tbnz		x9, #7, 0f
 	next_ctr	v7
 
-0:	mov		bskey, x21
-	mov		rounds, x22
+	mov		bskey, x2
+	mov		rounds, x3
 	bl		aesbs_encrypt8
 
-	lsr		x9, x9, x26		// disregard the extra block
-	tbnz		x9, #0, 0f
-
-	ld1		{v8.16b}, [x20], #16
-	eor		v0.16b, v0.16b, v8.16b
-	st1		{v0.16b}, [x19], #16
-	tbnz		x9, #1, 1f
-
-	ld1		{v9.16b}, [x20], #16
-	eor		v1.16b, v1.16b, v9.16b
-	st1		{v1.16b}, [x19], #16
-	tbnz		x9, #2, 2f
-
-	ld1		{v10.16b}, [x20], #16
-	eor		v4.16b, v4.16b, v10.16b
-	st1		{v4.16b}, [x19], #16
-	tbnz		x9, #3, 3f
+	ld1		{ v8.16b-v11.16b}, [x1], #64
+	ld1		{v12.16b-v15.16b}, [x1], #64
 
-	ld1		{v11.16b}, [x20], #16
-	eor		v6.16b, v6.16b, v11.16b
-	st1		{v6.16b}, [x19], #16
-	tbnz		x9, #4, 4f
+	eor		v8.16b, v0.16b, v8.16b
+	eor		v9.16b, v1.16b, v9.16b
+	eor		v10.16b, v4.16b, v10.16b
+	eor		v11.16b, v6.16b, v11.16b
+	eor		v12.16b, v3.16b, v12.16b
+	eor		v13.16b, v7.16b, v13.16b
+	eor		v14.16b, v2.16b, v14.16b
+	eor		v15.16b, v5.16b, v15.16b
 
-	ld1		{v12.16b}, [x20], #16
-	eor		v3.16b, v3.16b, v12.16b
-	st1		{v3.16b}, [x19], #16
-	tbnz		x9, #5, 5f
+	st1		{ v8.16b-v11.16b}, [x0], #64
+	st1		{v12.16b-v15.16b}, [x0], #64
 
-	ld1		{v13.16b}, [x20], #16
-	eor		v7.16b, v7.16b, v13.16b
-	st1		{v7.16b}, [x19], #16
-	tbnz		x9, #6, 6f
-
-	ld1		{v14.16b}, [x20], #16
-	eor		v2.16b, v2.16b, v14.16b
-	st1		{v2.16b}, [x19], #16
-	tbnz		x9, #7, 7f
+	next_ctr	v0
+	subs		x4, x4, #8
+	b.gt		0b
 
-	ld1		{v15.16b}, [x20], #16
-	eor		v5.16b, v5.16b, v15.16b
-	st1		{v5.16b}, [x19], #16
-
-8:	next_ctr	v0
-	st1		{v0.16b}, [x24]
-	cbz		x23, .Lctr_done
-
-	b		99b
-
-.Lctr_done:
-	frame_pop
+	st1		{v0.16b}, [x5]
+	ldp		x29, x30, [sp], #16
 	ret
-
-	/*
-	 * If we are handling the tail of the input (x6 != NULL), return the
-	 * final keystream block back to the caller.
-	 */
-0:	cbz		x25, 8b
-	st1		{v0.16b}, [x25]
-	b		8b
-1:	cbz		x25, 8b
-	st1		{v1.16b}, [x25]
-	b		8b
-2:	cbz		x25, 8b
-	st1		{v4.16b}, [x25]
-	b		8b
-3:	cbz		x25, 8b
-	st1		{v6.16b}, [x25]
-	b		8b
-4:	cbz		x25, 8b
-	st1		{v3.16b}, [x25]
-	b		8b
-5:	cbz		x25, 8b
-	st1		{v7.16b}, [x25]
-	b		8b
-6:	cbz		x25, 8b
-	st1		{v2.16b}, [x25]
-	b		8b
-7:	cbz		x25, 8b
-	st1		{v5.16b}, [x25]
-	b		8b
 SYM_FUNC_END(aesbs_ctr_encrypt)
diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c
index 8df6ad8cb09d6..3189003e1cbe5 100644
--- a/arch/arm64/crypto/aes-neonbs-glue.c
+++ b/arch/arm64/crypto/aes-neonbs-glue.c
@@ -34,7 +34,7 @@ asmlinkage void aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[],
 				  int rounds, int blocks, u8 iv[]);
 
 asmlinkage void aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
-				  int rounds, int blocks, u8 iv[], u8 final[]);
+				  int rounds, int blocks, u8 iv[]);
 
 asmlinkage void aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[],
 				  int rounds, int blocks, u8 iv[]);
@@ -46,6 +46,8 @@ asmlinkage void neon_aes_ecb_encrypt(u8 out[], u8 const in[], u32 const rk[],
 				     int rounds, int blocks);
 asmlinkage void neon_aes_cbc_encrypt(u8 out[], u8 const in[], u32 const rk[],
 				     int rounds, int blocks, u8 iv[]);
+asmlinkage void neon_aes_ctr_encrypt(u8 out[], u8 const in[], u32 const rk[],
+				     int rounds, int bytes, u8 ctr[]);
 asmlinkage void neon_aes_xts_encrypt(u8 out[], u8 const in[],
 				     u32 const rk1[], int rounds, int bytes,
 				     u32 const rk2[], u8 iv[], int first);
@@ -58,7 +60,7 @@ struct aesbs_ctx {
 	int	rounds;
 } __aligned(AES_BLOCK_SIZE);
 
-struct aesbs_cbc_ctx {
+struct aesbs_cbc_ctr_ctx {
 	struct aesbs_ctx	key;
 	u32			enc[AES_MAX_KEYLENGTH_U32];
 };
@@ -128,10 +130,10 @@ static int ecb_decrypt(struct skcipher_request *req)
 	return __ecb_crypt(req, aesbs_ecb_decrypt);
 }
 
-static int aesbs_cbc_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
+static int aesbs_cbc_ctr_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
 			    unsigned int key_len)
 {
-	struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct aesbs_cbc_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
 	struct crypto_aes_ctx rk;
 	int err;
 
@@ -154,7 +156,7 @@ static int aesbs_cbc_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
 static int cbc_encrypt(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct aesbs_cbc_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
 	struct skcipher_walk walk;
 	int err;
 
@@ -177,7 +179,7 @@ static int cbc_encrypt(struct skcipher_request *req)
 static int cbc_decrypt(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct aesbs_cbc_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
 	struct skcipher_walk walk;
 	int err;
 
@@ -205,40 +207,32 @@ static int cbc_decrypt(struct skcipher_request *req)
 static int ctr_encrypt(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct aesbs_cbc_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
 	struct skcipher_walk walk;
-	u8 buf[AES_BLOCK_SIZE];
 	int err;
 
 	err = skcipher_walk_virt(&walk, req, false);
 
 	while (walk.nbytes > 0) {
-		unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
-		u8 *final = (walk.total % AES_BLOCK_SIZE) ? buf : NULL;
-
-		if (walk.nbytes < walk.total) {
-			blocks = round_down(blocks,
-					    walk.stride / AES_BLOCK_SIZE);
-			final = NULL;
-		}
+		int blocks = (walk.nbytes / AES_BLOCK_SIZE) & ~7;
+		int nbytes = walk.nbytes % (8 * AES_BLOCK_SIZE);
+		const u8 *src = walk.src.virt.addr;
+		u8 *dst = walk.dst.virt.addr;
 
 		kernel_neon_begin();
-		aesbs_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
-				  ctx->rk, ctx->rounds, blocks, walk.iv, final);
-		kernel_neon_end();
-
-		if (final) {
-			u8 *dst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
-			u8 *src = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
-
-			crypto_xor_cpy(dst, src, final,
-				       walk.total % AES_BLOCK_SIZE);
-
-			err = skcipher_walk_done(&walk, 0);
-			break;
+		if (blocks >= 8) {
+			aesbs_ctr_encrypt(dst, src, ctx->key.rk, ctx->key.rounds,
+					  blocks, walk.iv);
+			dst += blocks * AES_BLOCK_SIZE;
+			src += blocks * AES_BLOCK_SIZE;
 		}
-		err = skcipher_walk_done(&walk,
-					 walk.nbytes - blocks * AES_BLOCK_SIZE);
+		if (nbytes && walk.nbytes == walk.total) {
+			neon_aes_ctr_encrypt(dst, src, ctx->enc, ctx->key.rounds,
+					     nbytes, walk.iv);
+			nbytes = 0;
+		}
+		kernel_neon_end();
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 	return err;
 }
@@ -402,14 +396,14 @@ static struct skcipher_alg aes_algs[] = { {
 	.base.cra_driver_name	= "cbc-aes-neonbs",
 	.base.cra_priority	= 250,
 	.base.cra_blocksize	= AES_BLOCK_SIZE,
-	.base.cra_ctxsize	= sizeof(struct aesbs_cbc_ctx),
+	.base.cra_ctxsize	= sizeof(struct aesbs_cbc_ctr_ctx),
 	.base.cra_module	= THIS_MODULE,
 
 	.min_keysize		= AES_MIN_KEY_SIZE,
 	.max_keysize		= AES_MAX_KEY_SIZE,
 	.walksize		= 8 * AES_BLOCK_SIZE,
 	.ivsize			= AES_BLOCK_SIZE,
-	.setkey			= aesbs_cbc_setkey,
+	.setkey			= aesbs_cbc_ctr_setkey,
 	.encrypt		= cbc_encrypt,
 	.decrypt		= cbc_decrypt,
 }, {
@@ -417,7 +411,7 @@ static struct skcipher_alg aes_algs[] = { {
 	.base.cra_driver_name	= "ctr-aes-neonbs",
 	.base.cra_priority	= 250,
 	.base.cra_blocksize	= 1,
-	.base.cra_ctxsize	= sizeof(struct aesbs_ctx),
+	.base.cra_ctxsize	= sizeof(struct aesbs_cbc_ctr_ctx),
 	.base.cra_module	= THIS_MODULE,
 
 	.min_keysize		= AES_MIN_KEY_SIZE,
@@ -425,7 +419,7 @@ static struct skcipher_alg aes_algs[] = { {
 	.chunksize		= AES_BLOCK_SIZE,
 	.walksize		= 8 * AES_BLOCK_SIZE,
 	.ivsize			= AES_BLOCK_SIZE,
-	.setkey			= aesbs_setkey,
+	.setkey			= aesbs_cbc_ctr_setkey,
 	.encrypt		= ctr_encrypt,
 	.decrypt		= ctr_encrypt,
 }, {
-- 
GitLab


From dfc6031ec917b7c34a7549c3120f841b2e2be6db Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Thu, 27 Jan 2022 12:35:45 +0100
Subject: [PATCH 0263/1586] crypto: arm64/aes-neonbs-xts - use plain NEON for
 non-power-of-2 input sizes

Even though the kernel's implementations of AES-XTS were updated to
implement ciphertext stealing and can operate on inputs of any size
larger than or equal to the AES block size, this feature is rarely used
in practice.

In fact, in the kernel, AES-XTS is only used to operate on 4096 or 512
byte blocks, which means that not only the ciphertext stealing is
effectively dead code, the logic in the bit sliced NEON implementation
to deal with fewer than 8 blocks at a time is also never used.

Since the bit-sliced NEON driver already depends on the plain NEON
version, which is slower but can operate on smaller data quantities more
straightforwardly, let's fallback to the plain NEON implementation of
XTS for any residual inputs that are not multiples of 128 bytes. This
allows us to remove a lot of complicated logic that rarely gets
exercised in practice.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm64/crypto/aes-neonbs-core.S | 132 +++++++++-------------------
 arch/arm64/crypto/aes-neonbs-glue.c |  33 +++----
 2 files changed, 57 insertions(+), 108 deletions(-)

diff --git a/arch/arm64/crypto/aes-neonbs-core.S b/arch/arm64/crypto/aes-neonbs-core.S
index f2761481181d4..d427f4556b6eb 100644
--- a/arch/arm64/crypto/aes-neonbs-core.S
+++ b/arch/arm64/crypto/aes-neonbs-core.S
@@ -735,119 +735,67 @@ SYM_FUNC_END(aesbs_cbc_decrypt)
 	 *		     int blocks, u8 iv[])
 	 */
 SYM_FUNC_START_LOCAL(__xts_crypt8)
-	mov		x6, #1
-	lsl		x6, x6, x23
-	subs		w23, w23, #8
-	csel		x23, x23, xzr, pl
-	csel		x6, x6, xzr, mi
+	movi		v18.2s, #0x1
+	movi		v19.2s, #0x87
+	uzp1		v18.4s, v18.4s, v19.4s
+
+	ld1		{v0.16b-v3.16b}, [x1], #64
+	ld1		{v4.16b-v7.16b}, [x1], #64
+
+	next_tweak	v26, v25, v18, v19
+	next_tweak	v27, v26, v18, v19
+	next_tweak	v28, v27, v18, v19
+	next_tweak	v29, v28, v18, v19
+	next_tweak	v30, v29, v18, v19
+	next_tweak	v31, v30, v18, v19
+	next_tweak	v16, v31, v18, v19
+	next_tweak	v17, v16, v18, v19
 
-	ld1		{v0.16b}, [x20], #16
-	next_tweak	v26, v25, v30, v31
 	eor		v0.16b, v0.16b, v25.16b
-	tbnz		x6, #1, 0f
-
-	ld1		{v1.16b}, [x20], #16
-	next_tweak	v27, v26, v30, v31
 	eor		v1.16b, v1.16b, v26.16b
-	tbnz		x6, #2, 0f
-
-	ld1		{v2.16b}, [x20], #16
-	next_tweak	v28, v27, v30, v31
 	eor		v2.16b, v2.16b, v27.16b
-	tbnz		x6, #3, 0f
-
-	ld1		{v3.16b}, [x20], #16
-	next_tweak	v29, v28, v30, v31
 	eor		v3.16b, v3.16b, v28.16b
-	tbnz		x6, #4, 0f
-
-	ld1		{v4.16b}, [x20], #16
-	str		q29, [sp, #.Lframe_local_offset]
 	eor		v4.16b, v4.16b, v29.16b
-	next_tweak	v29, v29, v30, v31
-	tbnz		x6, #5, 0f
-
-	ld1		{v5.16b}, [x20], #16
-	str		q29, [sp, #.Lframe_local_offset + 16]
-	eor		v5.16b, v5.16b, v29.16b
-	next_tweak	v29, v29, v30, v31
-	tbnz		x6, #6, 0f
+	eor		v5.16b, v5.16b, v30.16b
+	eor		v6.16b, v6.16b, v31.16b
+	eor		v7.16b, v7.16b, v16.16b
 
-	ld1		{v6.16b}, [x20], #16
-	str		q29, [sp, #.Lframe_local_offset + 32]
-	eor		v6.16b, v6.16b, v29.16b
-	next_tweak	v29, v29, v30, v31
-	tbnz		x6, #7, 0f
+	stp		q16, q17, [sp, #16]
 
-	ld1		{v7.16b}, [x20], #16
-	str		q29, [sp, #.Lframe_local_offset + 48]
-	eor		v7.16b, v7.16b, v29.16b
-	next_tweak	v29, v29, v30, v31
-
-0:	mov		bskey, x21
-	mov		rounds, x22
+	mov		bskey, x2
+	mov		rounds, x3
 	br		x16
 SYM_FUNC_END(__xts_crypt8)
 
 	.macro		__xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
-	frame_push	6, 64
-
-	mov		x19, x0
-	mov		x20, x1
-	mov		x21, x2
-	mov		x22, x3
-	mov		x23, x4
-	mov		x24, x5
+	stp		x29, x30, [sp, #-48]!
+	mov		x29, sp
 
-	movi		v30.2s, #0x1
-	movi		v25.2s, #0x87
-	uzp1		v30.4s, v30.4s, v25.4s
-	ld1		{v25.16b}, [x24]
+	ld1		{v25.16b}, [x5]
 
-99:	adr		x16, \do8
+0:	adr		x16, \do8
 	bl		__xts_crypt8
 
-	ldp		q16, q17, [sp, #.Lframe_local_offset]
-	ldp		q18, q19, [sp, #.Lframe_local_offset + 32]
+	eor		v16.16b, \o0\().16b, v25.16b
+	eor		v17.16b, \o1\().16b, v26.16b
+	eor		v18.16b, \o2\().16b, v27.16b
+	eor		v19.16b, \o3\().16b, v28.16b
 
-	eor		\o0\().16b, \o0\().16b, v25.16b
-	eor		\o1\().16b, \o1\().16b, v26.16b
-	eor		\o2\().16b, \o2\().16b, v27.16b
-	eor		\o3\().16b, \o3\().16b, v28.16b
+	ldp		q24, q25, [sp, #16]
 
-	st1		{\o0\().16b}, [x19], #16
-	mov		v25.16b, v26.16b
-	tbnz		x6, #1, 1f
-	st1		{\o1\().16b}, [x19], #16
-	mov		v25.16b, v27.16b
-	tbnz		x6, #2, 1f
-	st1		{\o2\().16b}, [x19], #16
-	mov		v25.16b, v28.16b
-	tbnz		x6, #3, 1f
-	st1		{\o3\().16b}, [x19], #16
-	mov		v25.16b, v29.16b
-	tbnz		x6, #4, 1f
-
-	eor		\o4\().16b, \o4\().16b, v16.16b
-	eor		\o5\().16b, \o5\().16b, v17.16b
-	eor		\o6\().16b, \o6\().16b, v18.16b
-	eor		\o7\().16b, \o7\().16b, v19.16b
-
-	st1		{\o4\().16b}, [x19], #16
-	tbnz		x6, #5, 1f
-	st1		{\o5\().16b}, [x19], #16
-	tbnz		x6, #6, 1f
-	st1		{\o6\().16b}, [x19], #16
-	tbnz		x6, #7, 1f
-	st1		{\o7\().16b}, [x19], #16
+	eor		v20.16b, \o4\().16b, v29.16b
+	eor		v21.16b, \o5\().16b, v30.16b
+	eor		v22.16b, \o6\().16b, v31.16b
+	eor		v23.16b, \o7\().16b, v24.16b
 
-	cbz		x23, 1f
-	st1		{v25.16b}, [x24]
+	st1		{v16.16b-v19.16b}, [x0], #64
+	st1		{v20.16b-v23.16b}, [x0], #64
 
-	b		99b
+	subs		x4, x4, #8
+	b.gt		0b
 
-1:	st1		{v25.16b}, [x24]
-	frame_pop
+	st1		{v25.16b}, [x5]
+	ldp		x29, x30, [sp], #48
 	ret
 	.endm
 
diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c
index 3189003e1cbe5..bac4cabef6073 100644
--- a/arch/arm64/crypto/aes-neonbs-glue.c
+++ b/arch/arm64/crypto/aes-neonbs-glue.c
@@ -302,23 +302,18 @@ static int __xts_crypt(struct skcipher_request *req, bool encrypt,
 		return err;
 
 	while (walk.nbytes >= AES_BLOCK_SIZE) {
-		unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
-
-		if (walk.nbytes < walk.total || walk.nbytes % AES_BLOCK_SIZE)
-			blocks = round_down(blocks,
-					    walk.stride / AES_BLOCK_SIZE);
-
+		int blocks = (walk.nbytes / AES_BLOCK_SIZE) & ~7;
 		out = walk.dst.virt.addr;
 		in = walk.src.virt.addr;
 		nbytes = walk.nbytes;
 
 		kernel_neon_begin();
-		if (likely(blocks > 6)) { /* plain NEON is faster otherwise */
-			if (first)
+		if (blocks >= 8) {
+			if (first == 1)
 				neon_aes_ecb_encrypt(walk.iv, walk.iv,
 						     ctx->twkey,
 						     ctx->key.rounds, 1);
-			first = 0;
+			first = 2;
 
 			fn(out, in, ctx->key.rk, ctx->key.rounds, blocks,
 			   walk.iv);
@@ -327,10 +322,17 @@ static int __xts_crypt(struct skcipher_request *req, bool encrypt,
 			in += blocks * AES_BLOCK_SIZE;
 			nbytes -= blocks * AES_BLOCK_SIZE;
 		}
-
-		if (walk.nbytes == walk.total && nbytes > 0)
-			goto xts_tail;
-
+		if (walk.nbytes == walk.total && nbytes > 0) {
+			if (encrypt)
+				neon_aes_xts_encrypt(out, in, ctx->cts.key_enc,
+						     ctx->key.rounds, nbytes,
+						     ctx->twkey, walk.iv, first);
+			else
+				neon_aes_xts_decrypt(out, in, ctx->cts.key_dec,
+						     ctx->key.rounds, nbytes,
+						     ctx->twkey, walk.iv, first);
+			nbytes = first = 0;
+		}
 		kernel_neon_end();
 		err = skcipher_walk_done(&walk, nbytes);
 	}
@@ -355,13 +357,12 @@ static int __xts_crypt(struct skcipher_request *req, bool encrypt,
 	nbytes = walk.nbytes;
 
 	kernel_neon_begin();
-xts_tail:
 	if (encrypt)
 		neon_aes_xts_encrypt(out, in, ctx->cts.key_enc, ctx->key.rounds,
-				     nbytes, ctx->twkey, walk.iv, first ?: 2);
+				     nbytes, ctx->twkey, walk.iv, first);
 	else
 		neon_aes_xts_decrypt(out, in, ctx->cts.key_dec, ctx->key.rounds,
-				     nbytes, ctx->twkey, walk.iv, first ?: 2);
+				     nbytes, ctx->twkey, walk.iv, first);
 	kernel_neon_end();
 
 	return skcipher_walk_done(&walk, 0);
-- 
GitLab


From a43bed8220f26178f0f63bf4c0fc23b7b8b76763 Mon Sep 17 00:00:00 2001
From: Dominik Brodowski <linux@dominikbrodowski.net>
Date: Fri, 28 Jan 2022 08:02:54 +0100
Subject: [PATCH 0264/1586] hwrng: core - credit entropy for low quality
 sources of randomness

In case the entropy quality is low, there may be less than one bit to
credit in the call to add_hwgenerator_randomness(): The number of bytes
returned by rng_get_data() multiplied by the current quality (in entropy
bits per 1024 bits of input) must be larger than 128 to credit at least
one bit. However, imx-rngc.c sets the quality to 19, but may return less
than 32 bytes; hid_u2fzero.c sets the quality to 1; and users may override
the quality setting manually.

In case there is less than one bit to credit, keep track of it and add
that credit to the next iteration.

Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/core.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index bc9f95cbac921..f327f7493585e 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -424,6 +424,7 @@ static int __init register_miscdev(void)
 
 static int hwrng_fillfn(void *unused)
 {
+	size_t entropy, entropy_credit = 0; /* in 1/1024 of a bit */
 	long rc;
 
 	while (!kthread_should_stop()) {
@@ -445,9 +446,17 @@ static int hwrng_fillfn(void *unused)
 			msleep_interruptible(10000);
 			continue;
 		}
+
+		/* If we cannot credit at least one bit of entropy,
+		 * keep track of the remainder for the next iteration
+		 */
+		entropy = rc * current_quality * 8 + entropy_credit;
+		if ((entropy >> 10) == 0)
+			entropy_credit = entropy;
+
 		/* Outside lock, sure, but y'know: randomness. */
 		add_hwgenerator_randomness((void *)rng_fillbuf, rc,
-					   rc * current_quality * 8 >> 10);
+					   entropy >> 10);
 	}
 	hwrng_fill = NULL;
 	return 0;
-- 
GitLab


From 16d20a08f15ee6ad9a4c1e17ede613927d1c553e Mon Sep 17 00:00:00 2001
From: Kavyasree Kotagiri <kavyasree.kotagiri@microchip.com>
Date: Fri, 28 Jan 2022 12:47:55 +0530
Subject: [PATCH 0265/1586] crypto: atmel - add support for AES and SHA IPs
 available on lan966x SoC

This patch adds support for hardware version of AES and SHA IPs
available on lan966x SoC.

Signed-off-by: Kavyasree Kotagiri <kavyasree.kotagiri@microchip.com>
Reviewed-by: Tudor Ambarus <tudor.ambarus@microchip.com>
Tested-by: Tudor Ambarus <tudor.ambarus@microchip.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/atmel-aes.c | 1 +
 drivers/crypto/atmel-sha.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c
index fe05584031914..f72c6b3e4ad81 100644
--- a/drivers/crypto/atmel-aes.c
+++ b/drivers/crypto/atmel-aes.c
@@ -2509,6 +2509,7 @@ static void atmel_aes_get_cap(struct atmel_aes_dev *dd)
 
 	/* keep only major version number */
 	switch (dd->hw_version & 0xff0) {
+	case 0x700:
 	case 0x500:
 		dd->caps.has_dualbuff = 1;
 		dd->caps.has_cfb64 = 1;
diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c
index 1b13f601fd959..d1628112dacc1 100644
--- a/drivers/crypto/atmel-sha.c
+++ b/drivers/crypto/atmel-sha.c
@@ -2508,6 +2508,7 @@ static void atmel_sha_get_cap(struct atmel_sha_dev *dd)
 
 	/* keep only major version number */
 	switch (dd->hw_version & 0xff0) {
+	case 0x700:
 	case 0x510:
 		dd->caps.has_dma = 1;
 		dd->caps.has_dualbuff = 1;
-- 
GitLab


From 388ac25efc8ce3bf9768ce7bf24268d6fac285d5 Mon Sep 17 00:00:00 2001
From: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Date: Fri, 28 Jan 2022 16:38:35 +0800
Subject: [PATCH 0266/1586] crypto: tcrypt - remove all multibuffer ahash tests

The multibuffer algorithms was removed already in 2018, so it is
necessary to clear the test code left by tcrypt.

Suggested-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Acked-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/tcrypt.c | 224 ------------------------------------------------
 1 file changed, 224 deletions(-)

diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index 82b5eef2246a2..2a808e843de52 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -724,200 +724,6 @@ static inline int do_one_ahash_op(struct ahash_request *req, int ret)
 	return crypto_wait_req(ret, wait);
 }
 
-struct test_mb_ahash_data {
-	struct scatterlist sg[XBUFSIZE];
-	char result[64];
-	struct ahash_request *req;
-	struct crypto_wait wait;
-	char *xbuf[XBUFSIZE];
-};
-
-static inline int do_mult_ahash_op(struct test_mb_ahash_data *data, u32 num_mb,
-				   int *rc)
-{
-	int i, err = 0;
-
-	/* Fire up a bunch of concurrent requests */
-	for (i = 0; i < num_mb; i++)
-		rc[i] = crypto_ahash_digest(data[i].req);
-
-	/* Wait for all requests to finish */
-	for (i = 0; i < num_mb; i++) {
-		rc[i] = crypto_wait_req(rc[i], &data[i].wait);
-
-		if (rc[i]) {
-			pr_info("concurrent request %d error %d\n", i, rc[i]);
-			err = rc[i];
-		}
-	}
-
-	return err;
-}
-
-static int test_mb_ahash_jiffies(struct test_mb_ahash_data *data, int blen,
-				 int secs, u32 num_mb)
-{
-	unsigned long start, end;
-	int bcount;
-	int ret = 0;
-	int *rc;
-
-	rc = kcalloc(num_mb, sizeof(*rc), GFP_KERNEL);
-	if (!rc)
-		return -ENOMEM;
-
-	for (start = jiffies, end = start + secs * HZ, bcount = 0;
-	     time_before(jiffies, end); bcount++) {
-		ret = do_mult_ahash_op(data, num_mb, rc);
-		if (ret)
-			goto out;
-	}
-
-	pr_cont("%d operations in %d seconds (%llu bytes)\n",
-		bcount * num_mb, secs, (u64)bcount * blen * num_mb);
-
-out:
-	kfree(rc);
-	return ret;
-}
-
-static int test_mb_ahash_cycles(struct test_mb_ahash_data *data, int blen,
-				u32 num_mb)
-{
-	unsigned long cycles = 0;
-	int ret = 0;
-	int i;
-	int *rc;
-
-	rc = kcalloc(num_mb, sizeof(*rc), GFP_KERNEL);
-	if (!rc)
-		return -ENOMEM;
-
-	/* Warm-up run. */
-	for (i = 0; i < 4; i++) {
-		ret = do_mult_ahash_op(data, num_mb, rc);
-		if (ret)
-			goto out;
-	}
-
-	/* The real thing. */
-	for (i = 0; i < 8; i++) {
-		cycles_t start, end;
-
-		start = get_cycles();
-		ret = do_mult_ahash_op(data, num_mb, rc);
-		end = get_cycles();
-
-		if (ret)
-			goto out;
-
-		cycles += end - start;
-	}
-
-	pr_cont("1 operation in %lu cycles (%d bytes)\n",
-		(cycles + 4) / (8 * num_mb), blen);
-
-out:
-	kfree(rc);
-	return ret;
-}
-
-static void test_mb_ahash_speed(const char *algo, unsigned int secs,
-				struct hash_speed *speed, u32 num_mb)
-{
-	struct test_mb_ahash_data *data;
-	struct crypto_ahash *tfm;
-	unsigned int i, j, k;
-	int ret;
-
-	data = kcalloc(num_mb, sizeof(*data), GFP_KERNEL);
-	if (!data)
-		return;
-
-	tfm = crypto_alloc_ahash(algo, 0, 0);
-	if (IS_ERR(tfm)) {
-		pr_err("failed to load transform for %s: %ld\n",
-			algo, PTR_ERR(tfm));
-		goto free_data;
-	}
-
-	for (i = 0; i < num_mb; ++i) {
-		if (testmgr_alloc_buf(data[i].xbuf))
-			goto out;
-
-		crypto_init_wait(&data[i].wait);
-
-		data[i].req = ahash_request_alloc(tfm, GFP_KERNEL);
-		if (!data[i].req) {
-			pr_err("alg: hash: Failed to allocate request for %s\n",
-			       algo);
-			goto out;
-		}
-
-		ahash_request_set_callback(data[i].req, 0, crypto_req_done,
-					   &data[i].wait);
-
-		sg_init_table(data[i].sg, XBUFSIZE);
-		for (j = 0; j < XBUFSIZE; j++) {
-			sg_set_buf(data[i].sg + j, data[i].xbuf[j], PAGE_SIZE);
-			memset(data[i].xbuf[j], 0xff, PAGE_SIZE);
-		}
-	}
-
-	pr_info("\ntesting speed of multibuffer %s (%s)\n", algo,
-		get_driver_name(crypto_ahash, tfm));
-
-	for (i = 0; speed[i].blen != 0; i++) {
-		/* For some reason this only tests digests. */
-		if (speed[i].blen != speed[i].plen)
-			continue;
-
-		if (speed[i].blen > XBUFSIZE * PAGE_SIZE) {
-			pr_err("template (%u) too big for tvmem (%lu)\n",
-			       speed[i].blen, XBUFSIZE * PAGE_SIZE);
-			goto out;
-		}
-
-		if (klen)
-			crypto_ahash_setkey(tfm, tvmem[0], klen);
-
-		for (k = 0; k < num_mb; k++)
-			ahash_request_set_crypt(data[k].req, data[k].sg,
-						data[k].result, speed[i].blen);
-
-		pr_info("test%3u "
-			"(%5u byte blocks,%5u bytes per update,%4u updates): ",
-			i, speed[i].blen, speed[i].plen,
-			speed[i].blen / speed[i].plen);
-
-		if (secs) {
-			ret = test_mb_ahash_jiffies(data, speed[i].blen, secs,
-						    num_mb);
-			cond_resched();
-		} else {
-			ret = test_mb_ahash_cycles(data, speed[i].blen, num_mb);
-		}
-
-
-		if (ret) {
-			pr_err("At least one hashing failed ret=%d\n", ret);
-			break;
-		}
-	}
-
-out:
-	for (k = 0; k < num_mb; ++k)
-		ahash_request_free(data[k].req);
-
-	for (k = 0; k < num_mb; ++k)
-		testmgr_free_buf(data[k].xbuf);
-
-	crypto_free_ahash(tfm);
-
-free_data:
-	kfree(data);
-}
-
 static int test_ahash_jiffies_digest(struct ahash_request *req, int blen,
 				     char *out, int secs)
 {
@@ -2574,36 +2380,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
 		test_ahash_speed("sm3", sec, generic_hash_speed_template);
 		if (mode > 400 && mode < 500) break;
 		fallthrough;
-	case 450:
-		test_mb_ahash_speed("sha1", sec, generic_hash_speed_template,
-				    num_mb);
-		if (mode > 400 && mode < 500) break;
-		fallthrough;
-	case 451:
-		test_mb_ahash_speed("sha256", sec, generic_hash_speed_template,
-				    num_mb);
-		if (mode > 400 && mode < 500) break;
-		fallthrough;
-	case 452:
-		test_mb_ahash_speed("sha512", sec, generic_hash_speed_template,
-				    num_mb);
-		if (mode > 400 && mode < 500) break;
-		fallthrough;
-	case 453:
-		test_mb_ahash_speed("sm3", sec, generic_hash_speed_template,
-				    num_mb);
-		if (mode > 400 && mode < 500) break;
-		fallthrough;
-	case 454:
-		test_mb_ahash_speed("streebog256", sec,
-				    generic_hash_speed_template, num_mb);
-		if (mode > 400 && mode < 500) break;
-		fallthrough;
-	case 455:
-		test_mb_ahash_speed("streebog512", sec,
-				    generic_hash_speed_template, num_mb);
-		if (mode > 400 && mode < 500) break;
-		fallthrough;
 	case 499:
 		break;
 
-- 
GitLab


From 2d841af23ae8f398c85dd1ff2dc24b5ec8ba4569 Mon Sep 17 00:00:00 2001
From: Shijith Thotton <sthotton@marvell.com>
Date: Fri, 28 Jan 2022 19:27:42 +0530
Subject: [PATCH 0267/1586] crypto: octeontx2 - remove CONFIG_DM_CRYPT check

No issues were found while using the driver with dm-crypt enabled. So
CONFIG_DM_CRYPT check in the driver can be removed.

This also fixes the NULL pointer dereference in driver release if
CONFIG_DM_CRYPT is enabled.

...
Unable to handle kernel NULL pointer dereference at virtual address 0000000000000008
...
Call trace:
 crypto_unregister_alg+0x68/0xfc
 crypto_unregister_skciphers+0x44/0x60
 otx2_cpt_crypto_exit+0x100/0x1a0
 otx2_cptvf_remove+0xf8/0x200
 pci_device_remove+0x3c/0xd4
 __device_release_driver+0x188/0x234
 device_release_driver+0x2c/0x4c
...

Fixes: 6f03f0e8b6c8 ("crypto: octeontx2 - register with linux crypto framework")
Signed-off-by: Shijith Thotton <sthotton@marvell.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 .../crypto/marvell/octeontx2/otx2_cptvf_algs.c  | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c b/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c
index 2748a3327e391..f8f8542ce3e47 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c
@@ -1634,16 +1634,13 @@ static inline int cpt_register_algs(void)
 {
 	int i, err = 0;
 
-	if (!IS_ENABLED(CONFIG_DM_CRYPT)) {
-		for (i = 0; i < ARRAY_SIZE(otx2_cpt_skciphers); i++)
-			otx2_cpt_skciphers[i].base.cra_flags &=
-							~CRYPTO_ALG_DEAD;
-
-		err = crypto_register_skciphers(otx2_cpt_skciphers,
-						ARRAY_SIZE(otx2_cpt_skciphers));
-		if (err)
-			return err;
-	}
+	for (i = 0; i < ARRAY_SIZE(otx2_cpt_skciphers); i++)
+		otx2_cpt_skciphers[i].base.cra_flags &= ~CRYPTO_ALG_DEAD;
+
+	err = crypto_register_skciphers(otx2_cpt_skciphers,
+					ARRAY_SIZE(otx2_cpt_skciphers));
+	if (err)
+		return err;
 
 	for (i = 0; i < ARRAY_SIZE(otx2_cpt_aeads); i++)
 		otx2_cpt_aeads[i].base.cra_flags &= ~CRYPTO_ALG_DEAD;
-- 
GitLab


From c717993dd76a1049093af5c262e751d901b8da10 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Sun, 30 Jan 2022 22:53:25 +0000
Subject: [PATCH 0268/1586] crypto: marvell/octeontx - remove redundant
 initialization of variable c_size

Variable c_size is being initialized with a value that is never read, it
is being re-assigned with a different value later on. The initialization
is redundant and can be removed.

Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/marvell/octeontx/otx_cptvf_main.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/crypto/marvell/octeontx/otx_cptvf_main.c b/drivers/crypto/marvell/octeontx/otx_cptvf_main.c
index b681bd2dc6add..36d72e35ebeb6 100644
--- a/drivers/crypto/marvell/octeontx/otx_cptvf_main.c
+++ b/drivers/crypto/marvell/octeontx/otx_cptvf_main.c
@@ -204,7 +204,6 @@ static int alloc_command_queues(struct otx_cptvf *cptvf,
 
 	/* per queue initialization */
 	for (i = 0; i < cptvf->num_queues; i++) {
-		c_size = 0;
 		rem_q_size = q_size;
 		first = NULL;
 		last = NULL;
-- 
GitLab


From 95e26b0391d085bbdbe1a82ffaaf2f92a3f71433 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Sun, 30 Jan 2022 23:05:15 +0000
Subject: [PATCH 0269/1586] crypto: sl3516 - remove redundant initializations
 of pointers in_sg and out_sg

Pointers in_sg and out_sg are being initialized with values that are
never read, they are being re-assigned the same values later on. The
initializations are redundant, remove them in preference to the later
assignments that are closer to when the pointers are being used.

Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/gemini/sl3516-ce-cipher.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/gemini/sl3516-ce-cipher.c b/drivers/crypto/gemini/sl3516-ce-cipher.c
index c1c2b1d866639..53e3fefb81de5 100644
--- a/drivers/crypto/gemini/sl3516-ce-cipher.c
+++ b/drivers/crypto/gemini/sl3516-ce-cipher.c
@@ -23,8 +23,8 @@ static bool sl3516_ce_need_fallback(struct skcipher_request *areq)
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
 	struct sl3516_ce_cipher_tfm_ctx *op = crypto_skcipher_ctx(tfm);
 	struct sl3516_ce_dev *ce = op->ce;
-	struct scatterlist *in_sg = areq->src;
-	struct scatterlist *out_sg = areq->dst;
+	struct scatterlist *in_sg;
+	struct scatterlist *out_sg;
 	struct scatterlist *sg;
 
 	if (areq->cryptlen == 0 || areq->cryptlen % 16) {
-- 
GitLab


From f154066b61dfde618d98fdafc8cadde076c7f222 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Sun, 6 Feb 2022 09:20:08 -0800
Subject: [PATCH 0270/1586] gcc-plugins/stackleak: Provide verbose mode

In order to compare instrumentation between builds, make the verbose
mode of the plugin available during the build. This is rarely needed
(behind EXPERT) and very noisy (disabled for COMPILE_TEST).

Cc: Alexander Popov <alex.popov@linux.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 scripts/Makefile.gcc-plugins |  2 ++
 security/Kconfig.hardening   | 10 ++++++++++
 2 files changed, 12 insertions(+)

diff --git a/scripts/Makefile.gcc-plugins b/scripts/Makefile.gcc-plugins
index 1d16ca1b78c91..f67153b260c08 100644
--- a/scripts/Makefile.gcc-plugins
+++ b/scripts/Makefile.gcc-plugins
@@ -37,6 +37,8 @@ gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_STACKLEAK)		\
 		+= -fplugin-arg-stackleak_plugin-track-min-size=$(CONFIG_STACKLEAK_TRACK_MIN_SIZE)
 gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_STACKLEAK)		\
 		+= -fplugin-arg-stackleak_plugin-arch=$(SRCARCH)
+gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_STACKLEAK_VERBOSE)	\
+		+= -fplugin-arg-stackleak_plugin-verbose
 ifdef CONFIG_GCC_PLUGIN_STACKLEAK
     DISABLE_STACKLEAK_PLUGIN += -fplugin-arg-stackleak_plugin-disable
 endif
diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening
index d051f8ceefddd..ded4d7c0d1322 100644
--- a/security/Kconfig.hardening
+++ b/security/Kconfig.hardening
@@ -174,6 +174,16 @@ config GCC_PLUGIN_STACKLEAK
 	   * https://grsecurity.net/
 	   * https://pax.grsecurity.net/
 
+config GCC_PLUGIN_STACKLEAK_VERBOSE
+	bool "Report stack depth analysis instrumentation" if EXPERT
+	depends on GCC_PLUGIN_STACKLEAK
+	depends on !COMPILE_TEST	# too noisy
+	help
+	  This option will cause a warning to be printed each time the
+	  stackleak plugin finds a function it thinks needs to be
+	  instrumented. This is useful for comparing coverage between
+	  builds.
+
 config STACKLEAK_TRACK_MIN_SIZE
 	int "Minimum stack frame size of functions tracked by STACKLEAK"
 	default 100
-- 
GitLab


From 27e9faf415dbf94af19b9c827842435edbc1fbbc Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Sun, 6 Feb 2022 09:08:20 -0800
Subject: [PATCH 0271/1586] gcc-plugins/stackleak: Exactly match strings
 instead of prefixes

Since STRING_CST may not be NUL terminated, strncmp() was used for check
for equality. However, this may lead to mismatches for longer section
names where the start matches the tested-for string. Test for exact
equality by checking for the presences of NUL termination.

Cc: Alexander Popov <alex.popov@linux.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 scripts/gcc-plugins/stackleak_plugin.c | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/scripts/gcc-plugins/stackleak_plugin.c b/scripts/gcc-plugins/stackleak_plugin.c
index e9db7dcb3e5f4..b04aa8e91a41f 100644
--- a/scripts/gcc-plugins/stackleak_plugin.c
+++ b/scripts/gcc-plugins/stackleak_plugin.c
@@ -429,6 +429,23 @@ static unsigned int stackleak_cleanup_execute(void)
 	return 0;
 }
 
+/*
+ * STRING_CST may or may not be NUL terminated:
+ * https://gcc.gnu.org/onlinedocs/gccint/Constant-expressions.html
+ */
+static inline bool string_equal(tree node, const char *string, int length)
+{
+	if (TREE_STRING_LENGTH(node) < length)
+		return false;
+	if (TREE_STRING_LENGTH(node) > length + 1)
+		return false;
+	if (TREE_STRING_LENGTH(node) == length + 1 &&
+	    TREE_STRING_POINTER(node)[length] != '\0')
+		return false;
+	return !memcmp(TREE_STRING_POINTER(node), string, length);
+}
+#define STRING_EQUAL(node, str)	string_equal(node, str, strlen(str))
+
 static bool stackleak_gate(void)
 {
 	tree section;
@@ -438,13 +455,13 @@ static bool stackleak_gate(void)
 	if (section && TREE_VALUE(section)) {
 		section = TREE_VALUE(TREE_VALUE(section));
 
-		if (!strncmp(TREE_STRING_POINTER(section), ".init.text", 10))
+		if (STRING_EQUAL(section, ".init.text"))
 			return false;
-		if (!strncmp(TREE_STRING_POINTER(section), ".devinit.text", 13))
+		if (STRING_EQUAL(section, ".devinit.text"))
 			return false;
-		if (!strncmp(TREE_STRING_POINTER(section), ".cpuinit.text", 13))
+		if (STRING_EQUAL(section, ".cpuinit.text"))
 			return false;
-		if (!strncmp(TREE_STRING_POINTER(section), ".meminit.text", 13))
+		if (STRING_EQUAL(section, ".meminit.text"))
 			return false;
 	}
 
-- 
GitLab


From ae978009fc013e3166c9f523f8b17e41a3c0286e Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Sun, 6 Feb 2022 09:12:50 -0800
Subject: [PATCH 0272/1586] gcc-plugins/stackleak: Ignore .noinstr.text and
 .entry.text

The .noinstr.text section functions may not have "current()" sanely
available. Similarly true for .entry.text, though such a check is
currently redundant. Add a check for both. In an x86_64 defconfig build,
the following functions no longer receive stackleak instrumentation:

	__do_fast_syscall_32()
	do_int80_syscall_32()
	do_machine_check()
	do_syscall_64()
	exc_general_protection()
	fixup_bad_iret()

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Cc: Alexander Popov <alex.popov@linux.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 scripts/gcc-plugins/stackleak_plugin.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/scripts/gcc-plugins/stackleak_plugin.c b/scripts/gcc-plugins/stackleak_plugin.c
index b04aa8e91a41f..42f0252ee2a4e 100644
--- a/scripts/gcc-plugins/stackleak_plugin.c
+++ b/scripts/gcc-plugins/stackleak_plugin.c
@@ -463,6 +463,10 @@ static bool stackleak_gate(void)
 			return false;
 		if (STRING_EQUAL(section, ".meminit.text"))
 			return false;
+		if (STRING_EQUAL(section, ".noinstr.text"))
+			return false;
+		if (STRING_EQUAL(section, ".entry.text"))
+			return false;
 	}
 
 	return track_frame_size >= 0;
-- 
GitLab


From 5a6bbd1d18cabf5a680e726f0ef8f6dda0105fe8 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Wed, 3 Nov 2021 14:14:53 +0000
Subject: [PATCH 0273/1586] dt-bindings: arm-pmu: Document Apple PMU compatible
 strings

As we are about to add support fur the Apple PMUs, document the compatible
strings associated with the two micro-architectures present in the Apple M1.

Acked-by: Rob Herring <robh@kernel.org>
Reviewed-by: Hector Martin <marcan@marcan.st>
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 Documentation/devicetree/bindings/arm/pmu.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Documentation/devicetree/bindings/arm/pmu.yaml b/Documentation/devicetree/bindings/arm/pmu.yaml
index 981bac4516988..7a04b8aaaec30 100644
--- a/Documentation/devicetree/bindings/arm/pmu.yaml
+++ b/Documentation/devicetree/bindings/arm/pmu.yaml
@@ -20,6 +20,8 @@ properties:
     items:
       - enum:
           - apm,potenza-pmu
+          - apple,firestorm-pmu
+          - apple,icestorm-pmu
           - arm,armv8-pmuv3 # Only for s/w models
           - arm,arm1136-pmu
           - arm,arm1176-pmu
-- 
GitLab


From 74703b13f9d2ef286ef588f29295a2fd30b5f295 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Mon, 1 Nov 2021 19:58:42 +0000
Subject: [PATCH 0274/1586] dt-bindings: apple,aic: Add CPU PMU per-cpu
 pseudo-interrupts

Advertise the two pseudo-interrupts that tied to the two PMU
flavours present in the Apple M1 SoC.

We choose the expose two different pseudo-interrupts to the OS
as the e-core PMU is obviously different from the p-core one,
effectively presenting two different devices.

Acked-by: Rob Herring <robh@kernel.org>
Reviewed-by: Hector Martin <marcan@marcan.st>
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 .../devicetree/bindings/interrupt-controller/apple,aic.yaml     | 2 ++
 include/dt-bindings/interrupt-controller/apple-aic.h            | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/Documentation/devicetree/bindings/interrupt-controller/apple,aic.yaml b/Documentation/devicetree/bindings/interrupt-controller/apple,aic.yaml
index 97359024709af..c7577d401786c 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/apple,aic.yaml
+++ b/Documentation/devicetree/bindings/interrupt-controller/apple,aic.yaml
@@ -56,6 +56,8 @@ properties:
           - 1: virtual HV timer
           - 2: physical guest timer
           - 3: virtual guest timer
+          - 4: 'efficient' CPU PMU
+          - 5: 'performance' CPU PMU
 
       The 3rd cell contains the interrupt flags. This is normally
       IRQ_TYPE_LEVEL_HIGH (4).
diff --git a/include/dt-bindings/interrupt-controller/apple-aic.h b/include/dt-bindings/interrupt-controller/apple-aic.h
index 604f2bb30ac0d..bf3aac0e54915 100644
--- a/include/dt-bindings/interrupt-controller/apple-aic.h
+++ b/include/dt-bindings/interrupt-controller/apple-aic.h
@@ -11,5 +11,7 @@
 #define AIC_TMR_HV_VIRT		1
 #define AIC_TMR_GUEST_PHYS	2
 #define AIC_TMR_GUEST_VIRT	3
+#define AIC_CPU_PMU_E		4
+#define AIC_CPU_PMU_P		5
 
 #endif
-- 
GitLab


From dba07ad11384d6a4ece4acda1fbe726222ca7ad0 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Tue, 14 Dec 2021 16:49:04 +0000
Subject: [PATCH 0275/1586] dt-bindings: apple,aic: Add affinity description
 for per-cpu pseudo-interrupts

Some of the FIQ per-cpu pseudo-interrupts are better described with
a specific affinity, the most obvious candidate being the CPU PMUs.

Augment the AIC binding to be able to specify that affinity in the
interrupt controller node.

Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 .../interrupt-controller/apple,aic.yaml       | 29 +++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/Documentation/devicetree/bindings/interrupt-controller/apple,aic.yaml b/Documentation/devicetree/bindings/interrupt-controller/apple,aic.yaml
index c7577d401786c..85c85b694217c 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/apple,aic.yaml
+++ b/Documentation/devicetree/bindings/interrupt-controller/apple,aic.yaml
@@ -70,6 +70,35 @@ properties:
   power-domains:
     maxItems: 1
 
+  affinities:
+    type: object
+    additionalProperties: false
+    description:
+      FIQ affinity can be expressed as a single "affinities" node,
+      containing a set of sub-nodes, one per FIQ with a non-default
+      affinity.
+    patternProperties:
+      "^.+-affinity$":
+        type: object
+        additionalProperties: false
+        properties:
+          apple,fiq-index:
+            description:
+              The interrupt number specified as a FIQ, and for which
+              the affinity is not the default.
+            $ref: /schemas/types.yaml#/definitions/uint32
+            maximum: 5
+
+          cpus:
+            $ref: /schemas/types.yaml#/definitions/phandle-array
+            description:
+              Should be a list of phandles to CPU nodes (as described in
+              Documentation/devicetree/bindings/arm/cpus.yaml).
+
+        required:
+          - fiq-index
+          - cpus
+
 required:
   - compatible
   - '#interrupt-cells'
-- 
GitLab


From a5e8801202b318622ea526aa5625e5f7eceb4d26 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Wed, 3 Nov 2021 13:35:25 +0000
Subject: [PATCH 0276/1586] irqchip/apple-aic: Parse FIQ affinities from
 device-tree

In order to be able to tell the core IRQ code about the affinity
of the PMU interrupt in later patches, parse the affinities kindly
provided in the device-tree.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 drivers/irqchip/irq-apple-aic.c | 49 +++++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)

diff --git a/drivers/irqchip/irq-apple-aic.c b/drivers/irqchip/irq-apple-aic.c
index 38091ebb94033..22d9b2058612b 100644
--- a/drivers/irqchip/irq-apple-aic.c
+++ b/drivers/irqchip/irq-apple-aic.c
@@ -177,6 +177,9 @@ struct aic_irq_chip {
 	void __iomem *base;
 	struct irq_domain *hw_domain;
 	struct irq_domain *ipi_domain;
+	struct {
+		cpumask_t aff;
+	} *fiq_aff[AIC_NR_FIQ];
 	int nr_hw;
 };
 
@@ -793,12 +796,50 @@ static struct gic_kvm_info vgic_info __initdata = {
 	.no_hw_deactivation	= true,
 };
 
+static void build_fiq_affinity(struct aic_irq_chip *ic, struct device_node *aff)
+{
+	int i, n;
+	u32 fiq;
+
+	if (of_property_read_u32(aff, "apple,fiq-index", &fiq) ||
+	    WARN_ON(fiq >= AIC_NR_FIQ) || ic->fiq_aff[fiq])
+		return;
+
+	n = of_property_count_elems_of_size(aff, "cpus", sizeof(u32));
+	if (WARN_ON(n < 0))
+		return;
+
+	ic->fiq_aff[fiq] = kzalloc(sizeof(ic->fiq_aff[fiq]), GFP_KERNEL);
+	if (!ic->fiq_aff[fiq])
+		return;
+
+	for (i = 0; i < n; i++) {
+		struct device_node *cpu_node;
+		u32 cpu_phandle;
+		int cpu;
+
+		if (of_property_read_u32_index(aff, "cpus", i, &cpu_phandle))
+			continue;
+
+		cpu_node = of_find_node_by_phandle(cpu_phandle);
+		if (WARN_ON(!cpu_node))
+			continue;
+
+		cpu = of_cpu_node_to_id(cpu_node);
+		if (WARN_ON(cpu < 0))
+			continue;
+
+		cpumask_set_cpu(cpu, &ic->fiq_aff[fiq]->aff);
+	}
+}
+
 static int __init aic_of_ic_init(struct device_node *node, struct device_node *parent)
 {
 	int i;
 	void __iomem *regs;
 	u32 info;
 	struct aic_irq_chip *irqc;
+	struct device_node *affs;
 
 	regs = of_iomap(node, 0);
 	if (WARN_ON(!regs))
@@ -832,6 +873,14 @@ static int __init aic_of_ic_init(struct device_node *node, struct device_node *p
 		return -ENODEV;
 	}
 
+	affs = of_get_child_by_name(node, "affinities");
+	if (affs) {
+		struct device_node *chld;
+
+		for_each_child_of_node(affs, chld)
+			build_fiq_affinity(irqc, chld);
+	}
+
 	set_handle_irq(aic_handle_irq);
 	set_handle_fiq(aic_handle_fiq);
 
-- 
GitLab


From c7708816c9442beb32488e07b0fb47b6f66577cb Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Mon, 1 Nov 2021 19:59:20 +0000
Subject: [PATCH 0277/1586] irqchip/apple-aic: Wire PMU interrupts

Add the necessary code to configure and P and E-core PMU interrupts
with their respective affinities. When such an interrupt fires, map
it onto the right pseudo-interrupt.

Reviewed-by: Hector Martin <marcan@marcan.st>
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 drivers/irqchip/irq-apple-aic.c | 34 +++++++++++++++++++++------------
 1 file changed, 22 insertions(+), 12 deletions(-)

diff --git a/drivers/irqchip/irq-apple-aic.c b/drivers/irqchip/irq-apple-aic.c
index 22d9b2058612b..873544e58676f 100644
--- a/drivers/irqchip/irq-apple-aic.c
+++ b/drivers/irqchip/irq-apple-aic.c
@@ -155,7 +155,7 @@
 #define SYS_IMP_APL_UPMSR_EL1		sys_reg(3, 7, 15, 6, 4)
 #define UPMSR_IACT			BIT(0)
 
-#define AIC_NR_FIQ		4
+#define AIC_NR_FIQ		6
 #define AIC_NR_SWIPI		32
 
 /*
@@ -415,16 +415,15 @@ static void __exception_irq_entry aic_handle_fiq(struct pt_regs *regs)
 						  aic_irqc->nr_hw + AIC_TMR_EL02_VIRT);
 	}
 
-	if ((read_sysreg_s(SYS_IMP_APL_PMCR0_EL1) & (PMCR0_IMODE | PMCR0_IACT)) ==
-			(FIELD_PREP(PMCR0_IMODE, PMCR0_IMODE_FIQ) | PMCR0_IACT)) {
-		/*
-		 * Not supported yet, let's figure out how to handle this when
-		 * we implement these proprietary performance counters. For now,
-		 * just mask it and move on.
-		 */
-		pr_err_ratelimited("PMC FIQ fired. Masking.\n");
-		sysreg_clear_set_s(SYS_IMP_APL_PMCR0_EL1, PMCR0_IMODE | PMCR0_IACT,
-				   FIELD_PREP(PMCR0_IMODE, PMCR0_IMODE_OFF));
+	if (read_sysreg_s(SYS_IMP_APL_PMCR0_EL1) & PMCR0_IACT) {
+		int irq;
+		if (cpumask_test_cpu(smp_processor_id(),
+				     &aic_irqc->fiq_aff[AIC_CPU_PMU_P]->aff))
+			irq = AIC_CPU_PMU_P;
+		else
+			irq = AIC_CPU_PMU_E;
+		generic_handle_domain_irq(aic_irqc->hw_domain,
+					  aic_irqc->nr_hw + irq);
 	}
 
 	if (FIELD_GET(UPMCR0_IMODE, read_sysreg_s(SYS_IMP_APL_UPMCR0_EL1)) == UPMCR0_IMODE_FIQ &&
@@ -464,7 +463,18 @@ static int aic_irq_domain_map(struct irq_domain *id, unsigned int irq,
 				    handle_fasteoi_irq, NULL, NULL);
 		irqd_set_single_target(irq_desc_get_irq_data(irq_to_desc(irq)));
 	} else {
-		irq_set_percpu_devid(irq);
+		int fiq = hw - ic->nr_hw;
+
+		switch (fiq) {
+		case AIC_CPU_PMU_P:
+		case AIC_CPU_PMU_E:
+			irq_set_percpu_devid_partition(irq, &ic->fiq_aff[fiq]->aff);
+			break;
+		default:
+			irq_set_percpu_devid(irq);
+			break;
+		}
+
 		irq_domain_set_info(id, irq, hw, &fiq_chip, id->host_data,
 				    handle_percpu_devid_irq, NULL, NULL);
 	}
-- 
GitLab


From 1852e22b318b8d1c02b574da679b1b74f3686090 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Tue, 14 Dec 2021 15:56:55 +0000
Subject: [PATCH 0278/1586] arm64: dts: apple: Add t8103 PMU interrupt
 affinities

The two PMU pseudo interrupts have specific affinities. One set
is affine to the small cores, and the other set affine to the
big ones.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/boot/dts/apple/t8103.dtsi | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/arch/arm64/boot/dts/apple/t8103.dtsi b/arch/arm64/boot/dts/apple/t8103.dtsi
index 19afbc91020a2..a2e006538c568 100644
--- a/arch/arm64/boot/dts/apple/t8103.dtsi
+++ b/arch/arm64/boot/dts/apple/t8103.dtsi
@@ -213,6 +213,18 @@
 			interrupt-controller;
 			reg = <0x2 0x3b100000 0x0 0x8000>;
 			power-domains = <&ps_aic>;
+
+			affinities {
+				e-core-pmu-affinity {
+					apple,fiq-index = <AIC_CPU_PMU_E>;
+					cpus = <&cpu0 &cpu1 &cpu2 &cpu3>;
+				};
+
+				p-core-pmu-affinity {
+					apple,fiq-index = <AIC_CPU_PMU_P>;
+					cpus = <&cpu4 &cpu5 &cpu6 &cpu7>;
+				};
+			};
 		};
 
 		pmgr: power-management@23b700000 {
-- 
GitLab


From 0f522efcd79634a6113195842ee763dc6ebacfbb Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Tue, 2 Nov 2021 17:09:49 +0000
Subject: [PATCH 0279/1586] arm64: dts: apple: Add t8303 PMU nodes

Advertise the two PMU nodes for the t8103 SoC.

Reviewed-by: Hector Martin <marcan@marcan.st>
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/boot/dts/apple/t8103.dtsi | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/arch/arm64/boot/dts/apple/t8103.dtsi b/arch/arm64/boot/dts/apple/t8103.dtsi
index a2e006538c568..9f8f4145db880 100644
--- a/arch/arm64/boot/dts/apple/t8103.dtsi
+++ b/arch/arm64/boot/dts/apple/t8103.dtsi
@@ -97,6 +97,18 @@
 			     <AIC_FIQ AIC_TMR_HV_VIRT IRQ_TYPE_LEVEL_HIGH>;
 	};
 
+	pmu-e {
+		compatible = "apple,icestorm-pmu";
+		interrupt-parent = <&aic>;
+		interrupts = <AIC_FIQ AIC_CPU_PMU_E IRQ_TYPE_LEVEL_HIGH>;
+	};
+
+	pmu-p {
+		compatible = "apple,firestorm-pmu";
+		interrupt-parent = <&aic>;
+		interrupts = <AIC_FIQ AIC_CPU_PMU_P IRQ_TYPE_LEVEL_HIGH>;
+	};
+
 	clkref: clock-ref {
 		compatible = "fixed-clock";
 		#clock-cells = <0>;
-- 
GitLab


From 11db7410cfcba2e5ffed7b8bb2a57d4dd5e22063 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Wed, 3 Nov 2021 13:55:19 +0000
Subject: [PATCH 0280/1586] irqchip/apple-aic: Move PMU-specific registers to
 their own include file

As we are about to have a PMU driver, move the PMU bits from the AIC
driver into a common include file.

Reviewed-by: Hector Martin <marcan@marcan.st>
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/include/asm/apple_m1_pmu.h | 19 +++++++++++++++++++
 drivers/irqchip/irq-apple-aic.c       | 11 +----------
 2 files changed, 20 insertions(+), 10 deletions(-)
 create mode 100644 arch/arm64/include/asm/apple_m1_pmu.h

diff --git a/arch/arm64/include/asm/apple_m1_pmu.h b/arch/arm64/include/asm/apple_m1_pmu.h
new file mode 100644
index 0000000000000..b848af7faadc9
--- /dev/null
+++ b/arch/arm64/include/asm/apple_m1_pmu.h
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#ifndef __ASM_APPLE_M1_PMU_h
+#define __ASM_APPLE_M1_PMU_h
+
+#include <linux/bits.h>
+#include <asm/sysreg.h>
+
+/* Core PMC control register */
+#define SYS_IMP_APL_PMCR0_EL1	sys_reg(3, 1, 15, 0, 0)
+#define PMCR0_IMODE		GENMASK(10, 8)
+#define PMCR0_IMODE_OFF		0
+#define PMCR0_IMODE_PMI		1
+#define PMCR0_IMODE_AIC		2
+#define PMCR0_IMODE_HALT	3
+#define PMCR0_IMODE_FIQ		4
+#define PMCR0_IACT		BIT(11)
+
+#endif /* __ASM_APPLE_M1_PMU_h */
diff --git a/drivers/irqchip/irq-apple-aic.c b/drivers/irqchip/irq-apple-aic.c
index 873544e58676f..b40199c6625e3 100644
--- a/drivers/irqchip/irq-apple-aic.c
+++ b/drivers/irqchip/irq-apple-aic.c
@@ -55,6 +55,7 @@
 #include <linux/limits.h>
 #include <linux/of_address.h>
 #include <linux/slab.h>
+#include <asm/apple_m1_pmu.h>
 #include <asm/exception.h>
 #include <asm/sysreg.h>
 #include <asm/virt.h>
@@ -109,16 +110,6 @@
  * Note: sysreg-based IPIs are not supported yet.
  */
 
-/* Core PMC control register */
-#define SYS_IMP_APL_PMCR0_EL1		sys_reg(3, 1, 15, 0, 0)
-#define PMCR0_IMODE			GENMASK(10, 8)
-#define PMCR0_IMODE_OFF			0
-#define PMCR0_IMODE_PMI			1
-#define PMCR0_IMODE_AIC			2
-#define PMCR0_IMODE_HALT		3
-#define PMCR0_IMODE_FIQ			4
-#define PMCR0_IACT			BIT(11)
-
 /* IPI request registers */
 #define SYS_IMP_APL_IPI_RR_LOCAL_EL1	sys_reg(3, 5, 15, 0, 0)
 #define SYS_IMP_APL_IPI_RR_GLOBAL_EL1	sys_reg(3, 5, 15, 0, 1)
-- 
GitLab


From 1018a5463a063715365784704c4e8cdf2eec4b04 Mon Sep 17 00:00:00 2001
From: Chao Yu <chao@kernel.org>
Date: Fri, 4 Feb 2022 15:19:46 +0800
Subject: [PATCH 0281/1586] f2fs: introduce F2FS_IPU_HONOR_OPU_WRITE ipu policy

Once F2FS_IPU_FORCE policy is enabled in some cases:
a) f2fs forces to use F2FS_IPU_FORCE in a small-sized volume
b) user sets F2FS_IPU_FORCE policy via sysfs

Then we may fail to defragment file due to IPU policy check, it doesn't
make sense, let's introduce a new IPU policy to allow OPU during file
defragmentation.

In small-sized volume, let's enable F2FS_IPU_HONOR_OPU_WRITE policy
by default.

Signed-off-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 Documentation/ABI/testing/sysfs-fs-f2fs |  3 ++-
 fs/f2fs/data.c                          | 18 +++++++++++++-----
 fs/f2fs/f2fs.h                          |  3 ++-
 fs/f2fs/file.c                          | 18 +++++++++++-------
 fs/f2fs/segment.h                       |  5 ++++-
 fs/f2fs/super.c                         |  3 ++-
 6 files changed, 34 insertions(+), 16 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index 87d3884c90ea6..7b50bf82f14dd 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -55,8 +55,9 @@ Description:	Controls the in-place-update policy.
 		0x04  F2FS_IPU_UTIL
 		0x08  F2FS_IPU_SSR_UTIL
 		0x10  F2FS_IPU_FSYNC
-		0x20  F2FS_IPU_ASYNC,
+		0x20  F2FS_IPU_ASYNC
 		0x40  F2FS_IPU_NOCACHE
+		0x80  F2FS_IPU_HONOR_OPU_WRITE
 		====  =================
 
 		Refer segment.h for details.
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 0f124e8de1d4e..6b5f389ba998b 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -2460,6 +2460,9 @@ static inline bool check_inplace_update_policy(struct inode *inode,
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 	unsigned int policy = SM_I(sbi)->ipu_policy;
 
+	if (policy & (0x1 << F2FS_IPU_HONOR_OPU_WRITE) &&
+			is_inode_flag_set(inode, FI_OPU_WRITE))
+		return false;
 	if (policy & (0x1 << F2FS_IPU_FORCE))
 		return true;
 	if (policy & (0x1 << F2FS_IPU_SSR) && f2fs_need_SSR(sbi))
@@ -2530,6 +2533,9 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
 	if (is_inode_flag_set(inode, FI_ALIGNED_WRITE))
 		return true;
 
+	if (is_inode_flag_set(inode, FI_OPU_WRITE))
+		return true;
+
 	if (fio) {
 		if (page_private_gcing(fio->page))
 			return true;
@@ -3154,8 +3160,8 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
 			f2fs_available_free_memory(sbi, DIRTY_DENTS))
 		goto skip_write;
 
-	/* skip writing during file defragment */
-	if (is_inode_flag_set(inode, FI_DO_DEFRAG))
+	/* skip writing in file defragment preparing stage */
+	if (is_inode_flag_set(inode, FI_SKIP_WRITES))
 		goto skip_write;
 
 	trace_f2fs_writepages(mapping->host, wbc, DATA);
@@ -3725,6 +3731,7 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk,
 	filemap_invalidate_lock(inode->i_mapping);
 
 	set_inode_flag(inode, FI_ALIGNED_WRITE);
+	set_inode_flag(inode, FI_OPU_WRITE);
 
 	for (; secidx < end_sec; secidx++) {
 		f2fs_down_write(&sbi->pin_sem);
@@ -3733,7 +3740,7 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk,
 		f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false);
 		f2fs_unlock_op(sbi);
 
-		set_inode_flag(inode, FI_DO_DEFRAG);
+		set_inode_flag(inode, FI_SKIP_WRITES);
 
 		for (blkofs = 0; blkofs < blk_per_sec; blkofs++) {
 			struct page *page;
@@ -3750,7 +3757,7 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk,
 			f2fs_put_page(page, 1);
 		}
 
-		clear_inode_flag(inode, FI_DO_DEFRAG);
+		clear_inode_flag(inode, FI_SKIP_WRITES);
 
 		ret = filemap_fdatawrite(inode->i_mapping);
 
@@ -3761,7 +3768,8 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk,
 	}
 
 done:
-	clear_inode_flag(inode, FI_DO_DEFRAG);
+	clear_inode_flag(inode, FI_SKIP_WRITES);
+	clear_inode_flag(inode, FI_OPU_WRITE);
 	clear_inode_flag(inode, FI_ALIGNED_WRITE);
 
 	filemap_invalidate_unlock(inode->i_mapping);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 51c1392708e6e..3b4bf1c3f1ed6 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -740,7 +740,8 @@ enum {
 	FI_DROP_CACHE,		/* drop dirty page cache */
 	FI_DATA_EXIST,		/* indicate data exists */
 	FI_INLINE_DOTS,		/* indicate inline dot dentries */
-	FI_DO_DEFRAG,		/* indicate defragment is running */
+	FI_SKIP_WRITES,		/* should skip data page writeback */
+	FI_OPU_WRITE,		/* used for opu per file */
 	FI_DIRTY_FILE,		/* indicate regular/symlink has dirty pages */
 	FI_PREALLOCATED_ALL,	/* all blocks for write were preallocated */
 	FI_HOT_DATA,		/* indicate file is hot */
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 6ccdd6e347e2c..42fbdcf0ccc9b 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -2559,10 +2559,6 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
 	bool fragmented = false;
 	int err;
 
-	/* if in-place-update policy is enabled, don't waste time here */
-	if (f2fs_should_update_inplace(inode, NULL))
-		return -EINVAL;
-
 	pg_start = range->start >> PAGE_SHIFT;
 	pg_end = (range->start + range->len) >> PAGE_SHIFT;
 
@@ -2570,6 +2566,13 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
 
 	inode_lock(inode);
 
+	/* if in-place-update policy is enabled, don't waste time here */
+	set_inode_flag(inode, FI_OPU_WRITE);
+	if (f2fs_should_update_inplace(inode, NULL)) {
+		err = -EINVAL;
+		goto out;
+	}
+
 	/* writeback all dirty pages in the range */
 	err = filemap_write_and_wait_range(inode->i_mapping, range->start,
 						range->start + range->len - 1);
@@ -2651,7 +2654,7 @@ do_map:
 			goto check;
 		}
 
-		set_inode_flag(inode, FI_DO_DEFRAG);
+		set_inode_flag(inode, FI_SKIP_WRITES);
 
 		idx = map.m_lblk;
 		while (idx < map.m_lblk + map.m_len && cnt < blk_per_seg) {
@@ -2676,15 +2679,16 @@ check:
 		if (map.m_lblk < pg_end && cnt < blk_per_seg)
 			goto do_map;
 
-		clear_inode_flag(inode, FI_DO_DEFRAG);
+		clear_inode_flag(inode, FI_SKIP_WRITES);
 
 		err = filemap_fdatawrite(inode->i_mapping);
 		if (err)
 			goto out;
 	}
 clear_out:
-	clear_inode_flag(inode, FI_DO_DEFRAG);
+	clear_inode_flag(inode, FI_SKIP_WRITES);
 out:
+	clear_inode_flag(inode, FI_OPU_WRITE);
 	inode_unlock(inode);
 	if (!err)
 		range->len = (u64)total << PAGE_SHIFT;
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 0291cd55cf09b..5c94caf0c0a1d 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -651,7 +651,9 @@ static inline int utilization(struct f2fs_sb_info *sbi)
  *                     pages over min_fsync_blocks. (=default option)
  * F2FS_IPU_ASYNC - do IPU given by asynchronous write requests.
  * F2FS_IPU_NOCACHE - disable IPU bio cache.
- * F2FS_IPUT_DISABLE - disable IPU. (=default option in LFS mode)
+ * F2FS_IPU_HONOR_OPU_WRITE - use OPU write prior to IPU write if inode has
+ *                            FI_OPU_WRITE flag.
+ * F2FS_IPU_DISABLE - disable IPU. (=default option in LFS mode)
  */
 #define DEF_MIN_IPU_UTIL	70
 #define DEF_MIN_FSYNC_BLOCKS	8
@@ -667,6 +669,7 @@ enum {
 	F2FS_IPU_FSYNC,
 	F2FS_IPU_ASYNC,
 	F2FS_IPU_NOCACHE,
+	F2FS_IPU_HONOR_OPU_WRITE,
 };
 
 static inline unsigned int curseg_segno(struct f2fs_sb_info *sbi,
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 9af6c20532ece..806836184ebc9 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -3957,7 +3957,8 @@ static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi)
 		F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE;
 		if (f2fs_block_unit_discard(sbi))
 			sm_i->dcc_info->discard_granularity = 1;
-		sm_i->ipu_policy = 1 << F2FS_IPU_FORCE;
+		sm_i->ipu_policy = 1 << F2FS_IPU_FORCE |
+					1 << F2FS_IPU_HONOR_OPU_WRITE;
 	}
 
 	sbi->readdir_ra = 1;
-- 
GitLab


From 345be4275cad454ae7e25884369a9c6c25e56279 Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <nathan@kernel.org>
Date: Mon, 7 Feb 2022 09:38:29 -0700
Subject: [PATCH 0282/1586] thermal: netlink: Fix parameter type of
 thermal_genl_cpu_capability_event() stub

When building with CONFIG_THERMAL_NETLINK=n, there is a spew of warnings
along the lines of:

  In file included from drivers/thermal/thermal_core.c:27:
  In file included from drivers/thermal/thermal_core.h:15:
  drivers/thermal/thermal_netlink.h:113:71: warning: declaration of 'struct cpu_capability' will not be visible outside of this function [-Wvisibility]
  static inline int thermal_genl_cpu_capability_event(int count, struct cpu_capability *caps)
                                                                        ^
  1 warning generated.

'struct cpu_capability' is not forward declared anywhere in the header.
As it turns out, this should really be 'struct thermal_genl_cpu_caps',
which silences the warning and makes the parameter types of the stub
match the full function.

Fixes: e4b1eb24ce5a ("thermal: netlink: Add a new event to notify CPU capabilities change")
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
Reviewed-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/thermal/thermal_netlink.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/thermal/thermal_netlink.h b/drivers/thermal/thermal_netlink.h
index 04d1adbbc0124..1052f523188da 100644
--- a/drivers/thermal/thermal_netlink.h
+++ b/drivers/thermal/thermal_netlink.h
@@ -110,7 +110,7 @@ static inline int thermal_genl_sampling_temp(int id, int temp)
 	return 0;
 }
 
-static inline int thermal_genl_cpu_capability_event(int count, struct cpu_capability *caps)
+static inline int thermal_genl_cpu_capability_event(int count, struct thermal_genl_cpu_caps *caps)
 {
 	return 0;
 }
-- 
GitLab


From fa31a4d669bd471e9510db1abf9b91e1a6be6ff7 Mon Sep 17 00:00:00 2001
From: Jim Mattson <jmattson@google.com>
Date: Thu, 3 Feb 2022 11:43:07 -0800
Subject: [PATCH 0283/1586] x86/cpufeatures: Put the AMX macros in the word 18
 block

These macros are for bits in CPUID.(EAX=7,ECX=0):EDX, not for bits in
CPUID(EAX=7,ECX=1):EAX. Put them with their brethren.

  [ bp: Sort word 18 bits properly, as caught by Like Xu
    <like.xu.linux@gmail.com> ]

Signed-off-by: Jim Mattson <jmattson@google.com>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20220203194308.2469117-1-jmattson@google.com
---
 arch/x86/include/asm/cpufeatures.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 6db4e2932b3d8..5cd22090e53de 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -299,9 +299,6 @@
 /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
 #define X86_FEATURE_AVX_VNNI		(12*32+ 4) /* AVX VNNI instructions */
 #define X86_FEATURE_AVX512_BF16		(12*32+ 5) /* AVX512 BFLOAT16 instructions */
-#define X86_FEATURE_AMX_BF16		(18*32+22) /* AMX bf16 Support */
-#define X86_FEATURE_AMX_TILE		(18*32+24) /* AMX tile Support */
-#define X86_FEATURE_AMX_INT8		(18*32+25) /* AMX int8 Support */
 
 /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
 #define X86_FEATURE_CLZERO		(13*32+ 0) /* CLZERO instruction */
@@ -390,7 +387,10 @@
 #define X86_FEATURE_TSXLDTRK		(18*32+16) /* TSX Suspend Load Address Tracking */
 #define X86_FEATURE_PCONFIG		(18*32+18) /* Intel PCONFIG */
 #define X86_FEATURE_ARCH_LBR		(18*32+19) /* Intel ARCH LBR */
+#define X86_FEATURE_AMX_BF16		(18*32+22) /* AMX bf16 Support */
 #define X86_FEATURE_AVX512_FP16		(18*32+23) /* AVX512 FP16 */
+#define X86_FEATURE_AMX_TILE		(18*32+24) /* AMX tile Support */
+#define X86_FEATURE_AMX_INT8		(18*32+25) /* AMX int8 Support */
 #define X86_FEATURE_SPEC_CTRL		(18*32+26) /* "" Speculation Control (IBRS + IBPB) */
 #define X86_FEATURE_INTEL_STIBP		(18*32+27) /* "" Single Thread Indirect Branch Predictors */
 #define X86_FEATURE_FLUSH_L1D		(18*32+28) /* Flush L1D cache */
-- 
GitLab


From 2f04aa69ab5c5c40d2e3e51fd73ce2ecb651e9ba Mon Sep 17 00:00:00 2001
From: Vincent Whitchurch <vincent.whitchurch@axis.com>
Date: Fri, 4 Feb 2022 16:52:40 +0100
Subject: [PATCH 0284/1586] regulator: Add bindings for TPS62864x

Add bindings for the TPS62864/TPS6286/TPS62868/TPS62869 voltage
regulators.

Signed-off-by: Vincent Whitchurch <vincent.whitchurch@axis.com>
Link: https://lore.kernel.org/r/20220204155241.576342-2-vincent.whitchurch@axis.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../bindings/regulator/ti,tps62864.yaml       | 62 +++++++++++++++++++
 include/dt-bindings/regulator/ti,tps62864.h   |  9 +++
 2 files changed, 71 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/regulator/ti,tps62864.yaml
 create mode 100644 include/dt-bindings/regulator/ti,tps62864.h

diff --git a/Documentation/devicetree/bindings/regulator/ti,tps62864.yaml b/Documentation/devicetree/bindings/regulator/ti,tps62864.yaml
new file mode 100644
index 0000000000000..e3d739a37ab04
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/ti,tps62864.yaml
@@ -0,0 +1,62 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/regulator/ti,tps62864.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: TI TPS62864/TPS6286/TPS62868/TPS62869 voltage regulator
+
+maintainers:
+  - Vincent Whitchurch <vincent.whitchurch@axis.com>
+
+properties:
+  compatible:
+    enum:
+      - ti,tps62864
+      - ti,tps62866
+      - ti,tps62868
+      - ti,tps62869
+
+  reg:
+    maxItems: 1
+
+  regulators:
+    type: object
+
+    patternProperties:
+      "SW":
+        type: object
+        $ref: regulator.yaml#
+
+    additionalProperties: false
+
+required:
+  - compatible
+  - reg
+  - regulators
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/regulator/ti,tps62864.h>
+    i2c {
+      #address-cells = <1>;
+      #size-cells = <0>;
+
+      regulator@48 {
+        compatible = "ti,tps62864";
+        reg = <0x48>;
+
+        regulators {
+          SW {
+            regulator-name = "+0.85V";
+            regulator-min-microvolt = <800000>;
+            regulator-max-microvolt = <890000>;
+            regulator-initial-mode = <TPS62864_MODE_FPWM>;
+          };
+        };
+      };
+    };
+
+...
diff --git a/include/dt-bindings/regulator/ti,tps62864.h b/include/dt-bindings/regulator/ti,tps62864.h
new file mode 100644
index 0000000000000..8db31f23d9561
--- /dev/null
+++ b/include/dt-bindings/regulator/ti,tps62864.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+
+#ifndef _DT_BINDINGS_REGULATOR_TI_TPS62864_H
+#define _DT_BINDINGS_REGULATOR_TI_TPS62864_H
+
+#define TPS62864_MODE_NORMAL			0
+#define TPS62864_MODE_FPWM			1
+
+#endif
-- 
GitLab


From e2a01b4e8806087743e5ee42f9dcedfc741d4112 Mon Sep 17 00:00:00 2001
From: Vincent Whitchurch <vincent.whitchurch@axis.com>
Date: Fri, 4 Feb 2022 16:52:41 +0100
Subject: [PATCH 0285/1586] regulator: Add support for TPS6286x

TI's TPS62864/TPS6286/TPS62868/TPS62869 are high-frequency synchronous
step-down converters controlled via I2C.  There are differences in the
electrical characteristics and packaging between the variants, but the
register interfaces are identical.

Signed-off-by: Vincent Whitchurch <vincent.whitchurch@axis.com>
Link: https://lore.kernel.org/r/20220204155241.576342-3-vincent.whitchurch@axis.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/Kconfig              |   9 ++
 drivers/regulator/Makefile             |   1 +
 drivers/regulator/tps6286x-regulator.c | 159 +++++++++++++++++++++++++
 3 files changed, 169 insertions(+)
 create mode 100644 drivers/regulator/tps6286x-regulator.c

diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index 22503e4f53272..00559c214f570 100644
--- a/drivers/regulator/Kconfig
+++ b/drivers/regulator/Kconfig
@@ -1264,6 +1264,15 @@ config REGULATOR_TPS62360
 	  high-frequency synchronous step down dc-dc converter optimized
 	  for battery-powered portable applications.
 
+config REGULATOR_TPS6286X
+	tristate "TI TPS6286x Power Regulator"
+	depends on I2C && OF
+	select REGMAP_I2C
+	help
+	  This driver supports TPS6236x voltage regulator chips. These are
+	  high-frequency synchronous step-down converters with an I2C
+	  interface.
+
 config REGULATOR_TPS65023
 	tristate "TI TPS65023 Power regulators"
 	depends on I2C
diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile
index 2e1b087489fa5..4b8794a73e170 100644
--- a/drivers/regulator/Makefile
+++ b/drivers/regulator/Makefile
@@ -149,6 +149,7 @@ obj-$(CONFIG_REGULATOR_SY8827N) += sy8827n.o
 obj-$(CONFIG_REGULATOR_TI_ABB) += ti-abb-regulator.o
 obj-$(CONFIG_REGULATOR_TPS6105X) += tps6105x-regulator.o
 obj-$(CONFIG_REGULATOR_TPS62360) += tps62360-regulator.o
+obj-$(CONFIG_REGULATOR_TPS6286X) += tps6286x-regulator.o
 obj-$(CONFIG_REGULATOR_TPS65023) += tps65023-regulator.o
 obj-$(CONFIG_REGULATOR_TPS6507X) += tps6507x-regulator.o
 obj-$(CONFIG_REGULATOR_TPS65086) += tps65086-regulator.o
diff --git a/drivers/regulator/tps6286x-regulator.c b/drivers/regulator/tps6286x-regulator.c
new file mode 100644
index 0000000000000..e29deda30d75d
--- /dev/null
+++ b/drivers/regulator/tps6286x-regulator.c
@@ -0,0 +1,159 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright Axis Communications AB
+
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/regmap.h>
+#include <linux/regulator/of_regulator.h>
+#include <linux/regulator/machine.h>
+#include <linux/regulator/driver.h>
+
+#include <dt-bindings/regulator/ti,tps62864.h>
+
+#define TPS6286X_VOUT1		0x01
+#define TPS6286X_VOUT1_VO1_SET	GENMASK(7, 0)
+
+#define TPS6286X_CONTROL	0x03
+#define TPS6286X_CONTROL_FPWM	BIT(4)
+#define TPS6286X_CONTROL_SWEN	BIT(5)
+
+#define TPS6286X_MIN_MV		400
+#define TPS6286X_MAX_MV		1675
+#define TPS6286X_STEP_MV	5
+
+static const struct regmap_config tps6286x_regmap_config = {
+	.reg_bits = 8,
+	.val_bits = 8,
+};
+
+static int tps6286x_set_mode(struct regulator_dev *rdev, unsigned int mode)
+{
+	unsigned int val;
+
+	switch (mode) {
+	case REGULATOR_MODE_NORMAL:
+		val = 0;
+		break;
+	case REGULATOR_MODE_FAST:
+		val = TPS6286X_CONTROL_FPWM;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return regmap_update_bits(rdev->regmap, TPS6286X_CONTROL,
+				  TPS6286X_CONTROL_FPWM, val);
+}
+
+static unsigned int tps6286x_get_mode(struct regulator_dev *rdev)
+{
+	unsigned int val;
+	int ret;
+
+	ret = regmap_read(rdev->regmap, TPS6286X_CONTROL, &val);
+	if (ret < 0)
+		return 0;
+
+	return (val & TPS6286X_CONTROL_FPWM) ? REGULATOR_MODE_FAST : REGULATOR_MODE_NORMAL;
+}
+
+static const struct regulator_ops tps6286x_regulator_ops = {
+	.enable = regulator_enable_regmap,
+	.disable = regulator_disable_regmap,
+	.set_mode = tps6286x_set_mode,
+	.get_mode = tps6286x_get_mode,
+	.is_enabled = regulator_is_enabled_regmap,
+	.set_voltage_sel = regulator_set_voltage_sel_regmap,
+	.get_voltage_sel = regulator_get_voltage_sel_regmap,
+	.list_voltage = regulator_list_voltage_linear,
+};
+
+static unsigned int tps6286x_of_map_mode(unsigned int mode)
+{
+	switch (mode) {
+	case TPS62864_MODE_NORMAL:
+		return REGULATOR_MODE_NORMAL;
+	case TPS62864_MODE_FPWM:
+		return REGULATOR_MODE_FAST;
+	default:
+		return REGULATOR_MODE_INVALID;
+	}
+}
+
+static const struct regulator_desc tps6286x_reg = {
+	.name = "tps6286x",
+	.of_match = of_match_ptr("SW"),
+	.owner = THIS_MODULE,
+	.ops = &tps6286x_regulator_ops,
+	.of_map_mode = tps6286x_of_map_mode,
+	.regulators_node = of_match_ptr("regulators"),
+	.type = REGULATOR_VOLTAGE,
+	.n_voltages = ((TPS6286X_MAX_MV - TPS6286X_MIN_MV) / TPS6286X_STEP_MV) + 1,
+	.min_uV = TPS6286X_MIN_MV * 1000,
+	.uV_step = TPS6286X_STEP_MV * 1000,
+	.vsel_reg = TPS6286X_VOUT1,
+	.vsel_mask = TPS6286X_VOUT1_VO1_SET,
+	.enable_reg = TPS6286X_CONTROL,
+	.enable_mask = TPS6286X_CONTROL_SWEN,
+	.ramp_delay = 1000,
+	/* tDelay + tRamp, rounded up */
+	.enable_time = 3000,
+};
+
+static const struct of_device_id tps6286x_dt_ids[] = {
+	{ .compatible = "ti,tps62864", },
+	{ .compatible = "ti,tps62866", },
+	{ .compatible = "ti,tps62868", },
+	{ .compatible = "ti,tps62869", },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, tps6286x_dt_ids);
+
+static int tps6286x_i2c_probe(struct i2c_client *i2c,
+			    const struct i2c_device_id *id)
+{
+	struct device *dev = &i2c->dev;
+	struct regulator_config config = {};
+	struct regulator_dev *rdev;
+	struct regmap *regmap;
+
+	regmap = devm_regmap_init_i2c(i2c, &tps6286x_regmap_config);
+	if (IS_ERR(regmap))
+		return PTR_ERR(regmap);
+
+	config.dev = &i2c->dev;
+	config.of_node = dev->of_node;
+	config.regmap = regmap;
+
+	rdev = devm_regulator_register(&i2c->dev, &tps6286x_reg, &config);
+	if (IS_ERR(rdev)) {
+		dev_err(&i2c->dev, "Failed to register tps6286x regulator\n");
+		return PTR_ERR(rdev);
+	}
+
+	return 0;
+}
+
+static const struct i2c_device_id tps6286x_i2c_id[] = {
+	{ "tps62864", 0 },
+	{ "tps62866", 0 },
+	{ "tps62868", 0 },
+	{ "tps62869", 0 },
+	{},
+};
+MODULE_DEVICE_TABLE(i2c, tps6286x_i2c_id);
+
+static struct i2c_driver tps6286x_regulator_driver = {
+	.driver = {
+		.name = "tps6286x",
+		.of_match_table = of_match_ptr(tps6286x_dt_ids),
+	},
+	.probe = tps6286x_i2c_probe,
+	.id_table = tps6286x_i2c_id,
+};
+
+module_i2c_driver(tps6286x_regulator_driver);
+
+MODULE_LICENSE("GPL v2");
-- 
GitLab


From 60edd652005fe2931565f658f23f86d094f4c1f0 Mon Sep 17 00:00:00 2001
From: Leilk Liu <leilk.liu@mediatek.com>
Date: Tue, 25 Jan 2022 09:23:28 +0800
Subject: [PATCH 0286/1586] spi: Convert spi-slave-mt27xx to json-schema

Convert Mediatek ARM SOC's SPI Slave controller binding
to json-schema format.

Signed-off-by: Leilk Liu <leilk.liu@mediatek.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20220125012330.13449-2-leilk.liu@mediatek.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../spi/mediatek,spi-slave-mt27xx.yaml        | 58 +++++++++++++++++++
 .../bindings/spi/spi-slave-mt27xx.txt         | 33 -----------
 2 files changed, 58 insertions(+), 33 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/spi/mediatek,spi-slave-mt27xx.yaml
 delete mode 100644 Documentation/devicetree/bindings/spi/spi-slave-mt27xx.txt

diff --git a/Documentation/devicetree/bindings/spi/mediatek,spi-slave-mt27xx.yaml b/Documentation/devicetree/bindings/spi/mediatek,spi-slave-mt27xx.yaml
new file mode 100644
index 0000000000000..7977799a8ee10
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/mediatek,spi-slave-mt27xx.yaml
@@ -0,0 +1,58 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/spi/mediatek,spi-slave-mt27xx.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: SPI Slave controller for MediaTek ARM SoCs
+
+maintainers:
+  - Leilk Liu <leilk.liu@mediatek.com>
+
+allOf:
+  - $ref: "/schemas/spi/spi-controller.yaml#"
+
+properties:
+  compatible:
+    enum:
+      - mediatek,mt2712-spi-slave
+      - mediatek,mt8195-spi-slave
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  clock-names:
+    items:
+      - const: spi
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - clock-names
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/mt2712-clk.h>
+    #include <dt-bindings/gpio/gpio.h>
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/interrupt-controller/irq.h>
+
+    spi@10013000 {
+      compatible = "mediatek,mt2712-spi-slave";
+      reg = <0x10013000 0x100>;
+      interrupts = <GIC_SPI 283 IRQ_TYPE_LEVEL_LOW>;
+      clocks = <&infracfg CLK_INFRA_AO_SPI1>;
+      clock-names = "spi";
+      assigned-clocks = <&topckgen CLK_TOP_SPISLV_SEL>;
+      assigned-clock-parents = <&topckgen CLK_TOP_UNIVPLL1_D2>;
+    };
diff --git a/Documentation/devicetree/bindings/spi/spi-slave-mt27xx.txt b/Documentation/devicetree/bindings/spi/spi-slave-mt27xx.txt
deleted file mode 100644
index 9192724540fd9..0000000000000
--- a/Documentation/devicetree/bindings/spi/spi-slave-mt27xx.txt
+++ /dev/null
@@ -1,33 +0,0 @@
-Binding for MTK SPI Slave controller
-
-Required properties:
-- compatible: should be one of the following.
-    - mediatek,mt2712-spi-slave: for mt2712 platforms
-    - mediatek,mt8195-spi-slave: for mt8195 platforms
-- reg: Address and length of the register set for the device.
-- interrupts: Should contain spi interrupt.
-- clocks: phandles to input clocks.
-  It's clock gate, and should be <&infracfg CLK_INFRA_AO_SPI1>.
-- clock-names: should be "spi" for the clock gate.
-
-Optional properties:
-- assigned-clocks: it's mux clock, should be <&topckgen CLK_TOP_SPISLV_SEL>.
-- assigned-clock-parents: parent of mux clock.
-  It's PLL, and should be one of the following.
-   -  <&topckgen CLK_TOP_UNIVPLL1_D2>: specify parent clock 312MHZ.
-				       It's the default one.
-   -  <&topckgen CLK_TOP_UNIVPLL1_D4>: specify parent clock 156MHZ.
-   -  <&topckgen CLK_TOP_UNIVPLL2_D4>: specify parent clock 104MHZ.
-   -  <&topckgen CLK_TOP_UNIVPLL1_D8>: specify parent clock 78MHZ.
-
-Example:
-- SoC Specific Portion:
-spis1: spi@10013000 {
-	compatible = "mediatek,mt2712-spi-slave";
-	reg = <0 0x10013000 0 0x100>;
-	interrupts = <GIC_SPI 283 IRQ_TYPE_LEVEL_LOW>;
-	clocks = <&infracfg CLK_INFRA_AO_SPI1>;
-	clock-names = "spi";
-	assigned-clocks = <&topckgen CLK_TOP_SPISLV_SEL>;
-	assigned-clock-parents = <&topckgen CLK_TOP_UNIVPLL1_D2>;
-};
-- 
GitLab


From 08727dd751234bc878be64e60ea6f0d8ea0d54c0 Mon Sep 17 00:00:00 2001
From: Leilk Liu <leilk.liu@mediatek.com>
Date: Tue, 25 Jan 2022 09:23:29 +0800
Subject: [PATCH 0287/1586] spi: Convert spi-mt65xx to json-schema

Convert Mediatek ARM SOC's SPI Master controller binding
to json-schema format.

Signed-off-by: Leilk Liu <leilk.liu@mediatek.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20220125012330.13449-3-leilk.liu@mediatek.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../bindings/spi/mediatek,spi-mt65xx.yaml     | 101 ++++++++++++++++++
 .../devicetree/bindings/spi/spi-mt65xx.txt    |  68 ------------
 2 files changed, 101 insertions(+), 68 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/spi/mediatek,spi-mt65xx.yaml
 delete mode 100644 Documentation/devicetree/bindings/spi/spi-mt65xx.txt

diff --git a/Documentation/devicetree/bindings/spi/mediatek,spi-mt65xx.yaml b/Documentation/devicetree/bindings/spi/mediatek,spi-mt65xx.yaml
new file mode 100644
index 0000000000000..ea977fba49a77
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/mediatek,spi-mt65xx.yaml
@@ -0,0 +1,101 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/spi/mediatek,spi-mt65xx.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: SPI Bus controller for MediaTek ARM SoCs
+
+maintainers:
+  - Leilk Liu <leilk.liu@mediatek.com>
+
+allOf:
+  - $ref: "/schemas/spi/spi-controller.yaml#"
+
+properties:
+  compatible:
+    oneOf:
+      - items:
+          - enum:
+              - mediatek,mt7629-spi
+          - const: mediatek,mt7622-spi
+      - items:
+          - enum:
+              - mediatek,mt8516-spi
+          - const: mediatek,mt2712-spi
+      - items:
+          - enum:
+              - mediatek,mt6779-spi
+              - mediatek,mt8192-spi
+              - mediatek,mt8195-spi
+          - const: mediatek,mt6765-spi
+      - items:
+          - enum:
+              - mediatek,mt2701-spi
+              - mediatek,mt2712-spi
+              - mediatek,mt6589-spi
+              - mediatek,mt6765-spi
+              - mediatek,mt6893-spi
+              - mediatek,mt7622-spi
+              - mediatek,mt8135-spi
+              - mediatek,mt8173-spi
+              - mediatek,mt8183-spi
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    items:
+      - description: clock used for the parent clock
+      - description: clock used for the muxes clock
+      - description: clock used for the clock gate
+
+  clock-names:
+    items:
+      - const: parent-clk
+      - const: sel-clk
+      - const: spi-clk
+
+  mediatek,pad-select:
+    $ref: /schemas/types.yaml#/definitions/uint32-array
+    maxItems: 4
+    items:
+      enum: [0, 1, 2, 3]
+    description:
+      specify which pins group(ck/mi/mo/cs) spi controller used.
+      This is an array.
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - clock-names
+  - '#address-cells'
+  - '#size-cells'
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/mt8173-clk.h>
+    #include <dt-bindings/gpio/gpio.h>
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/interrupt-controller/irq.h>
+
+    spi@1100a000 {
+      compatible = "mediatek,mt8173-spi";
+      #address-cells = <1>;
+      #size-cells = <0>;
+      reg = <0x1100a000 0x1000>;
+      interrupts = <GIC_SPI 110 IRQ_TYPE_LEVEL_LOW>;
+      clocks = <&topckgen CLK_TOP_SYSPLL3_D2>,
+               <&topckgen CLK_TOP_SPI_SEL>,
+               <&pericfg CLK_PERI_SPI0>;
+      clock-names = "parent-clk", "sel-clk", "spi-clk";
+      cs-gpios = <&pio 105 GPIO_ACTIVE_LOW>, <&pio 72 GPIO_ACTIVE_LOW>;
+      mediatek,pad-select = <1>, <0>;
+    };
diff --git a/Documentation/devicetree/bindings/spi/spi-mt65xx.txt b/Documentation/devicetree/bindings/spi/spi-mt65xx.txt
deleted file mode 100644
index 2a24969159cc1..0000000000000
--- a/Documentation/devicetree/bindings/spi/spi-mt65xx.txt
+++ /dev/null
@@ -1,68 +0,0 @@
-Binding for MTK SPI controller
-
-Required properties:
-- compatible: should be one of the following.
-    - mediatek,mt2701-spi: for mt2701 platforms
-    - mediatek,mt2712-spi: for mt2712 platforms
-    - mediatek,mt6589-spi: for mt6589 platforms
-    - mediatek,mt6765-spi: for mt6765 platforms
-    - mediatek,mt7622-spi: for mt7622 platforms
-    - "mediatek,mt7629-spi", "mediatek,mt7622-spi": for mt7629 platforms
-    - mediatek,mt8135-spi: for mt8135 platforms
-    - mediatek,mt8173-spi: for mt8173 platforms
-    - mediatek,mt8183-spi: for mt8183 platforms
-    - mediatek,mt6893-spi: for mt6893 platforms
-    - "mediatek,mt8192-spi", "mediatek,mt6765-spi": for mt8192 platforms
-    - "mediatek,mt8195-spi", "mediatek,mt6765-spi": for mt8195 platforms
-    - "mediatek,mt8516-spi", "mediatek,mt2712-spi": for mt8516 platforms
-    - "mediatek,mt6779-spi", "mediatek,mt6765-spi": for mt6779 platforms
-
-- #address-cells: should be 1.
-
-- #size-cells: should be 0.
-
-- reg: Address and length of the register set for the device
-
-- interrupts: Should contain spi interrupt
-
-- clocks: phandles to input clocks.
-  The first should be one of the following. It's PLL.
-   -  <&clk26m>: specify parent clock 26MHZ.
-   -  <&topckgen CLK_TOP_SYSPLL3_D2>: specify parent clock 109MHZ.
-				      It's the default one.
-   -  <&topckgen CLK_TOP_SYSPLL4_D2>: specify parent clock 78MHZ.
-   -  <&topckgen CLK_TOP_UNIVPLL2_D4>: specify parent clock 104MHZ.
-   -  <&topckgen CLK_TOP_UNIVPLL1_D8>: specify parent clock 78MHZ.
-  The second should be <&topckgen CLK_TOP_SPI_SEL>. It's clock mux.
-  The third is <&pericfg CLK_PERI_SPI0>. It's clock gate.
-
-- clock-names: shall be "parent-clk" for the parent clock, "sel-clk" for the
-  muxes clock, and "spi-clk" for the clock gate.
-
-Optional properties:
--cs-gpios: see spi-bus.txt.
-
-- mediatek,pad-select: specify which pins group(ck/mi/mo/cs) spi
-  controller used. This is an array, the element value should be 0~3,
-  only required for MT8173.
-    0: specify GPIO69,70,71,72 for spi pins.
-    1: specify GPIO102,103,104,105 for spi pins.
-    2: specify GPIO128,129,130,131 for spi pins.
-    3: specify GPIO5,6,7,8 for spi pins.
-
-Example:
-
-- SoC Specific Portion:
-spi: spi@1100a000 {
-	compatible = "mediatek,mt8173-spi";
-	#address-cells = <1>;
-	#size-cells = <0>;
-	reg = <0 0x1100a000 0 0x1000>;
-	interrupts = <GIC_SPI 110 IRQ_TYPE_LEVEL_LOW>;
-	clocks = <&topckgen CLK_TOP_SYSPLL3_D2>,
-		 <&topckgen CLK_TOP_SPI_SEL>,
-		 <&pericfg CLK_PERI_SPI0>;
-	clock-names = "parent-clk", "sel-clk", "spi-clk";
-	cs-gpios = <&pio 105 GPIO_ACTIVE_LOW>, <&pio 72 GPIO_ACTIVE_LOW>;
-	mediatek,pad-select = <1>, <0>;
-};
-- 
GitLab


From ccbc5d0a92c5f1d42d1a1635b53f1987821e9cdd Mon Sep 17 00:00:00 2001
From: Leilk Liu <leilk.liu@mediatek.com>
Date: Tue, 25 Jan 2022 09:23:30 +0800
Subject: [PATCH 0288/1586] spi: Add compatible for Mediatek MT8186

This commit adds dt-binding documentation of spi bus for Mediatek MT8186 SoC
Platform.

Signed-off-by: Leilk Liu <leilk.liu@mediatek.com>
Acked-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20220125012330.13449-4-leilk.liu@mediatek.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 Documentation/devicetree/bindings/spi/mediatek,spi-mt65xx.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/spi/mediatek,spi-mt65xx.yaml b/Documentation/devicetree/bindings/spi/mediatek,spi-mt65xx.yaml
index ea977fba49a77..bfa44acb1bdd0 100644
--- a/Documentation/devicetree/bindings/spi/mediatek,spi-mt65xx.yaml
+++ b/Documentation/devicetree/bindings/spi/mediatek,spi-mt65xx.yaml
@@ -26,6 +26,7 @@ properties:
       - items:
           - enum:
               - mediatek,mt6779-spi
+              - mediatek,mt8186-spi
               - mediatek,mt8192-spi
               - mediatek,mt8195-spi
           - const: mediatek,mt6765-spi
-- 
GitLab


From 833026ad56f76d1a1035d6511fd5aeed308465fd Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Tue, 25 Jan 2022 09:52:02 +0300
Subject: [PATCH 0289/1586] spi: spidev: prevent spidev->speed_hz from being
 zero

A zero value for spi->max_speed_hz or spidev->speed_hz does not make
sense and trying to set that can lead to divide by zero crashes in
a some of the drivers.

drivers/spi/spi-s3c64xx.c:874 s3c64xx_spi_setup() error: potential divide by zero bug '/ spi->max_speed_hz'.
drivers/spi/spi-fsl-dspi.c:613 hz_to_spi_baud() error: potential divide by zero bug '/ speed_hz'.
drivers/spi/spi-xlp.c:146 xlp_spi_setup() error: potential divide by zero bug '/ (spi->max_speed_hz)'.
drivers/spi/spi-orion.c:162 orion_spi_baudrate_set() error: potential divide by zero bug '/ speed'.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Link: https://lore.kernel.org/r/20220125065202.GA8807@kili
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spidev.c | 31 +++++++++++++++++++------------
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/drivers/spi/spidev.c b/drivers/spi/spidev.c
index a5cceca8b82b6..dd824db63fbe6 100644
--- a/drivers/spi/spidev.c
+++ b/drivers/spi/spidev.c
@@ -453,22 +453,29 @@ spidev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 				dev_dbg(&spi->dev, "%d bits per word\n", tmp);
 		}
 		break;
-	case SPI_IOC_WR_MAX_SPEED_HZ:
+	case SPI_IOC_WR_MAX_SPEED_HZ: {
+		u32 save;
+
 		retval = get_user(tmp, (__u32 __user *)arg);
-		if (retval == 0) {
-			u32	save = spi->max_speed_hz;
+		if (retval)
+			break;
+		if (tmp == 0) {
+			retval = -EINVAL;
+			break;
+		}
 
-			spi->max_speed_hz = tmp;
-			retval = spi_setup(spi);
-			if (retval == 0) {
-				spidev->speed_hz = tmp;
-				dev_dbg(&spi->dev, "%d Hz (max)\n",
-					spidev->speed_hz);
-			}
-			spi->max_speed_hz = save;
+		save = spi->max_speed_hz;
+
+		spi->max_speed_hz = tmp;
+		retval = spi_setup(spi);
+		if (retval == 0) {
+			spidev->speed_hz = tmp;
+			dev_dbg(&spi->dev, "%d Hz (max)\n", spidev->speed_hz);
 		}
-		break;
 
+		spi->max_speed_hz = save;
+		break;
+	}
 	default:
 		/* segmented and/or full-duplex I/O request */
 		/* Check message and copy into scratch area */
-- 
GitLab


From 47e8fe57a66f72c5734b981b21557c732b9a5eb6 Mon Sep 17 00:00:00 2001
From: Li-hao Kuo <lhjeff911@gmail.com>
Date: Mon, 7 Feb 2022 09:57:22 +0800
Subject: [PATCH 0290/1586] spi: Modify irq request position and modify
 parameters

- Change irq request position to the back.
- Add temporary varilable and setting (as suggested by Mr. Andy Shevchenko)

Fixes: f62ca4e2a863 ("spi: Add spi driver for Sunplus SP7021")
Signed-off-by: Li-hao Kuo <lhjeff911@gmail.com>
Link: https://lore.kernel.org/r/a94e3b123773fe303221d2bd2e4ce36ffa905a1c.1644198957.git.lhjeff911@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-sunplus-sp7021.c | 63 ++++++++++++++++----------------
 1 file changed, 31 insertions(+), 32 deletions(-)

diff --git a/drivers/spi/spi-sunplus-sp7021.c b/drivers/spi/spi-sunplus-sp7021.c
index e5bdeb3eba458..ba5ed9f7277a3 100644
--- a/drivers/spi/spi-sunplus-sp7021.c
+++ b/drivers/spi/spi-sunplus-sp7021.c
@@ -110,7 +110,8 @@ static irqreturn_t sp7021_spi_slave_irq(int irq, void *dev)
 	unsigned int data_status;
 
 	data_status = readl(pspim->s_base + SP7021_DATA_RDY_REG);
-	writel(data_status | SP7021_SLAVE_CLR_INT, pspim->s_base + SP7021_DATA_RDY_REG);
+	data_status |= SP7021_SLAVE_CLR_INT;
+	writel(data_status , pspim->s_base + SP7021_DATA_RDY_REG);
 	complete(&pspim->slave_isr);
 	return IRQ_HANDLED;
 }
@@ -127,14 +128,16 @@ static int sp7021_spi_slave_abort(struct spi_controller *ctlr)
 static int sp7021_spi_slave_tx(struct spi_device *spi, struct spi_transfer *xfer)
 {
 	struct sp7021_spi_ctlr *pspim = spi_controller_get_devdata(spi->controller);
+	u32 value;
 
 	reinit_completion(&pspim->slave_isr);
-	writel(SP7021_SLAVE_DMA_EN | SP7021_SLAVE_DMA_RW | FIELD_PREP(SP7021_SLAVE_DMA_CMD, 3),
-	       pspim->s_base + SP7021_SLAVE_DMA_CTRL_REG);
+	value = SP7021_SLAVE_DMA_EN | SP7021_SLAVE_DMA_RW | FIELD_PREP(SP7021_SLAVE_DMA_CMD, 3);
+	writel(value, pspim->s_base + SP7021_SLAVE_DMA_CTRL_REG);
 	writel(xfer->len, pspim->s_base + SP7021_SLAVE_DMA_LENGTH_REG);
 	writel(xfer->tx_dma, pspim->s_base + SP7021_SLAVE_DMA_ADDR_REG);
-	writel(readl(pspim->s_base + SP7021_DATA_RDY_REG) | SP7021_SLAVE_DATA_RDY,
-	       pspim->s_base + SP7021_DATA_RDY_REG);
+	value = readl(pspim->s_base + SP7021_DATA_RDY_REG);
+	value |= SP7021_SLAVE_DATA_RDY;
+	writel(value, pspim->s_base + SP7021_DATA_RDY_REG);
 	if (wait_for_completion_interruptible(&pspim->isr_done)) {
 		dev_err(&spi->dev, "%s() wait_for_completion err\n", __func__);
 		return -EINTR;
@@ -145,11 +148,11 @@ static int sp7021_spi_slave_tx(struct spi_device *spi, struct spi_transfer *xfer
 static int sp7021_spi_slave_rx(struct spi_device *spi, struct spi_transfer *xfer)
 {
 	struct sp7021_spi_ctlr *pspim = spi_controller_get_devdata(spi->controller);
-	int ret = 0;
+	u32 value;
 
 	reinit_completion(&pspim->isr_done);
-	writel(SP7021_SLAVE_DMA_EN | FIELD_PREP(SP7021_SLAVE_DMA_CMD, 3),
-	       pspim->s_base + SP7021_SLAVE_DMA_CTRL_REG);
+	value = SP7021_SLAVE_DMA_EN | FIELD_PREP(SP7021_SLAVE_DMA_CMD, 3);
+	writel(value, pspim->s_base + SP7021_SLAVE_DMA_CTRL_REG);
 	writel(xfer->len, pspim->s_base + SP7021_SLAVE_DMA_LENGTH_REG);
 	writel(xfer->rx_dma, pspim->s_base + SP7021_SLAVE_DMA_ADDR_REG);
 	if (wait_for_completion_interruptible(&pspim->isr_done)) {
@@ -157,7 +160,7 @@ static int sp7021_spi_slave_rx(struct spi_device *spi, struct spi_transfer *xfer
 		return -EINTR;
 	}
 	writel(SP7021_SLAVE_SW_RST, pspim->s_base + SP7021_SLAVE_DMA_CTRL_REG);
-	return ret;
+	return 0;
 }
 
 static void sp7021_spi_master_rb(struct sp7021_spi_ctlr *pspim, unsigned int len)
@@ -188,7 +191,6 @@ static irqreturn_t sp7021_spi_master_irq(int irq, void *dev)
 	unsigned int tx_cnt, total_len;
 	unsigned int tx_len, rx_cnt;
 	unsigned int fd_status;
-	unsigned long flags;
 	bool isrdone = false;
 	u32 value;
 
@@ -203,7 +205,7 @@ static irqreturn_t sp7021_spi_master_irq(int irq, void *dev)
 	if (tx_len == 0 && total_len == 0)
 		return IRQ_NONE;
 
-	spin_lock_irqsave(&pspim->lock, flags);
+	spin_lock_irq(&pspim->lock);
 
 	rx_cnt = FIELD_GET(SP7021_RX_CNT_MASK, fd_status);
 	if (fd_status & SP7021_RX_FULL_FLAG)
@@ -243,7 +245,7 @@ static irqreturn_t sp7021_spi_master_irq(int irq, void *dev)
 
 	if (isrdone)
 		complete(&pspim->isr_done);
-	spin_unlock_irqrestore(&pspim->lock, flags);
+	spin_unlock_irq(&pspim->lock);
 	return IRQ_HANDLED;
 }
 
@@ -296,11 +298,10 @@ static void sp7021_spi_setup_clk(struct spi_controller *ctlr, struct spi_transfe
 	u32 clk_rate, clk_sel, div;
 
 	clk_rate = clk_get_rate(pspim->spi_clk);
-	div = clk_rate / xfer->speed_hz;
-	if (div < 2)
-		div = 2;
+	div = max(2U, clk_rate / xfer->speed_hz);
+
 	clk_sel = (div / 2) - 1;
-	pspim->xfer_conf &= SP7021_CLK_MASK;
+	pspim->xfer_conf &= ~SP7021_CLK_MASK;
 	pspim->xfer_conf |= FIELD_PREP(SP7021_CLK_MASK, clk_sel);
 	writel(pspim->xfer_conf, pspim->m_base + SP7021_SPI_CONFIG_REG);
 }
@@ -313,7 +314,6 @@ static int sp7021_spi_master_transfer_one(struct spi_controller *ctlr, struct sp
 	unsigned int xfer_cnt, xfer_len, last_len;
 	unsigned int i, len_temp;
 	u32 reg_temp;
-	int ret;
 
 	xfer_cnt = xfer->len / SP7021_SPI_DATA_SIZE;
 	last_len = xfer->len % SP7021_SPI_DATA_SIZE;
@@ -366,9 +366,8 @@ static int sp7021_spi_master_transfer_one(struct spi_controller *ctlr, struct sp
 			writel(pspim->xfer_conf, pspim->m_base + SP7021_SPI_CONFIG_REG);
 
 		mutex_unlock(&pspim->buf_lock);
-		ret = 0;
 	}
-	return ret;
+	return 0;
 }
 
 static int sp7021_spi_slave_transfer_one(struct spi_controller *ctlr, struct spi_device *spi,
@@ -376,12 +375,12 @@ static int sp7021_spi_slave_transfer_one(struct spi_controller *ctlr, struct spi
 {
 	struct sp7021_spi_ctlr *pspim = spi_master_get_devdata(ctlr);
 	struct device *dev = pspim->dev;
-	int mode, ret = 0;
+	int mode, ret;
 
 	mode = SP7021_SPI_IDLE;
 	if (xfer->tx_buf && xfer->rx_buf) {
 		dev_dbg(&ctlr->dev, "%s() wrong command\n", __func__);
-		ret = -EINVAL;
+		return -EINVAL;
 	} else if (xfer->tx_buf) {
 		xfer->tx_dma = dma_map_single(dev, (void *)xfer->tx_buf,
 					      xfer->len, DMA_TO_DEVICE);
@@ -445,7 +444,7 @@ static int sp7021_spi_controller_probe(struct platform_device *pdev)
 		ctlr = devm_spi_alloc_master(dev, sizeof(*pspim));
 	if (!ctlr)
 		return -ENOMEM;
-	device_set_node(&ctlr->dev, pdev->dev.fwnode);
+	device_set_node(&ctlr->dev, dev_fwnode(dev));
 	ctlr->bus_num = pdev->id;
 	ctlr->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH | SPI_LSB_FIRST;
 	ctlr->auto_runtime_pm = true;
@@ -488,16 +487,6 @@ static int sp7021_spi_controller_probe(struct platform_device *pdev)
 	if (pspim->s_irq < 0)
 		return pspim->s_irq;
 
-	ret = devm_request_irq(dev, pspim->m_irq, sp7021_spi_master_irq,
-			       IRQF_TRIGGER_RISING, pdev->name, pspim);
-	if (ret)
-		return ret;
-
-	ret = devm_request_irq(dev, pspim->s_irq, sp7021_spi_slave_irq,
-			       IRQF_TRIGGER_RISING, pdev->name, pspim);
-	if (ret)
-		return ret;
-
 	pspim->spi_clk = devm_clk_get(dev, NULL);
 	if (IS_ERR(pspim->spi_clk))
 		return dev_err_probe(dev, PTR_ERR(pspim->spi_clk), "clk get fail\n");
@@ -522,6 +511,16 @@ static int sp7021_spi_controller_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
+	ret = devm_request_irq(dev, pspim->m_irq, sp7021_spi_master_irq,
+			       IRQF_TRIGGER_RISING, pdev->name, pspim);
+	if (ret)
+		return ret;
+
+	ret = devm_request_irq(dev, pspim->s_irq, sp7021_spi_slave_irq,
+			       IRQF_TRIGGER_RISING, pdev->name, pspim);
+	if (ret)
+		return ret;
+
 	pm_runtime_enable(dev);
 	ret = spi_register_controller(ctlr);
 	if (ret) {
-- 
GitLab


From d08de0259dfe172caf073b921c6b27ff089605a9 Mon Sep 17 00:00:00 2001
From: Luiz Angelo Daros de Luca <luizluca@gmail.com>
Date: Sat, 29 Jan 2022 01:04:53 -0300
Subject: [PATCH 0291/1586] spi: ath79: add mem_ops for fast-read

Reading from memory is 3x faster than bit-bang read operation. Also,
for tl-wr2543nd, the bit-bang read was sporadically returning random
data, possibly a HW defect, while fast-read works as expected.

Signed-off-by: Luiz Angelo Daros de Luca <luizluca@gmail.com>
Link: https://lore.kernel.org/r/20220129040453.8476-1-luizluca@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-ath79.c | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/drivers/spi/spi-ath79.c b/drivers/spi/spi-ath79.c
index d1e287d2d9cdc..607e7a49fb89f 100644
--- a/drivers/spi/spi-ath79.c
+++ b/drivers/spi/spi-ath79.c
@@ -15,6 +15,7 @@
 #include <linux/platform_device.h>
 #include <linux/io.h>
 #include <linux/spi/spi.h>
+#include <linux/spi/spi-mem.h>
 #include <linux/spi/spi_bitbang.h>
 #include <linux/bitops.h>
 #include <linux/clk.h>
@@ -133,6 +134,38 @@ static u32 ath79_spi_txrx_mode0(struct spi_device *spi, unsigned int nsecs,
 	return ath79_spi_rr(sp, AR71XX_SPI_REG_RDS);
 }
 
+static int ath79_exec_mem_op(struct spi_mem *mem,
+			     const struct spi_mem_op *op)
+{
+	struct ath79_spi *sp = ath79_spidev_to_sp(mem->spi);
+
+	/* Ensures that reading is performed on device connected to hardware cs0 */
+	if (mem->spi->chip_select || mem->spi->cs_gpiod)
+		return -ENOTSUPP;
+
+	/* Only use for fast-read op. */
+	if (op->cmd.opcode != 0x0b || op->data.dir != SPI_MEM_DATA_IN ||
+	    op->addr.nbytes != 3 || op->dummy.nbytes != 1)
+		return -ENOTSUPP;
+
+	/* disable GPIO mode */
+	ath79_spi_wr(sp, AR71XX_SPI_REG_FS, 0);
+
+	memcpy_fromio(op->data.buf.in, sp->base + op->addr.val, op->data.nbytes);
+
+	/* enable GPIO mode */
+	ath79_spi_wr(sp, AR71XX_SPI_REG_FS, AR71XX_SPI_FS_GPIO);
+
+	/* restore IOC register */
+	ath79_spi_wr(sp, AR71XX_SPI_REG_IOC, sp->ioc_base);
+
+	return 0;
+}
+
+static const struct spi_controller_mem_ops ath79_mem_ops = {
+	.exec_op = ath79_exec_mem_op,
+};
+
 static int ath79_spi_probe(struct platform_device *pdev)
 {
 	struct spi_master *master;
@@ -154,6 +187,7 @@ static int ath79_spi_probe(struct platform_device *pdev)
 	master->bits_per_word_mask = SPI_BPW_RANGE_MASK(1, 32);
 	master->flags = SPI_MASTER_GPIO_SS;
 	master->num_chipselect = 3;
+	master->mem_ops = &ath79_mem_ops;
 
 	sp->bitbang.master = master;
 	sp->bitbang.chipselect = ath79_spi_chipselect;
-- 
GitLab


From 4f92724d4b92c024e721063f520d66e11ca4b54b Mon Sep 17 00:00:00 2001
From: Miaoqian Lin <linmq006@gmail.com>
Date: Fri, 28 Jan 2022 16:52:38 +0000
Subject: [PATCH 0292/1586] spi: tegra114: Add missing IRQ check in
 tegra_spi_probe

This func misses checking for platform_get_irq()'s call and may passes the
negative error codes to request_threaded_irq(), which takes unsigned IRQ #,
causing it to fail with -EINVAL, overriding an original error code.
Stop calling request_threaded_irq() with invalid IRQ #s.

Fixes: f333a331adfa ("spi/tegra114: add spi driver")
Signed-off-by: Miaoqian Lin <linmq006@gmail.com>
Link: https://lore.kernel.org/r/20220128165238.25615-1-linmq006@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-tegra114.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/spi/spi-tegra114.c b/drivers/spi/spi-tegra114.c
index e9de1d958bbd2..8f345247a8c32 100644
--- a/drivers/spi/spi-tegra114.c
+++ b/drivers/spi/spi-tegra114.c
@@ -1352,6 +1352,10 @@ static int tegra_spi_probe(struct platform_device *pdev)
 	tspi->phys = r->start;
 
 	spi_irq = platform_get_irq(pdev, 0);
+	if (spi_irq < 0) {
+		ret = spi_irq;
+		goto exit_free_master;
+	}
 	tspi->irq = spi_irq;
 
 	tspi->clk = devm_clk_get(&pdev->dev, "spi");
-- 
GitLab


From 3b8ab4da3405d07ddfe434f17f9014740b30a19c Mon Sep 17 00:00:00 2001
From: Li-hao Kuo <lhjeff911@gmail.com>
Date: Mon, 7 Feb 2022 09:46:34 +0800
Subject: [PATCH 0293/1586] spi: Fix test error for sp7021.

Remove the include path and modify parameters for fix error for bt binding test

Fixes: a708078eeb99  ("spi: Add Sunplus SP7021 schema")
Signed-off-by: Li-hao Kuo <lhjeff911@gmail.com>
Link: https://lore.kernel.org/r/b8a94fbfcab68b1279b09b6297099310c209927b.1644198244.git.lhjeff911@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../devicetree/bindings/spi/spi-sunplus-sp7021.yaml         | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/Documentation/devicetree/bindings/spi/spi-sunplus-sp7021.yaml b/Documentation/devicetree/bindings/spi/spi-sunplus-sp7021.yaml
index 38589fdbc80db..298eac28c40ff 100644
--- a/Documentation/devicetree/bindings/spi/spi-sunplus-sp7021.yaml
+++ b/Documentation/devicetree/bindings/spi/spi-sunplus-sp7021.yaml
@@ -59,8 +59,6 @@ unevaluatedProperties: false
 
 examples:
   - |
-    #include <dt-bindings/clock/sp-sp7021.h>
-    #include <dt-bindings/reset/sp-sp7021.h>
     #include <dt-bindings/interrupt-controller/irq.h>
     spi@9C002D80 {
         compatible = "sunplus,sp7021-spi";
@@ -73,8 +71,8 @@ examples:
         interrupts = <144 IRQ_TYPE_LEVEL_HIGH>,
                      <146 IRQ_TYPE_LEVEL_HIGH>,
                      <145 IRQ_TYPE_LEVEL_HIGH>;
-        clocks = <&clkc SPI_COMBO_0>;
-        resets = <&rstc RST_SPI_COMBO_0>;
+        clocks = <&clkc 0x32>;
+        resets = <&rstc 0x22>;
         pinctrl-names = "default";
         pinctrl-0 = <&pins_spi0>;
     };
-- 
GitLab


From 47c3e06ed95aa9b74932dbc6b23b544f644faf84 Mon Sep 17 00:00:00 2001
From: Miaoqian Lin <linmq006@gmail.com>
Date: Fri, 28 Jan 2022 16:59:56 +0000
Subject: [PATCH 0294/1586] spi: tegra210-quad: Fix missin IRQ check in
 tegra_qspi_probe

This func misses checking for platform_get_irq()'s call and may passes the
negative error codes to request_threaded_irq(), which takes unsigned IRQ #,
causing it to fail with -EINVAL, overriding an original error code.
Stop calling request_threaded_irq() with invalid IRQ #s.

Fixes: 921fc1838fb0 ("spi: tegra210-quad: Add support for Tegra210 QSPI controller")
Signed-off-by: Miaoqian Lin <linmq006@gmail.com>
Link: https://lore.kernel.org/r/20220128165956.27821-1-linmq006@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-tegra210-quad.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/spi/spi-tegra210-quad.c b/drivers/spi/spi-tegra210-quad.c
index ce1bdb4767ea3..cb00ac2fc7d8e 100644
--- a/drivers/spi/spi-tegra210-quad.c
+++ b/drivers/spi/spi-tegra210-quad.c
@@ -1240,6 +1240,8 @@ static int tegra_qspi_probe(struct platform_device *pdev)
 
 	tqspi->phys = r->start;
 	qspi_irq = platform_get_irq(pdev, 0);
+	if (qspi_irq < 0)
+		return qspi_irq;
 	tqspi->irq = qspi_irq;
 
 	tqspi->clk = devm_clk_get(&pdev->dev, "qspi");
-- 
GitLab


From 3301bc53358a0eb0a0db65fd7a513cd4cb50c83a Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Mon, 10 Jan 2022 15:29:45 +0800
Subject: [PATCH 0295/1586] lib/sbitmap: kill 'depth' from sbitmap_word

Only the last sbitmap_word can have different depth, and all the others
must have same depth of 1U << sb->shift, so not necessary to store it in
sbitmap_word, and it can be retrieved easily and efficiently by adding
one internal helper of __map_depth(sb, index).

Remove 'depth' field from sbitmap_word, then the annotation of
____cacheline_aligned_in_smp for 'word' isn't needed any more.

Not see performance effect when running high parallel IOPS test on
null_blk.

This way saves us one cacheline(usually 64 words) per each sbitmap_word.

Cc: Martin Wilck <martin.wilck@suse.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Martin Wilck <mwilck@suse.com>
Reviewed-by: John Garry <john.garry@huawei.com>
Link: https://lore.kernel.org/r/20220110072945.347535-1-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/sbitmap.h | 17 ++++++++++-------
 lib/sbitmap.c           | 34 ++++++++++++++--------------------
 2 files changed, 24 insertions(+), 27 deletions(-)

diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h
index 95df357ec0093..df3b584b0f0cb 100644
--- a/include/linux/sbitmap.h
+++ b/include/linux/sbitmap.h
@@ -27,15 +27,10 @@ struct seq_file;
  * struct sbitmap_word - Word in a &struct sbitmap.
  */
 struct sbitmap_word {
-	/**
-	 * @depth: Number of bits being used in @word/@cleared
-	 */
-	unsigned long depth;
-
 	/**
 	 * @word: word holding free bits
 	 */
-	unsigned long word ____cacheline_aligned_in_smp;
+	unsigned long word;
 
 	/**
 	 * @cleared: word holding cleared bits
@@ -164,6 +159,14 @@ struct sbitmap_queue {
 int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift,
 		      gfp_t flags, int node, bool round_robin, bool alloc_hint);
 
+/* sbitmap internal helper */
+static inline unsigned int __map_depth(const struct sbitmap *sb, int index)
+{
+	if (index == sb->map_nr - 1)
+		return sb->depth - (index << sb->shift);
+	return 1U << sb->shift;
+}
+
 /**
  * sbitmap_free() - Free memory used by a &struct sbitmap.
  * @sb: Bitmap to free.
@@ -251,7 +254,7 @@ static inline void __sbitmap_for_each_set(struct sbitmap *sb,
 	while (scanned < sb->depth) {
 		unsigned long word;
 		unsigned int depth = min_t(unsigned int,
-					   sb->map[index].depth - nr,
+					   __map_depth(sb, index) - nr,
 					   sb->depth - scanned);
 
 		scanned += depth;
diff --git a/lib/sbitmap.c b/lib/sbitmap.c
index 09d293c30fd2d..b7cb96ae4701b 100644
--- a/lib/sbitmap.c
+++ b/lib/sbitmap.c
@@ -85,7 +85,6 @@ int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift,
 		      bool alloc_hint)
 {
 	unsigned int bits_per_word;
-	unsigned int i;
 
 	if (shift < 0)
 		shift = sbitmap_calculate_shift(depth);
@@ -117,10 +116,6 @@ int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift,
 		return -ENOMEM;
 	}
 
-	for (i = 0; i < sb->map_nr; i++) {
-		sb->map[i].depth = min(depth, bits_per_word);
-		depth -= sb->map[i].depth;
-	}
 	return 0;
 }
 EXPORT_SYMBOL_GPL(sbitmap_init_node);
@@ -135,11 +130,6 @@ void sbitmap_resize(struct sbitmap *sb, unsigned int depth)
 
 	sb->depth = depth;
 	sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word);
-
-	for (i = 0; i < sb->map_nr; i++) {
-		sb->map[i].depth = min(depth, bits_per_word);
-		depth -= sb->map[i].depth;
-	}
 }
 EXPORT_SYMBOL_GPL(sbitmap_resize);
 
@@ -184,8 +174,8 @@ static int sbitmap_find_bit_in_index(struct sbitmap *sb, int index,
 	int nr;
 
 	do {
-		nr = __sbitmap_get_word(&map->word, map->depth, alloc_hint,
-					!sb->round_robin);
+		nr = __sbitmap_get_word(&map->word, __map_depth(sb, index),
+					alloc_hint, !sb->round_robin);
 		if (nr != -1)
 			break;
 		if (!sbitmap_deferred_clear(map))
@@ -257,7 +247,9 @@ static int __sbitmap_get_shallow(struct sbitmap *sb,
 	for (i = 0; i < sb->map_nr; i++) {
 again:
 		nr = __sbitmap_get_word(&sb->map[index].word,
-					min(sb->map[index].depth, shallow_depth),
+					min_t(unsigned int,
+					      __map_depth(sb, index),
+					      shallow_depth),
 					SB_NR_TO_BIT(sb, alloc_hint), true);
 		if (nr != -1) {
 			nr += index << sb->shift;
@@ -315,11 +307,12 @@ static unsigned int __sbitmap_weight(const struct sbitmap *sb, bool set)
 
 	for (i = 0; i < sb->map_nr; i++) {
 		const struct sbitmap_word *word = &sb->map[i];
+		unsigned int word_depth = __map_depth(sb, i);
 
 		if (set)
-			weight += bitmap_weight(&word->word, word->depth);
+			weight += bitmap_weight(&word->word, word_depth);
 		else
-			weight += bitmap_weight(&word->cleared, word->depth);
+			weight += bitmap_weight(&word->cleared, word_depth);
 	}
 	return weight;
 }
@@ -367,7 +360,7 @@ void sbitmap_bitmap_show(struct sbitmap *sb, struct seq_file *m)
 	for (i = 0; i < sb->map_nr; i++) {
 		unsigned long word = READ_ONCE(sb->map[i].word);
 		unsigned long cleared = READ_ONCE(sb->map[i].cleared);
-		unsigned int word_bits = READ_ONCE(sb->map[i].depth);
+		unsigned int word_bits = __map_depth(sb, i);
 
 		word &= ~cleared;
 
@@ -531,15 +524,16 @@ unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags,
 	for (i = 0; i < sb->map_nr; i++) {
 		struct sbitmap_word *map = &sb->map[index];
 		unsigned long get_mask;
+		unsigned int map_depth = __map_depth(sb, index);
 
 		sbitmap_deferred_clear(map);
-		if (map->word == (1UL << (map->depth - 1)) - 1)
+		if (map->word == (1UL << (map_depth - 1)) - 1)
 			continue;
 
-		nr = find_first_zero_bit(&map->word, map->depth);
-		if (nr + nr_tags <= map->depth) {
+		nr = find_first_zero_bit(&map->word, map_depth);
+		if (nr + nr_tags <= map_depth) {
 			atomic_long_t *ptr = (atomic_long_t *) &map->word;
-			int map_tags = min_t(int, nr_tags, map->depth);
+			int map_tags = min_t(int, nr_tags, map_depth);
 			unsigned long val, ret;
 
 			get_mask = ((1UL << map_tags) - 1) << nr;
-- 
GitLab


From 3f607293b74d6acb06571a774a500143c1f0ed2c Mon Sep 17 00:00:00 2001
From: John Garry <john.garry@huawei.com>
Date: Tue, 8 Feb 2022 20:07:04 +0800
Subject: [PATCH 0296/1586] sbitmap: Delete old sbitmap_queue_get_shallow()

Since __sbitmap_queue_get_shallow() was introduced in commit c05e66733788
("sbitmap: add sbitmap_get_shallow() operation"), it has not been used.

Delete __sbitmap_queue_get_shallow() and rename public
__sbitmap_queue_get_shallow() -> sbitmap_queue_get_shallow() as it is odd
to have public __foo but no foo at all.

Signed-off-by: John Garry <john.garry@huawei.com>
Link: https://lore.kernel.org/r/1644322024-105340-1-git-send-email-john.garry@huawei.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq-tag.c      |  2 +-
 include/linux/sbitmap.h | 34 ++++------------------------------
 lib/sbitmap.c           |  6 +++---
 3 files changed, 8 insertions(+), 34 deletions(-)

diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 845f74e8dd7bb..0fd409b8e86ef 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -107,7 +107,7 @@ static int __blk_mq_get_tag(struct blk_mq_alloc_data *data,
 		return BLK_MQ_NO_TAG;
 
 	if (data->shallow_depth)
-		return __sbitmap_queue_get_shallow(bt, data->shallow_depth);
+		return sbitmap_queue_get_shallow(bt, data->shallow_depth);
 	else
 		return __sbitmap_queue_get(bt);
 }
diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h
index df3b584b0f0cb..dffeb8281c2d9 100644
--- a/include/linux/sbitmap.h
+++ b/include/linux/sbitmap.h
@@ -135,7 +135,7 @@ struct sbitmap_queue {
 
 	/**
 	 * @min_shallow_depth: The minimum shallow depth which may be passed to
-	 * sbitmap_queue_get_shallow() or __sbitmap_queue_get_shallow().
+	 * sbitmap_queue_get_shallow()
 	 */
 	unsigned int min_shallow_depth;
 };
@@ -463,7 +463,7 @@ unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags,
 					unsigned int *offset);
 
 /**
- * __sbitmap_queue_get_shallow() - Try to allocate a free bit from a &struct
+ * sbitmap_queue_get_shallow() - Try to allocate a free bit from a &struct
  * sbitmap_queue, limiting the depth used from each word, with preemption
  * already disabled.
  * @sbq: Bitmap queue to allocate from.
@@ -475,8 +475,8 @@ unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags,
  *
  * Return: Non-negative allocated bit number if successful, -1 otherwise.
  */
-int __sbitmap_queue_get_shallow(struct sbitmap_queue *sbq,
-				unsigned int shallow_depth);
+int sbitmap_queue_get_shallow(struct sbitmap_queue *sbq,
+			      unsigned int shallow_depth);
 
 /**
  * sbitmap_queue_get() - Try to allocate a free bit from a &struct
@@ -498,32 +498,6 @@ static inline int sbitmap_queue_get(struct sbitmap_queue *sbq,
 	return nr;
 }
 
-/**
- * sbitmap_queue_get_shallow() - Try to allocate a free bit from a &struct
- * sbitmap_queue, limiting the depth used from each word.
- * @sbq: Bitmap queue to allocate from.
- * @cpu: Output parameter; will contain the CPU we ran on (e.g., to be passed to
- *       sbitmap_queue_clear()).
- * @shallow_depth: The maximum number of bits to allocate from a single word.
- * See sbitmap_get_shallow().
- *
- * If you call this, make sure to call sbitmap_queue_min_shallow_depth() after
- * initializing @sbq.
- *
- * Return: Non-negative allocated bit number if successful, -1 otherwise.
- */
-static inline int sbitmap_queue_get_shallow(struct sbitmap_queue *sbq,
-					    unsigned int *cpu,
-					    unsigned int shallow_depth)
-{
-	int nr;
-
-	*cpu = get_cpu();
-	nr = __sbitmap_queue_get_shallow(sbq, shallow_depth);
-	put_cpu();
-	return nr;
-}
-
 /**
  * sbitmap_queue_min_shallow_depth() - Inform a &struct sbitmap_queue of the
  * minimum shallow depth that will be used.
diff --git a/lib/sbitmap.c b/lib/sbitmap.c
index b7cb96ae4701b..2eb3de18ded3e 100644
--- a/lib/sbitmap.c
+++ b/lib/sbitmap.c
@@ -557,14 +557,14 @@ unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags,
 	return 0;
 }
 
-int __sbitmap_queue_get_shallow(struct sbitmap_queue *sbq,
-				unsigned int shallow_depth)
+int sbitmap_queue_get_shallow(struct sbitmap_queue *sbq,
+			      unsigned int shallow_depth)
 {
 	WARN_ON_ONCE(shallow_depth < sbq->min_shallow_depth);
 
 	return sbitmap_get_shallow(&sbq->sb, shallow_depth);
 }
-EXPORT_SYMBOL_GPL(__sbitmap_queue_get_shallow);
+EXPORT_SYMBOL_GPL(sbitmap_queue_get_shallow);
 
 void sbitmap_queue_min_shallow_depth(struct sbitmap_queue *sbq,
 				     unsigned int min_shallow_depth)
-- 
GitLab


From adbb8a1edecda677c5f031b44da25680a08a163e Mon Sep 17 00:00:00 2001
From: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
Date: Fri, 24 Dec 2021 16:13:31 +0000
Subject: [PATCH 0297/1586] perf/arm-ccn: Use platform_get_irq() to get the
 interrupt

platform_get_resource(pdev, IORESOURCE_IRQ, ..) relies on static
allocation of IRQ resources in DT core code, this causes an issue
when using hierarchical interrupt domains using "interrupts" property
in the node as this bypasses the hierarchical setup and messes up the
irq chaining.

In preparation for removal of static setup of IRQ resource from DT core
code use platform_get_irq().

Link: https://lore.kernel.org/r/20211224161334.31123-1-prabhakar.mahadev-lad.rj@bp.renesas.com
Signed-off-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/arm-ccn.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/perf/arm-ccn.c b/drivers/perf/arm-ccn.c
index a96c316045459..40b352e8aa7f7 100644
--- a/drivers/perf/arm-ccn.c
+++ b/drivers/perf/arm-ccn.c
@@ -1460,8 +1460,7 @@ static irqreturn_t arm_ccn_irq_handler(int irq, void *dev_id)
 static int arm_ccn_probe(struct platform_device *pdev)
 {
 	struct arm_ccn *ccn;
-	struct resource *res;
-	unsigned int irq;
+	int irq;
 	int err;
 
 	ccn = devm_kzalloc(&pdev->dev, sizeof(*ccn), GFP_KERNEL);
@@ -1474,10 +1473,9 @@ static int arm_ccn_probe(struct platform_device *pdev)
 	if (IS_ERR(ccn->base))
 		return PTR_ERR(ccn->base);
 
-	res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
-	if (!res)
-		return -EINVAL;
-	irq = res->start;
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return irq;
 
 	/* Check if we can use the interrupt */
 	writel(CCN_MN_ERRINT_STATUS__PMU_EVENTS__DISABLE,
-- 
GitLab


From e564518b0727c8960942d8b1452703bbabf1a5ec Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Wed, 12 Jan 2022 15:00:47 +0100
Subject: [PATCH 0298/1586] perf: MARVELL_CN10K_TAD_PMU should depend on
 ARCH_THUNDER

The Marvell CN10K Last-Level cache Tag-and-data Units (LLC-TAD)
performance monitor is only present on Marvell CN10K SoCs.  Hence add a
dependency on ARCH_THUNDER, to prevent asking the user about this driver
when configuring a kernel without Cavium Thunder (incl. Marvell CN10K)
SoC support.

Fixes: 036a7584bede ("drivers: perf: Add LLC-TAD perf counter support")
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://lore.kernel.org/r/b4662a2c767d04cca19417e0c845edea2da262ad.1641995941.git.geert+renesas@glider.be
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index e1a0c44bc6864..7d6ffdf44a415 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -141,7 +141,7 @@ config ARM_DMC620_PMU
 
 config MARVELL_CN10K_TAD_PMU
 	tristate "Marvell CN10K LLC-TAD PMU"
-	depends on ARM64 || (COMPILE_TEST && 64BIT)
+	depends on ARCH_THUNDER || (COMPILE_TEST && 64BIT)
 	help
 	  Provides support for Last-Level cache Tag-and-data Units (LLC-TAD)
 	  performance monitors on CN10K family silicons.
-- 
GitLab


From 6f75217b20a768c72fb8bb999e25a95673fe0174 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Thu, 3 Feb 2022 18:01:18 +0000
Subject: [PATCH 0299/1586] perf/arm-cmn: Make arm_cmn_debugfs static

Indeed our debugfs directory is driver-internal so should be static.

Link: https://lore.kernel.org/r/202202030812.II1K2ZXf-lkp@intel.com
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>
Link: https://lore.kernel.org/r/ca9248caaae69b5134f69e085fe78905dfe74378.1643911278.git.robin.murphy@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/arm-cmn.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
index 0e48adce57ef3..d0e1ce2c83bab 100644
--- a/drivers/perf/arm-cmn.c
+++ b/drivers/perf/arm-cmn.c
@@ -353,7 +353,7 @@ static struct arm_cmn_node *arm_cmn_node(const struct arm_cmn *cmn,
 	return NULL;
 }
 
-struct dentry *arm_cmn_debugfs;
+static struct dentry *arm_cmn_debugfs;
 
 #ifdef CONFIG_DEBUG_FS
 static const char *arm_cmn_device_type(u8 type)
-- 
GitLab


From 8c0c56879d067ad1e68c0aeeecc7d5f57bac3ffd Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Tue, 8 Feb 2022 15:12:28 +0000
Subject: [PATCH 0300/1586] perf/marvell_cn10k: Fix unused variable warning
 when W=1 and CONFIG_OF=n

The kbuild helpfully reports that the Marvell CN10K TAD PMU driver emits
a warning when building with W=1 and CONFIG_OF=n:

  | >> drivers/perf/marvell_cn10k_tad_pmu.c:371:34: warning: unused variable 'tad_pmu_of_match' [-Wunused-const-variable]
       static const struct of_device_id tad_pmu_of_match[] = {

Guard the match table with CONFIG_OF to squash the warning.

Link: https://lore.kernel.org/r/202201292349.zRQLcDDD-lkp@intel.com
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/marvell_cn10k_tad_pmu.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/perf/marvell_cn10k_tad_pmu.c b/drivers/perf/marvell_cn10k_tad_pmu.c
index 7f4d292658e32..ee67305f822d0 100644
--- a/drivers/perf/marvell_cn10k_tad_pmu.c
+++ b/drivers/perf/marvell_cn10k_tad_pmu.c
@@ -368,10 +368,12 @@ static int tad_pmu_remove(struct platform_device *pdev)
 	return 0;
 }
 
+#ifdef CONFIG_OF
 static const struct of_device_id tad_pmu_of_match[] = {
 	{ .compatible = "marvell,cn10k-tad-pmu", },
 	{},
 };
+#endif
 
 static struct platform_driver tad_pmu_driver = {
 	.driver         = {
-- 
GitLab


From 602c873eb52e5717057eb0971258c8eb7440c4a5 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 1 Feb 2022 20:10:01 +0100
Subject: [PATCH 0301/1586] perf: Replace acpi_bus_get_device()

Replace acpi_bus_get_device() that is going to be dropped with
acpi_fetch_acpi_dev().

No intentional functional impact.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/10025610.nUPlyArG6x@kreacher
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/thunderx2_pmu.c | 6 ++----
 drivers/perf/xgene_pmu.c     | 6 ++----
 2 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/drivers/perf/thunderx2_pmu.c b/drivers/perf/thunderx2_pmu.c
index 05378c0fd8f32..1edb9c03704fd 100644
--- a/drivers/perf/thunderx2_pmu.c
+++ b/drivers/perf/thunderx2_pmu.c
@@ -887,13 +887,11 @@ static struct tx2_uncore_pmu *tx2_uncore_pmu_init_dev(struct device *dev,
 static acpi_status tx2_uncore_pmu_add(acpi_handle handle, u32 level,
 				    void *data, void **return_value)
 {
+	struct acpi_device *adev = acpi_fetch_acpi_dev(handle);
 	struct tx2_uncore_pmu *tx2_pmu;
-	struct acpi_device *adev;
 	enum tx2_uncore_type type;
 
-	if (acpi_bus_get_device(handle, &adev))
-		return AE_OK;
-	if (acpi_bus_get_status(adev) || !adev->status.present)
+	if (!adev || acpi_bus_get_status(adev) || !adev->status.present)
 		return AE_OK;
 
 	type = get_tx2_pmu_type(adev);
diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c
index 2b6d476bd2137..5283608dc055b 100644
--- a/drivers/perf/xgene_pmu.c
+++ b/drivers/perf/xgene_pmu.c
@@ -1549,14 +1549,12 @@ static const struct acpi_device_id *xgene_pmu_acpi_match_type(
 static acpi_status acpi_pmu_dev_add(acpi_handle handle, u32 level,
 				    void *data, void **return_value)
 {
+	struct acpi_device *adev = acpi_fetch_acpi_dev(handle);
 	const struct acpi_device_id *acpi_id;
 	struct xgene_pmu *xgene_pmu = data;
 	struct xgene_pmu_dev_ctx *ctx;
-	struct acpi_device *adev;
 
-	if (acpi_bus_get_device(handle, &adev))
-		return AE_OK;
-	if (acpi_bus_get_status(adev) || !adev->status.present)
+	if (!adev || acpi_bus_get_status(adev) || !adev->status.present)
 		return AE_OK;
 
 	acpi_id = xgene_pmu_acpi_match_type(xgene_pmu_acpi_type_match, adev);
-- 
GitLab


From 82980b1622d97017053c6792382469d7dc26a486 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw@amazon.co.uk>
Date: Tue, 16 Feb 2021 15:04:34 +0000
Subject: [PATCH 0302/1586] rcu: Kill rnp->ofl_seq and use only
 rcu_state.ofl_lock for exclusion

If we allow architectures to bring APs online in parallel, then we end
up requiring rcu_cpu_starting() to be reentrant. But currently, the
manipulation of rnp->ofl_seq is not thread-safe.

However, rnp->ofl_seq is also fairly much pointless anyway since both
rcu_cpu_starting() and rcu_report_dead() hold rcu_state.ofl_lock for
fairly much the whole time that rnp->ofl_seq is set to an odd number
to indicate that an operation is in progress.

So drop rnp->ofl_seq completely, and use only rcu_state.ofl_lock.

This has a couple of minor complexities: lockdep will complain when we
take rcu_state.ofl_lock, and currently accepts the 'excuse' of having
an odd value in rnp->ofl_seq. So switch it to an arch_spinlock_t to
avoid that false positive complaint. Since we're killing rnp->ofl_seq
of course that 'excuse' has to be changed too, so make it check for
arch_spin_is_locked(rcu_state.ofl_lock).

There's no arch_spin_lock_irqsave() so we have to manually save and
restore local interrupts around the locking.

At Paul's request based on Neeraj's analysis, make rcu_gp_init not just
wait but *exclude* any CPU online/offline activity, which was fairly
much true already by virtue of it holding rcu_state.ofl_lock.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/tree.c | 71 ++++++++++++++++++++++++-----------------------
 kernel/rcu/tree.h |  4 +--
 2 files changed, 37 insertions(+), 38 deletions(-)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index a4c25a6283b0b..73a4c9d07b865 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -91,7 +91,7 @@ static struct rcu_state rcu_state = {
 	.abbr = RCU_ABBR,
 	.exp_mutex = __MUTEX_INITIALIZER(rcu_state.exp_mutex),
 	.exp_wake_mutex = __MUTEX_INITIALIZER(rcu_state.exp_wake_mutex),
-	.ofl_lock = __RAW_SPIN_LOCK_UNLOCKED(rcu_state.ofl_lock),
+	.ofl_lock = __ARCH_SPIN_LOCK_UNLOCKED,
 };
 
 /* Dump rcu_node combining tree at boot to verify correct setup. */
@@ -1175,7 +1175,15 @@ bool rcu_lockdep_current_cpu_online(void)
 	preempt_disable_notrace();
 	rdp = this_cpu_ptr(&rcu_data);
 	rnp = rdp->mynode;
-	if (rdp->grpmask & rcu_rnp_online_cpus(rnp) || READ_ONCE(rnp->ofl_seq) & 0x1)
+	/*
+	 * Strictly, we care here about the case where the current CPU is
+	 * in rcu_cpu_starting() and thus has an excuse for rdp->grpmask
+	 * not being up to date. So arch_spin_is_locked() might have a
+	 * false positive if it's held by some *other* CPU, but that's
+	 * OK because that just means a false *negative* on the warning.
+	 */
+	if (rdp->grpmask & rcu_rnp_online_cpus(rnp) ||
+	    arch_spin_is_locked(&rcu_state.ofl_lock))
 		ret = true;
 	preempt_enable_notrace();
 	return ret;
@@ -1739,7 +1747,6 @@ static void rcu_strict_gp_boundary(void *unused)
  */
 static noinline_for_stack bool rcu_gp_init(void)
 {
-	unsigned long firstseq;
 	unsigned long flags;
 	unsigned long oldmask;
 	unsigned long mask;
@@ -1782,22 +1789,17 @@ static noinline_for_stack bool rcu_gp_init(void)
 	 * of RCU's Requirements documentation.
 	 */
 	WRITE_ONCE(rcu_state.gp_state, RCU_GP_ONOFF);
+	/* Exclude CPU hotplug operations. */
 	rcu_for_each_leaf_node(rnp) {
-		// Wait for CPU-hotplug operations that might have
-		// started before this grace period did.
-		smp_mb(); // Pair with barriers used when updating ->ofl_seq to odd values.
-		firstseq = READ_ONCE(rnp->ofl_seq);
-		if (firstseq & 0x1)
-			while (firstseq == READ_ONCE(rnp->ofl_seq))
-				schedule_timeout_idle(1);  // Can't wake unless RCU is watching.
-		smp_mb(); // Pair with barriers used when updating ->ofl_seq to even values.
-		raw_spin_lock(&rcu_state.ofl_lock);
-		raw_spin_lock_irq_rcu_node(rnp);
+		local_irq_save(flags);
+		arch_spin_lock(&rcu_state.ofl_lock);
+		raw_spin_lock_rcu_node(rnp);
 		if (rnp->qsmaskinit == rnp->qsmaskinitnext &&
 		    !rnp->wait_blkd_tasks) {
 			/* Nothing to do on this leaf rcu_node structure. */
-			raw_spin_unlock_irq_rcu_node(rnp);
-			raw_spin_unlock(&rcu_state.ofl_lock);
+			raw_spin_unlock_rcu_node(rnp);
+			arch_spin_unlock(&rcu_state.ofl_lock);
+			local_irq_restore(flags);
 			continue;
 		}
 
@@ -1832,8 +1834,9 @@ static noinline_for_stack bool rcu_gp_init(void)
 				rcu_cleanup_dead_rnp(rnp);
 		}
 
-		raw_spin_unlock_irq_rcu_node(rnp);
-		raw_spin_unlock(&rcu_state.ofl_lock);
+		raw_spin_unlock_rcu_node(rnp);
+		arch_spin_unlock(&rcu_state.ofl_lock);
+		local_irq_restore(flags);
 	}
 	rcu_gp_slow(gp_preinit_delay); /* Races with CPU hotplug. */
 
@@ -4287,11 +4290,10 @@ void rcu_cpu_starting(unsigned int cpu)
 
 	rnp = rdp->mynode;
 	mask = rdp->grpmask;
-	WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
-	WARN_ON_ONCE(!(rnp->ofl_seq & 0x1));
+	local_irq_save(flags);
+	arch_spin_lock(&rcu_state.ofl_lock);
 	rcu_dynticks_eqs_online();
-	smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier().
-	raw_spin_lock_irqsave_rcu_node(rnp, flags);
+	raw_spin_lock_rcu_node(rnp);
 	WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext | mask);
 	newcpu = !(rnp->expmaskinitnext & mask);
 	rnp->expmaskinitnext |= mask;
@@ -4304,15 +4306,18 @@ void rcu_cpu_starting(unsigned int cpu)
 
 	/* An incoming CPU should never be blocking a grace period. */
 	if (WARN_ON_ONCE(rnp->qsmask & mask)) { /* RCU waiting on incoming CPU? */
+		/* rcu_report_qs_rnp() *really* wants some flags to restore */
+		unsigned long flags2;
+
+		local_irq_save(flags2);
 		rcu_disable_urgency_upon_qs(rdp);
 		/* Report QS -after- changing ->qsmaskinitnext! */
-		rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags);
+		rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags2);
 	} else {
-		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+		raw_spin_unlock_rcu_node(rnp);
 	}
-	smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier().
-	WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
-	WARN_ON_ONCE(rnp->ofl_seq & 0x1);
+	arch_spin_unlock(&rcu_state.ofl_lock);
+	local_irq_restore(flags);
 	smp_mb(); /* Ensure RCU read-side usage follows above initialization. */
 }
 
@@ -4326,7 +4331,7 @@ void rcu_cpu_starting(unsigned int cpu)
  */
 void rcu_report_dead(unsigned int cpu)
 {
-	unsigned long flags;
+	unsigned long flags, seq_flags;
 	unsigned long mask;
 	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
 	struct rcu_node *rnp = rdp->mynode;  /* Outgoing CPU's rdp & rnp. */
@@ -4340,10 +4345,8 @@ void rcu_report_dead(unsigned int cpu)
 
 	/* Remove outgoing CPU from mask in the leaf rcu_node structure. */
 	mask = rdp->grpmask;
-	WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
-	WARN_ON_ONCE(!(rnp->ofl_seq & 0x1));
-	smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier().
-	raw_spin_lock(&rcu_state.ofl_lock);
+	local_irq_save(seq_flags);
+	arch_spin_lock(&rcu_state.ofl_lock);
 	raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */
 	rdp->rcu_ofl_gp_seq = READ_ONCE(rcu_state.gp_seq);
 	rdp->rcu_ofl_gp_flags = READ_ONCE(rcu_state.gp_flags);
@@ -4354,10 +4357,8 @@ void rcu_report_dead(unsigned int cpu)
 	}
 	WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext & ~mask);
 	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
-	raw_spin_unlock(&rcu_state.ofl_lock);
-	smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier().
-	WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
-	WARN_ON_ONCE(rnp->ofl_seq & 0x1);
+	arch_spin_unlock(&rcu_state.ofl_lock);
+	local_irq_restore(seq_flags);
 
 	rdp->cpu_started = false;
 }
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 486fc901bd085..4b4bcef8a9743 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -56,8 +56,6 @@ struct rcu_node {
 				/*  Initialized from ->qsmaskinitnext at the */
 				/*  beginning of each grace period. */
 	unsigned long qsmaskinitnext;
-	unsigned long ofl_seq;	/* CPU-hotplug operation sequence count. */
-				/* Online CPUs for next grace period. */
 	unsigned long expmask;	/* CPUs or groups that need to check in */
 				/*  to allow the current expedited GP */
 				/*  to complete. */
@@ -355,7 +353,7 @@ struct rcu_state {
 	const char *name;			/* Name of structure. */
 	char abbr;				/* Abbreviated name. */
 
-	raw_spinlock_t ofl_lock ____cacheline_internodealigned_in_smp;
+	arch_spinlock_t ofl_lock ____cacheline_internodealigned_in_smp;
 						/* Synchronize offline with */
 						/*  GP pre-initialization. */
 };
-- 
GitLab


From 0cabb47af3cfaeb6007ba3868379bbd4daee64cc Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Fri, 10 Dec 2021 16:25:20 -0800
Subject: [PATCH 0303/1586] rcu: Refactor rcu_barrier() empty-list handling

This commit saves a few lines by checking first for an empty callback
list.  If the callback list is empty, then that CPU is taken care of,
regardless of its online or nocb state.  Also simplify tracing accordingly
and fold a few lines together.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 include/trace/events/rcu.h |  9 ++++-----
 kernel/rcu/tree.c          | 25 ++++++++-----------------
 2 files changed, 12 insertions(+), 22 deletions(-)

diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index 670e41783edd8..90b2fb0292cb1 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -794,16 +794,15 @@ TRACE_EVENT_RCU(rcu_torture_read,
  * Tracepoint for rcu_barrier() execution.  The string "s" describes
  * the rcu_barrier phase:
  *	"Begin": rcu_barrier() started.
+ *	"CB": An rcu_barrier_callback() invoked a callback, not the last.
  *	"EarlyExit": rcu_barrier() piggybacked, thus early exit.
  *	"Inc1": rcu_barrier() piggyback check counter incremented.
- *	"OfflineNoCBQ": rcu_barrier() found offline no-CBs CPU with callbacks.
- *	"OnlineQ": rcu_barrier() found online CPU with callbacks.
- *	"OnlineNQ": rcu_barrier() found online CPU, no callbacks.
+ *	"Inc2": rcu_barrier() piggyback check counter incremented.
  *	"IRQ": An rcu_barrier_callback() callback posted on remote CPU.
  *	"IRQNQ": An rcu_barrier_callback() callback found no callbacks.
- *	"CB": An rcu_barrier_callback() invoked a callback, not the last.
  *	"LastCB": An rcu_barrier_callback() invoked the last callback.
- *	"Inc2": rcu_barrier() piggyback check counter incremented.
+ *	"NQ": rcu_barrier() found a CPU with no callbacks.
+ *	"OnlineQ": rcu_barrier() found online CPU with callbacks.
  * The "cpu" argument is the CPU or -1 if meaningless, the "cnt" argument
  * is the count of remaining callbacks, and "done" is the piggybacking count.
  */
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 73a4c9d07b865..57a7a0065750b 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -4030,8 +4030,7 @@ void rcu_barrier(void)
 
 	/* Did someone else do our work for us? */
 	if (rcu_seq_done(&rcu_state.barrier_sequence, s)) {
-		rcu_barrier_trace(TPS("EarlyExit"), -1,
-				  rcu_state.barrier_sequence);
+		rcu_barrier_trace(TPS("EarlyExit"), -1, rcu_state.barrier_sequence);
 		smp_mb(); /* caller's subsequent code after above check. */
 		mutex_unlock(&rcu_state.barrier_mutex);
 		return;
@@ -4059,26 +4058,18 @@ void rcu_barrier(void)
 	 */
 	for_each_possible_cpu(cpu) {
 		rdp = per_cpu_ptr(&rcu_data, cpu);
-		if (cpu_is_offline(cpu) &&
-		    !rcu_rdp_is_offloaded(rdp))
+		if (!rcu_segcblist_n_cbs(&rdp->cblist)) {
+			rcu_barrier_trace(TPS("NQ"), cpu, rcu_state.barrier_sequence);
 			continue;
-		if (rcu_segcblist_n_cbs(&rdp->cblist) && cpu_online(cpu)) {
-			rcu_barrier_trace(TPS("OnlineQ"), cpu,
-					  rcu_state.barrier_sequence);
+		}
+		if (cpu_online(cpu)) {
+			rcu_barrier_trace(TPS("OnlineQ"), cpu, rcu_state.barrier_sequence);
 			smp_call_function_single(cpu, rcu_barrier_func, (void *)cpu, 1);
-		} else if (rcu_segcblist_n_cbs(&rdp->cblist) &&
-			   cpu_is_offline(cpu)) {
-			rcu_barrier_trace(TPS("OfflineNoCBQ"), cpu,
-					  rcu_state.barrier_sequence);
+		} else {
+			rcu_barrier_trace(TPS("OfflineNoCBQ"), cpu, rcu_state.barrier_sequence);
 			local_irq_disable();
 			rcu_barrier_func((void *)cpu);
 			local_irq_enable();
-		} else if (cpu_is_offline(cpu)) {
-			rcu_barrier_trace(TPS("OfflineNoCBNoQ"), cpu,
-					  rcu_state.barrier_sequence);
-		} else {
-			rcu_barrier_trace(TPS("OnlineNQ"), cpu,
-					  rcu_state.barrier_sequence);
 		}
 	}
 	cpus_read_unlock();
-- 
GitLab


From a16578dd5e3a44b53ca0699ac2971679dab97484 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Tue, 14 Dec 2021 13:15:18 -0800
Subject: [PATCH 0304/1586] rcu: Rework rcu_barrier() and callback-migration
 logic

This commit reworks rcu_barrier() and callback-migration logic to
permit allowing rcu_barrier() to run concurrently with CPU-hotplug
operations.  The key trick is for callback migration to check to see if
an rcu_barrier() is in flight, and, if so, enqueue the ->barrier_head
callback on its behalf.

This commit adds synchronization with RCU's CPU-hotplug notifiers.  Taken
together, this will permit a later commit to remove the cpus_read_lock()
and cpus_read_unlock() calls from rcu_barrier().

[ paulmck: Updated per kbuild test robot feedback. ]
[ paulmck: Updated per reviews session with Neeraj, Frederic, Uladzislau, and Boqun. ]

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/tree.c | 77 +++++++++++++++++++++++++++++++++++++----------
 kernel/rcu/tree.h |  2 ++
 2 files changed, 63 insertions(+), 16 deletions(-)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 57a7a0065750b..004ff1c0d192f 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -3987,13 +3987,16 @@ static void rcu_barrier_callback(struct rcu_head *rhp)
 }
 
 /*
- * Called with preemption disabled, and from cross-cpu IRQ context.
+ * If needed, entrain an rcu_barrier() callback on rdp->cblist.
  */
-static void rcu_barrier_func(void *cpu_in)
+static void rcu_barrier_entrain(struct rcu_data *rdp)
 {
-	uintptr_t cpu = (uintptr_t)cpu_in;
-	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
+	unsigned long gseq = READ_ONCE(rcu_state.barrier_sequence);
+	unsigned long lseq = READ_ONCE(rdp->barrier_seq_snap);
 
+	lockdep_assert_held(&rdp->barrier_lock);
+	if (rcu_seq_state(lseq) || !rcu_seq_state(gseq) || rcu_seq_ctr(lseq) != rcu_seq_ctr(gseq))
+		return;
 	rcu_barrier_trace(TPS("IRQ"), -1, rcu_state.barrier_sequence);
 	rdp->barrier_head.func = rcu_barrier_callback;
 	debug_rcu_head_queue(&rdp->barrier_head);
@@ -4003,10 +4006,26 @@ static void rcu_barrier_func(void *cpu_in)
 		atomic_inc(&rcu_state.barrier_cpu_count);
 	} else {
 		debug_rcu_head_unqueue(&rdp->barrier_head);
-		rcu_barrier_trace(TPS("IRQNQ"), -1,
-				  rcu_state.barrier_sequence);
+		rcu_barrier_trace(TPS("IRQNQ"), -1, rcu_state.barrier_sequence);
 	}
 	rcu_nocb_unlock(rdp);
+	smp_store_release(&rdp->barrier_seq_snap, gseq);
+}
+
+/*
+ * Called with preemption disabled, and from cross-cpu IRQ context.
+ */
+static void rcu_barrier_handler(void *cpu_in)
+{
+	uintptr_t cpu = (uintptr_t)cpu_in;
+	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
+
+	lockdep_assert_irqs_disabled();
+	WARN_ON_ONCE(cpu != rdp->cpu);
+	WARN_ON_ONCE(cpu != smp_processor_id());
+	raw_spin_lock(&rdp->barrier_lock);
+	rcu_barrier_entrain(rdp);
+	raw_spin_unlock(&rdp->barrier_lock);
 }
 
 /**
@@ -4020,6 +4039,8 @@ static void rcu_barrier_func(void *cpu_in)
 void rcu_barrier(void)
 {
 	uintptr_t cpu;
+	unsigned long flags;
+	unsigned long gseq;
 	struct rcu_data *rdp;
 	unsigned long s = rcu_seq_snap(&rcu_state.barrier_sequence);
 
@@ -4038,6 +4059,7 @@ void rcu_barrier(void)
 
 	/* Mark the start of the barrier operation. */
 	rcu_seq_start(&rcu_state.barrier_sequence);
+	gseq = rcu_state.barrier_sequence;
 	rcu_barrier_trace(TPS("Inc1"), -1, rcu_state.barrier_sequence);
 
 	/*
@@ -4058,19 +4080,30 @@ void rcu_barrier(void)
 	 */
 	for_each_possible_cpu(cpu) {
 		rdp = per_cpu_ptr(&rcu_data, cpu);
+retry:
+		if (smp_load_acquire(&rdp->barrier_seq_snap) == gseq)
+			continue;
+		raw_spin_lock_irqsave(&rdp->barrier_lock, flags);
 		if (!rcu_segcblist_n_cbs(&rdp->cblist)) {
+			WRITE_ONCE(rdp->barrier_seq_snap, gseq);
+			raw_spin_unlock_irqrestore(&rdp->barrier_lock, flags);
 			rcu_barrier_trace(TPS("NQ"), cpu, rcu_state.barrier_sequence);
 			continue;
 		}
-		if (cpu_online(cpu)) {
-			rcu_barrier_trace(TPS("OnlineQ"), cpu, rcu_state.barrier_sequence);
-			smp_call_function_single(cpu, rcu_barrier_func, (void *)cpu, 1);
-		} else {
+		if (!rcu_rdp_cpu_online(rdp)) {
+			rcu_barrier_entrain(rdp);
+			WARN_ON_ONCE(READ_ONCE(rdp->barrier_seq_snap) != gseq);
+			raw_spin_unlock_irqrestore(&rdp->barrier_lock, flags);
 			rcu_barrier_trace(TPS("OfflineNoCBQ"), cpu, rcu_state.barrier_sequence);
-			local_irq_disable();
-			rcu_barrier_func((void *)cpu);
-			local_irq_enable();
+			continue;
 		}
+		raw_spin_unlock_irqrestore(&rdp->barrier_lock, flags);
+		if (smp_call_function_single(cpu, rcu_barrier_handler, (void *)cpu, 1)) {
+			schedule_timeout_uninterruptible(1);
+			goto retry;
+		}
+		WARN_ON_ONCE(READ_ONCE(rdp->barrier_seq_snap) != gseq);
+		rcu_barrier_trace(TPS("OnlineQ"), cpu, rcu_state.barrier_sequence);
 	}
 	cpus_read_unlock();
 
@@ -4087,6 +4120,12 @@ void rcu_barrier(void)
 	/* Mark the end of the barrier operation. */
 	rcu_barrier_trace(TPS("Inc2"), -1, rcu_state.barrier_sequence);
 	rcu_seq_end(&rcu_state.barrier_sequence);
+	gseq = rcu_state.barrier_sequence;
+	for_each_possible_cpu(cpu) {
+		rdp = per_cpu_ptr(&rcu_data, cpu);
+
+		WRITE_ONCE(rdp->barrier_seq_snap, gseq);
+	}
 
 	/* Other rcu_barrier() invocations can now safely proceed. */
 	mutex_unlock(&rcu_state.barrier_mutex);
@@ -4134,6 +4173,8 @@ rcu_boot_init_percpu_data(int cpu)
 	INIT_WORK(&rdp->strict_work, strict_work_handler);
 	WARN_ON_ONCE(rdp->dynticks_nesting != 1);
 	WARN_ON_ONCE(rcu_dynticks_in_eqs(rcu_dynticks_snap(rdp)));
+	raw_spin_lock_init(&rdp->barrier_lock);
+	rdp->barrier_seq_snap = rcu_state.barrier_sequence;
 	rdp->rcu_ofl_gp_seq = rcu_state.gp_seq;
 	rdp->rcu_ofl_gp_flags = RCU_GP_CLEANED;
 	rdp->rcu_onl_gp_seq = rcu_state.gp_seq;
@@ -4284,8 +4325,10 @@ void rcu_cpu_starting(unsigned int cpu)
 	local_irq_save(flags);
 	arch_spin_lock(&rcu_state.ofl_lock);
 	rcu_dynticks_eqs_online();
+	raw_spin_lock(&rdp->barrier_lock);
 	raw_spin_lock_rcu_node(rnp);
 	WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext | mask);
+	raw_spin_unlock(&rdp->barrier_lock);
 	newcpu = !(rnp->expmaskinitnext & mask);
 	rnp->expmaskinitnext |= mask;
 	/* Allow lockless access for expedited grace periods. */
@@ -4372,7 +4415,9 @@ void rcutree_migrate_callbacks(int cpu)
 	    rcu_segcblist_empty(&rdp->cblist))
 		return;  /* No callbacks to migrate. */
 
-	local_irq_save(flags);
+	raw_spin_lock_irqsave(&rdp->barrier_lock, flags);
+	WARN_ON_ONCE(rcu_rdp_cpu_online(rdp));
+	rcu_barrier_entrain(rdp);
 	my_rdp = this_cpu_ptr(&rcu_data);
 	my_rnp = my_rdp->mynode;
 	rcu_nocb_lock(my_rdp); /* irqs already disabled. */
@@ -4382,10 +4427,10 @@ void rcutree_migrate_callbacks(int cpu)
 	needwake = rcu_advance_cbs(my_rnp, rdp) ||
 		   rcu_advance_cbs(my_rnp, my_rdp);
 	rcu_segcblist_merge(&my_rdp->cblist, &rdp->cblist);
+	raw_spin_unlock(&rdp->barrier_lock); /* irqs remain disabled. */
 	needwake = needwake || rcu_advance_cbs(my_rnp, my_rdp);
 	rcu_segcblist_disable(&rdp->cblist);
-	WARN_ON_ONCE(rcu_segcblist_empty(&my_rdp->cblist) !=
-		     !rcu_segcblist_n_cbs(&my_rdp->cblist));
+	WARN_ON_ONCE(rcu_segcblist_empty(&my_rdp->cblist) != !rcu_segcblist_n_cbs(&my_rdp->cblist));
 	if (rcu_rdp_is_offloaded(my_rdp)) {
 		raw_spin_unlock_rcu_node(my_rnp); /* irqs remain disabled. */
 		__call_rcu_nocb_wake(my_rdp, true, flags);
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 4b4bcef8a9743..84362951ed9e1 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -188,6 +188,8 @@ struct rcu_data {
 	bool rcu_forced_tick_exp;	/*   ... provide QS to expedited GP. */
 
 	/* 4) rcu_barrier(), OOM callbacks, and expediting. */
+	raw_spinlock_t barrier_lock;	/* Protects ->barrier_seq_snap. */
+	unsigned long barrier_seq_snap;	/* Snap of rcu_state.barrier_sequence. */
 	struct rcu_head barrier_head;
 	int exp_dynticks_snap;		/* Double-check need for IPI. */
 
-- 
GitLab


From 80b3fd474c91b3ecfd845b4a0bfb58706b877ba5 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Tue, 14 Dec 2021 13:35:17 -0800
Subject: [PATCH 0305/1586] rcu: Make rcu_barrier() no longer block CPU-hotplug
 operations

This commit removes the cpus_read_lock() and cpus_read_unlock() calls
from rcu_barrier(), thus allowing CPUs to come and go during the course
of rcu_barrier() execution.  Posting of the ->barrier_head callbacks does
synchronize with portions of RCU's CPU-hotplug notifiers, but these locks
are held for short time periods on both sides.  Thus, full CPU-hotplug
operations could both start and finish during the execution of a given
rcu_barrier() invocation.

Additional synchronization is provided by a global ->barrier_lock.
Since the ->barrier_lock is only used during rcu_barrier() execution and
during onlining/offlining a CPU, the contention for this lock should
be low.  It might be tempting to make use of a per-CPU lock just on
general principles, but straightforward attempts to do this have the
problems shown below.

Initial state: 3 CPUs present, CPU 0 and CPU1 do not have
any callback and CPU2 has callbacks.

1. CPU0 calls rcu_barrier().

2. CPU1 starts offlining for CPU2. CPU1 calls
   rcutree_migrate_callbacks(). rcu_barrier_entrain() is called
   from rcutree_migrate_callbacks(), with CPU2's rdp->barrier_lock.
   It does not entrain ->barrier_head for CPU2, as rcu_barrier()
   on CPU0 hasn't started the barrier sequence (by calling
   rcu_seq_start(&rcu_state.barrier_sequence)) yet.

3. CPU0 starts new barrier sequence. It iterates over
   CPU0 and CPU1, after acquiring their per-cpu ->barrier_lock
   and finds 0 segcblist length. It updates ->barrier_seq_snap
   for CPU0 and CPU1 and continues loop iteration to CPU2.

    for_each_possible_cpu(cpu) {
        raw_spin_lock_irqsave(&rdp->barrier_lock, flags);
        if (!rcu_segcblist_n_cbs(&rdp->cblist)) {
            WRITE_ONCE(rdp->barrier_seq_snap, gseq);
            raw_spin_unlock_irqrestore(&rdp->barrier_lock, flags);
            rcu_barrier_trace(TPS("NQ"), cpu, rcu_state.barrier_sequence);
            continue;
        }

4. rcutree_migrate_callbacks() completes execution on CPU1.
   Segcblist len for CPU2 becomes 0.

5. The loop iteration on CPU0, checks rcu_segcblist_n_cbs(&rdp->cblist)
   for CPU2 and completes the loop iteration after setting
   ->barrier_seq_snap.

6. As there isn't any ->barrier_head callback entrained; at
   this point, rcu_barrier() in CPU0 returns.

7. The callbacks, which migrated from CPU2 to CPU1, execute.

Straightforward per-CPU locking is also subject to the following race
condition noted by Boqun Feng:

1. CPU0 calls rcu_barrier(), starting a new barrier sequence by invoking
   rcu_seq_start() and init_completion(), but does not yet initialize
   rcu_state.barrier_cpu_count.

2. CPU1 starts offlining for CPU2, calling rcutree_migrate_callbacks(),
   which in turn calls rcu_barrier_entrain() holding CPU2's.
   rdp->barrier_lock.  It then entrains ->barrier_head for CPU2
   and atomically increments rcu_state.barrier_cpu_count, which is
   unfortunately not yet initialized to the value 2.

3. The just-entrained RCU callback is invoked.  It atomically
   decrements rcu_state.barrier_cpu_count and sees that it is
   now zero.  This callback therefore invokes complete().

4. CPU0 continues executing rcu_barrier(), but is not blocked
   by its call to wait_for_completion().  This results in rcu_barrier()
   returning before all pre-existing callbacks have been invoked,
   which is a bug.

Therefore, synchronization is provided by rcu_state.barrier_lock,
which is also held across the initialization sequence, especially the
rcu_seq_start() and the atomic_set() that sets rcu_state.barrier_cpu_count
to the value 2.  In addition, this lock is held when entraining the
rcu_barrier() callback, when deciding whether or not a CPU has callbacks
that rcu_barrier() must wait on, when setting the ->qsmaskinitnext for
incoming CPUs, and when migrating callbacks from a CPU that is going
offline.

Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Co-developed-by: Neeraj Upadhyay <quic_neeraju@quicinc.com>
Signed-off-by: Neeraj Upadhyay <quic_neeraju@quicinc.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/tree.c | 28 ++++++++++++++--------------
 kernel/rcu/tree.h |  3 ++-
 2 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 004ff1c0d192f..2d70b91e3fbcc 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -87,6 +87,7 @@ static struct rcu_state rcu_state = {
 	.gp_state = RCU_GP_IDLE,
 	.gp_seq = (0UL - 300UL) << RCU_SEQ_CTR_SHIFT,
 	.barrier_mutex = __MUTEX_INITIALIZER(rcu_state.barrier_mutex),
+	.barrier_lock = __RAW_SPIN_LOCK_UNLOCKED(rcu_state.barrier_lock),
 	.name = RCU_NAME,
 	.abbr = RCU_ABBR,
 	.exp_mutex = __MUTEX_INITIALIZER(rcu_state.exp_mutex),
@@ -3994,7 +3995,7 @@ static void rcu_barrier_entrain(struct rcu_data *rdp)
 	unsigned long gseq = READ_ONCE(rcu_state.barrier_sequence);
 	unsigned long lseq = READ_ONCE(rdp->barrier_seq_snap);
 
-	lockdep_assert_held(&rdp->barrier_lock);
+	lockdep_assert_held(&rcu_state.barrier_lock);
 	if (rcu_seq_state(lseq) || !rcu_seq_state(gseq) || rcu_seq_ctr(lseq) != rcu_seq_ctr(gseq))
 		return;
 	rcu_barrier_trace(TPS("IRQ"), -1, rcu_state.barrier_sequence);
@@ -4023,9 +4024,9 @@ static void rcu_barrier_handler(void *cpu_in)
 	lockdep_assert_irqs_disabled();
 	WARN_ON_ONCE(cpu != rdp->cpu);
 	WARN_ON_ONCE(cpu != smp_processor_id());
-	raw_spin_lock(&rdp->barrier_lock);
+	raw_spin_lock(&rcu_state.barrier_lock);
 	rcu_barrier_entrain(rdp);
-	raw_spin_unlock(&rdp->barrier_lock);
+	raw_spin_unlock(&rcu_state.barrier_lock);
 }
 
 /**
@@ -4058,6 +4059,7 @@ void rcu_barrier(void)
 	}
 
 	/* Mark the start of the barrier operation. */
+	raw_spin_lock_irqsave(&rcu_state.barrier_lock, flags);
 	rcu_seq_start(&rcu_state.barrier_sequence);
 	gseq = rcu_state.barrier_sequence;
 	rcu_barrier_trace(TPS("Inc1"), -1, rcu_state.barrier_sequence);
@@ -4071,7 +4073,7 @@ void rcu_barrier(void)
 	 */
 	init_completion(&rcu_state.barrier_completion);
 	atomic_set(&rcu_state.barrier_cpu_count, 2);
-	cpus_read_lock();
+	raw_spin_unlock_irqrestore(&rcu_state.barrier_lock, flags);
 
 	/*
 	 * Force each CPU with callbacks to register a new callback.
@@ -4083,21 +4085,21 @@ void rcu_barrier(void)
 retry:
 		if (smp_load_acquire(&rdp->barrier_seq_snap) == gseq)
 			continue;
-		raw_spin_lock_irqsave(&rdp->barrier_lock, flags);
+		raw_spin_lock_irqsave(&rcu_state.barrier_lock, flags);
 		if (!rcu_segcblist_n_cbs(&rdp->cblist)) {
 			WRITE_ONCE(rdp->barrier_seq_snap, gseq);
-			raw_spin_unlock_irqrestore(&rdp->barrier_lock, flags);
+			raw_spin_unlock_irqrestore(&rcu_state.barrier_lock, flags);
 			rcu_barrier_trace(TPS("NQ"), cpu, rcu_state.barrier_sequence);
 			continue;
 		}
 		if (!rcu_rdp_cpu_online(rdp)) {
 			rcu_barrier_entrain(rdp);
 			WARN_ON_ONCE(READ_ONCE(rdp->barrier_seq_snap) != gseq);
-			raw_spin_unlock_irqrestore(&rdp->barrier_lock, flags);
+			raw_spin_unlock_irqrestore(&rcu_state.barrier_lock, flags);
 			rcu_barrier_trace(TPS("OfflineNoCBQ"), cpu, rcu_state.barrier_sequence);
 			continue;
 		}
-		raw_spin_unlock_irqrestore(&rdp->barrier_lock, flags);
+		raw_spin_unlock_irqrestore(&rcu_state.barrier_lock, flags);
 		if (smp_call_function_single(cpu, rcu_barrier_handler, (void *)cpu, 1)) {
 			schedule_timeout_uninterruptible(1);
 			goto retry;
@@ -4105,7 +4107,6 @@ retry:
 		WARN_ON_ONCE(READ_ONCE(rdp->barrier_seq_snap) != gseq);
 		rcu_barrier_trace(TPS("OnlineQ"), cpu, rcu_state.barrier_sequence);
 	}
-	cpus_read_unlock();
 
 	/*
 	 * Now that we have an rcu_barrier_callback() callback on each
@@ -4173,7 +4174,6 @@ rcu_boot_init_percpu_data(int cpu)
 	INIT_WORK(&rdp->strict_work, strict_work_handler);
 	WARN_ON_ONCE(rdp->dynticks_nesting != 1);
 	WARN_ON_ONCE(rcu_dynticks_in_eqs(rcu_dynticks_snap(rdp)));
-	raw_spin_lock_init(&rdp->barrier_lock);
 	rdp->barrier_seq_snap = rcu_state.barrier_sequence;
 	rdp->rcu_ofl_gp_seq = rcu_state.gp_seq;
 	rdp->rcu_ofl_gp_flags = RCU_GP_CLEANED;
@@ -4325,10 +4325,10 @@ void rcu_cpu_starting(unsigned int cpu)
 	local_irq_save(flags);
 	arch_spin_lock(&rcu_state.ofl_lock);
 	rcu_dynticks_eqs_online();
-	raw_spin_lock(&rdp->barrier_lock);
+	raw_spin_lock(&rcu_state.barrier_lock);
 	raw_spin_lock_rcu_node(rnp);
 	WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext | mask);
-	raw_spin_unlock(&rdp->barrier_lock);
+	raw_spin_unlock(&rcu_state.barrier_lock);
 	newcpu = !(rnp->expmaskinitnext & mask);
 	rnp->expmaskinitnext |= mask;
 	/* Allow lockless access for expedited grace periods. */
@@ -4415,7 +4415,7 @@ void rcutree_migrate_callbacks(int cpu)
 	    rcu_segcblist_empty(&rdp->cblist))
 		return;  /* No callbacks to migrate. */
 
-	raw_spin_lock_irqsave(&rdp->barrier_lock, flags);
+	raw_spin_lock_irqsave(&rcu_state.barrier_lock, flags);
 	WARN_ON_ONCE(rcu_rdp_cpu_online(rdp));
 	rcu_barrier_entrain(rdp);
 	my_rdp = this_cpu_ptr(&rcu_data);
@@ -4427,7 +4427,7 @@ void rcutree_migrate_callbacks(int cpu)
 	needwake = rcu_advance_cbs(my_rnp, rdp) ||
 		   rcu_advance_cbs(my_rnp, my_rdp);
 	rcu_segcblist_merge(&my_rdp->cblist, &rdp->cblist);
-	raw_spin_unlock(&rdp->barrier_lock); /* irqs remain disabled. */
+	raw_spin_unlock(&rcu_state.barrier_lock); /* irqs remain disabled. */
 	needwake = needwake || rcu_advance_cbs(my_rnp, my_rdp);
 	rcu_segcblist_disable(&rdp->cblist);
 	WARN_ON_ONCE(rcu_segcblist_empty(&my_rdp->cblist) != !rcu_segcblist_n_cbs(&my_rdp->cblist));
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 84362951ed9e1..a2d7ffd634cc1 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -188,7 +188,6 @@ struct rcu_data {
 	bool rcu_forced_tick_exp;	/*   ... provide QS to expedited GP. */
 
 	/* 4) rcu_barrier(), OOM callbacks, and expediting. */
-	raw_spinlock_t barrier_lock;	/* Protects ->barrier_seq_snap. */
 	unsigned long barrier_seq_snap;	/* Snap of rcu_state.barrier_sequence. */
 	struct rcu_head barrier_head;
 	int exp_dynticks_snap;		/* Double-check need for IPI. */
@@ -323,6 +322,8 @@ struct rcu_state {
 						/*  rcu_barrier(). */
 	/* End of fields guarded by barrier_mutex. */
 
+	raw_spinlock_t barrier_lock;		/* Protects ->barrier_seq_snap. */
+
 	struct mutex exp_mutex;			/* Serialize expedited GP. */
 	struct mutex exp_wake_mutex;		/* Serialize wakeup. */
 	unsigned long expedited_sequence;	/* Take a ticket. */
-- 
GitLab


From 5ae0f1b58b28b53f4ab3708ef9337a2665e79664 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Fri, 10 Dec 2021 13:44:17 -0800
Subject: [PATCH 0306/1586] rcu: Create and use an rcu_rdp_cpu_online()

The pattern "rdp->grpmask & rcu_rnp_online_cpus(rnp)" occurs frequently
in RCU code in order to determine whether rdp->cpu is online from an
RCU perspective.  This commit therefore creates an rcu_rdp_cpu_online()
function to replace it.

[ paulmck: Apply kernel test robot unused-variable feedback. ]

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/tree.c        | 21 +++++++++++++--------
 kernel/rcu/tree_plugin.h |  6 ++----
 2 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 2d70b91e3fbcc..1d3507d563db1 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -222,6 +222,16 @@ static unsigned long rcu_rnp_online_cpus(struct rcu_node *rnp)
 	return READ_ONCE(rnp->qsmaskinitnext);
 }
 
+/*
+ * Is the CPU corresponding to the specified rcu_data structure online
+ * from RCU's perspective?  This perspective is given by that structure's
+ * ->qsmaskinitnext field rather than by the global cpu_online_mask.
+ */
+static bool rcu_rdp_cpu_online(struct rcu_data *rdp)
+{
+	return !!(rdp->grpmask & rcu_rnp_online_cpus(rdp->mynode));
+}
+
 /*
  * Return true if an RCU grace period is in progress.  The READ_ONCE()s
  * permit this function to be invoked without holding the root rcu_node
@@ -1168,14 +1178,12 @@ void rcu_request_urgent_qs_task(struct task_struct *t)
 bool rcu_lockdep_current_cpu_online(void)
 {
 	struct rcu_data *rdp;
-	struct rcu_node *rnp;
 	bool ret = false;
 
 	if (in_nmi() || !rcu_scheduler_fully_active)
 		return true;
 	preempt_disable_notrace();
 	rdp = this_cpu_ptr(&rcu_data);
-	rnp = rdp->mynode;
 	/*
 	 * Strictly, we care here about the case where the current CPU is
 	 * in rcu_cpu_starting() and thus has an excuse for rdp->grpmask
@@ -1183,8 +1191,7 @@ bool rcu_lockdep_current_cpu_online(void)
 	 * false positive if it's held by some *other* CPU, but that's
 	 * OK because that just means a false *negative* on the warning.
 	 */
-	if (rdp->grpmask & rcu_rnp_online_cpus(rnp) ||
-	    arch_spin_is_locked(&rcu_state.ofl_lock))
+	if (rcu_rdp_cpu_online(rdp) || arch_spin_is_locked(&rcu_state.ofl_lock))
 		ret = true;
 	preempt_enable_notrace();
 	return ret;
@@ -1269,8 +1276,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
 	 * For more detail, please refer to the "Hotplug CPU" section
 	 * of RCU's Requirements documentation.
 	 */
-	if (WARN_ON_ONCE(!(rdp->grpmask & rcu_rnp_online_cpus(rnp)))) {
-		bool onl;
+	if (WARN_ON_ONCE(!rcu_rdp_cpu_online(rdp))) {
 		struct rcu_node *rnp1;
 
 		pr_info("%s: grp: %d-%d level: %d ->gp_seq %ld ->completedqs %ld\n",
@@ -1279,9 +1285,8 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
 		for (rnp1 = rnp; rnp1; rnp1 = rnp1->parent)
 			pr_info("%s: %d:%d ->qsmask %#lx ->qsmaskinit %#lx ->qsmaskinitnext %#lx ->rcu_gp_init_mask %#lx\n",
 				__func__, rnp1->grplo, rnp1->grphi, rnp1->qsmask, rnp1->qsmaskinit, rnp1->qsmaskinitnext, rnp1->rcu_gp_init_mask);
-		onl = !!(rdp->grpmask & rcu_rnp_online_cpus(rnp));
 		pr_info("%s %d: %c online: %ld(%d) offline: %ld(%d)\n",
-			__func__, rdp->cpu, ".o"[onl],
+			__func__, rdp->cpu, ".o"[rcu_rdp_cpu_online(rdp)],
 			(long)rdp->rcu_onl_gp_seq, rdp->rcu_onl_gp_flags,
 			(long)rdp->rcu_ofl_gp_seq, rdp->rcu_ofl_gp_flags);
 		return 1; /* Break things loose after complaining. */
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index c5b45c2f68a15..d3db2168598ef 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -330,7 +330,7 @@ void rcu_note_context_switch(bool preempt)
 		 * then queue the task as required based on the states
 		 * of any ongoing and expedited grace periods.
 		 */
-		WARN_ON_ONCE((rdp->grpmask & rcu_rnp_online_cpus(rnp)) == 0);
+		WARN_ON_ONCE(!rcu_rdp_cpu_online(rdp));
 		WARN_ON_ONCE(!list_empty(&t->rcu_node_entry));
 		trace_rcu_preempt_task(rcu_state.name,
 				       t->pid,
@@ -773,7 +773,6 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
 	int cpu;
 	int i;
 	struct list_head *lhp;
-	bool onl;
 	struct rcu_data *rdp;
 	struct rcu_node *rnp1;
 
@@ -797,9 +796,8 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
 	pr_cont("\n");
 	for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++) {
 		rdp = per_cpu_ptr(&rcu_data, cpu);
-		onl = !!(rdp->grpmask & rcu_rnp_online_cpus(rnp));
 		pr_info("\t%d: %c online: %ld(%d) offline: %ld(%d)\n",
-			cpu, ".o"[onl],
+			cpu, ".o"[rcu_rdp_cpu_online(rdp)],
 			(long)rdp->rcu_onl_gp_seq, rdp->rcu_onl_gp_flags,
 			(long)rdp->rcu_ofl_gp_seq, rdp->rcu_ofl_gp_flags);
 	}
-- 
GitLab


From 2bcd18e041fc3c2ae58f41eb5e18790c7c82c674 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Wed, 2 Feb 2022 15:42:36 -0800
Subject: [PATCH 0307/1586] rcu-tasks: Use order_base_2() instead of ilog2()

The ilog2() function can be used to generate a shift count, but it will
generate the same count for a power of two as for one greater than a power
of two.  This results in shift counts that are larger than necessary for
systems with a power-of-two number of CPUs because the CPUs are numbered
from zero, so that the maximum CPU number is one less than that power
of two.

This commit therefore substitutes order_base_2(), which appears to have
been designed for exactly this use case.

Suggested-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/tasks.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
index d64f0b1d8cd3b..670c75cbcb98d 100644
--- a/kernel/rcu/tasks.h
+++ b/kernel/rcu/tasks.h
@@ -123,7 +123,7 @@ static struct rcu_tasks rt_name =							\
 	.call_func = call,								\
 	.rtpcpu = &rt_name ## __percpu,							\
 	.name = n,									\
-	.percpu_enqueue_shift = ilog2(CONFIG_NR_CPUS) + 1,				\
+	.percpu_enqueue_shift = order_base_2(CONFIG_NR_CPUS),				\
 	.percpu_enqueue_lim = 1,							\
 	.percpu_dequeue_lim = 1,							\
 	.barrier_q_mutex = __MUTEX_INITIALIZER(rt_name.barrier_q_mutex),		\
@@ -302,7 +302,7 @@ static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func,
 	if (unlikely(needadjust)) {
 		raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags);
 		if (rtp->percpu_enqueue_lim != nr_cpu_ids) {
-			WRITE_ONCE(rtp->percpu_enqueue_shift, ilog2(nr_cpu_ids) + 1);
+			WRITE_ONCE(rtp->percpu_enqueue_shift, order_base_2(nr_cpu_ids));
 			WRITE_ONCE(rtp->percpu_dequeue_lim, nr_cpu_ids);
 			smp_store_release(&rtp->percpu_enqueue_lim, nr_cpu_ids);
 			pr_info("Switching %s to per-CPU callback queuing.\n", rtp->name);
@@ -417,7 +417,7 @@ static int rcu_tasks_need_gpcb(struct rcu_tasks *rtp)
 	if (rcu_task_cb_adjust && ncbs <= rcu_task_collapse_lim) {
 		raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags);
 		if (rtp->percpu_enqueue_lim > 1) {
-			WRITE_ONCE(rtp->percpu_enqueue_shift, ilog2(nr_cpu_ids) + 1);
+			WRITE_ONCE(rtp->percpu_enqueue_shift, order_base_2(nr_cpu_ids));
 			smp_store_release(&rtp->percpu_enqueue_lim, 1);
 			rtp->percpu_dequeue_gpseq = get_state_synchronize_rcu();
 			pr_info("Starting switch %s to CPU-0 callback queuing.\n", rtp->name);
-- 
GitLab


From 00a8b4b54cd69d9f7ba1730d3b266469a778b1d7 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Wed, 2 Feb 2022 16:34:40 -0800
Subject: [PATCH 0308/1586] rcu-tasks: Set ->percpu_enqueue_shift to zero upon
 contention

Currently, call_rcu_tasks_generic() sets ->percpu_enqueue_shift to
order_base_2(nr_cpu_ids) upon encountering sufficient contention.
This does not shift to use of non-CPU-0 callback queues as intended, but
rather continues using only CPU 0's queue.  Although this does provide
some decrease in contention due to spreading work over multiple locks,
it is not the dramatic decrease that was intended.

This commit therefore makes call_rcu_tasks_generic() set
->percpu_enqueue_shift to 0.

Reported-by: Neeraj Upadhyay <quic_neeraju@quicinc.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/tasks.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
index 670c75cbcb98d..ac17348187e48 100644
--- a/kernel/rcu/tasks.h
+++ b/kernel/rcu/tasks.h
@@ -302,7 +302,7 @@ static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func,
 	if (unlikely(needadjust)) {
 		raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags);
 		if (rtp->percpu_enqueue_lim != nr_cpu_ids) {
-			WRITE_ONCE(rtp->percpu_enqueue_shift, order_base_2(nr_cpu_ids));
+			WRITE_ONCE(rtp->percpu_enqueue_shift, 0);
 			WRITE_ONCE(rtp->percpu_dequeue_lim, nr_cpu_ids);
 			smp_store_release(&rtp->percpu_enqueue_lim, nr_cpu_ids);
 			pr_info("Switching %s to per-CPU callback queuing.\n", rtp->name);
-- 
GitLab


From c6c89783eba05a5e159b07cfd8c68d841cc5de42 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Fri, 28 Jan 2022 15:39:36 -0800
Subject: [PATCH 0309/1586] fscrypt: add functions for direct I/O support

Encrypted files traditionally haven't supported DIO, due to the need to
encrypt/decrypt the data.  However, when the encryption is implemented
using inline encryption (blk-crypto) instead of the traditional
filesystem-layer encryption, it is straightforward to support DIO.

In preparation for supporting this, add the following functions:

- fscrypt_dio_supported() checks whether a DIO request is supported as
  far as encryption is concerned.  Encrypted files will only support DIO
  when inline encryption is used and the I/O request is properly
  aligned; this function checks these preconditions.

- fscrypt_limit_io_blocks() limits the length of a bio to avoid crossing
  a place in the file that a bio with an encryption context cannot
  cross due to a DUN discontiguity.  This function is needed by
  filesystems that use the iomap DIO implementation (which operates
  directly on logical ranges, so it won't use fscrypt_mergeable_bio())
  and that support FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32.

Co-developed-by: Satya Tangirala <satyat@google.com>
Signed-off-by: Satya Tangirala <satyat@google.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220128233940.79464-2-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 fs/crypto/crypto.c       |  8 ++++
 fs/crypto/inline_crypt.c | 93 ++++++++++++++++++++++++++++++++++++++++
 include/linux/fscrypt.h  | 18 ++++++++
 3 files changed, 119 insertions(+)

diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c
index 4ef3f714046aa..4fcca79f39aeb 100644
--- a/fs/crypto/crypto.c
+++ b/fs/crypto/crypto.c
@@ -69,6 +69,14 @@ void fscrypt_free_bounce_page(struct page *bounce_page)
 }
 EXPORT_SYMBOL(fscrypt_free_bounce_page);
 
+/*
+ * Generate the IV for the given logical block number within the given file.
+ * For filenames encryption, lblk_num == 0.
+ *
+ * Keep this in sync with fscrypt_limit_io_blocks().  fscrypt_limit_io_blocks()
+ * needs to know about any IV generation methods where the low bits of IV don't
+ * simply contain the lblk_num (e.g., IV_INO_LBLK_32).
+ */
 void fscrypt_generate_iv(union fscrypt_iv *iv, u64 lblk_num,
 			 const struct fscrypt_info *ci)
 {
diff --git a/fs/crypto/inline_crypt.c b/fs/crypto/inline_crypt.c
index c57bebfa48fea..93c2ca8580923 100644
--- a/fs/crypto/inline_crypt.c
+++ b/fs/crypto/inline_crypt.c
@@ -17,6 +17,7 @@
 #include <linux/buffer_head.h>
 #include <linux/sched/mm.h>
 #include <linux/slab.h>
+#include <linux/uio.h>
 
 #include "fscrypt_private.h"
 
@@ -315,6 +316,10 @@ EXPORT_SYMBOL_GPL(fscrypt_set_bio_crypt_ctx_bh);
  *
  * fscrypt_set_bio_crypt_ctx() must have already been called on the bio.
  *
+ * This function isn't required in cases where crypto-mergeability is ensured in
+ * another way, such as I/O targeting only a single file (and thus a single key)
+ * combined with fscrypt_limit_io_blocks() to ensure DUN contiguity.
+ *
  * Return: true iff the I/O is mergeable
  */
 bool fscrypt_mergeable_bio(struct bio *bio, const struct inode *inode,
@@ -363,3 +368,91 @@ bool fscrypt_mergeable_bio_bh(struct bio *bio,
 	return fscrypt_mergeable_bio(bio, inode, next_lblk);
 }
 EXPORT_SYMBOL_GPL(fscrypt_mergeable_bio_bh);
+
+/**
+ * fscrypt_dio_supported() - check whether a DIO (direct I/O) request is
+ *			     supported as far as encryption is concerned
+ * @iocb: the file and position the I/O is targeting
+ * @iter: the I/O data segment(s)
+ *
+ * Return: %true if there are no encryption constraints that prevent DIO from
+ *	   being supported; %false if DIO is unsupported.  (Note that in the
+ *	   %true case, the filesystem might have other, non-encryption-related
+ *	   constraints that prevent DIO from actually being supported.)
+ */
+bool fscrypt_dio_supported(struct kiocb *iocb, struct iov_iter *iter)
+{
+	const struct inode *inode = file_inode(iocb->ki_filp);
+	const unsigned int blocksize = i_blocksize(inode);
+
+	/* If the file is unencrypted, no veto from us. */
+	if (!fscrypt_needs_contents_encryption(inode))
+		return true;
+
+	/* We only support DIO with inline crypto, not fs-layer crypto. */
+	if (!fscrypt_inode_uses_inline_crypto(inode))
+		return false;
+
+	/*
+	 * Since the granularity of encryption is filesystem blocks, the file
+	 * position and total I/O length must be aligned to the filesystem block
+	 * size -- not just to the block device's logical block size as is
+	 * traditionally the case for DIO on many filesystems.
+	 *
+	 * We require that the user-provided memory buffers be filesystem block
+	 * aligned too.  It is simpler to have a single alignment value required
+	 * for all properties of the I/O, as is normally the case for DIO.
+	 * Also, allowing less aligned buffers would imply that data units could
+	 * cross bvecs, which would greatly complicate the I/O stack, which
+	 * assumes that bios can be split at any bvec boundary.
+	 */
+	if (!IS_ALIGNED(iocb->ki_pos | iov_iter_alignment(iter), blocksize))
+		return false;
+
+	return true;
+}
+EXPORT_SYMBOL_GPL(fscrypt_dio_supported);
+
+/**
+ * fscrypt_limit_io_blocks() - limit I/O blocks to avoid discontiguous DUNs
+ * @inode: the file on which I/O is being done
+ * @lblk: the block at which the I/O is being started from
+ * @nr_blocks: the number of blocks we want to submit starting at @lblk
+ *
+ * Determine the limit to the number of blocks that can be submitted in a bio
+ * targeting @lblk without causing a data unit number (DUN) discontiguity.
+ *
+ * This is normally just @nr_blocks, as normally the DUNs just increment along
+ * with the logical blocks.  (Or the file is not encrypted.)
+ *
+ * In rare cases, fscrypt can be using an IV generation method that allows the
+ * DUN to wrap around within logically contiguous blocks, and that wraparound
+ * will occur.  If this happens, a value less than @nr_blocks will be returned
+ * so that the wraparound doesn't occur in the middle of a bio, which would
+ * cause encryption/decryption to produce wrong results.
+ *
+ * Return: the actual number of blocks that can be submitted
+ */
+u64 fscrypt_limit_io_blocks(const struct inode *inode, u64 lblk, u64 nr_blocks)
+{
+	const struct fscrypt_info *ci;
+	u32 dun;
+
+	if (!fscrypt_inode_uses_inline_crypto(inode))
+		return nr_blocks;
+
+	if (nr_blocks <= 1)
+		return nr_blocks;
+
+	ci = inode->i_crypt_info;
+	if (!(fscrypt_policy_flags(&ci->ci_policy) &
+	      FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32))
+		return nr_blocks;
+
+	/* With IV_INO_LBLK_32, the DUN can wrap around from U32_MAX to 0. */
+
+	dun = ci->ci_hashed_ino + lblk;
+
+	return min_t(u64, nr_blocks, (u64)U32_MAX + 1 - dun);
+}
+EXPORT_SYMBOL_GPL(fscrypt_limit_io_blocks);
diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h
index 91ea9477e9bd2..50d92d805bd8c 100644
--- a/include/linux/fscrypt.h
+++ b/include/linux/fscrypt.h
@@ -714,6 +714,10 @@ bool fscrypt_mergeable_bio(struct bio *bio, const struct inode *inode,
 bool fscrypt_mergeable_bio_bh(struct bio *bio,
 			      const struct buffer_head *next_bh);
 
+bool fscrypt_dio_supported(struct kiocb *iocb, struct iov_iter *iter);
+
+u64 fscrypt_limit_io_blocks(const struct inode *inode, u64 lblk, u64 nr_blocks);
+
 #else /* CONFIG_FS_ENCRYPTION_INLINE_CRYPT */
 
 static inline bool __fscrypt_inode_uses_inline_crypto(const struct inode *inode)
@@ -742,6 +746,20 @@ static inline bool fscrypt_mergeable_bio_bh(struct bio *bio,
 {
 	return true;
 }
+
+static inline bool fscrypt_dio_supported(struct kiocb *iocb,
+					 struct iov_iter *iter)
+{
+	const struct inode *inode = file_inode(iocb->ki_filp);
+
+	return !fscrypt_needs_contents_encryption(inode);
+}
+
+static inline u64 fscrypt_limit_io_blocks(const struct inode *inode, u64 lblk,
+					  u64 nr_blocks)
+{
+	return nr_blocks;
+}
 #endif /* !CONFIG_FS_ENCRYPTION_INLINE_CRYPT */
 
 /**
-- 
GitLab


From 489734ef94f4f78087e103ef1bd9019968ff8dbd Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Fri, 28 Jan 2022 15:39:37 -0800
Subject: [PATCH 0310/1586] iomap: support direct I/O with fscrypt using
 blk-crypto

Encrypted files traditionally haven't supported DIO, due to the need to
encrypt/decrypt the data.  However, when the encryption is implemented
using inline encryption (blk-crypto) instead of the traditional
filesystem-layer encryption, it is straightforward to support DIO.

Add support for this to the iomap DIO implementation by calling
fscrypt_set_bio_crypt_ctx() to set encryption contexts on the bios.

Don't check for the rare case where a DUN (crypto data unit number)
discontiguity creates a boundary that bios must not cross.  Instead,
filesystems are expected to handle this in ->iomap_begin() by limiting
the length of the mapping so that iomap doesn't have to worry about it.

Co-developed-by: Satya Tangirala <satyat@google.com>
Signed-off-by: Satya Tangirala <satyat@google.com>
Acked-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220128233940.79464-3-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 fs/iomap/direct-io.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index 03ea367df19a4..20325b3926fa3 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -6,6 +6,7 @@
 #include <linux/module.h>
 #include <linux/compiler.h>
 #include <linux/fs.h>
+#include <linux/fscrypt.h>
 #include <linux/pagemap.h>
 #include <linux/iomap.h>
 #include <linux/backing-dev.h>
@@ -179,11 +180,14 @@ static void iomap_dio_bio_end_io(struct bio *bio)
 static void iomap_dio_zero(const struct iomap_iter *iter, struct iomap_dio *dio,
 		loff_t pos, unsigned len)
 {
+	struct inode *inode = file_inode(dio->iocb->ki_filp);
 	struct page *page = ZERO_PAGE(0);
 	int flags = REQ_SYNC | REQ_IDLE;
 	struct bio *bio;
 
 	bio = bio_alloc(GFP_KERNEL, 1);
+	fscrypt_set_bio_crypt_ctx(bio, inode, pos >> inode->i_blkbits,
+				  GFP_KERNEL);
 	bio_set_dev(bio, iter->iomap.bdev);
 	bio->bi_iter.bi_sector = iomap_sector(&iter->iomap, pos);
 	bio->bi_private = dio;
@@ -310,6 +314,8 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
 		}
 
 		bio = bio_alloc(GFP_KERNEL, nr_pages);
+		fscrypt_set_bio_crypt_ctx(bio, inode, pos >> inode->i_blkbits,
+					  GFP_KERNEL);
 		bio_set_dev(bio, iomap->bdev);
 		bio->bi_iter.bi_sector = iomap_sector(iomap, pos);
 		bio->bi_write_hint = dio->iocb->ki_hint;
-- 
GitLab


From 38ea50daa7a447dbcd7031f37a39a1baa163b2ab Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Fri, 28 Jan 2022 15:39:38 -0800
Subject: [PATCH 0311/1586] ext4: support direct I/O with fscrypt using
 blk-crypto

Encrypted files traditionally haven't supported DIO, due to the need to
encrypt/decrypt the data.  However, when the encryption is implemented
using inline encryption (blk-crypto) instead of the traditional
filesystem-layer encryption, it is straightforward to support DIO.

Therefore, make ext4 support DIO on files that are using inline
encryption.  Since ext4 uses iomap for DIO, and fscrypt support was
already added to iomap DIO, this just requires two small changes:

- Let DIO proceed when supported, by checking fscrypt_dio_supported()
  instead of assuming that encrypted files never support DIO.

- In ext4_iomap_begin(), use fscrypt_limit_io_blocks() to limit the
  length of the mapping in the rare case where a DUN discontiguity
  occurs in the middle of an extent.  The iomap DIO implementation
  requires this, since it assumes that it can submit a bio covering (up
  to) the whole mapping, without checking fscrypt constraints itself.

Co-developed-by: Satya Tangirala <satyat@google.com>
Signed-off-by: Satya Tangirala <satyat@google.com>
Acked-by: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Jaegeuk Kim <jaegeuk@kernel.org>
Link: https://lore.kernel.org/r/20220128233940.79464-4-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 fs/ext4/file.c  | 10 ++++++----
 fs/ext4/inode.c |  7 +++++++
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 8cc11715518ac..8bd66cdc41be2 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -36,9 +36,11 @@
 #include "acl.h"
 #include "truncate.h"
 
-static bool ext4_dio_supported(struct inode *inode)
+static bool ext4_dio_supported(struct kiocb *iocb, struct iov_iter *iter)
 {
-	if (IS_ENABLED(CONFIG_FS_ENCRYPTION) && IS_ENCRYPTED(inode))
+	struct inode *inode = file_inode(iocb->ki_filp);
+
+	if (!fscrypt_dio_supported(iocb, iter))
 		return false;
 	if (fsverity_active(inode))
 		return false;
@@ -61,7 +63,7 @@ static ssize_t ext4_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
 		inode_lock_shared(inode);
 	}
 
-	if (!ext4_dio_supported(inode)) {
+	if (!ext4_dio_supported(iocb, to)) {
 		inode_unlock_shared(inode);
 		/*
 		 * Fallback to buffered I/O if the operation being performed on
@@ -509,7 +511,7 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	}
 
 	/* Fallback to buffered I/O if the inode does not support direct I/O. */
-	if (!ext4_dio_supported(inode)) {
+	if (!ext4_dio_supported(iocb, from)) {
 		if (ilock_shared)
 			inode_unlock_shared(inode);
 		else
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 01c9e4f743ba9..4cf55ef54193a 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3409,6 +3409,13 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
 	if (ret < 0)
 		return ret;
 out:
+	/*
+	 * When inline encryption is enabled, sometimes I/O to an encrypted file
+	 * has to be broken up to guarantee DUN contiguity.  Handle this by
+	 * limiting the length of the mapping returned.
+	 */
+	map.m_len = fscrypt_limit_io_blocks(inode, map.m_lblk, map.m_len);
+
 	ext4_set_iomap(inode, iomap, &map, offset, length, flags);
 
 	return 0;
-- 
GitLab


From 8a2c77bc2a9a44d1a80dee1f320ea8f7516b98ee Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Fri, 28 Jan 2022 15:39:39 -0800
Subject: [PATCH 0312/1586] f2fs: support direct I/O with fscrypt using
 blk-crypto

Encrypted files traditionally haven't supported DIO, due to the need to
encrypt/decrypt the data.  However, when the encryption is implemented
using inline encryption (blk-crypto) instead of the traditional
filesystem-layer encryption, it is straightforward to support DIO.

Therefore, make f2fs support DIO on files that are using inline
encryption.  Since f2fs uses iomap for DIO, and fscrypt support was
already added to iomap DIO, this just requires two small changes:

- Let DIO proceed when supported, by checking fscrypt_dio_supported()
  instead of assuming that encrypted files never support DIO.

- In f2fs_iomap_begin(), use fscrypt_limit_io_blocks() to limit the
  length of the mapping in the rare case where a DUN discontiguity
  occurs in the middle of an extent.  The iomap DIO implementation
  requires this, since it assumes that it can submit a bio covering (up
  to) the whole mapping, without checking fscrypt constraints itself.

Co-developed-by: Satya Tangirala <satyat@google.com>
Signed-off-by: Satya Tangirala <satyat@google.com>
Acked-by: Jaegeuk Kim <jaegeuk@kernel.org>
Link: https://lore.kernel.org/r/20220128233940.79464-5-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 fs/f2fs/data.c | 7 +++++++
 fs/f2fs/f2fs.h | 6 +++++-
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 8c417864c66ae..020d47f97969c 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -4044,6 +4044,13 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
 
 	iomap->offset = blks_to_bytes(inode, map.m_lblk);
 
+	/*
+	 * When inline encryption is enabled, sometimes I/O to an encrypted file
+	 * has to be broken up to guarantee DUN contiguity.  Handle this by
+	 * limiting the length of the mapping returned.
+	 */
+	map.m_len = fscrypt_limit_io_blocks(inode, map.m_lblk, map.m_len);
+
 	if (map.m_flags & (F2FS_MAP_MAPPED | F2FS_MAP_UNWRITTEN)) {
 		iomap->length = blks_to_bytes(inode, map.m_len);
 		if (map.m_flags & F2FS_MAP_MAPPED) {
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 68b44015514f5..8130b092e5432 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -4371,7 +4371,11 @@ static inline bool f2fs_force_buffered_io(struct inode *inode,
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 	int rw = iov_iter_rw(iter);
 
-	if (f2fs_post_read_required(inode))
+	if (!fscrypt_dio_supported(iocb, iter))
+		return true;
+	if (fsverity_active(inode))
+		return true;
+	if (f2fs_compressed_file(inode))
 		return true;
 
 	/* disallow direct IO if any of devices has unaligned blksize */
-- 
GitLab


From cdaa1b1941f667814300799ddb74f3079517cd5a Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Fri, 28 Jan 2022 15:39:40 -0800
Subject: [PATCH 0313/1586] fscrypt: update documentation for direct I/O
 support

Now that direct I/O is supported on encrypted files in some cases,
document what these cases are.

Link: https://lore.kernel.org/r/20220128233940.79464-6-ebiggers@kernel.org
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 Documentation/filesystems/fscrypt.rst | 25 +++++++++++++++++++++++--
 1 file changed, 23 insertions(+), 2 deletions(-)

diff --git a/Documentation/filesystems/fscrypt.rst b/Documentation/filesystems/fscrypt.rst
index 4d5d50dca65c6..6ccd5efb25b77 100644
--- a/Documentation/filesystems/fscrypt.rst
+++ b/Documentation/filesystems/fscrypt.rst
@@ -1047,8 +1047,8 @@ astute users may notice some differences in behavior:
   may be used to overwrite the source files but isn't guaranteed to be
   effective on all filesystems and storage devices.
 
-- Direct I/O is not supported on encrypted files.  Attempts to use
-  direct I/O on such files will fall back to buffered I/O.
+- Direct I/O is supported on encrypted files only under some
+  circumstances.  For details, see `Direct I/O support`_.
 
 - The fallocate operations FALLOC_FL_COLLAPSE_RANGE and
   FALLOC_FL_INSERT_RANGE are not supported on encrypted files and will
@@ -1179,6 +1179,27 @@ Inline encryption doesn't affect the ciphertext or other aspects of
 the on-disk format, so users may freely switch back and forth between
 using "inlinecrypt" and not using "inlinecrypt".
 
+Direct I/O support
+==================
+
+For direct I/O on an encrypted file to work, the following conditions
+must be met (in addition to the conditions for direct I/O on an
+unencrypted file):
+
+* The file must be using inline encryption.  Usually this means that
+  the filesystem must be mounted with ``-o inlinecrypt`` and inline
+  encryption hardware must be present.  However, a software fallback
+  is also available.  For details, see `Inline encryption support`_.
+
+* The I/O request must be fully aligned to the filesystem block size.
+  This means that the file position the I/O is targeting, the lengths
+  of all I/O segments, and the memory addresses of all I/O buffers
+  must be multiples of this value.  Note that the filesystem block
+  size may be greater than the logical block size of the block device.
+
+If either of the above conditions is not met, then direct I/O on the
+encrypted file will fall back to buffered I/O.
+
 Implementation details
 ======================
 
-- 
GitLab


From f233673cd32a048f2eed69e56b61174c33fb740b Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 27 Jan 2022 09:39:15 -0800
Subject: [PATCH 0314/1586] torture: Make torture.sh help message match reality

This commit fixes a couple of typos: s/--doall/--do-all/ and
s/--doallmodconfig/--do-allmodconfig/.

[ paulmck: Add Fixes: supplied by Paul Menzel. ]

Fixes: a115a775a8d5 ("torture: Add "make allmodconfig" to torture.sh")
Reported-by: Paul Menzel <pmenzel@molgen.mpg.de>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/bin/torture.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh
index e00e60efb2310..bfe09e2829c8d 100755
--- a/tools/testing/selftests/rcutorture/bin/torture.sh
+++ b/tools/testing/selftests/rcutorture/bin/torture.sh
@@ -71,8 +71,8 @@ usage () {
 	echo "       --configs-rcutorture \"config-file list w/ repeat factor (3*TINY01)\""
 	echo "       --configs-locktorture \"config-file list w/ repeat factor (10*LOCK01)\""
 	echo "       --configs-scftorture \"config-file list w/ repeat factor (2*CFLIST)\""
-	echo "       --doall"
-	echo "       --doallmodconfig / --do-no-allmodconfig"
+	echo "       --do-all"
+	echo "       --do-allmodconfig / --do-no-allmodconfig"
 	echo "       --do-clocksourcewd / --do-no-clocksourcewd"
 	echo "       --do-kasan / --do-no-kasan"
 	echo "       --do-kcsan / --do-no-kcsan"
-- 
GitLab


From b5597cb36f8bb29b244b2f90030d54bf81bf6fbc Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Mon, 31 Jan 2022 15:03:36 -0800
Subject: [PATCH 0315/1586] rcutorture: Test SRCU size transitions

Thie commit adds kernel boot parameters to the SRCU-N and SRCU-P
rcutorture scenarios to cause SRCU-N to test contention-based resizing
and SRCU-P to test init_srcu_struct()-time resizing.  Note that this
also tests never-resizing because the contention-based resizing normally
takes some minutes to make the shift.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/configs/rcu/SRCU-N.boot | 1 +
 tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N.boot
index 238bfe3bd0ccc..ce0694fd9b929 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N.boot
@@ -1 +1,2 @@
 rcutorture.torture_type=srcu
+rcutorture.fwd_progress=3
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot
index ce48c7b826734..2db39f298d182 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot
@@ -1,2 +1,4 @@
 rcutorture.torture_type=srcud
 rcupdate.rcu_self_test=1
+rcutorture.fwd_progress=3
+srcutree.big_cpu_lim=5
-- 
GitLab


From 8ea7a53daf3c9d26910ee9a115b2fb6b86cf3c01 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Tue, 1 Feb 2022 08:23:46 -0800
Subject: [PATCH 0316/1586] rcutorture: Provide non-power-of-two Tasks RCU
 scenarios

This commit adjusts RUDE01 to 3 CPUs and TRACE01 to 5 CPUs in order to
test Tasks RCU's ability to handle non-power-of-two numbers of CPUs.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/configs/rcu/RUDE01  | 2 +-
 tools/testing/selftests/rcutorture/configs/rcu/TRACE01 | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/rcutorture/configs/rcu/RUDE01 b/tools/testing/selftests/rcutorture/configs/rcu/RUDE01
index 3ca112444ce77..7093422050f66 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/RUDE01
+++ b/tools/testing/selftests/rcutorture/configs/rcu/RUDE01
@@ -1,5 +1,5 @@
 CONFIG_SMP=y
-CONFIG_NR_CPUS=4
+CONFIG_NR_CPUS=3
 CONFIG_HOTPLUG_CPU=y
 CONFIG_PREEMPT_NONE=n
 CONFIG_PREEMPT_VOLUNTARY=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRACE01 b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01
index 34c8ff5a12f20..e4d74e5fc1d09 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TRACE01
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01
@@ -1,5 +1,5 @@
 CONFIG_SMP=y
-CONFIG_NR_CPUS=4
+CONFIG_NR_CPUS=5
 CONFIG_HOTPLUG_CPU=y
 CONFIG_PREEMPT_NONE=y
 CONFIG_PREEMPT_VOLUNTARY=n
-- 
GitLab


From 7d8e4c98d13b6b54655140d6b6a6a17fbc9b32f1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Sun, 23 Jan 2022 18:51:57 +0100
Subject: [PATCH 0317/1586] staging: fbtft: Fix error path in
 fbtft_driver_module_init()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If registering the platform driver fails, the function must not return
without undoing the spi driver registration first.

Fixes: c296d5f9957c ("staging: fbtft: core support")
Link: https://lore.kernel.org/r/20220118181338.207943-1-u.kleine-koenig@pengutronix.de
Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Acked-by: Lee Jones <lee.jones@linaro.org>
Link: https://lore.kernel.org/r/20220123175201.34839-2-u.kleine-koenig@pengutronix.de
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/staging/fbtft/fbtft.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/staging/fbtft/fbtft.h b/drivers/staging/fbtft/fbtft.h
index 4cdec34e23d2e..55677efc01380 100644
--- a/drivers/staging/fbtft/fbtft.h
+++ b/drivers/staging/fbtft/fbtft.h
@@ -334,7 +334,10 @@ static int __init fbtft_driver_module_init(void)                           \
 	ret = spi_register_driver(&fbtft_driver_spi_driver);               \
 	if (ret < 0)                                                       \
 		return ret;                                                \
-	return platform_driver_register(&fbtft_driver_platform_driver);    \
+	ret = platform_driver_register(&fbtft_driver_platform_driver);     \
+	if (ret < 0)                                                       \
+		spi_unregister_driver(&fbtft_driver_spi_driver);           \
+	return ret;                                                        \
 }                                                                          \
 									   \
 static void __exit fbtft_driver_module_exit(void)                          \
-- 
GitLab


From c222ea5d2f3361458672d1f52c78970aa021cb61 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Sun, 23 Jan 2022 18:51:58 +0100
Subject: [PATCH 0318/1586] staging: fbtft: Deduplicate driver registration
 macros
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The two macros FBTFT_REGISTER_DRIVER and FBTFT_REGISTER_SPI_DRIVER
contain quite some duplication: Both define an spi driver and an of device
table and the differences are quite subtle.

So create two new macros and use both twice.

Link: https://lore.kernel.org/r/20220118181338.207943-2-u.kleine-koenig@pengutronix.de
Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Acked-by: Lee Jones <lee.jones@linaro.org>
Link: https://lore.kernel.org/r/20220123175201.34839-3-u.kleine-koenig@pengutronix.de
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/staging/fbtft/fbtft.h | 93 ++++++++++++++---------------------
 1 file changed, 36 insertions(+), 57 deletions(-)

diff --git a/drivers/staging/fbtft/fbtft.h b/drivers/staging/fbtft/fbtft.h
index 55677efc01380..6a7545b5bcd2d 100644
--- a/drivers/staging/fbtft/fbtft.h
+++ b/drivers/staging/fbtft/fbtft.h
@@ -272,21 +272,40 @@ void fbtft_write_reg8_bus9(struct fbtft_par *par, int len, ...);
 void fbtft_write_reg16_bus8(struct fbtft_par *par, int len, ...);
 void fbtft_write_reg16_bus16(struct fbtft_par *par, int len, ...);
 
+#define FBTFT_DT_TABLE(_compatible)						\
+static const struct of_device_id dt_ids[] = {					\
+	{ .compatible = _compatible },						\
+	{},									\
+};										\
+MODULE_DEVICE_TABLE(of, dt_ids);
+
+#define FBTFT_SPI_DRIVER(_name, _compatible, _display, _spi_ids)		\
+										\
+static int fbtft_driver_probe_spi(struct spi_device *spi)			\
+{										\
+	return fbtft_probe_common(_display, spi, NULL);				\
+}										\
+										\
+static int fbtft_driver_remove_spi(struct spi_device *spi)			\
+{										\
+	struct fb_info *info = spi_get_drvdata(spi);				\
+										\
+	fbtft_remove_common(&spi->dev, info);					\
+	return 0;								\
+}										\
+										\
+static struct spi_driver fbtft_driver_spi_driver = {				\
+	.driver = {								\
+		.name = _name,							\
+		.of_match_table = dt_ids,					\
+	},									\
+	.id_table = _spi_ids,							\
+	.probe = fbtft_driver_probe_spi,					\
+	.remove = fbtft_driver_remove_spi,					\
+};
+
 #define FBTFT_REGISTER_DRIVER(_name, _compatible, _display)                \
 									   \
-static int fbtft_driver_probe_spi(struct spi_device *spi)                  \
-{                                                                          \
-	return fbtft_probe_common(_display, spi, NULL);                    \
-}                                                                          \
-									   \
-static int fbtft_driver_remove_spi(struct spi_device *spi)                 \
-{                                                                          \
-	struct fb_info *info = spi_get_drvdata(spi);                       \
-									   \
-	fbtft_remove_common(&spi->dev, info);                              \
-	return 0;                                                          \
-}                                                                          \
-									   \
 static int fbtft_driver_probe_pdev(struct platform_device *pdev)           \
 {                                                                          \
 	return fbtft_probe_common(_display, NULL, pdev);                   \
@@ -300,22 +319,9 @@ static int fbtft_driver_remove_pdev(struct platform_device *pdev)          \
 	return 0;                                                          \
 }                                                                          \
 									   \
-static const struct of_device_id dt_ids[] = {                              \
-	{ .compatible = _compatible },                                     \
-	{},                                                                \
-};                                                                         \
-									   \
-MODULE_DEVICE_TABLE(of, dt_ids);                                           \
+FBTFT_DT_TABLE(_compatible)						   \
 									   \
-									   \
-static struct spi_driver fbtft_driver_spi_driver = {                       \
-	.driver = {                                                        \
-		.name   = _name,                                           \
-		.of_match_table = dt_ids,                                  \
-	},                                                                 \
-	.probe  = fbtft_driver_probe_spi,                                  \
-	.remove = fbtft_driver_remove_spi,                                 \
-};                                                                         \
+FBTFT_SPI_DRIVER(_name, _compatible, _display, NULL)			   \
 									   \
 static struct platform_driver fbtft_driver_platform_driver = {             \
 	.driver = {                                                        \
@@ -351,42 +357,15 @@ module_exit(fbtft_driver_module_exit);
 
 #define FBTFT_REGISTER_SPI_DRIVER(_name, _comp_vend, _comp_dev, _display)	\
 										\
-static int fbtft_driver_probe_spi(struct spi_device *spi)			\
-{										\
-	return fbtft_probe_common(_display, spi, NULL);				\
-}										\
-										\
-static int fbtft_driver_remove_spi(struct spi_device *spi)			\
-{										\
-	struct fb_info *info = spi_get_drvdata(spi);				\
-										\
-	fbtft_remove_common(&spi->dev, info);					\
-	return 0;								\
-}										\
-										\
-static const struct of_device_id dt_ids[] = {					\
-	{ .compatible = _comp_vend "," _comp_dev },				\
-	{},									\
-};										\
-										\
-MODULE_DEVICE_TABLE(of, dt_ids);						\
+FBTFT_DT_TABLE(_comp_vend "," _comp_dev)					\
 										\
 static const struct spi_device_id spi_ids[] = {					\
 	{ .name = _comp_dev },							\
 	{},									\
 };										\
-										\
 MODULE_DEVICE_TABLE(spi, spi_ids);						\
 										\
-static struct spi_driver fbtft_driver_spi_driver = {				\
-	.driver = {								\
-		.name  = _name,							\
-		.of_match_table = dt_ids,					\
-	},									\
-	.id_table = spi_ids,							\
-	.probe  = fbtft_driver_probe_spi,					\
-	.remove = fbtft_driver_remove_spi,					\
-};										\
+FBTFT_SPI_DRIVER(_name, _comp_vend "," _comp_dev, _display, spi_ids)		\
 										\
 module_spi_driver(fbtft_driver_spi_driver);
 
-- 
GitLab


From 316f569df766df9a49c36c052ec6afaf19cb6933 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Sun, 23 Jan 2022 18:51:59 +0100
Subject: [PATCH 0319/1586] tpm: st33zp24: Make st33zp24_remove() a void
 function
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Up to now st33zp24_remove() returns zero unconditionally. Make it return
no value instead which makes it easier to see in the callers that there is
no error to handle.

Also the return value of i2c and spi remove callbacks is ignored anyway.

Link: https://lore.kernel.org/r/20220104231103.227924-1-u.kleine-koenig@pengutronix.de
Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Acked-by: Lee Jones <lee.jones@linaro.org>
Link: https://lore.kernel.org/r/20220123175201.34839-4-u.kleine-koenig@pengutronix.de
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/char/tpm/st33zp24/i2c.c      | 5 +----
 drivers/char/tpm/st33zp24/spi.c      | 5 +----
 drivers/char/tpm/st33zp24/st33zp24.c | 3 +--
 drivers/char/tpm/st33zp24/st33zp24.h | 2 +-
 4 files changed, 4 insertions(+), 11 deletions(-)

diff --git a/drivers/char/tpm/st33zp24/i2c.c b/drivers/char/tpm/st33zp24/i2c.c
index 7c617edff4ca2..3170d59d660c0 100644
--- a/drivers/char/tpm/st33zp24/i2c.c
+++ b/drivers/char/tpm/st33zp24/i2c.c
@@ -267,11 +267,8 @@ static int st33zp24_i2c_probe(struct i2c_client *client,
 static int st33zp24_i2c_remove(struct i2c_client *client)
 {
 	struct tpm_chip *chip = i2c_get_clientdata(client);
-	int ret;
 
-	ret = st33zp24_remove(chip);
-	if (ret)
-		return ret;
+	st33zp24_remove(chip);
 
 	return 0;
 }
diff --git a/drivers/char/tpm/st33zp24/spi.c b/drivers/char/tpm/st33zp24/spi.c
index a75dafd394451..ccd9e42b8eab4 100644
--- a/drivers/char/tpm/st33zp24/spi.c
+++ b/drivers/char/tpm/st33zp24/spi.c
@@ -384,11 +384,8 @@ static int st33zp24_spi_probe(struct spi_device *dev)
 static int st33zp24_spi_remove(struct spi_device *dev)
 {
 	struct tpm_chip *chip = spi_get_drvdata(dev);
-	int ret;
 
-	ret = st33zp24_remove(chip);
-	if (ret)
-		return ret;
+	st33zp24_remove(chip);
 
 	return 0;
 }
diff --git a/drivers/char/tpm/st33zp24/st33zp24.c b/drivers/char/tpm/st33zp24/st33zp24.c
index ce9efb73c144b..15b393e92c8ec 100644
--- a/drivers/char/tpm/st33zp24/st33zp24.c
+++ b/drivers/char/tpm/st33zp24/st33zp24.c
@@ -511,10 +511,9 @@ _tpm_clean_answer:
 }
 EXPORT_SYMBOL(st33zp24_probe);
 
-int st33zp24_remove(struct tpm_chip *chip)
+void st33zp24_remove(struct tpm_chip *chip)
 {
 	tpm_chip_unregister(chip);
-	return 0;
 }
 EXPORT_SYMBOL(st33zp24_remove);
 
diff --git a/drivers/char/tpm/st33zp24/st33zp24.h b/drivers/char/tpm/st33zp24/st33zp24.h
index 6747be1e25021..b387a476c555f 100644
--- a/drivers/char/tpm/st33zp24/st33zp24.h
+++ b/drivers/char/tpm/st33zp24/st33zp24.h
@@ -34,5 +34,5 @@ int st33zp24_pm_resume(struct device *dev);
 
 int st33zp24_probe(void *phy_id, const struct st33zp24_phy_ops *ops,
 		   struct device *dev, int irq, int io_lpcpd);
-int st33zp24_remove(struct tpm_chip *chip);
+void st33zp24_remove(struct tpm_chip *chip);
 #endif /* __LOCAL_ST33ZP24_H__ */
-- 
GitLab


From afb0a80e63d67e957b5d0eb4ade301aff6e13c8c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Sun, 23 Jan 2022 18:52:00 +0100
Subject: [PATCH 0320/1586] platform/chrome: cros_ec: Make cros_ec_unregister()
 return void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Up to now cros_ec_unregister() returns zero unconditionally. Make it
return void instead which makes it easier to see in the callers that
there is no error to handle.

Also the return value of i2c, platform and spi remove callbacks is
ignored anyway.

Reviewed-by: Guenter Roeck <groeck@chromium.org>
Link: https://lore.kernel.org/r/20211020071753.wltjslmimb6wtlp5@pengutronix.de
Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Acked-by: Lee Jones <lee.jones@linaro.org>
Link: https://lore.kernel.org/r/20220123175201.34839-5-u.kleine-koenig@pengutronix.de
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/platform/chrome/cros_ec.c     | 4 +---
 drivers/platform/chrome/cros_ec.h     | 2 +-
 drivers/platform/chrome/cros_ec_i2c.c | 4 +++-
 drivers/platform/chrome/cros_ec_lpc.c | 4 +++-
 drivers/platform/chrome/cros_ec_spi.c | 4 +++-
 5 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/drivers/platform/chrome/cros_ec.c b/drivers/platform/chrome/cros_ec.c
index fc5aa1525d13c..d49a4efe46c8a 100644
--- a/drivers/platform/chrome/cros_ec.c
+++ b/drivers/platform/chrome/cros_ec.c
@@ -302,13 +302,11 @@ EXPORT_SYMBOL(cros_ec_register);
  *
  * Return: 0 on success or negative error code.
  */
-int cros_ec_unregister(struct cros_ec_device *ec_dev)
+void cros_ec_unregister(struct cros_ec_device *ec_dev)
 {
 	if (ec_dev->pd)
 		platform_device_unregister(ec_dev->pd);
 	platform_device_unregister(ec_dev->ec);
-
-	return 0;
 }
 EXPORT_SYMBOL(cros_ec_unregister);
 
diff --git a/drivers/platform/chrome/cros_ec.h b/drivers/platform/chrome/cros_ec.h
index 78363dcfdf23a..bbca0096868ac 100644
--- a/drivers/platform/chrome/cros_ec.h
+++ b/drivers/platform/chrome/cros_ec.h
@@ -11,7 +11,7 @@
 #include <linux/interrupt.h>
 
 int cros_ec_register(struct cros_ec_device *ec_dev);
-int cros_ec_unregister(struct cros_ec_device *ec_dev);
+void cros_ec_unregister(struct cros_ec_device *ec_dev);
 
 int cros_ec_suspend(struct cros_ec_device *ec_dev);
 int cros_ec_resume(struct cros_ec_device *ec_dev);
diff --git a/drivers/platform/chrome/cros_ec_i2c.c b/drivers/platform/chrome/cros_ec_i2c.c
index 30c8938c27d54..22feb0fd4ce71 100644
--- a/drivers/platform/chrome/cros_ec_i2c.c
+++ b/drivers/platform/chrome/cros_ec_i2c.c
@@ -313,7 +313,9 @@ static int cros_ec_i2c_remove(struct i2c_client *client)
 {
 	struct cros_ec_device *ec_dev = i2c_get_clientdata(client);
 
-	return cros_ec_unregister(ec_dev);
+	cros_ec_unregister(ec_dev);
+
+	return 0;
 }
 
 #ifdef CONFIG_PM_SLEEP
diff --git a/drivers/platform/chrome/cros_ec_lpc.c b/drivers/platform/chrome/cros_ec_lpc.c
index d6306d2a096f1..7651417b4a25b 100644
--- a/drivers/platform/chrome/cros_ec_lpc.c
+++ b/drivers/platform/chrome/cros_ec_lpc.c
@@ -439,7 +439,9 @@ static int cros_ec_lpc_remove(struct platform_device *pdev)
 		acpi_remove_notify_handler(adev->handle, ACPI_ALL_NOTIFY,
 					   cros_ec_lpc_acpi_notify);
 
-	return cros_ec_unregister(ec_dev);
+	cros_ec_unregister(ec_dev);
+
+	return 0;
 }
 
 static const struct acpi_device_id cros_ec_lpc_acpi_device_ids[] = {
diff --git a/drivers/platform/chrome/cros_ec_spi.c b/drivers/platform/chrome/cros_ec_spi.c
index 14c4046fa04d5..713c58687721b 100644
--- a/drivers/platform/chrome/cros_ec_spi.c
+++ b/drivers/platform/chrome/cros_ec_spi.c
@@ -790,7 +790,9 @@ static int cros_ec_spi_remove(struct spi_device *spi)
 {
 	struct cros_ec_device *ec_dev = spi_get_drvdata(spi);
 
-	return cros_ec_unregister(ec_dev);
+	cros_ec_unregister(ec_dev);
+
+	return 0;
 }
 
 #ifdef CONFIG_PM_SLEEP
-- 
GitLab


From a0386bba70934d42f586eaf68b21d5eeaffa7bd0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Sun, 23 Jan 2022 18:52:01 +0100
Subject: [PATCH 0321/1586] spi: make remove callback a void function
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The value returned by an spi driver's remove function is mostly ignored.
(Only an error message is printed if the value is non-zero that the
error is ignored.)

So change the prototype of the remove function to return no value. This
way driver authors are not tempted to assume that passing an error to
the upper layer is a good idea. All drivers are adapted accordingly.
There is no intended change of behaviour, all callbacks were prepared to
return 0 before.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Acked-by: Marc Kleine-Budde <mkl@pengutronix.de>
Acked-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Jérôme Pouiller <jerome.pouiller@silabs.com>
Acked-by: Miquel Raynal <miquel.raynal@bootlin.com>
Acked-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Acked-by: Claudius Heine <ch@denx.de>
Acked-by: Stefan Schmidt <stefan@datenfreihafen.org>
Acked-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Acked-by: Ulf Hansson <ulf.hansson@linaro.org> # For MMC
Acked-by: Marcus Folkesson <marcus.folkesson@gmail.com>
Acked-by: Łukasz Stelmach <l.stelmach@samsung.com>
Acked-by: Lee Jones <lee.jones@linaro.org>
Link: https://lore.kernel.org/r/20220123175201.34839-6-u.kleine-koenig@pengutronix.de
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/bus/moxtet.c                                  |  4 +---
 drivers/char/tpm/st33zp24/spi.c                       |  4 +---
 drivers/char/tpm/tpm_tis_spi_main.c                   |  3 +--
 drivers/clk/clk-lmk04832.c                            |  4 +---
 drivers/gpio/gpio-74x164.c                            |  4 +---
 drivers/gpio/gpio-max3191x.c                          |  4 +---
 drivers/gpio/gpio-max7301.c                           |  4 +---
 drivers/gpio/gpio-mc33880.c                           |  4 +---
 drivers/gpio/gpio-pisosr.c                            |  4 +---
 drivers/gpu/drm/panel/panel-abt-y030xx067a.c          |  4 +---
 drivers/gpu/drm/panel/panel-ilitek-ili9322.c          |  4 +---
 drivers/gpu/drm/panel/panel-ilitek-ili9341.c          |  3 +--
 drivers/gpu/drm/panel/panel-innolux-ej030na.c         |  4 +---
 drivers/gpu/drm/panel/panel-lg-lb035q02.c             |  4 +---
 drivers/gpu/drm/panel/panel-lg-lg4573.c               |  4 +---
 drivers/gpu/drm/panel/panel-nec-nl8048hl11.c          |  4 +---
 drivers/gpu/drm/panel/panel-novatek-nt39016.c         |  4 +---
 drivers/gpu/drm/panel/panel-samsung-db7430.c          |  3 +--
 drivers/gpu/drm/panel/panel-samsung-ld9040.c          |  4 +---
 drivers/gpu/drm/panel/panel-samsung-s6d27a1.c         |  3 +--
 drivers/gpu/drm/panel/panel-samsung-s6e63m0-spi.c     |  3 +--
 drivers/gpu/drm/panel/panel-sitronix-st7789v.c        |  4 +---
 drivers/gpu/drm/panel/panel-sony-acx565akm.c          |  4 +---
 drivers/gpu/drm/panel/panel-tpo-td028ttec1.c          |  4 +---
 drivers/gpu/drm/panel/panel-tpo-td043mtea1.c          |  4 +---
 drivers/gpu/drm/panel/panel-tpo-tpg110.c              |  3 +--
 drivers/gpu/drm/panel/panel-widechips-ws2401.c        |  3 +--
 drivers/gpu/drm/tiny/hx8357d.c                        |  4 +---
 drivers/gpu/drm/tiny/ili9163.c                        |  4 +---
 drivers/gpu/drm/tiny/ili9225.c                        |  4 +---
 drivers/gpu/drm/tiny/ili9341.c                        |  4 +---
 drivers/gpu/drm/tiny/ili9486.c                        |  4 +---
 drivers/gpu/drm/tiny/mi0283qt.c                       |  4 +---
 drivers/gpu/drm/tiny/repaper.c                        |  4 +---
 drivers/gpu/drm/tiny/st7586.c                         |  4 +---
 drivers/gpu/drm/tiny/st7735r.c                        |  4 +---
 drivers/hwmon/adcxx.c                                 |  4 +---
 drivers/hwmon/adt7310.c                               |  3 +--
 drivers/hwmon/max1111.c                               |  3 +--
 drivers/hwmon/max31722.c                              |  4 +---
 drivers/iio/accel/bma400_spi.c                        |  4 +---
 drivers/iio/accel/bmc150-accel-spi.c                  |  4 +---
 drivers/iio/accel/bmi088-accel-spi.c                  |  4 +---
 drivers/iio/accel/kxsd9-spi.c                         |  4 +---
 drivers/iio/accel/mma7455_spi.c                       |  4 +---
 drivers/iio/accel/sca3000.c                           |  4 +---
 drivers/iio/adc/ad7266.c                              |  4 +---
 drivers/iio/adc/ltc2496.c                             |  4 +---
 drivers/iio/adc/mcp320x.c                             |  4 +---
 drivers/iio/adc/mcp3911.c                             |  4 +---
 drivers/iio/adc/ti-adc12138.c                         |  4 +---
 drivers/iio/adc/ti-ads7950.c                          |  4 +---
 drivers/iio/adc/ti-ads8688.c                          |  4 +---
 drivers/iio/adc/ti-tlc4541.c                          |  4 +---
 drivers/iio/amplifiers/ad8366.c                       |  4 +---
 drivers/iio/common/ssp_sensors/ssp_dev.c              |  4 +---
 drivers/iio/dac/ad5360.c                              |  4 +---
 drivers/iio/dac/ad5380.c                              |  4 +---
 drivers/iio/dac/ad5446.c                              |  4 +---
 drivers/iio/dac/ad5449.c                              |  4 +---
 drivers/iio/dac/ad5504.c                              |  4 +---
 drivers/iio/dac/ad5592r.c                             |  4 +---
 drivers/iio/dac/ad5624r_spi.c                         |  4 +---
 drivers/iio/dac/ad5686-spi.c                          |  4 +---
 drivers/iio/dac/ad5761.c                              |  4 +---
 drivers/iio/dac/ad5764.c                              |  4 +---
 drivers/iio/dac/ad5791.c                              |  4 +---
 drivers/iio/dac/ad8801.c                              |  4 +---
 drivers/iio/dac/ltc1660.c                             |  4 +---
 drivers/iio/dac/ltc2632.c                             |  4 +---
 drivers/iio/dac/mcp4922.c                             |  4 +---
 drivers/iio/dac/ti-dac082s085.c                       |  4 +---
 drivers/iio/dac/ti-dac7311.c                          |  3 +--
 drivers/iio/frequency/adf4350.c                       |  4 +---
 drivers/iio/gyro/bmg160_spi.c                         |  4 +---
 drivers/iio/gyro/fxas21002c_spi.c                     |  4 +---
 drivers/iio/health/afe4403.c                          |  4 +---
 drivers/iio/magnetometer/bmc150_magn_spi.c            |  4 +---
 drivers/iio/magnetometer/hmc5843_spi.c                |  4 +---
 drivers/iio/potentiometer/max5487.c                   |  4 +---
 drivers/iio/pressure/ms5611_spi.c                     |  4 +---
 drivers/iio/pressure/zpa2326_spi.c                    |  4 +---
 drivers/input/keyboard/applespi.c                     |  4 +---
 drivers/input/misc/adxl34x-spi.c                      |  4 +---
 drivers/input/touchscreen/ads7846.c                   |  4 +---
 drivers/input/touchscreen/cyttsp4_spi.c               |  4 +---
 drivers/input/touchscreen/tsc2005.c                   |  4 +---
 drivers/leds/leds-cr0014114.c                         |  4 +---
 drivers/leds/leds-dac124s085.c                        |  4 +---
 drivers/leds/leds-el15203000.c                        |  4 +---
 drivers/leds/leds-spi-byte.c                          |  4 +---
 drivers/media/spi/cxd2880-spi.c                       |  4 +---
 drivers/media/spi/gs1662.c                            |  4 +---
 drivers/media/tuners/msi001.c                         |  3 +--
 drivers/mfd/arizona-spi.c                             |  4 +---
 drivers/mfd/da9052-spi.c                              |  3 +--
 drivers/mfd/ezx-pcap.c                                |  4 +---
 drivers/mfd/madera-spi.c                              |  4 +---
 drivers/mfd/mc13xxx-spi.c                             |  3 +--
 drivers/mfd/rsmu_spi.c                                |  4 +---
 drivers/mfd/stmpe-spi.c                               |  4 +---
 drivers/mfd/tps65912-spi.c                            |  4 +---
 drivers/misc/ad525x_dpot-spi.c                        |  3 +--
 drivers/misc/eeprom/eeprom_93xx46.c                   |  4 +---
 drivers/misc/lattice-ecp3-config.c                    |  4 +---
 drivers/misc/lis3lv02d/lis3lv02d_spi.c                |  4 +---
 drivers/mmc/host/mmc_spi.c                            |  3 +--
 drivers/mtd/devices/mchp23k256.c                      |  4 +---
 drivers/mtd/devices/mchp48l640.c                      |  4 +---
 drivers/mtd/devices/mtd_dataflash.c                   |  4 +---
 drivers/mtd/devices/sst25l.c                          |  4 +---
 drivers/net/can/m_can/tcan4x5x-core.c                 |  4 +---
 drivers/net/can/spi/hi311x.c                          |  4 +---
 drivers/net/can/spi/mcp251x.c                         |  4 +---
 drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c        |  4 +---
 drivers/net/dsa/b53/b53_spi.c                         |  4 +---
 drivers/net/dsa/microchip/ksz8795_spi.c               |  4 +---
 drivers/net/dsa/microchip/ksz9477_spi.c               |  4 +---
 drivers/net/dsa/sja1105/sja1105_main.c                |  6 ++----
 drivers/net/dsa/vitesse-vsc73xx-spi.c                 |  6 ++----
 drivers/net/ethernet/asix/ax88796c_main.c             |  4 +---
 drivers/net/ethernet/micrel/ks8851_spi.c              |  4 +---
 drivers/net/ethernet/microchip/enc28j60.c             |  4 +---
 drivers/net/ethernet/microchip/encx24j600.c           |  4 +---
 drivers/net/ethernet/qualcomm/qca_spi.c               |  4 +---
 drivers/net/ethernet/vertexcom/mse102x.c              |  4 +---
 drivers/net/ethernet/wiznet/w5100-spi.c               |  4 +---
 drivers/net/ieee802154/adf7242.c                      |  4 +---
 drivers/net/ieee802154/at86rf230.c                    |  4 +---
 drivers/net/ieee802154/ca8210.c                       |  6 ++----
 drivers/net/ieee802154/cc2520.c                       |  4 +---
 drivers/net/ieee802154/mcr20a.c                       |  4 +---
 drivers/net/ieee802154/mrf24j40.c                     |  4 +---
 drivers/net/phy/spi_ks8995.c                          |  4 +---
 drivers/net/wan/slic_ds26522.c                        |  3 +--
 drivers/net/wireless/intersil/p54/p54spi.c            |  4 +---
 drivers/net/wireless/marvell/libertas/if_spi.c        |  4 +---
 drivers/net/wireless/microchip/wilc1000/spi.c         |  4 +---
 drivers/net/wireless/st/cw1200/cw1200_spi.c           |  4 +---
 drivers/net/wireless/ti/wl1251/spi.c                  |  4 +---
 drivers/net/wireless/ti/wlcore/spi.c                  |  4 +---
 drivers/nfc/nfcmrvl/spi.c                             |  3 +--
 drivers/nfc/st-nci/spi.c                              |  4 +---
 drivers/nfc/st95hf/core.c                             |  4 +---
 drivers/nfc/trf7970a.c                                |  4 +---
 drivers/platform/chrome/cros_ec_spi.c                 |  4 +---
 drivers/platform/olpc/olpc-xo175-ec.c                 |  4 +---
 drivers/rtc/rtc-ds1302.c                              |  3 +--
 drivers/rtc/rtc-ds1305.c                              |  4 +---
 drivers/rtc/rtc-ds1343.c                              |  4 +---
 drivers/spi/spi-mem.c                                 |  6 ++----
 drivers/spi/spi-slave-system-control.c                |  3 +--
 drivers/spi/spi-slave-time.c                          |  3 +--
 drivers/spi/spi-tle62x0.c                             |  3 +--
 drivers/spi/spi.c                                     | 11 ++---------
 drivers/spi/spidev.c                                  |  4 +---
 drivers/staging/fbtft/fbtft.h                         |  3 +--
 drivers/staging/pi433/pi433_if.c                      |  4 +---
 drivers/staging/wfx/bus_spi.c                         |  3 +--
 drivers/tty/serial/max3100.c                          |  5 ++---
 drivers/tty/serial/max310x.c                          |  3 +--
 drivers/tty/serial/sc16is7xx.c                        |  4 +---
 drivers/usb/gadget/udc/max3420_udc.c                  |  4 +---
 drivers/usb/host/max3421-hcd.c                        |  3 +--
 drivers/video/backlight/ams369fg06.c                  |  3 +--
 drivers/video/backlight/corgi_lcd.c                   |  3 +--
 drivers/video/backlight/ili922x.c                     |  3 +--
 drivers/video/backlight/l4f00242t03.c                 |  3 +--
 drivers/video/backlight/lms501kf03.c                  |  3 +--
 drivers/video/backlight/ltv350qv.c                    |  3 +--
 drivers/video/backlight/tdo24m.c                      |  3 +--
 drivers/video/backlight/tosa_lcd.c                    |  4 +---
 drivers/video/backlight/vgg2432a4.c                   |  4 +---
 drivers/video/fbdev/omap/lcd_mipid.c                  |  4 +---
 .../omap2/omapfb/displays/panel-lgphilips-lb035q02.c  |  4 +---
 .../omap2/omapfb/displays/panel-nec-nl8048hl11.c      |  4 +---
 .../omap2/omapfb/displays/panel-sony-acx565akm.c      |  4 +---
 .../omap2/omapfb/displays/panel-tpo-td028ttec1.c      |  4 +---
 .../omap2/omapfb/displays/panel-tpo-td043mtea1.c      |  4 +---
 include/linux/spi/spi.h                               |  2 +-
 sound/pci/hda/cs35l41_hda_spi.c                       |  4 +---
 sound/soc/codecs/adau1761-spi.c                       |  3 +--
 sound/soc/codecs/adau1781-spi.c                       |  3 +--
 sound/soc/codecs/cs35l41-spi.c                        |  4 +---
 sound/soc/codecs/pcm3168a-spi.c                       |  4 +---
 sound/soc/codecs/pcm512x-spi.c                        |  3 +--
 sound/soc/codecs/tlv320aic32x4-spi.c                  |  4 +---
 sound/soc/codecs/tlv320aic3x-spi.c                    |  4 +---
 sound/soc/codecs/wm0010.c                             |  4 +---
 sound/soc/codecs/wm8804-spi.c                         |  3 +--
 sound/spi/at73c213.c                                  |  4 +---
 191 files changed, 197 insertions(+), 545 deletions(-)

diff --git a/drivers/bus/moxtet.c b/drivers/bus/moxtet.c
index fd87a59837fa2..5eb0fe73ddc45 100644
--- a/drivers/bus/moxtet.c
+++ b/drivers/bus/moxtet.c
@@ -815,7 +815,7 @@ static int moxtet_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int moxtet_remove(struct spi_device *spi)
+static void moxtet_remove(struct spi_device *spi)
 {
 	struct moxtet *moxtet = spi_get_drvdata(spi);
 
@@ -828,8 +828,6 @@ static int moxtet_remove(struct spi_device *spi)
 	device_for_each_child(moxtet->dev, NULL, __unregister);
 
 	mutex_destroy(&moxtet->lock);
-
-	return 0;
 }
 
 static const struct of_device_id moxtet_dt_ids[] = {
diff --git a/drivers/char/tpm/st33zp24/spi.c b/drivers/char/tpm/st33zp24/spi.c
index ccd9e42b8eab4..22d184884694a 100644
--- a/drivers/char/tpm/st33zp24/spi.c
+++ b/drivers/char/tpm/st33zp24/spi.c
@@ -381,13 +381,11 @@ static int st33zp24_spi_probe(struct spi_device *dev)
  * @param: client, the spi_device description (TPM SPI description).
  * @return: 0 in case of success.
  */
-static int st33zp24_spi_remove(struct spi_device *dev)
+static void st33zp24_spi_remove(struct spi_device *dev)
 {
 	struct tpm_chip *chip = spi_get_drvdata(dev);
 
 	st33zp24_remove(chip);
-
-	return 0;
 }
 
 static const struct spi_device_id st33zp24_spi_id[] = {
diff --git a/drivers/char/tpm/tpm_tis_spi_main.c b/drivers/char/tpm/tpm_tis_spi_main.c
index aaa59a00eeaef..184396b3af501 100644
--- a/drivers/char/tpm/tpm_tis_spi_main.c
+++ b/drivers/char/tpm/tpm_tis_spi_main.c
@@ -254,13 +254,12 @@ static int tpm_tis_spi_driver_probe(struct spi_device *spi)
 
 static SIMPLE_DEV_PM_OPS(tpm_tis_pm, tpm_pm_suspend, tpm_tis_spi_resume);
 
-static int tpm_tis_spi_remove(struct spi_device *dev)
+static void tpm_tis_spi_remove(struct spi_device *dev)
 {
 	struct tpm_chip *chip = spi_get_drvdata(dev);
 
 	tpm_chip_unregister(chip);
 	tpm_tis_remove(chip);
-	return 0;
 }
 
 static const struct spi_device_id tpm_tis_spi_id[] = {
diff --git a/drivers/clk/clk-lmk04832.c b/drivers/clk/clk-lmk04832.c
index 8f02c0b880005..f416f8bc28987 100644
--- a/drivers/clk/clk-lmk04832.c
+++ b/drivers/clk/clk-lmk04832.c
@@ -1544,14 +1544,12 @@ err_disable_oscin:
 	return ret;
 }
 
-static int lmk04832_remove(struct spi_device *spi)
+static void lmk04832_remove(struct spi_device *spi)
 {
 	struct lmk04832 *lmk = spi_get_drvdata(spi);
 
 	clk_disable_unprepare(lmk->oscin);
 	of_clk_del_provider(spi->dev.of_node);
-
-	return 0;
 }
 static const struct spi_device_id lmk04832_id[] = {
 	{ "lmk04832", LMK04832 },
diff --git a/drivers/gpio/gpio-74x164.c b/drivers/gpio/gpio-74x164.c
index 4a55cdf089d62..e00c333105170 100644
--- a/drivers/gpio/gpio-74x164.c
+++ b/drivers/gpio/gpio-74x164.c
@@ -163,15 +163,13 @@ exit_destroy:
 	return ret;
 }
 
-static int gen_74x164_remove(struct spi_device *spi)
+static void gen_74x164_remove(struct spi_device *spi)
 {
 	struct gen_74x164_chip *chip = spi_get_drvdata(spi);
 
 	gpiod_set_value_cansleep(chip->gpiod_oe, 0);
 	gpiochip_remove(&chip->gpio_chip);
 	mutex_destroy(&chip->lock);
-
-	return 0;
 }
 
 static const struct spi_device_id gen_74x164_spi_ids[] = {
diff --git a/drivers/gpio/gpio-max3191x.c b/drivers/gpio/gpio-max3191x.c
index 51cd6f98d1c74..161c4751c5f7a 100644
--- a/drivers/gpio/gpio-max3191x.c
+++ b/drivers/gpio/gpio-max3191x.c
@@ -443,14 +443,12 @@ static int max3191x_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int max3191x_remove(struct spi_device *spi)
+static void max3191x_remove(struct spi_device *spi)
 {
 	struct max3191x_chip *max3191x = spi_get_drvdata(spi);
 
 	gpiochip_remove(&max3191x->gpio);
 	mutex_destroy(&max3191x->lock);
-
-	return 0;
 }
 
 static int __init max3191x_register_driver(struct spi_driver *sdrv)
diff --git a/drivers/gpio/gpio-max7301.c b/drivers/gpio/gpio-max7301.c
index 5862d73bf3254..11813f41d4609 100644
--- a/drivers/gpio/gpio-max7301.c
+++ b/drivers/gpio/gpio-max7301.c
@@ -64,11 +64,9 @@ static int max7301_probe(struct spi_device *spi)
 	return ret;
 }
 
-static int max7301_remove(struct spi_device *spi)
+static void max7301_remove(struct spi_device *spi)
 {
 	__max730x_remove(&spi->dev);
-
-	return 0;
 }
 
 static const struct spi_device_id max7301_id[] = {
diff --git a/drivers/gpio/gpio-mc33880.c b/drivers/gpio/gpio-mc33880.c
index 31d2be1bebc84..cd9b16dbe1a97 100644
--- a/drivers/gpio/gpio-mc33880.c
+++ b/drivers/gpio/gpio-mc33880.c
@@ -134,7 +134,7 @@ exit_destroy:
 	return ret;
 }
 
-static int mc33880_remove(struct spi_device *spi)
+static void mc33880_remove(struct spi_device *spi)
 {
 	struct mc33880 *mc;
 
@@ -142,8 +142,6 @@ static int mc33880_remove(struct spi_device *spi)
 
 	gpiochip_remove(&mc->chip);
 	mutex_destroy(&mc->lock);
-
-	return 0;
 }
 
 static struct spi_driver mc33880_driver = {
diff --git a/drivers/gpio/gpio-pisosr.c b/drivers/gpio/gpio-pisosr.c
index 8e04054cf07e3..81a47ae09ff8c 100644
--- a/drivers/gpio/gpio-pisosr.c
+++ b/drivers/gpio/gpio-pisosr.c
@@ -163,15 +163,13 @@ static int pisosr_gpio_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int pisosr_gpio_remove(struct spi_device *spi)
+static void pisosr_gpio_remove(struct spi_device *spi)
 {
 	struct pisosr_gpio *gpio = spi_get_drvdata(spi);
 
 	gpiochip_remove(&gpio->chip);
 
 	mutex_destroy(&gpio->lock);
-
-	return 0;
 }
 
 static const struct spi_device_id pisosr_gpio_id_table[] = {
diff --git a/drivers/gpu/drm/panel/panel-abt-y030xx067a.c b/drivers/gpu/drm/panel/panel-abt-y030xx067a.c
index f043b484055b2..ed626fdc08e88 100644
--- a/drivers/gpu/drm/panel/panel-abt-y030xx067a.c
+++ b/drivers/gpu/drm/panel/panel-abt-y030xx067a.c
@@ -293,15 +293,13 @@ static int y030xx067a_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int y030xx067a_remove(struct spi_device *spi)
+static void y030xx067a_remove(struct spi_device *spi)
 {
 	struct y030xx067a *priv = spi_get_drvdata(spi);
 
 	drm_panel_remove(&priv->panel);
 	drm_panel_disable(&priv->panel);
 	drm_panel_unprepare(&priv->panel);
-
-	return 0;
 }
 
 static const struct drm_display_mode y030xx067a_modes[] = {
diff --git a/drivers/gpu/drm/panel/panel-ilitek-ili9322.c b/drivers/gpu/drm/panel/panel-ilitek-ili9322.c
index 8e84df9a0033d..3dfafa585127f 100644
--- a/drivers/gpu/drm/panel/panel-ilitek-ili9322.c
+++ b/drivers/gpu/drm/panel/panel-ilitek-ili9322.c
@@ -896,14 +896,12 @@ static int ili9322_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int ili9322_remove(struct spi_device *spi)
+static void ili9322_remove(struct spi_device *spi)
 {
 	struct ili9322 *ili = spi_get_drvdata(spi);
 
 	ili9322_power_off(ili);
 	drm_panel_remove(&ili->panel);
-
-	return 0;
 }
 
 /*
diff --git a/drivers/gpu/drm/panel/panel-ilitek-ili9341.c b/drivers/gpu/drm/panel/panel-ilitek-ili9341.c
index 2c3378a259b1e..a07ef26234e57 100644
--- a/drivers/gpu/drm/panel/panel-ilitek-ili9341.c
+++ b/drivers/gpu/drm/panel/panel-ilitek-ili9341.c
@@ -728,7 +728,7 @@ static int ili9341_probe(struct spi_device *spi)
 	return -1;
 }
 
-static int ili9341_remove(struct spi_device *spi)
+static void ili9341_remove(struct spi_device *spi)
 {
 	const struct spi_device_id *id = spi_get_device_id(spi);
 	struct ili9341 *ili = spi_get_drvdata(spi);
@@ -741,7 +741,6 @@ static int ili9341_remove(struct spi_device *spi)
 		drm_dev_unplug(drm);
 		drm_atomic_helper_shutdown(drm);
 	}
-	return 0;
 }
 
 static void ili9341_shutdown(struct spi_device *spi)
diff --git a/drivers/gpu/drm/panel/panel-innolux-ej030na.c b/drivers/gpu/drm/panel/panel-innolux-ej030na.c
index c558de3f99be7..e3b1daa0cb720 100644
--- a/drivers/gpu/drm/panel/panel-innolux-ej030na.c
+++ b/drivers/gpu/drm/panel/panel-innolux-ej030na.c
@@ -219,15 +219,13 @@ static int ej030na_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int ej030na_remove(struct spi_device *spi)
+static void ej030na_remove(struct spi_device *spi)
 {
 	struct ej030na *priv = spi_get_drvdata(spi);
 
 	drm_panel_remove(&priv->panel);
 	drm_panel_disable(&priv->panel);
 	drm_panel_unprepare(&priv->panel);
-
-	return 0;
 }
 
 static const struct drm_display_mode ej030na_modes[] = {
diff --git a/drivers/gpu/drm/panel/panel-lg-lb035q02.c b/drivers/gpu/drm/panel/panel-lg-lb035q02.c
index f3183b68704f6..9d0d4faa3f58a 100644
--- a/drivers/gpu/drm/panel/panel-lg-lb035q02.c
+++ b/drivers/gpu/drm/panel/panel-lg-lb035q02.c
@@ -203,14 +203,12 @@ static int lb035q02_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int lb035q02_remove(struct spi_device *spi)
+static void lb035q02_remove(struct spi_device *spi)
 {
 	struct lb035q02_device *lcd = spi_get_drvdata(spi);
 
 	drm_panel_remove(&lcd->panel);
 	drm_panel_disable(&lcd->panel);
-
-	return 0;
 }
 
 static const struct of_device_id lb035q02_of_match[] = {
diff --git a/drivers/gpu/drm/panel/panel-lg-lg4573.c b/drivers/gpu/drm/panel/panel-lg-lg4573.c
index 8e5160af1de5d..cf246d15b7b62 100644
--- a/drivers/gpu/drm/panel/panel-lg-lg4573.c
+++ b/drivers/gpu/drm/panel/panel-lg-lg4573.c
@@ -266,14 +266,12 @@ static int lg4573_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int lg4573_remove(struct spi_device *spi)
+static void lg4573_remove(struct spi_device *spi)
 {
 	struct lg4573 *ctx = spi_get_drvdata(spi);
 
 	lg4573_display_off(ctx);
 	drm_panel_remove(&ctx->panel);
-
-	return 0;
 }
 
 static const struct of_device_id lg4573_of_match[] = {
diff --git a/drivers/gpu/drm/panel/panel-nec-nl8048hl11.c b/drivers/gpu/drm/panel/panel-nec-nl8048hl11.c
index 6e5ab1debc8b4..81c5c541a351c 100644
--- a/drivers/gpu/drm/panel/panel-nec-nl8048hl11.c
+++ b/drivers/gpu/drm/panel/panel-nec-nl8048hl11.c
@@ -212,15 +212,13 @@ static int nl8048_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int nl8048_remove(struct spi_device *spi)
+static void nl8048_remove(struct spi_device *spi)
 {
 	struct nl8048_panel *lcd = spi_get_drvdata(spi);
 
 	drm_panel_remove(&lcd->panel);
 	drm_panel_disable(&lcd->panel);
 	drm_panel_unprepare(&lcd->panel);
-
-	return 0;
 }
 
 static const struct of_device_id nl8048_of_match[] = {
diff --git a/drivers/gpu/drm/panel/panel-novatek-nt39016.c b/drivers/gpu/drm/panel/panel-novatek-nt39016.c
index d036853db865f..f58cfb10b58a2 100644
--- a/drivers/gpu/drm/panel/panel-novatek-nt39016.c
+++ b/drivers/gpu/drm/panel/panel-novatek-nt39016.c
@@ -292,7 +292,7 @@ static int nt39016_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int nt39016_remove(struct spi_device *spi)
+static void nt39016_remove(struct spi_device *spi)
 {
 	struct nt39016 *panel = spi_get_drvdata(spi);
 
@@ -300,8 +300,6 @@ static int nt39016_remove(struct spi_device *spi)
 
 	nt39016_disable(&panel->drm_panel);
 	nt39016_unprepare(&panel->drm_panel);
-
-	return 0;
 }
 
 static const struct drm_display_mode kd035g6_display_modes[] = {
diff --git a/drivers/gpu/drm/panel/panel-samsung-db7430.c b/drivers/gpu/drm/panel/panel-samsung-db7430.c
index ead479719f008..04640c5256a84 100644
--- a/drivers/gpu/drm/panel/panel-samsung-db7430.c
+++ b/drivers/gpu/drm/panel/panel-samsung-db7430.c
@@ -314,12 +314,11 @@ static int db7430_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int db7430_remove(struct spi_device *spi)
+static void db7430_remove(struct spi_device *spi)
 {
 	struct db7430 *db = spi_get_drvdata(spi);
 
 	drm_panel_remove(&db->panel);
-	return 0;
 }
 
 /*
diff --git a/drivers/gpu/drm/panel/panel-samsung-ld9040.c b/drivers/gpu/drm/panel/panel-samsung-ld9040.c
index c4b388850a13e..01eb211f32f75 100644
--- a/drivers/gpu/drm/panel/panel-samsung-ld9040.c
+++ b/drivers/gpu/drm/panel/panel-samsung-ld9040.c
@@ -358,14 +358,12 @@ static int ld9040_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int ld9040_remove(struct spi_device *spi)
+static void ld9040_remove(struct spi_device *spi)
 {
 	struct ld9040 *ctx = spi_get_drvdata(spi);
 
 	ld9040_power_off(ctx);
 	drm_panel_remove(&ctx->panel);
-
-	return 0;
 }
 
 static const struct of_device_id ld9040_of_match[] = {
diff --git a/drivers/gpu/drm/panel/panel-samsung-s6d27a1.c b/drivers/gpu/drm/panel/panel-samsung-s6d27a1.c
index 1696ceb36aa02..2adb223a895c9 100644
--- a/drivers/gpu/drm/panel/panel-samsung-s6d27a1.c
+++ b/drivers/gpu/drm/panel/panel-samsung-s6d27a1.c
@@ -291,12 +291,11 @@ static int s6d27a1_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int s6d27a1_remove(struct spi_device *spi)
+static void s6d27a1_remove(struct spi_device *spi)
 {
 	struct s6d27a1 *ctx = spi_get_drvdata(spi);
 
 	drm_panel_remove(&ctx->panel);
-	return 0;
 }
 
 static const struct of_device_id s6d27a1_match[] = {
diff --git a/drivers/gpu/drm/panel/panel-samsung-s6e63m0-spi.c b/drivers/gpu/drm/panel/panel-samsung-s6e63m0-spi.c
index c178d962b0d51..d99afcc672ca1 100644
--- a/drivers/gpu/drm/panel/panel-samsung-s6e63m0-spi.c
+++ b/drivers/gpu/drm/panel/panel-samsung-s6e63m0-spi.c
@@ -62,10 +62,9 @@ static int s6e63m0_spi_probe(struct spi_device *spi)
 			     s6e63m0_spi_dcs_write, false);
 }
 
-static int s6e63m0_spi_remove(struct spi_device *spi)
+static void s6e63m0_spi_remove(struct spi_device *spi)
 {
 	s6e63m0_remove(&spi->dev);
-	return 0;
 }
 
 static const struct of_device_id s6e63m0_spi_of_match[] = {
diff --git a/drivers/gpu/drm/panel/panel-sitronix-st7789v.c b/drivers/gpu/drm/panel/panel-sitronix-st7789v.c
index 61e565524542c..bbc4569cbcdc2 100644
--- a/drivers/gpu/drm/panel/panel-sitronix-st7789v.c
+++ b/drivers/gpu/drm/panel/panel-sitronix-st7789v.c
@@ -387,13 +387,11 @@ static int st7789v_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int st7789v_remove(struct spi_device *spi)
+static void st7789v_remove(struct spi_device *spi)
 {
 	struct st7789v *ctx = spi_get_drvdata(spi);
 
 	drm_panel_remove(&ctx->panel);
-
-	return 0;
 }
 
 static const struct of_device_id st7789v_of_match[] = {
diff --git a/drivers/gpu/drm/panel/panel-sony-acx565akm.c b/drivers/gpu/drm/panel/panel-sony-acx565akm.c
index ba0b3ead150f9..0d7541a33f87f 100644
--- a/drivers/gpu/drm/panel/panel-sony-acx565akm.c
+++ b/drivers/gpu/drm/panel/panel-sony-acx565akm.c
@@ -655,7 +655,7 @@ static int acx565akm_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int acx565akm_remove(struct spi_device *spi)
+static void acx565akm_remove(struct spi_device *spi)
 {
 	struct acx565akm_panel *lcd = spi_get_drvdata(spi);
 
@@ -666,8 +666,6 @@ static int acx565akm_remove(struct spi_device *spi)
 
 	drm_panel_disable(&lcd->panel);
 	drm_panel_unprepare(&lcd->panel);
-
-	return 0;
 }
 
 static const struct of_device_id acx565akm_of_match[] = {
diff --git a/drivers/gpu/drm/panel/panel-tpo-td028ttec1.c b/drivers/gpu/drm/panel/panel-tpo-td028ttec1.c
index ba0c00d1a001c..4dbf8b88f264f 100644
--- a/drivers/gpu/drm/panel/panel-tpo-td028ttec1.c
+++ b/drivers/gpu/drm/panel/panel-tpo-td028ttec1.c
@@ -350,15 +350,13 @@ static int td028ttec1_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int td028ttec1_remove(struct spi_device *spi)
+static void td028ttec1_remove(struct spi_device *spi)
 {
 	struct td028ttec1_panel *lcd = spi_get_drvdata(spi);
 
 	drm_panel_remove(&lcd->panel);
 	drm_panel_disable(&lcd->panel);
 	drm_panel_unprepare(&lcd->panel);
-
-	return 0;
 }
 
 static const struct of_device_id td028ttec1_of_match[] = {
diff --git a/drivers/gpu/drm/panel/panel-tpo-td043mtea1.c b/drivers/gpu/drm/panel/panel-tpo-td043mtea1.c
index 1866cdb8f9c1d..cf4609bb9b1da 100644
--- a/drivers/gpu/drm/panel/panel-tpo-td043mtea1.c
+++ b/drivers/gpu/drm/panel/panel-tpo-td043mtea1.c
@@ -463,7 +463,7 @@ static int td043mtea1_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int td043mtea1_remove(struct spi_device *spi)
+static void td043mtea1_remove(struct spi_device *spi)
 {
 	struct td043mtea1_panel *lcd = spi_get_drvdata(spi);
 
@@ -472,8 +472,6 @@ static int td043mtea1_remove(struct spi_device *spi)
 	drm_panel_unprepare(&lcd->panel);
 
 	sysfs_remove_group(&spi->dev.kobj, &td043mtea1_attr_group);
-
-	return 0;
 }
 
 static const struct of_device_id td043mtea1_of_match[] = {
diff --git a/drivers/gpu/drm/panel/panel-tpo-tpg110.c b/drivers/gpu/drm/panel/panel-tpo-tpg110.c
index e3791dad6830c..0b1f5a11a0554 100644
--- a/drivers/gpu/drm/panel/panel-tpo-tpg110.c
+++ b/drivers/gpu/drm/panel/panel-tpo-tpg110.c
@@ -450,12 +450,11 @@ static int tpg110_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int tpg110_remove(struct spi_device *spi)
+static void tpg110_remove(struct spi_device *spi)
 {
 	struct tpg110 *tpg = spi_get_drvdata(spi);
 
 	drm_panel_remove(&tpg->panel);
-	return 0;
 }
 
 static const struct of_device_id tpg110_match[] = {
diff --git a/drivers/gpu/drm/panel/panel-widechips-ws2401.c b/drivers/gpu/drm/panel/panel-widechips-ws2401.c
index 8bc976f54b801..236f3cb2b594e 100644
--- a/drivers/gpu/drm/panel/panel-widechips-ws2401.c
+++ b/drivers/gpu/drm/panel/panel-widechips-ws2401.c
@@ -407,12 +407,11 @@ static int ws2401_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int ws2401_remove(struct spi_device *spi)
+static void ws2401_remove(struct spi_device *spi)
 {
 	struct ws2401 *ws = spi_get_drvdata(spi);
 
 	drm_panel_remove(&ws->panel);
-	return 0;
 }
 
 /*
diff --git a/drivers/gpu/drm/tiny/hx8357d.c b/drivers/gpu/drm/tiny/hx8357d.c
index 9b33c05732aa8..ebb025543f8de 100644
--- a/drivers/gpu/drm/tiny/hx8357d.c
+++ b/drivers/gpu/drm/tiny/hx8357d.c
@@ -263,14 +263,12 @@ static int hx8357d_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int hx8357d_remove(struct spi_device *spi)
+static void hx8357d_remove(struct spi_device *spi)
 {
 	struct drm_device *drm = spi_get_drvdata(spi);
 
 	drm_dev_unplug(drm);
 	drm_atomic_helper_shutdown(drm);
-
-	return 0;
 }
 
 static void hx8357d_shutdown(struct spi_device *spi)
diff --git a/drivers/gpu/drm/tiny/ili9163.c b/drivers/gpu/drm/tiny/ili9163.c
index bcc1813512369..fc8ed245b0bc9 100644
--- a/drivers/gpu/drm/tiny/ili9163.c
+++ b/drivers/gpu/drm/tiny/ili9163.c
@@ -193,14 +193,12 @@ static int ili9163_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int ili9163_remove(struct spi_device *spi)
+static void ili9163_remove(struct spi_device *spi)
 {
 	struct drm_device *drm = spi_get_drvdata(spi);
 
 	drm_dev_unplug(drm);
 	drm_atomic_helper_shutdown(drm);
-
-	return 0;
 }
 
 static void ili9163_shutdown(struct spi_device *spi)
diff --git a/drivers/gpu/drm/tiny/ili9225.c b/drivers/gpu/drm/tiny/ili9225.c
index 976d3209f1647..cc92eb9f2a07b 100644
--- a/drivers/gpu/drm/tiny/ili9225.c
+++ b/drivers/gpu/drm/tiny/ili9225.c
@@ -411,14 +411,12 @@ static int ili9225_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int ili9225_remove(struct spi_device *spi)
+static void ili9225_remove(struct spi_device *spi)
 {
 	struct drm_device *drm = spi_get_drvdata(spi);
 
 	drm_dev_unplug(drm);
 	drm_atomic_helper_shutdown(drm);
-
-	return 0;
 }
 
 static void ili9225_shutdown(struct spi_device *spi)
diff --git a/drivers/gpu/drm/tiny/ili9341.c b/drivers/gpu/drm/tiny/ili9341.c
index 37e0c33399c8f..5b8cc770ee7b9 100644
--- a/drivers/gpu/drm/tiny/ili9341.c
+++ b/drivers/gpu/drm/tiny/ili9341.c
@@ -225,14 +225,12 @@ static int ili9341_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int ili9341_remove(struct spi_device *spi)
+static void ili9341_remove(struct spi_device *spi)
 {
 	struct drm_device *drm = spi_get_drvdata(spi);
 
 	drm_dev_unplug(drm);
 	drm_atomic_helper_shutdown(drm);
-
-	return 0;
 }
 
 static void ili9341_shutdown(struct spi_device *spi)
diff --git a/drivers/gpu/drm/tiny/ili9486.c b/drivers/gpu/drm/tiny/ili9486.c
index e9a63f4b2993c..6d655e18e0aa2 100644
--- a/drivers/gpu/drm/tiny/ili9486.c
+++ b/drivers/gpu/drm/tiny/ili9486.c
@@ -243,14 +243,12 @@ static int ili9486_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int ili9486_remove(struct spi_device *spi)
+static void ili9486_remove(struct spi_device *spi)
 {
 	struct drm_device *drm = spi_get_drvdata(spi);
 
 	drm_dev_unplug(drm);
 	drm_atomic_helper_shutdown(drm);
-
-	return 0;
 }
 
 static void ili9486_shutdown(struct spi_device *spi)
diff --git a/drivers/gpu/drm/tiny/mi0283qt.c b/drivers/gpu/drm/tiny/mi0283qt.c
index 023de49e7a8e6..5e060f6910bb4 100644
--- a/drivers/gpu/drm/tiny/mi0283qt.c
+++ b/drivers/gpu/drm/tiny/mi0283qt.c
@@ -233,14 +233,12 @@ static int mi0283qt_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int mi0283qt_remove(struct spi_device *spi)
+static void mi0283qt_remove(struct spi_device *spi)
 {
 	struct drm_device *drm = spi_get_drvdata(spi);
 
 	drm_dev_unplug(drm);
 	drm_atomic_helper_shutdown(drm);
-
-	return 0;
 }
 
 static void mi0283qt_shutdown(struct spi_device *spi)
diff --git a/drivers/gpu/drm/tiny/repaper.c b/drivers/gpu/drm/tiny/repaper.c
index 97a775c48cea7..beeeb170d0b19 100644
--- a/drivers/gpu/drm/tiny/repaper.c
+++ b/drivers/gpu/drm/tiny/repaper.c
@@ -1140,14 +1140,12 @@ static int repaper_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int repaper_remove(struct spi_device *spi)
+static void repaper_remove(struct spi_device *spi)
 {
 	struct drm_device *drm = spi_get_drvdata(spi);
 
 	drm_dev_unplug(drm);
 	drm_atomic_helper_shutdown(drm);
-
-	return 0;
 }
 
 static void repaper_shutdown(struct spi_device *spi)
diff --git a/drivers/gpu/drm/tiny/st7586.c b/drivers/gpu/drm/tiny/st7586.c
index 51b9b9fb3ead1..3f38faa1cd8c8 100644
--- a/drivers/gpu/drm/tiny/st7586.c
+++ b/drivers/gpu/drm/tiny/st7586.c
@@ -360,14 +360,12 @@ static int st7586_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int st7586_remove(struct spi_device *spi)
+static void st7586_remove(struct spi_device *spi)
 {
 	struct drm_device *drm = spi_get_drvdata(spi);
 
 	drm_dev_unplug(drm);
 	drm_atomic_helper_shutdown(drm);
-
-	return 0;
 }
 
 static void st7586_shutdown(struct spi_device *spi)
diff --git a/drivers/gpu/drm/tiny/st7735r.c b/drivers/gpu/drm/tiny/st7735r.c
index fc40dd10efa8f..29d618093e946 100644
--- a/drivers/gpu/drm/tiny/st7735r.c
+++ b/drivers/gpu/drm/tiny/st7735r.c
@@ -247,14 +247,12 @@ static int st7735r_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int st7735r_remove(struct spi_device *spi)
+static void st7735r_remove(struct spi_device *spi)
 {
 	struct drm_device *drm = spi_get_drvdata(spi);
 
 	drm_dev_unplug(drm);
 	drm_atomic_helper_shutdown(drm);
-
-	return 0;
 }
 
 static void st7735r_shutdown(struct spi_device *spi)
diff --git a/drivers/hwmon/adcxx.c b/drivers/hwmon/adcxx.c
index e5bc5ce09f4e7..de37bce24fa68 100644
--- a/drivers/hwmon/adcxx.c
+++ b/drivers/hwmon/adcxx.c
@@ -194,7 +194,7 @@ out_err:
 	return status;
 }
 
-static int adcxx_remove(struct spi_device *spi)
+static void adcxx_remove(struct spi_device *spi)
 {
 	struct adcxx *adc = spi_get_drvdata(spi);
 	int i;
@@ -205,8 +205,6 @@ static int adcxx_remove(struct spi_device *spi)
 		device_remove_file(&spi->dev, &ad_input[i].dev_attr);
 
 	mutex_unlock(&adc->lock);
-
-	return 0;
 }
 
 static const struct spi_device_id adcxx_ids[] = {
diff --git a/drivers/hwmon/adt7310.c b/drivers/hwmon/adt7310.c
index c40cac16af683..832d9ec64934d 100644
--- a/drivers/hwmon/adt7310.c
+++ b/drivers/hwmon/adt7310.c
@@ -88,10 +88,9 @@ static int adt7310_spi_probe(struct spi_device *spi)
 			&adt7310_spi_ops);
 }
 
-static int adt7310_spi_remove(struct spi_device *spi)
+static void adt7310_spi_remove(struct spi_device *spi)
 {
 	adt7x10_remove(&spi->dev, spi->irq);
-	return 0;
 }
 
 static const struct spi_device_id adt7310_id[] = {
diff --git a/drivers/hwmon/max1111.c b/drivers/hwmon/max1111.c
index 5fcfd57df61e1..4c5487aeb3cfc 100644
--- a/drivers/hwmon/max1111.c
+++ b/drivers/hwmon/max1111.c
@@ -254,7 +254,7 @@ err_remove:
 	return err;
 }
 
-static int max1111_remove(struct spi_device *spi)
+static void max1111_remove(struct spi_device *spi)
 {
 	struct max1111_data *data = spi_get_drvdata(spi);
 
@@ -265,7 +265,6 @@ static int max1111_remove(struct spi_device *spi)
 	sysfs_remove_group(&spi->dev.kobj, &max1110_attr_group);
 	sysfs_remove_group(&spi->dev.kobj, &max1111_attr_group);
 	mutex_destroy(&data->drvdata_lock);
-	return 0;
 }
 
 static const struct spi_device_id max1111_ids[] = {
diff --git a/drivers/hwmon/max31722.c b/drivers/hwmon/max31722.c
index 4cf4fe6809a32..93e048ee49557 100644
--- a/drivers/hwmon/max31722.c
+++ b/drivers/hwmon/max31722.c
@@ -100,7 +100,7 @@ static int max31722_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int max31722_remove(struct spi_device *spi)
+static void max31722_remove(struct spi_device *spi)
 {
 	struct max31722_data *data = spi_get_drvdata(spi);
 	int ret;
@@ -111,8 +111,6 @@ static int max31722_remove(struct spi_device *spi)
 	if (ret)
 		/* There is nothing we can do about this ... */
 		dev_warn(&spi->dev, "Failed to put device in stand-by mode\n");
-
-	return 0;
 }
 
 static int __maybe_unused max31722_suspend(struct device *dev)
diff --git a/drivers/iio/accel/bma400_spi.c b/drivers/iio/accel/bma400_spi.c
index 9f622e37477b4..9040a717b2470 100644
--- a/drivers/iio/accel/bma400_spi.c
+++ b/drivers/iio/accel/bma400_spi.c
@@ -87,11 +87,9 @@ static int bma400_spi_probe(struct spi_device *spi)
 	return bma400_probe(&spi->dev, regmap, id->name);
 }
 
-static int bma400_spi_remove(struct spi_device *spi)
+static void bma400_spi_remove(struct spi_device *spi)
 {
 	bma400_remove(&spi->dev);
-
-	return 0;
 }
 
 static const struct spi_device_id bma400_spi_ids[] = {
diff --git a/drivers/iio/accel/bmc150-accel-spi.c b/drivers/iio/accel/bmc150-accel-spi.c
index 11559567cb398..80007cc2d0446 100644
--- a/drivers/iio/accel/bmc150-accel-spi.c
+++ b/drivers/iio/accel/bmc150-accel-spi.c
@@ -35,11 +35,9 @@ static int bmc150_accel_probe(struct spi_device *spi)
 				       true);
 }
 
-static int bmc150_accel_remove(struct spi_device *spi)
+static void bmc150_accel_remove(struct spi_device *spi)
 {
 	bmc150_accel_core_remove(&spi->dev);
-
-	return 0;
 }
 
 static const struct acpi_device_id bmc150_accel_acpi_match[] = {
diff --git a/drivers/iio/accel/bmi088-accel-spi.c b/drivers/iio/accel/bmi088-accel-spi.c
index 758ad2f128962..06d99d9949f3f 100644
--- a/drivers/iio/accel/bmi088-accel-spi.c
+++ b/drivers/iio/accel/bmi088-accel-spi.c
@@ -56,11 +56,9 @@ static int bmi088_accel_probe(struct spi_device *spi)
 				       true);
 }
 
-static int bmi088_accel_remove(struct spi_device *spi)
+static void bmi088_accel_remove(struct spi_device *spi)
 {
 	bmi088_accel_core_remove(&spi->dev);
-
-	return 0;
 }
 
 static const struct spi_device_id bmi088_accel_id[] = {
diff --git a/drivers/iio/accel/kxsd9-spi.c b/drivers/iio/accel/kxsd9-spi.c
index 441e6b764281c..57c451cfb9e5d 100644
--- a/drivers/iio/accel/kxsd9-spi.c
+++ b/drivers/iio/accel/kxsd9-spi.c
@@ -32,11 +32,9 @@ static int kxsd9_spi_probe(struct spi_device *spi)
 				  spi_get_device_id(spi)->name);
 }
 
-static int kxsd9_spi_remove(struct spi_device *spi)
+static void kxsd9_spi_remove(struct spi_device *spi)
 {
 	kxsd9_common_remove(&spi->dev);
-
-	return 0;
 }
 
 static const struct spi_device_id kxsd9_spi_id[] = {
diff --git a/drivers/iio/accel/mma7455_spi.c b/drivers/iio/accel/mma7455_spi.c
index ecf690692dcc8..b746031551a39 100644
--- a/drivers/iio/accel/mma7455_spi.c
+++ b/drivers/iio/accel/mma7455_spi.c
@@ -22,11 +22,9 @@ static int mma7455_spi_probe(struct spi_device *spi)
 	return mma7455_core_probe(&spi->dev, regmap, id->name);
 }
 
-static int mma7455_spi_remove(struct spi_device *spi)
+static void mma7455_spi_remove(struct spi_device *spi)
 {
 	mma7455_core_remove(&spi->dev);
-
-	return 0;
 }
 
 static const struct spi_device_id mma7455_spi_ids[] = {
diff --git a/drivers/iio/accel/sca3000.c b/drivers/iio/accel/sca3000.c
index 43ecacbdc95ae..83c81072511ee 100644
--- a/drivers/iio/accel/sca3000.c
+++ b/drivers/iio/accel/sca3000.c
@@ -1524,7 +1524,7 @@ error_ret:
 	return ret;
 }
 
-static int sca3000_remove(struct spi_device *spi)
+static void sca3000_remove(struct spi_device *spi)
 {
 	struct iio_dev *indio_dev = spi_get_drvdata(spi);
 	struct sca3000_state *st = iio_priv(indio_dev);
@@ -1535,8 +1535,6 @@ static int sca3000_remove(struct spi_device *spi)
 	sca3000_stop_all_interrupts(st);
 	if (spi->irq)
 		free_irq(spi->irq, indio_dev);
-
-	return 0;
 }
 
 static const struct spi_device_id sca3000_id[] = {
diff --git a/drivers/iio/adc/ad7266.c b/drivers/iio/adc/ad7266.c
index 1d345d66742d8..c17d9b5fbaf64 100644
--- a/drivers/iio/adc/ad7266.c
+++ b/drivers/iio/adc/ad7266.c
@@ -479,7 +479,7 @@ error_disable_reg:
 	return ret;
 }
 
-static int ad7266_remove(struct spi_device *spi)
+static void ad7266_remove(struct spi_device *spi)
 {
 	struct iio_dev *indio_dev = spi_get_drvdata(spi);
 	struct ad7266_state *st = iio_priv(indio_dev);
@@ -488,8 +488,6 @@ static int ad7266_remove(struct spi_device *spi)
 	iio_triggered_buffer_cleanup(indio_dev);
 	if (!IS_ERR(st->reg))
 		regulator_disable(st->reg);
-
-	return 0;
 }
 
 static const struct spi_device_id ad7266_id[] = {
diff --git a/drivers/iio/adc/ltc2496.c b/drivers/iio/adc/ltc2496.c
index dd956a7c216e1..5a55f79f25749 100644
--- a/drivers/iio/adc/ltc2496.c
+++ b/drivers/iio/adc/ltc2496.c
@@ -78,13 +78,11 @@ static int ltc2496_probe(struct spi_device *spi)
 	return ltc2497core_probe(dev, indio_dev);
 }
 
-static int ltc2496_remove(struct spi_device *spi)
+static void ltc2496_remove(struct spi_device *spi)
 {
 	struct iio_dev *indio_dev = spi_get_drvdata(spi);
 
 	ltc2497core_remove(indio_dev);
-
-	return 0;
 }
 
 static const struct of_device_id ltc2496_of_match[] = {
diff --git a/drivers/iio/adc/mcp320x.c b/drivers/iio/adc/mcp320x.c
index 8d1cff28cae0a..b4c69acb33e34 100644
--- a/drivers/iio/adc/mcp320x.c
+++ b/drivers/iio/adc/mcp320x.c
@@ -459,15 +459,13 @@ reg_disable:
 	return ret;
 }
 
-static int mcp320x_remove(struct spi_device *spi)
+static void mcp320x_remove(struct spi_device *spi)
 {
 	struct iio_dev *indio_dev = spi_get_drvdata(spi);
 	struct mcp320x *adc = iio_priv(indio_dev);
 
 	iio_device_unregister(indio_dev);
 	regulator_disable(adc->reg);
-
-	return 0;
 }
 
 static const struct of_device_id mcp320x_dt_ids[] = {
diff --git a/drivers/iio/adc/mcp3911.c b/drivers/iio/adc/mcp3911.c
index 13535f148c4c6..1cb4590fe4125 100644
--- a/drivers/iio/adc/mcp3911.c
+++ b/drivers/iio/adc/mcp3911.c
@@ -321,7 +321,7 @@ reg_disable:
 	return ret;
 }
 
-static int mcp3911_remove(struct spi_device *spi)
+static void mcp3911_remove(struct spi_device *spi)
 {
 	struct iio_dev *indio_dev = spi_get_drvdata(spi);
 	struct mcp3911 *adc = iio_priv(indio_dev);
@@ -331,8 +331,6 @@ static int mcp3911_remove(struct spi_device *spi)
 	clk_disable_unprepare(adc->clki);
 	if (adc->vref)
 		regulator_disable(adc->vref);
-
-	return 0;
 }
 
 static const struct of_device_id mcp3911_dt_ids[] = {
diff --git a/drivers/iio/adc/ti-adc12138.c b/drivers/iio/adc/ti-adc12138.c
index 6eb62b564dae2..59d75d09604f3 100644
--- a/drivers/iio/adc/ti-adc12138.c
+++ b/drivers/iio/adc/ti-adc12138.c
@@ -503,7 +503,7 @@ err_clk_disable:
 	return ret;
 }
 
-static int adc12138_remove(struct spi_device *spi)
+static void adc12138_remove(struct spi_device *spi)
 {
 	struct iio_dev *indio_dev = spi_get_drvdata(spi);
 	struct adc12138 *adc = iio_priv(indio_dev);
@@ -514,8 +514,6 @@ static int adc12138_remove(struct spi_device *spi)
 		regulator_disable(adc->vref_n);
 	regulator_disable(adc->vref_p);
 	clk_disable_unprepare(adc->cclk);
-
-	return 0;
 }
 
 static const struct of_device_id adc12138_dt_ids[] = {
diff --git a/drivers/iio/adc/ti-ads7950.c b/drivers/iio/adc/ti-ads7950.c
index a7efa3eada2c4..e3658b969c5bf 100644
--- a/drivers/iio/adc/ti-ads7950.c
+++ b/drivers/iio/adc/ti-ads7950.c
@@ -662,7 +662,7 @@ error_destroy_mutex:
 	return ret;
 }
 
-static int ti_ads7950_remove(struct spi_device *spi)
+static void ti_ads7950_remove(struct spi_device *spi)
 {
 	struct iio_dev *indio_dev = spi_get_drvdata(spi);
 	struct ti_ads7950_state *st = iio_priv(indio_dev);
@@ -672,8 +672,6 @@ static int ti_ads7950_remove(struct spi_device *spi)
 	iio_triggered_buffer_cleanup(indio_dev);
 	regulator_disable(st->reg);
 	mutex_destroy(&st->slock);
-
-	return 0;
 }
 
 static const struct spi_device_id ti_ads7950_id[] = {
diff --git a/drivers/iio/adc/ti-ads8688.c b/drivers/iio/adc/ti-ads8688.c
index 2e24717d7f553..22c2583eedd0f 100644
--- a/drivers/iio/adc/ti-ads8688.c
+++ b/drivers/iio/adc/ti-ads8688.c
@@ -479,7 +479,7 @@ err_regulator_disable:
 	return ret;
 }
 
-static int ads8688_remove(struct spi_device *spi)
+static void ads8688_remove(struct spi_device *spi)
 {
 	struct iio_dev *indio_dev = spi_get_drvdata(spi);
 	struct ads8688_state *st = iio_priv(indio_dev);
@@ -489,8 +489,6 @@ static int ads8688_remove(struct spi_device *spi)
 
 	if (!IS_ERR(st->reg))
 		regulator_disable(st->reg);
-
-	return 0;
 }
 
 static const struct spi_device_id ads8688_id[] = {
diff --git a/drivers/iio/adc/ti-tlc4541.c b/drivers/iio/adc/ti-tlc4541.c
index 403b787f9f7ec..2406eda9dfc6a 100644
--- a/drivers/iio/adc/ti-tlc4541.c
+++ b/drivers/iio/adc/ti-tlc4541.c
@@ -224,7 +224,7 @@ error_disable_reg:
 	return ret;
 }
 
-static int tlc4541_remove(struct spi_device *spi)
+static void tlc4541_remove(struct spi_device *spi)
 {
 	struct iio_dev *indio_dev = spi_get_drvdata(spi);
 	struct tlc4541_state *st = iio_priv(indio_dev);
@@ -232,8 +232,6 @@ static int tlc4541_remove(struct spi_device *spi)
 	iio_device_unregister(indio_dev);
 	iio_triggered_buffer_cleanup(indio_dev);
 	regulator_disable(st->reg);
-
-	return 0;
 }
 
 static const struct of_device_id tlc4541_dt_ids[] = {
diff --git a/drivers/iio/amplifiers/ad8366.c b/drivers/iio/amplifiers/ad8366.c
index cfcf18a0bce85..1134ae12e5319 100644
--- a/drivers/iio/amplifiers/ad8366.c
+++ b/drivers/iio/amplifiers/ad8366.c
@@ -298,7 +298,7 @@ error_disable_reg:
 	return ret;
 }
 
-static int ad8366_remove(struct spi_device *spi)
+static void ad8366_remove(struct spi_device *spi)
 {
 	struct iio_dev *indio_dev = spi_get_drvdata(spi);
 	struct ad8366_state *st = iio_priv(indio_dev);
@@ -308,8 +308,6 @@ static int ad8366_remove(struct spi_device *spi)
 
 	if (!IS_ERR(reg))
 		regulator_disable(reg);
-
-	return 0;
 }
 
 static const struct spi_device_id ad8366_id[] = {
diff --git a/drivers/iio/common/ssp_sensors/ssp_dev.c b/drivers/iio/common/ssp_sensors/ssp_dev.c
index 1aee871000381..eafaf4529df54 100644
--- a/drivers/iio/common/ssp_sensors/ssp_dev.c
+++ b/drivers/iio/common/ssp_sensors/ssp_dev.c
@@ -586,7 +586,7 @@ err_setup_irq:
 	return ret;
 }
 
-static int ssp_remove(struct spi_device *spi)
+static void ssp_remove(struct spi_device *spi)
 {
 	struct ssp_data *data = spi_get_drvdata(spi);
 
@@ -608,8 +608,6 @@ static int ssp_remove(struct spi_device *spi)
 	mutex_destroy(&data->pending_lock);
 
 	mfd_remove_devices(&spi->dev);
-
-	return 0;
 }
 
 #ifdef CONFIG_PM_SLEEP
diff --git a/drivers/iio/dac/ad5360.c b/drivers/iio/dac/ad5360.c
index 2d3b14c407d83..ecbc6a51d60fa 100644
--- a/drivers/iio/dac/ad5360.c
+++ b/drivers/iio/dac/ad5360.c
@@ -521,7 +521,7 @@ error_free_channels:
 	return ret;
 }
 
-static int ad5360_remove(struct spi_device *spi)
+static void ad5360_remove(struct spi_device *spi)
 {
 	struct iio_dev *indio_dev = spi_get_drvdata(spi);
 	struct ad5360_state *st = iio_priv(indio_dev);
@@ -531,8 +531,6 @@ static int ad5360_remove(struct spi_device *spi)
 	kfree(indio_dev->channels);
 
 	regulator_bulk_disable(st->chip_info->num_vrefs, st->vref_reg);
-
-	return 0;
 }
 
 static const struct spi_device_id ad5360_ids[] = {
diff --git a/drivers/iio/dac/ad5380.c b/drivers/iio/dac/ad5380.c
index e38860a6a9f3f..82e1d9bd773e5 100644
--- a/drivers/iio/dac/ad5380.c
+++ b/drivers/iio/dac/ad5380.c
@@ -488,11 +488,9 @@ static int ad5380_spi_probe(struct spi_device *spi)
 	return ad5380_probe(&spi->dev, regmap, id->driver_data, id->name);
 }
 
-static int ad5380_spi_remove(struct spi_device *spi)
+static void ad5380_spi_remove(struct spi_device *spi)
 {
 	ad5380_remove(&spi->dev);
-
-	return 0;
 }
 
 static const struct spi_device_id ad5380_spi_ids[] = {
diff --git a/drivers/iio/dac/ad5446.c b/drivers/iio/dac/ad5446.c
index 1c9b54c012a7e..14cfabacbea53 100644
--- a/drivers/iio/dac/ad5446.c
+++ b/drivers/iio/dac/ad5446.c
@@ -491,11 +491,9 @@ static int ad5446_spi_probe(struct spi_device *spi)
 		&ad5446_spi_chip_info[id->driver_data]);
 }
 
-static int ad5446_spi_remove(struct spi_device *spi)
+static void ad5446_spi_remove(struct spi_device *spi)
 {
 	ad5446_remove(&spi->dev);
-
-	return 0;
 }
 
 static struct spi_driver ad5446_spi_driver = {
diff --git a/drivers/iio/dac/ad5449.c b/drivers/iio/dac/ad5449.c
index f5e93c6acc9d0..bad9bdaafa94d 100644
--- a/drivers/iio/dac/ad5449.c
+++ b/drivers/iio/dac/ad5449.c
@@ -330,7 +330,7 @@ error_disable_reg:
 	return ret;
 }
 
-static int ad5449_spi_remove(struct spi_device *spi)
+static void ad5449_spi_remove(struct spi_device *spi)
 {
 	struct iio_dev *indio_dev = spi_get_drvdata(spi);
 	struct ad5449 *st = iio_priv(indio_dev);
@@ -338,8 +338,6 @@ static int ad5449_spi_remove(struct spi_device *spi)
 	iio_device_unregister(indio_dev);
 
 	regulator_bulk_disable(st->chip_info->num_channels, st->vref_reg);
-
-	return 0;
 }
 
 static const struct spi_device_id ad5449_spi_ids[] = {
diff --git a/drivers/iio/dac/ad5504.c b/drivers/iio/dac/ad5504.c
index b631261efa97a..8507573aa13e9 100644
--- a/drivers/iio/dac/ad5504.c
+++ b/drivers/iio/dac/ad5504.c
@@ -336,7 +336,7 @@ error_disable_reg:
 	return ret;
 }
 
-static int ad5504_remove(struct spi_device *spi)
+static void ad5504_remove(struct spi_device *spi)
 {
 	struct iio_dev *indio_dev = spi_get_drvdata(spi);
 	struct ad5504_state *st = iio_priv(indio_dev);
@@ -345,8 +345,6 @@ static int ad5504_remove(struct spi_device *spi)
 
 	if (!IS_ERR(st->reg))
 		regulator_disable(st->reg);
-
-	return 0;
 }
 
 static const struct spi_device_id ad5504_id[] = {
diff --git a/drivers/iio/dac/ad5592r.c b/drivers/iio/dac/ad5592r.c
index 6bfd7951e18ca..0f7abfa75bec7 100644
--- a/drivers/iio/dac/ad5592r.c
+++ b/drivers/iio/dac/ad5592r.c
@@ -130,11 +130,9 @@ static int ad5592r_spi_probe(struct spi_device *spi)
 	return ad5592r_probe(&spi->dev, id->name, &ad5592r_rw_ops);
 }
 
-static int ad5592r_spi_remove(struct spi_device *spi)
+static void ad5592r_spi_remove(struct spi_device *spi)
 {
 	ad5592r_remove(&spi->dev);
-
-	return 0;
 }
 
 static const struct spi_device_id ad5592r_spi_ids[] = {
diff --git a/drivers/iio/dac/ad5624r_spi.c b/drivers/iio/dac/ad5624r_spi.c
index 3c98941b9f99a..371e812850eba 100644
--- a/drivers/iio/dac/ad5624r_spi.c
+++ b/drivers/iio/dac/ad5624r_spi.c
@@ -293,7 +293,7 @@ error_disable_reg:
 	return ret;
 }
 
-static int ad5624r_remove(struct spi_device *spi)
+static void ad5624r_remove(struct spi_device *spi)
 {
 	struct iio_dev *indio_dev = spi_get_drvdata(spi);
 	struct ad5624r_state *st = iio_priv(indio_dev);
@@ -301,8 +301,6 @@ static int ad5624r_remove(struct spi_device *spi)
 	iio_device_unregister(indio_dev);
 	if (!IS_ERR(st->reg))
 		regulator_disable(st->reg);
-
-	return 0;
 }
 
 static const struct spi_device_id ad5624r_id[] = {
diff --git a/drivers/iio/dac/ad5686-spi.c b/drivers/iio/dac/ad5686-spi.c
index 2628810fdbb1f..d26fb29b6b04d 100644
--- a/drivers/iio/dac/ad5686-spi.c
+++ b/drivers/iio/dac/ad5686-spi.c
@@ -95,11 +95,9 @@ static int ad5686_spi_probe(struct spi_device *spi)
 			    ad5686_spi_write, ad5686_spi_read);
 }
 
-static int ad5686_spi_remove(struct spi_device *spi)
+static void ad5686_spi_remove(struct spi_device *spi)
 {
 	ad5686_remove(&spi->dev);
-
-	return 0;
 }
 
 static const struct spi_device_id ad5686_spi_id[] = {
diff --git a/drivers/iio/dac/ad5761.c b/drivers/iio/dac/ad5761.c
index e37e095e94fc2..4cb8471db81e0 100644
--- a/drivers/iio/dac/ad5761.c
+++ b/drivers/iio/dac/ad5761.c
@@ -394,7 +394,7 @@ disable_regulator_err:
 	return ret;
 }
 
-static int ad5761_remove(struct spi_device *spi)
+static void ad5761_remove(struct spi_device *spi)
 {
 	struct iio_dev *iio_dev = spi_get_drvdata(spi);
 	struct ad5761_state *st = iio_priv(iio_dev);
@@ -403,8 +403,6 @@ static int ad5761_remove(struct spi_device *spi)
 
 	if (!IS_ERR_OR_NULL(st->vref_reg))
 		regulator_disable(st->vref_reg);
-
-	return 0;
 }
 
 static const struct spi_device_id ad5761_id[] = {
diff --git a/drivers/iio/dac/ad5764.c b/drivers/iio/dac/ad5764.c
index ae089b9145cb7..d235a8047ba0c 100644
--- a/drivers/iio/dac/ad5764.c
+++ b/drivers/iio/dac/ad5764.c
@@ -332,7 +332,7 @@ error_disable_reg:
 	return ret;
 }
 
-static int ad5764_remove(struct spi_device *spi)
+static void ad5764_remove(struct spi_device *spi)
 {
 	struct iio_dev *indio_dev = spi_get_drvdata(spi);
 	struct ad5764_state *st = iio_priv(indio_dev);
@@ -341,8 +341,6 @@ static int ad5764_remove(struct spi_device *spi)
 
 	if (st->chip_info->int_vref == 0)
 		regulator_bulk_disable(ARRAY_SIZE(st->vref_reg), st->vref_reg);
-
-	return 0;
 }
 
 static const struct spi_device_id ad5764_ids[] = {
diff --git a/drivers/iio/dac/ad5791.c b/drivers/iio/dac/ad5791.c
index 7b4579d73d18b..2b14914b40500 100644
--- a/drivers/iio/dac/ad5791.c
+++ b/drivers/iio/dac/ad5791.c
@@ -428,7 +428,7 @@ error_disable_reg_pos:
 	return ret;
 }
 
-static int ad5791_remove(struct spi_device *spi)
+static void ad5791_remove(struct spi_device *spi)
 {
 	struct iio_dev *indio_dev = spi_get_drvdata(spi);
 	struct ad5791_state *st = iio_priv(indio_dev);
@@ -439,8 +439,6 @@ static int ad5791_remove(struct spi_device *spi)
 
 	if (!IS_ERR(st->reg_vss))
 		regulator_disable(st->reg_vss);
-
-	return 0;
 }
 
 static const struct spi_device_id ad5791_id[] = {
diff --git a/drivers/iio/dac/ad8801.c b/drivers/iio/dac/ad8801.c
index 5ecfdad54dec3..6be35c92d435a 100644
--- a/drivers/iio/dac/ad8801.c
+++ b/drivers/iio/dac/ad8801.c
@@ -193,7 +193,7 @@ error_disable_vrefh_reg:
 	return ret;
 }
 
-static int ad8801_remove(struct spi_device *spi)
+static void ad8801_remove(struct spi_device *spi)
 {
 	struct iio_dev *indio_dev = spi_get_drvdata(spi);
 	struct ad8801_state *state = iio_priv(indio_dev);
@@ -202,8 +202,6 @@ static int ad8801_remove(struct spi_device *spi)
 	if (state->vrefl_reg)
 		regulator_disable(state->vrefl_reg);
 	regulator_disable(state->vrefh_reg);
-
-	return 0;
 }
 
 static const struct spi_device_id ad8801_ids[] = {
diff --git a/drivers/iio/dac/ltc1660.c b/drivers/iio/dac/ltc1660.c
index f6ec9bf5815ea..c76233c9bb72b 100644
--- a/drivers/iio/dac/ltc1660.c
+++ b/drivers/iio/dac/ltc1660.c
@@ -206,15 +206,13 @@ error_disable_reg:
 	return ret;
 }
 
-static int ltc1660_remove(struct spi_device *spi)
+static void ltc1660_remove(struct spi_device *spi)
 {
 	struct iio_dev *indio_dev = spi_get_drvdata(spi);
 	struct ltc1660_priv *priv = iio_priv(indio_dev);
 
 	iio_device_unregister(indio_dev);
 	regulator_disable(priv->vref_reg);
-
-	return 0;
 }
 
 static const struct of_device_id ltc1660_dt_ids[] = {
diff --git a/drivers/iio/dac/ltc2632.c b/drivers/iio/dac/ltc2632.c
index 53e4b887d372d..aed46c80757e3 100644
--- a/drivers/iio/dac/ltc2632.c
+++ b/drivers/iio/dac/ltc2632.c
@@ -372,7 +372,7 @@ static int ltc2632_probe(struct spi_device *spi)
 	return iio_device_register(indio_dev);
 }
 
-static int ltc2632_remove(struct spi_device *spi)
+static void ltc2632_remove(struct spi_device *spi)
 {
 	struct iio_dev *indio_dev = spi_get_drvdata(spi);
 	struct ltc2632_state *st = iio_priv(indio_dev);
@@ -381,8 +381,6 @@ static int ltc2632_remove(struct spi_device *spi)
 
 	if (st->vref_reg)
 		regulator_disable(st->vref_reg);
-
-	return 0;
 }
 
 static const struct spi_device_id ltc2632_id[] = {
diff --git a/drivers/iio/dac/mcp4922.c b/drivers/iio/dac/mcp4922.c
index 0ae414ee17166..cb9e60e71b915 100644
--- a/drivers/iio/dac/mcp4922.c
+++ b/drivers/iio/dac/mcp4922.c
@@ -172,7 +172,7 @@ error_disable_reg:
 	return ret;
 }
 
-static int mcp4922_remove(struct spi_device *spi)
+static void mcp4922_remove(struct spi_device *spi)
 {
 	struct iio_dev *indio_dev = spi_get_drvdata(spi);
 	struct mcp4922_state *state;
@@ -180,8 +180,6 @@ static int mcp4922_remove(struct spi_device *spi)
 	iio_device_unregister(indio_dev);
 	state = iio_priv(indio_dev);
 	regulator_disable(state->vref_reg);
-
-	return 0;
 }
 
 static const struct spi_device_id mcp4922_id[] = {
diff --git a/drivers/iio/dac/ti-dac082s085.c b/drivers/iio/dac/ti-dac082s085.c
index 6beda2193683a..4e1156e6deb2d 100644
--- a/drivers/iio/dac/ti-dac082s085.c
+++ b/drivers/iio/dac/ti-dac082s085.c
@@ -313,7 +313,7 @@ err:
 	return ret;
 }
 
-static int ti_dac_remove(struct spi_device *spi)
+static void ti_dac_remove(struct spi_device *spi)
 {
 	struct iio_dev *indio_dev = spi_get_drvdata(spi);
 	struct ti_dac_chip *ti_dac = iio_priv(indio_dev);
@@ -321,8 +321,6 @@ static int ti_dac_remove(struct spi_device *spi)
 	iio_device_unregister(indio_dev);
 	mutex_destroy(&ti_dac->lock);
 	regulator_disable(ti_dac->vref);
-
-	return 0;
 }
 
 static const struct of_device_id ti_dac_of_id[] = {
diff --git a/drivers/iio/dac/ti-dac7311.c b/drivers/iio/dac/ti-dac7311.c
index 99f275829ec21..e10d17e60ed39 100644
--- a/drivers/iio/dac/ti-dac7311.c
+++ b/drivers/iio/dac/ti-dac7311.c
@@ -292,7 +292,7 @@ err:
 	return ret;
 }
 
-static int ti_dac_remove(struct spi_device *spi)
+static void ti_dac_remove(struct spi_device *spi)
 {
 	struct iio_dev *indio_dev = spi_get_drvdata(spi);
 	struct ti_dac_chip *ti_dac = iio_priv(indio_dev);
@@ -300,7 +300,6 @@ static int ti_dac_remove(struct spi_device *spi)
 	iio_device_unregister(indio_dev);
 	mutex_destroy(&ti_dac->lock);
 	regulator_disable(ti_dac->vref);
-	return 0;
 }
 
 static const struct of_device_id ti_dac_of_id[] = {
diff --git a/drivers/iio/frequency/adf4350.c b/drivers/iio/frequency/adf4350.c
index 3d9eba716b691..f3521330f6fb2 100644
--- a/drivers/iio/frequency/adf4350.c
+++ b/drivers/iio/frequency/adf4350.c
@@ -589,7 +589,7 @@ error_disable_clk:
 	return ret;
 }
 
-static int adf4350_remove(struct spi_device *spi)
+static void adf4350_remove(struct spi_device *spi)
 {
 	struct iio_dev *indio_dev = spi_get_drvdata(spi);
 	struct adf4350_state *st = iio_priv(indio_dev);
@@ -604,8 +604,6 @@ static int adf4350_remove(struct spi_device *spi)
 
 	if (!IS_ERR(reg))
 		regulator_disable(reg);
-
-	return 0;
 }
 
 static const struct of_device_id adf4350_of_match[] = {
diff --git a/drivers/iio/gyro/bmg160_spi.c b/drivers/iio/gyro/bmg160_spi.c
index 745962e1e4235..fc2e453527b97 100644
--- a/drivers/iio/gyro/bmg160_spi.c
+++ b/drivers/iio/gyro/bmg160_spi.c
@@ -27,11 +27,9 @@ static int bmg160_spi_probe(struct spi_device *spi)
 	return bmg160_core_probe(&spi->dev, regmap, spi->irq, id->name);
 }
 
-static int bmg160_spi_remove(struct spi_device *spi)
+static void bmg160_spi_remove(struct spi_device *spi)
 {
 	bmg160_core_remove(&spi->dev);
-
-	return 0;
 }
 
 static const struct spi_device_id bmg160_spi_id[] = {
diff --git a/drivers/iio/gyro/fxas21002c_spi.c b/drivers/iio/gyro/fxas21002c_spi.c
index 77ceebef4e34c..c3ac169facf9b 100644
--- a/drivers/iio/gyro/fxas21002c_spi.c
+++ b/drivers/iio/gyro/fxas21002c_spi.c
@@ -34,11 +34,9 @@ static int fxas21002c_spi_probe(struct spi_device *spi)
 	return fxas21002c_core_probe(&spi->dev, regmap, spi->irq, id->name);
 }
 
-static int fxas21002c_spi_remove(struct spi_device *spi)
+static void fxas21002c_spi_remove(struct spi_device *spi)
 {
 	fxas21002c_core_remove(&spi->dev);
-
-	return 0;
 }
 
 static const struct spi_device_id fxas21002c_spi_id[] = {
diff --git a/drivers/iio/health/afe4403.c b/drivers/iio/health/afe4403.c
index 273f16dcaff84..856ec901b0913 100644
--- a/drivers/iio/health/afe4403.c
+++ b/drivers/iio/health/afe4403.c
@@ -570,7 +570,7 @@ err_disable_reg:
 	return ret;
 }
 
-static int afe4403_remove(struct spi_device *spi)
+static void afe4403_remove(struct spi_device *spi)
 {
 	struct iio_dev *indio_dev = spi_get_drvdata(spi);
 	struct afe4403_data *afe = iio_priv(indio_dev);
@@ -586,8 +586,6 @@ static int afe4403_remove(struct spi_device *spi)
 	ret = regulator_disable(afe->regulator);
 	if (ret)
 		dev_warn(afe->dev, "Unable to disable regulator\n");
-
-	return 0;
 }
 
 static const struct spi_device_id afe4403_ids[] = {
diff --git a/drivers/iio/magnetometer/bmc150_magn_spi.c b/drivers/iio/magnetometer/bmc150_magn_spi.c
index c6ed3ea8460ae..4c570412d65cc 100644
--- a/drivers/iio/magnetometer/bmc150_magn_spi.c
+++ b/drivers/iio/magnetometer/bmc150_magn_spi.c
@@ -29,11 +29,9 @@ static int bmc150_magn_spi_probe(struct spi_device *spi)
 	return bmc150_magn_probe(&spi->dev, regmap, spi->irq, id->name);
 }
 
-static int bmc150_magn_spi_remove(struct spi_device *spi)
+static void bmc150_magn_spi_remove(struct spi_device *spi)
 {
 	bmc150_magn_remove(&spi->dev);
-
-	return 0;
 }
 
 static const struct spi_device_id bmc150_magn_spi_id[] = {
diff --git a/drivers/iio/magnetometer/hmc5843_spi.c b/drivers/iio/magnetometer/hmc5843_spi.c
index 89cf59a62c289..a99dd9b33e95f 100644
--- a/drivers/iio/magnetometer/hmc5843_spi.c
+++ b/drivers/iio/magnetometer/hmc5843_spi.c
@@ -74,11 +74,9 @@ static int hmc5843_spi_probe(struct spi_device *spi)
 			id->driver_data, id->name);
 }
 
-static int hmc5843_spi_remove(struct spi_device *spi)
+static void hmc5843_spi_remove(struct spi_device *spi)
 {
 	hmc5843_common_remove(&spi->dev);
-
-	return 0;
 }
 
 static const struct spi_device_id hmc5843_id[] = {
diff --git a/drivers/iio/potentiometer/max5487.c b/drivers/iio/potentiometer/max5487.c
index 007c2bd324cb1..42723c996c9f4 100644
--- a/drivers/iio/potentiometer/max5487.c
+++ b/drivers/iio/potentiometer/max5487.c
@@ -112,7 +112,7 @@ static int max5487_spi_probe(struct spi_device *spi)
 	return iio_device_register(indio_dev);
 }
 
-static int max5487_spi_remove(struct spi_device *spi)
+static void max5487_spi_remove(struct spi_device *spi)
 {
 	struct iio_dev *indio_dev = spi_get_drvdata(spi);
 	int ret;
@@ -123,8 +123,6 @@ static int max5487_spi_remove(struct spi_device *spi)
 	ret = max5487_write_cmd(spi, MAX5487_COPY_AB_TO_NV);
 	if (ret)
 		dev_warn(&spi->dev, "Failed to save wiper regs to NV regs\n");
-
-	return 0;
 }
 
 static const struct spi_device_id max5487_id[] = {
diff --git a/drivers/iio/pressure/ms5611_spi.c b/drivers/iio/pressure/ms5611_spi.c
index 9fa2dcd71760b..7ccd960ced5db 100644
--- a/drivers/iio/pressure/ms5611_spi.c
+++ b/drivers/iio/pressure/ms5611_spi.c
@@ -107,11 +107,9 @@ static int ms5611_spi_probe(struct spi_device *spi)
 			    spi_get_device_id(spi)->driver_data);
 }
 
-static int ms5611_spi_remove(struct spi_device *spi)
+static void ms5611_spi_remove(struct spi_device *spi)
 {
 	ms5611_remove(spi_get_drvdata(spi));
-
-	return 0;
 }
 
 static const struct of_device_id ms5611_spi_matches[] = {
diff --git a/drivers/iio/pressure/zpa2326_spi.c b/drivers/iio/pressure/zpa2326_spi.c
index 85201a4bae44c..ee8ed77536cac 100644
--- a/drivers/iio/pressure/zpa2326_spi.c
+++ b/drivers/iio/pressure/zpa2326_spi.c
@@ -57,11 +57,9 @@ static int zpa2326_probe_spi(struct spi_device *spi)
 			     spi->irq, ZPA2326_DEVICE_ID, regmap);
 }
 
-static int zpa2326_remove_spi(struct spi_device *spi)
+static void zpa2326_remove_spi(struct spi_device *spi)
 {
 	zpa2326_remove(&spi->dev);
-
-	return 0;
 }
 
 static const struct spi_device_id zpa2326_spi_ids[] = {
diff --git a/drivers/input/keyboard/applespi.c b/drivers/input/keyboard/applespi.c
index eda1b23002b58..d1f5354d5ea28 100644
--- a/drivers/input/keyboard/applespi.c
+++ b/drivers/input/keyboard/applespi.c
@@ -1858,7 +1858,7 @@ static void applespi_drain_reads(struct applespi_data *applespi)
 	spin_unlock_irqrestore(&applespi->cmd_msg_lock, flags);
 }
 
-static int applespi_remove(struct spi_device *spi)
+static void applespi_remove(struct spi_device *spi)
 {
 	struct applespi_data *applespi = spi_get_drvdata(spi);
 
@@ -1871,8 +1871,6 @@ static int applespi_remove(struct spi_device *spi)
 	applespi_drain_reads(applespi);
 
 	debugfs_remove_recursive(applespi->debugfs_root);
-
-	return 0;
 }
 
 static void applespi_shutdown(struct spi_device *spi)
diff --git a/drivers/input/misc/adxl34x-spi.c b/drivers/input/misc/adxl34x-spi.c
index 6e51c9bc619f2..91e44d4c66f7a 100644
--- a/drivers/input/misc/adxl34x-spi.c
+++ b/drivers/input/misc/adxl34x-spi.c
@@ -87,13 +87,11 @@ static int adxl34x_spi_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int adxl34x_spi_remove(struct spi_device *spi)
+static void adxl34x_spi_remove(struct spi_device *spi)
 {
 	struct adxl34x *ac = spi_get_drvdata(spi);
 
 	adxl34x_remove(ac);
-
-	return 0;
 }
 
 static int __maybe_unused adxl34x_spi_suspend(struct device *dev)
diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c
index a25a77dd9a32d..bed68a68f3303 100644
--- a/drivers/input/touchscreen/ads7846.c
+++ b/drivers/input/touchscreen/ads7846.c
@@ -1411,13 +1411,11 @@ static int ads7846_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int ads7846_remove(struct spi_device *spi)
+static void ads7846_remove(struct spi_device *spi)
 {
 	struct ads7846 *ts = spi_get_drvdata(spi);
 
 	ads7846_stop(ts);
-
-	return 0;
 }
 
 static struct spi_driver ads7846_driver = {
diff --git a/drivers/input/touchscreen/cyttsp4_spi.c b/drivers/input/touchscreen/cyttsp4_spi.c
index 2aec41eb76b72..5d7db84f2749c 100644
--- a/drivers/input/touchscreen/cyttsp4_spi.c
+++ b/drivers/input/touchscreen/cyttsp4_spi.c
@@ -164,12 +164,10 @@ static int cyttsp4_spi_probe(struct spi_device *spi)
 	return PTR_ERR_OR_ZERO(ts);
 }
 
-static int cyttsp4_spi_remove(struct spi_device *spi)
+static void cyttsp4_spi_remove(struct spi_device *spi)
 {
 	struct cyttsp4 *ts = spi_get_drvdata(spi);
 	cyttsp4_remove(ts);
-
-	return 0;
 }
 
 static struct spi_driver cyttsp4_spi_driver = {
diff --git a/drivers/input/touchscreen/tsc2005.c b/drivers/input/touchscreen/tsc2005.c
index a2f55920b9b2e..555dfe98b3c4c 100644
--- a/drivers/input/touchscreen/tsc2005.c
+++ b/drivers/input/touchscreen/tsc2005.c
@@ -64,11 +64,9 @@ static int tsc2005_probe(struct spi_device *spi)
 			     tsc2005_cmd);
 }
 
-static int tsc2005_remove(struct spi_device *spi)
+static void tsc2005_remove(struct spi_device *spi)
 {
 	tsc200x_remove(&spi->dev);
-
-	return 0;
 }
 
 #ifdef CONFIG_OF
diff --git a/drivers/leds/leds-cr0014114.c b/drivers/leds/leds-cr0014114.c
index d03cfd3c0bfbe..c87686bd7c189 100644
--- a/drivers/leds/leds-cr0014114.c
+++ b/drivers/leds/leds-cr0014114.c
@@ -266,14 +266,12 @@ static int cr0014114_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int cr0014114_remove(struct spi_device *spi)
+static void cr0014114_remove(struct spi_device *spi)
 {
 	struct cr0014114 *priv = spi_get_drvdata(spi);
 
 	cancel_delayed_work_sync(&priv->work);
 	mutex_destroy(&priv->lock);
-
-	return 0;
 }
 
 static const struct of_device_id cr0014114_dt_ids[] = {
diff --git a/drivers/leds/leds-dac124s085.c b/drivers/leds/leds-dac124s085.c
index 20dc9b9d7deac..cf5fb1195f87f 100644
--- a/drivers/leds/leds-dac124s085.c
+++ b/drivers/leds/leds-dac124s085.c
@@ -85,15 +85,13 @@ eledcr:
 	return ret;
 }
 
-static int dac124s085_remove(struct spi_device *spi)
+static void dac124s085_remove(struct spi_device *spi)
 {
 	struct dac124s085	*dac = spi_get_drvdata(spi);
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(dac->leds); i++)
 		led_classdev_unregister(&dac->leds[i].ldev);
-
-	return 0;
 }
 
 static struct spi_driver dac124s085_driver = {
diff --git a/drivers/leds/leds-el15203000.c b/drivers/leds/leds-el15203000.c
index f9eb59a255705..7e7b617bcd56e 100644
--- a/drivers/leds/leds-el15203000.c
+++ b/drivers/leds/leds-el15203000.c
@@ -315,13 +315,11 @@ static int el15203000_probe(struct spi_device *spi)
 	return el15203000_probe_dt(priv);
 }
 
-static int el15203000_remove(struct spi_device *spi)
+static void el15203000_remove(struct spi_device *spi)
 {
 	struct el15203000 *priv = spi_get_drvdata(spi);
 
 	mutex_destroy(&priv->lock);
-
-	return 0;
 }
 
 static const struct of_device_id el15203000_dt_ids[] = {
diff --git a/drivers/leds/leds-spi-byte.c b/drivers/leds/leds-spi-byte.c
index f1964c96fb159..2bc5c99daf51a 100644
--- a/drivers/leds/leds-spi-byte.c
+++ b/drivers/leds/leds-spi-byte.c
@@ -130,13 +130,11 @@ static int spi_byte_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int spi_byte_remove(struct spi_device *spi)
+static void spi_byte_remove(struct spi_device *spi)
 {
 	struct spi_byte_led	*led = spi_get_drvdata(spi);
 
 	mutex_destroy(&led->mutex);
-
-	return 0;
 }
 
 static struct spi_driver spi_byte_driver = {
diff --git a/drivers/media/spi/cxd2880-spi.c b/drivers/media/spi/cxd2880-spi.c
index 6f2a66bc87fb8..6be4e5528879f 100644
--- a/drivers/media/spi/cxd2880-spi.c
+++ b/drivers/media/spi/cxd2880-spi.c
@@ -625,7 +625,7 @@ fail_regulator:
 	return ret;
 }
 
-static int
+static void
 cxd2880_spi_remove(struct spi_device *spi)
 {
 	struct cxd2880_dvb_spi *dvb_spi = spi_get_drvdata(spi);
@@ -643,8 +643,6 @@ cxd2880_spi_remove(struct spi_device *spi)
 
 	kfree(dvb_spi);
 	pr_info("cxd2880_spi remove ok.\n");
-
-	return 0;
 }
 
 static const struct spi_device_id cxd2880_spi_id[] = {
diff --git a/drivers/media/spi/gs1662.c b/drivers/media/spi/gs1662.c
index f86ef1ca12885..75c21a93e6d00 100644
--- a/drivers/media/spi/gs1662.c
+++ b/drivers/media/spi/gs1662.c
@@ -458,13 +458,11 @@ static int gs_probe(struct spi_device *spi)
 	return ret;
 }
 
-static int gs_remove(struct spi_device *spi)
+static void gs_remove(struct spi_device *spi)
 {
 	struct v4l2_subdev *sd = spi_get_drvdata(spi);
 
 	v4l2_device_unregister_subdev(sd);
-
-	return 0;
 }
 
 static struct spi_driver gs_driver = {
diff --git a/drivers/media/tuners/msi001.c b/drivers/media/tuners/msi001.c
index 44247049a3190..ad6c72c1ed042 100644
--- a/drivers/media/tuners/msi001.c
+++ b/drivers/media/tuners/msi001.c
@@ -472,7 +472,7 @@ err:
 	return ret;
 }
 
-static int msi001_remove(struct spi_device *spi)
+static void msi001_remove(struct spi_device *spi)
 {
 	struct v4l2_subdev *sd = spi_get_drvdata(spi);
 	struct msi001_dev *dev = sd_to_msi001_dev(sd);
@@ -486,7 +486,6 @@ static int msi001_remove(struct spi_device *spi)
 	v4l2_device_unregister_subdev(&dev->sd);
 	v4l2_ctrl_handler_free(&dev->hdl);
 	kfree(dev);
-	return 0;
 }
 
 static const struct spi_device_id msi001_id_table[] = {
diff --git a/drivers/mfd/arizona-spi.c b/drivers/mfd/arizona-spi.c
index 9fe06dda37829..03620c8efe34a 100644
--- a/drivers/mfd/arizona-spi.c
+++ b/drivers/mfd/arizona-spi.c
@@ -206,13 +206,11 @@ static int arizona_spi_probe(struct spi_device *spi)
 	return arizona_dev_init(arizona);
 }
 
-static int arizona_spi_remove(struct spi_device *spi)
+static void arizona_spi_remove(struct spi_device *spi)
 {
 	struct arizona *arizona = spi_get_drvdata(spi);
 
 	arizona_dev_exit(arizona);
-
-	return 0;
 }
 
 static const struct spi_device_id arizona_spi_ids[] = {
diff --git a/drivers/mfd/da9052-spi.c b/drivers/mfd/da9052-spi.c
index 5faf3766a5e20..b79a57b45c1e8 100644
--- a/drivers/mfd/da9052-spi.c
+++ b/drivers/mfd/da9052-spi.c
@@ -55,12 +55,11 @@ static int da9052_spi_probe(struct spi_device *spi)
 	return da9052_device_init(da9052, id->driver_data);
 }
 
-static int da9052_spi_remove(struct spi_device *spi)
+static void da9052_spi_remove(struct spi_device *spi)
 {
 	struct da9052 *da9052 = spi_get_drvdata(spi);
 
 	da9052_device_exit(da9052);
-	return 0;
 }
 
 static const struct spi_device_id da9052_spi_id[] = {
diff --git a/drivers/mfd/ezx-pcap.c b/drivers/mfd/ezx-pcap.c
index 70fa18b04ad2b..2280f756f4229 100644
--- a/drivers/mfd/ezx-pcap.c
+++ b/drivers/mfd/ezx-pcap.c
@@ -392,7 +392,7 @@ static int pcap_add_subdev(struct pcap_chip *pcap,
 	return ret;
 }
 
-static int ezx_pcap_remove(struct spi_device *spi)
+static void ezx_pcap_remove(struct spi_device *spi)
 {
 	struct pcap_chip *pcap = spi_get_drvdata(spi);
 	unsigned long flags;
@@ -412,8 +412,6 @@ static int ezx_pcap_remove(struct spi_device *spi)
 		irq_set_chip_and_handler(i, NULL, NULL);
 
 	destroy_workqueue(pcap->workqueue);
-
-	return 0;
 }
 
 static int ezx_pcap_probe(struct spi_device *spi)
diff --git a/drivers/mfd/madera-spi.c b/drivers/mfd/madera-spi.c
index e860f5ff09336..da84eb50e53a3 100644
--- a/drivers/mfd/madera-spi.c
+++ b/drivers/mfd/madera-spi.c
@@ -112,13 +112,11 @@ static int madera_spi_probe(struct spi_device *spi)
 	return madera_dev_init(madera);
 }
 
-static int madera_spi_remove(struct spi_device *spi)
+static void madera_spi_remove(struct spi_device *spi)
 {
 	struct madera *madera = spi_get_drvdata(spi);
 
 	madera_dev_exit(madera);
-
-	return 0;
 }
 
 static const struct spi_device_id madera_spi_ids[] = {
diff --git a/drivers/mfd/mc13xxx-spi.c b/drivers/mfd/mc13xxx-spi.c
index 4d8913d647e63..f803527e58194 100644
--- a/drivers/mfd/mc13xxx-spi.c
+++ b/drivers/mfd/mc13xxx-spi.c
@@ -166,10 +166,9 @@ static int mc13xxx_spi_probe(struct spi_device *spi)
 	return mc13xxx_common_init(&spi->dev);
 }
 
-static int mc13xxx_spi_remove(struct spi_device *spi)
+static void mc13xxx_spi_remove(struct spi_device *spi)
 {
 	mc13xxx_common_exit(&spi->dev);
-	return 0;
 }
 
 static struct spi_driver mc13xxx_spi_driver = {
diff --git a/drivers/mfd/rsmu_spi.c b/drivers/mfd/rsmu_spi.c
index fec2b4ec477c5..d2f3d8f1e05af 100644
--- a/drivers/mfd/rsmu_spi.c
+++ b/drivers/mfd/rsmu_spi.c
@@ -220,13 +220,11 @@ static int rsmu_spi_probe(struct spi_device *client)
 	return rsmu_core_init(rsmu);
 }
 
-static int rsmu_spi_remove(struct spi_device *client)
+static void rsmu_spi_remove(struct spi_device *client)
 {
 	struct rsmu_ddata *rsmu = spi_get_drvdata(client);
 
 	rsmu_core_exit(rsmu);
-
-	return 0;
 }
 
 static const struct spi_device_id rsmu_spi_id[] = {
diff --git a/drivers/mfd/stmpe-spi.c b/drivers/mfd/stmpe-spi.c
index 6c5915016be50..ad8055a0e2869 100644
--- a/drivers/mfd/stmpe-spi.c
+++ b/drivers/mfd/stmpe-spi.c
@@ -102,13 +102,11 @@ stmpe_spi_probe(struct spi_device *spi)
 	return stmpe_probe(&spi_ci, id->driver_data);
 }
 
-static int stmpe_spi_remove(struct spi_device *spi)
+static void stmpe_spi_remove(struct spi_device *spi)
 {
 	struct stmpe *stmpe = spi_get_drvdata(spi);
 
 	stmpe_remove(stmpe);
-
-	return 0;
 }
 
 static const struct of_device_id stmpe_spi_of_match[] = {
diff --git a/drivers/mfd/tps65912-spi.c b/drivers/mfd/tps65912-spi.c
index d701926aa46e6..bba38fbc781df 100644
--- a/drivers/mfd/tps65912-spi.c
+++ b/drivers/mfd/tps65912-spi.c
@@ -50,13 +50,11 @@ static int tps65912_spi_probe(struct spi_device *spi)
 	return tps65912_device_init(tps);
 }
 
-static int tps65912_spi_remove(struct spi_device *spi)
+static void tps65912_spi_remove(struct spi_device *spi)
 {
 	struct tps65912 *tps = spi_get_drvdata(spi);
 
 	tps65912_device_exit(tps);
-
-	return 0;
 }
 
 static const struct spi_device_id tps65912_spi_id_table[] = {
diff --git a/drivers/misc/ad525x_dpot-spi.c b/drivers/misc/ad525x_dpot-spi.c
index a9e75d80ad362..263055bda48b7 100644
--- a/drivers/misc/ad525x_dpot-spi.c
+++ b/drivers/misc/ad525x_dpot-spi.c
@@ -90,10 +90,9 @@ static int ad_dpot_spi_probe(struct spi_device *spi)
 			     spi_get_device_id(spi)->name);
 }
 
-static int ad_dpot_spi_remove(struct spi_device *spi)
+static void ad_dpot_spi_remove(struct spi_device *spi)
 {
 	ad_dpot_remove(&spi->dev);
-	return 0;
 }
 
 static const struct spi_device_id ad_dpot_spi_id[] = {
diff --git a/drivers/misc/eeprom/eeprom_93xx46.c b/drivers/misc/eeprom/eeprom_93xx46.c
index 1f15399e5cb49..b630625b3024b 100644
--- a/drivers/misc/eeprom/eeprom_93xx46.c
+++ b/drivers/misc/eeprom/eeprom_93xx46.c
@@ -555,14 +555,12 @@ static int eeprom_93xx46_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int eeprom_93xx46_remove(struct spi_device *spi)
+static void eeprom_93xx46_remove(struct spi_device *spi)
 {
 	struct eeprom_93xx46_dev *edev = spi_get_drvdata(spi);
 
 	if (!(edev->pdata->flags & EE_READONLY))
 		device_remove_file(&spi->dev, &dev_attr_erase);
-
-	return 0;
 }
 
 static struct spi_driver eeprom_93xx46_driver = {
diff --git a/drivers/misc/lattice-ecp3-config.c b/drivers/misc/lattice-ecp3-config.c
index 98828030b5a4d..bac4df2e52314 100644
--- a/drivers/misc/lattice-ecp3-config.c
+++ b/drivers/misc/lattice-ecp3-config.c
@@ -211,13 +211,11 @@ static int lattice_ecp3_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int lattice_ecp3_remove(struct spi_device *spi)
+static void lattice_ecp3_remove(struct spi_device *spi)
 {
 	struct fpga_data *data = spi_get_drvdata(spi);
 
 	wait_for_completion(&data->fw_loaded);
-
-	return 0;
 }
 
 static const struct spi_device_id lattice_ecp3_id[] = {
diff --git a/drivers/misc/lis3lv02d/lis3lv02d_spi.c b/drivers/misc/lis3lv02d/lis3lv02d_spi.c
index 9e40dfb607425..203a108b8883c 100644
--- a/drivers/misc/lis3lv02d/lis3lv02d_spi.c
+++ b/drivers/misc/lis3lv02d/lis3lv02d_spi.c
@@ -96,15 +96,13 @@ static int lis302dl_spi_probe(struct spi_device *spi)
 	return lis3lv02d_init_device(&lis3_dev);
 }
 
-static int lis302dl_spi_remove(struct spi_device *spi)
+static void lis302dl_spi_remove(struct spi_device *spi)
 {
 	struct lis3lv02d *lis3 = spi_get_drvdata(spi);
 	lis3lv02d_joystick_disable(lis3);
 	lis3lv02d_poweroff(lis3);
 
 	lis3lv02d_remove_fs(&lis3_dev);
-
-	return 0;
 }
 
 #ifdef CONFIG_PM_SLEEP
diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c
index a576181e9db03..106dd204b1a7f 100644
--- a/drivers/mmc/host/mmc_spi.c
+++ b/drivers/mmc/host/mmc_spi.c
@@ -1489,7 +1489,7 @@ nomem:
 }
 
 
-static int mmc_spi_remove(struct spi_device *spi)
+static void mmc_spi_remove(struct spi_device *spi)
 {
 	struct mmc_host		*mmc = dev_get_drvdata(&spi->dev);
 	struct mmc_spi_host	*host = mmc_priv(mmc);
@@ -1507,7 +1507,6 @@ static int mmc_spi_remove(struct spi_device *spi)
 	spi->max_speed_hz = mmc->f_max;
 	mmc_spi_put_pdata(spi);
 	mmc_free_host(mmc);
-	return 0;
 }
 
 static const struct spi_device_id mmc_spi_dev_ids[] = {
diff --git a/drivers/mtd/devices/mchp23k256.c b/drivers/mtd/devices/mchp23k256.c
index a8b31bddf14b8..008df9d8898db 100644
--- a/drivers/mtd/devices/mchp23k256.c
+++ b/drivers/mtd/devices/mchp23k256.c
@@ -209,13 +209,11 @@ static int mchp23k256_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int mchp23k256_remove(struct spi_device *spi)
+static void mchp23k256_remove(struct spi_device *spi)
 {
 	struct mchp23k256_flash *flash = spi_get_drvdata(spi);
 
 	WARN_ON(mtd_device_unregister(&flash->mtd));
-
-	return 0;
 }
 
 static const struct of_device_id mchp23k256_of_table[] = {
diff --git a/drivers/mtd/devices/mchp48l640.c b/drivers/mtd/devices/mchp48l640.c
index 231a107901960..a3fd426df74be 100644
--- a/drivers/mtd/devices/mchp48l640.c
+++ b/drivers/mtd/devices/mchp48l640.c
@@ -341,13 +341,11 @@ static int mchp48l640_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int mchp48l640_remove(struct spi_device *spi)
+static void mchp48l640_remove(struct spi_device *spi)
 {
 	struct mchp48l640_flash *flash = spi_get_drvdata(spi);
 
 	WARN_ON(mtd_device_unregister(&flash->mtd));
-
-	return 0;
 }
 
 static const struct of_device_id mchp48l640_of_table[] = {
diff --git a/drivers/mtd/devices/mtd_dataflash.c b/drivers/mtd/devices/mtd_dataflash.c
index 734878abaa236..134e273285974 100644
--- a/drivers/mtd/devices/mtd_dataflash.c
+++ b/drivers/mtd/devices/mtd_dataflash.c
@@ -916,7 +916,7 @@ static int dataflash_probe(struct spi_device *spi)
 	return status;
 }
 
-static int dataflash_remove(struct spi_device *spi)
+static void dataflash_remove(struct spi_device *spi)
 {
 	struct dataflash	*flash = spi_get_drvdata(spi);
 
@@ -925,8 +925,6 @@ static int dataflash_remove(struct spi_device *spi)
 	WARN_ON(mtd_device_unregister(&flash->mtd));
 
 	kfree(flash);
-
-	return 0;
 }
 
 static struct spi_driver dataflash_driver = {
diff --git a/drivers/mtd/devices/sst25l.c b/drivers/mtd/devices/sst25l.c
index 7f124c1bfa406..8813994ce9f45 100644
--- a/drivers/mtd/devices/sst25l.c
+++ b/drivers/mtd/devices/sst25l.c
@@ -398,13 +398,11 @@ static int sst25l_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int sst25l_remove(struct spi_device *spi)
+static void sst25l_remove(struct spi_device *spi)
 {
 	struct sst25l_flash *flash = spi_get_drvdata(spi);
 
 	WARN_ON(mtd_device_unregister(&flash->mtd));
-
-	return 0;
 }
 
 static struct spi_driver sst25l_driver = {
diff --git a/drivers/net/can/m_can/tcan4x5x-core.c b/drivers/net/can/m_can/tcan4x5x-core.c
index 04687b15b250e..41645a24384ce 100644
--- a/drivers/net/can/m_can/tcan4x5x-core.c
+++ b/drivers/net/can/m_can/tcan4x5x-core.c
@@ -388,7 +388,7 @@ out_power:
 	return ret;
 }
 
-static int tcan4x5x_can_remove(struct spi_device *spi)
+static void tcan4x5x_can_remove(struct spi_device *spi)
 {
 	struct tcan4x5x_priv *priv = spi_get_drvdata(spi);
 
@@ -397,8 +397,6 @@ static int tcan4x5x_can_remove(struct spi_device *spi)
 	tcan4x5x_power_enable(priv->power, 0);
 
 	m_can_class_free_dev(priv->cdev.net);
-
-	return 0;
 }
 
 static const struct of_device_id tcan4x5x_of_match[] = {
diff --git a/drivers/net/can/spi/hi311x.c b/drivers/net/can/spi/hi311x.c
index cfcc14fe3e42d..664b8f14d7b05 100644
--- a/drivers/net/can/spi/hi311x.c
+++ b/drivers/net/can/spi/hi311x.c
@@ -948,7 +948,7 @@ static int hi3110_can_probe(struct spi_device *spi)
 	return dev_err_probe(dev, ret, "Probe failed\n");
 }
 
-static int hi3110_can_remove(struct spi_device *spi)
+static void hi3110_can_remove(struct spi_device *spi)
 {
 	struct hi3110_priv *priv = spi_get_drvdata(spi);
 	struct net_device *net = priv->net;
@@ -960,8 +960,6 @@ static int hi3110_can_remove(struct spi_device *spi)
 	clk_disable_unprepare(priv->clk);
 
 	free_candev(net);
-
-	return 0;
 }
 
 static int __maybe_unused hi3110_can_suspend(struct device *dev)
diff --git a/drivers/net/can/spi/mcp251x.c b/drivers/net/can/spi/mcp251x.c
index 025e07cb74397..d23edaf224204 100644
--- a/drivers/net/can/spi/mcp251x.c
+++ b/drivers/net/can/spi/mcp251x.c
@@ -1427,7 +1427,7 @@ out_free:
 	return ret;
 }
 
-static int mcp251x_can_remove(struct spi_device *spi)
+static void mcp251x_can_remove(struct spi_device *spi)
 {
 	struct mcp251x_priv *priv = spi_get_drvdata(spi);
 	struct net_device *net = priv->net;
@@ -1442,8 +1442,6 @@ static int mcp251x_can_remove(struct spi_device *spi)
 	clk_disable_unprepare(priv->clk);
 
 	free_candev(net);
-
-	return 0;
 }
 
 static int __maybe_unused mcp251x_can_suspend(struct device *dev)
diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
index b5986df6eca0b..65c9b31666a6a 100644
--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
@@ -1966,7 +1966,7 @@ static int mcp251xfd_probe(struct spi_device *spi)
 	return err;
 }
 
-static int mcp251xfd_remove(struct spi_device *spi)
+static void mcp251xfd_remove(struct spi_device *spi)
 {
 	struct mcp251xfd_priv *priv = spi_get_drvdata(spi);
 	struct net_device *ndev = priv->ndev;
@@ -1975,8 +1975,6 @@ static int mcp251xfd_remove(struct spi_device *spi)
 	mcp251xfd_unregister(priv);
 	spi->max_speed_hz = priv->spi_max_speed_hz_orig;
 	free_candev(ndev);
-
-	return 0;
 }
 
 static int __maybe_unused mcp251xfd_runtime_suspend(struct device *device)
diff --git a/drivers/net/dsa/b53/b53_spi.c b/drivers/net/dsa/b53/b53_spi.c
index 2b88f03e52521..0e54b2a0c2118 100644
--- a/drivers/net/dsa/b53/b53_spi.c
+++ b/drivers/net/dsa/b53/b53_spi.c
@@ -314,7 +314,7 @@ static int b53_spi_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int b53_spi_remove(struct spi_device *spi)
+static void b53_spi_remove(struct spi_device *spi)
 {
 	struct b53_device *dev = spi_get_drvdata(spi);
 
@@ -322,8 +322,6 @@ static int b53_spi_remove(struct spi_device *spi)
 		b53_switch_remove(dev);
 
 	spi_set_drvdata(spi, NULL);
-
-	return 0;
 }
 
 static void b53_spi_shutdown(struct spi_device *spi)
diff --git a/drivers/net/dsa/microchip/ksz8795_spi.c b/drivers/net/dsa/microchip/ksz8795_spi.c
index 866767b70d65b..673589dc88ab4 100644
--- a/drivers/net/dsa/microchip/ksz8795_spi.c
+++ b/drivers/net/dsa/microchip/ksz8795_spi.c
@@ -87,7 +87,7 @@ static int ksz8795_spi_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int ksz8795_spi_remove(struct spi_device *spi)
+static void ksz8795_spi_remove(struct spi_device *spi)
 {
 	struct ksz_device *dev = spi_get_drvdata(spi);
 
@@ -95,8 +95,6 @@ static int ksz8795_spi_remove(struct spi_device *spi)
 		ksz_switch_remove(dev);
 
 	spi_set_drvdata(spi, NULL);
-
-	return 0;
 }
 
 static void ksz8795_spi_shutdown(struct spi_device *spi)
diff --git a/drivers/net/dsa/microchip/ksz9477_spi.c b/drivers/net/dsa/microchip/ksz9477_spi.c
index e3cb0e6c9f6f2..940bb9665f150 100644
--- a/drivers/net/dsa/microchip/ksz9477_spi.c
+++ b/drivers/net/dsa/microchip/ksz9477_spi.c
@@ -65,7 +65,7 @@ static int ksz9477_spi_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int ksz9477_spi_remove(struct spi_device *spi)
+static void ksz9477_spi_remove(struct spi_device *spi)
 {
 	struct ksz_device *dev = spi_get_drvdata(spi);
 
@@ -73,8 +73,6 @@ static int ksz9477_spi_remove(struct spi_device *spi)
 		ksz_switch_remove(dev);
 
 	spi_set_drvdata(spi, NULL);
-
-	return 0;
 }
 
 static void ksz9477_spi_shutdown(struct spi_device *spi)
diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index b513713be6101..c2a47c6693b80 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -3346,18 +3346,16 @@ static int sja1105_probe(struct spi_device *spi)
 	return dsa_register_switch(priv->ds);
 }
 
-static int sja1105_remove(struct spi_device *spi)
+static void sja1105_remove(struct spi_device *spi)
 {
 	struct sja1105_private *priv = spi_get_drvdata(spi);
 
 	if (!priv)
-		return 0;
+		return;
 
 	dsa_unregister_switch(priv->ds);
 
 	spi_set_drvdata(spi, NULL);
-
-	return 0;
 }
 
 static void sja1105_shutdown(struct spi_device *spi)
diff --git a/drivers/net/dsa/vitesse-vsc73xx-spi.c b/drivers/net/dsa/vitesse-vsc73xx-spi.c
index 645398901e05e..3110895358d8d 100644
--- a/drivers/net/dsa/vitesse-vsc73xx-spi.c
+++ b/drivers/net/dsa/vitesse-vsc73xx-spi.c
@@ -159,18 +159,16 @@ static int vsc73xx_spi_probe(struct spi_device *spi)
 	return vsc73xx_probe(&vsc_spi->vsc);
 }
 
-static int vsc73xx_spi_remove(struct spi_device *spi)
+static void vsc73xx_spi_remove(struct spi_device *spi)
 {
 	struct vsc73xx_spi *vsc_spi = spi_get_drvdata(spi);
 
 	if (!vsc_spi)
-		return 0;
+		return;
 
 	vsc73xx_remove(&vsc_spi->vsc);
 
 	spi_set_drvdata(spi, NULL);
-
-	return 0;
 }
 
 static void vsc73xx_spi_shutdown(struct spi_device *spi)
diff --git a/drivers/net/ethernet/asix/ax88796c_main.c b/drivers/net/ethernet/asix/ax88796c_main.c
index e7a9f98632587..bf70481bb1cad 100644
--- a/drivers/net/ethernet/asix/ax88796c_main.c
+++ b/drivers/net/ethernet/asix/ax88796c_main.c
@@ -1102,7 +1102,7 @@ err:
 	return ret;
 }
 
-static int ax88796c_remove(struct spi_device *spi)
+static void ax88796c_remove(struct spi_device *spi)
 {
 	struct ax88796c_device *ax_local = dev_get_drvdata(&spi->dev);
 	struct net_device *ndev = ax_local->ndev;
@@ -1112,8 +1112,6 @@ static int ax88796c_remove(struct spi_device *spi)
 	netif_info(ax_local, probe, ndev, "removing network device %s %s\n",
 		   dev_driver_string(&spi->dev),
 		   dev_name(&spi->dev));
-
-	return 0;
 }
 
 #ifdef CONFIG_OF
diff --git a/drivers/net/ethernet/micrel/ks8851_spi.c b/drivers/net/ethernet/micrel/ks8851_spi.c
index 0303e727e99f9..d167d93e4c12f 100644
--- a/drivers/net/ethernet/micrel/ks8851_spi.c
+++ b/drivers/net/ethernet/micrel/ks8851_spi.c
@@ -452,11 +452,9 @@ static int ks8851_probe_spi(struct spi_device *spi)
 	return ks8851_probe_common(netdev, dev, msg_enable);
 }
 
-static int ks8851_remove_spi(struct spi_device *spi)
+static void ks8851_remove_spi(struct spi_device *spi)
 {
 	ks8851_remove_common(&spi->dev);
-
-	return 0;
 }
 
 static const struct of_device_id ks8851_match_table[] = {
diff --git a/drivers/net/ethernet/microchip/enc28j60.c b/drivers/net/ethernet/microchip/enc28j60.c
index 634ac7649c43e..db5a3edb4c3c0 100644
--- a/drivers/net/ethernet/microchip/enc28j60.c
+++ b/drivers/net/ethernet/microchip/enc28j60.c
@@ -1612,15 +1612,13 @@ error_alloc:
 	return ret;
 }
 
-static int enc28j60_remove(struct spi_device *spi)
+static void enc28j60_remove(struct spi_device *spi)
 {
 	struct enc28j60_net *priv = spi_get_drvdata(spi);
 
 	unregister_netdev(priv->netdev);
 	free_irq(spi->irq, priv);
 	free_netdev(priv->netdev);
-
-	return 0;
 }
 
 static const struct of_device_id enc28j60_dt_ids[] = {
diff --git a/drivers/net/ethernet/microchip/encx24j600.c b/drivers/net/ethernet/microchip/encx24j600.c
index b90efc80fb59f..dc1840cb5b10f 100644
--- a/drivers/net/ethernet/microchip/encx24j600.c
+++ b/drivers/net/ethernet/microchip/encx24j600.c
@@ -1093,7 +1093,7 @@ error_out:
 	return ret;
 }
 
-static int encx24j600_spi_remove(struct spi_device *spi)
+static void encx24j600_spi_remove(struct spi_device *spi)
 {
 	struct encx24j600_priv *priv = dev_get_drvdata(&spi->dev);
 
@@ -1101,8 +1101,6 @@ static int encx24j600_spi_remove(struct spi_device *spi)
 	kthread_stop(priv->kworker_task);
 
 	free_netdev(priv->ndev);
-
-	return 0;
 }
 
 static const struct spi_device_id encx24j600_spi_id_table[] = {
diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c
index 955cce644392a..3c5494afd3c04 100644
--- a/drivers/net/ethernet/qualcomm/qca_spi.c
+++ b/drivers/net/ethernet/qualcomm/qca_spi.c
@@ -1001,7 +1001,7 @@ qca_spi_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int
+static void
 qca_spi_remove(struct spi_device *spi)
 {
 	struct net_device *qcaspi_devs = spi_get_drvdata(spi);
@@ -1011,8 +1011,6 @@ qca_spi_remove(struct spi_device *spi)
 
 	unregister_netdev(qcaspi_devs);
 	free_netdev(qcaspi_devs);
-
-	return 0;
 }
 
 static const struct spi_device_id qca_spi_id[] = {
diff --git a/drivers/net/ethernet/vertexcom/mse102x.c b/drivers/net/ethernet/vertexcom/mse102x.c
index 89a31783fbb48..25739b182ac7b 100644
--- a/drivers/net/ethernet/vertexcom/mse102x.c
+++ b/drivers/net/ethernet/vertexcom/mse102x.c
@@ -731,7 +731,7 @@ static int mse102x_probe_spi(struct spi_device *spi)
 	return 0;
 }
 
-static int mse102x_remove_spi(struct spi_device *spi)
+static void mse102x_remove_spi(struct spi_device *spi)
 {
 	struct mse102x_net *mse = dev_get_drvdata(&spi->dev);
 	struct mse102x_net_spi *mses = to_mse102x_spi(mse);
@@ -741,8 +741,6 @@ static int mse102x_remove_spi(struct spi_device *spi)
 
 	mse102x_remove_device_debugfs(mses);
 	unregister_netdev(mse->ndev);
-
-	return 0;
 }
 
 static const struct of_device_id mse102x_match_table[] = {
diff --git a/drivers/net/ethernet/wiznet/w5100-spi.c b/drivers/net/ethernet/wiznet/w5100-spi.c
index 7779a36da3c85..7c52796273a49 100644
--- a/drivers/net/ethernet/wiznet/w5100-spi.c
+++ b/drivers/net/ethernet/wiznet/w5100-spi.c
@@ -461,11 +461,9 @@ static int w5100_spi_probe(struct spi_device *spi)
 	return w5100_probe(&spi->dev, ops, priv_size, mac, spi->irq, -EINVAL);
 }
 
-static int w5100_spi_remove(struct spi_device *spi)
+static void w5100_spi_remove(struct spi_device *spi)
 {
 	w5100_remove(&spi->dev);
-
-	return 0;
 }
 
 static const struct spi_device_id w5100_spi_ids[] = {
diff --git a/drivers/net/ieee802154/adf7242.c b/drivers/net/ieee802154/adf7242.c
index 7db9cbd0f5ded..6afdf1622944e 100644
--- a/drivers/net/ieee802154/adf7242.c
+++ b/drivers/net/ieee802154/adf7242.c
@@ -1304,7 +1304,7 @@ err_alloc_wq:
 	return ret;
 }
 
-static int adf7242_remove(struct spi_device *spi)
+static void adf7242_remove(struct spi_device *spi)
 {
 	struct adf7242_local *lp = spi_get_drvdata(spi);
 
@@ -1316,8 +1316,6 @@ static int adf7242_remove(struct spi_device *spi)
 	ieee802154_unregister_hw(lp->hw);
 	mutex_destroy(&lp->bmux);
 	ieee802154_free_hw(lp->hw);
-
-	return 0;
 }
 
 static const struct of_device_id adf7242_of_match[] = {
diff --git a/drivers/net/ieee802154/at86rf230.c b/drivers/net/ieee802154/at86rf230.c
index 7d67f41387f55..a4734323dc293 100644
--- a/drivers/net/ieee802154/at86rf230.c
+++ b/drivers/net/ieee802154/at86rf230.c
@@ -1759,7 +1759,7 @@ free_dev:
 	return rc;
 }
 
-static int at86rf230_remove(struct spi_device *spi)
+static void at86rf230_remove(struct spi_device *spi)
 {
 	struct at86rf230_local *lp = spi_get_drvdata(spi);
 
@@ -1769,8 +1769,6 @@ static int at86rf230_remove(struct spi_device *spi)
 	ieee802154_free_hw(lp->hw);
 	at86rf230_debugfs_remove();
 	dev_dbg(&spi->dev, "unregistered at86rf230\n");
-
-	return 0;
 }
 
 static const struct of_device_id at86rf230_of_match[] = {
diff --git a/drivers/net/ieee802154/ca8210.c b/drivers/net/ieee802154/ca8210.c
index ece6ff6049f66..b499bbe4d48f2 100644
--- a/drivers/net/ieee802154/ca8210.c
+++ b/drivers/net/ieee802154/ca8210.c
@@ -831,7 +831,7 @@ static void ca8210_rx_done(struct cas_control *cas_ctl)
 finish:;
 }
 
-static int ca8210_remove(struct spi_device *spi_device);
+static void ca8210_remove(struct spi_device *spi_device);
 
 /**
  * ca8210_spi_transfer_complete() - Called when a single spi transfer has
@@ -3048,7 +3048,7 @@ static void ca8210_test_interface_clear(struct ca8210_priv *priv)
  *
  * Return: 0 or linux error code
  */
-static int ca8210_remove(struct spi_device *spi_device)
+static void ca8210_remove(struct spi_device *spi_device)
 {
 	struct ca8210_priv *priv;
 	struct ca8210_platform_data *pdata;
@@ -3088,8 +3088,6 @@ static int ca8210_remove(struct spi_device *spi_device)
 		if (IS_ENABLED(CONFIG_IEEE802154_CA8210_DEBUGFS))
 			ca8210_test_interface_clear(priv);
 	}
-
-	return 0;
 }
 
 /**
diff --git a/drivers/net/ieee802154/cc2520.c b/drivers/net/ieee802154/cc2520.c
index 89c046b204e0c..1e1f40f628a02 100644
--- a/drivers/net/ieee802154/cc2520.c
+++ b/drivers/net/ieee802154/cc2520.c
@@ -1213,7 +1213,7 @@ err_hw_init:
 	return ret;
 }
 
-static int cc2520_remove(struct spi_device *spi)
+static void cc2520_remove(struct spi_device *spi)
 {
 	struct cc2520_private *priv = spi_get_drvdata(spi);
 
@@ -1222,8 +1222,6 @@ static int cc2520_remove(struct spi_device *spi)
 
 	ieee802154_unregister_hw(priv->hw);
 	ieee802154_free_hw(priv->hw);
-
-	return 0;
 }
 
 static const struct spi_device_id cc2520_ids[] = {
diff --git a/drivers/net/ieee802154/mcr20a.c b/drivers/net/ieee802154/mcr20a.c
index 8dc04e2590b18..a3af52a8e6ddf 100644
--- a/drivers/net/ieee802154/mcr20a.c
+++ b/drivers/net/ieee802154/mcr20a.c
@@ -1335,7 +1335,7 @@ free_dev:
 	return ret;
 }
 
-static int mcr20a_remove(struct spi_device *spi)
+static void mcr20a_remove(struct spi_device *spi)
 {
 	struct mcr20a_local *lp = spi_get_drvdata(spi);
 
@@ -1343,8 +1343,6 @@ static int mcr20a_remove(struct spi_device *spi)
 
 	ieee802154_unregister_hw(lp->hw);
 	ieee802154_free_hw(lp->hw);
-
-	return 0;
 }
 
 static const struct of_device_id mcr20a_of_match[] = {
diff --git a/drivers/net/ieee802154/mrf24j40.c b/drivers/net/ieee802154/mrf24j40.c
index ff83e00b77af7..ee4cfbf2c5cc0 100644
--- a/drivers/net/ieee802154/mrf24j40.c
+++ b/drivers/net/ieee802154/mrf24j40.c
@@ -1356,7 +1356,7 @@ err_ret:
 	return ret;
 }
 
-static int mrf24j40_remove(struct spi_device *spi)
+static void mrf24j40_remove(struct spi_device *spi)
 {
 	struct mrf24j40 *devrec = spi_get_drvdata(spi);
 
@@ -1366,8 +1366,6 @@ static int mrf24j40_remove(struct spi_device *spi)
 	ieee802154_free_hw(devrec->hw);
 	/* TODO: Will ieee802154_free_device() wait until ->xmit() is
 	 * complete? */
-
-	return 0;
 }
 
 static const struct of_device_id mrf24j40_of_match[] = {
diff --git a/drivers/net/phy/spi_ks8995.c b/drivers/net/phy/spi_ks8995.c
index 8b5445a724ce5..ff37f8ba6758b 100644
--- a/drivers/net/phy/spi_ks8995.c
+++ b/drivers/net/phy/spi_ks8995.c
@@ -517,7 +517,7 @@ static int ks8995_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int ks8995_remove(struct spi_device *spi)
+static void ks8995_remove(struct spi_device *spi)
 {
 	struct ks8995_switch *ks = spi_get_drvdata(spi);
 
@@ -526,8 +526,6 @@ static int ks8995_remove(struct spi_device *spi)
 	/* assert reset */
 	if (ks->pdata && gpio_is_valid(ks->pdata->reset_gpio))
 		gpiod_set_value(gpio_to_desc(ks->pdata->reset_gpio), 1);
-
-	return 0;
 }
 
 /* ------------------------------------------------------------------------ */
diff --git a/drivers/net/wan/slic_ds26522.c b/drivers/net/wan/slic_ds26522.c
index 8e3b1c717c107..6063552cea9b2 100644
--- a/drivers/net/wan/slic_ds26522.c
+++ b/drivers/net/wan/slic_ds26522.c
@@ -194,10 +194,9 @@ static int slic_ds26522_init_configure(struct spi_device *spi)
 	return 0;
 }
 
-static int slic_ds26522_remove(struct spi_device *spi)
+static void slic_ds26522_remove(struct spi_device *spi)
 {
 	pr_info("DS26522 module uninstalled\n");
-	return 0;
 }
 
 static int slic_ds26522_probe(struct spi_device *spi)
diff --git a/drivers/net/wireless/intersil/p54/p54spi.c b/drivers/net/wireless/intersil/p54/p54spi.c
index ab0fe85658518..f99b7ba69fc3d 100644
--- a/drivers/net/wireless/intersil/p54/p54spi.c
+++ b/drivers/net/wireless/intersil/p54/p54spi.c
@@ -669,7 +669,7 @@ err_free:
 	return ret;
 }
 
-static int p54spi_remove(struct spi_device *spi)
+static void p54spi_remove(struct spi_device *spi)
 {
 	struct p54s_priv *priv = spi_get_drvdata(spi);
 
@@ -684,8 +684,6 @@ static int p54spi_remove(struct spi_device *spi)
 	mutex_destroy(&priv->mutex);
 
 	p54_free_common(priv->hw);
-
-	return 0;
 }
 
 
diff --git a/drivers/net/wireless/marvell/libertas/if_spi.c b/drivers/net/wireless/marvell/libertas/if_spi.c
index cd9f8ecf171f3..ff1c7ec8c450b 100644
--- a/drivers/net/wireless/marvell/libertas/if_spi.c
+++ b/drivers/net/wireless/marvell/libertas/if_spi.c
@@ -1195,7 +1195,7 @@ out:
 	return err;
 }
 
-static int libertas_spi_remove(struct spi_device *spi)
+static void libertas_spi_remove(struct spi_device *spi)
 {
 	struct if_spi_card *card = spi_get_drvdata(spi);
 	struct lbs_private *priv = card->priv;
@@ -1212,8 +1212,6 @@ static int libertas_spi_remove(struct spi_device *spi)
 	if (card->pdata->teardown)
 		card->pdata->teardown(spi);
 	free_if_spi_card(card);
-
-	return 0;
 }
 
 static int if_spi_suspend(struct device *dev)
diff --git a/drivers/net/wireless/microchip/wilc1000/spi.c b/drivers/net/wireless/microchip/wilc1000/spi.c
index 2c2ed4b09efd5..d2db522893997 100644
--- a/drivers/net/wireless/microchip/wilc1000/spi.c
+++ b/drivers/net/wireless/microchip/wilc1000/spi.c
@@ -240,7 +240,7 @@ free:
 	return ret;
 }
 
-static int wilc_bus_remove(struct spi_device *spi)
+static void wilc_bus_remove(struct spi_device *spi)
 {
 	struct wilc *wilc = spi_get_drvdata(spi);
 	struct wilc_spi *spi_priv = wilc->bus_data;
@@ -248,8 +248,6 @@ static int wilc_bus_remove(struct spi_device *spi)
 	clk_disable_unprepare(wilc->rtc_clk);
 	wilc_netdev_cleanup(wilc);
 	kfree(spi_priv);
-
-	return 0;
 }
 
 static const struct of_device_id wilc_of_match[] = {
diff --git a/drivers/net/wireless/st/cw1200/cw1200_spi.c b/drivers/net/wireless/st/cw1200/cw1200_spi.c
index 271ed2ce2d7f9..fe0d220da44d0 100644
--- a/drivers/net/wireless/st/cw1200/cw1200_spi.c
+++ b/drivers/net/wireless/st/cw1200/cw1200_spi.c
@@ -423,7 +423,7 @@ static int cw1200_spi_probe(struct spi_device *func)
 }
 
 /* Disconnect Function to be called by SPI stack when device is disconnected */
-static int cw1200_spi_disconnect(struct spi_device *func)
+static void cw1200_spi_disconnect(struct spi_device *func)
 {
 	struct hwbus_priv *self = spi_get_drvdata(func);
 
@@ -435,8 +435,6 @@ static int cw1200_spi_disconnect(struct spi_device *func)
 		}
 	}
 	cw1200_spi_off(dev_get_platdata(&func->dev));
-
-	return 0;
 }
 
 static int __maybe_unused cw1200_spi_suspend(struct device *dev)
diff --git a/drivers/net/wireless/ti/wl1251/spi.c b/drivers/net/wireless/ti/wl1251/spi.c
index 5b894bd6237ee..9df38726e8b0b 100644
--- a/drivers/net/wireless/ti/wl1251/spi.c
+++ b/drivers/net/wireless/ti/wl1251/spi.c
@@ -327,14 +327,12 @@ out_free:
 	return ret;
 }
 
-static int wl1251_spi_remove(struct spi_device *spi)
+static void wl1251_spi_remove(struct spi_device *spi)
 {
 	struct wl1251 *wl = spi_get_drvdata(spi);
 
 	wl1251_free_hw(wl);
 	regulator_disable(wl->vio);
-
-	return 0;
 }
 
 static struct spi_driver wl1251_spi_driver = {
diff --git a/drivers/net/wireless/ti/wlcore/spi.c b/drivers/net/wireless/ti/wlcore/spi.c
index 354a7e1c3315c..7eae1ec2eb2b3 100644
--- a/drivers/net/wireless/ti/wlcore/spi.c
+++ b/drivers/net/wireless/ti/wlcore/spi.c
@@ -546,13 +546,11 @@ out_dev_put:
 	return ret;
 }
 
-static int wl1271_remove(struct spi_device *spi)
+static void wl1271_remove(struct spi_device *spi)
 {
 	struct wl12xx_spi_glue *glue = spi_get_drvdata(spi);
 
 	platform_device_unregister(glue->core);
-
-	return 0;
 }
 
 static struct spi_driver wl1271_spi_driver = {
diff --git a/drivers/nfc/nfcmrvl/spi.c b/drivers/nfc/nfcmrvl/spi.c
index 5b833a9a83f80..a38e2fcdfd39f 100644
--- a/drivers/nfc/nfcmrvl/spi.c
+++ b/drivers/nfc/nfcmrvl/spi.c
@@ -174,12 +174,11 @@ static int nfcmrvl_spi_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int nfcmrvl_spi_remove(struct spi_device *spi)
+static void nfcmrvl_spi_remove(struct spi_device *spi)
 {
 	struct nfcmrvl_spi_drv_data *drv_data = spi_get_drvdata(spi);
 
 	nfcmrvl_nci_unregister_dev(drv_data->priv);
-	return 0;
 }
 
 static const struct of_device_id of_nfcmrvl_spi_match[] __maybe_unused = {
diff --git a/drivers/nfc/st-nci/spi.c b/drivers/nfc/st-nci/spi.c
index 4e723992e74c2..169eacc0a32ae 100644
--- a/drivers/nfc/st-nci/spi.c
+++ b/drivers/nfc/st-nci/spi.c
@@ -263,13 +263,11 @@ static int st_nci_spi_probe(struct spi_device *dev)
 	return r;
 }
 
-static int st_nci_spi_remove(struct spi_device *dev)
+static void st_nci_spi_remove(struct spi_device *dev)
 {
 	struct st_nci_spi_phy *phy = spi_get_drvdata(dev);
 
 	ndlc_remove(phy->ndlc);
-
-	return 0;
 }
 
 static struct spi_device_id st_nci_spi_id_table[] = {
diff --git a/drivers/nfc/st95hf/core.c b/drivers/nfc/st95hf/core.c
index b23f47936473d..ed704bb772264 100644
--- a/drivers/nfc/st95hf/core.c
+++ b/drivers/nfc/st95hf/core.c
@@ -1198,7 +1198,7 @@ err_disable_regulator:
 	return ret;
 }
 
-static int st95hf_remove(struct spi_device *nfc_spi_dev)
+static void st95hf_remove(struct spi_device *nfc_spi_dev)
 {
 	int result = 0;
 	unsigned char reset_cmd = ST95HF_COMMAND_RESET;
@@ -1236,8 +1236,6 @@ static int st95hf_remove(struct spi_device *nfc_spi_dev)
 	/* disable regulator */
 	if (stcontext->st95hf_supply)
 		regulator_disable(stcontext->st95hf_supply);
-
-	return 0;
 }
 
 /* Register as SPI protocol driver */
diff --git a/drivers/nfc/trf7970a.c b/drivers/nfc/trf7970a.c
index 29ca9c328df2b..21d68664fe082 100644
--- a/drivers/nfc/trf7970a.c
+++ b/drivers/nfc/trf7970a.c
@@ -2144,7 +2144,7 @@ err_destroy_lock:
 	return ret;
 }
 
-static int trf7970a_remove(struct spi_device *spi)
+static void trf7970a_remove(struct spi_device *spi)
 {
 	struct trf7970a *trf = spi_get_drvdata(spi);
 
@@ -2160,8 +2160,6 @@ static int trf7970a_remove(struct spi_device *spi)
 	regulator_disable(trf->regulator);
 
 	mutex_destroy(&trf->lock);
-
-	return 0;
 }
 
 #ifdef CONFIG_PM_SLEEP
diff --git a/drivers/platform/chrome/cros_ec_spi.c b/drivers/platform/chrome/cros_ec_spi.c
index 713c58687721b..8493af0f680e2 100644
--- a/drivers/platform/chrome/cros_ec_spi.c
+++ b/drivers/platform/chrome/cros_ec_spi.c
@@ -786,13 +786,11 @@ static int cros_ec_spi_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int cros_ec_spi_remove(struct spi_device *spi)
+static void cros_ec_spi_remove(struct spi_device *spi)
 {
 	struct cros_ec_device *ec_dev = spi_get_drvdata(spi);
 
 	cros_ec_unregister(ec_dev);
-
-	return 0;
 }
 
 #ifdef CONFIG_PM_SLEEP
diff --git a/drivers/platform/olpc/olpc-xo175-ec.c b/drivers/platform/olpc/olpc-xo175-ec.c
index 0d46706afd2da..4823bd2819f64 100644
--- a/drivers/platform/olpc/olpc-xo175-ec.c
+++ b/drivers/platform/olpc/olpc-xo175-ec.c
@@ -648,7 +648,7 @@ static struct olpc_ec_driver olpc_xo175_ec_driver = {
 	.ec_cmd = olpc_xo175_ec_cmd,
 };
 
-static int olpc_xo175_ec_remove(struct spi_device *spi)
+static void olpc_xo175_ec_remove(struct spi_device *spi)
 {
 	if (pm_power_off == olpc_xo175_ec_power_off)
 		pm_power_off = NULL;
@@ -657,8 +657,6 @@ static int olpc_xo175_ec_remove(struct spi_device *spi)
 
 	platform_device_unregister(olpc_ec);
 	olpc_ec = NULL;
-
-	return 0;
 }
 
 static int olpc_xo175_ec_probe(struct spi_device *spi)
diff --git a/drivers/rtc/rtc-ds1302.c b/drivers/rtc/rtc-ds1302.c
index 2f83adef966eb..6d66ab5a8b176 100644
--- a/drivers/rtc/rtc-ds1302.c
+++ b/drivers/rtc/rtc-ds1302.c
@@ -185,10 +185,9 @@ static int ds1302_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int ds1302_remove(struct spi_device *spi)
+static void ds1302_remove(struct spi_device *spi)
 {
 	spi_set_drvdata(spi, NULL);
-	return 0;
 }
 
 #ifdef CONFIG_OF
diff --git a/drivers/rtc/rtc-ds1305.c b/drivers/rtc/rtc-ds1305.c
index 9ef107b99b658..ed9360486953e 100644
--- a/drivers/rtc/rtc-ds1305.c
+++ b/drivers/rtc/rtc-ds1305.c
@@ -720,7 +720,7 @@ static int ds1305_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int ds1305_remove(struct spi_device *spi)
+static void ds1305_remove(struct spi_device *spi)
 {
 	struct ds1305 *ds1305 = spi_get_drvdata(spi);
 
@@ -730,8 +730,6 @@ static int ds1305_remove(struct spi_device *spi)
 		devm_free_irq(&spi->dev, spi->irq, ds1305);
 		cancel_work_sync(&ds1305->work);
 	}
-
-	return 0;
 }
 
 static struct spi_driver ds1305_driver = {
diff --git a/drivers/rtc/rtc-ds1343.c b/drivers/rtc/rtc-ds1343.c
index f14ed6c96437b..ed5a6ba89a3ee 100644
--- a/drivers/rtc/rtc-ds1343.c
+++ b/drivers/rtc/rtc-ds1343.c
@@ -434,11 +434,9 @@ static int ds1343_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int ds1343_remove(struct spi_device *spi)
+static void ds1343_remove(struct spi_device *spi)
 {
 	dev_pm_clear_wake_irq(&spi->dev);
-
-	return 0;
 }
 
 #ifdef CONFIG_PM_SLEEP
diff --git a/drivers/spi/spi-mem.c b/drivers/spi/spi-mem.c
index 37f4443ce9a09..e9d83d65873bd 100644
--- a/drivers/spi/spi-mem.c
+++ b/drivers/spi/spi-mem.c
@@ -854,15 +854,13 @@ static int spi_mem_probe(struct spi_device *spi)
 	return memdrv->probe(mem);
 }
 
-static int spi_mem_remove(struct spi_device *spi)
+static void spi_mem_remove(struct spi_device *spi)
 {
 	struct spi_mem_driver *memdrv = to_spi_mem_drv(spi->dev.driver);
 	struct spi_mem *mem = spi_get_drvdata(spi);
 
 	if (memdrv->remove)
-		return memdrv->remove(mem);
-
-	return 0;
+		memdrv->remove(mem);
 }
 
 static void spi_mem_shutdown(struct spi_device *spi)
diff --git a/drivers/spi/spi-slave-system-control.c b/drivers/spi/spi-slave-system-control.c
index 169f3d595f60c..d37cfe995a632 100644
--- a/drivers/spi/spi-slave-system-control.c
+++ b/drivers/spi/spi-slave-system-control.c
@@ -132,13 +132,12 @@ static int spi_slave_system_control_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int spi_slave_system_control_remove(struct spi_device *spi)
+static void spi_slave_system_control_remove(struct spi_device *spi)
 {
 	struct spi_slave_system_control_priv *priv = spi_get_drvdata(spi);
 
 	spi_slave_abort(spi);
 	wait_for_completion(&priv->finished);
-	return 0;
 }
 
 static struct spi_driver spi_slave_system_control_driver = {
diff --git a/drivers/spi/spi-slave-time.c b/drivers/spi/spi-slave-time.c
index f2e07a392d686..f56c1afb85340 100644
--- a/drivers/spi/spi-slave-time.c
+++ b/drivers/spi/spi-slave-time.c
@@ -106,13 +106,12 @@ static int spi_slave_time_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int spi_slave_time_remove(struct spi_device *spi)
+static void spi_slave_time_remove(struct spi_device *spi)
 {
 	struct spi_slave_time_priv *priv = spi_get_drvdata(spi);
 
 	spi_slave_abort(spi);
 	wait_for_completion(&priv->finished);
-	return 0;
 }
 
 static struct spi_driver spi_slave_time_driver = {
diff --git a/drivers/spi/spi-tle62x0.c b/drivers/spi/spi-tle62x0.c
index f8ad0709d0152..a565352f63815 100644
--- a/drivers/spi/spi-tle62x0.c
+++ b/drivers/spi/spi-tle62x0.c
@@ -288,7 +288,7 @@ static int tle62x0_probe(struct spi_device *spi)
 	return ret;
 }
 
-static int tle62x0_remove(struct spi_device *spi)
+static void tle62x0_remove(struct spi_device *spi)
 {
 	struct tle62x0_state *st = spi_get_drvdata(spi);
 	int ptr;
@@ -298,7 +298,6 @@ static int tle62x0_remove(struct spi_device *spi)
 
 	device_remove_file(&spi->dev, &dev_attr_status_show);
 	kfree(st);
-	return 0;
 }
 
 static struct spi_driver tle62x0_driver = {
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 4599b121d7442..ead9a132dcb9c 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -404,15 +404,8 @@ static void spi_remove(struct device *dev)
 {
 	const struct spi_driver		*sdrv = to_spi_driver(dev->driver);
 
-	if (sdrv->remove) {
-		int ret;
-
-		ret = sdrv->remove(to_spi_device(dev));
-		if (ret)
-			dev_warn(dev,
-				 "Failed to unbind driver (%pe), ignoring\n",
-				 ERR_PTR(ret));
-	}
+	if (sdrv->remove)
+		sdrv->remove(to_spi_device(dev));
 
 	dev_pm_domain_detach(dev, true);
 }
diff --git a/drivers/spi/spidev.c b/drivers/spi/spidev.c
index a5cceca8b82b6..9468f74308bd5 100644
--- a/drivers/spi/spidev.c
+++ b/drivers/spi/spidev.c
@@ -803,7 +803,7 @@ static int spidev_probe(struct spi_device *spi)
 	return status;
 }
 
-static int spidev_remove(struct spi_device *spi)
+static void spidev_remove(struct spi_device *spi)
 {
 	struct spidev_data	*spidev = spi_get_drvdata(spi);
 
@@ -820,8 +820,6 @@ static int spidev_remove(struct spi_device *spi)
 	if (spidev->users == 0)
 		kfree(spidev);
 	mutex_unlock(&device_list_lock);
-
-	return 0;
 }
 
 static struct spi_driver spidev_spi_driver = {
diff --git a/drivers/staging/fbtft/fbtft.h b/drivers/staging/fbtft/fbtft.h
index 6a7545b5bcd2d..b68f5f9b7c78c 100644
--- a/drivers/staging/fbtft/fbtft.h
+++ b/drivers/staging/fbtft/fbtft.h
@@ -286,12 +286,11 @@ static int fbtft_driver_probe_spi(struct spi_device *spi)			\
 	return fbtft_probe_common(_display, spi, NULL);				\
 }										\
 										\
-static int fbtft_driver_remove_spi(struct spi_device *spi)			\
+static void fbtft_driver_remove_spi(struct spi_device *spi)			\
 {										\
 	struct fb_info *info = spi_get_drvdata(spi);				\
 										\
 	fbtft_remove_common(&spi->dev, info);					\
-	return 0;								\
 }										\
 										\
 static struct spi_driver fbtft_driver_spi_driver = {				\
diff --git a/drivers/staging/pi433/pi433_if.c b/drivers/staging/pi433/pi433_if.c
index 68c09fa016ed3..1d31c35875e33 100644
--- a/drivers/staging/pi433/pi433_if.c
+++ b/drivers/staging/pi433/pi433_if.c
@@ -1264,7 +1264,7 @@ RX_failed:
 	return retval;
 }
 
-static int pi433_remove(struct spi_device *spi)
+static void pi433_remove(struct spi_device *spi)
 {
 	struct pi433_device	*device = spi_get_drvdata(spi);
 
@@ -1284,8 +1284,6 @@ static int pi433_remove(struct spi_device *spi)
 
 	kfree(device->rx_buffer);
 	kfree(device);
-
-	return 0;
 }
 
 static const struct of_device_id pi433_dt_ids[] = {
diff --git a/drivers/staging/wfx/bus_spi.c b/drivers/staging/wfx/bus_spi.c
index 55ffcd7c42e27..fa0ff66a457df 100644
--- a/drivers/staging/wfx/bus_spi.c
+++ b/drivers/staging/wfx/bus_spi.c
@@ -232,12 +232,11 @@ static int wfx_spi_probe(struct spi_device *func)
 	return wfx_probe(bus->core);
 }
 
-static int wfx_spi_remove(struct spi_device *func)
+static void wfx_spi_remove(struct spi_device *func)
 {
 	struct wfx_spi_priv *bus = spi_get_drvdata(func);
 
 	wfx_release(bus->core);
-	return 0;
 }
 
 /* For dynamic driver binding, kernel does not use OF to match driver. It only
diff --git a/drivers/tty/serial/max3100.c b/drivers/tty/serial/max3100.c
index 3c92d4e014887..516cff362434d 100644
--- a/drivers/tty/serial/max3100.c
+++ b/drivers/tty/serial/max3100.c
@@ -805,7 +805,7 @@ static int max3100_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int max3100_remove(struct spi_device *spi)
+static void max3100_remove(struct spi_device *spi)
 {
 	struct max3100_port *s = spi_get_drvdata(spi);
 	int i;
@@ -828,13 +828,12 @@ static int max3100_remove(struct spi_device *spi)
 	for (i = 0; i < MAX_MAX3100; i++)
 		if (max3100s[i]) {
 			mutex_unlock(&max3100s_lock);
-			return 0;
+			return;
 		}
 	pr_debug("removing max3100 driver\n");
 	uart_unregister_driver(&max3100_uart_driver);
 
 	mutex_unlock(&max3100s_lock);
-	return 0;
 }
 
 #ifdef CONFIG_PM_SLEEP
diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c
index dde0824b2fa52..3112b4a054485 100644
--- a/drivers/tty/serial/max310x.c
+++ b/drivers/tty/serial/max310x.c
@@ -1487,10 +1487,9 @@ static int max310x_spi_probe(struct spi_device *spi)
 	return max310x_probe(&spi->dev, devtype, regmap, spi->irq);
 }
 
-static int max310x_spi_remove(struct spi_device *spi)
+static void max310x_spi_remove(struct spi_device *spi)
 {
 	max310x_remove(&spi->dev);
-	return 0;
 }
 
 static const struct spi_device_id max310x_id_table[] = {
diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c
index 64e7e6c8145f8..25d67b8c4db7d 100644
--- a/drivers/tty/serial/sc16is7xx.c
+++ b/drivers/tty/serial/sc16is7xx.c
@@ -1440,11 +1440,9 @@ static int sc16is7xx_spi_probe(struct spi_device *spi)
 	return sc16is7xx_probe(&spi->dev, devtype, regmap, spi->irq);
 }
 
-static int sc16is7xx_spi_remove(struct spi_device *spi)
+static void sc16is7xx_spi_remove(struct spi_device *spi)
 {
 	sc16is7xx_remove(&spi->dev);
-
-	return 0;
 }
 
 static const struct spi_device_id sc16is7xx_spi_id_table[] = {
diff --git a/drivers/usb/gadget/udc/max3420_udc.c b/drivers/usb/gadget/udc/max3420_udc.c
index d2a2b20cc1ad4..7d9bd16190c09 100644
--- a/drivers/usb/gadget/udc/max3420_udc.c
+++ b/drivers/usb/gadget/udc/max3420_udc.c
@@ -1292,7 +1292,7 @@ del_gadget:
 	return err;
 }
 
-static int max3420_remove(struct spi_device *spi)
+static void max3420_remove(struct spi_device *spi)
 {
 	struct max3420_udc *udc = spi_get_drvdata(spi);
 	unsigned long flags;
@@ -1304,8 +1304,6 @@ static int max3420_remove(struct spi_device *spi)
 	kthread_stop(udc->thread_task);
 
 	spin_unlock_irqrestore(&udc->lock, flags);
-
-	return 0;
 }
 
 static const struct of_device_id max3420_udc_of_match[] = {
diff --git a/drivers/usb/host/max3421-hcd.c b/drivers/usb/host/max3421-hcd.c
index 30de85a707fef..99a5523a79fb9 100644
--- a/drivers/usb/host/max3421-hcd.c
+++ b/drivers/usb/host/max3421-hcd.c
@@ -1926,7 +1926,7 @@ error:
 	return retval;
 }
 
-static int
+static void
 max3421_remove(struct spi_device *spi)
 {
 	struct max3421_hcd *max3421_hcd;
@@ -1947,7 +1947,6 @@ max3421_remove(struct spi_device *spi)
 	free_irq(spi->irq, hcd);
 
 	usb_put_hcd(hcd);
-	return 0;
 }
 
 static const struct of_device_id max3421_of_match_table[] = {
diff --git a/drivers/video/backlight/ams369fg06.c b/drivers/video/backlight/ams369fg06.c
index 8a4361e95a114..522dd81110b8e 100644
--- a/drivers/video/backlight/ams369fg06.c
+++ b/drivers/video/backlight/ams369fg06.c
@@ -506,12 +506,11 @@ static int ams369fg06_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int ams369fg06_remove(struct spi_device *spi)
+static void ams369fg06_remove(struct spi_device *spi)
 {
 	struct ams369fg06 *lcd = spi_get_drvdata(spi);
 
 	ams369fg06_power(lcd, FB_BLANK_POWERDOWN);
-	return 0;
 }
 
 #ifdef CONFIG_PM_SLEEP
diff --git a/drivers/video/backlight/corgi_lcd.c b/drivers/video/backlight/corgi_lcd.c
index 33f5d80495e6f..0a57033ae31d1 100644
--- a/drivers/video/backlight/corgi_lcd.c
+++ b/drivers/video/backlight/corgi_lcd.c
@@ -542,7 +542,7 @@ static int corgi_lcd_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int corgi_lcd_remove(struct spi_device *spi)
+static void corgi_lcd_remove(struct spi_device *spi)
 {
 	struct corgi_lcd *lcd = spi_get_drvdata(spi);
 
@@ -550,7 +550,6 @@ static int corgi_lcd_remove(struct spi_device *spi)
 	lcd->bl_dev->props.brightness = 0;
 	backlight_update_status(lcd->bl_dev);
 	corgi_lcd_set_power(lcd->lcd_dev, FB_BLANK_POWERDOWN);
-	return 0;
 }
 
 static struct spi_driver corgi_lcd_driver = {
diff --git a/drivers/video/backlight/ili922x.c b/drivers/video/backlight/ili922x.c
index 328aba9cddad1..e7b6bd827986f 100644
--- a/drivers/video/backlight/ili922x.c
+++ b/drivers/video/backlight/ili922x.c
@@ -526,10 +526,9 @@ static int ili922x_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int ili922x_remove(struct spi_device *spi)
+static void ili922x_remove(struct spi_device *spi)
 {
 	ili922x_poweroff(spi);
-	return 0;
 }
 
 static struct spi_driver ili922x_driver = {
diff --git a/drivers/video/backlight/l4f00242t03.c b/drivers/video/backlight/l4f00242t03.c
index 46f97d1c3d219..cc763cf15f53e 100644
--- a/drivers/video/backlight/l4f00242t03.c
+++ b/drivers/video/backlight/l4f00242t03.c
@@ -223,12 +223,11 @@ static int l4f00242t03_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int l4f00242t03_remove(struct spi_device *spi)
+static void l4f00242t03_remove(struct spi_device *spi)
 {
 	struct l4f00242t03_priv *priv = spi_get_drvdata(spi);
 
 	l4f00242t03_lcd_power_set(priv->ld, FB_BLANK_POWERDOWN);
-	return 0;
 }
 
 static void l4f00242t03_shutdown(struct spi_device *spi)
diff --git a/drivers/video/backlight/lms501kf03.c b/drivers/video/backlight/lms501kf03.c
index f949b66dce1be..5c46df8022bf4 100644
--- a/drivers/video/backlight/lms501kf03.c
+++ b/drivers/video/backlight/lms501kf03.c
@@ -364,12 +364,11 @@ static int lms501kf03_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int lms501kf03_remove(struct spi_device *spi)
+static void lms501kf03_remove(struct spi_device *spi)
 {
 	struct lms501kf03 *lcd = spi_get_drvdata(spi);
 
 	lms501kf03_power(lcd, FB_BLANK_POWERDOWN);
-	return 0;
 }
 
 #ifdef CONFIG_PM_SLEEP
diff --git a/drivers/video/backlight/ltv350qv.c b/drivers/video/backlight/ltv350qv.c
index 5cbf621e48bd3..b6d373af6e3f1 100644
--- a/drivers/video/backlight/ltv350qv.c
+++ b/drivers/video/backlight/ltv350qv.c
@@ -255,12 +255,11 @@ static int ltv350qv_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int ltv350qv_remove(struct spi_device *spi)
+static void ltv350qv_remove(struct spi_device *spi)
 {
 	struct ltv350qv *lcd = spi_get_drvdata(spi);
 
 	ltv350qv_power(lcd, FB_BLANK_POWERDOWN);
-	return 0;
 }
 
 #ifdef CONFIG_PM_SLEEP
diff --git a/drivers/video/backlight/tdo24m.c b/drivers/video/backlight/tdo24m.c
index 0de044dcafd50..fc6fbaf855943 100644
--- a/drivers/video/backlight/tdo24m.c
+++ b/drivers/video/backlight/tdo24m.c
@@ -397,12 +397,11 @@ static int tdo24m_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int tdo24m_remove(struct spi_device *spi)
+static void tdo24m_remove(struct spi_device *spi)
 {
 	struct tdo24m *lcd = spi_get_drvdata(spi);
 
 	tdo24m_power(lcd, FB_BLANK_POWERDOWN);
-	return 0;
 }
 
 #ifdef CONFIG_PM_SLEEP
diff --git a/drivers/video/backlight/tosa_lcd.c b/drivers/video/backlight/tosa_lcd.c
index 38765544345b8..23d6c6bf0f543 100644
--- a/drivers/video/backlight/tosa_lcd.c
+++ b/drivers/video/backlight/tosa_lcd.c
@@ -232,15 +232,13 @@ err_register:
 	return ret;
 }
 
-static int tosa_lcd_remove(struct spi_device *spi)
+static void tosa_lcd_remove(struct spi_device *spi)
 {
 	struct tosa_lcd_data *data = spi_get_drvdata(spi);
 
 	i2c_unregister_device(data->i2c);
 
 	tosa_lcd_tg_off(data);
-
-	return 0;
 }
 
 #ifdef CONFIG_PM_SLEEP
diff --git a/drivers/video/backlight/vgg2432a4.c b/drivers/video/backlight/vgg2432a4.c
index 3567b45f9ba9f..bfc1913e8b55e 100644
--- a/drivers/video/backlight/vgg2432a4.c
+++ b/drivers/video/backlight/vgg2432a4.c
@@ -233,11 +233,9 @@ static int vgg2432a4_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int vgg2432a4_remove(struct spi_device *spi)
+static void vgg2432a4_remove(struct spi_device *spi)
 {
 	ili9320_remove(spi_get_drvdata(spi));
-
-	return 0;
 }
 
 static void vgg2432a4_shutdown(struct spi_device *spi)
diff --git a/drivers/video/fbdev/omap/lcd_mipid.c b/drivers/video/fbdev/omap/lcd_mipid.c
index a75ae0c9b14c7..03cff39d392db 100644
--- a/drivers/video/fbdev/omap/lcd_mipid.c
+++ b/drivers/video/fbdev/omap/lcd_mipid.c
@@ -570,14 +570,12 @@ static int mipid_spi_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int mipid_spi_remove(struct spi_device *spi)
+static void mipid_spi_remove(struct spi_device *spi)
 {
 	struct mipid_device *md = dev_get_drvdata(&spi->dev);
 
 	mipid_disable(&md->panel);
 	kfree(md);
-
-	return 0;
 }
 
 static struct spi_driver mipid_spi_driver = {
diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-lgphilips-lb035q02.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-lgphilips-lb035q02.c
index 1bec7a4422e80..aab67721263d8 100644
--- a/drivers/video/fbdev/omap2/omapfb/displays/panel-lgphilips-lb035q02.c
+++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-lgphilips-lb035q02.c
@@ -316,7 +316,7 @@ err_gpio:
 	return r;
 }
 
-static int lb035q02_panel_spi_remove(struct spi_device *spi)
+static void lb035q02_panel_spi_remove(struct spi_device *spi)
 {
 	struct panel_drv_data *ddata = spi_get_drvdata(spi);
 	struct omap_dss_device *dssdev = &ddata->dssdev;
@@ -328,8 +328,6 @@ static int lb035q02_panel_spi_remove(struct spi_device *spi)
 	lb035q02_disconnect(dssdev);
 
 	omap_dss_put_device(in);
-
-	return 0;
 }
 
 static const struct of_device_id lb035q02_of_match[] = {
diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-nec-nl8048hl11.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-nec-nl8048hl11.c
index dff9ebbadfc0f..be9910ff6e62d 100644
--- a/drivers/video/fbdev/omap2/omapfb/displays/panel-nec-nl8048hl11.c
+++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-nec-nl8048hl11.c
@@ -327,7 +327,7 @@ err_gpio:
 	return r;
 }
 
-static int nec_8048_remove(struct spi_device *spi)
+static void nec_8048_remove(struct spi_device *spi)
 {
 	struct panel_drv_data *ddata = dev_get_drvdata(&spi->dev);
 	struct omap_dss_device *dssdev = &ddata->dssdev;
@@ -341,8 +341,6 @@ static int nec_8048_remove(struct spi_device *spi)
 	nec_8048_disconnect(dssdev);
 
 	omap_dss_put_device(in);
-
-	return 0;
 }
 
 #ifdef CONFIG_PM_SLEEP
diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-sony-acx565akm.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-sony-acx565akm.c
index 8d8b5ff7d43c8..a909b5385ca5b 100644
--- a/drivers/video/fbdev/omap2/omapfb/displays/panel-sony-acx565akm.c
+++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-sony-acx565akm.c
@@ -857,7 +857,7 @@ err_gpio:
 	return r;
 }
 
-static int acx565akm_remove(struct spi_device *spi)
+static void acx565akm_remove(struct spi_device *spi)
 {
 	struct panel_drv_data *ddata = dev_get_drvdata(&spi->dev);
 	struct omap_dss_device *dssdev = &ddata->dssdev;
@@ -874,8 +874,6 @@ static int acx565akm_remove(struct spi_device *spi)
 	acx565akm_disconnect(dssdev);
 
 	omap_dss_put_device(in);
-
-	return 0;
 }
 
 static const struct of_device_id acx565akm_of_match[] = {
diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td028ttec1.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td028ttec1.c
index 595ebd8bd5dcb..3c0f887d30926 100644
--- a/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td028ttec1.c
+++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td028ttec1.c
@@ -425,7 +425,7 @@ err_reg:
 	return r;
 }
 
-static int td028ttec1_panel_remove(struct spi_device *spi)
+static void td028ttec1_panel_remove(struct spi_device *spi)
 {
 	struct panel_drv_data *ddata = dev_get_drvdata(&spi->dev);
 	struct omap_dss_device *dssdev = &ddata->dssdev;
@@ -439,8 +439,6 @@ static int td028ttec1_panel_remove(struct spi_device *spi)
 	td028ttec1_panel_disconnect(dssdev);
 
 	omap_dss_put_device(in);
-
-	return 0;
 }
 
 static const struct of_device_id td028ttec1_of_match[] = {
diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td043mtea1.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td043mtea1.c
index afac1d9445aa2..58bbba7c037fa 100644
--- a/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td043mtea1.c
+++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td043mtea1.c
@@ -564,7 +564,7 @@ err_regulator:
 	return r;
 }
 
-static int tpo_td043_remove(struct spi_device *spi)
+static void tpo_td043_remove(struct spi_device *spi)
 {
 	struct panel_drv_data *ddata = dev_get_drvdata(&spi->dev);
 	struct omap_dss_device *dssdev = &ddata->dssdev;
@@ -580,8 +580,6 @@ static int tpo_td043_remove(struct spi_device *spi)
 	omap_dss_put_device(in);
 
 	sysfs_remove_group(&spi->dev.kobj, &tpo_td043_attr_group);
-
-	return 0;
 }
 
 #ifdef CONFIG_PM_SLEEP
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 7ab3fed7b8043..c84e61b99c7b0 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -280,7 +280,7 @@ struct spi_message;
 struct spi_driver {
 	const struct spi_device_id *id_table;
 	int			(*probe)(struct spi_device *spi);
-	int			(*remove)(struct spi_device *spi);
+	void			(*remove)(struct spi_device *spi);
 	void			(*shutdown)(struct spi_device *spi);
 	struct device_driver	driver;
 };
diff --git a/sound/pci/hda/cs35l41_hda_spi.c b/sound/pci/hda/cs35l41_hda_spi.c
index 9f8123893cc86..50eb6c0e66588 100644
--- a/sound/pci/hda/cs35l41_hda_spi.c
+++ b/sound/pci/hda/cs35l41_hda_spi.c
@@ -28,11 +28,9 @@ static int cs35l41_hda_spi_probe(struct spi_device *spi)
 				 devm_regmap_init_spi(spi, &cs35l41_regmap_spi));
 }
 
-static int cs35l41_hda_spi_remove(struct spi_device *spi)
+static void cs35l41_hda_spi_remove(struct spi_device *spi)
 {
 	cs35l41_hda_remove(&spi->dev);
-
-	return 0;
 }
 
 static const struct spi_device_id cs35l41_hda_spi_id[] = {
diff --git a/sound/soc/codecs/adau1761-spi.c b/sound/soc/codecs/adau1761-spi.c
index 655689c9778ac..7c9242c2ff94b 100644
--- a/sound/soc/codecs/adau1761-spi.c
+++ b/sound/soc/codecs/adau1761-spi.c
@@ -45,10 +45,9 @@ static int adau1761_spi_probe(struct spi_device *spi)
 		id->driver_data, adau1761_spi_switch_mode);
 }
 
-static int adau1761_spi_remove(struct spi_device *spi)
+static void adau1761_spi_remove(struct spi_device *spi)
 {
 	adau17x1_remove(&spi->dev);
-	return 0;
 }
 
 static const struct spi_device_id adau1761_spi_id[] = {
diff --git a/sound/soc/codecs/adau1781-spi.c b/sound/soc/codecs/adau1781-spi.c
index bb5613574786b..1a09633d5a885 100644
--- a/sound/soc/codecs/adau1781-spi.c
+++ b/sound/soc/codecs/adau1781-spi.c
@@ -45,10 +45,9 @@ static int adau1781_spi_probe(struct spi_device *spi)
 		id->driver_data, adau1781_spi_switch_mode);
 }
 
-static int adau1781_spi_remove(struct spi_device *spi)
+static void adau1781_spi_remove(struct spi_device *spi)
 {
 	adau17x1_remove(&spi->dev);
-	return 0;
 }
 
 static const struct spi_device_id adau1781_spi_id[] = {
diff --git a/sound/soc/codecs/cs35l41-spi.c b/sound/soc/codecs/cs35l41-spi.c
index 6dfd5459aa207..169221a5b09f7 100644
--- a/sound/soc/codecs/cs35l41-spi.c
+++ b/sound/soc/codecs/cs35l41-spi.c
@@ -55,13 +55,11 @@ static int cs35l41_spi_probe(struct spi_device *spi)
 	return cs35l41_probe(cs35l41, pdata);
 }
 
-static int cs35l41_spi_remove(struct spi_device *spi)
+static void cs35l41_spi_remove(struct spi_device *spi)
 {
 	struct cs35l41_private *cs35l41 = spi_get_drvdata(spi);
 
 	cs35l41_remove(cs35l41);
-
-	return 0;
 }
 
 #ifdef CONFIG_OF
diff --git a/sound/soc/codecs/pcm3168a-spi.c b/sound/soc/codecs/pcm3168a-spi.c
index ecd379f308e6b..b5b08046f5454 100644
--- a/sound/soc/codecs/pcm3168a-spi.c
+++ b/sound/soc/codecs/pcm3168a-spi.c
@@ -26,11 +26,9 @@ static int pcm3168a_spi_probe(struct spi_device *spi)
 	return pcm3168a_probe(&spi->dev, regmap);
 }
 
-static int pcm3168a_spi_remove(struct spi_device *spi)
+static void pcm3168a_spi_remove(struct spi_device *spi)
 {
 	pcm3168a_remove(&spi->dev);
-
-	return 0;
 }
 
 static const struct spi_device_id pcm3168a_spi_id[] = {
diff --git a/sound/soc/codecs/pcm512x-spi.c b/sound/soc/codecs/pcm512x-spi.c
index 7cf559b47e1c3..4d29e71963800 100644
--- a/sound/soc/codecs/pcm512x-spi.c
+++ b/sound/soc/codecs/pcm512x-spi.c
@@ -26,10 +26,9 @@ static int pcm512x_spi_probe(struct spi_device *spi)
 	return pcm512x_probe(&spi->dev, regmap);
 }
 
-static int pcm512x_spi_remove(struct spi_device *spi)
+static void pcm512x_spi_remove(struct spi_device *spi)
 {
 	pcm512x_remove(&spi->dev);
-	return 0;
 }
 
 static const struct spi_device_id pcm512x_spi_id[] = {
diff --git a/sound/soc/codecs/tlv320aic32x4-spi.c b/sound/soc/codecs/tlv320aic32x4-spi.c
index a8958cd1c6921..03cce8d6404f8 100644
--- a/sound/soc/codecs/tlv320aic32x4-spi.c
+++ b/sound/soc/codecs/tlv320aic32x4-spi.c
@@ -46,11 +46,9 @@ static int aic32x4_spi_probe(struct spi_device *spi)
 	return aic32x4_probe(&spi->dev, regmap);
 }
 
-static int aic32x4_spi_remove(struct spi_device *spi)
+static void aic32x4_spi_remove(struct spi_device *spi)
 {
 	aic32x4_remove(&spi->dev);
-
-	return 0;
 }
 
 static const struct spi_device_id aic32x4_spi_id[] = {
diff --git a/sound/soc/codecs/tlv320aic3x-spi.c b/sound/soc/codecs/tlv320aic3x-spi.c
index 494e844022321..deed6ec7e0816 100644
--- a/sound/soc/codecs/tlv320aic3x-spi.c
+++ b/sound/soc/codecs/tlv320aic3x-spi.c
@@ -35,11 +35,9 @@ static int aic3x_spi_probe(struct spi_device *spi)
 	return aic3x_probe(&spi->dev, regmap, id->driver_data);
 }
 
-static int aic3x_spi_remove(struct spi_device *spi)
+static void aic3x_spi_remove(struct spi_device *spi)
 {
 	aic3x_remove(&spi->dev);
-
-	return 0;
 }
 
 static const struct spi_device_id aic3x_spi_id[] = {
diff --git a/sound/soc/codecs/wm0010.c b/sound/soc/codecs/wm0010.c
index 28b4656c4e149..1bef1c500c8e3 100644
--- a/sound/soc/codecs/wm0010.c
+++ b/sound/soc/codecs/wm0010.c
@@ -969,7 +969,7 @@ static int wm0010_spi_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int wm0010_spi_remove(struct spi_device *spi)
+static void wm0010_spi_remove(struct spi_device *spi)
 {
 	struct wm0010_priv *wm0010 = spi_get_drvdata(spi);
 
@@ -980,8 +980,6 @@ static int wm0010_spi_remove(struct spi_device *spi)
 
 	if (wm0010->irq)
 		free_irq(wm0010->irq, wm0010);
-
-	return 0;
 }
 
 static struct spi_driver wm0010_spi_driver = {
diff --git a/sound/soc/codecs/wm8804-spi.c b/sound/soc/codecs/wm8804-spi.c
index 9a8da1511c34b..628568724c200 100644
--- a/sound/soc/codecs/wm8804-spi.c
+++ b/sound/soc/codecs/wm8804-spi.c
@@ -24,10 +24,9 @@ static int wm8804_spi_probe(struct spi_device *spi)
 	return wm8804_probe(&spi->dev, regmap);
 }
 
-static int wm8804_spi_remove(struct spi_device *spi)
+static void wm8804_spi_remove(struct spi_device *spi)
 {
 	wm8804_remove(&spi->dev);
-	return 0;
 }
 
 static const struct of_device_id wm8804_of_match[] = {
diff --git a/sound/spi/at73c213.c b/sound/spi/at73c213.c
index 76c0e37a838cf..56d2c712e2571 100644
--- a/sound/spi/at73c213.c
+++ b/sound/spi/at73c213.c
@@ -1001,7 +1001,7 @@ out:
 	return retval;
 }
 
-static int snd_at73c213_remove(struct spi_device *spi)
+static void snd_at73c213_remove(struct spi_device *spi)
 {
 	struct snd_card *card = dev_get_drvdata(&spi->dev);
 	struct snd_at73c213 *chip = card->private_data;
@@ -1066,8 +1066,6 @@ out:
 
 	ssc_free(chip->ssc);
 	snd_card_free(card);
-
-	return 0;
 }
 
 #ifdef CONFIG_PM_SLEEP
-- 
GitLab


From 1f8863bfb5ca500ea1c7669b16b1931ba27fce20 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Tue, 1 Feb 2022 12:02:59 +0000
Subject: [PATCH 0322/1586] genirq: Allow the PM device to originate from irq
 domain

As a preparation to moving the reference to the device used for
runtime power management, add a new 'dev' field to the irqdomain
structure for that exact purpose.

The irq_chip_pm_{get,put}() helpers are made aware of the dual
location via a new private helper.

No functional change intended.

Signed-off-by: Marc Zyngier <maz@kernel.org>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Tested-by: Geert Uytterhoeven <geert+renesas@glider.be>
Tested-by: Tony Lindgren <tony@atomide.com>
Acked-by: Bartosz Golaszewski <brgl@bgdev.pl>
Link: https://lore.kernel.org/r/20220201120310.878267-2-maz@kernel.org
---
 include/linux/irqdomain.h | 10 ++++++++++
 kernel/irq/chip.c         | 23 ++++++++++++++++++-----
 2 files changed, 28 insertions(+), 5 deletions(-)

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index d476405802e97..be25a33293e57 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -151,6 +151,8 @@ struct irq_domain_chip_generic;
  * @gc: Pointer to a list of generic chips. There is a helper function for
  *      setting up one or more generic chips for interrupt controllers
  *      drivers using the generic chip library which uses this pointer.
+ * @dev: Pointer to a device that the domain represent, and that will be
+ *       used for power management purposes.
  * @parent: Pointer to parent irq_domain to support hierarchy irq_domains
  *
  * Revmap data, used internally by irq_domain
@@ -171,6 +173,7 @@ struct irq_domain {
 	struct fwnode_handle *fwnode;
 	enum irq_domain_bus_token bus_token;
 	struct irq_domain_chip_generic *gc;
+	struct device *dev;
 #ifdef	CONFIG_IRQ_DOMAIN_HIERARCHY
 	struct irq_domain *parent;
 #endif
@@ -226,6 +229,13 @@ static inline struct device_node *irq_domain_get_of_node(struct irq_domain *d)
 	return to_of_node(d->fwnode);
 }
 
+static inline void irq_domain_set_pm_device(struct irq_domain *d,
+					    struct device *dev)
+{
+	if (d)
+		d->dev = dev;
+}
+
 #ifdef CONFIG_IRQ_DOMAIN
 struct fwnode_handle *__irq_domain_alloc_fwnode(unsigned int type, int id,
 						const char *name, phys_addr_t *pa);
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index c093246630882..a2a12cdbe8725 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -1558,6 +1558,17 @@ int irq_chip_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
 	return 0;
 }
 
+static struct device *irq_get_parent_device(struct irq_data *data)
+{
+	if (data->chip->parent_device)
+		return data->chip->parent_device;
+
+	if (data->domain)
+		return data->domain->dev;
+
+	return NULL;
+}
+
 /**
  * irq_chip_pm_get - Enable power for an IRQ chip
  * @data:	Pointer to interrupt specific data
@@ -1567,12 +1578,13 @@ int irq_chip_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
  */
 int irq_chip_pm_get(struct irq_data *data)
 {
+	struct device *dev = irq_get_parent_device(data);
 	int retval;
 
-	if (IS_ENABLED(CONFIG_PM) && data->chip->parent_device) {
-		retval = pm_runtime_get_sync(data->chip->parent_device);
+	if (IS_ENABLED(CONFIG_PM) && dev) {
+		retval = pm_runtime_get_sync(dev);
 		if (retval < 0) {
-			pm_runtime_put_noidle(data->chip->parent_device);
+			pm_runtime_put_noidle(dev);
 			return retval;
 		}
 	}
@@ -1590,10 +1602,11 @@ int irq_chip_pm_get(struct irq_data *data)
  */
 int irq_chip_pm_put(struct irq_data *data)
 {
+	struct device *dev = irq_get_parent_device(data);
 	int retval = 0;
 
-	if (IS_ENABLED(CONFIG_PM) && data->chip->parent_device)
-		retval = pm_runtime_put(data->chip->parent_device);
+	if (IS_ENABLED(CONFIG_PM) && dev)
+		retval = pm_runtime_put(dev);
 
 	return (retval < 0) ? retval : 0;
 }
-- 
GitLab


From e95f3efdeb499accf2b05333a1eac7862f5a10f6 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Tue, 1 Feb 2022 12:03:00 +0000
Subject: [PATCH 0323/1586] irqchip/gic: Move PM device over to irq domain

Move the reference to the GIC device over to the irq domain.
This allows for some localised cleanup.

Signed-off-by: Marc Zyngier <maz@kernel.org>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Tested-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Bartosz Golaszewski <brgl@bgdev.pl>
Link: https://lore.kernel.org/r/20220201120310.878267-3-maz@kernel.org
---
 drivers/irqchip/irq-gic.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index b8bb46c65a97a..fb741b42ca2d2 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -1127,13 +1127,12 @@ static const struct irq_domain_ops gic_irq_domain_ops = {
 	.unmap = gic_irq_domain_unmap,
 };
 
-static void gic_init_chip(struct gic_chip_data *gic, struct device *dev,
-			  const char *name, bool use_eoimode1)
+static void gic_init_chip(struct gic_chip_data *gic, const char *name,
+			  bool use_eoimode1)
 {
 	/* Initialize irq_chip */
 	gic->chip = gic_chip;
 	gic->chip.name = name;
-	gic->chip.parent_device = dev;
 
 	if (use_eoimode1) {
 		gic->chip.irq_mask = gic_eoimode1_mask_irq;
@@ -1268,10 +1267,10 @@ static int __init __gic_init_bases(struct gic_chip_data *gic,
 
 	if (static_branch_likely(&supports_deactivate_key) && gic == &gic_data[0]) {
 		name = kasprintf(GFP_KERNEL, "GICv2");
-		gic_init_chip(gic, NULL, name, true);
+		gic_init_chip(gic, name, true);
 	} else {
 		name = kasprintf(GFP_KERNEL, "GIC-%d", (int)(gic-&gic_data[0]));
-		gic_init_chip(gic, NULL, name, false);
+		gic_init_chip(gic, name, false);
 	}
 
 	ret = gic_init_bases(gic, handle);
@@ -1460,7 +1459,7 @@ int gic_of_init_child(struct device *dev, struct gic_chip_data **gic, int irq)
 	if (!*gic)
 		return -ENOMEM;
 
-	gic_init_chip(*gic, dev, dev->of_node->name, false);
+	gic_init_chip(*gic, dev->of_node->name, false);
 
 	ret = gic_of_setup(*gic, dev->of_node);
 	if (ret)
@@ -1472,6 +1471,7 @@ int gic_of_init_child(struct device *dev, struct gic_chip_data **gic, int irq)
 		return ret;
 	}
 
+	irq_domain_set_pm_device((*gic)->domain, dev);
 	irq_set_chained_handler_and_data(irq, gic_handle_cascade_irq, *gic);
 
 	return 0;
-- 
GitLab


From c2ea6b9b03c1acde89c6100aff894e64386e72cc Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Tue, 1 Feb 2022 12:03:01 +0000
Subject: [PATCH 0324/1586] irqchip/renesas-intc-irqpin: Move PM device over to
 irq domain

Move the reference to the device over to the irq domain.

Signed-off-by: Marc Zyngier <maz@kernel.org>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Tested-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Bartosz Golaszewski <brgl@bgdev.pl>
Link: https://lore.kernel.org/r/20220201120310.878267-4-maz@kernel.org
---
 drivers/irqchip/irq-renesas-intc-irqpin.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/irqchip/irq-renesas-intc-irqpin.c b/drivers/irqchip/irq-renesas-intc-irqpin.c
index 37f9a4499fdb3..e83756aca14e0 100644
--- a/drivers/irqchip/irq-renesas-intc-irqpin.c
+++ b/drivers/irqchip/irq-renesas-intc-irqpin.c
@@ -508,7 +508,6 @@ static int intc_irqpin_probe(struct platform_device *pdev)
 
 	irq_chip = &p->irq_chip;
 	irq_chip->name = "intc-irqpin";
-	irq_chip->parent_device = dev;
 	irq_chip->irq_mask = disable_fn;
 	irq_chip->irq_unmask = enable_fn;
 	irq_chip->irq_set_type = intc_irqpin_irq_set_type;
@@ -523,6 +522,8 @@ static int intc_irqpin_probe(struct platform_device *pdev)
 		goto err0;
 	}
 
+	irq_domain_set_pm_device(p->irq_domain, dev);
+
 	if (p->shared_irqs) {
 		/* request one shared interrupt */
 		if (devm_request_irq(dev, p->irq[0].requested_irq,
-- 
GitLab


From c3ec838e3a390e62cb71a11041db43d0b3b42c99 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Tue, 1 Feb 2022 12:03:02 +0000
Subject: [PATCH 0325/1586] irqchip/renesas-irqc: Move PM device over to irq
 domain

Move the reference to the device over to the irq domain.

Signed-off-by: Marc Zyngier <maz@kernel.org>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Tested-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Bartosz Golaszewski <brgl@bgdev.pl>
Link: https://lore.kernel.org/r/20220201120310.878267-5-maz@kernel.org
---
 drivers/irqchip/irq-renesas-irqc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/irqchip/irq-renesas-irqc.c b/drivers/irqchip/irq-renesas-irqc.c
index 909325f88239d..1ee5e9941f671 100644
--- a/drivers/irqchip/irq-renesas-irqc.c
+++ b/drivers/irqchip/irq-renesas-irqc.c
@@ -188,13 +188,14 @@ static int irqc_probe(struct platform_device *pdev)
 	p->gc->reg_base = p->cpu_int_base;
 	p->gc->chip_types[0].regs.enable = IRQC_EN_SET;
 	p->gc->chip_types[0].regs.disable = IRQC_EN_STS;
-	p->gc->chip_types[0].chip.parent_device = dev;
 	p->gc->chip_types[0].chip.irq_mask = irq_gc_mask_disable_reg;
 	p->gc->chip_types[0].chip.irq_unmask = irq_gc_unmask_enable_reg;
 	p->gc->chip_types[0].chip.irq_set_type	= irqc_irq_set_type;
 	p->gc->chip_types[0].chip.irq_set_wake	= irqc_irq_set_wake;
 	p->gc->chip_types[0].chip.flags	= IRQCHIP_MASK_ON_SUSPEND;
 
+	irq_domain_set_pm_device(p->irq_domain, dev);
+
 	/* request interrupts one by one */
 	for (k = 0; k < p->number_of_irqs; k++) {
 		if (devm_request_irq(dev, p->irq[k].requested_irq,
-- 
GitLab


From fb140b9c0fe1109ac2269443216d114751a030c0 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Tue, 1 Feb 2022 12:03:03 +0000
Subject: [PATCH 0326/1586] irqchip/imx-intmux: Move PM device over to irq
 domain

Move the reference to the device over to the irq domain.
This allows the irq_chip structure to be directly used instead
of taking a copy for each instance.

Signed-off-by: Marc Zyngier <maz@kernel.org>
Acked-by: Bartosz Golaszewski <brgl@bgdev.pl>
Link: https://lore.kernel.org/r/20220201120310.878267-6-maz@kernel.org
---
 drivers/irqchip/irq-imx-intmux.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/irqchip/irq-imx-intmux.c b/drivers/irqchip/irq-imx-intmux.c
index e86ff743e98c3..80aaea82468aa 100644
--- a/drivers/irqchip/irq-imx-intmux.c
+++ b/drivers/irqchip/irq-imx-intmux.c
@@ -61,7 +61,6 @@
 #define CHAN_MAX_NUM		0x8
 
 struct intmux_irqchip_data {
-	struct irq_chip		chip;
 	u32			saved_reg;
 	int			chanidx;
 	int			irq;
@@ -114,7 +113,7 @@ static void imx_intmux_irq_unmask(struct irq_data *d)
 	raw_spin_unlock_irqrestore(&data->lock, flags);
 }
 
-static struct irq_chip imx_intmux_irq_chip = {
+static struct irq_chip imx_intmux_irq_chip __ro_after_init = {
 	.name		= "intmux",
 	.irq_mask	= imx_intmux_irq_mask,
 	.irq_unmask	= imx_intmux_irq_unmask,
@@ -126,7 +125,7 @@ static int imx_intmux_irq_map(struct irq_domain *h, unsigned int irq,
 	struct intmux_irqchip_data *data = h->host_data;
 
 	irq_set_chip_data(irq, data);
-	irq_set_chip_and_handler(irq, &data->chip, handle_level_irq);
+	irq_set_chip_and_handler(irq, &imx_intmux_irq_chip, handle_level_irq);
 
 	return 0;
 }
@@ -241,8 +240,6 @@ static int imx_intmux_probe(struct platform_device *pdev)
 	}
 
 	for (i = 0; i < channum; i++) {
-		data->irqchip_data[i].chip = imx_intmux_irq_chip;
-		data->irqchip_data[i].chip.parent_device = &pdev->dev;
 		data->irqchip_data[i].chanidx = i;
 
 		data->irqchip_data[i].irq = irq_of_parse_and_map(np, i);
@@ -260,6 +257,7 @@ static int imx_intmux_probe(struct platform_device *pdev)
 			goto out;
 		}
 		data->irqchip_data[i].domain = domain;
+		irq_domain_set_pm_device(domain, &pdev->dev);
 
 		/* disable all interrupt sources of this channel firstly */
 		writel_relaxed(0, data->regs + CHANIER(i));
-- 
GitLab


From 4b9558f92036c968119e1de383f604c19b3ca99b Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Tue, 1 Feb 2022 12:03:04 +0000
Subject: [PATCH 0327/1586] gpio: mt7621: Kill parent_device usage

This gpio controller sets the parent_device field, but doesn't have
any runtime PM functionality. Get rid of it.

Signed-off-by: Marc Zyngier <maz@kernel.org>
Acked-by: Bartosz Golaszewski <brgl@bgdev.pl>
Link: https://lore.kernel.org/r/20220201120310.878267-7-maz@kernel.org
---
 drivers/gpio/gpio-mt7621.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpio/gpio-mt7621.c b/drivers/gpio/gpio-mt7621.c
index ccaad1cb3c2e9..d8a26e503ca5d 100644
--- a/drivers/gpio/gpio-mt7621.c
+++ b/drivers/gpio/gpio-mt7621.c
@@ -239,7 +239,6 @@ mediatek_gpio_bank_probe(struct device *dev, int bank)
 
 	rg->chip.offset = bank * MTK_BANK_WIDTH;
 	rg->irq_chip.name = dev_name(dev);
-	rg->irq_chip.parent_device = dev;
 	rg->irq_chip.irq_unmask = mediatek_gpio_irq_unmask;
 	rg->irq_chip.irq_mask = mediatek_gpio_irq_mask;
 	rg->irq_chip.irq_mask_ack = mediatek_gpio_irq_mask;
-- 
GitLab


From 989c78f25ade0af66426b935f1113d4b0fe390c5 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Tue, 1 Feb 2022 12:03:05 +0000
Subject: [PATCH 0328/1586] gpio: omap: Move PM device over to irq domain

Move the reference to the device over to the irq domain.

Signed-off-by: Marc Zyngier <maz@kernel.org>
Tested-by: Tony Lindgren <tony@atomide.com>
Acked-by: Bartosz Golaszewski <brgl@bgdev.pl>
Link: https://lore.kernel.org/r/20220201120310.878267-8-maz@kernel.org
---
 drivers/gpio/gpio-omap.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c
index e099c39e0355d..80ddc43fd875b 100644
--- a/drivers/gpio/gpio-omap.c
+++ b/drivers/gpio/gpio-omap.c
@@ -986,7 +986,8 @@ static void omap_gpio_mod_init(struct gpio_bank *bank)
 		writel_relaxed(0, base + bank->regs->ctrl);
 }
 
-static int omap_gpio_chip_init(struct gpio_bank *bank, struct irq_chip *irqc)
+static int omap_gpio_chip_init(struct gpio_bank *bank, struct irq_chip *irqc,
+			       struct device *pm_dev)
 {
 	struct gpio_irq_chip *irq;
 	static int gpio;
@@ -1052,6 +1053,7 @@ static int omap_gpio_chip_init(struct gpio_bank *bank, struct irq_chip *irqc)
 	if (ret)
 		return dev_err_probe(bank->chip.parent, ret, "Could not register gpio chip\n");
 
+	irq_domain_set_pm_device(bank->chip.irq.domain, pm_dev);
 	ret = devm_request_irq(bank->chip.parent, bank->irq,
 			       omap_gpio_irq_handler,
 			       0, dev_name(bank->chip.parent), bank);
@@ -1402,7 +1404,6 @@ static int omap_gpio_probe(struct platform_device *pdev)
 	irqc->irq_bus_sync_unlock = gpio_irq_bus_sync_unlock,
 	irqc->name = dev_name(&pdev->dev);
 	irqc->flags = IRQCHIP_MASK_ON_SUSPEND;
-	irqc->parent_device = dev;
 
 	bank->irq = platform_get_irq(pdev, 0);
 	if (bank->irq <= 0) {
@@ -1466,7 +1467,7 @@ static int omap_gpio_probe(struct platform_device *pdev)
 
 	omap_gpio_mod_init(bank);
 
-	ret = omap_gpio_chip_init(bank, irqc);
+	ret = omap_gpio_chip_init(bank, irqc, dev);
 	if (ret) {
 		pm_runtime_put_sync(dev);
 		pm_runtime_disable(dev);
-- 
GitLab


From 373d664b7d3babe1743f64746bc3c553ac23a1bd Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Tue, 1 Feb 2022 12:03:06 +0000
Subject: [PATCH 0329/1586] gpio: rcar: Move PM device over to irq domain

Move the reference to the device over to the irq domain.

Signed-off-by: Marc Zyngier <maz@kernel.org>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Tested-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Bartosz Golaszewski <brgl@bgdev.pl>
Link: https://lore.kernel.org/r/20220201120310.878267-9-maz@kernel.org
---
 drivers/gpio/gpio-rcar.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpio/gpio-rcar.c b/drivers/gpio/gpio-rcar.c
index bd2e16d6e21c4..3a76538f27fad 100644
--- a/drivers/gpio/gpio-rcar.c
+++ b/drivers/gpio/gpio-rcar.c
@@ -530,7 +530,6 @@ static int gpio_rcar_probe(struct platform_device *pdev)
 
 	irq_chip = &p->irq_chip;
 	irq_chip->name = "gpio-rcar";
-	irq_chip->parent_device = dev;
 	irq_chip->irq_mask = gpio_rcar_irq_disable;
 	irq_chip->irq_unmask = gpio_rcar_irq_enable;
 	irq_chip->irq_set_type = gpio_rcar_irq_set_type;
@@ -552,6 +551,7 @@ static int gpio_rcar_probe(struct platform_device *pdev)
 		goto err0;
 	}
 
+	irq_domain_set_pm_device(gpio_chip->irq.domain, dev);
 	ret = devm_request_irq(dev, p->irq_parent, gpio_rcar_irq_handler,
 			       IRQF_SHARED, name, p);
 	if (ret) {
-- 
GitLab


From 924610607f191bee4379bc3775b0fd025ad7e922 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Tue, 1 Feb 2022 12:03:07 +0000
Subject: [PATCH 0330/1586] gpio: tpmx86: Move PM device over to irq domain

Move the reference to the device over to the irq domain.

Signed-off-by: Marc Zyngier <maz@kernel.org>
Acked-by: Bartosz Golaszewski <brgl@bgdev.pl>
Link: https://lore.kernel.org/r/20220201120310.878267-10-maz@kernel.org
---
 drivers/gpio/gpio-tqmx86.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpio/gpio-tqmx86.c b/drivers/gpio/gpio-tqmx86.c
index 5b103221b58dd..fa4bc7481f9a6 100644
--- a/drivers/gpio/gpio-tqmx86.c
+++ b/drivers/gpio/gpio-tqmx86.c
@@ -281,7 +281,6 @@ static int tqmx86_gpio_probe(struct platform_device *pdev)
 		u8 irq_status;
 
 		irq_chip->name = chip->label;
-		irq_chip->parent_device = &pdev->dev;
 		irq_chip->irq_mask = tqmx86_gpio_irq_mask;
 		irq_chip->irq_unmask = tqmx86_gpio_irq_unmask;
 		irq_chip->irq_set_type = tqmx86_gpio_irq_set_type;
@@ -316,6 +315,8 @@ static int tqmx86_gpio_probe(struct platform_device *pdev)
 		goto out_pm_dis;
 	}
 
+	irq_domain_set_pm_device(girq->domain, dev);
+
 	dev_info(dev, "GPIO functionality initialized with %d pins\n",
 		 chip->ngpio);
 
-- 
GitLab


From d335092933079e0a48c61ea5791906d040105a4d Mon Sep 17 00:00:00 2001
From: Alexandre Torgue <alexandre.torgue@foss.st.com>
Date: Wed, 2 Feb 2022 15:00:03 +0100
Subject: [PATCH 0331/1586] dt-bindings: interrupt-controller: stm32-exti:
 document st,stm32mp13-exti

Support of STM32MP13 SoC implies to create a new compatible in order to
manage EXTI/GIC mapping changes.

Signed-off-by: Alexandre Torgue <alexandre.torgue@foss.st.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220202140005.860-2-alexandre.torgue@foss.st.com
---
 .../devicetree/bindings/interrupt-controller/st,stm32-exti.yaml  | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/interrupt-controller/st,stm32-exti.yaml b/Documentation/devicetree/bindings/interrupt-controller/st,stm32-exti.yaml
index d19c881b4abc2..e44daa09b137c 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/st,stm32-exti.yaml
+++ b/Documentation/devicetree/bindings/interrupt-controller/st,stm32-exti.yaml
@@ -20,6 +20,7 @@ properties:
       - items:
           - enum:
               - st,stm32mp1-exti
+              - st,stm32mp13-exti
           - const: syscon
 
   "#interrupt-cells":
-- 
GitLab


From 04133bb1e710bc3d5532694999fbb3d0f1421724 Mon Sep 17 00:00:00 2001
From: Alexandre Torgue <alexandre.torgue@foss.st.com>
Date: Wed, 2 Feb 2022 15:00:04 +0100
Subject: [PATCH 0332/1586] irqchip/stm32-exti: Add STM32MP13 support

Enhance stm32-exti driver to support STM32MP13 SoC. This SoC uses the same
hardware version than STM32MP15. Only EXTI line mapping is changed and
following EXTI lines are supported: GPIO, RTC, I2C[1-5], UxART[1-8],
USBH_EHCI, USBH_OHCI, USB_OTG, LPTIM[1-5], ETH[1-2].

Signed-off-by: Alexandre Torgue <alexandre.torgue@foss.st.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220202140005.860-3-alexandre.torgue@foss.st.com
---
 drivers/irqchip/irq-stm32-exti.c | 50 ++++++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)

diff --git a/drivers/irqchip/irq-stm32-exti.c b/drivers/irqchip/irq-stm32-exti.c
index b7cb2da718880..9d18f47040eb7 100644
--- a/drivers/irqchip/irq-stm32-exti.c
+++ b/drivers/irqchip/irq-stm32-exti.c
@@ -214,6 +214,48 @@ static const struct stm32_desc_irq stm32mp1_desc_irq[] = {
 	{ .exti = 73, .irq_parent = 129, .chip = &stm32_exti_h_chip },
 };
 
+static const struct stm32_desc_irq stm32mp13_desc_irq[] = {
+	{ .exti = 0, .irq_parent = 6, .chip = &stm32_exti_h_chip },
+	{ .exti = 1, .irq_parent = 7, .chip = &stm32_exti_h_chip },
+	{ .exti = 2, .irq_parent = 8, .chip = &stm32_exti_h_chip },
+	{ .exti = 3, .irq_parent = 9, .chip = &stm32_exti_h_chip },
+	{ .exti = 4, .irq_parent = 10, .chip = &stm32_exti_h_chip },
+	{ .exti = 5, .irq_parent = 24, .chip = &stm32_exti_h_chip },
+	{ .exti = 6, .irq_parent = 65, .chip = &stm32_exti_h_chip },
+	{ .exti = 7, .irq_parent = 66, .chip = &stm32_exti_h_chip },
+	{ .exti = 8, .irq_parent = 67, .chip = &stm32_exti_h_chip },
+	{ .exti = 9, .irq_parent = 68, .chip = &stm32_exti_h_chip },
+	{ .exti = 10, .irq_parent = 41, .chip = &stm32_exti_h_chip },
+	{ .exti = 11, .irq_parent = 43, .chip = &stm32_exti_h_chip },
+	{ .exti = 12, .irq_parent = 77, .chip = &stm32_exti_h_chip },
+	{ .exti = 13, .irq_parent = 78, .chip = &stm32_exti_h_chip },
+	{ .exti = 14, .irq_parent = 106, .chip = &stm32_exti_h_chip },
+	{ .exti = 15, .irq_parent = 109, .chip = &stm32_exti_h_chip },
+	{ .exti = 16, .irq_parent = 1, .chip = &stm32_exti_h_chip },
+	{ .exti = 19, .irq_parent = 3, .chip = &stm32_exti_h_chip_direct },
+	{ .exti = 21, .irq_parent = 32, .chip = &stm32_exti_h_chip_direct },
+	{ .exti = 22, .irq_parent = 34, .chip = &stm32_exti_h_chip_direct },
+	{ .exti = 23, .irq_parent = 73, .chip = &stm32_exti_h_chip_direct },
+	{ .exti = 24, .irq_parent = 93, .chip = &stm32_exti_h_chip_direct },
+	{ .exti = 25, .irq_parent = 114, .chip = &stm32_exti_h_chip_direct },
+	{ .exti = 26, .irq_parent = 38, .chip = &stm32_exti_h_chip_direct },
+	{ .exti = 27, .irq_parent = 39, .chip = &stm32_exti_h_chip_direct },
+	{ .exti = 28, .irq_parent = 40, .chip = &stm32_exti_h_chip_direct },
+	{ .exti = 29, .irq_parent = 72, .chip = &stm32_exti_h_chip_direct },
+	{ .exti = 30, .irq_parent = 53, .chip = &stm32_exti_h_chip_direct },
+	{ .exti = 31, .irq_parent = 54, .chip = &stm32_exti_h_chip_direct },
+	{ .exti = 32, .irq_parent = 83, .chip = &stm32_exti_h_chip_direct },
+	{ .exti = 33, .irq_parent = 84, .chip = &stm32_exti_h_chip_direct },
+	{ .exti = 44, .irq_parent = 96, .chip = &stm32_exti_h_chip_direct },
+	{ .exti = 47, .irq_parent = 92, .chip = &stm32_exti_h_chip_direct },
+	{ .exti = 48, .irq_parent = 116, .chip = &stm32_exti_h_chip_direct },
+	{ .exti = 50, .irq_parent = 117, .chip = &stm32_exti_h_chip_direct },
+	{ .exti = 52, .irq_parent = 118, .chip = &stm32_exti_h_chip_direct },
+	{ .exti = 53, .irq_parent = 119, .chip = &stm32_exti_h_chip_direct },
+	{ .exti = 68, .irq_parent = 63, .chip = &stm32_exti_h_chip_direct },
+	{ .exti = 70, .irq_parent = 98, .chip = &stm32_exti_h_chip_direct },
+};
+
 static const struct stm32_exti_drv_data stm32mp1_drv_data = {
 	.exti_banks = stm32mp1_exti_banks,
 	.bank_nr = ARRAY_SIZE(stm32mp1_exti_banks),
@@ -221,6 +263,13 @@ static const struct stm32_exti_drv_data stm32mp1_drv_data = {
 	.irq_nr = ARRAY_SIZE(stm32mp1_desc_irq),
 };
 
+static const struct stm32_exti_drv_data stm32mp13_drv_data = {
+	.exti_banks = stm32mp1_exti_banks,
+	.bank_nr = ARRAY_SIZE(stm32mp1_exti_banks),
+	.desc_irqs = stm32mp13_desc_irq,
+	.irq_nr = ARRAY_SIZE(stm32mp13_desc_irq),
+};
+
 static const struct
 stm32_desc_irq *stm32_exti_get_desc(const struct stm32_exti_drv_data *drv_data,
 				    irq_hw_number_t hwirq)
@@ -922,6 +971,7 @@ static int stm32_exti_probe(struct platform_device *pdev)
 /* platform driver only for MP1 */
 static const struct of_device_id stm32_exti_ids[] = {
 	{ .compatible = "st,stm32mp1-exti", .data = &stm32mp1_drv_data},
+	{ .compatible = "st,stm32mp13-exti", .data = &stm32mp13_drv_data},
 	{},
 };
 MODULE_DEVICE_TABLE(of, stm32_exti_ids);
-- 
GitLab


From 8e7c8ca6b988904d4c32c4053b325739738c8f36 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Mon, 20 Sep 2021 10:27:16 -0700
Subject: [PATCH 0333/1586] test_overflow: Regularize test reporting output

Report test run summaries more regularly, so it's easier to understand
the output:
- Remove noisy "ok" reports for shift and allocator tests.
- Reorganize per-type output to the end of each type's tests.
- Replace redundant vmalloc tests with __vmalloc so that __GFP_NO_WARN
  can be used to keep the expected failure warnings out of dmesg,
  similar to commit 8e060c21ae2c ("lib/test_overflow.c: avoid tainting
  the kernel and fix wrap size")

Resulting output:

  test_overflow: 18 u8 arithmetic tests finished
  test_overflow: 19 s8 arithmetic tests finished
  test_overflow: 17 u16 arithmetic tests finished
  test_overflow: 17 s16 arithmetic tests finished
  test_overflow: 17 u32 arithmetic tests finished
  test_overflow: 17 s32 arithmetic tests finished
  test_overflow: 17 u64 arithmetic tests finished
  test_overflow: 21 s64 arithmetic tests finished
  test_overflow: 113 shift tests finished
  test_overflow: 17 overflow size helper tests finished
  test_overflow: 11 allocation overflow tests finished
  test_overflow: all tests passed

Acked-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Link: https://lore.kernel.org/all/eb6d02ae-e2ed-e7bd-c700-8a6d004d84ce@rasmusvillemoes.dk/
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Link: https://lore.kernel.org/all/CAKwvOdnYYa+72VhtJ4ug=SJVFn7w+n7Th+hKYE87BRDt4hvqOg@mail.gmail.com/
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 lib/test_overflow.c | 54 +++++++++++++++++++++++++--------------------
 1 file changed, 30 insertions(+), 24 deletions(-)

diff --git a/lib/test_overflow.c b/lib/test_overflow.c
index 7a4b6f6c5473c..cea37ae826153 100644
--- a/lib/test_overflow.c
+++ b/lib/test_overflow.c
@@ -252,10 +252,10 @@ static int __init test_ ## t ## _overflow(void) {			\
 	int err = 0;							\
 	unsigned i;							\
 									\
-	pr_info("%-3s: %zu arithmetic tests\n", #t,			\
-		ARRAY_SIZE(t ## _tests));				\
 	for (i = 0; i < ARRAY_SIZE(t ## _tests); ++i)			\
 		err |= do_test_ ## t(&t ## _tests[i]);			\
+	pr_info("%zu %s arithmetic tests finished\n",			\
+		ARRAY_SIZE(t ## _tests), #t);				\
 	return err;							\
 }
 
@@ -291,6 +291,7 @@ static int __init test_overflow_calculation(void)
 static int __init test_overflow_shift(void)
 {
 	int err = 0;
+	int count = 0;
 
 /* Args are: value, shift, type, expected result, overflow expected */
 #define TEST_ONE_SHIFT(a, s, t, expect, of) ({				\
@@ -313,9 +314,7 @@ static int __init test_overflow_shift(void)
 			pr_warn("got %llu\n", (u64)__d);		\
 		__failed = 1;						\
 	}								\
-	if (!__failed)							\
-		pr_info("ok: (%s)(%s << %s) == %s\n", #t, #a, #s,	\
-			of ? "overflow" : #expect);			\
+	count++;							\
 	__failed;							\
 })
 
@@ -479,6 +478,10 @@ static int __init test_overflow_shift(void)
 	err |= TEST_ONE_SHIFT(0, 31, s32, 0, false);
 	err |= TEST_ONE_SHIFT(0, 63, s64, 0, false);
 
+	pr_info("%d shift tests finished\n", count);
+
+#undef TEST_ONE_SHIFT
+
 	return err;
 }
 
@@ -530,7 +533,6 @@ static int __init test_ ## func (void *arg)				\
 		free ## want_arg (free_func, arg, ptr);			\
 		return 1;						\
 	}								\
-	pr_info(#func " detected saturation\n");			\
 	return 0;							\
 }
 
@@ -544,10 +546,7 @@ DEFINE_TEST_ALLOC(kmalloc,	 kfree,	     0, 1, 0);
 DEFINE_TEST_ALLOC(kmalloc_node,	 kfree,	     0, 1, 1);
 DEFINE_TEST_ALLOC(kzalloc,	 kfree,	     0, 1, 0);
 DEFINE_TEST_ALLOC(kzalloc_node,  kfree,	     0, 1, 1);
-DEFINE_TEST_ALLOC(vmalloc,	 vfree,	     0, 0, 0);
-DEFINE_TEST_ALLOC(vmalloc_node,  vfree,	     0, 0, 1);
-DEFINE_TEST_ALLOC(vzalloc,	 vfree,	     0, 0, 0);
-DEFINE_TEST_ALLOC(vzalloc_node,  vfree,	     0, 0, 1);
+DEFINE_TEST_ALLOC(__vmalloc,	 vfree,	     0, 1, 0);
 DEFINE_TEST_ALLOC(kvmalloc,	 kvfree,     0, 1, 0);
 DEFINE_TEST_ALLOC(kvmalloc_node, kvfree,     0, 1, 1);
 DEFINE_TEST_ALLOC(kvzalloc,	 kvfree,     0, 1, 0);
@@ -559,8 +558,14 @@ static int __init test_overflow_allocation(void)
 {
 	const char device_name[] = "overflow-test";
 	struct device *dev;
+	int count = 0;
 	int err = 0;
 
+#define check_allocation_overflow(alloc)	({	\
+	count++;					\
+	test_ ## alloc(dev);				\
+})
+
 	/* Create dummy device for devm_kmalloc()-family tests. */
 	dev = root_device_register(device_name);
 	if (IS_ERR(dev)) {
@@ -568,23 +573,24 @@ static int __init test_overflow_allocation(void)
 		return 1;
 	}
 
-	err |= test_kmalloc(NULL);
-	err |= test_kmalloc_node(NULL);
-	err |= test_kzalloc(NULL);
-	err |= test_kzalloc_node(NULL);
-	err |= test_kvmalloc(NULL);
-	err |= test_kvmalloc_node(NULL);
-	err |= test_kvzalloc(NULL);
-	err |= test_kvzalloc_node(NULL);
-	err |= test_vmalloc(NULL);
-	err |= test_vmalloc_node(NULL);
-	err |= test_vzalloc(NULL);
-	err |= test_vzalloc_node(NULL);
-	err |= test_devm_kmalloc(dev);
-	err |= test_devm_kzalloc(dev);
+	err |= check_allocation_overflow(kmalloc);
+	err |= check_allocation_overflow(kmalloc_node);
+	err |= check_allocation_overflow(kzalloc);
+	err |= check_allocation_overflow(kzalloc_node);
+	err |= check_allocation_overflow(__vmalloc);
+	err |= check_allocation_overflow(kvmalloc);
+	err |= check_allocation_overflow(kvmalloc_node);
+	err |= check_allocation_overflow(kvzalloc);
+	err |= check_allocation_overflow(kvzalloc_node);
+	err |= check_allocation_overflow(devm_kmalloc);
+	err |= check_allocation_overflow(devm_kzalloc);
 
 	device_unregister(dev);
 
+	pr_info("%d allocation overflow tests finished\n", count);
+
+#undef check_allocation_overflow
+
 	return err;
 }
 
-- 
GitLab


From c9edbe1eb98248c290d93aa2ffdc30cab5e2e62c Mon Sep 17 00:00:00 2001
From: Akira Yokosawa <akiyks@gmail.com>
Date: Wed, 29 Dec 2021 20:44:19 +0900
Subject: [PATCH 0334/1586] docs: sphinx/kfigure.py: Use rsvg-convert(1) for
 DOT -> PDF conversion

On openSUSE, dot(1) command does not support direct PDF output.
On other distros, generated PDF images have unnecessarily wide margins,
especially for small graphs.

By using dot(1) for DOT -> SVG, then rsvg-convert(1) for SVG -> PDF,
more optimal PDF images can be obtained, with the bonus of improved
portability across various distros.

Add rules in kfigure.py so that the above mentioned route is taken
when rsvg-convert(1) is available.

Note that rsvg-convert(1) is recommended by sphinx_pre_install.
So it is most likely that existing systems for building pdfdocs have
rsvg-convert(1) installed.

Note:
    SVG features supported by rsvg-convert(1) vary depending on its
    version and distro config.
    For example, the one found on Ubuntu Bionic (version 2.40.20) does
    poor job in rendering some of SVG files drawn by Inkscape.
    SVG files generated by dot(1) are converted nicely even with such
    old versions of rsvg-convert.

    So this change does not affect the quality of such figures in any
    way.

Signed-off-by: Akira Yokosawa <akiyks@gmail.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Mauro Carvalho Chehab <mchehab@kernel.org>
Link: https://lore.kernel.org/r/15b56dd3-081a-2469-c3a4-dfc1ca4c6c2d@gmail.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/sphinx/kfigure.py | 46 +++++++++++++++++++++++++++++++--
 1 file changed, 44 insertions(+), 2 deletions(-)

diff --git a/Documentation/sphinx/kfigure.py b/Documentation/sphinx/kfigure.py
index 3c78828330be0..955e3ec5de5a6 100644
--- a/Documentation/sphinx/kfigure.py
+++ b/Documentation/sphinx/kfigure.py
@@ -31,6 +31,8 @@ u"""
 
     * ``dot(1)``: Graphviz (https://www.graphviz.org). If Graphviz is not
       available, the DOT language is inserted as literal-block.
+      For conversion to PDF, ``rsvg-convert(1)`` of librsvg
+      (https://gitlab.gnome.org/GNOME/librsvg) is used when available.
 
     * SVG to PDF: To generate PDF, you need at least one of this tools:
 
@@ -113,6 +115,9 @@ dot_cmd = None
 # ImageMagick' convert(1) support
 convert_cmd = None
 
+# librsvg's rsvg-convert(1) support
+rsvg_convert_cmd = None
+
 
 def setup(app):
     # check toolchain first
@@ -160,11 +165,12 @@ def setupTools(app):
 
     This function is called once, when the builder is initiated.
     """
-    global dot_cmd, convert_cmd   # pylint: disable=W0603
+    global dot_cmd, convert_cmd, rsvg_convert_cmd   # pylint: disable=W0603
     kernellog.verbose(app, "kfigure: check installed tools ...")
 
     dot_cmd = which('dot')
     convert_cmd = which('convert')
+    rsvg_convert_cmd = which('rsvg-convert')
 
     if dot_cmd:
         kernellog.verbose(app, "use dot(1) from: " + dot_cmd)
@@ -177,6 +183,11 @@ def setupTools(app):
         kernellog.warn(app,
             "convert(1) not found, for SVG to PDF conversion install "
             "ImageMagick (https://www.imagemagick.org)")
+    if rsvg_convert_cmd:
+        kernellog.verbose(app, "use rsvg-convert(1) from: " + rsvg_convert_cmd)
+    else:
+        kernellog.verbose(app, "rsvg-convert(1) not found, "
+                          "falling back to raster image conversion")
 
 
 # integrate conversion tools
@@ -266,7 +277,13 @@ def convert_image(img_node, translator, src_fname=None):
 
             if in_ext == '.dot':
                 kernellog.verbose(app, 'convert DOT to: {out}/' + _name)
-                ok = dot2format(app, src_fname, dst_fname)
+                if translator.builder.format == 'latex':
+                    svg_fname = path.join(translator.builder.outdir, fname + '.svg')
+                    ok1 = dot2format(app, src_fname, svg_fname)
+                    ok2 = svg2pdf_by_rsvg(app, svg_fname, dst_fname)
+                    ok = ok1 and ok2
+                else:
+                    ok = dot2format(app, src_fname, dst_fname)
 
             elif in_ext == '.svg':
                 kernellog.verbose(app, 'convert SVG to: {out}/' + _name)
@@ -319,6 +336,31 @@ def svg2pdf(app, svg_fname, pdf_fname):
         kernellog.warn(app, "Error #%d when calling: %s" % (exit_code, " ".join(cmd)))
     return bool(exit_code == 0)
 
+def svg2pdf_by_rsvg(app, svg_fname, pdf_fname):
+    """Convert SVG to PDF with ``rsvg-convert(1)`` command.
+
+    * ``svg_fname`` pathname of input SVG file, including extension ``.svg``
+    * ``pdf_fname`` pathname of output PDF file, including extension ``.pdf``
+
+    Input SVG file should be the one generated by ``dot2format()``.
+    SVG -> PDF conversion is done by ``rsvg-convert(1)``.
+
+    If ``rsvg-convert(1)`` is unavailable, fall back to ``svg2pdf()``.
+
+    """
+
+    if rsvg_convert_cmd is None:
+        ok = svg2pdf(app, svg_fname, pdf_fname)
+    else:
+        cmd = [rsvg_convert_cmd, '--format=pdf', '-o', pdf_fname, svg_fname]
+        # use stdout and stderr from parent
+        exit_code = subprocess.call(cmd)
+        if exit_code != 0:
+            kernellog.warn(app, "Error #%d when calling: %s" % (exit_code, " ".join(cmd)))
+        ok = bool(exit_code == 0)
+
+    return ok
+
 
 # image handling
 # ---------------------
-- 
GitLab


From ecf5fb58cdcd93b9cf555b95da4ef73b1297de4c Mon Sep 17 00:00:00 2001
From: Akira Yokosawa <akiyks@gmail.com>
Date: Wed, 29 Dec 2021 20:45:29 +0900
Subject: [PATCH 0335/1586] docs: sphinx/kfigure.py: Add check of 'dot -Tpdf'

To prevent any regression on existing build systems, limit the
fallback of converting DOT -> raster PDF only when both of the
following conditions are met.

 o dot(1) doesn't support -Tpdf
 o rsvg-convert(1) is not found

While we are here, add kernellog.verbose messages related to
rsvg-convert, 'dot -Tpdf', and 'dot -Tsvg' commands.

Suggested-by: Mauro Carvalho Chehab <mchehab@kernel.org>
Signed-off-by: Akira Yokosawa <akiyks@gmail.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Link: https://lore.kernel.org/r/e76f61e1-7366-ba00-b119-8ea6a2499861@gmail.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/sphinx/kfigure.py | 30 ++++++++++++++++++++++++++----
 1 file changed, 26 insertions(+), 4 deletions(-)

diff --git a/Documentation/sphinx/kfigure.py b/Documentation/sphinx/kfigure.py
index 955e3ec5de5a6..77b0d15dba317 100644
--- a/Documentation/sphinx/kfigure.py
+++ b/Documentation/sphinx/kfigure.py
@@ -51,6 +51,7 @@ import os
 from os import path
 import subprocess
 from hashlib import sha1
+import re
 from docutils import nodes
 from docutils.statemachine import ViewList
 from docutils.parsers.rst import directives
@@ -111,6 +112,8 @@ def pass_handle(self, node):           # pylint: disable=W0613
 
 # Graphviz's dot(1) support
 dot_cmd = None
+# dot(1) -Tpdf should be used
+dot_Tpdf = False
 
 # ImageMagick' convert(1) support
 convert_cmd = None
@@ -165,7 +168,7 @@ def setupTools(app):
 
     This function is called once, when the builder is initiated.
     """
-    global dot_cmd, convert_cmd, rsvg_convert_cmd   # pylint: disable=W0603
+    global dot_cmd, dot_Tpdf, convert_cmd, rsvg_convert_cmd   # pylint: disable=W0603
     kernellog.verbose(app, "kfigure: check installed tools ...")
 
     dot_cmd = which('dot')
@@ -174,6 +177,16 @@ def setupTools(app):
 
     if dot_cmd:
         kernellog.verbose(app, "use dot(1) from: " + dot_cmd)
+
+        try:
+            dot_Thelp_list = subprocess.check_output([dot_cmd, '-Thelp'],
+                                    stderr=subprocess.STDOUT)
+        except subprocess.CalledProcessError as err:
+            dot_Thelp_list = err.output
+            pass
+
+        dot_Tpdf_ptn = b'pdf'
+        dot_Tpdf = re.search(dot_Tpdf_ptn, dot_Thelp_list)
     else:
         kernellog.warn(app, "dot(1) not found, for better output quality install "
                        "graphviz from https://www.graphviz.org")
@@ -185,9 +198,17 @@ def setupTools(app):
             "ImageMagick (https://www.imagemagick.org)")
     if rsvg_convert_cmd:
         kernellog.verbose(app, "use rsvg-convert(1) from: " + rsvg_convert_cmd)
+        kernellog.verbose(app, "use 'dot -Tsvg' and rsvg-convert(1) for DOT -> PDF conversion")
+        dot_Tpdf = False
     else:
-        kernellog.verbose(app, "rsvg-convert(1) not found, "
-                          "falling back to raster image conversion")
+        kernellog.verbose(app,
+            "rsvg-convert(1) not found.\n"
+            "  SVG -> PDF conversion by convert() can be poor quality.\n"
+            "  Install librsvg (https://gitlab.gnome.org/GNOME/librsvg)")
+        if dot_Tpdf:
+            kernellog.verbose(app, "use 'dot -Tpdf' for DOT -> PDF conversion")
+        else:
+            kernellog.verbose(app, "use 'dot -Tsvg' and convert(1) for DOT -> PDF conversion")
 
 
 # integrate conversion tools
@@ -277,11 +298,12 @@ def convert_image(img_node, translator, src_fname=None):
 
             if in_ext == '.dot':
                 kernellog.verbose(app, 'convert DOT to: {out}/' + _name)
-                if translator.builder.format == 'latex':
+                if translator.builder.format == 'latex' and not dot_Tpdf:
                     svg_fname = path.join(translator.builder.outdir, fname + '.svg')
                     ok1 = dot2format(app, src_fname, svg_fname)
                     ok2 = svg2pdf_by_rsvg(app, svg_fname, dst_fname)
                     ok = ok1 and ok2
+
                 else:
                     ok = dot2format(app, src_fname, dst_fname)
 
-- 
GitLab


From 8ccd05697a9d2f837f77a858e81ba13cdb50adac Mon Sep 17 00:00:00 2001
From: Akira Yokosawa <akiyks@gmail.com>
Date: Wed, 29 Dec 2021 20:46:58 +0900
Subject: [PATCH 0336/1586] docs: sphinx/kfigure.py: Use inkscape(1) for SVG ->
 PDF conversion

Using convert(1) of ImageMagick for SVG -> PDF conversion results in
PDFs containing raster (bitmap) images which sometimes look blurry.

Ideally speaking, SVG to PDF conversion should retain vector graphics
in SVG.

rsvg-convert(1) can do such conversions with regard to SVG files
generated by dot(1).

Unfortunately, rsvg-convert(1) does not cover some of SVG features
specific to Inkscape.
inkscape(1) of Inkscape naturally covers such SVG features.

So add a route in svg2pdf() so that inkscape(1) is used when it is
available.

Note:
    After this change, if you have Inkscape installed, ImageMagick nor
    librsvg are not required.

Signed-off-by: Akira Yokosawa <akiyks@gmail.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Mauro Carvalho Chehab <mchehab@kernel.org>
Link: https://lore.kernel.org/r/3eea2a8d-c52d-ee07-cf7b-83784c6f6e4b@gmail.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/sphinx/kfigure.py | 68 +++++++++++++++++++++++----------
 1 file changed, 48 insertions(+), 20 deletions(-)

diff --git a/Documentation/sphinx/kfigure.py b/Documentation/sphinx/kfigure.py
index 77b0d15dba317..e616e49669eb1 100644
--- a/Documentation/sphinx/kfigure.py
+++ b/Documentation/sphinx/kfigure.py
@@ -37,6 +37,7 @@ u"""
     * SVG to PDF: To generate PDF, you need at least one of this tools:
 
       - ``convert(1)``: ImageMagick (https://www.imagemagick.org)
+      - ``inkscape(1)``: Inkscape (https://inkscape.org/)
 
     List of customizations:
 
@@ -121,6 +122,11 @@ convert_cmd = None
 # librsvg's rsvg-convert(1) support
 rsvg_convert_cmd = None
 
+# Inkscape's inkscape(1) support
+inkscape_cmd = None
+# Inkscape prior to 1.0 uses different command options
+inkscape_ver_one = False
+
 
 def setup(app):
     # check toolchain first
@@ -169,11 +175,13 @@ def setupTools(app):
     This function is called once, when the builder is initiated.
     """
     global dot_cmd, dot_Tpdf, convert_cmd, rsvg_convert_cmd   # pylint: disable=W0603
+    global inkscape_cmd, inkscape_ver_one  # pylint: disable=W0603
     kernellog.verbose(app, "kfigure: check installed tools ...")
 
     dot_cmd = which('dot')
     convert_cmd = which('convert')
     rsvg_convert_cmd = which('rsvg-convert')
+    inkscape_cmd = which('inkscape')
 
     if dot_cmd:
         kernellog.verbose(app, "use dot(1) from: " + dot_cmd)
@@ -190,25 +198,37 @@ def setupTools(app):
     else:
         kernellog.warn(app, "dot(1) not found, for better output quality install "
                        "graphviz from https://www.graphviz.org")
-    if convert_cmd:
-        kernellog.verbose(app, "use convert(1) from: " + convert_cmd)
-    else:
-        kernellog.warn(app,
-            "convert(1) not found, for SVG to PDF conversion install "
-            "ImageMagick (https://www.imagemagick.org)")
-    if rsvg_convert_cmd:
-        kernellog.verbose(app, "use rsvg-convert(1) from: " + rsvg_convert_cmd)
-        kernellog.verbose(app, "use 'dot -Tsvg' and rsvg-convert(1) for DOT -> PDF conversion")
+    if inkscape_cmd:
+        kernellog.verbose(app, "use inkscape(1) from: " + inkscape_cmd)
+        inkscape_ver = subprocess.check_output([inkscape_cmd, '--version'])
+        ver_one_ptn = b'Inkscape 1'
+        inkscape_ver_one = re.search(ver_one_ptn, inkscape_ver)
+        convert_cmd = None
+        rsvg_convert_cmd = None
         dot_Tpdf = False
+
     else:
-        kernellog.verbose(app,
-            "rsvg-convert(1) not found.\n"
-            "  SVG -> PDF conversion by convert() can be poor quality.\n"
-            "  Install librsvg (https://gitlab.gnome.org/GNOME/librsvg)")
-        if dot_Tpdf:
-            kernellog.verbose(app, "use 'dot -Tpdf' for DOT -> PDF conversion")
+        if convert_cmd:
+            kernellog.verbose(app, "use convert(1) from: " + convert_cmd)
+        else:
+            kernellog.warn(app,
+                "Neither inkscape(1) nor convert(1) found.\n"
+                "For SVG to PDF conversion, "
+                "install either Inkscape (https://inkscape.org/) (preferred) or\n"
+                "ImageMagick (https://www.imagemagick.org)")
+
+        if rsvg_convert_cmd:
+            kernellog.verbose(app, "use rsvg-convert(1) from: " + rsvg_convert_cmd)
+            kernellog.verbose(app, "use 'dot -Tsvg' and rsvg-convert(1) for DOT -> PDF conversion")
+            dot_Tpdf = False
         else:
-            kernellog.verbose(app, "use 'dot -Tsvg' and convert(1) for DOT -> PDF conversion")
+            kernellog.verbose(app,
+                "rsvg-convert(1) not found.\n"
+                "  SVG rendering of convert(1) is done by ImageMagick-native renderer.")
+            if dot_Tpdf:
+                kernellog.verbose(app, "use 'dot -Tpdf' for DOT -> PDF conversion")
+            else:
+                kernellog.verbose(app, "use 'dot -Tsvg' and convert(1) for DOT -> PDF conversion")
 
 
 # integrate conversion tools
@@ -274,7 +294,7 @@ def convert_image(img_node, translator, src_fname=None):
     elif in_ext == '.svg':
 
         if translator.builder.format == 'latex':
-            if convert_cmd is None:
+            if not inkscape_cmd and convert_cmd is None:
                 kernellog.verbose(app,
                                   "no SVG to PDF conversion available / include SVG raw.")
                 img_node.replace_self(file2literal(src_fname))
@@ -342,16 +362,24 @@ def dot2format(app, dot_fname, out_fname):
     return bool(exit_code == 0)
 
 def svg2pdf(app, svg_fname, pdf_fname):
-    """Converts SVG to PDF with ``convert(1)`` command.
+    """Converts SVG to PDF with ``inkscape(1)`` or ``convert(1)`` command.
 
-    Uses ``convert(1)`` from ImageMagick (https://www.imagemagick.org) for
-    conversion.  Returns ``True`` on success and ``False`` if an error occurred.
+    Uses ``inkscape(1)`` from Inkscape (https://inkscape.org/) or ``convert(1)``
+    from ImageMagick (https://www.imagemagick.org) for conversion.
+    Returns ``True`` on success and ``False`` if an error occurred.
 
     * ``svg_fname`` pathname of the input SVG file with extension (``.svg``)
     * ``pdf_name``  pathname of the output PDF file with extension (``.pdf``)
 
     """
     cmd = [convert_cmd, svg_fname, pdf_fname]
+
+    if inkscape_cmd:
+        if inkscape_ver_one:
+            cmd = [inkscape_cmd, '-o', pdf_fname, svg_fname]
+        else:
+            cmd = [inkscape_cmd, '-z', '--export-pdf=%s' % pdf_fname, svg_fname]
+
     # use stdout and stderr from parent
     exit_code = subprocess.call(cmd)
     if exit_code != 0:
-- 
GitLab


From f30a7ac8c6100e88dc416b675425541a337a46c8 Mon Sep 17 00:00:00 2001
From: Akira Yokosawa <akiyks@gmail.com>
Date: Wed, 29 Dec 2021 20:47:56 +0900
Subject: [PATCH 0337/1586] docs: sphinx/kfigure.py: Delegate inkscape msg to
 kernellog.verbose

Depending on its version, distro config, and system-setup type,
inkscape(1) emits various warning messages which are harmless in
command-line uses.

List of such warning messages (incomplete, long ones wrapped):

  - Gtk-Message: hh:mm:ss.nnn: Failed to load module "canberra-gtk-module"
  - Unable to init server: Could not connect: Connection refused
  - Failed to get connection
  - ** (inkscape:xxx): CRITICAL **: hh:mm:ss.nnn: dbus_g_proxy_new_for_name:
    assertion 'connection != NULL' failed
  - ** (inkscape:xxx): CRITICAL **: hh:mm:ss.nnn: dbus_g_proxy_call:
    assertion 'DBUS_IS_G_PROXY (proxy)' failed
  - ** (inkscape:xxx): CRITICAL **: hh:mm:ss.nnn: dbus_g_connection_register_g_object:
    assertion 'connection != NULL' failed
  - ** (inkscape:xxx): WARNING **: hh:mm:ss.nnn:
    Fonts dir '/usr/share/inkscape/fonts' does not exist and will be ignored.

To avoid unnecessary anxiety, capture the message and output it via
kernellog.verbose or kernellog.warn depending on the exit code.

Signed-off-by: Akira Yokosawa <akiyks@gmail.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Mauro Carvalho Chehab <mchehab@kernel.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Link: https://lore.kernel.org/r/e26a7b53-9155-8394-4a31-6006379b65a5@gmail.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/sphinx/kfigure.py | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/Documentation/sphinx/kfigure.py b/Documentation/sphinx/kfigure.py
index e616e49669eb1..24d2b2addcce3 100644
--- a/Documentation/sphinx/kfigure.py
+++ b/Documentation/sphinx/kfigure.py
@@ -200,7 +200,8 @@ def setupTools(app):
                        "graphviz from https://www.graphviz.org")
     if inkscape_cmd:
         kernellog.verbose(app, "use inkscape(1) from: " + inkscape_cmd)
-        inkscape_ver = subprocess.check_output([inkscape_cmd, '--version'])
+        inkscape_ver = subprocess.check_output([inkscape_cmd, '--version'],
+                                               stderr=subprocess.DEVNULL)
         ver_one_ptn = b'Inkscape 1'
         inkscape_ver_one = re.search(ver_one_ptn, inkscape_ver)
         convert_cmd = None
@@ -373,17 +374,32 @@ def svg2pdf(app, svg_fname, pdf_fname):
 
     """
     cmd = [convert_cmd, svg_fname, pdf_fname]
+    cmd_name = 'convert(1)'
 
     if inkscape_cmd:
+        cmd_name = 'inkscape(1)'
         if inkscape_ver_one:
             cmd = [inkscape_cmd, '-o', pdf_fname, svg_fname]
         else:
             cmd = [inkscape_cmd, '-z', '--export-pdf=%s' % pdf_fname, svg_fname]
 
-    # use stdout and stderr from parent
-    exit_code = subprocess.call(cmd)
+    try:
+        warning_msg = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
+        exit_code = 0
+    except subprocess.CalledProcessError as err:
+        warning_msg = err.output
+        exit_code = err.returncode
+        pass
+
     if exit_code != 0:
         kernellog.warn(app, "Error #%d when calling: %s" % (exit_code, " ".join(cmd)))
+        if warning_msg:
+            kernellog.warn(app, "Warning msg from %s: %s"
+                           % (cmd_name, str(warning_msg, 'utf-8')))
+    elif warning_msg:
+        kernellog.verbose(app, "Warning msg from %s (likely harmless):\n%s"
+                          % (cmd_name, str(warning_msg, 'utf-8')))
+
     return bool(exit_code == 0)
 
 def svg2pdf_by_rsvg(app, svg_fname, pdf_fname):
-- 
GitLab


From f7e53e2255808ca3abcc8f38d18ad0823425e771 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Tue, 1 Feb 2022 12:03:08 +0000
Subject: [PATCH 0338/1586] pinctrl: npcm: Fix broken references to
 chip->parent_device

The npcm driver has a bunch of references to the irq_chip parent_device
field, but never sets it.

Fix it by fishing that reference from somewhere else, but it is
obvious that these debug statements were never used. Also remove
an unused field in a local data structure.

Signed-off-by: Marc Zyngier <maz@kernel.org>
Acked-by: Bartosz Golaszewski <brgl@bgdev.pl>
Link: https://lore.kernel.org/r/20220201120310.878267-11-maz@kernel.org
---
 drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c | 25 +++++++++++------------
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c b/drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c
index 4d81908d6725d..ba536fd4d6740 100644
--- a/drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c
+++ b/drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c
@@ -78,7 +78,6 @@ struct npcm7xx_gpio {
 	struct gpio_chip	gc;
 	int			irqbase;
 	int			irq;
-	void			*priv;
 	struct irq_chip		irq_chip;
 	u32			pinctrl_id;
 	int (*direction_input)(struct gpio_chip *chip, unsigned offset);
@@ -226,7 +225,7 @@ static void npcmgpio_irq_handler(struct irq_desc *desc)
 	chained_irq_enter(chip, desc);
 	sts = ioread32(bank->base + NPCM7XX_GP_N_EVST);
 	en  = ioread32(bank->base + NPCM7XX_GP_N_EVEN);
-	dev_dbg(chip->parent_device, "==> got irq sts %.8x %.8x\n", sts,
+	dev_dbg(bank->gc.parent, "==> got irq sts %.8x %.8x\n", sts,
 		en);
 
 	sts &= en;
@@ -241,33 +240,33 @@ static int npcmgpio_set_irq_type(struct irq_data *d, unsigned int type)
 		gpiochip_get_data(irq_data_get_irq_chip_data(d));
 	unsigned int gpio = BIT(d->hwirq);
 
-	dev_dbg(d->chip->parent_device, "setirqtype: %u.%u = %u\n", gpio,
+	dev_dbg(bank->gc.parent, "setirqtype: %u.%u = %u\n", gpio,
 		d->irq, type);
 	switch (type) {
 	case IRQ_TYPE_EDGE_RISING:
-		dev_dbg(d->chip->parent_device, "edge.rising\n");
+		dev_dbg(bank->gc.parent, "edge.rising\n");
 		npcm_gpio_clr(&bank->gc, bank->base + NPCM7XX_GP_N_EVBE, gpio);
 		npcm_gpio_clr(&bank->gc, bank->base + NPCM7XX_GP_N_POL, gpio);
 		break;
 	case IRQ_TYPE_EDGE_FALLING:
-		dev_dbg(d->chip->parent_device, "edge.falling\n");
+		dev_dbg(bank->gc.parent, "edge.falling\n");
 		npcm_gpio_clr(&bank->gc, bank->base + NPCM7XX_GP_N_EVBE, gpio);
 		npcm_gpio_set(&bank->gc, bank->base + NPCM7XX_GP_N_POL, gpio);
 		break;
 	case IRQ_TYPE_EDGE_BOTH:
-		dev_dbg(d->chip->parent_device, "edge.both\n");
+		dev_dbg(bank->gc.parent, "edge.both\n");
 		npcm_gpio_set(&bank->gc, bank->base + NPCM7XX_GP_N_EVBE, gpio);
 		break;
 	case IRQ_TYPE_LEVEL_LOW:
-		dev_dbg(d->chip->parent_device, "level.low\n");
+		dev_dbg(bank->gc.parent, "level.low\n");
 		npcm_gpio_set(&bank->gc, bank->base + NPCM7XX_GP_N_POL, gpio);
 		break;
 	case IRQ_TYPE_LEVEL_HIGH:
-		dev_dbg(d->chip->parent_device, "level.high\n");
+		dev_dbg(bank->gc.parent, "level.high\n");
 		npcm_gpio_clr(&bank->gc, bank->base + NPCM7XX_GP_N_POL, gpio);
 		break;
 	default:
-		dev_dbg(d->chip->parent_device, "invalid irq type\n");
+		dev_dbg(bank->gc.parent, "invalid irq type\n");
 		return -EINVAL;
 	}
 
@@ -289,7 +288,7 @@ static void npcmgpio_irq_ack(struct irq_data *d)
 		gpiochip_get_data(irq_data_get_irq_chip_data(d));
 	unsigned int gpio = d->hwirq;
 
-	dev_dbg(d->chip->parent_device, "irq_ack: %u.%u\n", gpio, d->irq);
+	dev_dbg(bank->gc.parent, "irq_ack: %u.%u\n", gpio, d->irq);
 	iowrite32(BIT(gpio), bank->base + NPCM7XX_GP_N_EVST);
 }
 
@@ -301,7 +300,7 @@ static void npcmgpio_irq_mask(struct irq_data *d)
 	unsigned int gpio = d->hwirq;
 
 	/* Clear events */
-	dev_dbg(d->chip->parent_device, "irq_mask: %u.%u\n", gpio, d->irq);
+	dev_dbg(bank->gc.parent, "irq_mask: %u.%u\n", gpio, d->irq);
 	iowrite32(BIT(gpio), bank->base + NPCM7XX_GP_N_EVENC);
 }
 
@@ -313,7 +312,7 @@ static void npcmgpio_irq_unmask(struct irq_data *d)
 	unsigned int gpio = d->hwirq;
 
 	/* Enable events */
-	dev_dbg(d->chip->parent_device, "irq_unmask: %u.%u\n", gpio, d->irq);
+	dev_dbg(bank->gc.parent, "irq_unmask: %u.%u\n", gpio, d->irq);
 	iowrite32(BIT(gpio), bank->base + NPCM7XX_GP_N_EVENS);
 }
 
@@ -323,7 +322,7 @@ static unsigned int npcmgpio_irq_startup(struct irq_data *d)
 	unsigned int gpio = d->hwirq;
 
 	/* active-high, input, clear interrupt, enable interrupt */
-	dev_dbg(d->chip->parent_device, "startup: %u.%u\n", gpio, d->irq);
+	dev_dbg(gc->parent, "startup: %u.%u\n", gpio, d->irq);
 	npcmgpio_direction_input(gc, gpio);
 	npcmgpio_irq_ack(d);
 	npcmgpio_irq_unmask(d);
-- 
GitLab


From 0d872ed9e2148a8ba29de5a71c352fa54abf8e5e Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Tue, 1 Feb 2022 12:03:09 +0000
Subject: [PATCH 0339/1586] pinctrl: starfive: Move PM device over to irq
 domain

Move the reference to the device over to the irq domain.

Signed-off-by: Marc Zyngier <maz@kernel.org>
Reviewed-by: Emil Renner Berthing <kernel@esmil.dk>
Acked-by: Bartosz Golaszewski <brgl@bgdev.pl>
Link: https://lore.kernel.org/r/20220201120310.878267-12-maz@kernel.org
---
 drivers/pinctrl/pinctrl-starfive.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/pinctrl/pinctrl-starfive.c b/drivers/pinctrl/pinctrl-starfive.c
index 0b912152a405a..5be9866c2b3c0 100644
--- a/drivers/pinctrl/pinctrl-starfive.c
+++ b/drivers/pinctrl/pinctrl-starfive.c
@@ -1307,7 +1307,6 @@ static int starfive_probe(struct platform_device *pdev)
 	sfp->gc.base = -1;
 	sfp->gc.ngpio = NR_GPIOS;
 
-	starfive_irq_chip.parent_device = dev;
 	starfive_irq_chip.name = sfp->gc.label;
 
 	sfp->gc.irq.chip = &starfive_irq_chip;
@@ -1330,6 +1329,8 @@ static int starfive_probe(struct platform_device *pdev)
 	if (ret)
 		return dev_err_probe(dev, ret, "could not register gpiochip\n");
 
+	irq_domain_set_pm_device(sfp->gc.irq.domain, dev);
+
 out_pinctrl_enable:
 	return pinctrl_enable(sfp->pctl);
 }
-- 
GitLab


From beb0622138cd2848dec06b0651a988c39d099574 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Tue, 1 Feb 2022 12:03:10 +0000
Subject: [PATCH 0340/1586] genirq: Kill irq_chip::parent_device

Now that noone is using irq_chip::parent_device in the tree, get
rid of it.

Signed-off-by: Marc Zyngier <maz@kernel.org>
Acked-by: Bartosz Golaszewski <brgl@bgdev.pl>
Link: https://lore.kernel.org/r/20220201120310.878267-13-maz@kernel.org
---
 include/linux/irq.h | 2 --
 kernel/irq/chip.c   | 3 ---
 2 files changed, 5 deletions(-)

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 848e1e12c5c63..2cb2e2ac2703c 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -456,7 +456,6 @@ static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d)
 /**
  * struct irq_chip - hardware interrupt chip descriptor
  *
- * @parent_device:	pointer to parent device for irqchip
  * @name:		name for /proc/interrupts
  * @irq_startup:	start up the interrupt (defaults to ->enable if NULL)
  * @irq_shutdown:	shut down the interrupt (defaults to ->disable if NULL)
@@ -503,7 +502,6 @@ static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d)
  * @flags:		chip specific flags
  */
 struct irq_chip {
-	struct device	*parent_device;
 	const char	*name;
 	unsigned int	(*irq_startup)(struct irq_data *data);
 	void		(*irq_shutdown)(struct irq_data *data);
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index a2a12cdbe8725..24b6f2b40e5e9 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -1560,9 +1560,6 @@ int irq_chip_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
 
 static struct device *irq_get_parent_device(struct irq_data *data)
 {
-	if (data->chip->parent_device)
-		return data->chip->parent_device;
-
 	if (data->domain)
 		return data->domain->dev;
 
-- 
GitLab


From c95aa2bab974394809edea28690f6504a15791b6 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Tue, 8 Feb 2022 16:15:46 -0800
Subject: [PATCH 0341/1586] thermal: intel: hfi: INTEL_HFI_THERMAL depends on
 NET

THERMAL_NETLINK depends on NET and since 'select' does not follow
any dependency chain, INTEL_HFI_THERMAL also should depend on NET.

Fix one Kconfig warning and 48 subsequent build errors:

WARNING: unmet direct dependencies detected for THERMAL_NETLINK
  Depends on [n]: THERMAL [=y] && NET [=n]
  Selected by [y]:
  - INTEL_HFI_THERMAL [=y] && THERMAL [=y] && (X86 [=y] || X86_INTEL_QUARK [=n] || COMPILE_TEST [=y]) && CPU_SUP_INTEL [=y] && X86_THERMAL_VECTOR [=y]

Fixes: bd30cdfd9bd7 ("thermal: intel: hfi: Notify user space for HFI events")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Reviewed-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/thermal/intel/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/thermal/intel/Kconfig b/drivers/thermal/intel/Kconfig
index 6cf3fe36a4ae8..f0c8456792509 100644
--- a/drivers/thermal/intel/Kconfig
+++ b/drivers/thermal/intel/Kconfig
@@ -102,6 +102,7 @@ config INTEL_MENLOW
 
 config INTEL_HFI_THERMAL
 	bool "Intel Hardware Feedback Interface"
+	depends on NET
 	depends on CPU_SUP_INTEL
 	depends on X86_THERMAL_VECTOR
 	select THERMAL_NETLINK
-- 
GitLab


From c9c28ed0ab611b6ee3bfab88eba334e272642433 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Stephan=20M=C3=BCller?= <smueller@chronox.de>
Date: Tue, 1 Feb 2022 09:40:58 +0100
Subject: [PATCH 0342/1586] crypto: hmac - add fips_skip support

By adding the support for the flag fips_skip, hash / HMAC test vectors
may be marked to be not applicable in FIPS mode. Such vectors are
silently skipped in FIPS mode.

Signed-off-by: Stephan Mueller <smueller@chronox.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/testmgr.c | 3 +++
 crypto/testmgr.h | 2 ++
 2 files changed, 5 insertions(+)

diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index 3a5a3e5cb77bf..0c2efde2f6c65 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -1851,6 +1851,9 @@ static int __alg_test_hash(const struct hash_testvec *vecs,
 	}
 
 	for (i = 0; i < num_vecs; i++) {
+		if (fips_enabled && vecs[i].fips_skip)
+			continue;
+
 		err = test_hash_vec(&vecs[i], i, req, desc, tsgl, hashstate);
 		if (err)
 			goto out;
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index a253d66ba1c1a..17b37525f2890 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -33,6 +33,7 @@
  * @ksize:	Length of @key in bytes (0 if no key)
  * @setkey_error: Expected error from setkey()
  * @digest_error: Expected error from digest()
+ * @fips_skip:	Skip the test vector in FIPS mode
  */
 struct hash_testvec {
 	const char *key;
@@ -42,6 +43,7 @@ struct hash_testvec {
 	unsigned short ksize;
 	int setkey_error;
 	int digest_error;
+	bool fips_skip;
 };
 
 /*
-- 
GitLab


From 37f36e5717869a69775ecb23baedf0f06ea940b4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Stephan=20M=C3=BCller?= <smueller@chronox.de>
Date: Tue, 1 Feb 2022 09:41:32 +0100
Subject: [PATCH 0343/1586] crypto: hmac - disallow keys < 112 bits in FIPS
 mode

FIPS 140 requires a minimum security strength of 112 bits. This implies
that the HMAC key must not be smaller than 112 in FIPS mode.

This restriction implies that the test vectors for HMAC that have a key
that is smaller than 112 bits must be disabled when FIPS support is
compiled.

Signed-off-by: Stephan Mueller <smueller@chronox.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/hmac.c    | 4 ++++
 crypto/testmgr.h | 9 +++++++++
 2 files changed, 13 insertions(+)

diff --git a/crypto/hmac.c b/crypto/hmac.c
index 25856aa7ccbf9..3610ff0b67392 100644
--- a/crypto/hmac.c
+++ b/crypto/hmac.c
@@ -15,6 +15,7 @@
 #include <crypto/internal/hash.h>
 #include <crypto/scatterwalk.h>
 #include <linux/err.h>
+#include <linux/fips.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
@@ -51,6 +52,9 @@ static int hmac_setkey(struct crypto_shash *parent,
 	SHASH_DESC_ON_STACK(shash, hash);
 	unsigned int i;
 
+	if (fips_enabled && (keylen < 112 / 8))
+		return -EINVAL;
+
 	shash->tfm = hash;
 
 	if (keylen > bs) {
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index 17b37525f2890..85ccf811f5e79 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -5715,6 +5715,7 @@ static const struct hash_testvec hmac_sha1_tv_template[] = {
 		.psize	= 28,
 		.digest	= "\xef\xfc\xdf\x6a\xe5\xeb\x2f\xa2\xd2\x74"
 			  "\x16\xd5\xf1\x84\xdf\x9c\x25\x9a\x7c\x79",
+		.fips_skip = 1,
 	}, {
 		.key	= "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa",
 		.ksize	= 20,
@@ -5804,6 +5805,7 @@ static const struct hash_testvec hmac_sha224_tv_template[] = {
 			"\x45\x69\x0f\x3a\x7e\x9e\x6d\x0f"
 			"\x8b\xbe\xa2\xa3\x9e\x61\x48\x00"
 			"\x8f\xd0\x5e\x44",
+		.fips_skip = 1,
 	}, {
 		.key    = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
 			"\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
@@ -5947,6 +5949,7 @@ static const struct hash_testvec hmac_sha256_tv_template[] = {
 			  "\x6a\x04\x24\x26\x08\x95\x75\xc7"
 			  "\x5a\x00\x3f\x08\x9d\x27\x39\x83"
 			  "\x9d\xec\x58\xb9\x64\xec\x38\x43",
+		.fips_skip = 1,
 	}, {
 		.key	= "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
 			"\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
@@ -6445,6 +6448,7 @@ static const struct hash_testvec hmac_sha384_tv_template[] = {
 			  "\xe4\x2e\xc3\x73\x63\x22\x44\x5e"
 			  "\x8e\x22\x40\xca\x5e\x69\xe2\xc7"
 			  "\x8b\x32\x39\xec\xfa\xb2\x16\x49",
+		.fips_skip = 1,
 	}, {
 		.key	= "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
 			  "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
@@ -6545,6 +6549,7 @@ static const struct hash_testvec hmac_sha512_tv_template[] = {
 			  "\x6d\x03\x4f\x65\xf8\xf0\xe6\xfd"
 			  "\xca\xea\xb1\xa3\x4d\x4a\x6b\x4b"
 			  "\x63\x6e\x07\x0a\x38\xbc\xe7\x37",
+		.fips_skip = 1,
 	}, {
 		.key	= "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
 			  "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
@@ -6640,6 +6645,7 @@ static const struct hash_testvec hmac_sha3_224_tv_template[] = {
 			  "\x1b\x79\x86\x34\xad\x38\x68\x11"
 			  "\xc2\xcf\xc8\x5b\xfa\xf5\xd5\x2b"
 			  "\xba\xce\x5e\x66",
+		.fips_skip = 1,
 	}, {
 		.key	= "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
 			  "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
@@ -6727,6 +6733,7 @@ static const struct hash_testvec hmac_sha3_256_tv_template[] = {
 			  "\x35\x96\xbb\xb0\xda\x73\xb8\x87"
 			  "\xc9\x17\x1f\x93\x09\x5b\x29\x4a"
 			  "\xe8\x57\xfb\xe2\x64\x5e\x1b\xa5",
+		.fips_skip = 1,
 	}, {
 		.key	= "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
 			  "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
@@ -6818,6 +6825,7 @@ static const struct hash_testvec hmac_sha3_384_tv_template[] = {
 			  "\x3c\xa1\x35\x08\xa9\x32\x43\xce"
 			  "\x48\xc0\x45\xdc\x00\x7f\x26\xa2"
 			  "\x1b\x3f\x5e\x0e\x9d\xf4\xc2\x0a",
+		.fips_skip = 1,
 	}, {
 		.key	= "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
 			  "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
@@ -6917,6 +6925,7 @@ static const struct hash_testvec hmac_sha3_512_tv_template[] = {
 			  "\xee\x7a\x0c\x31\xd0\x22\xa9\x5e"
 			  "\x1f\xc9\x2b\xa9\xd7\x7d\xf8\x83"
 			  "\x96\x02\x75\xbe\xb4\xe6\x20\x24",
+		.fips_skip = 1,
 	}, {
 		.key	= "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
 			  "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-- 
GitLab


From e8bf24bd439da1ee7f37c2b03f44c6ad37c0c8c0 Mon Sep 17 00:00:00 2001
From: Tudor Ambarus <tudor.ambarus@microchip.com>
Date: Fri, 4 Feb 2022 15:35:22 +0200
Subject: [PATCH 0344/1586] crypto: atmel-tdes - Add support for the TDES IP
 available on sama7g5 SoC

Add support for the TDES IP found on sama7g5.

Signed-off-by: Tudor Ambarus <tudor.ambarus@microchip.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/atmel-tdes.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/crypto/atmel-tdes.c b/drivers/crypto/atmel-tdes.c
index e30786ec9f2d4..9fd7b8e439d2f 100644
--- a/drivers/crypto/atmel-tdes.c
+++ b/drivers/crypto/atmel-tdes.c
@@ -1130,6 +1130,7 @@ static void atmel_tdes_get_cap(struct atmel_tdes_dev *dd)
 
 	/* keep only major version number */
 	switch (dd->hw_version & 0xf00) {
+	case 0x800:
 	case 0x700:
 		dd->caps.has_dma = 1;
 		dd->caps.has_cfb_3keys = 1;
-- 
GitLab


From 297565aa22cfa80ab0f88c3569693aea0b6afb6d Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Sat, 5 Feb 2022 16:23:45 +0100
Subject: [PATCH 0345/1586] lib/xor: make xor prototypes more friendly to
 compiler vectorization

Modern compilers are perfectly capable of extracting parallelism from
the XOR routines, provided that the prototypes reflect the nature of the
input accurately, in particular, the fact that the input vectors are
expected not to overlap. This is not documented explicitly, but is
implied by the interchangeability of the various C routines, some of
which use temporary variables while others don't: this means that these
routines only behave identically for non-overlapping inputs.

So let's decorate these input vectors with the __restrict modifier,
which informs the compiler that there is no overlap. While at it, make
the input-only vectors pointer-to-const as well.

Tested-by: Nathan Chancellor <nathan@kernel.org>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Link: https://github.com/ClangBuiltLinux/linux/issues/563
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/alpha/include/asm/xor.h           | 53 ++++++++++------
 arch/arm/include/asm/xor.h             | 42 ++++++++-----
 arch/arm64/include/asm/xor.h           | 21 ++++---
 arch/arm64/lib/xor-neon.c              | 46 ++++++++------
 arch/ia64/include/asm/xor.h            | 21 ++++---
 arch/powerpc/include/asm/xor_altivec.h | 25 ++++----
 arch/powerpc/lib/xor_vmx.c             | 28 ++++++---
 arch/powerpc/lib/xor_vmx.h             | 27 +++++----
 arch/powerpc/lib/xor_vmx_glue.c        | 32 +++++-----
 arch/s390/lib/xor.c                    | 21 ++++---
 arch/sparc/include/asm/xor_32.h        | 21 ++++---
 arch/sparc/include/asm/xor_64.h        | 42 ++++++++-----
 arch/x86/include/asm/xor.h             | 42 ++++++++-----
 arch/x86/include/asm/xor_32.h          | 42 ++++++++-----
 arch/x86/include/asm/xor_avx.h         | 21 ++++---
 include/asm-generic/xor.h              | 84 +++++++++++++++++---------
 include/linux/raid/xor.h               | 21 ++++---
 17 files changed, 381 insertions(+), 208 deletions(-)

diff --git a/arch/alpha/include/asm/xor.h b/arch/alpha/include/asm/xor.h
index 5aeb4fb3cb7cb..e0de0c233ab92 100644
--- a/arch/alpha/include/asm/xor.h
+++ b/arch/alpha/include/asm/xor.h
@@ -5,24 +5,43 @@
  * Optimized RAID-5 checksumming functions for alpha EV5 and EV6
  */
 
-extern void xor_alpha_2(unsigned long, unsigned long *, unsigned long *);
-extern void xor_alpha_3(unsigned long, unsigned long *, unsigned long *,
-		        unsigned long *);
-extern void xor_alpha_4(unsigned long, unsigned long *, unsigned long *,
-		        unsigned long *, unsigned long *);
-extern void xor_alpha_5(unsigned long, unsigned long *, unsigned long *,
-		        unsigned long *, unsigned long *, unsigned long *);
+extern void
+xor_alpha_2(unsigned long bytes, unsigned long * __restrict p1,
+	    const unsigned long * __restrict p2);
+extern void
+xor_alpha_3(unsigned long bytes, unsigned long * __restrict p1,
+	    const unsigned long * __restrict p2,
+	    const unsigned long * __restrict p3);
+extern void
+xor_alpha_4(unsigned long bytes, unsigned long * __restrict p1,
+	    const unsigned long * __restrict p2,
+	    const unsigned long * __restrict p3,
+	    const unsigned long * __restrict p4);
+extern void
+xor_alpha_5(unsigned long bytes, unsigned long * __restrict p1,
+	    const unsigned long * __restrict p2,
+	    const unsigned long * __restrict p3,
+	    const unsigned long * __restrict p4,
+	    const unsigned long * __restrict p5);
 
-extern void xor_alpha_prefetch_2(unsigned long, unsigned long *,
-				 unsigned long *);
-extern void xor_alpha_prefetch_3(unsigned long, unsigned long *,
-				 unsigned long *, unsigned long *);
-extern void xor_alpha_prefetch_4(unsigned long, unsigned long *,
-				 unsigned long *, unsigned long *,
-				 unsigned long *);
-extern void xor_alpha_prefetch_5(unsigned long, unsigned long *,
-				 unsigned long *, unsigned long *,
-				 unsigned long *, unsigned long *);
+extern void
+xor_alpha_prefetch_2(unsigned long bytes, unsigned long * __restrict p1,
+		     const unsigned long * __restrict p2);
+extern void
+xor_alpha_prefetch_3(unsigned long bytes, unsigned long * __restrict p1,
+		     const unsigned long * __restrict p2,
+		     const unsigned long * __restrict p3);
+extern void
+xor_alpha_prefetch_4(unsigned long bytes, unsigned long * __restrict p1,
+		     const unsigned long * __restrict p2,
+		     const unsigned long * __restrict p3,
+		     const unsigned long * __restrict p4);
+extern void
+xor_alpha_prefetch_5(unsigned long bytes, unsigned long * __restrict p1,
+		     const unsigned long * __restrict p2,
+		     const unsigned long * __restrict p3,
+		     const unsigned long * __restrict p4,
+		     const unsigned long * __restrict p5);
 
 asm("								\n\
 	.text							\n\
diff --git a/arch/arm/include/asm/xor.h b/arch/arm/include/asm/xor.h
index aefddec79286a..669cad5194d3d 100644
--- a/arch/arm/include/asm/xor.h
+++ b/arch/arm/include/asm/xor.h
@@ -44,7 +44,8 @@
 		: "0" (dst), "r" (a1), "r" (a2), "r" (a3), "r" (a4))
 
 static void
-xor_arm4regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+xor_arm4regs_2(unsigned long bytes, unsigned long * __restrict p1,
+	       const unsigned long * __restrict p2)
 {
 	unsigned int lines = bytes / sizeof(unsigned long) / 4;
 	register unsigned int a1 __asm__("r4");
@@ -64,8 +65,9 @@ xor_arm4regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
 }
 
 static void
-xor_arm4regs_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-		unsigned long *p3)
+xor_arm4regs_3(unsigned long bytes, unsigned long * __restrict p1,
+	       const unsigned long * __restrict p2,
+	       const unsigned long * __restrict p3)
 {
 	unsigned int lines = bytes / sizeof(unsigned long) / 4;
 	register unsigned int a1 __asm__("r4");
@@ -86,8 +88,10 @@ xor_arm4regs_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_arm4regs_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-		unsigned long *p3, unsigned long *p4)
+xor_arm4regs_4(unsigned long bytes, unsigned long * __restrict p1,
+	       const unsigned long * __restrict p2,
+	       const unsigned long * __restrict p3,
+	       const unsigned long * __restrict p4)
 {
 	unsigned int lines = bytes / sizeof(unsigned long) / 2;
 	register unsigned int a1 __asm__("r8");
@@ -105,8 +109,11 @@ xor_arm4regs_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_arm4regs_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-		unsigned long *p3, unsigned long *p4, unsigned long *p5)
+xor_arm4regs_5(unsigned long bytes, unsigned long * __restrict p1,
+	       const unsigned long * __restrict p2,
+	       const unsigned long * __restrict p3,
+	       const unsigned long * __restrict p4,
+	       const unsigned long * __restrict p5)
 {
 	unsigned int lines = bytes / sizeof(unsigned long) / 2;
 	register unsigned int a1 __asm__("r8");
@@ -146,7 +153,8 @@ static struct xor_block_template xor_block_arm4regs = {
 extern struct xor_block_template const xor_block_neon_inner;
 
 static void
-xor_neon_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+xor_neon_2(unsigned long bytes, unsigned long * __restrict p1,
+	   const unsigned long * __restrict p2)
 {
 	if (in_interrupt()) {
 		xor_arm4regs_2(bytes, p1, p2);
@@ -158,8 +166,9 @@ xor_neon_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
 }
 
 static void
-xor_neon_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-		unsigned long *p3)
+xor_neon_3(unsigned long bytes, unsigned long * __restrict p1,
+	   const unsigned long * __restrict p2,
+	   const unsigned long * __restrict p3)
 {
 	if (in_interrupt()) {
 		xor_arm4regs_3(bytes, p1, p2, p3);
@@ -171,8 +180,10 @@ xor_neon_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_neon_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-		unsigned long *p3, unsigned long *p4)
+xor_neon_4(unsigned long bytes, unsigned long * __restrict p1,
+	   const unsigned long * __restrict p2,
+	   const unsigned long * __restrict p3,
+	   const unsigned long * __restrict p4)
 {
 	if (in_interrupt()) {
 		xor_arm4regs_4(bytes, p1, p2, p3, p4);
@@ -184,8 +195,11 @@ xor_neon_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_neon_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-		unsigned long *p3, unsigned long *p4, unsigned long *p5)
+xor_neon_5(unsigned long bytes, unsigned long * __restrict p1,
+	   const unsigned long * __restrict p2,
+	   const unsigned long * __restrict p3,
+	   const unsigned long * __restrict p4,
+	   const unsigned long * __restrict p5)
 {
 	if (in_interrupt()) {
 		xor_arm4regs_5(bytes, p1, p2, p3, p4, p5);
diff --git a/arch/arm64/include/asm/xor.h b/arch/arm64/include/asm/xor.h
index 947f6a4f1aa0a..befcd8a7abc98 100644
--- a/arch/arm64/include/asm/xor.h
+++ b/arch/arm64/include/asm/xor.h
@@ -16,7 +16,8 @@
 extern struct xor_block_template const xor_block_inner_neon;
 
 static void
-xor_neon_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+xor_neon_2(unsigned long bytes, unsigned long * __restrict p1,
+	   const unsigned long * __restrict p2)
 {
 	kernel_neon_begin();
 	xor_block_inner_neon.do_2(bytes, p1, p2);
@@ -24,8 +25,9 @@ xor_neon_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
 }
 
 static void
-xor_neon_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-		unsigned long *p3)
+xor_neon_3(unsigned long bytes, unsigned long * __restrict p1,
+	   const unsigned long * __restrict p2,
+	   const unsigned long * __restrict p3)
 {
 	kernel_neon_begin();
 	xor_block_inner_neon.do_3(bytes, p1, p2, p3);
@@ -33,8 +35,10 @@ xor_neon_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_neon_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-		unsigned long *p3, unsigned long *p4)
+xor_neon_4(unsigned long bytes, unsigned long * __restrict p1,
+	   const unsigned long * __restrict p2,
+	   const unsigned long * __restrict p3,
+	   const unsigned long * __restrict p4)
 {
 	kernel_neon_begin();
 	xor_block_inner_neon.do_4(bytes, p1, p2, p3, p4);
@@ -42,8 +46,11 @@ xor_neon_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_neon_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-		unsigned long *p3, unsigned long *p4, unsigned long *p5)
+xor_neon_5(unsigned long bytes, unsigned long * __restrict p1,
+	   const unsigned long * __restrict p2,
+	   const unsigned long * __restrict p3,
+	   const unsigned long * __restrict p4,
+	   const unsigned long * __restrict p5)
 {
 	kernel_neon_begin();
 	xor_block_inner_neon.do_5(bytes, p1, p2, p3, p4, p5);
diff --git a/arch/arm64/lib/xor-neon.c b/arch/arm64/lib/xor-neon.c
index d189cf4e70ea6..96b171995d198 100644
--- a/arch/arm64/lib/xor-neon.c
+++ b/arch/arm64/lib/xor-neon.c
@@ -10,8 +10,8 @@
 #include <linux/module.h>
 #include <asm/neon-intrinsics.h>
 
-void xor_arm64_neon_2(unsigned long bytes, unsigned long *p1,
-	unsigned long *p2)
+void xor_arm64_neon_2(unsigned long bytes, unsigned long * __restrict p1,
+	const unsigned long * __restrict p2)
 {
 	uint64_t *dp1 = (uint64_t *)p1;
 	uint64_t *dp2 = (uint64_t *)p2;
@@ -37,8 +37,9 @@ void xor_arm64_neon_2(unsigned long bytes, unsigned long *p1,
 	} while (--lines > 0);
 }
 
-void xor_arm64_neon_3(unsigned long bytes, unsigned long *p1,
-	unsigned long *p2, unsigned long *p3)
+void xor_arm64_neon_3(unsigned long bytes, unsigned long * __restrict p1,
+	const unsigned long * __restrict p2,
+	const unsigned long * __restrict p3)
 {
 	uint64_t *dp1 = (uint64_t *)p1;
 	uint64_t *dp2 = (uint64_t *)p2;
@@ -72,8 +73,10 @@ void xor_arm64_neon_3(unsigned long bytes, unsigned long *p1,
 	} while (--lines > 0);
 }
 
-void xor_arm64_neon_4(unsigned long bytes, unsigned long *p1,
-	unsigned long *p2, unsigned long *p3, unsigned long *p4)
+void xor_arm64_neon_4(unsigned long bytes, unsigned long * __restrict p1,
+	const unsigned long * __restrict p2,
+	const unsigned long * __restrict p3,
+	const unsigned long * __restrict p4)
 {
 	uint64_t *dp1 = (uint64_t *)p1;
 	uint64_t *dp2 = (uint64_t *)p2;
@@ -115,9 +118,11 @@ void xor_arm64_neon_4(unsigned long bytes, unsigned long *p1,
 	} while (--lines > 0);
 }
 
-void xor_arm64_neon_5(unsigned long bytes, unsigned long *p1,
-	unsigned long *p2, unsigned long *p3,
-	unsigned long *p4, unsigned long *p5)
+void xor_arm64_neon_5(unsigned long bytes, unsigned long * __restrict p1,
+	const unsigned long * __restrict p2,
+	const unsigned long * __restrict p3,
+	const unsigned long * __restrict p4,
+	const unsigned long * __restrict p5)
 {
 	uint64_t *dp1 = (uint64_t *)p1;
 	uint64_t *dp2 = (uint64_t *)p2;
@@ -186,8 +191,10 @@ static inline uint64x2_t eor3(uint64x2_t p, uint64x2_t q, uint64x2_t r)
 	return res;
 }
 
-static void xor_arm64_eor3_3(unsigned long bytes, unsigned long *p1,
-			     unsigned long *p2, unsigned long *p3)
+static void xor_arm64_eor3_3(unsigned long bytes,
+	unsigned long * __restrict p1,
+	const unsigned long * __restrict p2,
+	const unsigned long * __restrict p3)
 {
 	uint64_t *dp1 = (uint64_t *)p1;
 	uint64_t *dp2 = (uint64_t *)p2;
@@ -219,9 +226,11 @@ static void xor_arm64_eor3_3(unsigned long bytes, unsigned long *p1,
 	} while (--lines > 0);
 }
 
-static void xor_arm64_eor3_4(unsigned long bytes, unsigned long *p1,
-			     unsigned long *p2, unsigned long *p3,
-			     unsigned long *p4)
+static void xor_arm64_eor3_4(unsigned long bytes,
+	unsigned long * __restrict p1,
+	const unsigned long * __restrict p2,
+	const unsigned long * __restrict p3,
+	const unsigned long * __restrict p4)
 {
 	uint64_t *dp1 = (uint64_t *)p1;
 	uint64_t *dp2 = (uint64_t *)p2;
@@ -261,9 +270,12 @@ static void xor_arm64_eor3_4(unsigned long bytes, unsigned long *p1,
 	} while (--lines > 0);
 }
 
-static void xor_arm64_eor3_5(unsigned long bytes, unsigned long *p1,
-			     unsigned long *p2, unsigned long *p3,
-			     unsigned long *p4, unsigned long *p5)
+static void xor_arm64_eor3_5(unsigned long bytes,
+	unsigned long * __restrict p1,
+	const unsigned long * __restrict p2,
+	const unsigned long * __restrict p3,
+	const unsigned long * __restrict p4,
+	const unsigned long * __restrict p5)
 {
 	uint64_t *dp1 = (uint64_t *)p1;
 	uint64_t *dp2 = (uint64_t *)p2;
diff --git a/arch/ia64/include/asm/xor.h b/arch/ia64/include/asm/xor.h
index 673051bf9d7da..6785f70d3208b 100644
--- a/arch/ia64/include/asm/xor.h
+++ b/arch/ia64/include/asm/xor.h
@@ -4,13 +4,20 @@
  */
 
 
-extern void xor_ia64_2(unsigned long, unsigned long *, unsigned long *);
-extern void xor_ia64_3(unsigned long, unsigned long *, unsigned long *,
-		       unsigned long *);
-extern void xor_ia64_4(unsigned long, unsigned long *, unsigned long *,
-		       unsigned long *, unsigned long *);
-extern void xor_ia64_5(unsigned long, unsigned long *, unsigned long *,
-		       unsigned long *, unsigned long *, unsigned long *);
+extern void xor_ia64_2(unsigned long bytes, unsigned long * __restrict p1,
+		       const unsigned long * __restrict p2);
+extern void xor_ia64_3(unsigned long bytes, unsigned long * __restrict p1,
+		       const unsigned long * __restrict p2,
+		       const unsigned long * __restrict p3);
+extern void xor_ia64_4(unsigned long bytes, unsigned long * __restrict p1,
+		       const unsigned long * __restrict p2,
+		       const unsigned long * __restrict p3,
+		       const unsigned long * __restrict p4);
+extern void xor_ia64_5(unsigned long bytes, unsigned long * __restrict p1,
+		       const unsigned long * __restrict p2,
+		       const unsigned long * __restrict p3,
+		       const unsigned long * __restrict p4,
+		       const unsigned long * __restrict p5);
 
 static struct xor_block_template xor_block_ia64 = {
 	.name =	"ia64",
diff --git a/arch/powerpc/include/asm/xor_altivec.h b/arch/powerpc/include/asm/xor_altivec.h
index 6ca923510b597..294620a25f802 100644
--- a/arch/powerpc/include/asm/xor_altivec.h
+++ b/arch/powerpc/include/asm/xor_altivec.h
@@ -3,17 +3,20 @@
 #define _ASM_POWERPC_XOR_ALTIVEC_H
 
 #ifdef CONFIG_ALTIVEC
-
-void xor_altivec_2(unsigned long bytes, unsigned long *v1_in,
-		   unsigned long *v2_in);
-void xor_altivec_3(unsigned long bytes, unsigned long *v1_in,
-		   unsigned long *v2_in, unsigned long *v3_in);
-void xor_altivec_4(unsigned long bytes, unsigned long *v1_in,
-		   unsigned long *v2_in, unsigned long *v3_in,
-		   unsigned long *v4_in);
-void xor_altivec_5(unsigned long bytes, unsigned long *v1_in,
-		   unsigned long *v2_in, unsigned long *v3_in,
-		   unsigned long *v4_in, unsigned long *v5_in);
+void xor_altivec_2(unsigned long bytes, unsigned long * __restrict p1,
+		   const unsigned long * __restrict p2);
+void xor_altivec_3(unsigned long bytes, unsigned long * __restrict p1,
+		   const unsigned long * __restrict p2,
+		   const unsigned long * __restrict p3);
+void xor_altivec_4(unsigned long bytes, unsigned long * __restrict p1,
+		   const unsigned long * __restrict p2,
+		   const unsigned long * __restrict p3,
+		   const unsigned long * __restrict p4);
+void xor_altivec_5(unsigned long bytes, unsigned long * __restrict p1,
+		   const unsigned long * __restrict p2,
+		   const unsigned long * __restrict p3,
+		   const unsigned long * __restrict p4,
+		   const unsigned long * __restrict p5);
 
 #endif
 #endif /* _ASM_POWERPC_XOR_ALTIVEC_H */
diff --git a/arch/powerpc/lib/xor_vmx.c b/arch/powerpc/lib/xor_vmx.c
index 54e61979e80e5..aab49d056d188 100644
--- a/arch/powerpc/lib/xor_vmx.c
+++ b/arch/powerpc/lib/xor_vmx.c
@@ -49,8 +49,9 @@ typedef vector signed char unative_t;
 		V1##_3 = vec_xor(V1##_3, V2##_3);	\
 	} while (0)
 
-void __xor_altivec_2(unsigned long bytes, unsigned long *v1_in,
-		     unsigned long *v2_in)
+void __xor_altivec_2(unsigned long bytes,
+		     unsigned long * __restrict v1_in,
+		     const unsigned long * __restrict v2_in)
 {
 	DEFINE(v1);
 	DEFINE(v2);
@@ -67,8 +68,10 @@ void __xor_altivec_2(unsigned long bytes, unsigned long *v1_in,
 	} while (--lines > 0);
 }
 
-void __xor_altivec_3(unsigned long bytes, unsigned long *v1_in,
-		     unsigned long *v2_in, unsigned long *v3_in)
+void __xor_altivec_3(unsigned long bytes,
+		     unsigned long * __restrict v1_in,
+		     const unsigned long * __restrict v2_in,
+		     const unsigned long * __restrict v3_in)
 {
 	DEFINE(v1);
 	DEFINE(v2);
@@ -89,9 +92,11 @@ void __xor_altivec_3(unsigned long bytes, unsigned long *v1_in,
 	} while (--lines > 0);
 }
 
-void __xor_altivec_4(unsigned long bytes, unsigned long *v1_in,
-		     unsigned long *v2_in, unsigned long *v3_in,
-		     unsigned long *v4_in)
+void __xor_altivec_4(unsigned long bytes,
+		     unsigned long * __restrict v1_in,
+		     const unsigned long * __restrict v2_in,
+		     const unsigned long * __restrict v3_in,
+		     const unsigned long * __restrict v4_in)
 {
 	DEFINE(v1);
 	DEFINE(v2);
@@ -116,9 +121,12 @@ void __xor_altivec_4(unsigned long bytes, unsigned long *v1_in,
 	} while (--lines > 0);
 }
 
-void __xor_altivec_5(unsigned long bytes, unsigned long *v1_in,
-		     unsigned long *v2_in, unsigned long *v3_in,
-		     unsigned long *v4_in, unsigned long *v5_in)
+void __xor_altivec_5(unsigned long bytes,
+		     unsigned long * __restrict v1_in,
+		     const unsigned long * __restrict v2_in,
+		     const unsigned long * __restrict v3_in,
+		     const unsigned long * __restrict v4_in,
+		     const unsigned long * __restrict v5_in)
 {
 	DEFINE(v1);
 	DEFINE(v2);
diff --git a/arch/powerpc/lib/xor_vmx.h b/arch/powerpc/lib/xor_vmx.h
index 5c2b0839b1794..573c41d90dac5 100644
--- a/arch/powerpc/lib/xor_vmx.h
+++ b/arch/powerpc/lib/xor_vmx.h
@@ -6,16 +6,17 @@
  * outside of the enable/disable altivec block.
  */
 
-void __xor_altivec_2(unsigned long bytes, unsigned long *v1_in,
-			     unsigned long *v2_in);
-
-void __xor_altivec_3(unsigned long bytes, unsigned long *v1_in,
-			     unsigned long *v2_in, unsigned long *v3_in);
-
-void __xor_altivec_4(unsigned long bytes, unsigned long *v1_in,
-			     unsigned long *v2_in, unsigned long *v3_in,
-			     unsigned long *v4_in);
-
-void __xor_altivec_5(unsigned long bytes, unsigned long *v1_in,
-			     unsigned long *v2_in, unsigned long *v3_in,
-			     unsigned long *v4_in, unsigned long *v5_in);
+void __xor_altivec_2(unsigned long bytes, unsigned long * __restrict p1,
+		     const unsigned long * __restrict p2);
+void __xor_altivec_3(unsigned long bytes, unsigned long * __restrict p1,
+		     const unsigned long * __restrict p2,
+		     const unsigned long * __restrict p3);
+void __xor_altivec_4(unsigned long bytes, unsigned long * __restrict p1,
+		     const unsigned long * __restrict p2,
+		     const unsigned long * __restrict p3,
+		     const unsigned long * __restrict p4);
+void __xor_altivec_5(unsigned long bytes, unsigned long * __restrict p1,
+		     const unsigned long * __restrict p2,
+		     const unsigned long * __restrict p3,
+		     const unsigned long * __restrict p4,
+		     const unsigned long * __restrict p5);
diff --git a/arch/powerpc/lib/xor_vmx_glue.c b/arch/powerpc/lib/xor_vmx_glue.c
index 80dba916c3674..35d917ece4d1e 100644
--- a/arch/powerpc/lib/xor_vmx_glue.c
+++ b/arch/powerpc/lib/xor_vmx_glue.c
@@ -12,47 +12,51 @@
 #include <asm/xor_altivec.h>
 #include "xor_vmx.h"
 
-void xor_altivec_2(unsigned long bytes, unsigned long *v1_in,
-		   unsigned long *v2_in)
+void xor_altivec_2(unsigned long bytes, unsigned long * __restrict p1,
+		   const unsigned long * __restrict p2)
 {
 	preempt_disable();
 	enable_kernel_altivec();
-	__xor_altivec_2(bytes, v1_in, v2_in);
+	__xor_altivec_2(bytes, p1, p2);
 	disable_kernel_altivec();
 	preempt_enable();
 }
 EXPORT_SYMBOL(xor_altivec_2);
 
-void xor_altivec_3(unsigned long bytes,  unsigned long *v1_in,
-		   unsigned long *v2_in, unsigned long *v3_in)
+void xor_altivec_3(unsigned long bytes, unsigned long * __restrict p1,
+		   const unsigned long * __restrict p2,
+		   const unsigned long * __restrict p3)
 {
 	preempt_disable();
 	enable_kernel_altivec();
-	__xor_altivec_3(bytes, v1_in, v2_in, v3_in);
+	__xor_altivec_3(bytes, p1, p2, p3);
 	disable_kernel_altivec();
 	preempt_enable();
 }
 EXPORT_SYMBOL(xor_altivec_3);
 
-void xor_altivec_4(unsigned long bytes,  unsigned long *v1_in,
-		   unsigned long *v2_in, unsigned long *v3_in,
-		   unsigned long *v4_in)
+void xor_altivec_4(unsigned long bytes, unsigned long * __restrict p1,
+		   const unsigned long * __restrict p2,
+		   const unsigned long * __restrict p3,
+		   const unsigned long * __restrict p4)
 {
 	preempt_disable();
 	enable_kernel_altivec();
-	__xor_altivec_4(bytes, v1_in, v2_in, v3_in, v4_in);
+	__xor_altivec_4(bytes, p1, p2, p3, p4);
 	disable_kernel_altivec();
 	preempt_enable();
 }
 EXPORT_SYMBOL(xor_altivec_4);
 
-void xor_altivec_5(unsigned long bytes,  unsigned long *v1_in,
-		   unsigned long *v2_in, unsigned long *v3_in,
-		   unsigned long *v4_in, unsigned long *v5_in)
+void xor_altivec_5(unsigned long bytes, unsigned long * __restrict p1,
+		   const unsigned long * __restrict p2,
+		   const unsigned long * __restrict p3,
+		   const unsigned long * __restrict p4,
+		   const unsigned long * __restrict p5)
 {
 	preempt_disable();
 	enable_kernel_altivec();
-	__xor_altivec_5(bytes, v1_in, v2_in, v3_in, v4_in, v5_in);
+	__xor_altivec_5(bytes, p1, p2, p3, p4, p5);
 	disable_kernel_altivec();
 	preempt_enable();
 }
diff --git a/arch/s390/lib/xor.c b/arch/s390/lib/xor.c
index a963c3d8ad0d9..fb924a8041dc7 100644
--- a/arch/s390/lib/xor.c
+++ b/arch/s390/lib/xor.c
@@ -11,7 +11,8 @@
 #include <linux/raid/xor.h>
 #include <asm/xor.h>
 
-static void xor_xc_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+static void xor_xc_2(unsigned long bytes, unsigned long * __restrict p1,
+		     const unsigned long * __restrict p2)
 {
 	asm volatile(
 		"	larl	1,2f\n"
@@ -32,8 +33,9 @@ static void xor_xc_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
 		: "0", "1", "cc", "memory");
 }
 
-static void xor_xc_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-		     unsigned long *p3)
+static void xor_xc_3(unsigned long bytes, unsigned long * __restrict p1,
+		     const unsigned long * __restrict p2,
+		     const unsigned long * __restrict p3)
 {
 	asm volatile(
 		"	larl	1,2f\n"
@@ -58,8 +60,10 @@ static void xor_xc_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 		: : "0", "1", "cc", "memory");
 }
 
-static void xor_xc_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-		     unsigned long *p3, unsigned long *p4)
+static void xor_xc_4(unsigned long bytes, unsigned long * __restrict p1,
+		     const unsigned long * __restrict p2,
+		     const unsigned long * __restrict p3,
+		     const unsigned long * __restrict p4)
 {
 	asm volatile(
 		"	larl	1,2f\n"
@@ -88,8 +92,11 @@ static void xor_xc_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 		: : "0", "1", "cc", "memory");
 }
 
-static void xor_xc_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-		     unsigned long *p3, unsigned long *p4, unsigned long *p5)
+static void xor_xc_5(unsigned long bytes, unsigned long * __restrict p1,
+		     const unsigned long * __restrict p2,
+		     const unsigned long * __restrict p3,
+		     const unsigned long * __restrict p4,
+		     const unsigned long * __restrict p5)
 {
 	asm volatile(
 		"	larl	1,2f\n"
diff --git a/arch/sparc/include/asm/xor_32.h b/arch/sparc/include/asm/xor_32.h
index 3e5af37e4b9cd..0351813cf3af5 100644
--- a/arch/sparc/include/asm/xor_32.h
+++ b/arch/sparc/include/asm/xor_32.h
@@ -13,7 +13,8 @@
  */
 
 static void
-sparc_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+sparc_2(unsigned long bytes, unsigned long * __restrict p1,
+	const unsigned long * __restrict p2)
 {
 	int lines = bytes / (sizeof (long)) / 8;
 
@@ -50,8 +51,9 @@ sparc_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
 }
 
 static void
-sparc_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-	unsigned long *p3)
+sparc_3(unsigned long bytes, unsigned long * __restrict p1,
+	const unsigned long * __restrict p2,
+	const unsigned long * __restrict p3)
 {
 	int lines = bytes / (sizeof (long)) / 8;
 
@@ -101,8 +103,10 @@ sparc_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-sparc_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-	unsigned long *p3, unsigned long *p4)
+sparc_4(unsigned long bytes, unsigned long * __restrict p1,
+	const unsigned long * __restrict p2,
+	const unsigned long * __restrict p3,
+	const unsigned long * __restrict p4)
 {
 	int lines = bytes / (sizeof (long)) / 8;
 
@@ -165,8 +169,11 @@ sparc_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-sparc_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-	unsigned long *p3, unsigned long *p4, unsigned long *p5)
+sparc_5(unsigned long bytes, unsigned long * __restrict p1,
+	const unsigned long * __restrict p2,
+	const unsigned long * __restrict p3,
+	const unsigned long * __restrict p4,
+	const unsigned long * __restrict p5)
 {
 	int lines = bytes / (sizeof (long)) / 8;
 
diff --git a/arch/sparc/include/asm/xor_64.h b/arch/sparc/include/asm/xor_64.h
index 16169f3edcd5b..caaddea8ad79d 100644
--- a/arch/sparc/include/asm/xor_64.h
+++ b/arch/sparc/include/asm/xor_64.h
@@ -12,13 +12,20 @@
 
 #include <asm/spitfire.h>
 
-void xor_vis_2(unsigned long, unsigned long *, unsigned long *);
-void xor_vis_3(unsigned long, unsigned long *, unsigned long *,
-	       unsigned long *);
-void xor_vis_4(unsigned long, unsigned long *, unsigned long *,
-	       unsigned long *, unsigned long *);
-void xor_vis_5(unsigned long, unsigned long *, unsigned long *,
-	       unsigned long *, unsigned long *, unsigned long *);
+void xor_vis_2(unsigned long bytes, unsigned long * __restrict p1,
+	       const unsigned long * __restrict p2);
+void xor_vis_3(unsigned long bytes, unsigned long * __restrict p1,
+	       const unsigned long * __restrict p2,
+	       const unsigned long * __restrict p3);
+void xor_vis_4(unsigned long bytes, unsigned long * __restrict p1,
+	       const unsigned long * __restrict p2,
+	       const unsigned long * __restrict p3,
+	       const unsigned long * __restrict p4);
+void xor_vis_5(unsigned long bytes, unsigned long * __restrict p1,
+	       const unsigned long * __restrict p2,
+	       const unsigned long * __restrict p3,
+	       const unsigned long * __restrict p4,
+	       const unsigned long * __restrict p5);
 
 /* XXX Ugh, write cheetah versions... -DaveM */
 
@@ -30,13 +37,20 @@ static struct xor_block_template xor_block_VIS = {
         .do_5	= xor_vis_5,
 };
 
-void xor_niagara_2(unsigned long, unsigned long *, unsigned long *);
-void xor_niagara_3(unsigned long, unsigned long *, unsigned long *,
-		   unsigned long *);
-void xor_niagara_4(unsigned long, unsigned long *, unsigned long *,
-		   unsigned long *, unsigned long *);
-void xor_niagara_5(unsigned long, unsigned long *, unsigned long *,
-		   unsigned long *, unsigned long *, unsigned long *);
+void xor_niagara_2(unsigned long bytes, unsigned long * __restrict p1,
+		   const unsigned long * __restrict p2);
+void xor_niagara_3(unsigned long bytes, unsigned long * __restrict p1,
+		   const unsigned long * __restrict p2,
+		   const unsigned long * __restrict p3);
+void xor_niagara_4(unsigned long bytes, unsigned long * __restrict p1,
+		   const unsigned long * __restrict p2,
+		   const unsigned long * __restrict p3,
+		   const unsigned long * __restrict p4);
+void xor_niagara_5(unsigned long bytes, unsigned long * __restrict p1,
+		   const unsigned long * __restrict p2,
+		   const unsigned long * __restrict p3,
+		   const unsigned long * __restrict p4,
+		   const unsigned long * __restrict p5);
 
 static struct xor_block_template xor_block_niagara = {
         .name	= "Niagara",
diff --git a/arch/x86/include/asm/xor.h b/arch/x86/include/asm/xor.h
index 2ee95a7769e60..7b0307acc4103 100644
--- a/arch/x86/include/asm/xor.h
+++ b/arch/x86/include/asm/xor.h
@@ -57,7 +57,8 @@
 					op(i + 3, 3)
 
 static void
-xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+xor_sse_2(unsigned long bytes, unsigned long * __restrict p1,
+	  const unsigned long * __restrict p2)
 {
 	unsigned long lines = bytes >> 8;
 
@@ -108,7 +109,8 @@ xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
 }
 
 static void
-xor_sse_2_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+xor_sse_2_pf64(unsigned long bytes, unsigned long * __restrict p1,
+	       const unsigned long * __restrict p2)
 {
 	unsigned long lines = bytes >> 8;
 
@@ -142,8 +144,9 @@ xor_sse_2_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2)
 }
 
 static void
-xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-	  unsigned long *p3)
+xor_sse_3(unsigned long bytes, unsigned long * __restrict p1,
+	  const unsigned long * __restrict p2,
+	  const unsigned long * __restrict p3)
 {
 	unsigned long lines = bytes >> 8;
 
@@ -201,8 +204,9 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_sse_3_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-	       unsigned long *p3)
+xor_sse_3_pf64(unsigned long bytes, unsigned long * __restrict p1,
+	       const unsigned long * __restrict p2,
+	       const unsigned long * __restrict p3)
 {
 	unsigned long lines = bytes >> 8;
 
@@ -238,8 +242,10 @@ xor_sse_3_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-	  unsigned long *p3, unsigned long *p4)
+xor_sse_4(unsigned long bytes, unsigned long * __restrict p1,
+	  const unsigned long * __restrict p2,
+	  const unsigned long * __restrict p3,
+	  const unsigned long * __restrict p4)
 {
 	unsigned long lines = bytes >> 8;
 
@@ -304,8 +310,10 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_sse_4_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-	       unsigned long *p3, unsigned long *p4)
+xor_sse_4_pf64(unsigned long bytes, unsigned long * __restrict p1,
+	       const unsigned long * __restrict p2,
+	       const unsigned long * __restrict p3,
+	       const unsigned long * __restrict p4)
 {
 	unsigned long lines = bytes >> 8;
 
@@ -343,8 +351,11 @@ xor_sse_4_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-	  unsigned long *p3, unsigned long *p4, unsigned long *p5)
+xor_sse_5(unsigned long bytes, unsigned long * __restrict p1,
+	  const unsigned long * __restrict p2,
+	  const unsigned long * __restrict p3,
+	  const unsigned long * __restrict p4,
+	  const unsigned long * __restrict p5)
 {
 	unsigned long lines = bytes >> 8;
 
@@ -416,8 +427,11 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_sse_5_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-	       unsigned long *p3, unsigned long *p4, unsigned long *p5)
+xor_sse_5_pf64(unsigned long bytes, unsigned long * __restrict p1,
+	       const unsigned long * __restrict p2,
+	       const unsigned long * __restrict p3,
+	       const unsigned long * __restrict p4,
+	       const unsigned long * __restrict p5)
 {
 	unsigned long lines = bytes >> 8;
 
diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h
index 67ceb790e6397..7a6b9474591e7 100644
--- a/arch/x86/include/asm/xor_32.h
+++ b/arch/x86/include/asm/xor_32.h
@@ -21,7 +21,8 @@
 #include <asm/fpu/api.h>
 
 static void
-xor_pII_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+xor_pII_mmx_2(unsigned long bytes, unsigned long * __restrict p1,
+	      const unsigned long * __restrict p2)
 {
 	unsigned long lines = bytes >> 7;
 
@@ -64,8 +65,9 @@ xor_pII_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
 }
 
 static void
-xor_pII_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-	      unsigned long *p3)
+xor_pII_mmx_3(unsigned long bytes, unsigned long * __restrict p1,
+	      const unsigned long * __restrict p2,
+	      const unsigned long * __restrict p3)
 {
 	unsigned long lines = bytes >> 7;
 
@@ -113,8 +115,10 @@ xor_pII_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_pII_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-	      unsigned long *p3, unsigned long *p4)
+xor_pII_mmx_4(unsigned long bytes, unsigned long * __restrict p1,
+	      const unsigned long * __restrict p2,
+	      const unsigned long * __restrict p3,
+	      const unsigned long * __restrict p4)
 {
 	unsigned long lines = bytes >> 7;
 
@@ -168,8 +172,11 @@ xor_pII_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 
 
 static void
-xor_pII_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-	      unsigned long *p3, unsigned long *p4, unsigned long *p5)
+xor_pII_mmx_5(unsigned long bytes, unsigned long * __restrict p1,
+	      const unsigned long * __restrict p2,
+	      const unsigned long * __restrict p3,
+	      const unsigned long * __restrict p4,
+	      const unsigned long * __restrict p5)
 {
 	unsigned long lines = bytes >> 7;
 
@@ -248,7 +255,8 @@ xor_pII_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 #undef BLOCK
 
 static void
-xor_p5_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+xor_p5_mmx_2(unsigned long bytes, unsigned long * __restrict p1,
+	     const unsigned long * __restrict p2)
 {
 	unsigned long lines = bytes >> 6;
 
@@ -295,8 +303,9 @@ xor_p5_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
 }
 
 static void
-xor_p5_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-	     unsigned long *p3)
+xor_p5_mmx_3(unsigned long bytes, unsigned long * __restrict p1,
+	     const unsigned long * __restrict p2,
+	     const unsigned long * __restrict p3)
 {
 	unsigned long lines = bytes >> 6;
 
@@ -352,8 +361,10 @@ xor_p5_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_p5_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-	     unsigned long *p3, unsigned long *p4)
+xor_p5_mmx_4(unsigned long bytes, unsigned long * __restrict p1,
+	     const unsigned long * __restrict p2,
+	     const unsigned long * __restrict p3,
+	     const unsigned long * __restrict p4)
 {
 	unsigned long lines = bytes >> 6;
 
@@ -418,8 +429,11 @@ xor_p5_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_p5_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-	     unsigned long *p3, unsigned long *p4, unsigned long *p5)
+xor_p5_mmx_5(unsigned long bytes, unsigned long * __restrict p1,
+	     const unsigned long * __restrict p2,
+	     const unsigned long * __restrict p3,
+	     const unsigned long * __restrict p4,
+	     const unsigned long * __restrict p5)
 {
 	unsigned long lines = bytes >> 6;
 
diff --git a/arch/x86/include/asm/xor_avx.h b/arch/x86/include/asm/xor_avx.h
index 0c4e5b5e3852b..7f81dd5897f41 100644
--- a/arch/x86/include/asm/xor_avx.h
+++ b/arch/x86/include/asm/xor_avx.h
@@ -26,7 +26,8 @@
 		BLOCK4(8) \
 		BLOCK4(12)
 
-static void xor_avx_2(unsigned long bytes, unsigned long *p0, unsigned long *p1)
+static void xor_avx_2(unsigned long bytes, unsigned long * __restrict p0,
+		      const unsigned long * __restrict p1)
 {
 	unsigned long lines = bytes >> 9;
 
@@ -52,8 +53,9 @@ do { \
 	kernel_fpu_end();
 }
 
-static void xor_avx_3(unsigned long bytes, unsigned long *p0, unsigned long *p1,
-	unsigned long *p2)
+static void xor_avx_3(unsigned long bytes, unsigned long * __restrict p0,
+		      const unsigned long * __restrict p1,
+		      const unsigned long * __restrict p2)
 {
 	unsigned long lines = bytes >> 9;
 
@@ -82,8 +84,10 @@ do { \
 	kernel_fpu_end();
 }
 
-static void xor_avx_4(unsigned long bytes, unsigned long *p0, unsigned long *p1,
-	unsigned long *p2, unsigned long *p3)
+static void xor_avx_4(unsigned long bytes, unsigned long * __restrict p0,
+		      const unsigned long * __restrict p1,
+		      const unsigned long * __restrict p2,
+		      const unsigned long * __restrict p3)
 {
 	unsigned long lines = bytes >> 9;
 
@@ -115,8 +119,11 @@ do { \
 	kernel_fpu_end();
 }
 
-static void xor_avx_5(unsigned long bytes, unsigned long *p0, unsigned long *p1,
-	unsigned long *p2, unsigned long *p3, unsigned long *p4)
+static void xor_avx_5(unsigned long bytes, unsigned long * __restrict p0,
+	     const unsigned long * __restrict p1,
+	     const unsigned long * __restrict p2,
+	     const unsigned long * __restrict p3,
+	     const unsigned long * __restrict p4)
 {
 	unsigned long lines = bytes >> 9;
 
diff --git a/include/asm-generic/xor.h b/include/asm-generic/xor.h
index b62a2a56a4d49..44509d48fca21 100644
--- a/include/asm-generic/xor.h
+++ b/include/asm-generic/xor.h
@@ -8,7 +8,8 @@
 #include <linux/prefetch.h>
 
 static void
-xor_8regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+xor_8regs_2(unsigned long bytes, unsigned long * __restrict p1,
+	    const unsigned long * __restrict p2)
 {
 	long lines = bytes / (sizeof (long)) / 8;
 
@@ -27,8 +28,9 @@ xor_8regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
 }
 
 static void
-xor_8regs_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-	    unsigned long *p3)
+xor_8regs_3(unsigned long bytes, unsigned long * __restrict p1,
+	    const unsigned long * __restrict p2,
+	    const unsigned long * __restrict p3)
 {
 	long lines = bytes / (sizeof (long)) / 8;
 
@@ -48,8 +50,10 @@ xor_8regs_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_8regs_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-	    unsigned long *p3, unsigned long *p4)
+xor_8regs_4(unsigned long bytes, unsigned long * __restrict p1,
+	    const unsigned long * __restrict p2,
+	    const unsigned long * __restrict p3,
+	    const unsigned long * __restrict p4)
 {
 	long lines = bytes / (sizeof (long)) / 8;
 
@@ -70,8 +74,11 @@ xor_8regs_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_8regs_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-	    unsigned long *p3, unsigned long *p4, unsigned long *p5)
+xor_8regs_5(unsigned long bytes, unsigned long * __restrict p1,
+	    const unsigned long * __restrict p2,
+	    const unsigned long * __restrict p3,
+	    const unsigned long * __restrict p4,
+	    const unsigned long * __restrict p5)
 {
 	long lines = bytes / (sizeof (long)) / 8;
 
@@ -93,7 +100,8 @@ xor_8regs_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_32regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+xor_32regs_2(unsigned long bytes, unsigned long * __restrict p1,
+	     const unsigned long * __restrict p2)
 {
 	long lines = bytes / (sizeof (long)) / 8;
 
@@ -129,8 +137,9 @@ xor_32regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
 }
 
 static void
-xor_32regs_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-	    unsigned long *p3)
+xor_32regs_3(unsigned long bytes, unsigned long * __restrict p1,
+	     const unsigned long * __restrict p2,
+	     const unsigned long * __restrict p3)
 {
 	long lines = bytes / (sizeof (long)) / 8;
 
@@ -175,8 +184,10 @@ xor_32regs_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_32regs_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-	    unsigned long *p3, unsigned long *p4)
+xor_32regs_4(unsigned long bytes, unsigned long * __restrict p1,
+	     const unsigned long * __restrict p2,
+	     const unsigned long * __restrict p3,
+	     const unsigned long * __restrict p4)
 {
 	long lines = bytes / (sizeof (long)) / 8;
 
@@ -230,8 +241,11 @@ xor_32regs_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_32regs_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-	    unsigned long *p3, unsigned long *p4, unsigned long *p5)
+xor_32regs_5(unsigned long bytes, unsigned long * __restrict p1,
+	     const unsigned long * __restrict p2,
+	     const unsigned long * __restrict p3,
+	     const unsigned long * __restrict p4,
+	     const unsigned long * __restrict p5)
 {
 	long lines = bytes / (sizeof (long)) / 8;
 
@@ -294,7 +308,8 @@ xor_32regs_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_8regs_p_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+xor_8regs_p_2(unsigned long bytes, unsigned long * __restrict p1,
+	      const unsigned long * __restrict p2)
 {
 	long lines = bytes / (sizeof (long)) / 8 - 1;
 	prefetchw(p1);
@@ -320,8 +335,9 @@ xor_8regs_p_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
 }
 
 static void
-xor_8regs_p_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-	    unsigned long *p3)
+xor_8regs_p_3(unsigned long bytes, unsigned long * __restrict p1,
+	      const unsigned long * __restrict p2,
+	      const unsigned long * __restrict p3)
 {
 	long lines = bytes / (sizeof (long)) / 8 - 1;
 	prefetchw(p1);
@@ -350,8 +366,10 @@ xor_8regs_p_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_8regs_p_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-	    unsigned long *p3, unsigned long *p4)
+xor_8regs_p_4(unsigned long bytes, unsigned long * __restrict p1,
+	      const unsigned long * __restrict p2,
+	      const unsigned long * __restrict p3,
+	      const unsigned long * __restrict p4)
 {
 	long lines = bytes / (sizeof (long)) / 8 - 1;
 
@@ -384,8 +402,11 @@ xor_8regs_p_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_8regs_p_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-	    unsigned long *p3, unsigned long *p4, unsigned long *p5)
+xor_8regs_p_5(unsigned long bytes, unsigned long * __restrict p1,
+	      const unsigned long * __restrict p2,
+	      const unsigned long * __restrict p3,
+	      const unsigned long * __restrict p4,
+	      const unsigned long * __restrict p5)
 {
 	long lines = bytes / (sizeof (long)) / 8 - 1;
 
@@ -421,7 +442,8 @@ xor_8regs_p_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_32regs_p_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+xor_32regs_p_2(unsigned long bytes, unsigned long * __restrict p1,
+	       const unsigned long * __restrict p2)
 {
 	long lines = bytes / (sizeof (long)) / 8 - 1;
 
@@ -466,8 +488,9 @@ xor_32regs_p_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
 }
 
 static void
-xor_32regs_p_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-	    unsigned long *p3)
+xor_32regs_p_3(unsigned long bytes, unsigned long * __restrict p1,
+	       const unsigned long * __restrict p2,
+	       const unsigned long * __restrict p3)
 {
 	long lines = bytes / (sizeof (long)) / 8 - 1;
 
@@ -523,8 +546,10 @@ xor_32regs_p_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_32regs_p_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-	    unsigned long *p3, unsigned long *p4)
+xor_32regs_p_4(unsigned long bytes, unsigned long * __restrict p1,
+	       const unsigned long * __restrict p2,
+	       const unsigned long * __restrict p3,
+	       const unsigned long * __restrict p4)
 {
 	long lines = bytes / (sizeof (long)) / 8 - 1;
 
@@ -591,8 +616,11 @@ xor_32regs_p_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_32regs_p_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-	    unsigned long *p3, unsigned long *p4, unsigned long *p5)
+xor_32regs_p_5(unsigned long bytes, unsigned long * __restrict p1,
+	       const unsigned long * __restrict p2,
+	       const unsigned long * __restrict p3,
+	       const unsigned long * __restrict p4,
+	       const unsigned long * __restrict p5)
 {
 	long lines = bytes / (sizeof (long)) / 8 - 1;
 
diff --git a/include/linux/raid/xor.h b/include/linux/raid/xor.h
index 2a9fee8ddae3f..51b811b623224 100644
--- a/include/linux/raid/xor.h
+++ b/include/linux/raid/xor.h
@@ -11,13 +11,20 @@ struct xor_block_template {
         struct xor_block_template *next;
         const char *name;
         int speed;
-	void (*do_2)(unsigned long, unsigned long *, unsigned long *);
-	void (*do_3)(unsigned long, unsigned long *, unsigned long *,
-		     unsigned long *);
-	void (*do_4)(unsigned long, unsigned long *, unsigned long *,
-		     unsigned long *, unsigned long *);
-	void (*do_5)(unsigned long, unsigned long *, unsigned long *,
-		     unsigned long *, unsigned long *, unsigned long *);
+	void (*do_2)(unsigned long, unsigned long * __restrict,
+		     const unsigned long * __restrict);
+	void (*do_3)(unsigned long, unsigned long * __restrict,
+		     const unsigned long * __restrict,
+		     const unsigned long * __restrict);
+	void (*do_4)(unsigned long, unsigned long * __restrict,
+		     const unsigned long * __restrict,
+		     const unsigned long * __restrict,
+		     const unsigned long * __restrict);
+	void (*do_5)(unsigned long, unsigned long * __restrict,
+		     const unsigned long * __restrict,
+		     const unsigned long * __restrict,
+		     const unsigned long * __restrict,
+		     const unsigned long * __restrict);
 };
 
 #endif
-- 
GitLab


From a69cb445f7d129abf7c50d48c8a8eca7c8d5df15 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Sat, 5 Feb 2022 16:23:46 +0100
Subject: [PATCH 0346/1586] crypto: arm/xor - make vectorized C code
 Clang-friendly

The ARM version of the accelerated XOR routines are simply the 8-way C
routines passed through the auto-vectorizer with SIMD codegen enabled.
This used to require GCC version 4.6 at least, but given that 5.1 is now
the baseline, this check is no longer necessary, and actually
misidentifies Clang as GCC < 4.6 as Clang defines the GCC major/minor as
well, but makes no attempt at doing this in a way that conveys feature
parity with a certain version of GCC (which would not be a great idea in
the first place).

So let's drop the version check, and make the auto-vectorize pragma
(which is based on a GCC-specific command line option) GCC-only. Since
Clang performs SIMD auto-vectorization by default at -O2, no pragma is
necessary here.

Tested-by: Nathan Chancellor <nathan@kernel.org>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Link: https://github.com/ClangBuiltLinux/linux/issues/496
Link: https://github.com/ClangBuiltLinux/linux/issues/503
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm/lib/xor-neon.c | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/arch/arm/lib/xor-neon.c b/arch/arm/lib/xor-neon.c
index b99dd8e1c93f1..522510baed490 100644
--- a/arch/arm/lib/xor-neon.c
+++ b/arch/arm/lib/xor-neon.c
@@ -17,17 +17,11 @@ MODULE_LICENSE("GPL");
 /*
  * Pull in the reference implementations while instructing GCC (through
  * -ftree-vectorize) to attempt to exploit implicit parallelism and emit
- * NEON instructions.
+ * NEON instructions. Clang does this by default at O2 so no pragma is
+ * needed.
  */
-#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)
+#ifdef CONFIG_CC_IS_GCC
 #pragma GCC optimize "tree-vectorize"
-#else
-/*
- * While older versions of GCC do not generate incorrect code, they fail to
- * recognize the parallel nature of these functions, and emit plain ARM code,
- * which is known to be slower than the optimized ARM code in asm-arm/xor.h.
- */
-#warning This code requires at least version 4.6 of GCC
 #endif
 
 #pragma GCC diagnostic ignored "-Wunused-variable"
-- 
GitLab


From d5869fdc189f0f12a954a48d58a48104a2f5d044 Mon Sep 17 00:00:00 2001
From: Yang Shi <shy828301@gmail.com>
Date: Thu, 10 Feb 2022 14:52:22 -0800
Subject: [PATCH 0347/1586] block: introduce block_rq_error tracepoint

Currently, rasdaemon uses the existing tracepoint block_rq_complete
and filters out non-error cases in order to capture block disk errors.

But there are a few problems with this approach:

1. Even kernel trace filter could do the filtering work, there is
   still some overhead after we enable this tracepoint.

2. The filter is merely based on errno, which does not align with kernel
   logic to check the errors for print_req_error().

3. block_rq_complete only provides dev major and minor to identify
   the block device, it is not convenient to use in user-space.

So introduce a new tracepoint block_rq_error just for the error case.
With this patch, rasdaemon could switch to block_rq_error.

Since the new tracepoint has the similar implementation with
block_rq_complete, so move the existing code from TRACE_EVENT
block_rq_complete() into new event class block_rq_completion(). Then add
event for block_rq_complete and block_rq_err respectively from the newly
created event class per the suggestion from Chaitanya Kulkarni.

Cc: Jens Axboe <axboe@kernel.dk>
Cc: Christoph Hellwig <hch@infradead.org>
Reviewed-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Chaitanya Kulkarni <kch@nvidia.com>
Signed-off-by: Yang Shi <shy828301@gmail.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220210225222.260069-1-shy828301@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c               |  4 ++-
 include/trace/events/block.h | 49 ++++++++++++++++++++++++++----------
 2 files changed, 39 insertions(+), 14 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 4b868e792ba4a..6c59ffe765fde 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -789,8 +789,10 @@ bool blk_update_request(struct request *req, blk_status_t error,
 #endif
 
 	if (unlikely(error && !blk_rq_is_passthrough(req) &&
-		     !(req->rq_flags & RQF_QUIET)))
+		     !(req->rq_flags & RQF_QUIET))) {
 		blk_print_req_error(req, error);
+		trace_block_rq_error(req, error, nr_bytes);
+	}
 
 	blk_account_io_completion(req, nr_bytes);
 
diff --git a/include/trace/events/block.h b/include/trace/events/block.h
index 27170e40e8c95..7f4dfbdf12a6f 100644
--- a/include/trace/events/block.h
+++ b/include/trace/events/block.h
@@ -100,19 +100,7 @@ TRACE_EVENT(block_rq_requeue,
 		  __entry->nr_sector, 0)
 );
 
-/**
- * block_rq_complete - block IO operation completed by device driver
- * @rq: block operations request
- * @error: status code
- * @nr_bytes: number of completed bytes
- *
- * The block_rq_complete tracepoint event indicates that some portion
- * of operation request has been completed by the device driver.  If
- * the @rq->bio is %NULL, then there is absolutely no additional work to
- * do for the request. If @rq->bio is non-NULL then there is
- * additional work required to complete the request.
- */
-TRACE_EVENT(block_rq_complete,
+DECLARE_EVENT_CLASS(block_rq_completion,
 
 	TP_PROTO(struct request *rq, blk_status_t error, unsigned int nr_bytes),
 
@@ -144,6 +132,41 @@ TRACE_EVENT(block_rq_complete,
 		  __entry->nr_sector, __entry->error)
 );
 
+/**
+ * block_rq_complete - block IO operation completed by device driver
+ * @rq: block operations request
+ * @error: status code
+ * @nr_bytes: number of completed bytes
+ *
+ * The block_rq_complete tracepoint event indicates that some portion
+ * of operation request has been completed by the device driver.  If
+ * the @rq->bio is %NULL, then there is absolutely no additional work to
+ * do for the request. If @rq->bio is non-NULL then there is
+ * additional work required to complete the request.
+ */
+DEFINE_EVENT(block_rq_completion, block_rq_complete,
+
+	TP_PROTO(struct request *rq, blk_status_t error, unsigned int nr_bytes),
+
+	TP_ARGS(rq, error, nr_bytes)
+);
+
+/**
+ * block_rq_error - block IO operation error reported by device driver
+ * @rq: block operations request
+ * @error: status code
+ * @nr_bytes: number of completed bytes
+ *
+ * The block_rq_error tracepoint event indicates that some portion
+ * of operation request has failed as reported by the device driver.
+ */
+DEFINE_EVENT(block_rq_completion, block_rq_error,
+
+	TP_PROTO(struct request *rq, blk_status_t error, unsigned int nr_bytes),
+
+	TP_ARGS(rq, error, nr_bytes)
+);
+
 DECLARE_EVENT_CLASS(block_rq,
 
 	TP_PROTO(struct request *rq),
-- 
GitLab


From 0e51e2ab49a99bc5077760aa083dfa1c3bf9899b Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Fri, 11 Feb 2022 18:11:47 +0800
Subject: [PATCH 0348/1586] block: remove THROTL_IOPS_MAX

No one uses THROTL_IOPS_MAX any more, so remove it.

Signed-off-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220211101149.2368042-2-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk-cgroup.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index b4de2010fba55..bdc49bd4eef02 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -28,8 +28,6 @@
 /* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */
 #define BLKG_STAT_CPU_BATCH	(INT_MAX / 2)
 
-/* Max limits for throttle policy */
-#define THROTL_IOPS_MAX		UINT_MAX
 #define FC_APPID_LEN              129
 
 
-- 
GitLab


From 472e4314c039d6cf36e28783b1c63f87b5b394c2 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Fri, 11 Feb 2022 18:11:48 +0800
Subject: [PATCH 0349/1586] block: move initialization of q->blkg_list into
 blkcg_init_queue

q->blkg_list is only used by blkcg code, so move it into
blkcg_init_queue.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20220211101149.2368042-3-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-cgroup.c | 2 ++
 block/blk-core.c   | 3 ---
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 671debbae9413..35deaceba1f07 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1175,6 +1175,8 @@ int blkcg_init_queue(struct request_queue *q)
 	bool preloaded;
 	int ret;
 
+	INIT_LIST_HEAD(&q->blkg_list);
+
 	new_blkg = blkg_alloc(&blkcg_root, q, GFP_KERNEL);
 	if (!new_blkg)
 		return -ENOMEM;
diff --git a/block/blk-core.c b/block/blk-core.c
index be8812f5489d4..ff972b968f253 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -476,9 +476,6 @@ struct request_queue *blk_alloc_queue(int node_id, bool alloc_srcu)
 	timer_setup(&q->timeout, blk_rq_timed_out_timer, 0);
 	INIT_WORK(&q->timeout_work, blk_timeout_work);
 	INIT_LIST_HEAD(&q->icq_list);
-#ifdef CONFIG_BLK_CGROUP
-	INIT_LIST_HEAD(&q->blkg_list);
-#endif
 
 	kobject_init(&q->kobj, &blk_queue_ktype);
 
-- 
GitLab


From 672fdcf0e7de3b1e39416ac85abf178f023271f1 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Fri, 11 Feb 2022 18:11:49 +0800
Subject: [PATCH 0350/1586] block: partition include/linux/blk-cgroup.h

Partition include/linux/blk-cgroup.h into two parts: one is public part,
the other is block layer private part.

Suggested by Christoph Hellwig.

Signed-off-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220211101149.2368042-4-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bfq-iosched.h         |   1 -
 block/bio.c                 |   2 +-
 block/blk-cgroup-rwstat.h   |   2 +-
 block/blk-cgroup.c          |   2 +-
 block/blk-cgroup.h          | 477 ++++++++++++++++++++++++++++++++++++
 block/blk-core.c            |   2 +-
 block/blk-crypto-fallback.c |   2 +-
 block/blk-iocost.c          |   2 +-
 block/blk-iolatency.c       |   2 +-
 block/blk-ioprio.c          |   2 +-
 block/blk-sysfs.c           |   2 +-
 block/blk-throttle.c        |   1 -
 block/bounce.c              |   2 +-
 block/elevator.c            |   2 +-
 include/linux/blk-cgroup.h  | 459 +---------------------------------
 15 files changed, 493 insertions(+), 467 deletions(-)
 create mode 100644 block/blk-cgroup.h

diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index 07288b9da3895..72255ec44f8f8 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -8,7 +8,6 @@
 
 #include <linux/blktrace_api.h>
 #include <linux/hrtimer.h>
-#include <linux/blk-cgroup.h>
 
 #include "blk-cgroup-rwstat.h"
 
diff --git a/block/bio.c b/block/bio.c
index 18d34b33351b8..b15f5466ce084 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -15,7 +15,6 @@
 #include <linux/mempool.h>
 #include <linux/workqueue.h>
 #include <linux/cgroup.h>
-#include <linux/blk-cgroup.h>
 #include <linux/highmem.h>
 #include <linux/sched/sysctl.h>
 #include <linux/blk-crypto.h>
@@ -24,6 +23,7 @@
 #include <trace/events/block.h>
 #include "blk.h"
 #include "blk-rq-qos.h"
+#include "blk-cgroup.h"
 
 struct bio_alloc_cache {
 	struct bio		*free_list;
diff --git a/block/blk-cgroup-rwstat.h b/block/blk-cgroup-rwstat.h
index ee746919c41fc..9f2723b34b759 100644
--- a/block/blk-cgroup-rwstat.h
+++ b/block/blk-cgroup-rwstat.h
@@ -6,7 +6,7 @@
 #ifndef _BLK_CGROUP_RWSTAT_H
 #define _BLK_CGROUP_RWSTAT_H
 
-#include <linux/blk-cgroup.h>
+#include "blk-cgroup.h"
 
 enum blkg_rwstat_type {
 	BLKG_RWSTAT_READ,
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 35deaceba1f07..4108d445c73af 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -26,11 +26,11 @@
 #include <linux/delay.h>
 #include <linux/atomic.h>
 #include <linux/ctype.h>
-#include <linux/blk-cgroup.h>
 #include <linux/tracehook.h>
 #include <linux/psi.h>
 #include <linux/part_stat.h>
 #include "blk.h"
+#include "blk-cgroup.h"
 #include "blk-ioprio.h"
 #include "blk-throttle.h"
 
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
new file mode 100644
index 0000000000000..3e91803c4a555
--- /dev/null
+++ b/block/blk-cgroup.h
@@ -0,0 +1,477 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BLK_CGROUP_PRIVATE_H
+#define _BLK_CGROUP_PRIVATE_H
+/*
+ * block cgroup private header
+ *
+ * Based on ideas and code from CFQ, CFS and BFQ:
+ * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
+ *
+ * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
+ *		      Paolo Valente <paolo.valente@unimore.it>
+ *
+ * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
+ * 	              Nauman Rafique <nauman@google.com>
+ */
+
+#include <linux/blk-cgroup.h>
+
+/* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */
+#define BLKG_STAT_CPU_BATCH	(INT_MAX / 2)
+
+#ifdef CONFIG_BLK_CGROUP
+
+/*
+ * A blkcg_gq (blkg) is association between a block cgroup (blkcg) and a
+ * request_queue (q).  This is used by blkcg policies which need to track
+ * information per blkcg - q pair.
+ *
+ * There can be multiple active blkcg policies and each blkg:policy pair is
+ * represented by a blkg_policy_data which is allocated and freed by each
+ * policy's pd_alloc/free_fn() methods.  A policy can allocate private data
+ * area by allocating larger data structure which embeds blkg_policy_data
+ * at the beginning.
+ */
+struct blkg_policy_data {
+	/* the blkg and policy id this per-policy data belongs to */
+	struct blkcg_gq			*blkg;
+	int				plid;
+};
+
+/*
+ * Policies that need to keep per-blkcg data which is independent from any
+ * request_queue associated to it should implement cpd_alloc/free_fn()
+ * methods.  A policy can allocate private data area by allocating larger
+ * data structure which embeds blkcg_policy_data at the beginning.
+ * cpd_init() is invoked to let each policy handle per-blkcg data.
+ */
+struct blkcg_policy_data {
+	/* the blkcg and policy id this per-policy data belongs to */
+	struct blkcg			*blkcg;
+	int				plid;
+};
+
+typedef struct blkcg_policy_data *(blkcg_pol_alloc_cpd_fn)(gfp_t gfp);
+typedef void (blkcg_pol_init_cpd_fn)(struct blkcg_policy_data *cpd);
+typedef void (blkcg_pol_free_cpd_fn)(struct blkcg_policy_data *cpd);
+typedef void (blkcg_pol_bind_cpd_fn)(struct blkcg_policy_data *cpd);
+typedef struct blkg_policy_data *(blkcg_pol_alloc_pd_fn)(gfp_t gfp,
+				struct request_queue *q, struct blkcg *blkcg);
+typedef void (blkcg_pol_init_pd_fn)(struct blkg_policy_data *pd);
+typedef void (blkcg_pol_online_pd_fn)(struct blkg_policy_data *pd);
+typedef void (blkcg_pol_offline_pd_fn)(struct blkg_policy_data *pd);
+typedef void (blkcg_pol_free_pd_fn)(struct blkg_policy_data *pd);
+typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkg_policy_data *pd);
+typedef bool (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd,
+				struct seq_file *s);
+
+struct blkcg_policy {
+	int				plid;
+	/* cgroup files for the policy */
+	struct cftype			*dfl_cftypes;
+	struct cftype			*legacy_cftypes;
+
+	/* operations */
+	blkcg_pol_alloc_cpd_fn		*cpd_alloc_fn;
+	blkcg_pol_init_cpd_fn		*cpd_init_fn;
+	blkcg_pol_free_cpd_fn		*cpd_free_fn;
+	blkcg_pol_bind_cpd_fn		*cpd_bind_fn;
+
+	blkcg_pol_alloc_pd_fn		*pd_alloc_fn;
+	blkcg_pol_init_pd_fn		*pd_init_fn;
+	blkcg_pol_online_pd_fn		*pd_online_fn;
+	blkcg_pol_offline_pd_fn		*pd_offline_fn;
+	blkcg_pol_free_pd_fn		*pd_free_fn;
+	blkcg_pol_reset_pd_stats_fn	*pd_reset_stats_fn;
+	blkcg_pol_stat_pd_fn		*pd_stat_fn;
+};
+
+extern struct blkcg blkcg_root;
+extern bool blkcg_debug_stats;
+
+struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg,
+				      struct request_queue *q, bool update_hint);
+int blkcg_init_queue(struct request_queue *q);
+void blkcg_exit_queue(struct request_queue *q);
+
+/* Blkio controller policy registration */
+int blkcg_policy_register(struct blkcg_policy *pol);
+void blkcg_policy_unregister(struct blkcg_policy *pol);
+int blkcg_activate_policy(struct request_queue *q,
+			  const struct blkcg_policy *pol);
+void blkcg_deactivate_policy(struct request_queue *q,
+			     const struct blkcg_policy *pol);
+
+const char *blkg_dev_name(struct blkcg_gq *blkg);
+void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
+		       u64 (*prfill)(struct seq_file *,
+				     struct blkg_policy_data *, int),
+		       const struct blkcg_policy *pol, int data,
+		       bool show_total);
+u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v);
+
+struct blkg_conf_ctx {
+	struct block_device		*bdev;
+	struct blkcg_gq			*blkg;
+	char				*body;
+};
+
+struct block_device *blkcg_conf_open_bdev(char **inputp);
+int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
+		   char *input, struct blkg_conf_ctx *ctx);
+void blkg_conf_finish(struct blkg_conf_ctx *ctx);
+
+/**
+ * blkcg_css - find the current css
+ *
+ * Find the css associated with either the kthread or the current task.
+ * This may return a dying css, so it is up to the caller to use tryget logic
+ * to confirm it is alive and well.
+ */
+static inline struct cgroup_subsys_state *blkcg_css(void)
+{
+	struct cgroup_subsys_state *css;
+
+	css = kthread_blkcg();
+	if (css)
+		return css;
+	return task_css(current, io_cgrp_id);
+}
+
+/**
+ * __bio_blkcg - internal, inconsistent version to get blkcg
+ *
+ * DO NOT USE.
+ * This function is inconsistent and consequently is dangerous to use.  The
+ * first part of the function returns a blkcg where a reference is owned by the
+ * bio.  This means it does not need to be rcu protected as it cannot go away
+ * with the bio owning a reference to it.  However, the latter potentially gets
+ * it from task_css().  This can race against task migration and the cgroup
+ * dying.  It is also semantically different as it must be called rcu protected
+ * and is susceptible to failure when trying to get a reference to it.
+ * Therefore, it is not ok to assume that *_get() will always succeed on the
+ * blkcg returned here.
+ */
+static inline struct blkcg *__bio_blkcg(struct bio *bio)
+{
+	if (bio && bio->bi_blkg)
+		return bio->bi_blkg->blkcg;
+	return css_to_blkcg(blkcg_css());
+}
+
+/**
+ * bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg
+ * @return: true if this bio needs to be submitted with the root blkg context.
+ *
+ * In order to avoid priority inversions we sometimes need to issue a bio as if
+ * it were attached to the root blkg, and then backcharge to the actual owning
+ * blkg.  The idea is we do bio_blkcg() to look up the actual context for the
+ * bio and attach the appropriate blkg to the bio.  Then we call this helper and
+ * if it is true run with the root blkg for that queue and then do any
+ * backcharging to the originating cgroup once the io is complete.
+ */
+static inline bool bio_issue_as_root_blkg(struct bio *bio)
+{
+	return (bio->bi_opf & (REQ_META | REQ_SWAP)) != 0;
+}
+
+/**
+ * __blkg_lookup - internal version of blkg_lookup()
+ * @blkcg: blkcg of interest
+ * @q: request_queue of interest
+ * @update_hint: whether to update lookup hint with the result or not
+ *
+ * This is internal version and shouldn't be used by policy
+ * implementations.  Looks up blkgs for the @blkcg - @q pair regardless of
+ * @q's bypass state.  If @update_hint is %true, the caller should be
+ * holding @q->queue_lock and lookup hint is updated on success.
+ */
+static inline struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg,
+					     struct request_queue *q,
+					     bool update_hint)
+{
+	struct blkcg_gq *blkg;
+
+	if (blkcg == &blkcg_root)
+		return q->root_blkg;
+
+	blkg = rcu_dereference(blkcg->blkg_hint);
+	if (blkg && blkg->q == q)
+		return blkg;
+
+	return blkg_lookup_slowpath(blkcg, q, update_hint);
+}
+
+/**
+ * blkg_lookup - lookup blkg for the specified blkcg - q pair
+ * @blkcg: blkcg of interest
+ * @q: request_queue of interest
+ *
+ * Lookup blkg for the @blkcg - @q pair.  This function should be called
+ * under RCU read lock.
+ */
+static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg,
+					   struct request_queue *q)
+{
+	WARN_ON_ONCE(!rcu_read_lock_held());
+	return __blkg_lookup(blkcg, q, false);
+}
+
+/**
+ * blk_queue_root_blkg - return blkg for the (blkcg_root, @q) pair
+ * @q: request_queue of interest
+ *
+ * Lookup blkg for @q at the root level. See also blkg_lookup().
+ */
+static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q)
+{
+	return q->root_blkg;
+}
+
+/**
+ * blkg_to_pdata - get policy private data
+ * @blkg: blkg of interest
+ * @pol: policy of interest
+ *
+ * Return pointer to private data associated with the @blkg-@pol pair.
+ */
+static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
+						  struct blkcg_policy *pol)
+{
+	return blkg ? blkg->pd[pol->plid] : NULL;
+}
+
+static inline struct blkcg_policy_data *blkcg_to_cpd(struct blkcg *blkcg,
+						     struct blkcg_policy *pol)
+{
+	return blkcg ? blkcg->cpd[pol->plid] : NULL;
+}
+
+/**
+ * pdata_to_blkg - get blkg associated with policy private data
+ * @pd: policy private data of interest
+ *
+ * @pd is policy private data.  Determine the blkg it's associated with.
+ */
+static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd)
+{
+	return pd ? pd->blkg : NULL;
+}
+
+static inline struct blkcg *cpd_to_blkcg(struct blkcg_policy_data *cpd)
+{
+	return cpd ? cpd->blkcg : NULL;
+}
+
+/**
+ * blkg_path - format cgroup path of blkg
+ * @blkg: blkg of interest
+ * @buf: target buffer
+ * @buflen: target buffer length
+ *
+ * Format the path of the cgroup of @blkg into @buf.
+ */
+static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen)
+{
+	return cgroup_path(blkg->blkcg->css.cgroup, buf, buflen);
+}
+
+/**
+ * blkg_get - get a blkg reference
+ * @blkg: blkg to get
+ *
+ * The caller should be holding an existing reference.
+ */
+static inline void blkg_get(struct blkcg_gq *blkg)
+{
+	percpu_ref_get(&blkg->refcnt);
+}
+
+/**
+ * blkg_tryget - try and get a blkg reference
+ * @blkg: blkg to get
+ *
+ * This is for use when doing an RCU lookup of the blkg.  We may be in the midst
+ * of freeing this blkg, so we can only use it if the refcnt is not zero.
+ */
+static inline bool blkg_tryget(struct blkcg_gq *blkg)
+{
+	return blkg && percpu_ref_tryget(&blkg->refcnt);
+}
+
+/**
+ * blkg_put - put a blkg reference
+ * @blkg: blkg to put
+ */
+static inline void blkg_put(struct blkcg_gq *blkg)
+{
+	percpu_ref_put(&blkg->refcnt);
+}
+
+/**
+ * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants
+ * @d_blkg: loop cursor pointing to the current descendant
+ * @pos_css: used for iteration
+ * @p_blkg: target blkg to walk descendants of
+ *
+ * Walk @c_blkg through the descendants of @p_blkg.  Must be used with RCU
+ * read locked.  If called under either blkcg or queue lock, the iteration
+ * is guaranteed to include all and only online blkgs.  The caller may
+ * update @pos_css by calling css_rightmost_descendant() to skip subtree.
+ * @p_blkg is included in the iteration and the first node to be visited.
+ */
+#define blkg_for_each_descendant_pre(d_blkg, pos_css, p_blkg)		\
+	css_for_each_descendant_pre((pos_css), &(p_blkg)->blkcg->css)	\
+		if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css),	\
+					      (p_blkg)->q, false)))
+
+/**
+ * blkg_for_each_descendant_post - post-order walk of a blkg's descendants
+ * @d_blkg: loop cursor pointing to the current descendant
+ * @pos_css: used for iteration
+ * @p_blkg: target blkg to walk descendants of
+ *
+ * Similar to blkg_for_each_descendant_pre() but performs post-order
+ * traversal instead.  Synchronization rules are the same.  @p_blkg is
+ * included in the iteration and the last node to be visited.
+ */
+#define blkg_for_each_descendant_post(d_blkg, pos_css, p_blkg)		\
+	css_for_each_descendant_post((pos_css), &(p_blkg)->blkcg->css)	\
+		if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css),	\
+					      (p_blkg)->q, false)))
+
+bool __blkcg_punt_bio_submit(struct bio *bio);
+
+static inline bool blkcg_punt_bio_submit(struct bio *bio)
+{
+	if (bio->bi_opf & REQ_CGROUP_PUNT)
+		return __blkcg_punt_bio_submit(bio);
+	else
+		return false;
+}
+
+static inline void blkcg_bio_issue_init(struct bio *bio)
+{
+	bio_issue_init(&bio->bi_issue, bio_sectors(bio));
+}
+
+static inline void blkcg_use_delay(struct blkcg_gq *blkg)
+{
+	if (WARN_ON_ONCE(atomic_read(&blkg->use_delay) < 0))
+		return;
+	if (atomic_add_return(1, &blkg->use_delay) == 1)
+		atomic_inc(&blkg->blkcg->css.cgroup->congestion_count);
+}
+
+static inline int blkcg_unuse_delay(struct blkcg_gq *blkg)
+{
+	int old = atomic_read(&blkg->use_delay);
+
+	if (WARN_ON_ONCE(old < 0))
+		return 0;
+	if (old == 0)
+		return 0;
+
+	/*
+	 * We do this song and dance because we can race with somebody else
+	 * adding or removing delay.  If we just did an atomic_dec we'd end up
+	 * negative and we'd already be in trouble.  We need to subtract 1 and
+	 * then check to see if we were the last delay so we can drop the
+	 * congestion count on the cgroup.
+	 */
+	while (old) {
+		int cur = atomic_cmpxchg(&blkg->use_delay, old, old - 1);
+		if (cur == old)
+			break;
+		old = cur;
+	}
+
+	if (old == 0)
+		return 0;
+	if (old == 1)
+		atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
+	return 1;
+}
+
+/**
+ * blkcg_set_delay - Enable allocator delay mechanism with the specified delay amount
+ * @blkg: target blkg
+ * @delay: delay duration in nsecs
+ *
+ * When enabled with this function, the delay is not decayed and must be
+ * explicitly cleared with blkcg_clear_delay(). Must not be mixed with
+ * blkcg_[un]use_delay() and blkcg_add_delay() usages.
+ */
+static inline void blkcg_set_delay(struct blkcg_gq *blkg, u64 delay)
+{
+	int old = atomic_read(&blkg->use_delay);
+
+	/* We only want 1 person setting the congestion count for this blkg. */
+	if (!old && atomic_cmpxchg(&blkg->use_delay, old, -1) == old)
+		atomic_inc(&blkg->blkcg->css.cgroup->congestion_count);
+
+	atomic64_set(&blkg->delay_nsec, delay);
+}
+
+/**
+ * blkcg_clear_delay - Disable allocator delay mechanism
+ * @blkg: target blkg
+ *
+ * Disable use_delay mechanism. See blkcg_set_delay().
+ */
+static inline void blkcg_clear_delay(struct blkcg_gq *blkg)
+{
+	int old = atomic_read(&blkg->use_delay);
+
+	/* We only want 1 person clearing the congestion count for this blkg. */
+	if (old && atomic_cmpxchg(&blkg->use_delay, old, 0) == old)
+		atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
+}
+
+void blk_cgroup_bio_start(struct bio *bio);
+void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta);
+#else	/* CONFIG_BLK_CGROUP */
+
+struct blkg_policy_data {
+};
+
+struct blkcg_policy_data {
+};
+
+struct blkcg_policy {
+};
+
+#ifdef CONFIG_BLOCK
+
+static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; }
+static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q)
+{ return NULL; }
+static inline int blkcg_init_queue(struct request_queue *q) { return 0; }
+static inline void blkcg_exit_queue(struct request_queue *q) { }
+static inline int blkcg_policy_register(struct blkcg_policy *pol) { return 0; }
+static inline void blkcg_policy_unregister(struct blkcg_policy *pol) { }
+static inline int blkcg_activate_policy(struct request_queue *q,
+					const struct blkcg_policy *pol) { return 0; }
+static inline void blkcg_deactivate_policy(struct request_queue *q,
+					   const struct blkcg_policy *pol) { }
+
+static inline struct blkcg *__bio_blkcg(struct bio *bio) { return NULL; }
+
+static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
+						  struct blkcg_policy *pol) { return NULL; }
+static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; }
+static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; }
+static inline void blkg_get(struct blkcg_gq *blkg) { }
+static inline void blkg_put(struct blkcg_gq *blkg) { }
+
+static inline bool blkcg_punt_bio_submit(struct bio *bio) { return false; }
+static inline void blkcg_bio_issue_init(struct bio *bio) { }
+static inline void blk_cgroup_bio_start(struct bio *bio) { }
+
+#define blk_queue_for_each_rl(rl, q)	\
+	for ((rl) = &(q)->root_rl; (rl); (rl) = NULL)
+
+#endif	/* CONFIG_BLOCK */
+#endif	/* CONFIG_BLK_CGROUP */
+
+#endif /* _BLK_CGROUP_PRIVATE_H */
diff --git a/block/blk-core.c b/block/blk-core.c
index ff972b968f253..5a4a590416298 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -34,7 +34,6 @@
 #include <linux/delay.h>
 #include <linux/ratelimit.h>
 #include <linux/pm_runtime.h>
-#include <linux/blk-cgroup.h>
 #include <linux/t10-pi.h>
 #include <linux/debugfs.h>
 #include <linux/bpf.h>
@@ -49,6 +48,7 @@
 #include "blk.h"
 #include "blk-mq-sched.h"
 #include "blk-pm.h"
+#include "blk-cgroup.h"
 #include "blk-throttle.h"
 
 struct dentry *blk_debugfs_root;
diff --git a/block/blk-crypto-fallback.c b/block/blk-crypto-fallback.c
index c87aba8584c64..18c8eafe20b94 100644
--- a/block/blk-crypto-fallback.c
+++ b/block/blk-crypto-fallback.c
@@ -10,7 +10,6 @@
 #define pr_fmt(fmt) "blk-crypto-fallback: " fmt
 
 #include <crypto/skcipher.h>
-#include <linux/blk-cgroup.h>
 #include <linux/blk-crypto.h>
 #include <linux/blk-crypto-profile.h>
 #include <linux/blkdev.h>
@@ -20,6 +19,7 @@
 #include <linux/random.h>
 #include <linux/scatterlist.h>
 
+#include "blk-cgroup.h"
 #include "blk-crypto-internal.h"
 
 static unsigned int num_prealloc_bounce_pg = 32;
diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index 769b643942989..70a0a3d680a35 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -178,12 +178,12 @@
 #include <linux/time64.h>
 #include <linux/parser.h>
 #include <linux/sched/signal.h>
-#include <linux/blk-cgroup.h>
 #include <asm/local.h>
 #include <asm/local64.h>
 #include "blk-rq-qos.h"
 #include "blk-stat.h"
 #include "blk-wbt.h"
+#include "blk-cgroup.h"
 
 #ifdef CONFIG_TRACEPOINTS
 
diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
index 6593c7123b97e..010e658d44a82 100644
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -74,9 +74,9 @@
 #include <linux/sched/signal.h>
 #include <trace/events/block.h>
 #include <linux/blk-mq.h>
-#include <linux/blk-cgroup.h>
 #include "blk-rq-qos.h"
 #include "blk-stat.h"
+#include "blk-cgroup.h"
 #include "blk.h"
 
 #define DEFAULT_SCALE_COOKIE 1000000U
diff --git a/block/blk-ioprio.c b/block/blk-ioprio.c
index 2e7f10e1c03fc..79e797f5d1949 100644
--- a/block/blk-ioprio.c
+++ b/block/blk-ioprio.c
@@ -12,11 +12,11 @@
  *   Documentation/admin-guide/cgroup-v2.rst.
  */
 
-#include <linux/blk-cgroup.h>
 #include <linux/blk-mq.h>
 #include <linux/blk_types.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include "blk-cgroup.h"
 #include "blk-ioprio.h"
 #include "blk-rq-qos.h"
 
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 9f32882ceb2f6..4c6b7dff71e5b 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -10,7 +10,6 @@
 #include <linux/backing-dev.h>
 #include <linux/blktrace_api.h>
 #include <linux/blk-mq.h>
-#include <linux/blk-cgroup.h>
 #include <linux/debugfs.h>
 
 #include "blk.h"
@@ -18,6 +17,7 @@
 #include "blk-mq-debugfs.h"
 #include "blk-mq-sched.h"
 #include "blk-wbt.h"
+#include "blk-cgroup.h"
 #include "blk-throttle.h"
 
 struct queue_sysfs_entry {
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 7c462c006b269..73640d80e99ec 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -10,7 +10,6 @@
 #include <linux/blkdev.h>
 #include <linux/bio.h>
 #include <linux/blktrace_api.h>
-#include <linux/blk-cgroup.h>
 #include "blk.h"
 #include "blk-cgroup-rwstat.h"
 #include "blk-stat.h"
diff --git a/block/bounce.c b/block/bounce.c
index 3fd3bc6fd5dbb..3d50d19cde72a 100644
--- a/block/bounce.c
+++ b/block/bounce.c
@@ -14,7 +14,6 @@
 #include <linux/pagemap.h>
 #include <linux/mempool.h>
 #include <linux/blkdev.h>
-#include <linux/blk-cgroup.h>
 #include <linux/backing-dev.h>
 #include <linux/init.h>
 #include <linux/hash.h>
@@ -24,6 +23,7 @@
 
 #include <trace/events/block.h>
 #include "blk.h"
+#include "blk-cgroup.h"
 
 #define POOL_SIZE	64
 #define ISA_POOL_SIZE	16
diff --git a/block/elevator.c b/block/elevator.c
index ec98aed39c4f5..6847ab6e7aa50 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -35,7 +35,6 @@
 #include <linux/hash.h>
 #include <linux/uaccess.h>
 #include <linux/pm_runtime.h>
-#include <linux/blk-cgroup.h>
 
 #include <trace/events/block.h>
 
@@ -44,6 +43,7 @@
 #include "blk-mq-sched.h"
 #include "blk-pm.h"
 #include "blk-wbt.h"
+#include "blk-cgroup.h"
 
 static DEFINE_SPINLOCK(elv_list_lock);
 static LIST_HEAD(elv_list);
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index bdc49bd4eef02..f2ad8ed8f777c 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -25,12 +25,8 @@
 #include <linux/kthread.h>
 #include <linux/fs.h>
 
-/* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */
-#define BLKG_STAT_CPU_BATCH	(INT_MAX / 2)
-
 #define FC_APPID_LEN              129
 
-
 #ifdef CONFIG_BLK_CGROUP
 
 enum blkg_iostat_type {
@@ -42,6 +38,7 @@ enum blkg_iostat_type {
 };
 
 struct blkcg_gq;
+struct blkg_policy_data;
 
 struct blkcg {
 	struct cgroup_subsys_state	css;
@@ -74,36 +71,6 @@ struct blkg_iostat_set {
 	struct blkg_iostat		last;
 };
 
-/*
- * A blkcg_gq (blkg) is association between a block cgroup (blkcg) and a
- * request_queue (q).  This is used by blkcg policies which need to track
- * information per blkcg - q pair.
- *
- * There can be multiple active blkcg policies and each blkg:policy pair is
- * represented by a blkg_policy_data which is allocated and freed by each
- * policy's pd_alloc/free_fn() methods.  A policy can allocate private data
- * area by allocating larger data structure which embeds blkg_policy_data
- * at the beginning.
- */
-struct blkg_policy_data {
-	/* the blkg and policy id this per-policy data belongs to */
-	struct blkcg_gq			*blkg;
-	int				plid;
-};
-
-/*
- * Policies that need to keep per-blkcg data which is independent from any
- * request_queue associated to it should implement cpd_alloc/free_fn()
- * methods.  A policy can allocate private data area by allocating larger
- * data structure which embeds blkcg_policy_data at the beginning.
- * cpd_init() is invoked to let each policy handle per-blkcg data.
- */
-struct blkcg_policy_data {
-	/* the blkcg and policy id this per-policy data belongs to */
-	struct blkcg			*blkcg;
-	int				plid;
-};
-
 /* association between a blk cgroup and a request queue */
 struct blkcg_gq {
 	/* Pointer to the associated request_queue */
@@ -139,120 +106,17 @@ struct blkcg_gq {
 	struct rcu_head			rcu_head;
 };
 
-typedef struct blkcg_policy_data *(blkcg_pol_alloc_cpd_fn)(gfp_t gfp);
-typedef void (blkcg_pol_init_cpd_fn)(struct blkcg_policy_data *cpd);
-typedef void (blkcg_pol_free_cpd_fn)(struct blkcg_policy_data *cpd);
-typedef void (blkcg_pol_bind_cpd_fn)(struct blkcg_policy_data *cpd);
-typedef struct blkg_policy_data *(blkcg_pol_alloc_pd_fn)(gfp_t gfp,
-				struct request_queue *q, struct blkcg *blkcg);
-typedef void (blkcg_pol_init_pd_fn)(struct blkg_policy_data *pd);
-typedef void (blkcg_pol_online_pd_fn)(struct blkg_policy_data *pd);
-typedef void (blkcg_pol_offline_pd_fn)(struct blkg_policy_data *pd);
-typedef void (blkcg_pol_free_pd_fn)(struct blkg_policy_data *pd);
-typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkg_policy_data *pd);
-typedef bool (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd,
-				struct seq_file *s);
-
-struct blkcg_policy {
-	int				plid;
-	/* cgroup files for the policy */
-	struct cftype			*dfl_cftypes;
-	struct cftype			*legacy_cftypes;
-
-	/* operations */
-	blkcg_pol_alloc_cpd_fn		*cpd_alloc_fn;
-	blkcg_pol_init_cpd_fn		*cpd_init_fn;
-	blkcg_pol_free_cpd_fn		*cpd_free_fn;
-	blkcg_pol_bind_cpd_fn		*cpd_bind_fn;
-
-	blkcg_pol_alloc_pd_fn		*pd_alloc_fn;
-	blkcg_pol_init_pd_fn		*pd_init_fn;
-	blkcg_pol_online_pd_fn		*pd_online_fn;
-	blkcg_pol_offline_pd_fn		*pd_offline_fn;
-	blkcg_pol_free_pd_fn		*pd_free_fn;
-	blkcg_pol_reset_pd_stats_fn	*pd_reset_stats_fn;
-	blkcg_pol_stat_pd_fn		*pd_stat_fn;
-};
-
-extern struct blkcg blkcg_root;
 extern struct cgroup_subsys_state * const blkcg_root_css;
-extern bool blkcg_debug_stats;
-
-struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg,
-				      struct request_queue *q, bool update_hint);
-int blkcg_init_queue(struct request_queue *q);
-void blkcg_exit_queue(struct request_queue *q);
-
-/* Blkio controller policy registration */
-int blkcg_policy_register(struct blkcg_policy *pol);
-void blkcg_policy_unregister(struct blkcg_policy *pol);
-int blkcg_activate_policy(struct request_queue *q,
-			  const struct blkcg_policy *pol);
-void blkcg_deactivate_policy(struct request_queue *q,
-			     const struct blkcg_policy *pol);
-
-const char *blkg_dev_name(struct blkcg_gq *blkg);
-void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
-		       u64 (*prfill)(struct seq_file *,
-				     struct blkg_policy_data *, int),
-		       const struct blkcg_policy *pol, int data,
-		       bool show_total);
-u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v);
-
-struct blkg_conf_ctx {
-	struct block_device		*bdev;
-	struct blkcg_gq			*blkg;
-	char				*body;
-};
-
-struct block_device *blkcg_conf_open_bdev(char **inputp);
-int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
-		   char *input, struct blkg_conf_ctx *ctx);
-void blkg_conf_finish(struct blkg_conf_ctx *ctx);
 
-/**
- * blkcg_css - find the current css
- *
- * Find the css associated with either the kthread or the current task.
- * This may return a dying css, so it is up to the caller to use tryget logic
- * to confirm it is alive and well.
- */
-static inline struct cgroup_subsys_state *blkcg_css(void)
-{
-	struct cgroup_subsys_state *css;
-
-	css = kthread_blkcg();
-	if (css)
-		return css;
-	return task_css(current, io_cgrp_id);
-}
+void blkcg_destroy_blkgs(struct blkcg *blkcg);
+void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay);
+void blkcg_maybe_throttle_current(void);
 
 static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css)
 {
 	return css ? container_of(css, struct blkcg, css) : NULL;
 }
 
-/**
- * __bio_blkcg - internal, inconsistent version to get blkcg
- *
- * DO NOT USE.
- * This function is inconsistent and consequently is dangerous to use.  The
- * first part of the function returns a blkcg where a reference is owned by the
- * bio.  This means it does not need to be rcu protected as it cannot go away
- * with the bio owning a reference to it.  However, the latter potentially gets
- * it from task_css().  This can race against task migration and the cgroup
- * dying.  It is also semantically different as it must be called rcu protected
- * and is susceptible to failure when trying to get a reference to it.
- * Therefore, it is not ok to assume that *_get() will always succeed on the
- * blkcg returned here.
- */
-static inline struct blkcg *__bio_blkcg(struct bio *bio)
-{
-	if (bio && bio->bi_blkg)
-		return bio->bi_blkg->blkcg;
-	return css_to_blkcg(blkcg_css());
-}
-
 /**
  * bio_blkcg - grab the blkcg associated with a bio
  * @bio: target bio
@@ -288,22 +152,6 @@ static inline bool blk_cgroup_congested(void)
 	return ret;
 }
 
-/**
- * bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg
- * @return: true if this bio needs to be submitted with the root blkg context.
- *
- * In order to avoid priority inversions we sometimes need to issue a bio as if
- * it were attached to the root blkg, and then backcharge to the actual owning
- * blkg.  The idea is we do bio_blkcg() to look up the actual context for the
- * bio and attach the appropriate blkg to the bio.  Then we call this helper and
- * if it is true run with the root blkg for that queue and then do any
- * backcharging to the originating cgroup once the io is complete.
- */
-static inline bool bio_issue_as_root_blkg(struct bio *bio)
-{
-	return (bio->bi_opf & (REQ_META | REQ_SWAP)) != 0;
-}
-
 /**
  * blkcg_parent - get the parent of a blkcg
  * @blkcg: blkcg of interest
@@ -315,96 +163,6 @@ static inline struct blkcg *blkcg_parent(struct blkcg *blkcg)
 	return css_to_blkcg(blkcg->css.parent);
 }
 
-/**
- * __blkg_lookup - internal version of blkg_lookup()
- * @blkcg: blkcg of interest
- * @q: request_queue of interest
- * @update_hint: whether to update lookup hint with the result or not
- *
- * This is internal version and shouldn't be used by policy
- * implementations.  Looks up blkgs for the @blkcg - @q pair regardless of
- * @q's bypass state.  If @update_hint is %true, the caller should be
- * holding @q->queue_lock and lookup hint is updated on success.
- */
-static inline struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg,
-					     struct request_queue *q,
-					     bool update_hint)
-{
-	struct blkcg_gq *blkg;
-
-	if (blkcg == &blkcg_root)
-		return q->root_blkg;
-
-	blkg = rcu_dereference(blkcg->blkg_hint);
-	if (blkg && blkg->q == q)
-		return blkg;
-
-	return blkg_lookup_slowpath(blkcg, q, update_hint);
-}
-
-/**
- * blkg_lookup - lookup blkg for the specified blkcg - q pair
- * @blkcg: blkcg of interest
- * @q: request_queue of interest
- *
- * Lookup blkg for the @blkcg - @q pair.  This function should be called
- * under RCU read lock.
- */
-static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg,
-					   struct request_queue *q)
-{
-	WARN_ON_ONCE(!rcu_read_lock_held());
-	return __blkg_lookup(blkcg, q, false);
-}
-
-/**
- * blk_queue_root_blkg - return blkg for the (blkcg_root, @q) pair
- * @q: request_queue of interest
- *
- * Lookup blkg for @q at the root level. See also blkg_lookup().
- */
-static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q)
-{
-	return q->root_blkg;
-}
-
-/**
- * blkg_to_pdata - get policy private data
- * @blkg: blkg of interest
- * @pol: policy of interest
- *
- * Return pointer to private data associated with the @blkg-@pol pair.
- */
-static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
-						  struct blkcg_policy *pol)
-{
-	return blkg ? blkg->pd[pol->plid] : NULL;
-}
-
-static inline struct blkcg_policy_data *blkcg_to_cpd(struct blkcg *blkcg,
-						     struct blkcg_policy *pol)
-{
-	return blkcg ? blkcg->cpd[pol->plid] : NULL;
-}
-
-/**
- * pdata_to_blkg - get blkg associated with policy private data
- * @pd: policy private data of interest
- *
- * @pd is policy private data.  Determine the blkg it's associated with.
- */
-static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd)
-{
-	return pd ? pd->blkg : NULL;
-}
-
-static inline struct blkcg *cpd_to_blkcg(struct blkcg_policy_data *cpd)
-{
-	return cpd ? cpd->blkcg : NULL;
-}
-
-extern void blkcg_destroy_blkgs(struct blkcg *blkcg);
-
 /**
  * blkcg_pin_online - pin online state
  * @blkcg: blkcg of interest
@@ -437,231 +195,24 @@ static inline void blkcg_unpin_online(struct blkcg *blkcg)
 	} while (blkcg);
 }
 
-/**
- * blkg_path - format cgroup path of blkg
- * @blkg: blkg of interest
- * @buf: target buffer
- * @buflen: target buffer length
- *
- * Format the path of the cgroup of @blkg into @buf.
- */
-static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen)
-{
-	return cgroup_path(blkg->blkcg->css.cgroup, buf, buflen);
-}
-
-/**
- * blkg_get - get a blkg reference
- * @blkg: blkg to get
- *
- * The caller should be holding an existing reference.
- */
-static inline void blkg_get(struct blkcg_gq *blkg)
-{
-	percpu_ref_get(&blkg->refcnt);
-}
-
-/**
- * blkg_tryget - try and get a blkg reference
- * @blkg: blkg to get
- *
- * This is for use when doing an RCU lookup of the blkg.  We may be in the midst
- * of freeing this blkg, so we can only use it if the refcnt is not zero.
- */
-static inline bool blkg_tryget(struct blkcg_gq *blkg)
-{
-	return blkg && percpu_ref_tryget(&blkg->refcnt);
-}
-
-/**
- * blkg_put - put a blkg reference
- * @blkg: blkg to put
- */
-static inline void blkg_put(struct blkcg_gq *blkg)
-{
-	percpu_ref_put(&blkg->refcnt);
-}
-
-/**
- * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants
- * @d_blkg: loop cursor pointing to the current descendant
- * @pos_css: used for iteration
- * @p_blkg: target blkg to walk descendants of
- *
- * Walk @c_blkg through the descendants of @p_blkg.  Must be used with RCU
- * read locked.  If called under either blkcg or queue lock, the iteration
- * is guaranteed to include all and only online blkgs.  The caller may
- * update @pos_css by calling css_rightmost_descendant() to skip subtree.
- * @p_blkg is included in the iteration and the first node to be visited.
- */
-#define blkg_for_each_descendant_pre(d_blkg, pos_css, p_blkg)		\
-	css_for_each_descendant_pre((pos_css), &(p_blkg)->blkcg->css)	\
-		if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css),	\
-					      (p_blkg)->q, false)))
-
-/**
- * blkg_for_each_descendant_post - post-order walk of a blkg's descendants
- * @d_blkg: loop cursor pointing to the current descendant
- * @pos_css: used for iteration
- * @p_blkg: target blkg to walk descendants of
- *
- * Similar to blkg_for_each_descendant_pre() but performs post-order
- * traversal instead.  Synchronization rules are the same.  @p_blkg is
- * included in the iteration and the last node to be visited.
- */
-#define blkg_for_each_descendant_post(d_blkg, pos_css, p_blkg)		\
-	css_for_each_descendant_post((pos_css), &(p_blkg)->blkcg->css)	\
-		if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css),	\
-					      (p_blkg)->q, false)))
-
-bool __blkcg_punt_bio_submit(struct bio *bio);
-
-static inline bool blkcg_punt_bio_submit(struct bio *bio)
-{
-	if (bio->bi_opf & REQ_CGROUP_PUNT)
-		return __blkcg_punt_bio_submit(bio);
-	else
-		return false;
-}
-
-static inline void blkcg_bio_issue_init(struct bio *bio)
-{
-	bio_issue_init(&bio->bi_issue, bio_sectors(bio));
-}
-
-static inline void blkcg_use_delay(struct blkcg_gq *blkg)
-{
-	if (WARN_ON_ONCE(atomic_read(&blkg->use_delay) < 0))
-		return;
-	if (atomic_add_return(1, &blkg->use_delay) == 1)
-		atomic_inc(&blkg->blkcg->css.cgroup->congestion_count);
-}
-
-static inline int blkcg_unuse_delay(struct blkcg_gq *blkg)
-{
-	int old = atomic_read(&blkg->use_delay);
-
-	if (WARN_ON_ONCE(old < 0))
-		return 0;
-	if (old == 0)
-		return 0;
-
-	/*
-	 * We do this song and dance because we can race with somebody else
-	 * adding or removing delay.  If we just did an atomic_dec we'd end up
-	 * negative and we'd already be in trouble.  We need to subtract 1 and
-	 * then check to see if we were the last delay so we can drop the
-	 * congestion count on the cgroup.
-	 */
-	while (old) {
-		int cur = atomic_cmpxchg(&blkg->use_delay, old, old - 1);
-		if (cur == old)
-			break;
-		old = cur;
-	}
-
-	if (old == 0)
-		return 0;
-	if (old == 1)
-		atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
-	return 1;
-}
-
-/**
- * blkcg_set_delay - Enable allocator delay mechanism with the specified delay amount
- * @blkg: target blkg
- * @delay: delay duration in nsecs
- *
- * When enabled with this function, the delay is not decayed and must be
- * explicitly cleared with blkcg_clear_delay(). Must not be mixed with
- * blkcg_[un]use_delay() and blkcg_add_delay() usages.
- */
-static inline void blkcg_set_delay(struct blkcg_gq *blkg, u64 delay)
-{
-	int old = atomic_read(&blkg->use_delay);
-
-	/* We only want 1 person setting the congestion count for this blkg. */
-	if (!old && atomic_cmpxchg(&blkg->use_delay, old, -1) == old)
-		atomic_inc(&blkg->blkcg->css.cgroup->congestion_count);
-
-	atomic64_set(&blkg->delay_nsec, delay);
-}
-
-/**
- * blkcg_clear_delay - Disable allocator delay mechanism
- * @blkg: target blkg
- *
- * Disable use_delay mechanism. See blkcg_set_delay().
- */
-static inline void blkcg_clear_delay(struct blkcg_gq *blkg)
-{
-	int old = atomic_read(&blkg->use_delay);
-
-	/* We only want 1 person clearing the congestion count for this blkg. */
-	if (old && atomic_cmpxchg(&blkg->use_delay, old, 0) == old)
-		atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
-}
-
-void blk_cgroup_bio_start(struct bio *bio);
-void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta);
-void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay);
-void blkcg_maybe_throttle_current(void);
 #else	/* CONFIG_BLK_CGROUP */
 
 struct blkcg {
 };
 
-struct blkg_policy_data {
-};
-
-struct blkcg_policy_data {
-};
-
 struct blkcg_gq {
 };
 
-struct blkcg_policy {
-};
-
 #define blkcg_root_css	((struct cgroup_subsys_state *)ERR_PTR(-EINVAL))
 
 static inline void blkcg_maybe_throttle_current(void) { }
 static inline bool blk_cgroup_congested(void) { return false; }
 
 #ifdef CONFIG_BLOCK
-
 static inline void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay) { }
-
-static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; }
-static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q)
-{ return NULL; }
-static inline int blkcg_init_queue(struct request_queue *q) { return 0; }
-static inline void blkcg_exit_queue(struct request_queue *q) { }
-static inline int blkcg_policy_register(struct blkcg_policy *pol) { return 0; }
-static inline void blkcg_policy_unregister(struct blkcg_policy *pol) { }
-static inline int blkcg_activate_policy(struct request_queue *q,
-					const struct blkcg_policy *pol) { return 0; }
-static inline void blkcg_deactivate_policy(struct request_queue *q,
-					   const struct blkcg_policy *pol) { }
-
-static inline struct blkcg *__bio_blkcg(struct bio *bio) { return NULL; }
 static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; }
+#endif /* CONFIG_BLOCK */
 
-static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
-						  struct blkcg_policy *pol) { return NULL; }
-static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; }
-static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; }
-static inline void blkg_get(struct blkcg_gq *blkg) { }
-static inline void blkg_put(struct blkcg_gq *blkg) { }
-
-static inline bool blkcg_punt_bio_submit(struct bio *bio) { return false; }
-static inline void blkcg_bio_issue_init(struct bio *bio) { }
-static inline void blk_cgroup_bio_start(struct bio *bio) { }
-
-#define blk_queue_for_each_rl(rl, q)	\
-	for ((rl) = &(q)->root_rl; (rl); (rl) = NULL)
-
-#endif	/* CONFIG_BLOCK */
 #endif	/* CONFIG_BLK_CGROUP */
 
 #ifdef CONFIG_BLK_CGROUP_FC_APPID
-- 
GitLab


From 2d03861e0d1d1ee81efc59338101cdd86a7474f6 Mon Sep 17 00:00:00 2001
From: Reinette Chatre <reinette.chatre@intel.com>
Date: Tue, 8 Feb 2022 13:48:39 -0800
Subject: [PATCH 0351/1586] selftests/sgx: Fix NULL-pointer-dereference upon
 early test failure

== Background ==

The SGX selftests track parts of the enclave binaries in an array:
encl->segment_tbl[]. That array is dynamically allocated early
(but not first) in the test's lifetime. The array is referenced
at the end of the test in encl_delete().

== Problem ==

encl->segment_tbl[] can be NULL if the test fails before its
allocation. That leads to a NULL-pointer-dereference in encl_delete().
This is triggered during early failures of the selftest like if the
enclave binary ("test_encl.elf") is deleted.

== Solution ==

Ensure encl->segment_tbl[] is valid before attempting to access
its members. The offset with which it is accessed, encl->nr_segments,
is initialized before encl->segment_tbl[] and thus considered valid
to use after the encl->segment_tbl[] check succeeds.

Fixes: 3200505d4de6 ("selftests/sgx: Create a heap for the test enclave")
Signed-off-by: Reinette Chatre <reinette.chatre@intel.com>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Acked-by: Shuah Khan <skhan@linuxfoundation.org>
Link: https://lkml.kernel.org/r/90a31dfd640ea756fa324712e7cbab4a90fa7518.1644355600.git.reinette.chatre@intel.com
---
 tools/testing/selftests/sgx/load.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/sgx/load.c b/tools/testing/selftests/sgx/load.c
index 9d4322c946e2b..006b464c8fc94 100644
--- a/tools/testing/selftests/sgx/load.c
+++ b/tools/testing/selftests/sgx/load.c
@@ -21,7 +21,7 @@
 
 void encl_delete(struct encl *encl)
 {
-	struct encl_segment *heap_seg = &encl->segment_tbl[encl->nr_segments - 1];
+	struct encl_segment *heap_seg;
 
 	if (encl->encl_base)
 		munmap((void *)encl->encl_base, encl->encl_size);
@@ -32,10 +32,11 @@ void encl_delete(struct encl *encl)
 	if (encl->fd)
 		close(encl->fd);
 
-	munmap(heap_seg->src, heap_seg->size);
-
-	if (encl->segment_tbl)
+	if (encl->segment_tbl) {
+		heap_seg = &encl->segment_tbl[encl->nr_segments - 1];
+		munmap(heap_seg->src, heap_seg->size);
 		free(encl->segment_tbl);
+	}
 
 	memset(encl, 0, sizeof(*encl));
 }
-- 
GitLab


From fff36bcbfde1126f6b81cb8ee12a58aada17ca29 Mon Sep 17 00:00:00 2001
From: Reinette Chatre <reinette.chatre@intel.com>
Date: Tue, 8 Feb 2022 13:48:40 -0800
Subject: [PATCH 0352/1586] selftests/sgx: Do not attempt enclave build without
 valid enclave

It is not possible to build an enclave if it was not possible to load
the binary from which it should be constructed. Do not attempt
to make further progress but instead return with failure. A
"return false" from setup_test_encl() is expected to trip an
ASSERT_TRUE() and abort the rest of the test.

Fixes: 1b35eb719549 ("selftests/sgx: Encpsulate the test enclave creation")
Signed-off-by: Reinette Chatre <reinette.chatre@intel.com>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Acked-by: Dave Hansen <dave.hansen@linux.intel.com>
Acked-by: Shuah Khan <skhan@linuxfoundation.org>
Link: https://lkml.kernel.org/r/e3778c77f95e6dca348c732b12f155051d2899b4.1644355600.git.reinette.chatre@intel.com
---
 tools/testing/selftests/sgx/main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/testing/selftests/sgx/main.c b/tools/testing/selftests/sgx/main.c
index 370c4995f7c4a..a7cd2c3e6f7e1 100644
--- a/tools/testing/selftests/sgx/main.c
+++ b/tools/testing/selftests/sgx/main.c
@@ -147,6 +147,7 @@ static bool setup_test_encl(unsigned long heap_size, struct encl *encl,
 	if (!encl_load("test_encl.elf", encl, heap_size)) {
 		encl_delete(encl);
 		TH_LOG("Failed to load the test enclave.\n");
+		return false;
 	}
 
 	if (!encl_measure(encl))
-- 
GitLab


From 2db703fc3b15e7ef68c82eca613a3c00d43d70af Mon Sep 17 00:00:00 2001
From: Reinette Chatre <reinette.chatre@intel.com>
Date: Tue, 8 Feb 2022 13:48:41 -0800
Subject: [PATCH 0353/1586] selftests/sgx: Ensure enclave data available during
 debug print

In support of debugging the SGX tests print details from
the enclave and its memory mappings if any failure is encountered
during enclave loading.

When a failure is encountered no data is printed because the
printing of the data is preceded by cleanup of the data.

Move the data cleanup after the data print.

Fixes: 147172148909 ("selftests/sgx: Dump segments and /proc/self/maps only on failure")
Signed-off-by: Reinette Chatre <reinette.chatre@intel.com>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Acked-by: Shuah Khan <skhan@linuxfoundation.org>
Link: https://lkml.kernel.org/r/dab672f771e9b99e50c17ae2a75dc0b020cb0ce9.1644355600.git.reinette.chatre@intel.com
---
 tools/testing/selftests/sgx/main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/sgx/main.c b/tools/testing/selftests/sgx/main.c
index a7cd2c3e6f7e1..b0bd95a4730d5 100644
--- a/tools/testing/selftests/sgx/main.c
+++ b/tools/testing/selftests/sgx/main.c
@@ -186,8 +186,6 @@ static bool setup_test_encl(unsigned long heap_size, struct encl *encl,
 	return true;
 
 err:
-	encl_delete(encl);
-
 	for (i = 0; i < encl->nr_segments; i++) {
 		seg = &encl->segment_tbl[i];
 
@@ -208,6 +206,8 @@ err:
 
 	TH_LOG("Failed to initialize the test enclave.\n");
 
+	encl_delete(encl);
+
 	return false;
 }
 
-- 
GitLab


From 5626de65f97ae152e6dafdc528a36c1cbb7146ee Mon Sep 17 00:00:00 2001
From: Reinette Chatre <reinette.chatre@intel.com>
Date: Tue, 8 Feb 2022 13:48:42 -0800
Subject: [PATCH 0354/1586] selftests/sgx: Remove extra newlines in test output

The TH_LOG() macro is an optional debug logging function made
available by kselftest itself. When TH_LOG_ENABLED is set it
prints the provided message with additional information and
formatting that already includes a newline.

Providing a newline to the message printed by TH_LOG() results
in a double newline that produces irregular test output.

Remove the unnecessary newlines from the text provided to
TH_LOG().

Fixes: 1b35eb719549 ("selftests/sgx: Encpsulate the test enclave creation")
Signed-off-by: Reinette Chatre <reinette.chatre@intel.com>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Acked-by: Dave Hansen <dave.hansen@linux.intel.com>
Acked-by: Shuah Khan <skhan@linuxfoundation.org>
Link: https://lkml.kernel.org/r/6fd171ba622aed172a7c5b129d34d50bd0482f24.1644355600.git.reinette.chatre@intel.com
---
 tools/testing/selftests/sgx/main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/sgx/main.c b/tools/testing/selftests/sgx/main.c
index b0bd95a4730d5..dd74fa42302e0 100644
--- a/tools/testing/selftests/sgx/main.c
+++ b/tools/testing/selftests/sgx/main.c
@@ -146,7 +146,7 @@ static bool setup_test_encl(unsigned long heap_size, struct encl *encl,
 
 	if (!encl_load("test_encl.elf", encl, heap_size)) {
 		encl_delete(encl);
-		TH_LOG("Failed to load the test enclave.\n");
+		TH_LOG("Failed to load the test enclave.");
 		return false;
 	}
 
@@ -204,7 +204,7 @@ err:
 		fclose(maps_file);
 	}
 
-	TH_LOG("Failed to initialize the test enclave.\n");
+	TH_LOG("Failed to initialize the test enclave.");
 
 	encl_delete(encl);
 
-- 
GitLab


From 2e2f0199a20780463945e1cbffb3a191fa84bd9d Mon Sep 17 00:00:00 2001
From: Barry Song <21cnbao@gmail.com>
Date: Mon, 7 Feb 2022 15:49:31 +0800
Subject: [PATCH 0355/1586] docs: block: biodoc.rst: Drop the obsolete and
 incorrect content

Since commit 7eaceaccab5f ("block: remove per-queue plugging"), kernel
has removed blk_run_address_space(), blk_unplug() and sync_buffer(),
and moved to on-stack plugging. The document has been obsolete for
years.
Given that there is no obvious counterparts in the new mechinism to
replace old APIs, this patch drops the content directly.

Signed-off-by: Barry Song <song.bao.hua@hisilicon.com>
Link: https://lore.kernel.org/r/20220207074931.20067-1-song.bao.hua@hisilicon.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 Documentation/block/biodoc.rst | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/Documentation/block/biodoc.rst b/Documentation/block/biodoc.rst
index 4fbc367e62f95..fdebc0fd8468f 100644
--- a/Documentation/block/biodoc.rst
+++ b/Documentation/block/biodoc.rst
@@ -965,11 +965,7 @@ till it fills up with a few more requests, before starting to service
 the requests. This provides an opportunity to merge/sort the requests before
 passing them down to the device. There are various conditions when the queue is
 unplugged (to open up the flow again), either through a scheduled task or
-could be on demand. For example wait_on_buffer sets the unplugging going
-through sync_buffer() running blk_run_address_space(mapping). Or the caller
-can do it explicity through blk_unplug(bdev). So in the read case,
-the queue gets explicitly unplugged as part of waiting for completion on that
-buffer.
+could be on demand.
 
 Aside:
   This is kind of controversial territory, as it's not clear if plugging is
-- 
GitLab


From 5f117033243488a0080f837540c27999aa31870e Mon Sep 17 00:00:00 2001
From: Marco Bonelli <marco@mebeim.net>
Date: Fri, 11 Feb 2022 17:23:50 +0100
Subject: [PATCH 0356/1586] x86/head64: Add missing __head annotation to
 sme_postprocess_startup()

This function was previously part of __startup_64() which is marked
__head, and is currently only called from there. Mark it __head too.

Signed-off-by: Marco Bonelli <marco@mebeim.net>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lore.kernel.org/r/20220211162350.11780-1-marco@mebeim.net
---
 arch/x86/kernel/head64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index de563db9cdcd2..4f5ecbbaae77c 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -126,7 +126,7 @@ static bool __head check_la57_support(unsigned long physaddr)
 }
 #endif
 
-static unsigned long sme_postprocess_startup(struct boot_params *bp, pmdval_t *pmd)
+static unsigned long __head sme_postprocess_startup(struct boot_params *bp, pmdval_t *pmd)
 {
 	unsigned long vaddr, vaddr_end;
 	int i;
-- 
GitLab


From 47c8ebcce85ed7113e9e3e3f1d8c6374fa87848e Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Thu, 27 Jan 2022 13:31:43 -0800
Subject: [PATCH 0357/1586] f2fs: add a way to limit roll forward recovery time

This adds a sysfs entry to call checkpoint during fsync() in order to avoid
long elapsed time to run roll-forward recovery when booting the device.
Default value doesn't enforce the limitation which is same as before.

Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 Documentation/ABI/testing/sysfs-fs-f2fs |  6 ++++++
 fs/f2fs/checkpoint.c                    |  1 +
 fs/f2fs/debug.c                         |  3 +++
 fs/f2fs/f2fs.h                          |  3 +++
 fs/f2fs/node.c                          |  2 ++
 fs/f2fs/node.h                          |  3 +++
 fs/f2fs/recovery.c                      |  4 ++++
 fs/f2fs/super.c                         | 14 ++++++++++++--
 fs/f2fs/sysfs.c                         |  2 ++
 9 files changed, 36 insertions(+), 2 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index 7b50bf82f14dd..58bf0dc83712d 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -568,3 +568,9 @@ Contact:	"Daeho Jeong" <daehojeong@google.com>
 Description:	You can set the trial count limit for GC urgent high mode with this value.
 		If GC thread gets to the limit, the mode will turn back to GC normal mode.
 		By default, the value is zero, which means there is no limit like before.
+
+What:		/sys/fs/f2fs/<disk>/max_roll_forward_node_blocks
+Date:		January 2022
+Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
+Description:	Controls max # of node block writes to be used for roll forward
+		recovery. This can limit the roll forward recovery time.
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index a13b6b4af220a..203a1577942d3 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -1547,6 +1547,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 	/* update user_block_counts */
 	sbi->last_valid_block_count = sbi->total_valid_block_count;
 	percpu_counter_set(&sbi->alloc_valid_block_count, 0);
+	percpu_counter_set(&sbi->rf_node_block_count, 0);
 
 	/* Here, we have one bio having CP pack except cp pack 2 page */
 	f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 8c50518475a99..9a13902c77026 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -532,6 +532,9 @@ static int stat_show(struct seq_file *s, void *v)
 			   si->ndirty_meta, si->meta_pages);
 		seq_printf(s, "  - imeta: %4d\n",
 			   si->ndirty_imeta);
+		seq_printf(s, "  - fsync mark: %4lld\n",
+			   percpu_counter_sum_positive(
+					&si->sbi->rf_node_block_count));
 		seq_printf(s, "  - NATs: %9d/%9d\n  - SITs: %9d/%9d\n",
 			   si->dirty_nats, si->nats, si->dirty_sits, si->sits);
 		seq_printf(s, "  - free_nids: %9d/%9d\n  - alloc_nids: %9d\n",
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 3b4bf1c3f1ed6..c9515c3c54fd5 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -917,6 +917,7 @@ struct f2fs_nm_info {
 	nid_t max_nid;			/* maximum possible node ids */
 	nid_t available_nids;		/* # of available node ids */
 	nid_t next_scan_nid;		/* the next nid to be scanned */
+	nid_t max_rf_node_blocks;	/* max # of nodes for recovery */
 	unsigned int ram_thresh;	/* control the memory footprint */
 	unsigned int ra_nid_pages;	/* # of nid pages to be readaheaded */
 	unsigned int dirty_nats_ratio;	/* control dirty nats ratio threshold */
@@ -1688,6 +1689,8 @@ struct f2fs_sb_info {
 	atomic_t nr_pages[NR_COUNT_TYPE];
 	/* # of allocated blocks */
 	struct percpu_counter alloc_valid_block_count;
+	/* # of node block writes as roll forward recovery */
+	struct percpu_counter rf_node_block_count;
 
 	/* writeback control */
 	atomic_t wb_sync_req[META];	/* count # of WB_SYNC threads */
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 93512f8859d5d..0d98834575797 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1782,6 +1782,7 @@ continue_unlock:
 
 			if (!atomic || page == last_page) {
 				set_fsync_mark(page, 1);
+				percpu_counter_inc(&sbi->rf_node_block_count);
 				if (IS_INODE(page)) {
 					if (is_inode_flag_set(inode,
 								FI_DIRTY_INODE))
@@ -3218,6 +3219,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
 	nm_i->ram_thresh = DEF_RAM_THRESHOLD;
 	nm_i->ra_nid_pages = DEF_RA_NID_PAGES;
 	nm_i->dirty_nats_ratio = DEF_DIRTY_NAT_RATIO_THRESHOLD;
+	nm_i->max_rf_node_blocks = DEF_RF_NODE_BLOCKS;
 
 	INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC);
 	INIT_LIST_HEAD(&nm_i->free_nid_list);
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index 18b98cf0465b8..4c1d34bfea781 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -31,6 +31,9 @@
 /* control total # of nats */
 #define DEF_NAT_CACHE_THRESHOLD			100000
 
+/* control total # of node writes used for roll-fowrad recovery */
+#define DEF_RF_NODE_BLOCKS			0
+
 /* vector size for gang look-up from nat cache that consists of radix tree */
 #define NATVEC_SIZE	64
 #define SETVEC_SIZE	32
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 2af503f75b4fa..ab33e474af070 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -56,6 +56,10 @@ bool f2fs_space_for_roll_forward(struct f2fs_sb_info *sbi)
 
 	if (sbi->last_valid_block_count + nalloc > sbi->user_block_count)
 		return false;
+	if (NM_I(sbi)->max_rf_node_blocks &&
+		percpu_counter_sum_positive(&sbi->rf_node_block_count) >=
+						NM_I(sbi)->max_rf_node_blocks)
+		return false;
 	return true;
 }
 
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 806836184ebc9..f816d7d1987d9 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1501,8 +1501,9 @@ static void f2fs_free_inode(struct inode *inode)
 
 static void destroy_percpu_info(struct f2fs_sb_info *sbi)
 {
-	percpu_counter_destroy(&sbi->alloc_valid_block_count);
 	percpu_counter_destroy(&sbi->total_valid_inode_count);
+	percpu_counter_destroy(&sbi->rf_node_block_count);
+	percpu_counter_destroy(&sbi->alloc_valid_block_count);
 }
 
 static void destroy_device_list(struct f2fs_sb_info *sbi)
@@ -3619,11 +3620,20 @@ static int init_percpu_info(struct f2fs_sb_info *sbi)
 	if (err)
 		return err;
 
+	err = percpu_counter_init(&sbi->rf_node_block_count, 0, GFP_KERNEL);
+	if (err)
+		goto err_valid_block;
+
 	err = percpu_counter_init(&sbi->total_valid_inode_count, 0,
 								GFP_KERNEL);
 	if (err)
-		percpu_counter_destroy(&sbi->alloc_valid_block_count);
+		goto err_node_block;
+	return 0;
 
+err_node_block:
+	percpu_counter_destroy(&sbi->rf_node_block_count);
+err_valid_block:
+	percpu_counter_destroy(&sbi->alloc_valid_block_count);
 	return err;
 }
 
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index 281bc0133ee6f..47efcf233afdd 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -732,6 +732,7 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ssr_sections, min_ssr_sections);
 F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
 F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages);
 F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, dirty_nats_ratio, dirty_nats_ratio);
+F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, max_roll_forward_node_blocks, max_rf_node_blocks);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, migration_granularity, migration_granularity);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level);
@@ -855,6 +856,7 @@ static struct attribute *f2fs_attrs[] = {
 	ATTR_LIST(ram_thresh),
 	ATTR_LIST(ra_nid_pages),
 	ATTR_LIST(dirty_nats_ratio),
+	ATTR_LIST(max_roll_forward_node_blocks),
 	ATTR_LIST(cp_interval),
 	ATTR_LIST(idle_interval),
 	ATTR_LIST(discard_idle_interval),
-- 
GitLab


From 984fc4e76d63345499f01c0c198a4b44860cf027 Mon Sep 17 00:00:00 2001
From: Chao Yu <chao@kernel.org>
Date: Fri, 4 Feb 2022 13:24:56 +0800
Subject: [PATCH 0358/1586] f2fs: support idmapped mounts
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch enables idmapped mounts for f2fs, since all dedicated helpers
for this functionality existsm, so, in this patch we just pass down the
user_namespace argument from the VFS methods to the relevant helpers.

Simple idmap example on f2fs image:

1. truncate -s 128M f2fs.img
2. mkfs.f2fs f2fs.img
3. mount f2fs.img /mnt/f2fs/
4. touch /mnt/f2fs/file

5. ls -ln /mnt/f2fs/
total 0
-rw-r--r-- 1 0 0 0 2月   4 13:17 file

6. ./mount-idmapped --map-mount b:0:1001:1 /mnt/f2fs/ /mnt/scratch_f2fs/

7. ls -ln /mnt/scratch_f2fs/
total 0
-rw-r--r-- 1 1001 1001 0 2月   4 13:17 file

Signed-off-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/acl.c   | 21 ++++++++++++---------
 fs/f2fs/file.c  | 23 ++++++++++++++---------
 fs/f2fs/namei.c | 41 +++++++++++++++++++++++------------------
 fs/f2fs/super.c |  2 +-
 4 files changed, 50 insertions(+), 37 deletions(-)

diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index 16e826e01f095..eaa240b21f071 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -204,8 +204,9 @@ struct posix_acl *f2fs_get_acl(struct inode *inode, int type, bool rcu)
 	return __f2fs_get_acl(inode, type, NULL);
 }
 
-static int f2fs_acl_update_mode(struct inode *inode, umode_t *mode_p,
-			  struct posix_acl **acl)
+static int f2fs_acl_update_mode(struct user_namespace *mnt_userns,
+				struct inode *inode, umode_t *mode_p,
+				struct posix_acl **acl)
 {
 	umode_t mode = inode->i_mode;
 	int error;
@@ -218,14 +219,15 @@ static int f2fs_acl_update_mode(struct inode *inode, umode_t *mode_p,
 		return error;
 	if (error == 0)
 		*acl = NULL;
-	if (!in_group_p(i_gid_into_mnt(&init_user_ns, inode)) &&
-	    !capable_wrt_inode_uidgid(&init_user_ns, inode, CAP_FSETID))
+	if (!in_group_p(i_gid_into_mnt(mnt_userns, inode)) &&
+	    !capable_wrt_inode_uidgid(mnt_userns, inode, CAP_FSETID))
 		mode &= ~S_ISGID;
 	*mode_p = mode;
 	return 0;
 }
 
-static int __f2fs_set_acl(struct inode *inode, int type,
+static int __f2fs_set_acl(struct user_namespace *mnt_userns,
+			struct inode *inode, int type,
 			struct posix_acl *acl, struct page *ipage)
 {
 	int name_index;
@@ -238,7 +240,8 @@ static int __f2fs_set_acl(struct inode *inode, int type,
 	case ACL_TYPE_ACCESS:
 		name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS;
 		if (acl && !ipage) {
-			error = f2fs_acl_update_mode(inode, &mode, &acl);
+			error = f2fs_acl_update_mode(mnt_userns, inode,
+								&mode, &acl);
 			if (error)
 				return error;
 			set_acl_inode(inode, mode);
@@ -279,7 +282,7 @@ int f2fs_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
 	if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
 		return -EIO;
 
-	return __f2fs_set_acl(inode, type, acl, NULL);
+	return __f2fs_set_acl(mnt_userns, inode, type, acl, NULL);
 }
 
 /*
@@ -419,7 +422,7 @@ int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage,
 	f2fs_mark_inode_dirty_sync(inode, true);
 
 	if (default_acl) {
-		error = __f2fs_set_acl(inode, ACL_TYPE_DEFAULT, default_acl,
+		error = __f2fs_set_acl(NULL, inode, ACL_TYPE_DEFAULT, default_acl,
 				       ipage);
 		posix_acl_release(default_acl);
 	} else {
@@ -427,7 +430,7 @@ int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage,
 	}
 	if (acl) {
 		if (!error)
-			error = __f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl,
+			error = __f2fs_set_acl(NULL, inode, ACL_TYPE_ACCESS, acl,
 					       ipage);
 		posix_acl_release(acl);
 	} else {
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 42fbdcf0ccc9b..cfdc41f87f5de 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -844,7 +844,7 @@ int f2fs_getattr(struct user_namespace *mnt_userns, const struct path *path,
 				  STATX_ATTR_NODUMP |
 				  STATX_ATTR_VERITY);
 
-	generic_fillattr(&init_user_ns, inode, stat);
+	generic_fillattr(mnt_userns, inode, stat);
 
 	/* we need to show initial sectors used for inline_data/dentries */
 	if ((S_ISREG(inode->i_mode) && f2fs_has_inline_data(inode)) ||
@@ -904,7 +904,7 @@ int f2fs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
 		!f2fs_is_compress_backend_ready(inode))
 		return -EOPNOTSUPP;
 
-	err = setattr_prepare(&init_user_ns, dentry, attr);
+	err = setattr_prepare(mnt_userns, dentry, attr);
 	if (err)
 		return err;
 
@@ -980,10 +980,10 @@ int f2fs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
 		spin_unlock(&F2FS_I(inode)->i_size_lock);
 	}
 
-	__setattr_copy(&init_user_ns, inode, attr);
+	__setattr_copy(mnt_userns, inode, attr);
 
 	if (attr->ia_valid & ATTR_MODE) {
-		err = posix_acl_chmod(&init_user_ns, inode, f2fs_get_inode_mode(inode));
+		err = posix_acl_chmod(mnt_userns, inode, f2fs_get_inode_mode(inode));
 
 		if (is_inode_flag_set(inode, FI_ACL_MODE)) {
 			if (!err)
@@ -1989,11 +1989,12 @@ static int f2fs_ioc_getversion(struct file *filp, unsigned long arg)
 static int f2fs_ioc_start_atomic_write(struct file *filp)
 {
 	struct inode *inode = file_inode(filp);
+	struct user_namespace *mnt_userns = file_mnt_user_ns(filp);
 	struct f2fs_inode_info *fi = F2FS_I(inode);
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 	int ret;
 
-	if (!inode_owner_or_capable(&init_user_ns, inode))
+	if (!inode_owner_or_capable(mnt_userns, inode))
 		return -EACCES;
 
 	if (!S_ISREG(inode->i_mode))
@@ -2058,9 +2059,10 @@ out:
 static int f2fs_ioc_commit_atomic_write(struct file *filp)
 {
 	struct inode *inode = file_inode(filp);
+	struct user_namespace *mnt_userns = file_mnt_user_ns(filp);
 	int ret;
 
-	if (!inode_owner_or_capable(&init_user_ns, inode))
+	if (!inode_owner_or_capable(mnt_userns, inode))
 		return -EACCES;
 
 	ret = mnt_want_write_file(filp);
@@ -2100,9 +2102,10 @@ err_out:
 static int f2fs_ioc_start_volatile_write(struct file *filp)
 {
 	struct inode *inode = file_inode(filp);
+	struct user_namespace *mnt_userns = file_mnt_user_ns(filp);
 	int ret;
 
-	if (!inode_owner_or_capable(&init_user_ns, inode))
+	if (!inode_owner_or_capable(mnt_userns, inode))
 		return -EACCES;
 
 	if (!S_ISREG(inode->i_mode))
@@ -2135,9 +2138,10 @@ out:
 static int f2fs_ioc_release_volatile_write(struct file *filp)
 {
 	struct inode *inode = file_inode(filp);
+	struct user_namespace *mnt_userns = file_mnt_user_ns(filp);
 	int ret;
 
-	if (!inode_owner_or_capable(&init_user_ns, inode))
+	if (!inode_owner_or_capable(mnt_userns, inode))
 		return -EACCES;
 
 	ret = mnt_want_write_file(filp);
@@ -2164,9 +2168,10 @@ out:
 static int f2fs_ioc_abort_volatile_write(struct file *filp)
 {
 	struct inode *inode = file_inode(filp);
+	struct user_namespace *mnt_userns = file_mnt_user_ns(filp);
 	int ret;
 
-	if (!inode_owner_or_capable(&init_user_ns, inode))
+	if (!inode_owner_or_capable(mnt_userns, inode))
 		return -EACCES;
 
 	ret = mnt_want_write_file(filp);
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 0347c5780910a..2b23a76bdae92 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -22,7 +22,8 @@
 #include "acl.h"
 #include <trace/events/f2fs.h>
 
-static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
+static struct inode *f2fs_new_inode(struct user_namespace *mnt_userns,
+						struct inode *dir, umode_t mode)
 {
 	struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
 	nid_t ino;
@@ -46,7 +47,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
 
 	nid_free = true;
 
-	inode_init_owner(&init_user_ns, inode, dir, mode);
+	inode_init_owner(mnt_userns, inode, dir, mode);
 
 	inode->i_ino = ino;
 	inode->i_blocks = 0;
@@ -67,7 +68,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
 		(F2FS_I(dir)->i_flags & F2FS_PROJINHERIT_FL))
 		F2FS_I(inode)->i_projid = F2FS_I(dir)->i_projid;
 	else
-		F2FS_I(inode)->i_projid = make_kprojid(&init_user_ns,
+		F2FS_I(inode)->i_projid = make_kprojid(mnt_userns,
 							F2FS_DEF_PROJID);
 
 	err = fscrypt_prepare_new_inode(dir, inode, &encrypt);
@@ -349,7 +350,7 @@ static int f2fs_create(struct user_namespace *mnt_userns, struct inode *dir,
 	if (err)
 		return err;
 
-	inode = f2fs_new_inode(dir, mode);
+	inode = f2fs_new_inode(mnt_userns, dir, mode);
 	if (IS_ERR(inode))
 		return PTR_ERR(inode);
 
@@ -679,7 +680,7 @@ static int f2fs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
 	if (err)
 		return err;
 
-	inode = f2fs_new_inode(dir, S_IFLNK | S_IRWXUGO);
+	inode = f2fs_new_inode(mnt_userns, dir, S_IFLNK | S_IRWXUGO);
 	if (IS_ERR(inode))
 		return PTR_ERR(inode);
 
@@ -750,7 +751,7 @@ static int f2fs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
 	if (err)
 		return err;
 
-	inode = f2fs_new_inode(dir, S_IFDIR | mode);
+	inode = f2fs_new_inode(mnt_userns, dir, S_IFDIR | mode);
 	if (IS_ERR(inode))
 		return PTR_ERR(inode);
 
@@ -807,7 +808,7 @@ static int f2fs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
 	if (err)
 		return err;
 
-	inode = f2fs_new_inode(dir, mode);
+	inode = f2fs_new_inode(mnt_userns, dir, mode);
 	if (IS_ERR(inode))
 		return PTR_ERR(inode);
 
@@ -834,8 +835,9 @@ out:
 	return err;
 }
 
-static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry,
-					umode_t mode, struct inode **whiteout)
+static int __f2fs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
+					struct dentry *dentry, umode_t mode,
+					struct inode **whiteout)
 {
 	struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
 	struct inode *inode;
@@ -845,7 +847,7 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry,
 	if (err)
 		return err;
 
-	inode = f2fs_new_inode(dir, mode);
+	inode = f2fs_new_inode(mnt_userns, dir, mode);
 	if (IS_ERR(inode))
 		return PTR_ERR(inode);
 
@@ -909,20 +911,22 @@ static int f2fs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
 	if (!f2fs_is_checkpoint_ready(sbi))
 		return -ENOSPC;
 
-	return __f2fs_tmpfile(dir, dentry, mode, NULL);
+	return __f2fs_tmpfile(mnt_userns, dir, dentry, mode, NULL);
 }
 
-static int f2fs_create_whiteout(struct inode *dir, struct inode **whiteout)
+static int f2fs_create_whiteout(struct user_namespace *mnt_userns,
+				struct inode *dir, struct inode **whiteout)
 {
 	if (unlikely(f2fs_cp_error(F2FS_I_SB(dir))))
 		return -EIO;
 
-	return __f2fs_tmpfile(dir, NULL, S_IFCHR | WHITEOUT_MODE, whiteout);
+	return __f2fs_tmpfile(mnt_userns, dir, NULL,
+				S_IFCHR | WHITEOUT_MODE, whiteout);
 }
 
-static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
-			struct inode *new_dir, struct dentry *new_dentry,
-			unsigned int flags)
+static int f2fs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+			struct dentry *old_dentry, struct inode *new_dir,
+			struct dentry *new_dentry, unsigned int flags)
 {
 	struct f2fs_sb_info *sbi = F2FS_I_SB(old_dir);
 	struct inode *old_inode = d_inode(old_dentry);
@@ -960,7 +964,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	}
 
 	if (flags & RENAME_WHITEOUT) {
-		err = f2fs_create_whiteout(old_dir, &whiteout);
+		err = f2fs_create_whiteout(mnt_userns, old_dir, &whiteout);
 		if (err)
 			return err;
 	}
@@ -1300,7 +1304,8 @@ static int f2fs_rename2(struct user_namespace *mnt_userns,
 	 * VFS has already handled the new dentry existence case,
 	 * here, we just deal with "RENAME_NOREPLACE" as regular rename.
 	 */
-	return f2fs_rename(old_dir, old_dentry, new_dir, new_dentry, flags);
+	return f2fs_rename(mnt_userns, old_dir, old_dentry,
+					new_dir, new_dentry, flags);
 }
 
 static const char *f2fs_encrypted_get_link(struct dentry *dentry,
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index f816d7d1987d9..22fb4d3b11702 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -4539,7 +4539,7 @@ static struct file_system_type f2fs_fs_type = {
 	.name		= "f2fs",
 	.mount		= f2fs_mount,
 	.kill_sb	= kill_f2fs_super,
-	.fs_flags	= FS_REQUIRES_DEV,
+	.fs_flags	= FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
 };
 MODULE_ALIAS_FS("f2fs");
 
-- 
GitLab


From 73ab4a3509e6b8f93b87398db2aaabd3c9cbe487 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Wed, 18 Aug 2021 08:03:08 -0700
Subject: [PATCH 0359/1586] KVM: x86: Replace memset() "optimization" with
 normal per-field writes

Explicitly zero select fields in the emulator's decode cache instead of
zeroing the fields via a gross memset() that spans six fields. gcc and
clang are both clever enough to batch the first five fields into a single
quadword MOV, i.e. memset() and individually zeroing generate identical
code.

Removing the wart also prepares KVM for FORTIFY_SOURCE performing
compile-time and run-time field bounds checking for memset().

No functional change intended.

Reported-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/lkml/YR0jIEzEcUom/7rd@google.com
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 arch/x86/kvm/emulate.c     | 9 +++++++--
 arch/x86/kvm/kvm_emulate.h | 6 +-----
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 5719d8cfdbd90..6b820cc2b51b9 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -5380,8 +5380,13 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop)
 
 void init_decode_cache(struct x86_emulate_ctxt *ctxt)
 {
-	memset(&ctxt->rip_relative, 0,
-	       (void *)&ctxt->modrm - (void *)&ctxt->rip_relative);
+	/* Clear fields that are set conditionally but read without a guard. */
+	ctxt->rip_relative = false;
+	ctxt->rex_prefix = 0;
+	ctxt->lock_prefix = 0;
+	ctxt->rep_prefix = 0;
+	ctxt->regs_valid = 0;
+	ctxt->regs_dirty = 0;
 
 	ctxt->io_read.pos = 0;
 	ctxt->io_read.end = 0;
diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
index 39eded2426ffd..840ddb4a93025 100644
--- a/arch/x86/kvm/kvm_emulate.h
+++ b/arch/x86/kvm/kvm_emulate.h
@@ -336,11 +336,7 @@ struct x86_emulate_ctxt {
 		fastop_t fop;
 	};
 	int (*check_perm)(struct x86_emulate_ctxt *ctxt);
-	/*
-	 * The following six fields are cleared together,
-	 * the rest are initialized unconditionally in x86_decode_insn
-	 * or elsewhere
-	 */
+
 	bool rip_relative;
 	u8 rex_prefix;
 	u8 lock_prefix;
-- 
GitLab


From 9ed0a59c0cbb663ea09588322498499a937d08fa Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Mon, 24 May 2021 21:36:48 -0700
Subject: [PATCH 0360/1586] intel_th: msu: Use memset_startat() for clearing hw
 header

In preparation for FORTIFY_SOURCE performing compile-time and run-time
field bounds checking for memset(), avoid intentionally writing across
neighboring fields.

Use memset_startat() so memset() doesn't get confused about writing
beyond the destination member that is intended to be the starting point
of zeroing through the end of the struct.

Acked-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Link: https://lore.kernel.org/lkml/87sfyzi97l.fsf@ashishki-desk.ger.corp.intel.com
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 drivers/hwtracing/intel_th/msu.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/hwtracing/intel_th/msu.c b/drivers/hwtracing/intel_th/msu.c
index 432ade0842f68..70a07b4e99673 100644
--- a/drivers/hwtracing/intel_th/msu.c
+++ b/drivers/hwtracing/intel_th/msu.c
@@ -658,13 +658,11 @@ static void msc_buffer_clear_hw_header(struct msc *msc)
 
 	list_for_each_entry(win, &msc->win_list, entry) {
 		unsigned int blk;
-		size_t hw_sz = sizeof(struct msc_block_desc) -
-			offsetof(struct msc_block_desc, hw_tag);
 
 		for_each_sg(win->sgt->sgl, sg, win->nr_segs, blk) {
 			struct msc_block_desc *bdesc = sg_virt(sg);
 
-			memset(&bdesc->hw_tag, 0, hw_sz);
+			memset_startat(bdesc, 0, hw_tag);
 		}
 	}
 }
-- 
GitLab


From 2a55550fbba643f41318bb12ab20e6510c7df231 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 2 Feb 2022 14:48:34 -0800
Subject: [PATCH 0361/1586] m68k: cmpxchg: Dereference matching size

Similar to the recent arm64 fix[1], avoid overly wide casts in the m68k
cmpxchg implementation. Avoids this warning under -Warray-bounds with
GCC 11:

net/sched/cls_tcindex.c: In function 'tcindex_set_parms':
./arch/m68k/include/asm/cmpxchg.h:64:17: warning: array subscript 'volatile struct __xchg_dummy[0]' is partly outside array bounds of 'struct tcf_result[1]' [-Warray-bounds]
   64 |                 __asm__ __volatile__
      |                 ^~~~~~~
net/sched/cls_tcindex.c:338:27: note: while referencing 'cr'
  338 |         struct tcf_result cr = {};
      |                           ^~

No binary output difference are seen from this change.

[1] commit 3364c6ce23c6 ("arm64: atomics: lse: Dereference matching size")

Cc: "Peter Zijlstra (Intel)" <peterz@infradead.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Greg Ungerer <gerg@linux-m68k.org>
Cc: linux-m68k@lists.linux-m68k.org
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Link: https://lore.kernel.org/lkml/CAMuHMdVRrD+2zKoHxAaQdDuiK5JFDanbv0SJ91OdWfx+eyekPQ@mail.gmail.com
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 arch/m68k/include/asm/cmpxchg.h | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/arch/m68k/include/asm/cmpxchg.h b/arch/m68k/include/asm/cmpxchg.h
index e8ca4b0ccefaa..6cf464cdab067 100644
--- a/arch/m68k/include/asm/cmpxchg.h
+++ b/arch/m68k/include/asm/cmpxchg.h
@@ -4,8 +4,7 @@
 
 #include <linux/irqflags.h>
 
-struct __xchg_dummy { unsigned long a[100]; };
-#define __xg(x) ((volatile struct __xchg_dummy *)(x))
+#define __xg(type, x) ((volatile type *)(x))
 
 extern unsigned long __invalid_xchg_size(unsigned long, volatile void *, int);
 
@@ -50,7 +49,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int siz
 			 "1:\n\t"
 			 "casb %0,%1,%2\n\t"
 			 "jne 1b"
-			 : "=&d" (x) : "d" (x), "m" (*__xg(ptr)) : "memory");
+			 : "=&d" (x) : "d" (x), "m" (*__xg(u8, ptr)) : "memory");
 		break;
 	case 2:
 		__asm__ __volatile__
@@ -58,7 +57,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int siz
 			 "1:\n\t"
 			 "casw %0,%1,%2\n\t"
 			 "jne 1b"
-			 : "=&d" (x) : "d" (x), "m" (*__xg(ptr)) : "memory");
+			 : "=&d" (x) : "d" (x), "m" (*__xg(u16, ptr)) : "memory");
 		break;
 	case 4:
 		__asm__ __volatile__
@@ -66,7 +65,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int siz
 			 "1:\n\t"
 			 "casl %0,%1,%2\n\t"
 			 "jne 1b"
-			 : "=&d" (x) : "d" (x), "m" (*__xg(ptr)) : "memory");
+			 : "=&d" (x) : "d" (x), "m" (*__xg(u32, ptr)) : "memory");
 		break;
 	default:
 		x = __invalid_xchg_size(x, ptr, size);
-- 
GitLab


From 545c272232caded141abb273745091d6a0d8b14b Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 2 Feb 2022 14:13:15 -0800
Subject: [PATCH 0362/1586] alpha: Silence -Warray-bounds warnings

GCC 11 (incorrectly[1]) assumes that literal values cast to (void *)
should be treated like a NULL pointer with an offset, and raises
diagnostics when doing bounds checking:

In function '__memset',
    inlined from '__bad_pagetable' at arch/alpha/mm/init.c:79:2:
./arch/alpha/include/asm/string.h:37:32: error: '__builtin_memset' offset [0, 8191] is out of the bounds [0, 0] [-Werror=array-bounds]
   37 |                         return __builtin_memset(s, c, n);
      |                                ^~~~~~~~~~~~~~~~~~~~~~~~~
In function '__memset',
    inlined from '__bad_page' at arch/alpha/mm/init.c:86:2:
./arch/alpha/include/asm/string.h:37:32: error: '__builtin_memset' offset [0, 8191] is out of the bounds [0, 0] [-Werror=array-bounds]
   37 |                         return __builtin_memset(s, c, n);
      |                                ^~~~~~~~~~~~~~~~~~~~~~~~~
In function '__memset',
    inlined from 'paging_init' at arch/alpha/mm/init.c:256:2:
./arch/alpha/include/asm/string.h:37:32: error: '__builtin_memset' offset [0, 8191] is out of the bounds [0, 0] [-Werror=array-bounds]
   37 |                         return __builtin_memset(s, c, n);

This has been solved in other places[2] already by using the recently
added absolute_pointer() macro. Do the same here.

[1] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99578
[2] https://lore.kernel.org/all/20210912160149.2227137-1-linux@roeck-us.net/

Cc: Richard Henderson <rth@twiddle.net>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: linux-alpha@vger.kernel.org
Reviewed-and-tested-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 arch/alpha/mm/init.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c
index f6114d03357c4..7511723b76693 100644
--- a/arch/alpha/mm/init.c
+++ b/arch/alpha/mm/init.c
@@ -76,14 +76,14 @@ pgd_alloc(struct mm_struct *mm)
 pmd_t *
 __bad_pagetable(void)
 {
-	memset((void *) EMPTY_PGT, 0, PAGE_SIZE);
+	memset(absolute_pointer(EMPTY_PGT), 0, PAGE_SIZE);
 	return (pmd_t *) EMPTY_PGT;
 }
 
 pte_t
 __bad_page(void)
 {
-	memset((void *) EMPTY_PGE, 0, PAGE_SIZE);
+	memset(absolute_pointer(EMPTY_PGE), 0, PAGE_SIZE);
 	return pte_mkdirty(mk_pte(virt_to_page(EMPTY_PGE), PAGE_SHARED));
 }
 
@@ -253,7 +253,7 @@ void __init paging_init(void)
 	free_area_init(max_zone_pfn);
 
 	/* Initialize the kernel's ZERO_PGE. */
-	memset((void *)ZERO_PGE, 0, PAGE_SIZE);
+	memset(absolute_pointer(ZERO_PGE), 0, PAGE_SIZE);
 }
 
 #if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_SRM)
-- 
GitLab


From 2d2329787ba2e70eae330f1cecd61a9576fe65de Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Sat, 12 Feb 2022 06:31:44 -0800
Subject: [PATCH 0363/1586] regmap: irq: cleanup comments

Replace the second 'which' with 'the'.
Change 'acknowleding' to 'acknowledging'.

Signed-off-by: Tom Rix <trix@redhat.com>
Link: https://lore.kernel.org/r/20220212143144.2648689-1-trix@redhat.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/base/regmap/regmap-irq.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/base/regmap/regmap-irq.c b/drivers/base/regmap/regmap-irq.c
index d5604f4972967..81f6356f05892 100644
--- a/drivers/base/regmap/regmap-irq.c
+++ b/drivers/base/regmap/regmap-irq.c
@@ -537,7 +537,7 @@ static irqreturn_t regmap_irq_thread(int irq, void *d)
 
 	/*
 	 * Ignore masked IRQs and ack if we need to; we ack early so
-	 * there is no race between handling and acknowleding the
+	 * there is no race between handling and acknowledging the
 	 * interrupt.  We assume that typically few of the interrupts
 	 * will fire simultaneously so don't worry about overhead from
 	 * doing a write per register.
@@ -1082,7 +1082,7 @@ EXPORT_SYMBOL_GPL(devm_regmap_add_irq_chip);
 /**
  * devm_regmap_del_irq_chip() - Resource managed regmap_del_irq_chip()
  *
- * @dev: Device for which which resource was allocated.
+ * @dev: Device for which the resource was allocated.
  * @irq: Primary IRQ for the device.
  * @data: &regmap_irq_chip_data allocated by regmap_add_irq_chip().
  *
-- 
GitLab


From a94e5cd8457fb46866459562ef6c53f9dcc375f7 Mon Sep 17 00:00:00 2001
From: Vincent Whitchurch <vincent.whitchurch@axis.com>
Date: Fri, 11 Feb 2022 14:24:49 +0100
Subject: [PATCH 0364/1586] regulator: tps62864: Fix bindings for SW property

The property is not a pattern, so it should use 'properties' instead of
'patternProperties'.  Also, unevaluatedProperties should be set to false
like in the other regulator bindings.

Reported-by: Rob Herring <robh@kernel.org>
Signed-off-by: Vincent Whitchurch <vincent.whitchurch@axis.com>
Link: https://lore.kernel.org/r/20220211132449.2899924-1-vincent.whitchurch@axis.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 Documentation/devicetree/bindings/regulator/ti,tps62864.yaml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/regulator/ti,tps62864.yaml b/Documentation/devicetree/bindings/regulator/ti,tps62864.yaml
index e3d739a37ab04..0f29c75f42ea6 100644
--- a/Documentation/devicetree/bindings/regulator/ti,tps62864.yaml
+++ b/Documentation/devicetree/bindings/regulator/ti,tps62864.yaml
@@ -23,10 +23,11 @@ properties:
   regulators:
     type: object
 
-    patternProperties:
+    properties:
       "SW":
         type: object
         $ref: regulator.yaml#
+        unevaluatedProperties: false
 
     additionalProperties: false
 
-- 
GitLab


From 715bea3568e78b80b1d127d8452eac0e3cb6f299 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Almeida?= <andrealmeid@collabora.com>
Date: Fri, 11 Feb 2022 11:31:53 -0300
Subject: [PATCH 0365/1586] spi: amd: Use iopoll for busy waiting
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Instead of implementing a custom IO busy wait function, just use
readl_poll_timeout().

Signed-off-by: André Almeida <andrealmeid@collabora.com>
Link: https://lore.kernel.org/r/20220211143155.75513-2-andrealmeid@collabora.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-amd.c | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/drivers/spi/spi-amd.c b/drivers/spi/spi-amd.c
index 4b3ac7aceaf62..899b8d90ff61b 100644
--- a/drivers/spi/spi-amd.c
+++ b/drivers/spi/spi-amd.c
@@ -12,6 +12,7 @@
 #include <linux/platform_device.h>
 #include <linux/delay.h>
 #include <linux/spi/spi.h>
+#include <linux/iopoll.h>
 
 #define AMD_SPI_CTRL0_REG	0x00
 #define AMD_SPI_EXEC_CMD	BIT(16)
@@ -103,16 +104,10 @@ static inline void amd_spi_set_tx_count(struct amd_spi *amd_spi, u8 tx_count)
 
 static int amd_spi_busy_wait(struct amd_spi *amd_spi)
 {
-	int timeout = 100000;
+	u32 val;
 
-	/* poll for SPI bus to become idle */
-	while (amd_spi_readreg32(amd_spi, AMD_SPI_CTRL0_REG) & AMD_SPI_BUSY) {
-		usleep_range(10, 20);
-		if (timeout-- < 0)
-			return -ETIMEDOUT;
-	}
-
-	return 0;
+	return readl_poll_timeout(amd_spi->io_remap_addr + AMD_SPI_CTRL0_REG,
+				  val, !(val & AMD_SPI_BUSY), 20, 2000000);
 }
 
 static int amd_spi_execute_opcode(struct amd_spi *amd_spi)
-- 
GitLab


From fbc71367288cf1902822618e00c7c1cf6f35348d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Almeida?= <andrealmeid@collabora.com>
Date: Fri, 11 Feb 2022 11:31:54 -0300
Subject: [PATCH 0366/1586] spi: amd: Remove needless rom_addr variable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

rom_addr is not used in the code, so we can just drop it from struct
amd_spi.

Signed-off-by: André Almeida <andrealmeid@collabora.com>
Link: https://lore.kernel.org/r/20220211143155.75513-3-andrealmeid@collabora.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-amd.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/spi/spi-amd.c b/drivers/spi/spi-amd.c
index 899b8d90ff61b..417ce14a21c63 100644
--- a/drivers/spi/spi-amd.c
+++ b/drivers/spi/spi-amd.c
@@ -38,7 +38,6 @@
 struct amd_spi {
 	void __iomem *io_remap_addr;
 	unsigned long io_base_addr;
-	u32 rom_addr;
 };
 
 static inline u8 amd_spi_readreg8(struct amd_spi *amd_spi, int idx)
-- 
GitLab


From 209043554915d7c51ac112a668ad1a255e1bea61 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Almeida?= <andrealmeid@collabora.com>
Date: Fri, 11 Feb 2022 11:31:55 -0300
Subject: [PATCH 0367/1586] spi: amd: Add support for version AMDI0062
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add support for the AMD SPI controller version AMDI0062. Do this in a
modular way where's easy to add new versions.

Signed-off-by: André Almeida <andrealmeid@collabora.com>
Link: https://lore.kernel.org/r/20220211143155.75513-4-andrealmeid@collabora.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-amd.c | 81 +++++++++++++++++++++++++++++++++++++------
 1 file changed, 71 insertions(+), 10 deletions(-)

diff --git a/drivers/spi/spi-amd.c b/drivers/spi/spi-amd.c
index 417ce14a21c63..d909afac6e219 100644
--- a/drivers/spi/spi-amd.c
+++ b/drivers/spi/spi-amd.c
@@ -19,6 +19,10 @@
 #define AMD_SPI_FIFO_CLEAR	BIT(20)
 #define AMD_SPI_BUSY		BIT(31)
 
+#define AMD_SPI_OPCODE_REG	0x45
+#define AMD_SPI_CMD_TRIGGER_REG	0x47
+#define AMD_SPI_TRIGGER_CMD	BIT(7)
+
 #define AMD_SPI_OPCODE_MASK	0xFF
 
 #define AMD_SPI_ALT_CS_REG	0x1D
@@ -35,9 +39,15 @@
 #define AMD_SPI_XFER_TX		1
 #define AMD_SPI_XFER_RX		2
 
+enum amd_spi_versions {
+	AMD_SPI_V1 = 1,	/* AMDI0061 */
+	AMD_SPI_V2,	/* AMDI0062 */
+};
+
 struct amd_spi {
 	void __iomem *io_remap_addr;
 	unsigned long io_base_addr;
+	enum amd_spi_versions version;
 };
 
 static inline u8 amd_spi_readreg8(struct amd_spi *amd_spi, int idx)
@@ -81,14 +91,29 @@ static void amd_spi_select_chip(struct amd_spi *amd_spi, u8 cs)
 	amd_spi_setclear_reg8(amd_spi, AMD_SPI_ALT_CS_REG, cs, AMD_SPI_ALT_CS_MASK);
 }
 
+static inline void amd_spi_clear_chip(struct amd_spi *amd_spi, u8 chip_select)
+{
+	amd_spi_writereg8(amd_spi, AMD_SPI_ALT_CS_REG, chip_select & ~AMD_SPI_ALT_CS_MASK);
+}
+
 static void amd_spi_clear_fifo_ptr(struct amd_spi *amd_spi)
 {
 	amd_spi_setclear_reg32(amd_spi, AMD_SPI_CTRL0_REG, AMD_SPI_FIFO_CLEAR, AMD_SPI_FIFO_CLEAR);
 }
 
-static void amd_spi_set_opcode(struct amd_spi *amd_spi, u8 cmd_opcode)
+static int amd_spi_set_opcode(struct amd_spi *amd_spi, u8 cmd_opcode)
 {
-	amd_spi_setclear_reg32(amd_spi, AMD_SPI_CTRL0_REG, cmd_opcode, AMD_SPI_OPCODE_MASK);
+	switch (amd_spi->version) {
+	case AMD_SPI_V1:
+		amd_spi_setclear_reg32(amd_spi, AMD_SPI_CTRL0_REG, cmd_opcode,
+				       AMD_SPI_OPCODE_MASK);
+		return 0;
+	case AMD_SPI_V2:
+		amd_spi_writereg8(amd_spi, AMD_SPI_OPCODE_REG, cmd_opcode);
+		return 0;
+	default:
+		return -ENODEV;
+	}
 }
 
 static inline void amd_spi_set_rx_count(struct amd_spi *amd_spi, u8 rx_count)
@@ -104,9 +129,21 @@ static inline void amd_spi_set_tx_count(struct amd_spi *amd_spi, u8 tx_count)
 static int amd_spi_busy_wait(struct amd_spi *amd_spi)
 {
 	u32 val;
+	int reg;
+
+	switch (amd_spi->version) {
+	case AMD_SPI_V1:
+		reg = AMD_SPI_CTRL0_REG;
+		break;
+	case AMD_SPI_V2:
+		reg = AMD_SPI_STATUS_REG;
+		break;
+	default:
+		return -ENODEV;
+	}
 
-	return readl_poll_timeout(amd_spi->io_remap_addr + AMD_SPI_CTRL0_REG,
-				  val, !(val & AMD_SPI_BUSY), 20, 2000000);
+	return readl_poll_timeout(amd_spi->io_remap_addr + reg, val,
+				  !(val & AMD_SPI_BUSY), 20, 2000000);
 }
 
 static int amd_spi_execute_opcode(struct amd_spi *amd_spi)
@@ -117,10 +154,20 @@ static int amd_spi_execute_opcode(struct amd_spi *amd_spi)
 	if (ret)
 		return ret;
 
-	/* Set ExecuteOpCode bit in the CTRL0 register */
-	amd_spi_setclear_reg32(amd_spi, AMD_SPI_CTRL0_REG, AMD_SPI_EXEC_CMD, AMD_SPI_EXEC_CMD);
-
-	return 0;
+	switch (amd_spi->version) {
+	case AMD_SPI_V1:
+		/* Set ExecuteOpCode bit in the CTRL0 register */
+		amd_spi_setclear_reg32(amd_spi, AMD_SPI_CTRL0_REG, AMD_SPI_EXEC_CMD,
+				       AMD_SPI_EXEC_CMD);
+		return 0;
+	case AMD_SPI_V2:
+		/* Trigger the command execution */
+		amd_spi_setclear_reg8(amd_spi, AMD_SPI_CMD_TRIGGER_REG,
+				      AMD_SPI_TRIGGER_CMD, AMD_SPI_TRIGGER_CMD);
+		return 0;
+	default:
+		return -ENODEV;
+	}
 }
 
 static int amd_spi_master_setup(struct spi_device *spi)
@@ -190,6 +237,17 @@ static inline int amd_spi_fifo_xfer(struct amd_spi *amd_spi,
 	message->actual_length = tx_len + rx_len + 1;
 	/* complete the transaction */
 	message->status = 0;
+
+	switch (amd_spi->version) {
+	case AMD_SPI_V1:
+		break;
+	case AMD_SPI_V2:
+		amd_spi_clear_chip(amd_spi, message->spi->chip_select);
+		break;
+	default:
+		return -ENODEV;
+	}
+
 	spi_finalize_current_message(master);
 
 	return 0;
@@ -235,6 +293,8 @@ static int amd_spi_probe(struct platform_device *pdev)
 	}
 	dev_dbg(dev, "io_remap_address: %p\n", amd_spi->io_remap_addr);
 
+	amd_spi->version = (enum amd_spi_versions) device_get_match_data(dev);
+
 	/* Initialize the spi_master fields */
 	master->bus_num = 0;
 	master->num_chipselect = 4;
@@ -260,7 +320,8 @@ err_free_master:
 
 #ifdef CONFIG_ACPI
 static const struct acpi_device_id spi_acpi_match[] = {
-	{ "AMDI0061", 0 },
+	{ "AMDI0061", AMD_SPI_V1 },
+	{ "AMDI0062", AMD_SPI_V2 },
 	{},
 };
 MODULE_DEVICE_TABLE(acpi, spi_acpi_match);
@@ -269,7 +330,7 @@ MODULE_DEVICE_TABLE(acpi, spi_acpi_match);
 static struct platform_driver amd_spi_driver = {
 	.driver = {
 		.name = "amd_spi",
-		.acpi_match_table = ACPI_PTR(spi_acpi_match),
+		.acpi_match_table = spi_acpi_match,
 	},
 	.probe = amd_spi_probe,
 };
-- 
GitLab


From cd149eff8d2201a63c074a6d9d03e52926aa535d Mon Sep 17 00:00:00 2001
From: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Wed, 9 Feb 2022 15:27:04 +0300
Subject: [PATCH 0368/1586] mtd: spi-nor: intel-spi: Disable write protection
 only if asked

Currently the driver tries to disable the BIOS write protection
automatically even if this is not what the user wants. For this reason
modify the driver so that by default it does not touch the write
protection. Only if specifically asked by the user (setting writeable=1
command line parameter) the driver tries to disable the BIOS write
protection.

Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Mauro Lima <mauro.lima@eclypsium.com>
Reviewed-by: Tudor Ambarus <tudor.ambarus@microchip.com>
Acked-by: Lee Jones <lee.jones@linaro.org>
Link: https://lore.kernel.org/r/20220209122706.42439-2-mika.westerberg@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/mfd/lpc_ich.c                         | 59 +++++++++++++++++--
 .../mtd/spi-nor/controllers/intel-spi-pci.c   | 29 +++++----
 drivers/mtd/spi-nor/controllers/intel-spi.c   | 41 ++++++-------
 include/linux/platform_data/x86/intel-spi.h   |  6 +-
 4 files changed, 96 insertions(+), 39 deletions(-)

diff --git a/drivers/mfd/lpc_ich.c b/drivers/mfd/lpc_ich.c
index f10e53187f67a..9ffab9aafd81b 100644
--- a/drivers/mfd/lpc_ich.c
+++ b/drivers/mfd/lpc_ich.c
@@ -63,6 +63,8 @@
 #define SPIBASE_BYT		0x54
 #define SPIBASE_BYT_SZ		512
 #define SPIBASE_BYT_EN		BIT(1)
+#define BYT_BCR			0xfc
+#define BYT_BCR_WPD		BIT(0)
 
 #define SPIBASE_LPT		0x3800
 #define SPIBASE_LPT_SZ		512
@@ -1084,12 +1086,57 @@ wdt_done:
 	return ret;
 }
 
+static bool lpc_ich_byt_set_writeable(void __iomem *base, void *data)
+{
+	u32 val;
+
+	val = readl(base + BYT_BCR);
+	if (!(val & BYT_BCR_WPD)) {
+		val |= BYT_BCR_WPD;
+		writel(val, base + BYT_BCR);
+		val = readl(base + BYT_BCR);
+	}
+
+	return val & BYT_BCR_WPD;
+}
+
+static bool lpc_ich_lpt_set_writeable(void __iomem *base, void *data)
+{
+	struct pci_dev *pdev = data;
+	u32 bcr;
+
+	pci_read_config_dword(pdev, BCR, &bcr);
+	if (!(bcr & BCR_WPD)) {
+		bcr |= BCR_WPD;
+		pci_write_config_dword(pdev, BCR, bcr);
+		pci_read_config_dword(pdev, BCR, &bcr);
+	}
+
+	return bcr & BCR_WPD;
+}
+
+static bool lpc_ich_bxt_set_writeable(void __iomem *base, void *data)
+{
+	unsigned int spi = PCI_DEVFN(13, 2);
+	struct pci_bus *bus = data;
+	u32 bcr;
+
+	pci_bus_read_config_dword(bus, spi, BCR, &bcr);
+	if (!(bcr & BCR_WPD)) {
+		bcr |= BCR_WPD;
+		pci_bus_write_config_dword(bus, spi, BCR, bcr);
+		pci_bus_read_config_dword(bus, spi, BCR, &bcr);
+	}
+
+	return bcr & BCR_WPD;
+}
+
 static int lpc_ich_init_spi(struct pci_dev *dev)
 {
 	struct lpc_ich_priv *priv = pci_get_drvdata(dev);
 	struct resource *res = &intel_spi_res[0];
 	struct intel_spi_boardinfo *info;
-	u32 spi_base, rcba, bcr;
+	u32 spi_base, rcba;
 
 	info = devm_kzalloc(&dev->dev, sizeof(*info), GFP_KERNEL);
 	if (!info)
@@ -1103,6 +1150,8 @@ static int lpc_ich_init_spi(struct pci_dev *dev)
 		if (spi_base & SPIBASE_BYT_EN) {
 			res->start = spi_base & ~(SPIBASE_BYT_SZ - 1);
 			res->end = res->start + SPIBASE_BYT_SZ - 1;
+
+			info->set_writeable = lpc_ich_byt_set_writeable;
 		}
 		break;
 
@@ -1113,8 +1162,8 @@ static int lpc_ich_init_spi(struct pci_dev *dev)
 			res->start = spi_base + SPIBASE_LPT;
 			res->end = res->start + SPIBASE_LPT_SZ - 1;
 
-			pci_read_config_dword(dev, BCR, &bcr);
-			info->writeable = !!(bcr & BCR_WPD);
+			info->set_writeable = lpc_ich_lpt_set_writeable;
+			info->data = dev;
 		}
 		break;
 
@@ -1135,8 +1184,8 @@ static int lpc_ich_init_spi(struct pci_dev *dev)
 			res->start = spi_base & 0xfffffff0;
 			res->end = res->start + SPIBASE_APL_SZ - 1;
 
-			pci_bus_read_config_dword(bus, spi, BCR, &bcr);
-			info->writeable = !!(bcr & BCR_WPD);
+			info->set_writeable = lpc_ich_bxt_set_writeable;
+			info->data = bus;
 		}
 
 		pci_bus_write_config_byte(bus, p2sb, 0xe1, 0x1);
diff --git a/drivers/mtd/spi-nor/controllers/intel-spi-pci.c b/drivers/mtd/spi-nor/controllers/intel-spi-pci.c
index 1bc53b8bb88a9..508f7ca098eff 100644
--- a/drivers/mtd/spi-nor/controllers/intel-spi-pci.c
+++ b/drivers/mtd/spi-nor/controllers/intel-spi-pci.c
@@ -16,12 +16,30 @@
 #define BCR		0xdc
 #define BCR_WPD		BIT(0)
 
+static bool intel_spi_pci_set_writeable(void __iomem *base, void *data)
+{
+	struct pci_dev *pdev = data;
+	u32 bcr;
+
+	/* Try to make the chip read/write */
+	pci_read_config_dword(pdev, BCR, &bcr);
+	if (!(bcr & BCR_WPD)) {
+		bcr |= BCR_WPD;
+		pci_write_config_dword(pdev, BCR, bcr);
+		pci_read_config_dword(pdev, BCR, &bcr);
+	}
+
+	return bcr & BCR_WPD;
+}
+
 static const struct intel_spi_boardinfo bxt_info = {
 	.type = INTEL_SPI_BXT,
+	.set_writeable = intel_spi_pci_set_writeable,
 };
 
 static const struct intel_spi_boardinfo cnl_info = {
 	.type = INTEL_SPI_CNL,
+	.set_writeable = intel_spi_pci_set_writeable,
 };
 
 static int intel_spi_pci_probe(struct pci_dev *pdev,
@@ -29,7 +47,6 @@ static int intel_spi_pci_probe(struct pci_dev *pdev,
 {
 	struct intel_spi_boardinfo *info;
 	struct intel_spi *ispi;
-	u32 bcr;
 	int ret;
 
 	ret = pcim_enable_device(pdev);
@@ -41,15 +58,7 @@ static int intel_spi_pci_probe(struct pci_dev *pdev,
 	if (!info)
 		return -ENOMEM;
 
-	/* Try to make the chip read/write */
-	pci_read_config_dword(pdev, BCR, &bcr);
-	if (!(bcr & BCR_WPD)) {
-		bcr |= BCR_WPD;
-		pci_write_config_dword(pdev, BCR, bcr);
-		pci_read_config_dword(pdev, BCR, &bcr);
-	}
-	info->writeable = !!(bcr & BCR_WPD);
-
+	info->data = pdev;
 	ispi = intel_spi_probe(&pdev->dev, &pdev->resource[0], info);
 	if (IS_ERR(ispi))
 		return PTR_ERR(ispi);
diff --git a/drivers/mtd/spi-nor/controllers/intel-spi.c b/drivers/mtd/spi-nor/controllers/intel-spi.c
index a413892ff449f..f35597cbea0ca 100644
--- a/drivers/mtd/spi-nor/controllers/intel-spi.c
+++ b/drivers/mtd/spi-nor/controllers/intel-spi.c
@@ -131,7 +131,6 @@
  * @sregs: Start of software sequencer registers
  * @nregions: Maximum number of regions
  * @pr_num: Maximum number of protected range registers
- * @writeable: Is the chip writeable
  * @locked: Is SPI setting locked
  * @swseq_reg: Use SW sequencer in register reads/writes
  * @swseq_erase: Use SW sequencer in erase operation
@@ -149,7 +148,6 @@ struct intel_spi {
 	void __iomem *sregs;
 	size_t nregions;
 	size_t pr_num;
-	bool writeable;
 	bool locked;
 	bool swseq_reg;
 	bool swseq_erase;
@@ -304,6 +302,14 @@ static int intel_spi_wait_sw_busy(struct intel_spi *ispi)
 				  INTEL_SPI_TIMEOUT * 1000);
 }
 
+static bool intel_spi_set_writeable(struct intel_spi *ispi)
+{
+	if (!ispi->info->set_writeable)
+		return false;
+
+	return ispi->info->set_writeable(ispi->base, ispi->info->data);
+}
+
 static int intel_spi_init(struct intel_spi *ispi)
 {
 	u32 opmenu0, opmenu1, lvscc, uvscc, val;
@@ -316,19 +322,6 @@ static int intel_spi_init(struct intel_spi *ispi)
 		ispi->nregions = BYT_FREG_NUM;
 		ispi->pr_num = BYT_PR_NUM;
 		ispi->swseq_reg = true;
-
-		if (writeable) {
-			/* Disable write protection */
-			val = readl(ispi->base + BYT_BCR);
-			if (!(val & BYT_BCR_WPD)) {
-				val |= BYT_BCR_WPD;
-				writel(val, ispi->base + BYT_BCR);
-				val = readl(ispi->base + BYT_BCR);
-			}
-
-			ispi->writeable = !!(val & BYT_BCR_WPD);
-		}
-
 		break;
 
 	case INTEL_SPI_LPT:
@@ -358,6 +351,12 @@ static int intel_spi_init(struct intel_spi *ispi)
 		return -EINVAL;
 	}
 
+	/* Try to disable write protection if user asked to do so */
+	if (writeable && !intel_spi_set_writeable(ispi)) {
+		dev_warn(ispi->dev, "can't disable chip write protection\n");
+		writeable = false;
+	}
+
 	/* Disable #SMI generation from HW sequencer */
 	val = readl(ispi->base + HSFSTS_CTL);
 	val &= ~HSFSTS_CTL_FSMIE;
@@ -884,9 +883,12 @@ static void intel_spi_fill_partition(struct intel_spi *ispi,
 		/*
 		 * If any of the regions have protection bits set, make the
 		 * whole partition read-only to be on the safe side.
+		 *
+		 * Also if the user did not ask the chip to be writeable
+		 * mask the bit too.
 		 */
-		if (intel_spi_is_protected(ispi, base, limit))
-			ispi->writeable = false;
+		if (!writeable || intel_spi_is_protected(ispi, base, limit))
+			part->mask_flags |= MTD_WRITEABLE;
 
 		end = (limit << 12) + 4096;
 		if (end > part->size)
@@ -927,7 +929,6 @@ struct intel_spi *intel_spi_probe(struct device *dev,
 
 	ispi->dev = dev;
 	ispi->info = info;
-	ispi->writeable = info->writeable;
 
 	ret = intel_spi_init(ispi);
 	if (ret)
@@ -945,10 +946,6 @@ struct intel_spi *intel_spi_probe(struct device *dev,
 
 	intel_spi_fill_partition(ispi, &part);
 
-	/* Prevent writes if not explicitly enabled */
-	if (!ispi->writeable || !writeable)
-		ispi->nor.mtd.flags &= ~MTD_WRITEABLE;
-
 	ret = mtd_device_register(&ispi->nor.mtd, &part, 1);
 	if (ret)
 		return ERR_PTR(ret);
diff --git a/include/linux/platform_data/x86/intel-spi.h b/include/linux/platform_data/x86/intel-spi.h
index 7f53a5c6f35e8..7dda3f6904654 100644
--- a/include/linux/platform_data/x86/intel-spi.h
+++ b/include/linux/platform_data/x86/intel-spi.h
@@ -19,11 +19,13 @@ enum intel_spi_type {
 /**
  * struct intel_spi_boardinfo - Board specific data for Intel SPI driver
  * @type: Type which this controller is compatible with
- * @writeable: The chip is writeable
+ * @set_writeable: Try to make the chip writeable (optional)
+ * @data: Data to be passed to @set_writeable can be %NULL
  */
 struct intel_spi_boardinfo {
 	enum intel_spi_type type;
-	bool writeable;
+	bool (*set_writeable)(void __iomem *base, void *data);
+	void *data;
 };
 
 #endif /* INTEL_SPI_PDATA_H */
-- 
GitLab


From e23e5a05d1fd9479586c40ffbcc056b3e34ef816 Mon Sep 17 00:00:00 2001
From: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Wed, 9 Feb 2022 15:27:05 +0300
Subject: [PATCH 0369/1586] mtd: spi-nor: intel-spi: Convert to SPI MEM

The preferred way to implement SPI-NOR controller drivers is through SPI
subsubsystem utilizing the SPI MEM core functions. This converts the
Intel SPI flash controller driver over the SPI MEM by moving the driver
from SPI-NOR subsystem to SPI subsystem and in one go make it use the
SPI MEM functions. The driver name will be changed from intel-spi to
spi-intel to match the convention used in the SPI subsystem.

Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Mauro Lima <mauro.lima@eclypsium.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Acked-by: Lee Jones <lee.jones@linaro.org>
Acked-by: Pratyush Yadav <p.yadav@ti.com>
Reviewed-by: Tudor Ambarus <tudor.ambarus@microchip.com>
Link: https://lore.kernel.org/r/20220209122706.42439-3-mika.westerberg@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/mtd/spi-nor/controllers/Kconfig       |  36 -
 drivers/mtd/spi-nor/controllers/Makefile      |   3 -
 drivers/mtd/spi-nor/controllers/intel-spi.h   |  21 -
 drivers/spi/Kconfig                           |  39 +
 drivers/spi/Makefile                          |   3 +
 .../intel-spi-pci.c => spi/spi-intel-pci.c}   |  20 +-
 .../spi-intel-platform.c}                     |  21 +-
 .../intel-spi.c => spi/spi-intel.c}           | 829 ++++++++++++------
 drivers/spi/spi-intel.h                       |  19 +
 include/linux/mfd/lpc_ich.h                   |   2 +-
 .../x86/{intel-spi.h => spi-intel.h}          |   6 +-
 11 files changed, 628 insertions(+), 371 deletions(-)
 delete mode 100644 drivers/mtd/spi-nor/controllers/intel-spi.h
 rename drivers/{mtd/spi-nor/controllers/intel-spi-pci.c => spi/spi-intel-pci.c} (86%)
 rename drivers/{mtd/spi-nor/controllers/intel-spi-platform.c => spi/spi-intel-platform.c} (65%)
 rename drivers/{mtd/spi-nor/controllers/intel-spi.c => spi/spi-intel.c} (57%)
 create mode 100644 drivers/spi/spi-intel.h
 rename include/linux/platform_data/x86/{intel-spi.h => spi-intel.h} (89%)

diff --git a/drivers/mtd/spi-nor/controllers/Kconfig b/drivers/mtd/spi-nor/controllers/Kconfig
index 5c0e0ec2e6d1f..50f4f3484d426 100644
--- a/drivers/mtd/spi-nor/controllers/Kconfig
+++ b/drivers/mtd/spi-nor/controllers/Kconfig
@@ -26,39 +26,3 @@ config SPI_NXP_SPIFI
 	  SPIFI is a specialized controller for connecting serial SPI
 	  Flash. Enable this option if you have a device with a SPIFI
 	  controller and want to access the Flash as a mtd device.
-
-config SPI_INTEL_SPI
-	tristate
-
-config SPI_INTEL_SPI_PCI
-	tristate "Intel PCH/PCU SPI flash PCI driver (DANGEROUS)"
-	depends on X86 && PCI
-	select SPI_INTEL_SPI
-	help
-	  This enables PCI support for the Intel PCH/PCU SPI controller in
-	  master mode. This controller is present in modern Intel hardware
-	  and is used to hold BIOS and other persistent settings. Using
-	  this driver it is possible to upgrade BIOS directly from Linux.
-
-	  Say N here unless you know what you are doing. Overwriting the
-	  SPI flash may render the system unbootable.
-
-	  To compile this driver as a module, choose M here: the module
-	  will be called intel-spi-pci.
-
-config SPI_INTEL_SPI_PLATFORM
-	tristate "Intel PCH/PCU SPI flash platform driver (DANGEROUS)"
-	depends on X86
-	select SPI_INTEL_SPI
-	help
-	  This enables platform support for the Intel PCH/PCU SPI
-	  controller in master mode. This controller is present in modern
-	  Intel hardware and is used to hold BIOS and other persistent
-	  settings. Using this driver it is possible to upgrade BIOS
-	  directly from Linux.
-
-	  Say N here unless you know what you are doing. Overwriting the
-	  SPI flash may render the system unbootable.
-
-	  To compile this driver as a module, choose M here: the module
-	  will be called intel-spi-platform.
diff --git a/drivers/mtd/spi-nor/controllers/Makefile b/drivers/mtd/spi-nor/controllers/Makefile
index e7abba491d983..6e2a1dc684662 100644
--- a/drivers/mtd/spi-nor/controllers/Makefile
+++ b/drivers/mtd/spi-nor/controllers/Makefile
@@ -2,6 +2,3 @@
 obj-$(CONFIG_SPI_ASPEED_SMC)	+= aspeed-smc.o
 obj-$(CONFIG_SPI_HISI_SFC)	+= hisi-sfc.o
 obj-$(CONFIG_SPI_NXP_SPIFI)	+= nxp-spifi.o
-obj-$(CONFIG_SPI_INTEL_SPI)	+= intel-spi.o
-obj-$(CONFIG_SPI_INTEL_SPI_PCI)	+= intel-spi-pci.o
-obj-$(CONFIG_SPI_INTEL_SPI_PLATFORM)	+= intel-spi-platform.o
diff --git a/drivers/mtd/spi-nor/controllers/intel-spi.h b/drivers/mtd/spi-nor/controllers/intel-spi.h
deleted file mode 100644
index f2871179fd34d..0000000000000
--- a/drivers/mtd/spi-nor/controllers/intel-spi.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Intel PCH/PCU SPI flash driver.
- *
- * Copyright (C) 2016, Intel Corporation
- * Author: Mika Westerberg <mika.westerberg@linux.intel.com>
- */
-
-#ifndef INTEL_SPI_H
-#define INTEL_SPI_H
-
-#include <linux/platform_data/x86/intel-spi.h>
-
-struct intel_spi;
-struct resource;
-
-struct intel_spi *intel_spi_probe(struct device *dev,
-	struct resource *mem, const struct intel_spi_boardinfo *info);
-int intel_spi_remove(struct intel_spi *ispi);
-
-#endif /* INTEL_SPI_H */
diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index b2a8821971e1d..0201257511fbe 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -427,6 +427,45 @@ config SPI_INGENIC
 	  To compile this driver as a module, choose M here: the module
 	  will be called spi-ingenic.
 
+config SPI_INTEL
+	tristate
+
+config SPI_INTEL_PCI
+	tristate "Intel PCH/PCU SPI flash PCI driver (DANGEROUS)"
+	depends on PCI
+	depends on X86 || COMPILE_TEST
+	depends on SPI_MEM
+	select SPI_INTEL
+	help
+	  This enables PCI support for the Intel PCH/PCU SPI controller in
+	  master mode. This controller is present in modern Intel hardware
+	  and is used to hold BIOS and other persistent settings. Using
+	  this driver it is possible to upgrade BIOS directly from Linux.
+
+	  Say N here unless you know what you are doing. Overwriting the
+	  SPI flash may render the system unbootable.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called spi-intel-pci.
+
+config SPI_INTEL_PLATFORM
+	tristate "Intel PCH/PCU SPI flash platform driver (DANGEROUS)"
+	depends on X86 || COMPILE_TEST
+	depends on SPI_MEM
+	select SPI_INTEL
+	help
+	  This enables platform support for the Intel PCH/PCU SPI
+	  controller in master mode. This controller is present in modern
+	  Intel hardware and is used to hold BIOS and other persistent
+	  settings. Using this driver it is possible to upgrade BIOS
+	  directly from Linux.
+
+	  Say N here unless you know what you are doing. Overwriting the
+	  SPI flash may render the system unbootable.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called spi-intel-platform.
+
 config SPI_JCORE
 	tristate "J-Core SPI Master"
 	depends on OF && (SUPERH || COMPILE_TEST)
diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
index dd7393a6046fa..36b2045f08d25 100644
--- a/drivers/spi/Makefile
+++ b/drivers/spi/Makefile
@@ -61,6 +61,9 @@ obj-$(CONFIG_SPI_HISI_SFC_V3XX)		+= spi-hisi-sfc-v3xx.o
 obj-$(CONFIG_SPI_IMG_SPFI)		+= spi-img-spfi.o
 obj-$(CONFIG_SPI_IMX)			+= spi-imx.o
 obj-$(CONFIG_SPI_INGENIC)		+= spi-ingenic.o
+obj-$(CONFIG_SPI_INTEL)			+= spi-intel.o
+obj-$(CONFIG_SPI_INTEL_PCI)		+= spi-intel-pci.o
+obj-$(CONFIG_SPI_INTEL_PLATFORM)	+= spi-intel-platform.o
 obj-$(CONFIG_SPI_LANTIQ_SSC)		+= spi-lantiq-ssc.o
 obj-$(CONFIG_SPI_JCORE)			+= spi-jcore.o
 obj-$(CONFIG_SPI_LM70_LLP)		+= spi-lm70llp.o
diff --git a/drivers/mtd/spi-nor/controllers/intel-spi-pci.c b/drivers/spi/spi-intel-pci.c
similarity index 86%
rename from drivers/mtd/spi-nor/controllers/intel-spi-pci.c
rename to drivers/spi/spi-intel-pci.c
index 508f7ca098eff..a9cb4d77ffe39 100644
--- a/drivers/mtd/spi-nor/controllers/intel-spi-pci.c
+++ b/drivers/spi/spi-intel-pci.c
@@ -2,16 +2,14 @@
 /*
  * Intel PCH/PCU SPI flash PCI driver.
  *
- * Copyright (C) 2016, Intel Corporation
+ * Copyright (C) 2016 - 2022, Intel Corporation
  * Author: Mika Westerberg <mika.westerberg@linux.intel.com>
  */
 
-#include <linux/ioport.h>
-#include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/pci.h>
 
-#include "intel-spi.h"
+#include "spi-intel.h"
 
 #define BCR		0xdc
 #define BCR_WPD		BIT(0)
@@ -46,7 +44,6 @@ static int intel_spi_pci_probe(struct pci_dev *pdev,
 			       const struct pci_device_id *id)
 {
 	struct intel_spi_boardinfo *info;
-	struct intel_spi *ispi;
 	int ret;
 
 	ret = pcim_enable_device(pdev);
@@ -59,17 +56,7 @@ static int intel_spi_pci_probe(struct pci_dev *pdev,
 		return -ENOMEM;
 
 	info->data = pdev;
-	ispi = intel_spi_probe(&pdev->dev, &pdev->resource[0], info);
-	if (IS_ERR(ispi))
-		return PTR_ERR(ispi);
-
-	pci_set_drvdata(pdev, ispi);
-	return 0;
-}
-
-static void intel_spi_pci_remove(struct pci_dev *pdev)
-{
-	intel_spi_remove(pci_get_drvdata(pdev));
+	return intel_spi_probe(&pdev->dev, &pdev->resource[0], info);
 }
 
 static const struct pci_device_id intel_spi_pci_ids[] = {
@@ -98,7 +85,6 @@ static struct pci_driver intel_spi_pci_driver = {
 	.name = "intel-spi",
 	.id_table = intel_spi_pci_ids,
 	.probe = intel_spi_pci_probe,
-	.remove = intel_spi_pci_remove,
 };
 
 module_pci_driver(intel_spi_pci_driver);
diff --git a/drivers/mtd/spi-nor/controllers/intel-spi-platform.c b/drivers/spi/spi-intel-platform.c
similarity index 65%
rename from drivers/mtd/spi-nor/controllers/intel-spi-platform.c
rename to drivers/spi/spi-intel-platform.c
index f80f1086f928c..2ef09fa356612 100644
--- a/drivers/mtd/spi-nor/controllers/intel-spi-platform.c
+++ b/drivers/spi/spi-intel-platform.c
@@ -2,20 +2,18 @@
 /*
  * Intel PCH/PCU SPI flash platform driver.
  *
- * Copyright (C) 2016, Intel Corporation
+ * Copyright (C) 2016 - 2022, Intel Corporation
  * Author: Mika Westerberg <mika.westerberg@linux.intel.com>
  */
 
-#include <linux/ioport.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
 
-#include "intel-spi.h"
+#include "spi-intel.h"
 
 static int intel_spi_platform_probe(struct platform_device *pdev)
 {
 	struct intel_spi_boardinfo *info;
-	struct intel_spi *ispi;
 	struct resource *mem;
 
 	info = dev_get_platdata(&pdev->dev);
@@ -23,24 +21,11 @@ static int intel_spi_platform_probe(struct platform_device *pdev)
 		return -EINVAL;
 
 	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	ispi = intel_spi_probe(&pdev->dev, mem, info);
-	if (IS_ERR(ispi))
-		return PTR_ERR(ispi);
-
-	platform_set_drvdata(pdev, ispi);
-	return 0;
-}
-
-static int intel_spi_platform_remove(struct platform_device *pdev)
-{
-	struct intel_spi *ispi = platform_get_drvdata(pdev);
-
-	return intel_spi_remove(ispi);
+	return intel_spi_probe(&pdev->dev, mem, info);
 }
 
 static struct platform_driver intel_spi_platform_driver = {
 	.probe = intel_spi_platform_probe,
-	.remove = intel_spi_platform_remove,
 	.driver = {
 		.name = "intel-spi",
 	},
diff --git a/drivers/mtd/spi-nor/controllers/intel-spi.c b/drivers/spi/spi-intel.c
similarity index 57%
rename from drivers/mtd/spi-nor/controllers/intel-spi.c
rename to drivers/spi/spi-intel.c
index f35597cbea0ca..e937cfe85559b 100644
--- a/drivers/mtd/spi-nor/controllers/intel-spi.c
+++ b/drivers/spi/spi-intel.c
@@ -2,21 +2,21 @@
 /*
  * Intel PCH/PCU SPI flash driver.
  *
- * Copyright (C) 2016, Intel Corporation
+ * Copyright (C) 2016 - 2022, Intel Corporation
  * Author: Mika Westerberg <mika.westerberg@linux.intel.com>
  */
 
-#include <linux/err.h>
-#include <linux/io.h>
 #include <linux/iopoll.h>
 #include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/sizes.h>
-#include <linux/mtd/mtd.h>
+
 #include <linux/mtd/partitions.h>
 #include <linux/mtd/spi-nor.h>
 
-#include "intel-spi.h"
+#include <linux/spi/flash.h>
+#include <linux/spi/spi.h>
+#include <linux/spi/spi-mem.h>
+
+#include "spi-intel.h"
 
 /* Offsets are from @ispi->base */
 #define BFPREG				0x00
@@ -92,8 +92,6 @@
 /* CPU specifics */
 #define BYT_PR				0x74
 #define BYT_SSFSTS_CTL			0x90
-#define BYT_BCR				0xfc
-#define BYT_BCR_WPD			BIT(0)
 #define BYT_FREG_NUM			5
 #define BYT_PR_NUM			5
 
@@ -125,35 +123,43 @@
  * struct intel_spi - Driver private data
  * @dev: Device pointer
  * @info: Pointer to board specific info
- * @nor: SPI NOR layer structure
  * @base: Beginning of MMIO space
  * @pregs: Start of protection registers
  * @sregs: Start of software sequencer registers
+ * @master: Pointer to the SPI controller structure
  * @nregions: Maximum number of regions
  * @pr_num: Maximum number of protected range registers
  * @locked: Is SPI setting locked
  * @swseq_reg: Use SW sequencer in register reads/writes
  * @swseq_erase: Use SW sequencer in erase operation
- * @erase_64k: 64k erase supported
  * @atomic_preopcode: Holds preopcode when atomic sequence is requested
  * @opcodes: Opcodes which are supported. This are programmed by BIOS
  *           before it locks down the controller.
+ * @mem_ops: Pointer to SPI MEM ops supported by the controller
  */
 struct intel_spi {
 	struct device *dev;
 	const struct intel_spi_boardinfo *info;
-	struct spi_nor nor;
 	void __iomem *base;
 	void __iomem *pregs;
 	void __iomem *sregs;
+	struct spi_controller *master;
 	size_t nregions;
 	size_t pr_num;
 	bool locked;
 	bool swseq_reg;
 	bool swseq_erase;
-	bool erase_64k;
 	u8 atomic_preopcode;
 	u8 opcodes[8];
+	const struct intel_spi_mem_op *mem_ops;
+};
+
+struct intel_spi_mem_op {
+	struct spi_mem_op mem_op;
+	u32 replacement_op;
+	int (*exec_op)(struct intel_spi *ispi,
+		       const struct intel_spi_mem_op *iop,
+		       const struct spi_mem_op *op);
 };
 
 static bool writeable;
@@ -199,9 +205,6 @@ static void intel_spi_dump_regs(struct intel_spi *ispi)
 			readl(ispi->sregs + OPMENU1));
 	}
 
-	if (ispi->info->type == INTEL_SPI_BYT)
-		dev_dbg(ispi->dev, "BCR=0x%08x\n", readl(ispi->base + BYT_BCR));
-
 	dev_dbg(ispi->dev, "LVSCC=0x%08x\n", readl(ispi->base + LVSCC));
 	dev_dbg(ispi->dev, "UVSCC=0x%08x\n", readl(ispi->base + UVSCC));
 
@@ -217,9 +220,8 @@ static void intel_spi_dump_regs(struct intel_spi *ispi)
 		base = value & PR_BASE_MASK;
 
 		dev_dbg(ispi->dev, " %02d base: 0x%08x limit: 0x%08x [%c%c]\n",
-			 i, base << 12, (limit << 12) | 0xfff,
-			 value & PR_WPE ? 'W' : '.',
-			 value & PR_RPE ? 'R' : '.');
+			i, base << 12, (limit << 12) | 0xfff,
+			value & PR_WPE ? 'W' : '.', value & PR_RPE ? 'R' : '.');
 	}
 
 	dev_dbg(ispi->dev, "Flash regions:\n");
@@ -234,7 +236,7 @@ static void intel_spi_dump_regs(struct intel_spi *ispi)
 			dev_dbg(ispi->dev, " %02d disabled\n", i);
 		else
 			dev_dbg(ispi->dev, " %02d base: 0x%08x limit: 0x%08x\n",
-				 i, base << 12, (limit << 12) | 0xfff);
+				i, base << 12, (limit << 12) | 0xfff);
 	}
 
 	dev_dbg(ispi->dev, "Using %cW sequencer for register access\n",
@@ -310,119 +312,6 @@ static bool intel_spi_set_writeable(struct intel_spi *ispi)
 	return ispi->info->set_writeable(ispi->base, ispi->info->data);
 }
 
-static int intel_spi_init(struct intel_spi *ispi)
-{
-	u32 opmenu0, opmenu1, lvscc, uvscc, val;
-	int i;
-
-	switch (ispi->info->type) {
-	case INTEL_SPI_BYT:
-		ispi->sregs = ispi->base + BYT_SSFSTS_CTL;
-		ispi->pregs = ispi->base + BYT_PR;
-		ispi->nregions = BYT_FREG_NUM;
-		ispi->pr_num = BYT_PR_NUM;
-		ispi->swseq_reg = true;
-		break;
-
-	case INTEL_SPI_LPT:
-		ispi->sregs = ispi->base + LPT_SSFSTS_CTL;
-		ispi->pregs = ispi->base + LPT_PR;
-		ispi->nregions = LPT_FREG_NUM;
-		ispi->pr_num = LPT_PR_NUM;
-		ispi->swseq_reg = true;
-		break;
-
-	case INTEL_SPI_BXT:
-		ispi->sregs = ispi->base + BXT_SSFSTS_CTL;
-		ispi->pregs = ispi->base + BXT_PR;
-		ispi->nregions = BXT_FREG_NUM;
-		ispi->pr_num = BXT_PR_NUM;
-		ispi->erase_64k = true;
-		break;
-
-	case INTEL_SPI_CNL:
-		ispi->sregs = NULL;
-		ispi->pregs = ispi->base + CNL_PR;
-		ispi->nregions = CNL_FREG_NUM;
-		ispi->pr_num = CNL_PR_NUM;
-		break;
-
-	default:
-		return -EINVAL;
-	}
-
-	/* Try to disable write protection if user asked to do so */
-	if (writeable && !intel_spi_set_writeable(ispi)) {
-		dev_warn(ispi->dev, "can't disable chip write protection\n");
-		writeable = false;
-	}
-
-	/* Disable #SMI generation from HW sequencer */
-	val = readl(ispi->base + HSFSTS_CTL);
-	val &= ~HSFSTS_CTL_FSMIE;
-	writel(val, ispi->base + HSFSTS_CTL);
-
-	/*
-	 * Determine whether erase operation should use HW or SW sequencer.
-	 *
-	 * The HW sequencer has a predefined list of opcodes, with only the
-	 * erase opcode being programmable in LVSCC and UVSCC registers.
-	 * If these registers don't contain a valid erase opcode, erase
-	 * cannot be done using HW sequencer.
-	 */
-	lvscc = readl(ispi->base + LVSCC);
-	uvscc = readl(ispi->base + UVSCC);
-	if (!(lvscc & ERASE_OPCODE_MASK) || !(uvscc & ERASE_OPCODE_MASK))
-		ispi->swseq_erase = true;
-	/* SPI controller on Intel BXT supports 64K erase opcode */
-	if (ispi->info->type == INTEL_SPI_BXT && !ispi->swseq_erase)
-		if (!(lvscc & ERASE_64K_OPCODE_MASK) ||
-		    !(uvscc & ERASE_64K_OPCODE_MASK))
-			ispi->erase_64k = false;
-
-	if (ispi->sregs == NULL && (ispi->swseq_reg || ispi->swseq_erase)) {
-		dev_err(ispi->dev, "software sequencer not supported, but required\n");
-		return -EINVAL;
-	}
-
-	/*
-	 * Some controllers can only do basic operations using hardware
-	 * sequencer. All other operations are supposed to be carried out
-	 * using software sequencer.
-	 */
-	if (ispi->swseq_reg) {
-		/* Disable #SMI generation from SW sequencer */
-		val = readl(ispi->sregs + SSFSTS_CTL);
-		val &= ~SSFSTS_CTL_FSMIE;
-		writel(val, ispi->sregs + SSFSTS_CTL);
-	}
-
-	/* Check controller's lock status */
-	val = readl(ispi->base + HSFSTS_CTL);
-	ispi->locked = !!(val & HSFSTS_CTL_FLOCKDN);
-
-	if (ispi->locked && ispi->sregs) {
-		/*
-		 * BIOS programs allowed opcodes and then locks down the
-		 * register. So read back what opcodes it decided to support.
-		 * That's the set we are going to support as well.
-		 */
-		opmenu0 = readl(ispi->sregs + OPMENU0);
-		opmenu1 = readl(ispi->sregs + OPMENU1);
-
-		if (opmenu0 && opmenu1) {
-			for (i = 0; i < ARRAY_SIZE(ispi->opcodes) / 2; i++) {
-				ispi->opcodes[i] = opmenu0 >> i * 8;
-				ispi->opcodes[i + 4] = opmenu1 >> i * 8;
-			}
-		}
-	}
-
-	intel_spi_dump_regs(ispi);
-
-	return 0;
-}
-
 static int intel_spi_opcode_index(struct intel_spi *ispi, u8 opcode, int optype)
 {
 	int i;
@@ -536,7 +425,6 @@ static int intel_spi_sw_cycle(struct intel_spi *ispi, u8 opcode, size_t len,
 		default:
 			return -EINVAL;
 		}
-
 	}
 	writel(val, ispi->sregs + SSFSTS_CTL);
 
@@ -553,31 +441,35 @@ static int intel_spi_sw_cycle(struct intel_spi *ispi, u8 opcode, size_t len,
 	return 0;
 }
 
-static int intel_spi_read_reg(struct spi_nor *nor, u8 opcode, u8 *buf,
-			      size_t len)
+static int intel_spi_read_reg(struct intel_spi *ispi,
+			      const struct intel_spi_mem_op *iop,
+			      const struct spi_mem_op *op)
 {
-	struct intel_spi *ispi = nor->priv;
+	size_t nbytes = op->data.nbytes;
+	u8 opcode = op->cmd.opcode;
 	int ret;
 
 	/* Address of the first chip */
 	writel(0, ispi->base + FADDR);
 
 	if (ispi->swseq_reg)
-		ret = intel_spi_sw_cycle(ispi, opcode, len,
+		ret = intel_spi_sw_cycle(ispi, opcode, nbytes,
 					 OPTYPE_READ_NO_ADDR);
 	else
-		ret = intel_spi_hw_cycle(ispi, opcode, len);
+		ret = intel_spi_hw_cycle(ispi, opcode, nbytes);
 
 	if (ret)
 		return ret;
 
-	return intel_spi_read_block(ispi, buf, len);
+	return intel_spi_read_block(ispi, op->data.buf.in, nbytes);
 }
 
-static int intel_spi_write_reg(struct spi_nor *nor, u8 opcode, const u8 *buf,
-			       size_t len)
+static int intel_spi_write_reg(struct intel_spi *ispi,
+			       const struct intel_spi_mem_op *iop,
+			       const struct spi_mem_op *op)
 {
-	struct intel_spi *ispi = nor->priv;
+	size_t nbytes = op->data.nbytes;
+	u8 opcode = op->cmd.opcode;
 	int ret;
 
 	/*
@@ -622,23 +514,25 @@ static int intel_spi_write_reg(struct spi_nor *nor, u8 opcode, const u8 *buf,
 	writel(0, ispi->base + FADDR);
 
 	/* Write the value beforehand */
-	ret = intel_spi_write_block(ispi, buf, len);
+	ret = intel_spi_write_block(ispi, op->data.buf.out, nbytes);
 	if (ret)
 		return ret;
 
 	if (ispi->swseq_reg)
-		return intel_spi_sw_cycle(ispi, opcode, len,
+		return intel_spi_sw_cycle(ispi, opcode, nbytes,
 					  OPTYPE_WRITE_NO_ADDR);
-	return intel_spi_hw_cycle(ispi, opcode, len);
+	return intel_spi_hw_cycle(ispi, opcode, nbytes);
 }
 
-static ssize_t intel_spi_read(struct spi_nor *nor, loff_t from, size_t len,
-			      u_char *read_buf)
+static int intel_spi_read(struct intel_spi *ispi,
+			  const struct intel_spi_mem_op *iop,
+			  const struct spi_mem_op *op)
 {
-	struct intel_spi *ispi = nor->priv;
-	size_t block_size, retlen = 0;
+	void *read_buf = op->data.buf.in;
+	size_t block_size, nbytes = op->data.nbytes;
+	u32 addr = op->addr.val;
 	u32 val, status;
-	ssize_t ret;
+	int ret;
 
 	/*
 	 * Atomic sequence is not expected with HW sequencer reads. Make
@@ -647,24 +541,14 @@ static ssize_t intel_spi_read(struct spi_nor *nor, loff_t from, size_t len,
 	if (WARN_ON_ONCE(ispi->atomic_preopcode))
 		ispi->atomic_preopcode = 0;
 
-	switch (nor->read_opcode) {
-	case SPINOR_OP_READ:
-	case SPINOR_OP_READ_FAST:
-	case SPINOR_OP_READ_4B:
-	case SPINOR_OP_READ_FAST_4B:
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	while (len > 0) {
-		block_size = min_t(size_t, len, INTEL_SPI_FIFO_SZ);
+	while (nbytes > 0) {
+		block_size = min_t(size_t, nbytes, INTEL_SPI_FIFO_SZ);
 
 		/* Read cannot cross 4K boundary */
-		block_size = min_t(loff_t, from + block_size,
-				   round_up(from + 1, SZ_4K)) - from;
+		block_size = min_t(loff_t, addr + block_size,
+				   round_up(addr + 1, SZ_4K)) - addr;
 
-		writel(from, ispi->base + FADDR);
+		writel(addr, ispi->base + FADDR);
 
 		val = readl(ispi->base + HSFSTS_CTL);
 		val &= ~(HSFSTS_CTL_FDBC_MASK | HSFSTS_CTL_FCYCLE_MASK);
@@ -685,8 +569,7 @@ static ssize_t intel_spi_read(struct spi_nor *nor, loff_t from, size_t len,
 			ret = -EACCES;
 
 		if (ret < 0) {
-			dev_err(ispi->dev, "read error: %llx: %#x\n", from,
-				status);
+			dev_err(ispi->dev, "read error: %x: %#x\n", addr, status);
 			return ret;
 		}
 
@@ -694,34 +577,35 @@ static ssize_t intel_spi_read(struct spi_nor *nor, loff_t from, size_t len,
 		if (ret)
 			return ret;
 
-		len -= block_size;
-		from += block_size;
-		retlen += block_size;
+		nbytes -= block_size;
+		addr += block_size;
 		read_buf += block_size;
 	}
 
-	return retlen;
+	return 0;
 }
 
-static ssize_t intel_spi_write(struct spi_nor *nor, loff_t to, size_t len,
-			       const u_char *write_buf)
+static int intel_spi_write(struct intel_spi *ispi,
+			   const struct intel_spi_mem_op *iop,
+			   const struct spi_mem_op *op)
 {
-	struct intel_spi *ispi = nor->priv;
-	size_t block_size, retlen = 0;
+	size_t block_size, nbytes = op->data.nbytes;
+	const void *write_buf = op->data.buf.out;
+	u32 addr = op->addr.val;
 	u32 val, status;
-	ssize_t ret;
+	int ret;
 
 	/* Not needed with HW sequencer write, make sure it is cleared */
 	ispi->atomic_preopcode = 0;
 
-	while (len > 0) {
-		block_size = min_t(size_t, len, INTEL_SPI_FIFO_SZ);
+	while (nbytes > 0) {
+		block_size = min_t(size_t, nbytes, INTEL_SPI_FIFO_SZ);
 
 		/* Write cannot cross 4K boundary */
-		block_size = min_t(loff_t, to + block_size,
-				   round_up(to + 1, SZ_4K)) - to;
+		block_size = min_t(loff_t, addr + block_size,
+				   round_up(addr + 1, SZ_4K)) - addr;
 
-		writel(to, ispi->base + FADDR);
+		writel(addr, ispi->base + FADDR);
 
 		val = readl(ispi->base + HSFSTS_CTL);
 		val &= ~(HSFSTS_CTL_FDBC_MASK | HSFSTS_CTL_FCYCLE_MASK);
@@ -752,79 +636,476 @@ static ssize_t intel_spi_write(struct spi_nor *nor, loff_t to, size_t len,
 			ret = -EACCES;
 
 		if (ret < 0) {
-			dev_err(ispi->dev, "write error: %llx: %#x\n", to,
-				status);
+			dev_err(ispi->dev, "write error: %x: %#x\n", addr, status);
 			return ret;
 		}
 
-		len -= block_size;
-		to += block_size;
-		retlen += block_size;
+		nbytes -= block_size;
+		addr += block_size;
 		write_buf += block_size;
 	}
 
-	return retlen;
+	return 0;
 }
 
-static int intel_spi_erase(struct spi_nor *nor, loff_t offs)
+static int intel_spi_erase(struct intel_spi *ispi,
+			   const struct intel_spi_mem_op *iop,
+			   const struct spi_mem_op *op)
 {
-	size_t erase_size, len = nor->mtd.erasesize;
-	struct intel_spi *ispi = nor->priv;
-	u32 val, status, cmd;
+	u8 opcode = op->cmd.opcode;
+	u32 addr = op->addr.val;
+	u32 val, status;
 	int ret;
 
-	/* If the hardware can do 64k erase use that when possible */
-	if (len >= SZ_64K && ispi->erase_64k) {
-		cmd = HSFSTS_CTL_FCYCLE_ERASE_64K;
-		erase_size = SZ_64K;
-	} else {
-		cmd = HSFSTS_CTL_FCYCLE_ERASE;
-		erase_size = SZ_4K;
+	writel(addr, ispi->base + FADDR);
+
+	if (ispi->swseq_erase)
+		return intel_spi_sw_cycle(ispi, opcode, 0,
+					  OPTYPE_WRITE_WITH_ADDR);
+
+	/* Not needed with HW sequencer erase, make sure it is cleared */
+	ispi->atomic_preopcode = 0;
+
+	val = readl(ispi->base + HSFSTS_CTL);
+	val &= ~(HSFSTS_CTL_FDBC_MASK | HSFSTS_CTL_FCYCLE_MASK);
+	val |= HSFSTS_CTL_AEL | HSFSTS_CTL_FCERR | HSFSTS_CTL_FDONE;
+	val |= HSFSTS_CTL_FGO;
+	val |= iop->replacement_op;
+	writel(val, ispi->base + HSFSTS_CTL);
+
+	ret = intel_spi_wait_hw_busy(ispi);
+	if (ret)
+		return ret;
+
+	status = readl(ispi->base + HSFSTS_CTL);
+	if (status & HSFSTS_CTL_FCERR)
+		return -EIO;
+	if (status & HSFSTS_CTL_AEL)
+		return -EACCES;
+
+	return 0;
+}
+
+static bool intel_spi_cmp_mem_op(const struct intel_spi_mem_op *iop,
+				 const struct spi_mem_op *op)
+{
+	if (iop->mem_op.cmd.nbytes != op->cmd.nbytes ||
+	    iop->mem_op.cmd.buswidth != op->cmd.buswidth ||
+	    iop->mem_op.cmd.dtr != op->cmd.dtr ||
+	    iop->mem_op.cmd.opcode != op->cmd.opcode)
+		return false;
+
+	if (iop->mem_op.addr.nbytes != op->addr.nbytes ||
+	    iop->mem_op.addr.dtr != op->addr.dtr)
+		return false;
+
+	if (iop->mem_op.data.dir != op->data.dir ||
+	    iop->mem_op.data.dtr != op->data.dtr)
+		return false;
+
+	if (iop->mem_op.data.dir != SPI_MEM_NO_DATA) {
+		if (iop->mem_op.data.buswidth != op->data.buswidth)
+			return false;
 	}
 
-	if (ispi->swseq_erase) {
-		while (len > 0) {
-			writel(offs, ispi->base + FADDR);
+	return true;
+}
 
-			ret = intel_spi_sw_cycle(ispi, nor->erase_opcode,
-						 0, OPTYPE_WRITE_WITH_ADDR);
-			if (ret)
-				return ret;
+static const struct intel_spi_mem_op *
+intel_spi_match_mem_op(struct intel_spi *ispi, const struct spi_mem_op *op)
+{
+	const struct intel_spi_mem_op *iop;
 
-			offs += erase_size;
-			len -= erase_size;
+	for (iop = ispi->mem_ops; iop->mem_op.cmd.opcode; iop++) {
+		if (intel_spi_cmp_mem_op(iop, op))
+			break;
+	}
+
+	return iop->mem_op.cmd.opcode ? iop : NULL;
+}
+
+static bool intel_spi_supports_mem_op(struct spi_mem *mem,
+				      const struct spi_mem_op *op)
+{
+	struct intel_spi *ispi = spi_master_get_devdata(mem->spi->master);
+	const struct intel_spi_mem_op *iop;
+
+	iop = intel_spi_match_mem_op(ispi, op);
+	if (!iop) {
+		dev_dbg(ispi->dev, "%#x not supported\n", op->cmd.opcode);
+		return false;
+	}
+
+	/*
+	 * For software sequencer check that the opcode is actually
+	 * present in the opmenu if it is locked.
+	 */
+	if (ispi->swseq_reg && ispi->locked) {
+		int i;
+
+		/* Check if it is in the locked opcodes list */
+		for (i = 0; i < ARRAY_SIZE(ispi->opcodes); i++) {
+			if (ispi->opcodes[i] == op->cmd.opcode)
+				return true;
 		}
 
-		return 0;
+		dev_dbg(ispi->dev, "%#x not supported\n", op->cmd.opcode);
+		return false;
 	}
 
-	/* Not needed with HW sequencer erase, make sure it is cleared */
-	ispi->atomic_preopcode = 0;
+	return true;
+}
 
-	while (len > 0) {
-		writel(offs, ispi->base + FADDR);
+static int intel_spi_exec_mem_op(struct spi_mem *mem, const struct spi_mem_op *op)
+{
+	struct intel_spi *ispi = spi_master_get_devdata(mem->spi->master);
+	const struct intel_spi_mem_op *iop;
 
-		val = readl(ispi->base + HSFSTS_CTL);
-		val &= ~(HSFSTS_CTL_FDBC_MASK | HSFSTS_CTL_FCYCLE_MASK);
-		val |= HSFSTS_CTL_AEL | HSFSTS_CTL_FCERR | HSFSTS_CTL_FDONE;
-		val |= cmd;
-		val |= HSFSTS_CTL_FGO;
-		writel(val, ispi->base + HSFSTS_CTL);
+	iop = intel_spi_match_mem_op(ispi, op);
+	if (!iop)
+		return -EOPNOTSUPP;
 
-		ret = intel_spi_wait_hw_busy(ispi);
-		if (ret)
-			return ret;
+	return iop->exec_op(ispi, iop, op);
+}
 
-		status = readl(ispi->base + HSFSTS_CTL);
-		if (status & HSFSTS_CTL_FCERR)
-			return -EIO;
-		else if (status & HSFSTS_CTL_AEL)
-			return -EACCES;
+static const char *intel_spi_get_name(struct spi_mem *mem)
+{
+	const struct intel_spi *ispi = spi_master_get_devdata(mem->spi->master);
+
+	/*
+	 * Return name of the flash controller device to be compatible
+	 * with the MTD version.
+	 */
+	return dev_name(ispi->dev);
+}
+
+static const struct spi_controller_mem_ops intel_spi_mem_ops = {
+	.supports_op = intel_spi_supports_mem_op,
+	.exec_op = intel_spi_exec_mem_op,
+	.get_name = intel_spi_get_name,
+};
+
+#define INTEL_SPI_OP_ADDR(__nbytes)					\
+	{								\
+		.nbytes = __nbytes,					\
+	}
+
+#define INTEL_SPI_OP_NO_DATA						\
+	{								\
+		.dir = SPI_MEM_NO_DATA,					\
+	}
+
+#define INTEL_SPI_OP_DATA_IN(__buswidth)				\
+	{								\
+		.dir = SPI_MEM_DATA_IN,					\
+		.buswidth = __buswidth,					\
+	}
+
+#define INTEL_SPI_OP_DATA_OUT(__buswidth)				\
+	{								\
+		.dir = SPI_MEM_DATA_OUT,				\
+		.buswidth = __buswidth,					\
+	}
+
+#define INTEL_SPI_MEM_OP(__cmd, __addr, __data, __exec_op)		\
+	{								\
+		.mem_op = {						\
+			.cmd = __cmd,					\
+			.addr = __addr,					\
+			.data = __data,					\
+		},							\
+		.exec_op = __exec_op,					\
+	}
+
+#define INTEL_SPI_MEM_OP_REPL(__cmd, __addr, __data, __exec_op, __repl)	\
+	{								\
+		.mem_op = {						\
+			.cmd = __cmd,					\
+			.addr = __addr,					\
+			.data = __data,					\
+		},							\
+		.exec_op = __exec_op,					\
+		.replacement_op = __repl,				\
+	}
+
+/*
+ * The controller handles pretty much everything internally based on the
+ * SFDP data but we want to make sure we only support the operations
+ * actually possible. Only check buswidth and transfer direction, the
+ * core validates data.
+ */
+#define INTEL_SPI_GENERIC_OPS						\
+	/* Status register operations */				\
+	INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_RDID, 1),		\
+			 SPI_MEM_OP_NO_ADDR,				\
+			 INTEL_SPI_OP_DATA_IN(1),			\
+			 intel_spi_read_reg),				\
+	INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_RDSR, 1),		\
+			 SPI_MEM_OP_NO_ADDR,				\
+			 INTEL_SPI_OP_DATA_IN(1),			\
+			 intel_spi_read_reg),				\
+	INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_WRSR, 1),		\
+			 SPI_MEM_OP_NO_ADDR,				\
+			 INTEL_SPI_OP_DATA_OUT(1),			\
+			 intel_spi_write_reg),				\
+	/* Normal read */						\
+	INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_READ, 1),		\
+			 INTEL_SPI_OP_ADDR(3),				\
+			 INTEL_SPI_OP_DATA_IN(1),			\
+			 intel_spi_read),				\
+	INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_READ, 1),		\
+			 INTEL_SPI_OP_ADDR(3),				\
+			 INTEL_SPI_OP_DATA_IN(2),			\
+			 intel_spi_read),				\
+	INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_READ, 1),		\
+			 INTEL_SPI_OP_ADDR(3),				\
+			 INTEL_SPI_OP_DATA_IN(4),			\
+			 intel_spi_read),				\
+	INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_READ, 1),		\
+			 INTEL_SPI_OP_ADDR(4),				\
+			 INTEL_SPI_OP_DATA_IN(1),			\
+			 intel_spi_read),				\
+	INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_READ, 1),		\
+			 INTEL_SPI_OP_ADDR(4),				\
+			 INTEL_SPI_OP_DATA_IN(2),			\
+			 intel_spi_read),				\
+	INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_READ, 1),		\
+			 INTEL_SPI_OP_ADDR(4),				\
+			 INTEL_SPI_OP_DATA_IN(4),			\
+			 intel_spi_read),				\
+	/* Fast read */							\
+	INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_READ_FAST, 1),	\
+			 INTEL_SPI_OP_ADDR(3),				\
+			 INTEL_SPI_OP_DATA_IN(1),			\
+			 intel_spi_read),				\
+	INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_READ_FAST, 1),	\
+			 INTEL_SPI_OP_ADDR(3),				\
+			 INTEL_SPI_OP_DATA_IN(2),			\
+			 intel_spi_read),				\
+	INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_READ_FAST, 1),	\
+			 INTEL_SPI_OP_ADDR(3),				\
+			 INTEL_SPI_OP_DATA_IN(4),			\
+			 intel_spi_read),				\
+	INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_READ_FAST, 1),	\
+			 INTEL_SPI_OP_ADDR(4),				\
+			 INTEL_SPI_OP_DATA_IN(1),			\
+			 intel_spi_read),				\
+	INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_READ_FAST, 1),	\
+			 INTEL_SPI_OP_ADDR(4),				\
+			 INTEL_SPI_OP_DATA_IN(2),			\
+			 intel_spi_read),				\
+	INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_READ_FAST, 1),	\
+			 INTEL_SPI_OP_ADDR(4),				\
+			 INTEL_SPI_OP_DATA_IN(4),			\
+			 intel_spi_read),				\
+	/* Read with 4-byte address opcode */				\
+	INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_READ_4B, 1),		\
+			 INTEL_SPI_OP_ADDR(4),				\
+			 INTEL_SPI_OP_DATA_IN(1),			\
+			 intel_spi_read),				\
+	INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_READ_4B, 1),		\
+			 INTEL_SPI_OP_ADDR(4),				\
+			 INTEL_SPI_OP_DATA_IN(2),			\
+			 intel_spi_read),				\
+	INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_READ_4B, 1),		\
+			 INTEL_SPI_OP_ADDR(4),				\
+			 INTEL_SPI_OP_DATA_IN(4),			\
+			 intel_spi_read),				\
+	/* Fast read with 4-byte address opcode */			\
+	INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_READ_FAST_4B, 1),	\
+			 INTEL_SPI_OP_ADDR(4),				\
+			 INTEL_SPI_OP_DATA_IN(1),			\
+			 intel_spi_read),				\
+	INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_READ_FAST_4B, 1),	\
+			 INTEL_SPI_OP_ADDR(4),				\
+			 INTEL_SPI_OP_DATA_IN(2),			\
+			 intel_spi_read),				\
+	INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_READ_FAST_4B, 1),	\
+			 INTEL_SPI_OP_ADDR(4),				\
+			 INTEL_SPI_OP_DATA_IN(4),			\
+			 intel_spi_read),				\
+	/* Write operations */						\
+	INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_PP, 1),		\
+			 INTEL_SPI_OP_ADDR(3),				\
+			 INTEL_SPI_OP_DATA_OUT(1),			\
+			 intel_spi_write),				\
+	INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_PP, 1),		\
+			 INTEL_SPI_OP_ADDR(4),				\
+			 INTEL_SPI_OP_DATA_OUT(1),			\
+			 intel_spi_write),				\
+	INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_PP_4B, 1),		\
+			 INTEL_SPI_OP_ADDR(4),				\
+			 INTEL_SPI_OP_DATA_OUT(1),			\
+			 intel_spi_write),				\
+	INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_WREN, 1),		\
+			 SPI_MEM_OP_NO_ADDR,				\
+			 SPI_MEM_OP_NO_DATA,				\
+			 intel_spi_write_reg),				\
+	INTEL_SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_WRDI, 1),		\
+			 SPI_MEM_OP_NO_ADDR,				\
+			 SPI_MEM_OP_NO_DATA,				\
+			 intel_spi_write_reg),				\
+	/* Erase operations */						\
+	INTEL_SPI_MEM_OP_REPL(SPI_MEM_OP_CMD(SPINOR_OP_BE_4K, 1),	\
+			      INTEL_SPI_OP_ADDR(3),			\
+			      SPI_MEM_OP_NO_DATA,			\
+			      intel_spi_erase,				\
+			      HSFSTS_CTL_FCYCLE_ERASE),			\
+	INTEL_SPI_MEM_OP_REPL(SPI_MEM_OP_CMD(SPINOR_OP_BE_4K, 1),	\
+			      INTEL_SPI_OP_ADDR(4),			\
+			      SPI_MEM_OP_NO_DATA,			\
+			      intel_spi_erase,				\
+			      HSFSTS_CTL_FCYCLE_ERASE),			\
+	INTEL_SPI_MEM_OP_REPL(SPI_MEM_OP_CMD(SPINOR_OP_BE_4K_4B, 1),	\
+			      INTEL_SPI_OP_ADDR(4),			\
+			      SPI_MEM_OP_NO_DATA,			\
+			      intel_spi_erase,				\
+			      HSFSTS_CTL_FCYCLE_ERASE)			\
+
+static const struct intel_spi_mem_op generic_mem_ops[] = {
+	INTEL_SPI_GENERIC_OPS,
+	{ },
+};
+
+static const struct intel_spi_mem_op erase_64k_mem_ops[] = {
+	INTEL_SPI_GENERIC_OPS,
+	/* 64k sector erase operations */
+	INTEL_SPI_MEM_OP_REPL(SPI_MEM_OP_CMD(SPINOR_OP_SE, 1),
+			      INTEL_SPI_OP_ADDR(3),
+			      SPI_MEM_OP_NO_DATA,
+			      intel_spi_erase,
+			      HSFSTS_CTL_FCYCLE_ERASE_64K),
+	INTEL_SPI_MEM_OP_REPL(SPI_MEM_OP_CMD(SPINOR_OP_SE, 1),
+			      INTEL_SPI_OP_ADDR(4),
+			      SPI_MEM_OP_NO_DATA,
+			      intel_spi_erase,
+			      HSFSTS_CTL_FCYCLE_ERASE_64K),
+	INTEL_SPI_MEM_OP_REPL(SPI_MEM_OP_CMD(SPINOR_OP_SE_4B, 1),
+			      INTEL_SPI_OP_ADDR(4),
+			      SPI_MEM_OP_NO_DATA,
+			      intel_spi_erase,
+			      HSFSTS_CTL_FCYCLE_ERASE_64K),
+	{ },
+};
+
+static int intel_spi_init(struct intel_spi *ispi)
+{
+	u32 opmenu0, opmenu1, lvscc, uvscc, val;
+	bool erase_64k = false;
+	int i;
+
+	switch (ispi->info->type) {
+	case INTEL_SPI_BYT:
+		ispi->sregs = ispi->base + BYT_SSFSTS_CTL;
+		ispi->pregs = ispi->base + BYT_PR;
+		ispi->nregions = BYT_FREG_NUM;
+		ispi->pr_num = BYT_PR_NUM;
+		ispi->swseq_reg = true;
+		break;
+
+	case INTEL_SPI_LPT:
+		ispi->sregs = ispi->base + LPT_SSFSTS_CTL;
+		ispi->pregs = ispi->base + LPT_PR;
+		ispi->nregions = LPT_FREG_NUM;
+		ispi->pr_num = LPT_PR_NUM;
+		ispi->swseq_reg = true;
+		break;
+
+	case INTEL_SPI_BXT:
+		ispi->sregs = ispi->base + BXT_SSFSTS_CTL;
+		ispi->pregs = ispi->base + BXT_PR;
+		ispi->nregions = BXT_FREG_NUM;
+		ispi->pr_num = BXT_PR_NUM;
+		erase_64k = true;
+		break;
+
+	case INTEL_SPI_CNL:
+		ispi->sregs = NULL;
+		ispi->pregs = ispi->base + CNL_PR;
+		ispi->nregions = CNL_FREG_NUM;
+		ispi->pr_num = CNL_PR_NUM;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	/* Try to disable write protection if user asked to do so */
+	if (writeable && !intel_spi_set_writeable(ispi)) {
+		dev_warn(ispi->dev, "can't disable chip write protection\n");
+		writeable = false;
+	}
 
-		offs += erase_size;
-		len -= erase_size;
+	/* Disable #SMI generation from HW sequencer */
+	val = readl(ispi->base + HSFSTS_CTL);
+	val &= ~HSFSTS_CTL_FSMIE;
+	writel(val, ispi->base + HSFSTS_CTL);
+
+	/*
+	 * Determine whether erase operation should use HW or SW sequencer.
+	 *
+	 * The HW sequencer has a predefined list of opcodes, with only the
+	 * erase opcode being programmable in LVSCC and UVSCC registers.
+	 * If these registers don't contain a valid erase opcode, erase
+	 * cannot be done using HW sequencer.
+	 */
+	lvscc = readl(ispi->base + LVSCC);
+	uvscc = readl(ispi->base + UVSCC);
+	if (!(lvscc & ERASE_OPCODE_MASK) || !(uvscc & ERASE_OPCODE_MASK))
+		ispi->swseq_erase = true;
+	/* SPI controller on Intel BXT supports 64K erase opcode */
+	if (ispi->info->type == INTEL_SPI_BXT && !ispi->swseq_erase)
+		if (!(lvscc & ERASE_64K_OPCODE_MASK) ||
+		    !(uvscc & ERASE_64K_OPCODE_MASK))
+			erase_64k = false;
+
+	if (!ispi->sregs && (ispi->swseq_reg || ispi->swseq_erase)) {
+		dev_err(ispi->dev, "software sequencer not supported, but required\n");
+		return -EINVAL;
 	}
 
+	/*
+	 * Some controllers can only do basic operations using hardware
+	 * sequencer. All other operations are supposed to be carried out
+	 * using software sequencer.
+	 */
+	if (ispi->swseq_reg) {
+		/* Disable #SMI generation from SW sequencer */
+		val = readl(ispi->sregs + SSFSTS_CTL);
+		val &= ~SSFSTS_CTL_FSMIE;
+		writel(val, ispi->sregs + SSFSTS_CTL);
+	}
+
+	/* Check controller's lock status */
+	val = readl(ispi->base + HSFSTS_CTL);
+	ispi->locked = !!(val & HSFSTS_CTL_FLOCKDN);
+
+	if (ispi->locked && ispi->sregs) {
+		/*
+		 * BIOS programs allowed opcodes and then locks down the
+		 * register. So read back what opcodes it decided to support.
+		 * That's the set we are going to support as well.
+		 */
+		opmenu0 = readl(ispi->sregs + OPMENU0);
+		opmenu1 = readl(ispi->sregs + OPMENU1);
+
+		if (opmenu0 && opmenu1) {
+			for (i = 0; i < ARRAY_SIZE(ispi->opcodes) / 2; i++) {
+				ispi->opcodes[i] = opmenu0 >> i * 8;
+				ispi->opcodes[i + 4] = opmenu1 >> i * 8;
+			}
+		}
+	}
+
+	if (erase_64k) {
+		dev_dbg(ispi->dev, "Using erase_64k memory operations");
+		ispi->mem_ops = erase_64k_mem_ops;
+	} else {
+		dev_dbg(ispi->dev, "Using generic memory operations");
+		ispi->mem_ops = generic_mem_ops;
+	}
+
+	intel_spi_dump_regs(ispi);
 	return 0;
 }
 
@@ -896,70 +1177,74 @@ static void intel_spi_fill_partition(struct intel_spi *ispi,
 	}
 }
 
-static const struct spi_nor_controller_ops intel_spi_controller_ops = {
-	.read_reg = intel_spi_read_reg,
-	.write_reg = intel_spi_write_reg,
-	.read = intel_spi_read,
-	.write = intel_spi_write,
-	.erase = intel_spi_erase,
-};
+static int intel_spi_populate_chip(struct intel_spi *ispi)
+{
+	struct flash_platform_data *pdata;
+	struct spi_board_info chip;
+
+	pdata = devm_kzalloc(ispi->dev, sizeof(*pdata), GFP_KERNEL);
+	if (!pdata)
+		return -ENOMEM;
+
+	pdata->nr_parts = 1;
+	pdata->parts = devm_kcalloc(ispi->dev, sizeof(*pdata->parts),
+				    pdata->nr_parts, GFP_KERNEL);
+	if (!pdata->parts)
+		return -ENOMEM;
+
+	intel_spi_fill_partition(ispi, pdata->parts);
+
+	memset(&chip, 0, sizeof(chip));
+	snprintf(chip.modalias, 8, "spi-nor");
+	chip.platform_data = pdata;
 
-struct intel_spi *intel_spi_probe(struct device *dev,
-	struct resource *mem, const struct intel_spi_boardinfo *info)
+	return spi_new_device(ispi->master, &chip) ? 0 : -ENODEV;
+}
+
+/**
+ * intel_spi_probe() - Probe the Intel SPI flash controller
+ * @dev: Pointer to the parent device
+ * @mem: MMIO resource
+ * @info: Platform spefific information
+ *
+ * Probes Intel SPI flash controller and creates the flash chip device.
+ * Returns %0 on success and negative errno in case of failure.
+ */
+int intel_spi_probe(struct device *dev, struct resource *mem,
+		    const struct intel_spi_boardinfo *info)
 {
-	const struct spi_nor_hwcaps hwcaps = {
-		.mask = SNOR_HWCAPS_READ |
-			SNOR_HWCAPS_READ_FAST |
-			SNOR_HWCAPS_PP,
-	};
-	struct mtd_partition part;
+	struct spi_controller *master;
 	struct intel_spi *ispi;
 	int ret;
 
-	if (!info || !mem)
-		return ERR_PTR(-EINVAL);
+	master = devm_spi_alloc_master(dev, sizeof(*ispi));
+	if (!master)
+		return -ENOMEM;
 
-	ispi = devm_kzalloc(dev, sizeof(*ispi), GFP_KERNEL);
-	if (!ispi)
-		return ERR_PTR(-ENOMEM);
+	master->mem_ops = &intel_spi_mem_ops;
+
+	ispi = spi_master_get_devdata(master);
 
 	ispi->base = devm_ioremap_resource(dev, mem);
 	if (IS_ERR(ispi->base))
-		return ERR_CAST(ispi->base);
+		return PTR_ERR(ispi->base);
 
 	ispi->dev = dev;
+	ispi->master = master;
 	ispi->info = info;
 
 	ret = intel_spi_init(ispi);
 	if (ret)
-		return ERR_PTR(ret);
-
-	ispi->nor.dev = ispi->dev;
-	ispi->nor.priv = ispi;
-	ispi->nor.controller_ops = &intel_spi_controller_ops;
-
-	ret = spi_nor_scan(&ispi->nor, NULL, &hwcaps);
-	if (ret) {
-		dev_info(dev, "failed to locate the chip\n");
-		return ERR_PTR(ret);
-	}
-
-	intel_spi_fill_partition(ispi, &part);
+		return ret;
 
-	ret = mtd_device_register(&ispi->nor.mtd, &part, 1);
+	ret = devm_spi_register_master(dev, master);
 	if (ret)
-		return ERR_PTR(ret);
+		return ret;
 
-	return ispi;
+	return intel_spi_populate_chip(ispi);
 }
 EXPORT_SYMBOL_GPL(intel_spi_probe);
 
-int intel_spi_remove(struct intel_spi *ispi)
-{
-	return mtd_device_unregister(&ispi->nor.mtd);
-}
-EXPORT_SYMBOL_GPL(intel_spi_remove);
-
 MODULE_DESCRIPTION("Intel PCH/PCU SPI flash core driver");
 MODULE_AUTHOR("Mika Westerberg <mika.westerberg@linux.intel.com>");
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/spi/spi-intel.h b/drivers/spi/spi-intel.h
new file mode 100644
index 0000000000000..a4f0327a46ff2
--- /dev/null
+++ b/drivers/spi/spi-intel.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Intel PCH/PCU SPI flash driver.
+ *
+ * Copyright (C) 2016 - 2022, Intel Corporation
+ * Author: Mika Westerberg <mika.westerberg@linux.intel.com>
+ */
+
+#ifndef SPI_INTEL_H
+#define SPI_INTEL_H
+
+#include <linux/platform_data/x86/spi-intel.h>
+
+struct resource;
+
+int intel_spi_probe(struct device *dev, struct resource *mem,
+		    const struct intel_spi_boardinfo *info);
+
+#endif /* SPI_INTEL_H */
diff --git a/include/linux/mfd/lpc_ich.h b/include/linux/mfd/lpc_ich.h
index 39967a5eca6d6..ea4a4b1b246a2 100644
--- a/include/linux/mfd/lpc_ich.h
+++ b/include/linux/mfd/lpc_ich.h
@@ -8,7 +8,7 @@
 #ifndef LPC_ICH_H
 #define LPC_ICH_H
 
-#include <linux/platform_data/x86/intel-spi.h>
+#include <linux/platform_data/x86/spi-intel.h>
 
 /* GPIO resources */
 #define ICH_RES_GPIO	0
diff --git a/include/linux/platform_data/x86/intel-spi.h b/include/linux/platform_data/x86/spi-intel.h
similarity index 89%
rename from include/linux/platform_data/x86/intel-spi.h
rename to include/linux/platform_data/x86/spi-intel.h
index 7dda3f6904654..a512ec37abbb5 100644
--- a/include/linux/platform_data/x86/intel-spi.h
+++ b/include/linux/platform_data/x86/spi-intel.h
@@ -6,8 +6,8 @@
  * Author: Mika Westerberg <mika.westerberg@linux.intel.com>
  */
 
-#ifndef INTEL_SPI_PDATA_H
-#define INTEL_SPI_PDATA_H
+#ifndef SPI_INTEL_PDATA_H
+#define SPI_INTEL_PDATA_H
 
 enum intel_spi_type {
 	INTEL_SPI_BYT = 1,
@@ -28,4 +28,4 @@ struct intel_spi_boardinfo {
 	void *data;
 };
 
-#endif /* INTEL_SPI_PDATA_H */
+#endif /* SPI_INTEL_PDATA_H */
-- 
GitLab


From 4ab13489735d9b5b6e91634eab83922914a3310c Mon Sep 17 00:00:00 2001
From: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Wed, 9 Feb 2022 15:27:06 +0300
Subject: [PATCH 0370/1586] Documentation / MTD: Rename the intel-spi driver

Since the driver is renamed (and moved) update the BIOS upgrade guide
accordingly from intel-spi to spi-intel. Keep the guide under MTD
documentation because this is pretty much still about MTD and SPI-NOR.

Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Mauro Lima <mauro.lima@eclypsium.com>
Reviewed-by: Tudor Ambarus <tudor.ambarus@microchip.com>
Link: https://lore.kernel.org/r/20220209122706.42439-4-mika.westerberg@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 Documentation/driver-api/mtd/index.rst                    | 2 +-
 .../driver-api/mtd/{intel-spi.rst => spi-intel.rst}       | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)
 rename Documentation/driver-api/mtd/{intel-spi.rst => spi-intel.rst} (94%)

diff --git a/Documentation/driver-api/mtd/index.rst b/Documentation/driver-api/mtd/index.rst
index 436ba5a851d7c..6a4278f409d78 100644
--- a/Documentation/driver-api/mtd/index.rst
+++ b/Documentation/driver-api/mtd/index.rst
@@ -7,6 +7,6 @@ Memory Technology Device (MTD)
 .. toctree::
    :maxdepth: 1
 
-   intel-spi
+   spi-intel
    nand_ecc
    spi-nor
diff --git a/Documentation/driver-api/mtd/intel-spi.rst b/Documentation/driver-api/mtd/spi-intel.rst
similarity index 94%
rename from Documentation/driver-api/mtd/intel-spi.rst
rename to Documentation/driver-api/mtd/spi-intel.rst
index 0465f68792625..df854f20ead15 100644
--- a/Documentation/driver-api/mtd/intel-spi.rst
+++ b/Documentation/driver-api/mtd/spi-intel.rst
@@ -1,5 +1,5 @@
 ==============================
-Upgrading BIOS using intel-spi
+Upgrading BIOS using spi-intel
 ==============================
 
 Many Intel CPUs like Baytrail and Braswell include SPI serial flash host
@@ -11,12 +11,12 @@ avoid accidental (or on purpose) overwrite of the content.
 Not all manufacturers protect the SPI serial flash, mainly because it
 allows upgrading the BIOS image directly from an OS.
 
-The intel-spi driver makes it possible to read and write the SPI serial
+The spi-intel driver makes it possible to read and write the SPI serial
 flash, if certain protection bits are not set and locked. If it finds
 any of them set, the whole MTD device is made read-only to prevent
 partial overwrites. By default the driver exposes SPI serial flash
 contents as read-only but it can be changed from kernel command line,
-passing "intel-spi.writeable=1".
+passing "spi_intel.writeable=1".
 
 Please keep in mind that overwriting the BIOS image on SPI serial flash
 might render the machine unbootable and requires special equipment like
@@ -32,7 +32,7 @@ Linux.
     serial flash. Distros like Debian and Fedora have this prepackaged with
     name "mtd-utils".
 
- 3) Add "intel-spi.writeable=1" to the kernel command line and reboot
+ 3) Add "spi_intel.writeable=1" to the kernel command line and reboot
     the board (you can also reload the driver passing "writeable=1" as
     module parameter to modprobe).
 
-- 
GitLab


From 5790597d7113faabb1714d3d1efa268e36eb4811 Mon Sep 17 00:00:00 2001
From: Li-hao Kuo <lhjeff911@gmail.com>
Date: Mon, 14 Feb 2022 10:20:11 +0800
Subject: [PATCH 0371/1586] spi: Fix warning for Clang build and simplify code

Clang build fails with
spi-sunplus-sp7021.c:405:2: error: variable 'ret' is used
  uninitialized whenever switch default is taken
        default:

simplify code

Restore initializing ret. and add return error at default

Fixes: 47e8fe57a66f ("spi: Modify irq request position and modify parameters")
Reported-by: Tom Rix <trix@redhat.com>
Reported-by: kernel test robot <lkp@intel.com>
Reported-by: Nathan Chancellor <nathan@kernel.org>
Reported-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Li-hao Kuo <lhjeff911@gmail.com>
Link: https://lore.kernel.org/r/7d91e6ce29f9a8df2c53a47b4b977664020e237a.1644805060.git.lhjeff911@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-sunplus-sp7021.c | 38 ++++++++------------------------
 1 file changed, 9 insertions(+), 29 deletions(-)

diff --git a/drivers/spi/spi-sunplus-sp7021.c b/drivers/spi/spi-sunplus-sp7021.c
index ba5ed9f7277a3..ade7a0fca8cb3 100644
--- a/drivers/spi/spi-sunplus-sp7021.c
+++ b/drivers/spi/spi-sunplus-sp7021.c
@@ -69,12 +69,6 @@
 #define SP7021_SPI_DATA_SIZE		(255)
 #define SP7021_FIFO_DATA_LEN		(16)
 
-enum SP_SPI_MODE {
-	SP7021_SLAVE_READ = 0,
-	SP7021_SLAVE_WRITE = 1,
-	SP7021_SPI_IDLE = 2,
-};
-
 enum {
 	SP7021_MASTER_MODE = 0,
 	SP7021_SLAVE_MODE = 1,
@@ -375,40 +369,26 @@ static int sp7021_spi_slave_transfer_one(struct spi_controller *ctlr, struct spi
 {
 	struct sp7021_spi_ctlr *pspim = spi_master_get_devdata(ctlr);
 	struct device *dev = pspim->dev;
-	int mode, ret;
+	int ret;
 
-	mode = SP7021_SPI_IDLE;
-	if (xfer->tx_buf && xfer->rx_buf) {
-		dev_dbg(&ctlr->dev, "%s() wrong command\n", __func__);
-		return -EINVAL;
-	} else if (xfer->tx_buf) {
+	if (xfer->tx_buf && !xfer->rx_buf) {
 		xfer->tx_dma = dma_map_single(dev, (void *)xfer->tx_buf,
 					      xfer->len, DMA_TO_DEVICE);
 		if (dma_mapping_error(dev, xfer->tx_dma))
 			return -ENOMEM;
-		mode = SP7021_SLAVE_WRITE;
-	} else if (xfer->rx_buf) {
+		 ret = sp7021_spi_slave_tx(spi, xfer);
+		 dma_unmap_single(dev, xfer->tx_dma, xfer->len, DMA_TO_DEVICE);
+	} else if (xfer->rx_buf && !xfer->tx_buf) {
 		xfer->rx_dma = dma_map_single(dev, xfer->rx_buf, xfer->len,
 					      DMA_FROM_DEVICE);
 		if (dma_mapping_error(dev, xfer->rx_dma))
 			return -ENOMEM;
-		mode = SP7021_SLAVE_READ;
-	}
-
-	switch (mode) {
-	case SP7021_SLAVE_WRITE:
-		ret = sp7021_spi_slave_tx(spi, xfer);
-		break;
-	case SP7021_SLAVE_READ:
 		ret = sp7021_spi_slave_rx(spi, xfer);
-		break;
-	default:
-		break;
-	}
-	if (xfer->tx_buf)
-		dma_unmap_single(dev, xfer->tx_dma, xfer->len, DMA_TO_DEVICE);
-	if (xfer->rx_buf)
 		dma_unmap_single(dev, xfer->rx_dma, xfer->len, DMA_FROM_DEVICE);
+	} else {
+		dev_dbg(&ctlr->dev, "%s() wrong command\n", __func__);
+		return -EINVAL;
+	}
 
 	spi_finalize_current_transfer(ctlr);
 	return ret;
-- 
GitLab


From f48dc6b9664963107e500aecfc2f4df27dc5afb6 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Fri, 11 Feb 2022 00:19:54 +0100
Subject: [PATCH 0372/1586] spi: Retire legacy GPIO handling

All drivers using GPIOs as chip select have been rewritten to use
GPIO descriptors passing the ->use_gpio_descriptors flag. Retire
the code and fields used by the legacy GPIO API.

Do not drop the ->use_gpio_descriptors flag: it now only indicates
that we want to use GPIOs in addition to native chip selects.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20220210231954.807904-1-linus.walleij@linaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi.c       | 125 +++++++++-------------------------------
 include/linux/spi/spi.h |  14 +----
 2 files changed, 30 insertions(+), 109 deletions(-)

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index d9832d8e9f446..85f8ae4cc0c0d 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -18,7 +18,6 @@
 #include <linux/mod_devicetable.h>
 #include <linux/spi/spi.h>
 #include <linux/spi/spi-mem.h>
-#include <linux/of_gpio.h>
 #include <linux/gpio/consumer.h>
 #include <linux/pm_runtime.h>
 #include <linux/pm_domain.h>
@@ -542,7 +541,6 @@ struct spi_device *spi_alloc_device(struct spi_controller *ctlr)
 	spi->dev.parent = &ctlr->dev;
 	spi->dev.bus = &spi_bus_type;
 	spi->dev.release = spidev_release;
-	spi->cs_gpio = -ENOENT;
 	spi->mode = ctlr->buswidth_override_bits;
 
 	spin_lock_init(&spi->statistics.lock);
@@ -606,11 +604,8 @@ static int __spi_add_device(struct spi_device *spi)
 		return -ENODEV;
 	}
 
-	/* Descriptors take precedence */
 	if (ctlr->cs_gpiods)
 		spi->cs_gpiod = ctlr->cs_gpiods[spi->chip_select];
-	else if (ctlr->cs_gpios)
-		spi->cs_gpio = ctlr->cs_gpios[spi->chip_select];
 
 	/*
 	 * Drivers may modify this initial i/o setup, but will
@@ -940,39 +935,30 @@ static void spi_set_cs(struct spi_device *spi, bool enable, bool force)
 	spi->controller->last_cs_enable = enable;
 	spi->controller->last_cs_mode_high = spi->mode & SPI_CS_HIGH;
 
-	if ((spi->cs_gpiod || gpio_is_valid(spi->cs_gpio) ||
-	    !spi->controller->set_cs_timing) && !activate) {
+	if ((spi->cs_gpiod || !spi->controller->set_cs_timing) && !activate) {
 		spi_delay_exec(&spi->cs_hold, NULL);
 	}
 
 	if (spi->mode & SPI_CS_HIGH)
 		enable = !enable;
 
-	if (spi->cs_gpiod || gpio_is_valid(spi->cs_gpio)) {
+	if (spi->cs_gpiod) {
 		if (!(spi->mode & SPI_NO_CS)) {
-			if (spi->cs_gpiod) {
-				/*
-				 * Historically ACPI has no means of the GPIO polarity and
-				 * thus the SPISerialBus() resource defines it on the per-chip
-				 * basis. In order to avoid a chain of negations, the GPIO
-				 * polarity is considered being Active High. Even for the cases
-				 * when _DSD() is involved (in the updated versions of ACPI)
-				 * the GPIO CS polarity must be defined Active High to avoid
-				 * ambiguity. That's why we use enable, that takes SPI_CS_HIGH
-				 * into account.
-				 */
-				if (has_acpi_companion(&spi->dev))
-					gpiod_set_value_cansleep(spi->cs_gpiod, !enable);
-				else
-					/* Polarity handled by GPIO library */
-					gpiod_set_value_cansleep(spi->cs_gpiod, activate);
-			} else {
-				/*
-				 * Invert the enable line, as active low is
-				 * default for SPI.
-				 */
-				gpio_set_value_cansleep(spi->cs_gpio, !enable);
-			}
+			/*
+			 * Historically ACPI has no means of the GPIO polarity and
+			 * thus the SPISerialBus() resource defines it on the per-chip
+			 * basis. In order to avoid a chain of negations, the GPIO
+			 * polarity is considered being Active High. Even for the cases
+			 * when _DSD() is involved (in the updated versions of ACPI)
+			 * the GPIO CS polarity must be defined Active High to avoid
+			 * ambiguity. That's why we use enable, that takes SPI_CS_HIGH
+			 * into account.
+			 */
+			if (has_acpi_companion(&spi->dev))
+				gpiod_set_value_cansleep(spi->cs_gpiod, !enable);
+			else
+				/* Polarity handled by GPIO library */
+				gpiod_set_value_cansleep(spi->cs_gpiod, activate);
 		}
 		/* Some SPI masters need both GPIO CS & slave_select */
 		if ((spi->controller->flags & SPI_MASTER_GPIO_SS) &&
@@ -982,8 +968,7 @@ static void spi_set_cs(struct spi_device *spi, bool enable, bool force)
 		spi->controller->set_cs(spi, !enable);
 	}
 
-	if (spi->cs_gpiod || gpio_is_valid(spi->cs_gpio) ||
-	    !spi->controller->set_cs_timing) {
+	if (spi->cs_gpiod || !spi->controller->set_cs_timing) {
 		if (activate)
 			spi_delay_exec(&spi->cs_setup, NULL);
 		else
@@ -2827,46 +2812,6 @@ struct spi_controller *__devm_spi_alloc_controller(struct device *dev,
 }
 EXPORT_SYMBOL_GPL(__devm_spi_alloc_controller);
 
-#ifdef CONFIG_OF
-static int of_spi_get_gpio_numbers(struct spi_controller *ctlr)
-{
-	int nb, i, *cs;
-	struct device_node *np = ctlr->dev.of_node;
-
-	if (!np)
-		return 0;
-
-	nb = of_gpio_named_count(np, "cs-gpios");
-	ctlr->num_chipselect = max_t(int, nb, ctlr->num_chipselect);
-
-	/* Return error only for an incorrectly formed cs-gpios property */
-	if (nb == 0 || nb == -ENOENT)
-		return 0;
-	else if (nb < 0)
-		return nb;
-
-	cs = devm_kcalloc(&ctlr->dev, ctlr->num_chipselect, sizeof(int),
-			  GFP_KERNEL);
-	ctlr->cs_gpios = cs;
-
-	if (!ctlr->cs_gpios)
-		return -ENOMEM;
-
-	for (i = 0; i < ctlr->num_chipselect; i++)
-		cs[i] = -ENOENT;
-
-	for (i = 0; i < nb; i++)
-		cs[i] = of_get_named_gpio(np, "cs-gpios", i);
-
-	return 0;
-}
-#else
-static int of_spi_get_gpio_numbers(struct spi_controller *ctlr)
-{
-	return 0;
-}
-#endif
-
 /**
  * spi_get_gpio_descs() - grab chip select GPIOs for the master
  * @ctlr: The SPI master to grab GPIO descriptors for
@@ -3051,22 +2996,15 @@ int spi_register_controller(struct spi_controller *ctlr)
 	 */
 	dev_set_name(&ctlr->dev, "spi%u", ctlr->bus_num);
 
-	if (!spi_controller_is_slave(ctlr)) {
-		if (ctlr->use_gpio_descriptors) {
-			status = spi_get_gpio_descs(ctlr);
-			if (status)
-				goto free_bus_id;
-			/*
-			 * A controller using GPIO descriptors always
-			 * supports SPI_CS_HIGH if need be.
-			 */
-			ctlr->mode_bits |= SPI_CS_HIGH;
-		} else {
-			/* Legacy code path for GPIOs from DT */
-			status = of_spi_get_gpio_numbers(ctlr);
-			if (status)
-				goto free_bus_id;
-		}
+	if (!spi_controller_is_slave(ctlr) && ctlr->use_gpio_descriptors) {
+		status = spi_get_gpio_descs(ctlr);
+		if (status)
+			goto free_bus_id;
+		/*
+		 * A controller using GPIO descriptors always
+		 * supports SPI_CS_HIGH if need be.
+		 */
+		ctlr->mode_bits |= SPI_CS_HIGH;
 	}
 
 	/*
@@ -3555,12 +3493,6 @@ int spi_setup(struct spi_device *spi)
 	 */
 	bad_bits = spi->mode & ~(spi->controller->mode_bits | SPI_CS_WORD |
 				 SPI_NO_TX | SPI_NO_RX);
-	/*
-	 * Nothing prevents from working with active-high CS in case if it
-	 * is driven by GPIO.
-	 */
-	if (gpio_is_valid(spi->cs_gpio))
-		bad_bits &= ~SPI_CS_HIGH;
 	ugly_bits = bad_bits &
 		    (SPI_TX_DUAL | SPI_TX_QUAD | SPI_TX_OCTAL |
 		     SPI_RX_DUAL | SPI_RX_QUAD | SPI_RX_OCTAL);
@@ -3686,8 +3618,7 @@ static int __spi_validate(struct spi_device *spi, struct spi_message *message)
 	 * cs_change is set for each transfer.
 	 */
 	if ((spi->mode & SPI_CS_WORD) && (!(ctlr->mode_bits & SPI_CS_WORD) ||
-					  spi->cs_gpiod ||
-					  gpio_is_valid(spi->cs_gpio))) {
+					  spi->cs_gpiod)) {
 		size_t maxsize;
 		int ret;
 
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index c3746ff35691e..579d71cdf6fab 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -137,9 +137,6 @@ extern int spi_delay_exec(struct spi_delay *_delay, struct spi_transfer *xfer);
  *	for driver coldplugging, and in uevents used for hotplugging
  * @driver_override: If the name of a driver is written to this attribute, then
  *	the device will bind to the named driver and only the named driver.
- * @cs_gpio: LEGACY: gpio number of the chipselect line (optional, -ENOENT when
- *	not using a GPIO line) use cs_gpiod in new drivers by opting in on
- *	the spi_master.
  * @cs_gpiod: gpio descriptor of the chipselect line (optional, NULL when
  *	not using a GPIO line)
  * @word_delay: delay to be inserted between consecutive
@@ -186,7 +183,6 @@ struct spi_device {
 	void			*controller_data;
 	char			modalias[SPI_NAME_SIZE];
 	const char		*driver_override;
-	int			cs_gpio;	/* LEGACY: chip select gpio */
 	struct gpio_desc	*cs_gpiod;	/* chip select gpio desc */
 	struct spi_delay	word_delay; /* inter-word delay */
 	/* CS delays */
@@ -418,17 +414,12 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch
  *	     controller has native support for memory like operations.
  * @unprepare_message: undo any work done by prepare_message().
  * @slave_abort: abort the ongoing transfer request on an SPI slave controller
- * @cs_gpios: LEGACY: array of GPIO descs to use as chip select lines; one per
- *	CS number. Any individual value may be -ENOENT for CS lines that
- *	are not GPIOs (driven by the SPI controller itself). Use the cs_gpiods
- *	in new drivers.
  * @cs_gpiods: Array of GPIO descs to use as chip select lines; one per CS
  *	number. Any individual value may be NULL for CS lines that
  *	are not GPIOs (driven by the SPI controller itself).
  * @use_gpio_descriptors: Turns on the code in the SPI core to parse and grab
- *	GPIO descriptors rather than using global GPIO numbers grabbed by the
- *	driver. This will fill in @cs_gpiods and @cs_gpios should not be used,
- *	and SPI devices will have the cs_gpiod assigned rather than cs_gpio.
+ *	GPIO descriptors. This will fill in @cs_gpiods and SPI devices will have
+ *	the cs_gpiod assigned if a GPIO line is found for the chipselect.
  * @unused_native_cs: When cs_gpiods is used, spi_register_controller() will
  *	fill in this field with the first unused native CS, to be used by SPI
  *	controller drivers that need to drive a native CS when using GPIO CS.
@@ -642,7 +633,6 @@ struct spi_controller {
 	const struct spi_controller_mem_ops *mem_ops;
 
 	/* gpio chip select */
-	int			*cs_gpios;
 	struct gpio_desc	**cs_gpiods;
 	bool			use_gpio_descriptors;
 	s8			unused_native_cs;
-- 
GitLab


From 7ba564722d98e3e7bc3922ad4f2885ca0336674e Mon Sep 17 00:00:00 2001
From: Fenghua Yu <fenghua.yu@intel.com>
Date: Mon, 7 Feb 2022 15:02:44 -0800
Subject: [PATCH 0373/1586] iommu/sva: Rename CONFIG_IOMMU_SVA_LIB to
 CONFIG_IOMMU_SVA

This CONFIG option originally only referred to the Shared
Virtual Address (SVA) library. But it is now also used for
non-library portions of code.

Drop the "_LIB" suffix so that there is just one configuration
option for all code relating to SVA.

Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
Link: https://lore.kernel.org/r/20220207230254.3342514-2-fenghua.yu@intel.com
---
 drivers/iommu/Kconfig         | 6 +++---
 drivers/iommu/Makefile        | 2 +-
 drivers/iommu/intel/Kconfig   | 2 +-
 drivers/iommu/iommu-sva-lib.h | 6 +++---
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 3eb68fa1b8cc0..c79a0df090c08 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -144,8 +144,8 @@ config IOMMU_DMA
 	select IRQ_MSI_IOMMU
 	select NEED_SG_DMA_LENGTH
 
-# Shared Virtual Addressing library
-config IOMMU_SVA_LIB
+# Shared Virtual Addressing
+config IOMMU_SVA
 	bool
 	select IOASID
 
@@ -379,7 +379,7 @@ config ARM_SMMU_V3
 config ARM_SMMU_V3_SVA
 	bool "Shared Virtual Addressing support for the ARM SMMUv3"
 	depends on ARM_SMMU_V3
-	select IOMMU_SVA_LIB
+	select IOMMU_SVA
 	select MMU_NOTIFIER
 	help
 	  Support for sharing process address spaces with devices using the
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index bc7f730edbb0b..44475a9b3eeaf 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -27,6 +27,6 @@ obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o
 obj-$(CONFIG_S390_IOMMU) += s390-iommu.o
 obj-$(CONFIG_HYPERV_IOMMU) += hyperv-iommu.o
 obj-$(CONFIG_VIRTIO_IOMMU) += virtio-iommu.o
-obj-$(CONFIG_IOMMU_SVA_LIB) += iommu-sva-lib.o io-pgfault.o
+obj-$(CONFIG_IOMMU_SVA) += iommu-sva-lib.o io-pgfault.o
 obj-$(CONFIG_SPRD_IOMMU) += sprd-iommu.o
 obj-$(CONFIG_APPLE_DART) += apple-dart.o
diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig
index 247d0f2d5fdf7..39a06d245f12e 100644
--- a/drivers/iommu/intel/Kconfig
+++ b/drivers/iommu/intel/Kconfig
@@ -52,7 +52,7 @@ config INTEL_IOMMU_SVM
 	select PCI_PRI
 	select MMU_NOTIFIER
 	select IOASID
-	select IOMMU_SVA_LIB
+	select IOMMU_SVA
 	help
 	  Shared Virtual Memory (SVM) provides a facility for devices
 	  to access DMA resources through process address space by
diff --git a/drivers/iommu/iommu-sva-lib.h b/drivers/iommu/iommu-sva-lib.h
index 031155010ca85..95dc3ebc19283 100644
--- a/drivers/iommu/iommu-sva-lib.h
+++ b/drivers/iommu/iommu-sva-lib.h
@@ -17,7 +17,7 @@ struct device;
 struct iommu_fault;
 struct iopf_queue;
 
-#ifdef CONFIG_IOMMU_SVA_LIB
+#ifdef CONFIG_IOMMU_SVA
 int iommu_queue_iopf(struct iommu_fault *fault, void *cookie);
 
 int iopf_queue_add_device(struct iopf_queue *queue, struct device *dev);
@@ -28,7 +28,7 @@ struct iopf_queue *iopf_queue_alloc(const char *name);
 void iopf_queue_free(struct iopf_queue *queue);
 int iopf_queue_discard_partial(struct iopf_queue *queue);
 
-#else /* CONFIG_IOMMU_SVA_LIB */
+#else /* CONFIG_IOMMU_SVA */
 static inline int iommu_queue_iopf(struct iommu_fault *fault, void *cookie)
 {
 	return -ENODEV;
@@ -64,5 +64,5 @@ static inline int iopf_queue_discard_partial(struct iopf_queue *queue)
 {
 	return -ENODEV;
 }
-#endif /* CONFIG_IOMMU_SVA_LIB */
+#endif /* CONFIG_IOMMU_SVA */
 #endif /* _IOMMU_SVA_LIB_H */
-- 
GitLab


From 7a853c2d5951419fdf3c1c9d2b6f5a38f6a6857d Mon Sep 17 00:00:00 2001
From: Fenghua Yu <fenghua.yu@intel.com>
Date: Mon, 7 Feb 2022 15:02:45 -0800
Subject: [PATCH 0374/1586] mm: Change CONFIG option for mm->pasid field

This currently depends on CONFIG_IOMMU_SUPPORT. But it is only
needed when CONFIG_IOMMU_SVA option is enabled.

Change the CONFIG guards around definition and initialization
of mm->pasid field.

Suggested-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
Link: https://lore.kernel.org/r/20220207230254.3342514-3-fenghua.yu@intel.com
---
 include/linux/mm_types.h | 2 +-
 kernel/fork.c            | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 5140e5feb4866..c5cbfd7915adb 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -631,7 +631,7 @@ struct mm_struct {
 #endif
 		struct work_struct async_put_work;
 
-#ifdef CONFIG_IOMMU_SUPPORT
+#ifdef CONFIG_IOMMU_SVA
 		u32 pasid;
 #endif
 	} __randomize_layout;
diff --git a/kernel/fork.c b/kernel/fork.c
index d75a528f7b219..6ee7551d3bd25 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1021,7 +1021,7 @@ static void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
 
 static void mm_init_pasid(struct mm_struct *mm)
 {
-#ifdef CONFIG_IOMMU_SUPPORT
+#ifdef CONFIG_IOMMU_SVA
 	mm->pasid = INIT_PASID;
 #endif
 }
-- 
GitLab


From bd265aadd1b7eabd4bf381dc4b0bc48ebd569e88 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Wed, 1 Dec 2021 06:27:59 -0800
Subject: [PATCH 0375/1586] MAINTAINERS:  Add Frederic and Neeraj to their RCU
 files

Adding Frederic as an RCU maintainer for kernel/rcu/tree_nocb.h given his
work with offloading and de-offloading callbacks from CPUs.  Also adding
Neeraj for kernel/rcu/tasks.h given his focused work on RCU Tasks Trace.
As in I am reasonably certain that each understands the full contents
of the corresponding file.

Cc: Neeraj Upadhyay <quic_neeraju@quicinc.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Acked-by: Frederic Weisbecker <frederic@kernel.org>
---
 MAINTAINERS | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index ea3e6c9143848..5ad49de532b1a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -16298,6 +16298,8 @@ F:	tools/testing/selftests/resctrl/
 
 READ-COPY UPDATE (RCU)
 M:	"Paul E. McKenney" <paulmck@kernel.org>
+M:	Frederic Weisbecker <frederic@kernel.org> (kernel/rcu/tree_nocb.h)
+M:	Neeraj Upadhyay <quic_neeraju@quicinc.com> (kernel/rcu/tasks.h)
 M:	Josh Triplett <josh@joshtriplett.org>
 R:	Steven Rostedt <rostedt@goodmis.org>
 R:	Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
-- 
GitLab


From 150154aae4311e7e6458903baecdc8fffe981ed3 Mon Sep 17 00:00:00 2001
From: "Uladzislau Rezki (Sony)" <urezki@gmail.com>
Date: Wed, 1 Dec 2021 10:20:53 +0100
Subject: [PATCH 0376/1586] rcu: Fix description of kvfree_rcu()

The kvfree_rcu() header comment's description of the "ptr" parameter
is unclear, therefore rephrase it to make it clear that it is a pointer
to the memory to eventually be passed to kvfree().

Reported-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 include/linux/rcupdate.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 88b42eb464068..9d7df8d36af07 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -924,7 +924,7 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
  *
  *     kvfree_rcu(ptr);
  *
- * where @ptr is a pointer to kvfree().
+ * where @ptr is the pointer to be freed by kvfree().
  *
  * Please note, head-less way of freeing is permitted to
  * use from a context that has to follow might_sleep()
-- 
GitLab


From 218b957a6959a2fb5b3967fc824072bb89ac2611 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw@amazon.co.uk>
Date: Wed, 8 Dec 2021 23:41:53 +0000
Subject: [PATCH 0377/1586] rcu: Add mutex for rcu boost kthread spawning and
 affinity setting

As we handle parallel CPU bringup, we will need to take care to avoid
spawning multiple boost threads, or race conditions when setting their
affinity. Spotted by Paul McKenney.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/tree.c        |  1 +
 kernel/rcu/tree.h        |  3 +++
 kernel/rcu/tree_plugin.h | 10 ++++++++--
 3 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index a4c25a6283b0b..d1d1a8c51223b 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -4570,6 +4570,7 @@ static void __init rcu_init_one(void)
 			init_waitqueue_head(&rnp->exp_wq[2]);
 			init_waitqueue_head(&rnp->exp_wq[3]);
 			spin_lock_init(&rnp->exp_lock);
+			mutex_init(&rnp->boost_kthread_mutex);
 		}
 	}
 
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 486fc901bd085..3b8b60de07c38 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -110,6 +110,9 @@ struct rcu_node {
 				/*  side effect, not as a lock. */
 	unsigned long boost_time;
 				/* When to start boosting (jiffies). */
+	struct mutex boost_kthread_mutex;
+				/* Exclusion for thread spawning and affinity */
+				/*  manipulation. */
 	struct task_struct *boost_kthread_task;
 				/* kthread that takes care of priority */
 				/*  boosting for this rcu_node structure. */
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index c5b45c2f68a15..07845dcd33c5e 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -1172,15 +1172,16 @@ static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp)
 	struct sched_param sp;
 	struct task_struct *t;
 
+	mutex_lock(&rnp->boost_kthread_mutex);
 	if (rnp->boost_kthread_task || !rcu_scheduler_fully_active)
-		return;
+		goto out;
 
 	rcu_state.boost = 1;
 
 	t = kthread_create(rcu_boost_kthread, (void *)rnp,
 			   "rcub/%d", rnp_index);
 	if (WARN_ON_ONCE(IS_ERR(t)))
-		return;
+		goto out;
 
 	raw_spin_lock_irqsave_rcu_node(rnp, flags);
 	rnp->boost_kthread_task = t;
@@ -1188,6 +1189,9 @@ static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp)
 	sp.sched_priority = kthread_prio;
 	sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
 	wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */
+
+ out:
+	mutex_unlock(&rnp->boost_kthread_mutex);
 }
 
 /*
@@ -1210,6 +1214,7 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
 		return;
 	if (!zalloc_cpumask_var(&cm, GFP_KERNEL))
 		return;
+	mutex_lock(&rnp->boost_kthread_mutex);
 	for_each_leaf_node_possible_cpu(rnp, cpu)
 		if ((mask & leaf_node_cpu_bit(rnp, cpu)) &&
 		    cpu != outgoingcpu)
@@ -1218,6 +1223,7 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
 	if (cpumask_weight(cm) == 0)
 		cpumask_copy(cm, housekeeping_cpumask(HK_FLAG_RCU));
 	set_cpus_allowed_ptr(t, cm);
+	mutex_unlock(&rnp->boost_kthread_mutex);
 	free_cpumask_var(cm);
 }
 
-- 
GitLab


From 1fe09ebe7a9c9907f516779fbe4954165dd01529 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Sat, 18 Dec 2021 09:30:33 -0800
Subject: [PATCH 0378/1586] rcu: Inline __call_rcu() into call_rcu()

Because __call_rcu() is invoked only by call_rcu(), this commit inlines
the former into the latter.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/tree.c | 91 ++++++++++++++++++++++-------------------------
 1 file changed, 42 insertions(+), 49 deletions(-)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index d1d1a8c51223b..f1bb7ccc00847 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2995,9 +2995,47 @@ static void check_cb_ovld(struct rcu_data *rdp)
 	raw_spin_unlock_rcu_node(rnp);
 }
 
-/* Helper function for call_rcu() and friends.  */
-static void
-__call_rcu(struct rcu_head *head, rcu_callback_t func)
+/**
+ * call_rcu() - Queue an RCU callback for invocation after a grace period.
+ * @head: structure to be used for queueing the RCU updates.
+ * @func: actual callback function to be invoked after the grace period
+ *
+ * The callback function will be invoked some time after a full grace
+ * period elapses, in other words after all pre-existing RCU read-side
+ * critical sections have completed.  However, the callback function
+ * might well execute concurrently with RCU read-side critical sections
+ * that started after call_rcu() was invoked.
+ *
+ * RCU read-side critical sections are delimited by rcu_read_lock()
+ * and rcu_read_unlock(), and may be nested.  In addition, but only in
+ * v5.0 and later, regions of code across which interrupts, preemption,
+ * or softirqs have been disabled also serve as RCU read-side critical
+ * sections.  This includes hardware interrupt handlers, softirq handlers,
+ * and NMI handlers.
+ *
+ * Note that all CPUs must agree that the grace period extended beyond
+ * all pre-existing RCU read-side critical section.  On systems with more
+ * than one CPU, this means that when "func()" is invoked, each CPU is
+ * guaranteed to have executed a full memory barrier since the end of its
+ * last RCU read-side critical section whose beginning preceded the call
+ * to call_rcu().  It also means that each CPU executing an RCU read-side
+ * critical section that continues beyond the start of "func()" must have
+ * executed a memory barrier after the call_rcu() but before the beginning
+ * of that RCU read-side critical section.  Note that these guarantees
+ * include CPUs that are offline, idle, or executing in user mode, as
+ * well as CPUs that are executing in the kernel.
+ *
+ * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the
+ * resulting RCU callback function "func()", then both CPU A and CPU B are
+ * guaranteed to execute a full memory barrier during the time interval
+ * between the call to call_rcu() and the invocation of "func()" -- even
+ * if CPU A and CPU B are the same CPU (but again only if the system has
+ * more than one CPU).
+ *
+ * Implementation of these memory-ordering guarantees is described here:
+ * Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst.
+ */
+void call_rcu(struct rcu_head *head, rcu_callback_t func)
 {
 	static atomic_t doublefrees;
 	unsigned long flags;
@@ -3011,7 +3049,7 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func)
 		/*
 		 * Probable double call_rcu(), so leak the callback.
 		 * Use rcu:rcu_callback trace event to find the previous
-		 * time callback was passed to __call_rcu().
+		 * time callback was passed to call_rcu().
 		 */
 		if (atomic_inc_return(&doublefrees) < 4) {
 			pr_err("%s(): Double-freed CB %p->%pS()!!!  ", __func__, head, head->func);
@@ -3060,51 +3098,6 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func)
 		local_irq_restore(flags);
 	}
 }
-
-/**
- * call_rcu() - Queue an RCU callback for invocation after a grace period.
- * @head: structure to be used for queueing the RCU updates.
- * @func: actual callback function to be invoked after the grace period
- *
- * The callback function will be invoked some time after a full grace
- * period elapses, in other words after all pre-existing RCU read-side
- * critical sections have completed.  However, the callback function
- * might well execute concurrently with RCU read-side critical sections
- * that started after call_rcu() was invoked.
- *
- * RCU read-side critical sections are delimited by rcu_read_lock()
- * and rcu_read_unlock(), and may be nested.  In addition, but only in
- * v5.0 and later, regions of code across which interrupts, preemption,
- * or softirqs have been disabled also serve as RCU read-side critical
- * sections.  This includes hardware interrupt handlers, softirq handlers,
- * and NMI handlers.
- *
- * Note that all CPUs must agree that the grace period extended beyond
- * all pre-existing RCU read-side critical section.  On systems with more
- * than one CPU, this means that when "func()" is invoked, each CPU is
- * guaranteed to have executed a full memory barrier since the end of its
- * last RCU read-side critical section whose beginning preceded the call
- * to call_rcu().  It also means that each CPU executing an RCU read-side
- * critical section that continues beyond the start of "func()" must have
- * executed a memory barrier after the call_rcu() but before the beginning
- * of that RCU read-side critical section.  Note that these guarantees
- * include CPUs that are offline, idle, or executing in user mode, as
- * well as CPUs that are executing in the kernel.
- *
- * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the
- * resulting RCU callback function "func()", then both CPU A and CPU B are
- * guaranteed to execute a full memory barrier during the time interval
- * between the call to call_rcu() and the invocation of "func()" -- even
- * if CPU A and CPU B are the same CPU (but again only if the system has
- * more than one CPU).
- *
- * Implementation of these memory-ordering guarantees is described here:
- * Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst.
- */
-void call_rcu(struct rcu_head *head, rcu_callback_t func)
-{
-	__call_rcu(head, func);
-}
 EXPORT_SYMBOL_GPL(call_rcu);
 
 
-- 
GitLab


From d818cc76e2b4d5f6cebf8c7ce1160d652d7e572b Mon Sep 17 00:00:00 2001
From: Zqiang <qiang1.zhang@intel.com>
Date: Sun, 26 Dec 2021 08:52:04 +0800
Subject: [PATCH 0379/1586] kasan: Record work creation stack trace with
 interrupts enabled

Recording the work creation stack trace for KASAN reports in
call_rcu() is expensive, due to unwinding the stack, but also
due to acquiring depot_lock inside stackdepot (which may be contended).
Because calling kasan_record_aux_stack_noalloc() does not require
interrupts to already be disabled, this may unnecessarily extend
the time with interrupts disabled.

Therefore, move calling kasan_record_aux_stack() before the section
with interrupts disabled.

Acked-by: Marco Elver <elver@google.com>
Signed-off-by: Zqiang <qiang1.zhang@intel.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/tree.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index f1bb7ccc00847..ca8d7dd026eeb 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -3060,8 +3060,8 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func)
 	}
 	head->func = func;
 	head->next = NULL;
-	local_irq_save(flags);
 	kasan_record_aux_stack_noalloc(head);
+	local_irq_save(flags);
 	rdp = this_cpu_ptr(&rcu_data);
 
 	/* Add the callback to our list. */
-- 
GitLab


From c09929031018913b5783872a8b8cdddef4a543c7 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Tue, 4 Jan 2022 10:34:34 -0800
Subject: [PATCH 0380/1586] rcu: Mark writes to the rcu_segcblist structure's
 ->flags field

KCSAN reports data races between the rcu_segcblist_clear_flags() and
rcu_segcblist_set_flags() functions, though misreporting the latter
as a call to rcu_segcblist_is_enabled() from call_rcu().  This commit
converts the updates of this field to WRITE_ONCE(), relying on the
resulting unmarked reads to continue to detect buggy concurrent writes
to this field.

Reported-by: Zhouyi Zhou <zhouzhouyi@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Cc: Frederic Weisbecker <frederic@kernel.org>
---
 kernel/rcu/rcu_segcblist.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/rcu/rcu_segcblist.h b/kernel/rcu/rcu_segcblist.h
index e373fbe44da5e..431cee212467d 100644
--- a/kernel/rcu/rcu_segcblist.h
+++ b/kernel/rcu/rcu_segcblist.h
@@ -56,13 +56,13 @@ static inline long rcu_segcblist_n_cbs(struct rcu_segcblist *rsclp)
 static inline void rcu_segcblist_set_flags(struct rcu_segcblist *rsclp,
 					   int flags)
 {
-	rsclp->flags |= flags;
+	WRITE_ONCE(rsclp->flags, rsclp->flags | flags);
 }
 
 static inline void rcu_segcblist_clear_flags(struct rcu_segcblist *rsclp,
 					     int flags)
 {
-	rsclp->flags &= ~flags;
+	WRITE_ONCE(rsclp->flags, rsclp->flags & ~flags);
 }
 
 static inline bool rcu_segcblist_test_flags(struct rcu_segcblist *rsclp,
-- 
GitLab


From 58d4292bd037b01fbb940a5170817f7d40caa9d5 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Fri, 14 Jan 2022 16:07:28 -0800
Subject: [PATCH 0381/1586] rcu: Uninline multi-use function: finish_rcuwait()

This is a rarely used function, so uninlining its 3 instructions
is probably a win or a wash - but the main motivation is to
make <linux/rcuwait.h> independent of task_struct details.

Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 include/linux/rcuwait.h | 6 +-----
 kernel/rcu/update.c     | 7 +++++++
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/include/linux/rcuwait.h b/include/linux/rcuwait.h
index 61c56cca95c43..8052d34da7826 100644
--- a/include/linux/rcuwait.h
+++ b/include/linux/rcuwait.h
@@ -47,11 +47,7 @@ static inline void prepare_to_rcuwait(struct rcuwait *w)
 	rcu_assign_pointer(w->task, current);
 }
 
-static inline void finish_rcuwait(struct rcuwait *w)
-{
-        rcu_assign_pointer(w->task, NULL);
-	__set_current_state(TASK_RUNNING);
-}
+extern void finish_rcuwait(struct rcuwait *w);
 
 #define rcuwait_wait_event(w, condition, state)				\
 ({									\
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 156892c22bb56..180ff9c41fa87 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -407,6 +407,13 @@ void __wait_rcu_gp(bool checktiny, int n, call_rcu_func_t *crcu_array,
 }
 EXPORT_SYMBOL_GPL(__wait_rcu_gp);
 
+void finish_rcuwait(struct rcuwait *w)
+{
+	rcu_assign_pointer(w->task, NULL);
+	__set_current_state(TASK_RUNNING);
+}
+EXPORT_SYMBOL_GPL(finish_rcuwait);
+
 #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
 void init_rcu_head(struct rcu_head *head)
 {
-- 
GitLab


From e6339d3b443c436c3b8f45eefec2212a8c07065d Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Fri, 14 Jan 2022 16:16:55 -0800
Subject: [PATCH 0382/1586] rcu: Remove __read_mostly annotations from
 rcu_scheduler_active externs

Remove the __read_mostly attributes from the rcu_scheduler_active
extern declarations, because these attributes are ignored for
prototypes and we'd have to include the full <linux/cache.h> header
to gain this functionally pointless attribute defined.

Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 include/linux/rcupdate.h | 2 +-
 include/linux/rcutree.h  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 9d7df8d36af07..e7c39c200e2b0 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -84,7 +84,7 @@ static inline int rcu_preempt_depth(void)
 
 /* Internal to kernel */
 void rcu_init(void);
-extern int rcu_scheduler_active __read_mostly;
+extern int rcu_scheduler_active;
 void rcu_sched_clock_irq(int user);
 void rcu_report_dead(unsigned int cpu);
 void rcutree_migrate_callbacks(int cpu);
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 53209d6694001..76665db179fa1 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -62,7 +62,7 @@ static inline void rcu_irq_exit_check_preempt(void) { }
 void exit_rcu(void);
 
 void rcu_scheduler_starting(void);
-extern int rcu_scheduler_active __read_mostly;
+extern int rcu_scheduler_active;
 void rcu_end_inkernel_boot(void);
 bool rcu_inkernel_boot_has_ended(void);
 bool rcu_is_watching(void);
-- 
GitLab


From 6a2c1d450a6a328027280a854019c55de989e14e Mon Sep 17 00:00:00 2001
From: Yury Norov <yury.norov@gmail.com>
Date: Sun, 23 Jan 2022 10:38:53 -0800
Subject: [PATCH 0383/1586] rcu: Replace cpumask_weight with cpumask_empty
 where appropriate

In some places, RCU code calls cpumask_weight() to check if any bit of a
given cpumask is set. We can do it more efficiently with cpumask_empty()
because cpumask_empty() stops traversing the cpumask as soon as it finds
first set bit, while cpumask_weight() counts all bits unconditionally.

Signed-off-by: Yury Norov <yury.norov@gmail.com>
Acked-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/tree_nocb.h   | 4 ++--
 kernel/rcu/tree_plugin.h | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
index eeafb546a7a09..f83c7b1d61103 100644
--- a/kernel/rcu/tree_nocb.h
+++ b/kernel/rcu/tree_nocb.h
@@ -1169,7 +1169,7 @@ void __init rcu_init_nohz(void)
 	struct rcu_data *rdp;
 
 #if defined(CONFIG_NO_HZ_FULL)
-	if (tick_nohz_full_running && cpumask_weight(tick_nohz_full_mask))
+	if (tick_nohz_full_running && !cpumask_empty(tick_nohz_full_mask))
 		need_rcu_nocb_mask = true;
 #endif /* #if defined(CONFIG_NO_HZ_FULL) */
 
@@ -1348,7 +1348,7 @@ static void __init rcu_organize_nocb_kthreads(void)
  */
 void rcu_bind_current_to_nocb(void)
 {
-	if (cpumask_available(rcu_nocb_mask) && cpumask_weight(rcu_nocb_mask))
+	if (cpumask_available(rcu_nocb_mask) && !cpumask_empty(rcu_nocb_mask))
 		WARN_ON(sched_setaffinity(current->pid, rcu_nocb_mask));
 }
 EXPORT_SYMBOL_GPL(rcu_bind_current_to_nocb);
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 07845dcd33c5e..efd0c87d2ffae 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -1220,7 +1220,7 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
 		    cpu != outgoingcpu)
 			cpumask_set_cpu(cpu, cm);
 	cpumask_and(cm, cm, housekeeping_cpumask(HK_FLAG_RCU));
-	if (cpumask_weight(cm) == 0)
+	if (cpumask_empty(cm))
 		cpumask_copy(cm, housekeeping_cpumask(HK_FLAG_RCU));
 	set_cpus_allowed_ptr(t, cm);
 	mutex_unlock(&rnp->boost_kthread_mutex);
-- 
GitLab


From 631e3893c35e116d16b81b41bea6ba2143db4fa4 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 1 Feb 2022 20:18:10 +0100
Subject: [PATCH 0384/1586] ACPI: PM: Print additional debug message in
 acpi_s2idle_wake()

Make acpi_s2idle_wake() print an additional debug message when the
SCI is going to be rearmed for system wakeup to help diagnose
wakeup-related issues.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/sleep.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index d4fbea91ab6b8..d068ff42fce49 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -758,6 +758,8 @@ bool acpi_s2idle_wake(void)
 			return true;
 		}
 
+		pm_pr_dbg("Rearming ACPI SCI for wakeup\n");
+
 		pm_wakeup_clear(acpi_sci_irq);
 		rearm_wake_irq(acpi_sci_irq);
 	}
-- 
GitLab


From 977dc3082285206e7b1fcbc4496671194cfb7980 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 1 Feb 2022 20:18:56 +0100
Subject: [PATCH 0385/1586] ACPI: EC / PM: Print additional debug message in
 acpi_ec_dispatch_gpe()

Make acpi_ec_dispatch_gpe() print an additional debug message after
seeing the EC GPE status bit set to help diagnose wakeup-related
issues.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/ec.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index 46710380a4029..f039565d59ad7 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -2081,8 +2081,11 @@ bool acpi_ec_dispatch_gpe(void)
 	 */
 	spin_lock_irq(&first_ec->lock);
 
-	if (acpi_ec_gpe_status_set(first_ec))
+	if (acpi_ec_gpe_status_set(first_ec)) {
+		pm_pr_dbg("ACPI EC GPE status set\n");
+
 		work_in_progress = advance_transaction(first_ec, false);
+	}
 
 	spin_unlock_irq(&first_ec->lock);
 
-- 
GitLab


From 7a5fbc9bcba5325a45297a4ba00091f39a63a1ed Mon Sep 17 00:00:00 2001
From: Fenghua Yu <fenghua.yu@intel.com>
Date: Mon, 7 Feb 2022 15:02:46 -0800
Subject: [PATCH 0386/1586] iommu/ioasid: Introduce a helper to check for valid
 PASIDs

Define a pasid_valid() helper to check if a given PASID is valid.

  [ bp: Massage commit message. ]

Suggested-by: Ashok Raj <ashok.raj@intel.com>
Suggested-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
Link: https://lore.kernel.org/r/20220207230254.3342514-4-fenghua.yu@intel.com
---
 include/linux/ioasid.h | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/include/linux/ioasid.h b/include/linux/ioasid.h
index e9dacd4b9f6bb..2237f64dbaae4 100644
--- a/include/linux/ioasid.h
+++ b/include/linux/ioasid.h
@@ -41,6 +41,10 @@ void *ioasid_find(struct ioasid_set *set, ioasid_t ioasid,
 int ioasid_register_allocator(struct ioasid_allocator_ops *allocator);
 void ioasid_unregister_allocator(struct ioasid_allocator_ops *allocator);
 int ioasid_set_data(ioasid_t ioasid, void *data);
+static inline bool pasid_valid(ioasid_t ioasid)
+{
+	return ioasid != INVALID_IOASID;
+}
 
 #else /* !CONFIG_IOASID */
 static inline ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min,
@@ -78,5 +82,10 @@ static inline int ioasid_set_data(ioasid_t ioasid, void *data)
 	return -ENOTSUPP;
 }
 
+static inline bool pasid_valid(ioasid_t ioasid)
+{
+	return false;
+}
+
 #endif /* CONFIG_IOASID */
 #endif /* __LINUX_IOASID_H */
-- 
GitLab


From a6cbd44093ef305b02ad5f80ed54abf0148a696c Mon Sep 17 00:00:00 2001
From: Fenghua Yu <fenghua.yu@intel.com>
Date: Mon, 7 Feb 2022 15:02:47 -0800
Subject: [PATCH 0387/1586] kernel/fork: Initialize mm's PASID

A new mm doesn't have a PASID yet when it's created. Initialize
the mm's PASID on fork() or for init_mm to INVALID_IOASID (-1).

INIT_PASID (0) is reserved for kernel legacy DMA PASID. It cannot be
allocated to a user process. Initializing the process's PASID to 0 may
cause confusion that's why the process uses the reserved kernel legacy
DMA PASID. Initializing the PASID to INVALID_IOASID (-1) explicitly
tells the process doesn't have a valid PASID yet.

Even though the only user of mm_pasid_init() is in fork.c, define it in
<linux/sched/mm.h> as the first of three mm/pasid life cycle functions
(init/set/drop) to keep these all together.

Suggested-by: Dave Hansen <dave.hansen@linux.intel.com>
Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20220207230254.3342514-5-fenghua.yu@intel.com
---
 include/linux/sched/mm.h | 10 ++++++++++
 kernel/fork.c            | 10 ++--------
 mm/init-mm.c             |  4 ++++
 3 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index aa5f09ca5bcf4..c74d1edbac2f0 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -8,6 +8,7 @@
 #include <linux/mm_types.h>
 #include <linux/gfp.h>
 #include <linux/sync_core.h>
+#include <linux/ioasid.h>
 
 /*
  * Routines for handling mm_structs
@@ -433,4 +434,13 @@ static inline void membarrier_update_current_mm(struct mm_struct *next_mm)
 }
 #endif
 
+#ifdef CONFIG_IOMMU_SVA
+static inline void mm_pasid_init(struct mm_struct *mm)
+{
+	mm->pasid = INVALID_IOASID;
+}
+#else
+static inline void mm_pasid_init(struct mm_struct *mm) {}
+#endif
+
 #endif /* _LINUX_SCHED_MM_H */
diff --git a/kernel/fork.c b/kernel/fork.c
index 6ee7551d3bd25..deacd2c17a7f1 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -97,6 +97,7 @@
 #include <linux/scs.h>
 #include <linux/io_uring.h>
 #include <linux/bpf.h>
+#include <linux/sched/mm.h>
 
 #include <asm/pgalloc.h>
 #include <linux/uaccess.h>
@@ -1019,13 +1020,6 @@ static void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
 #endif
 }
 
-static void mm_init_pasid(struct mm_struct *mm)
-{
-#ifdef CONFIG_IOMMU_SVA
-	mm->pasid = INIT_PASID;
-#endif
-}
-
 static void mm_init_uprobes_state(struct mm_struct *mm)
 {
 #ifdef CONFIG_UPROBES
@@ -1054,7 +1048,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
 	mm_init_cpumask(mm);
 	mm_init_aio(mm);
 	mm_init_owner(mm, p);
-	mm_init_pasid(mm);
+	mm_pasid_init(mm);
 	RCU_INIT_POINTER(mm->exe_file, NULL);
 	mmu_notifier_subscriptions_init(mm);
 	init_tlb_flush_pending(mm);
diff --git a/mm/init-mm.c b/mm/init-mm.c
index b4a6f38fb51dd..fbe7844d0912f 100644
--- a/mm/init-mm.c
+++ b/mm/init-mm.c
@@ -10,6 +10,7 @@
 
 #include <linux/atomic.h>
 #include <linux/user_namespace.h>
+#include <linux/ioasid.h>
 #include <asm/mmu.h>
 
 #ifndef INIT_MM_CONTEXT
@@ -38,6 +39,9 @@ struct mm_struct init_mm = {
 	.mmlist		= LIST_HEAD_INIT(init_mm.mmlist),
 	.user_ns	= &init_user_ns,
 	.cpu_bitmap	= CPU_BITS_NONE,
+#ifdef CONFIG_IOMMU_SVA
+	.pasid		= INVALID_IOASID,
+#endif
 	INIT_MM_CONTEXT(init_mm)
 };
 
-- 
GitLab


From 8cb37a5974a48569aab8a1736d21399fddbdbdb2 Mon Sep 17 00:00:00 2001
From: Marco Elver <elver@google.com>
Date: Mon, 31 Jan 2022 10:05:20 +0100
Subject: [PATCH 0388/1586] stack: Introduce CONFIG_RANDOMIZE_KSTACK_OFFSET

The randomize_kstack_offset feature is unconditionally compiled in when
the architecture supports it.

To add constraints on compiler versions, we require a dedicated Kconfig
variable. Therefore, introduce RANDOMIZE_KSTACK_OFFSET.

Furthermore, this option is now also configurable by EXPERT kernels:
while the feature is supposed to have zero performance overhead when
disabled, due to its use of static branches, there are few cases where
giving a distribution the option to disable the feature entirely makes
sense. For example, in very resource constrained environments, which
would never enable the feature to begin with, in which case the
additional kernel code size increase would be redundant.

Signed-off-by: Marco Elver <elver@google.com>
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20220131090521.1947110-1-elver@google.com
---
 arch/Kconfig                     | 23 ++++++++++++++++++-----
 include/linux/randomize_kstack.h |  5 +++++
 init/main.c                      |  2 +-
 3 files changed, 24 insertions(+), 6 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 678a80713b213..2cde48d9b77c4 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -1159,16 +1159,29 @@ config HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET
 	  to the compiler, so it will attempt to add canary checks regardless
 	  of the static branch state.
 
-config RANDOMIZE_KSTACK_OFFSET_DEFAULT
-	bool "Randomize kernel stack offset on syscall entry"
+config RANDOMIZE_KSTACK_OFFSET
+	bool "Support for randomizing kernel stack offset on syscall entry" if EXPERT
+	default y
 	depends on HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET
 	help
 	  The kernel stack offset can be randomized (after pt_regs) by
 	  roughly 5 bits of entropy, frustrating memory corruption
 	  attacks that depend on stack address determinism or
-	  cross-syscall address exposures. This feature is controlled
-	  by kernel boot param "randomize_kstack_offset=on/off", and this
-	  config chooses the default boot state.
+	  cross-syscall address exposures.
+
+	  The feature is controlled via the "randomize_kstack_offset=on/off"
+	  kernel boot param, and if turned off has zero overhead due to its use
+	  of static branches (see JUMP_LABEL).
+
+	  If unsure, say Y.
+
+config RANDOMIZE_KSTACK_OFFSET_DEFAULT
+	bool "Default state of kernel stack offset randomization"
+	depends on RANDOMIZE_KSTACK_OFFSET
+	help
+	  Kernel stack offset randomization is controlled by kernel boot param
+	  "randomize_kstack_offset=on/off", and this config chooses the default
+	  boot state.
 
 config ARCH_OPTIONAL_KERNEL_RWX
 	def_bool n
diff --git a/include/linux/randomize_kstack.h b/include/linux/randomize_kstack.h
index bebc911161b6f..91f1b990a3c35 100644
--- a/include/linux/randomize_kstack.h
+++ b/include/linux/randomize_kstack.h
@@ -2,6 +2,7 @@
 #ifndef _LINUX_RANDOMIZE_KSTACK_H
 #define _LINUX_RANDOMIZE_KSTACK_H
 
+#ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET
 #include <linux/kernel.h>
 #include <linux/jump_label.h>
 #include <linux/percpu-defs.h>
@@ -50,5 +51,9 @@ void *__builtin_alloca(size_t size);
 		raw_cpu_write(kstack_offset, offset);			\
 	}								\
 } while (0)
+#else /* CONFIG_RANDOMIZE_KSTACK_OFFSET */
+#define add_random_kstack_offset()		do { } while (0)
+#define choose_random_kstack_offset(rand)	do { } while (0)
+#endif /* CONFIG_RANDOMIZE_KSTACK_OFFSET */
 
 #endif
diff --git a/init/main.c b/init/main.c
index 65fa2e41a9c09..560f45c27ffe8 100644
--- a/init/main.c
+++ b/init/main.c
@@ -853,7 +853,7 @@ static void __init mm_init(void)
 	pti_init();
 }
 
-#ifdef CONFIG_HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET
+#ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET
 DEFINE_STATIC_KEY_MAYBE_RO(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT,
 			   randomize_kstack_offset);
 DEFINE_PER_CPU(u32, kstack_offset);
-- 
GitLab


From efa90c11f62e6b7252fb75efe2787056872a627c Mon Sep 17 00:00:00 2001
From: Marco Elver <elver@google.com>
Date: Mon, 31 Jan 2022 10:05:21 +0100
Subject: [PATCH 0389/1586] stack: Constrain and fix stack offset randomization
 with Clang builds

All supported versions of Clang perform auto-init of __builtin_alloca()
when stack auto-init is on (CONFIG_INIT_STACK_ALL_{ZERO,PATTERN}).

add_random_kstack_offset() uses __builtin_alloca() to add a stack
offset. This means, when CONFIG_INIT_STACK_ALL_{ZERO,PATTERN} is
enabled, add_random_kstack_offset() will auto-init that unused portion
of the stack used to add an offset.

There are several problems with this:

	1. These offsets can be as large as 1023 bytes. Performing
	   memset() on them isn't exactly cheap, and this is done on
	   every syscall entry.

	2. Architectures adding add_random_kstack_offset() to syscall
	   entry implemented in C require them to be 'noinstr' (e.g. see
	   x86 and s390). The potential problem here is that a call to
	   memset may occur, which is not noinstr.

A x86_64 defconfig kernel with Clang 11 and CONFIG_VMLINUX_VALIDATION shows:

 | vmlinux.o: warning: objtool: do_syscall_64()+0x9d: call to memset() leaves .noinstr.text section
 | vmlinux.o: warning: objtool: do_int80_syscall_32()+0xab: call to memset() leaves .noinstr.text section
 | vmlinux.o: warning: objtool: __do_fast_syscall_32()+0xe2: call to memset() leaves .noinstr.text section
 | vmlinux.o: warning: objtool: fixup_bad_iret()+0x2f: call to memset() leaves .noinstr.text section

Clang 14 (unreleased) will introduce a way to skip alloca initialization
via __builtin_alloca_uninitialized() (https://reviews.llvm.org/D115440).

Constrain RANDOMIZE_KSTACK_OFFSET to only be enabled if no stack
auto-init is enabled, the compiler is GCC, or Clang is version 14+. Use
__builtin_alloca_uninitialized() if the compiler provides it, as is done
by Clang 14.

Link: https://lkml.kernel.org/r/YbHTKUjEejZCLyhX@elver.google.com
Fixes: 39218ff4c625 ("stack: Optionally randomize kernel stack offset each syscall")
Signed-off-by: Marco Elver <elver@google.com>
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Acked-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20220131090521.1947110-2-elver@google.com
---
 arch/Kconfig                     |  1 +
 include/linux/randomize_kstack.h | 16 ++++++++++++++--
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 2cde48d9b77c4..c5b50bfe31c1a 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -1163,6 +1163,7 @@ config RANDOMIZE_KSTACK_OFFSET
 	bool "Support for randomizing kernel stack offset on syscall entry" if EXPERT
 	default y
 	depends on HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET
+	depends on INIT_STACK_NONE || !CC_IS_CLANG || CLANG_VERSION >= 140000
 	help
 	  The kernel stack offset can be randomized (after pt_regs) by
 	  roughly 5 bits of entropy, frustrating memory corruption
diff --git a/include/linux/randomize_kstack.h b/include/linux/randomize_kstack.h
index 91f1b990a3c35..1468caf001c05 100644
--- a/include/linux/randomize_kstack.h
+++ b/include/linux/randomize_kstack.h
@@ -17,8 +17,20 @@ DECLARE_PER_CPU(u32, kstack_offset);
  * alignment. Also, since this use is being explicitly masked to a max of
  * 10 bits, stack-clash style attacks are unlikely. For more details see
  * "VLAs" in Documentation/process/deprecated.rst
+ *
+ * The normal __builtin_alloca() is initialized with INIT_STACK_ALL (currently
+ * only with Clang and not GCC). Initializing the unused area on each syscall
+ * entry is expensive, and generating an implicit call to memset() may also be
+ * problematic (such as in noinstr functions). Therefore, if the compiler
+ * supports it (which it should if it initializes allocas), always use the
+ * "uninitialized" variant of the builtin.
  */
-void *__builtin_alloca(size_t size);
+#if __has_builtin(__builtin_alloca_uninitialized)
+#define __kstack_alloca __builtin_alloca_uninitialized
+#else
+#define __kstack_alloca __builtin_alloca
+#endif
+
 /*
  * Use, at most, 10 bits of entropy. We explicitly cap this to keep the
  * "VLA" from being unbounded (see above). 10 bits leaves enough room for
@@ -37,7 +49,7 @@ void *__builtin_alloca(size_t size);
 	if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT,	\
 				&randomize_kstack_offset)) {		\
 		u32 offset = raw_cpu_read(kstack_offset);		\
-		u8 *ptr = __builtin_alloca(KSTACK_OFFSET_MAX(offset));	\
+		u8 *ptr = __kstack_alloca(KSTACK_OFFSET_MAX(offset));	\
 		/* Keep allocation even after "ptr" loses scope. */	\
 		asm volatile("" :: "r"(ptr) : "memory");		\
 	}								\
-- 
GitLab


From 9aa60f3cbafb0facc15a6114df7616a1bf23a88d Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Fri, 4 Feb 2022 18:40:20 +0100
Subject: [PATCH 0390/1586] ACPI: EC: Do not return result from
 advance_transaction()

Notice that the if the event state is EC_EVENT_READY, the event
handling work cannot be pending, so it is not necessary to check
the return value of queue_work() in acpi_ec_submit_event().
Moreover, whether or not there is any EC work pending at the
moment can always be checked by looking at the events_in_progress
and queries_in_progress counters, so acpi_ec_submit_event() and
consequently advance_transaction() need not return results.

Accordingly, make acpi_ec_dispatch_gpe() always use the counters
mentioned above (for first_ec) to check if there is any pending EC
work to flush and turn both acpi_ec_submit_event() and
advance_transaction() into void functions (again, because they were
void functions in the past).

While at it, add a clarifying comment about the acpi_ec_mask_events()
call in advance_transaction().

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/ec.c | 38 +++++++++++++++++++-------------------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index f039565d59ad7..e202c0c370b1a 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -168,7 +168,7 @@ struct acpi_ec_query {
 };
 
 static int acpi_ec_submit_query(struct acpi_ec *ec);
-static bool advance_transaction(struct acpi_ec *ec, bool interrupt);
+static void advance_transaction(struct acpi_ec *ec, bool interrupt);
 static void acpi_ec_event_handler(struct work_struct *work);
 
 struct acpi_ec *first_ec;
@@ -441,11 +441,15 @@ static bool acpi_ec_submit_flushable_request(struct acpi_ec *ec)
 	return true;
 }
 
-static bool acpi_ec_submit_event(struct acpi_ec *ec)
+static void acpi_ec_submit_event(struct acpi_ec *ec)
 {
+	/*
+	 * It is safe to mask the events here, because acpi_ec_close_event()
+	 * will run at least once after this.
+	 */
 	acpi_ec_mask_events(ec);
 	if (!acpi_ec_event_enabled(ec))
-		return false;
+		return;
 
 	if (ec->event_state == EC_EVENT_READY) {
 		ec_dbg_evt("Command(%s) submitted/blocked",
@@ -460,17 +464,11 @@ static bool acpi_ec_submit_event(struct acpi_ec *ec)
 		 * queue up the event work to start the same loop again.
 		 */
 		if (ec->events_to_process++ > 0)
-			return true;
+			return;
 
 		ec->events_in_progress++;
-		return queue_work(ec_wq, &ec->work);
+		queue_work(ec_wq, &ec->work);
 	}
-
-	/*
-	 * The event handling work has not been completed yet, so it needs to be
-	 * flushed.
-	 */
-	return true;
 }
 
 static void acpi_ec_complete_event(struct acpi_ec *ec)
@@ -655,11 +653,10 @@ static void acpi_ec_spurious_interrupt(struct acpi_ec *ec, struct transaction *t
 		acpi_ec_mask_events(ec);
 }
 
-static bool advance_transaction(struct acpi_ec *ec, bool interrupt)
+static void advance_transaction(struct acpi_ec *ec, bool interrupt)
 {
 	struct transaction *t = ec->curr;
 	bool wakeup = false;
-	bool ret = false;
 	u8 status;
 
 	ec_dbg_stm("%s (%d)", interrupt ? "IRQ" : "TASK", smp_processor_id());
@@ -724,12 +721,10 @@ static bool advance_transaction(struct acpi_ec *ec, bool interrupt)
 
 out:
 	if (status & ACPI_EC_FLAG_SCI)
-		ret = acpi_ec_submit_event(ec);
+		acpi_ec_submit_event(ec);
 
 	if (wakeup && interrupt)
 		wake_up(&ec->wait);
-
-	return ret;
 }
 
 static void start_transaction(struct acpi_ec *ec)
@@ -2051,6 +2046,11 @@ void acpi_ec_set_gpe_wake_mask(u8 action)
 		acpi_set_gpe_wake_mask(NULL, first_ec->gpe, action);
 }
 
+static bool acpi_ec_work_in_progress(struct acpi_ec *ec)
+{
+	return ec->events_in_progress + ec->queries_in_progress > 0;
+}
+
 bool acpi_ec_dispatch_gpe(void)
 {
 	bool work_in_progress = false;
@@ -2084,7 +2084,8 @@ bool acpi_ec_dispatch_gpe(void)
 	if (acpi_ec_gpe_status_set(first_ec)) {
 		pm_pr_dbg("ACPI EC GPE status set\n");
 
-		work_in_progress = advance_transaction(first_ec, false);
+		advance_transaction(first_ec, false);
+		work_in_progress = acpi_ec_work_in_progress(first_ec);
 	}
 
 	spin_unlock_irq(&first_ec->lock);
@@ -2102,8 +2103,7 @@ bool acpi_ec_dispatch_gpe(void)
 
 		spin_lock_irq(&first_ec->lock);
 
-		work_in_progress = first_ec->events_in_progress +
-			first_ec->queries_in_progress > 0;
+		work_in_progress = acpi_ec_work_in_progress(first_ec);
 
 		spin_unlock_irq(&first_ec->lock);
 	} while (work_in_progress && !pm_wakeup_pending());
-- 
GitLab


From 54b86141d71db2a16743cfa4a3417180d7feb24a Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Fri, 4 Feb 2022 18:40:55 +0100
Subject: [PATCH 0391/1586] ACPI: EC: Reduce indentation level in
 acpi_ec_submit_event()

The indentation level in acpi_ec_submit_event() can be reduced, so
do that and while at it fix a typo in the comment affected by that
change.

No intentional functional impact.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/ec.c | 33 +++++++++++++++++----------------
 1 file changed, 17 insertions(+), 16 deletions(-)

diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index e202c0c370b1a..bba96df3b88a9 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -451,24 +451,25 @@ static void acpi_ec_submit_event(struct acpi_ec *ec)
 	if (!acpi_ec_event_enabled(ec))
 		return;
 
-	if (ec->event_state == EC_EVENT_READY) {
-		ec_dbg_evt("Command(%s) submitted/blocked",
-			   acpi_ec_cmd_string(ACPI_EC_COMMAND_QUERY));
+	if (ec->event_state != EC_EVENT_READY)
+		return;
 
-		ec->event_state = EC_EVENT_IN_PROGRESS;
-		/*
-		 * If events_to_process is greqter than 0 at this point, the
-		 * while () loop in acpi_ec_event_handler() is still running
-		 * and incrementing events_to_process will cause it to invoke
-		 * acpi_ec_submit_query() once more, so it is not necessary to
-		 * queue up the event work to start the same loop again.
-		 */
-		if (ec->events_to_process++ > 0)
-			return;
+	ec_dbg_evt("Command(%s) submitted/blocked",
+		   acpi_ec_cmd_string(ACPI_EC_COMMAND_QUERY));
 
-		ec->events_in_progress++;
-		queue_work(ec_wq, &ec->work);
-	}
+	ec->event_state = EC_EVENT_IN_PROGRESS;
+	/*
+	 * If events_to_process is greater than 0 at this point, the while ()
+	 * loop in acpi_ec_event_handler() is still running and incrementing
+	 * events_to_process will cause it to invoke acpi_ec_submit_query() once
+	 * more, so it is not necessary to queue up the event work to start the
+	 * same loop again.
+	 */
+	if (ec->events_to_process++ > 0)
+		return;
+
+	ec->events_in_progress++;
+	queue_work(ec_wq, &ec->work);
 }
 
 static void acpi_ec_complete_event(struct acpi_ec *ec)
-- 
GitLab


From 13a62d0e13308d62426c38223a3b6f78878f2173 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Fri, 4 Feb 2022 18:43:14 +0100
Subject: [PATCH 0392/1586] ACPI: EC: Rearrange code in acpi_ec_submit_event()

Rearange acpi_ec_event_handler() so as to avoid releasing ec->lock
and acquiring it again right away in the case when ec_event_clearing
is not ACPI_EC_EVT_TIMING_EVENT.

This also reduces the number of checks done by acpi_ec_event_handler()
in that case.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/ec.c | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index bba96df3b88a9..a1b871a418f87 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -1238,6 +1238,7 @@ static void acpi_ec_event_handler(struct work_struct *work)
 		acpi_ec_submit_query(ec);
 
 		spin_lock_irq(&ec->lock);
+
 		ec->events_to_process--;
 	}
 
@@ -1246,27 +1247,30 @@ static void acpi_ec_event_handler(struct work_struct *work)
 	 * event handling work again regardless of whether or not the query
 	 * queued up above is processed successfully.
 	 */
-	if (ec_event_clearing == ACPI_EC_EVT_TIMING_EVENT)
+	if (ec_event_clearing == ACPI_EC_EVT_TIMING_EVENT) {
+		bool guard_timeout;
+
 		acpi_ec_complete_event(ec);
-	else
-		acpi_ec_close_event(ec);
 
-	spin_unlock_irq(&ec->lock);
+		ec_dbg_evt("Event stopped");
+
+		spin_unlock_irq(&ec->lock);
 
-	ec_dbg_evt("Event stopped");
+		guard_timeout = !!ec_guard(ec);
 
-	if (ec_event_clearing == ACPI_EC_EVT_TIMING_EVENT && ec_guard(ec)) {
 		spin_lock_irq(&ec->lock);
 
 		/* Take care of SCI_EVT unless someone else is doing that. */
-		if (!ec->curr)
+		if (guard_timeout && !ec->curr)
 			advance_transaction(ec, false);
+	} else {
+		acpi_ec_close_event(ec);
 
-		spin_unlock_irq(&ec->lock);
+		ec_dbg_evt("Event stopped");
 	}
 
-	spin_lock_irq(&ec->lock);
 	ec->events_in_progress--;
+
 	spin_unlock_irq(&ec->lock);
 }
 
-- 
GitLab


From 6b22ce002f755e34e9a92ce5a41ffda0b729b892 Mon Sep 17 00:00:00 2001
From: Akira Yokosawa <akiyks@gmail.com>
Date: Thu, 10 Feb 2022 10:26:53 +0900
Subject: [PATCH 0393/1586] docs: Makefile: Add -no-shell-escape option to
 LATEXOPTS

It turns out that LaTeX enables \write18, which allows (some) shell
commands to be executed from the document source, by default.  This the
often-seen warning during a pdfdocs build:

  restricted \write18 enabled

That is a potential security problem and is entirely unnecessary; nothing
in the kernel PDF docs build needs that capability.  So disable \write18
explicitly.

Signed-off-by: Akira Yokosawa <akiyks@gmail.com>
Link: https://lore.kernel.org/r/519bd2d9-1bee-03e1-eeb4-d9883c18be0c@gmail.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/Makefile b/Documentation/Makefile
index 9f4bd42cef189..64d44c1ecad31 100644
--- a/Documentation/Makefile
+++ b/Documentation/Makefile
@@ -26,7 +26,7 @@ SPHINX_CONF   = conf.py
 PAPER         =
 BUILDDIR      = $(obj)/output
 PDFLATEX      = xelatex
-LATEXOPTS     = -interaction=batchmode
+LATEXOPTS     = -interaction=batchmode -no-shell-escape
 
 ifeq ($(KBUILD_VERBOSE),0)
 SPHINXOPTS    += "-q"
-- 
GitLab


From 701fac40384f07197b106136012804c3cae0b3de Mon Sep 17 00:00:00 2001
From: Fenghua Yu <fenghua.yu@intel.com>
Date: Mon, 7 Feb 2022 15:02:48 -0800
Subject: [PATCH 0394/1586] iommu/sva: Assign a PASID to mm on PASID allocation
 and free it on mm exit

PASIDs are process-wide. It was attempted to use refcounted PASIDs to
free them when the last thread drops the refcount. This turned out to
be complex and error prone. Given the fact that the PASID space is 20
bits, which allows up to 1M processes to have a PASID associated
concurrently, PASID resource exhaustion is not a realistic concern.

Therefore, it was decided to simplify the approach and stick with lazy
on demand PASID allocation, but drop the eager free approach and make an
allocated PASID's lifetime bound to the lifetime of the process.

Get rid of the refcounting mechanisms and replace/rename the interfaces
to reflect this new approach.

  [ bp: Massage commit message. ]

Suggested-by: Dave Hansen <dave.hansen@linux.intel.com>
Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Joerg Roedel <jroedel@suse.de>
Link: https://lore.kernel.org/r/20220207230254.3342514-6-fenghua.yu@intel.com
---
 .../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c   |  5 +--
 drivers/iommu/intel/iommu.c                   |  4 +-
 drivers/iommu/intel/svm.c                     |  9 -----
 drivers/iommu/ioasid.c                        | 39 ++-----------------
 drivers/iommu/iommu-sva-lib.c                 | 39 ++++++-------------
 drivers/iommu/iommu-sva-lib.h                 |  1 -
 include/linux/ioasid.h                        | 12 +-----
 include/linux/sched/mm.h                      | 16 ++++++++
 kernel/fork.c                                 |  1 +
 9 files changed, 38 insertions(+), 88 deletions(-)

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
index a737ba5f727e6..22ddd05bbdcd0 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
@@ -340,14 +340,12 @@ __arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm)
 	bond->smmu_mn = arm_smmu_mmu_notifier_get(smmu_domain, mm);
 	if (IS_ERR(bond->smmu_mn)) {
 		ret = PTR_ERR(bond->smmu_mn);
-		goto err_free_pasid;
+		goto err_free_bond;
 	}
 
 	list_add(&bond->list, &master->bonds);
 	return &bond->sva;
 
-err_free_pasid:
-	iommu_sva_free_pasid(mm);
 err_free_bond:
 	kfree(bond);
 	return ERR_PTR(ret);
@@ -377,7 +375,6 @@ void arm_smmu_sva_unbind(struct iommu_sva *handle)
 	if (refcount_dec_and_test(&bond->refs)) {
 		list_del(&bond->list);
 		arm_smmu_mmu_notifier_put(bond->smmu_mn);
-		iommu_sva_free_pasid(bond->mm);
 		kfree(bond);
 	}
 	mutex_unlock(&sva_lock);
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 92fea3fbbb114..ef03b2176bbd3 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -4781,7 +4781,7 @@ attach_failed:
 link_failed:
 	spin_unlock_irqrestore(&device_domain_lock, flags);
 	if (list_empty(&domain->subdevices) && domain->default_pasid > 0)
-		ioasid_put(domain->default_pasid);
+		ioasid_free(domain->default_pasid);
 
 	return ret;
 }
@@ -4811,7 +4811,7 @@ static void aux_domain_remove_dev(struct dmar_domain *domain,
 	spin_unlock_irqrestore(&device_domain_lock, flags);
 
 	if (list_empty(&domain->subdevices) && domain->default_pasid > 0)
-		ioasid_put(domain->default_pasid);
+		ioasid_free(domain->default_pasid);
 }
 
 static int prepare_domain_attach_device(struct iommu_domain *domain,
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index 5b5d69b04fcc8..51ac2096b3da7 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -514,11 +514,6 @@ static int intel_svm_alloc_pasid(struct device *dev, struct mm_struct *mm,
 	return iommu_sva_alloc_pasid(mm, PASID_MIN, max_pasid - 1);
 }
 
-static void intel_svm_free_pasid(struct mm_struct *mm)
-{
-	iommu_sva_free_pasid(mm);
-}
-
 static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu,
 					   struct device *dev,
 					   struct mm_struct *mm,
@@ -662,8 +657,6 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid)
 				kfree(svm);
 			}
 		}
-		/* Drop a PASID reference and free it if no reference. */
-		intel_svm_free_pasid(mm);
 	}
 out:
 	return ret;
@@ -1047,8 +1040,6 @@ struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm, void
 	}
 
 	sva = intel_svm_bind_mm(iommu, dev, mm, flags);
-	if (IS_ERR_OR_NULL(sva))
-		intel_svm_free_pasid(mm);
 	mutex_unlock(&pasid_mutex);
 
 	return sva;
diff --git a/drivers/iommu/ioasid.c b/drivers/iommu/ioasid.c
index 06fee7416816b..a786c034907c4 100644
--- a/drivers/iommu/ioasid.c
+++ b/drivers/iommu/ioasid.c
@@ -2,7 +2,7 @@
 /*
  * I/O Address Space ID allocator. There is one global IOASID space, split into
  * subsets. Users create a subset with DECLARE_IOASID_SET, then allocate and
- * free IOASIDs with ioasid_alloc and ioasid_put.
+ * free IOASIDs with ioasid_alloc() and ioasid_free().
  */
 #include <linux/ioasid.h>
 #include <linux/module.h>
@@ -15,7 +15,6 @@ struct ioasid_data {
 	struct ioasid_set *set;
 	void *private;
 	struct rcu_head rcu;
-	refcount_t refs;
 };
 
 /*
@@ -315,7 +314,6 @@ ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, ioasid_t max,
 
 	data->set = set;
 	data->private = private;
-	refcount_set(&data->refs, 1);
 
 	/*
 	 * Custom allocator needs allocator data to perform platform specific
@@ -348,35 +346,11 @@ exit_free:
 EXPORT_SYMBOL_GPL(ioasid_alloc);
 
 /**
- * ioasid_get - obtain a reference to the IOASID
- * @ioasid: the ID to get
- */
-void ioasid_get(ioasid_t ioasid)
-{
-	struct ioasid_data *ioasid_data;
-
-	spin_lock(&ioasid_allocator_lock);
-	ioasid_data = xa_load(&active_allocator->xa, ioasid);
-	if (ioasid_data)
-		refcount_inc(&ioasid_data->refs);
-	else
-		WARN_ON(1);
-	spin_unlock(&ioasid_allocator_lock);
-}
-EXPORT_SYMBOL_GPL(ioasid_get);
-
-/**
- * ioasid_put - Release a reference to an ioasid
+ * ioasid_free - Free an ioasid
  * @ioasid: the ID to remove
- *
- * Put a reference to the IOASID, free it when the number of references drops to
- * zero.
- *
- * Return: %true if the IOASID was freed, %false otherwise.
  */
-bool ioasid_put(ioasid_t ioasid)
+void ioasid_free(ioasid_t ioasid)
 {
-	bool free = false;
 	struct ioasid_data *ioasid_data;
 
 	spin_lock(&ioasid_allocator_lock);
@@ -386,10 +360,6 @@ bool ioasid_put(ioasid_t ioasid)
 		goto exit_unlock;
 	}
 
-	free = refcount_dec_and_test(&ioasid_data->refs);
-	if (!free)
-		goto exit_unlock;
-
 	active_allocator->ops->free(ioasid, active_allocator->ops->pdata);
 	/* Custom allocator needs additional steps to free the xa element */
 	if (active_allocator->flags & IOASID_ALLOCATOR_CUSTOM) {
@@ -399,9 +369,8 @@ bool ioasid_put(ioasid_t ioasid)
 
 exit_unlock:
 	spin_unlock(&ioasid_allocator_lock);
-	return free;
 }
-EXPORT_SYMBOL_GPL(ioasid_put);
+EXPORT_SYMBOL_GPL(ioasid_free);
 
 /**
  * ioasid_find - Find IOASID data
diff --git a/drivers/iommu/iommu-sva-lib.c b/drivers/iommu/iommu-sva-lib.c
index bd41405d34e95..1065061438960 100644
--- a/drivers/iommu/iommu-sva-lib.c
+++ b/drivers/iommu/iommu-sva-lib.c
@@ -18,8 +18,7 @@ static DECLARE_IOASID_SET(iommu_sva_pasid);
  *
  * Try to allocate a PASID for this mm, or take a reference to the existing one
  * provided it fits within the [@min, @max] range. On success the PASID is
- * available in mm->pasid, and must be released with iommu_sva_free_pasid().
- * @min must be greater than 0, because 0 indicates an unused mm->pasid.
+ * available in mm->pasid and will be available for the lifetime of the mm.
  *
  * Returns 0 on success and < 0 on error.
  */
@@ -33,38 +32,24 @@ int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t max)
 		return -EINVAL;
 
 	mutex_lock(&iommu_sva_lock);
-	if (mm->pasid) {
-		if (mm->pasid >= min && mm->pasid <= max)
-			ioasid_get(mm->pasid);
-		else
+	/* Is a PASID already associated with this mm? */
+	if (pasid_valid(mm->pasid)) {
+		if (mm->pasid < min || mm->pasid >= max)
 			ret = -EOVERFLOW;
-	} else {
-		pasid = ioasid_alloc(&iommu_sva_pasid, min, max, mm);
-		if (pasid == INVALID_IOASID)
-			ret = -ENOMEM;
-		else
-			mm->pasid = pasid;
+		goto out;
 	}
+
+	pasid = ioasid_alloc(&iommu_sva_pasid, min, max, mm);
+	if (!pasid_valid(pasid))
+		ret = -ENOMEM;
+	else
+		mm_pasid_set(mm, pasid);
+out:
 	mutex_unlock(&iommu_sva_lock);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(iommu_sva_alloc_pasid);
 
-/**
- * iommu_sva_free_pasid - Release the mm's PASID
- * @mm: the mm
- *
- * Drop one reference to a PASID allocated with iommu_sva_alloc_pasid()
- */
-void iommu_sva_free_pasid(struct mm_struct *mm)
-{
-	mutex_lock(&iommu_sva_lock);
-	if (ioasid_put(mm->pasid))
-		mm->pasid = 0;
-	mutex_unlock(&iommu_sva_lock);
-}
-EXPORT_SYMBOL_GPL(iommu_sva_free_pasid);
-
 /* ioasid_find getter() requires a void * argument */
 static bool __mmget_not_zero(void *mm)
 {
diff --git a/drivers/iommu/iommu-sva-lib.h b/drivers/iommu/iommu-sva-lib.h
index 95dc3ebc19283..8909ea1094e3a 100644
--- a/drivers/iommu/iommu-sva-lib.h
+++ b/drivers/iommu/iommu-sva-lib.h
@@ -9,7 +9,6 @@
 #include <linux/mm_types.h>
 
 int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t max);
-void iommu_sva_free_pasid(struct mm_struct *mm);
 struct mm_struct *iommu_sva_find(ioasid_t pasid);
 
 /* I/O Page fault */
diff --git a/include/linux/ioasid.h b/include/linux/ioasid.h
index 2237f64dbaae4..af1c9d62e6428 100644
--- a/include/linux/ioasid.h
+++ b/include/linux/ioasid.h
@@ -34,8 +34,7 @@ struct ioasid_allocator_ops {
 #if IS_ENABLED(CONFIG_IOASID)
 ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, ioasid_t max,
 		      void *private);
-void ioasid_get(ioasid_t ioasid);
-bool ioasid_put(ioasid_t ioasid);
+void ioasid_free(ioasid_t ioasid);
 void *ioasid_find(struct ioasid_set *set, ioasid_t ioasid,
 		  bool (*getter)(void *));
 int ioasid_register_allocator(struct ioasid_allocator_ops *allocator);
@@ -53,14 +52,7 @@ static inline ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min,
 	return INVALID_IOASID;
 }
 
-static inline void ioasid_get(ioasid_t ioasid)
-{
-}
-
-static inline bool ioasid_put(ioasid_t ioasid)
-{
-	return false;
-}
+static inline void ioasid_free(ioasid_t ioasid) { }
 
 static inline void *ioasid_find(struct ioasid_set *set, ioasid_t ioasid,
 				bool (*getter)(void *))
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index c74d1edbac2f0..a80356e9dc69a 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -439,8 +439,24 @@ static inline void mm_pasid_init(struct mm_struct *mm)
 {
 	mm->pasid = INVALID_IOASID;
 }
+
+/* Associate a PASID with an mm_struct: */
+static inline void mm_pasid_set(struct mm_struct *mm, u32 pasid)
+{
+	mm->pasid = pasid;
+}
+
+static inline void mm_pasid_drop(struct mm_struct *mm)
+{
+	if (pasid_valid(mm->pasid)) {
+		ioasid_free(mm->pasid);
+		mm->pasid = INVALID_IOASID;
+	}
+}
 #else
 static inline void mm_pasid_init(struct mm_struct *mm) {}
+static inline void mm_pasid_set(struct mm_struct *mm, u32 pasid) {}
+static inline void mm_pasid_drop(struct mm_struct *mm) {}
 #endif
 
 #endif /* _LINUX_SCHED_MM_H */
diff --git a/kernel/fork.c b/kernel/fork.c
index deacd2c17a7f1..c03c6682464c1 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1115,6 +1115,7 @@ static inline void __mmput(struct mm_struct *mm)
 	}
 	if (mm->binfmt)
 		module_put(mm->binfmt->module);
+	mm_pasid_drop(mm);
 	mmdrop(mm);
 }
 
-- 
GitLab


From dc7507ddce593cbd9c93330024a5658db6f8ef73 Mon Sep 17 00:00:00 2001
From: Fenghua Yu <fenghua.yu@intel.com>
Date: Mon, 7 Feb 2022 15:02:49 -0800
Subject: [PATCH 0395/1586] x86/fpu: Clear PASID when copying fpstate

The kernel must allocate a Process Address Space ID (PASID) on behalf of
each process which will use ENQCMD and program it into the new MSR to
communicate the process identity to platform hardware. ENQCMD uses the
PASID stored in this MSR to tag requests from this process.

The PASID state must be cleared on fork() since fork creates a
new address space.

For clone(), it would be functionally OK to copy the PASID. However,
clearing it is _also_ functionally OK since any PASID use will trigger
the #GP handler to populate the MSR.

Copying the PASID state has two main downsides:
 * It requires differentiating fork() and clone() in the code,
   both in the FPU code and keeping tsk->pasid_activated consistent.
 * It guarantees that the PASID is out of its init state, which
   incurs small but non-zero cost on every XSAVE/XRSTOR.

The main downside of clearing the PASID at fpstate copy is the future,
one-time #GP for the thread.

Use the simplest approach: clear the PASID state both on clone() and
fork().  Rely on the #GP handler for MSR population in children.

Also, just clear the PASID bit from xfeatures if XSAVE is supported.
This will have no effect on systems that do not have PASID support.  It
is virtually zero overhead because 'dst_fpu' was just written and
the whole thing is cache hot.

Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20220207230254.3342514-7-fenghua.yu@intel.com
---
 arch/x86/kernel/fpu/core.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 8dea01ffc5c18..19821f027cb39 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -612,6 +612,13 @@ int fpu_clone(struct task_struct *dst, unsigned long clone_flags)
 		fpu_inherit_perms(dst_fpu);
 	fpregs_unlock();
 
+	/*
+	 * Children never inherit PASID state.
+	 * Force it to have its init value:
+	 */
+	if (use_xsave())
+		dst_fpu->fpstate->regs.xsave.header.xfeatures &= ~XFEATURE_MASK_PASID;
+
 	trace_x86_fpu_copy_src(src_fpu);
 	trace_x86_fpu_copy_dst(dst_fpu);
 
-- 
GitLab


From a3d29e8291b622780eb6e4e3eeaf2b24ec78fd43 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 7 Feb 2022 15:02:50 -0800
Subject: [PATCH 0396/1586] sched: Define and initialize a flag to identify
 valid PASID in the task

Add a new single bit field to the task structure to track whether this task
has initialized the IA32_PASID MSR to the mm's PASID.

Initialize the field to zero when creating a new task with fork/clone.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Co-developed-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20220207230254.3342514-8-fenghua.yu@intel.com
---
 include/linux/sched.h | 3 +++
 kernel/fork.c         | 4 ++++
 2 files changed, 7 insertions(+)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 75ba8aa60248b..4e5de3aed410d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -938,6 +938,9 @@ struct task_struct {
 	/* Recursion prevention for eventfd_signal() */
 	unsigned			in_eventfd_signal:1;
 #endif
+#ifdef CONFIG_IOMMU_SVA
+	unsigned			pasid_activated:1;
+#endif
 
 	unsigned long			atomic_flags; /* Flags requiring atomic access. */
 
diff --git a/kernel/fork.c b/kernel/fork.c
index c03c6682464c1..51fd1df994b78 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -968,6 +968,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 	tsk->use_memdelay = 0;
 #endif
 
+#ifdef CONFIG_IOMMU_SVA
+	tsk->pasid_activated = 0;
+#endif
+
 #ifdef CONFIG_MEMCG
 	tsk->active_memcg = NULL;
 #endif
-- 
GitLab


From fa6af69f38d3f409bedc55d0112eec36ed526d4b Mon Sep 17 00:00:00 2001
From: Fenghua Yu <fenghua.yu@intel.com>
Date: Mon, 7 Feb 2022 15:02:51 -0800
Subject: [PATCH 0397/1586] x86/traps: Demand-populate PASID MSR via #GP

All tasks start with PASID state disabled. This means that the first
time they execute an ENQCMD instruction they will take a #GP fault.

Modify the #GP fault handler to check if the "mm" for the task has
already been allocated a PASID. If so, try to fix the #GP fault by
loading the IA32_PASID MSR.

Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20220207230254.3342514-9-fenghua.yu@intel.com
---
 arch/x86/kernel/traps.c | 55 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)

diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index c9d566dcf89a0..7ef00dee35be4 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -39,6 +39,7 @@
 #include <linux/io.h>
 #include <linux/hardirq.h>
 #include <linux/atomic.h>
+#include <linux/ioasid.h>
 
 #include <asm/stacktrace.h>
 #include <asm/processor.h>
@@ -559,6 +560,57 @@ static bool fixup_iopl_exception(struct pt_regs *regs)
 	return true;
 }
 
+/*
+ * The unprivileged ENQCMD instruction generates #GPs if the
+ * IA32_PASID MSR has not been populated.  If possible, populate
+ * the MSR from a PASID previously allocated to the mm.
+ */
+static bool try_fixup_enqcmd_gp(void)
+{
+#ifdef CONFIG_IOMMU_SVA
+	u32 pasid;
+
+	/*
+	 * MSR_IA32_PASID is managed using XSAVE.  Directly
+	 * writing to the MSR is only possible when fpregs
+	 * are valid and the fpstate is not.  This is
+	 * guaranteed when handling a userspace exception
+	 * in *before* interrupts are re-enabled.
+	 */
+	lockdep_assert_irqs_disabled();
+
+	/*
+	 * Hardware without ENQCMD will not generate
+	 * #GPs that can be fixed up here.
+	 */
+	if (!cpu_feature_enabled(X86_FEATURE_ENQCMD))
+		return false;
+
+	pasid = current->mm->pasid;
+
+	/*
+	 * If the mm has not been allocated a
+	 * PASID, the #GP can not be fixed up.
+	 */
+	if (!pasid_valid(pasid))
+		return false;
+
+	/*
+	 * Did this thread already have its PASID activated?
+	 * If so, the #GP must be from something else.
+	 */
+	if (current->pasid_activated)
+		return false;
+
+	wrmsrl(MSR_IA32_PASID, pasid | MSR_IA32_PASID_VALID);
+	current->pasid_activated = 1;
+
+	return true;
+#else
+	return false;
+#endif
+}
+
 DEFINE_IDTENTRY_ERRORCODE(exc_general_protection)
 {
 	char desc[sizeof(GPFSTR) + 50 + 2*sizeof(unsigned long) + 1] = GPFSTR;
@@ -567,6 +619,9 @@ DEFINE_IDTENTRY_ERRORCODE(exc_general_protection)
 	unsigned long gp_addr;
 	int ret;
 
+	if (user_mode(regs) && try_fixup_enqcmd_gp())
+		return;
+
 	cond_local_irq_enable(regs);
 
 	if (static_cpu_has(X86_FEATURE_UMIP)) {
-- 
GitLab


From 7c1ef59145f1c8bf9a2cc7a6ebf2fd56bbb440de Mon Sep 17 00:00:00 2001
From: Fenghua Yu <fenghua.yu@intel.com>
Date: Mon, 7 Feb 2022 15:02:52 -0800
Subject: [PATCH 0398/1586] x86/cpufeatures: Re-enable ENQCMD

The ENQCMD feature can only be used if CONFIG_INTEL_IOMMU_SVM is set.
Add X86_FEATURE_ENQCMD to the disabled features mask as appropriate so
that cpu_feature_enabled() can be used to check the feature.

  [ bp: Massage commit message. ]

Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20220207230254.3342514-10-fenghua.yu@intel.com
---
 arch/x86/include/asm/disabled-features.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index 8f28fafa98b32..1231d63f836d8 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -56,8 +56,11 @@
 # define DISABLE_PTI		(1 << (X86_FEATURE_PTI & 31))
 #endif
 
-/* Force disable because it's broken beyond repair */
-#define DISABLE_ENQCMD		(1 << (X86_FEATURE_ENQCMD & 31))
+#ifdef CONFIG_INTEL_IOMMU_SVM
+# define DISABLE_ENQCMD		0
+#else
+# define DISABLE_ENQCMD		(1 << (X86_FEATURE_ENQCMD & 31))
+#endif
 
 #ifdef CONFIG_X86_SGX
 # define DISABLE_SGX	0
-- 
GitLab


From 6e3133d901e89a4ba83ce7ebd8c27bbeaa9ed1f2 Mon Sep 17 00:00:00 2001
From: Fenghua Yu <fenghua.yu@intel.com>
Date: Mon, 7 Feb 2022 15:02:53 -0800
Subject: [PATCH 0399/1586] tools/objtool: Check for use of the ENQCMD
 instruction in the kernel

The ENQCMD instruction implicitly accesses the PASID_MSR to fill in the
pasid field of the descriptor being submitted to an accelerator. But
there is no precise (and stable across kernel changes) point at which
the PASID_MSR is updated from the value for one task to the next.

Kernel code that uses accelerators must always use the ENQCMDS instruction
which does not access the PASID_MSR.

Check for use of the ENQCMD instruction in the kernel and warn on its
usage.

Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
Link: https://lore.kernel.org/r/20220207230254.3342514-11-fenghua.yu@intel.com
---
 tools/objtool/arch/x86/decode.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index c10ef78df050a..479e769ca3241 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -112,7 +112,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
 	const struct elf *elf = file->elf;
 	struct insn insn;
 	int x86_64, ret;
-	unsigned char op1, op2,
+	unsigned char op1, op2, op3,
 		      rex = 0, rex_b = 0, rex_r = 0, rex_w = 0, rex_x = 0,
 		      modrm = 0, modrm_mod = 0, modrm_rm = 0, modrm_reg = 0,
 		      sib = 0, /* sib_scale = 0, */ sib_index = 0, sib_base = 0;
@@ -139,6 +139,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
 
 	op1 = insn.opcode.bytes[0];
 	op2 = insn.opcode.bytes[1];
+	op3 = insn.opcode.bytes[2];
 
 	if (insn.rex_prefix.nbytes) {
 		rex = insn.rex_prefix.bytes[0];
@@ -491,6 +492,14 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
 			/* nopl/nopw */
 			*type = INSN_NOP;
 
+		} else if (op2 == 0x38 && op3 == 0xf8) {
+			if (insn.prefixes.nbytes == 1 &&
+			    insn.prefixes.bytes[0] == 0xf2) {
+				/* ENQCMD cannot be used in the kernel. */
+				WARN("ENQCMD instruction at %s:%lx", sec->name,
+				     offset);
+			}
+
 		} else if (op2 == 0xa0 || op2 == 0xa8) {
 
 			/* push fs/gs */
-- 
GitLab


From 83aa52ffed5d35a08e24452d0471e1684075cdf8 Mon Sep 17 00:00:00 2001
From: Fenghua Yu <fenghua.yu@intel.com>
Date: Mon, 7 Feb 2022 15:02:54 -0800
Subject: [PATCH 0400/1586] Documentation/x86: Update documentation for SVA
 (Shared Virtual Addressing)

Adjust the documentation to the new way how a PASID is being allocated,
freed and fixed up.

Based on a patch by Ashok Raj <ashok.raj@intel.com>

  [ bp: Massage commit message, fix htmldocs build warning ]

Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20220207230254.3342514-12-fenghua.yu@intel.com
---
 Documentation/x86/sva.rst | 53 ++++++++++++++++++++++++++++++---------
 1 file changed, 41 insertions(+), 12 deletions(-)

diff --git a/Documentation/x86/sva.rst b/Documentation/x86/sva.rst
index 076efd51ef1fe..2e9b8b0f9a0f4 100644
--- a/Documentation/x86/sva.rst
+++ b/Documentation/x86/sva.rst
@@ -104,18 +104,47 @@ The MSR must be configured on each logical CPU before any application
 thread can interact with a device. Threads that belong to the same
 process share the same page tables, thus the same MSR value.
 
-PASID is cleared when a process is created. The PASID allocation and MSR
-programming may occur long after a process and its threads have been created.
-One thread must call iommu_sva_bind_device() to allocate the PASID for the
-process. If a thread uses ENQCMD without the MSR first being populated, a #GP
-will be raised. The kernel will update the PASID MSR with the PASID for all
-threads in the process. A single process PASID can be used simultaneously
-with multiple devices since they all share the same address space.
-
-One thread can call iommu_sva_unbind_device() to free the allocated PASID.
-The kernel will clear the PASID MSR for all threads belonging to the process.
-
-New threads inherit the MSR value from the parent.
+PASID Life Cycle Management
+===========================
+
+PASID is initialized as INVALID_IOASID (-1) when a process is created.
+
+Only processes that access SVA-capable devices need to have a PASID
+allocated. This allocation happens when a process opens/binds an SVA-capable
+device but finds no PASID for this process. Subsequent binds of the same, or
+other devices will share the same PASID.
+
+Although the PASID is allocated to the process by opening a device,
+it is not active in any of the threads of that process. It's loaded to the
+IA32_PASID MSR lazily when a thread tries to submit a work descriptor
+to a device using the ENQCMD.
+
+That first access will trigger a #GP fault because the IA32_PASID MSR
+has not been initialized with the PASID value assigned to the process
+when the device was opened. The Linux #GP handler notes that a PASID has
+been allocated for the process, and so initializes the IA32_PASID MSR
+and returns so that the ENQCMD instruction is re-executed.
+
+On fork(2) or exec(2) the PASID is removed from the process as it no
+longer has the same address space that it had when the device was opened.
+
+On clone(2) the new task shares the same address space, so will be
+able to use the PASID allocated to the process. The IA32_PASID is not
+preemptively initialized as the PASID value might not be allocated yet or
+the kernel does not know whether this thread is going to access the device
+and the cleared IA32_PASID MSR reduces context switch overhead by xstate
+init optimization. Since #GP faults have to be handled on any threads that
+were created before the PASID was assigned to the mm of the process, newly
+created threads might as well be treated in a consistent way.
+
+Due to complexity of freeing the PASID and clearing all IA32_PASID MSRs in
+all threads in unbind, free the PASID lazily only on mm exit.
+
+If a process does a close(2) of the device file descriptor and munmap(2)
+of the device MMIO portal, then the driver will unbind the device. The
+PASID is still marked VALID in the PASID_MSR for any threads in the
+process that accessed the device. But this is harmless as without the
+MMIO portal they cannot submit new work to the device.
 
 Relationships
 =============
-- 
GitLab


From 45ec846c1cd11835a29c85645065115dd791aa45 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Wed, 9 Feb 2022 16:25:58 +0000
Subject: [PATCH 0401/1586] irqdomain: Let irq_domain_set_{info,hwirq_and_chip}
 take a const irq_chip

In order to let a const irqchip be fed to the irqchip layer, adjust
the various prototypes. An extra cast in irq_domain_set_hwirq_and_chip()
is required to avoid a warning.

Signed-off-by: Marc Zyngier <maz@kernel.org>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20220209162607.1118325-2-maz@kernel.org
---
 include/linux/irqdomain.h | 5 +++--
 kernel/irq/irqdomain.c    | 9 +++++----
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index be25a33293e57..00d577f908833 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -479,7 +479,8 @@ int irq_destroy_ipi(unsigned int irq, const struct cpumask *dest);
 extern struct irq_data *irq_domain_get_irq_data(struct irq_domain *domain,
 						unsigned int virq);
 extern void irq_domain_set_info(struct irq_domain *domain, unsigned int virq,
-				irq_hw_number_t hwirq, struct irq_chip *chip,
+				irq_hw_number_t hwirq,
+				const struct irq_chip *chip,
 				void *chip_data, irq_flow_handler_t handler,
 				void *handler_data, const char *handler_name);
 extern void irq_domain_reset_irq_data(struct irq_data *irq_data);
@@ -522,7 +523,7 @@ extern int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain,
 extern int irq_domain_set_hwirq_and_chip(struct irq_domain *domain,
 					 unsigned int virq,
 					 irq_hw_number_t hwirq,
-					 struct irq_chip *chip,
+					 const struct irq_chip *chip,
 					 void *chip_data);
 extern void irq_domain_free_irqs_common(struct irq_domain *domain,
 					unsigned int virq,
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index bf38c546aa256..d5ce965105493 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -1319,7 +1319,8 @@ EXPORT_SYMBOL_GPL(irq_domain_get_irq_data);
  * @chip_data:	The associated chip data
  */
 int irq_domain_set_hwirq_and_chip(struct irq_domain *domain, unsigned int virq,
-				  irq_hw_number_t hwirq, struct irq_chip *chip,
+				  irq_hw_number_t hwirq,
+				  const struct irq_chip *chip,
 				  void *chip_data)
 {
 	struct irq_data *irq_data = irq_domain_get_irq_data(domain, virq);
@@ -1328,7 +1329,7 @@ int irq_domain_set_hwirq_and_chip(struct irq_domain *domain, unsigned int virq,
 		return -ENOENT;
 
 	irq_data->hwirq = hwirq;
-	irq_data->chip = chip ? chip : &no_irq_chip;
+	irq_data->chip = (struct irq_chip *)(chip ? chip : &no_irq_chip);
 	irq_data->chip_data = chip_data;
 
 	return 0;
@@ -1347,7 +1348,7 @@ EXPORT_SYMBOL_GPL(irq_domain_set_hwirq_and_chip);
  * @handler_name:	The interrupt handler name
  */
 void irq_domain_set_info(struct irq_domain *domain, unsigned int virq,
-			 irq_hw_number_t hwirq, struct irq_chip *chip,
+			 irq_hw_number_t hwirq, const struct irq_chip *chip,
 			 void *chip_data, irq_flow_handler_t handler,
 			 void *handler_data, const char *handler_name)
 {
@@ -1853,7 +1854,7 @@ EXPORT_SYMBOL_GPL(irq_domain_get_irq_data);
  * @handler_name:	The interrupt handler name
  */
 void irq_domain_set_info(struct irq_domain *domain, unsigned int virq,
-			 irq_hw_number_t hwirq, struct irq_chip *chip,
+			 irq_hw_number_t hwirq, const struct irq_chip *chip,
 			 void *chip_data, irq_flow_handler_t handler,
 			 void *handler_data, const char *handler_name)
 {
-- 
GitLab


From 393e1280f765661cf39785e967676a4e57324126 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Wed, 9 Feb 2022 16:25:59 +0000
Subject: [PATCH 0402/1586] genirq: Allow irq_chip registration functions to
 take a const irq_chip

In order to let a const irqchip be fed to the irqchip layer, adjust
the various prototypes. An extra cast in irq_set_chip()() is required
to avoid a warning.

Signed-off-by: Marc Zyngier <maz@kernel.org>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20220209162607.1118325-3-maz@kernel.org
---
 include/linux/irq.h | 7 ++++---
 kernel/irq/chip.c   | 9 +++------
 2 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 2cb2e2ac2703c..f92788ccdba27 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -710,10 +710,11 @@ extern struct irq_chip no_irq_chip;
 extern struct irq_chip dummy_irq_chip;
 
 extern void
-irq_set_chip_and_handler_name(unsigned int irq, struct irq_chip *chip,
+irq_set_chip_and_handler_name(unsigned int irq, const struct irq_chip *chip,
 			      irq_flow_handler_t handle, const char *name);
 
-static inline void irq_set_chip_and_handler(unsigned int irq, struct irq_chip *chip,
+static inline void irq_set_chip_and_handler(unsigned int irq,
+					    const struct irq_chip *chip,
 					    irq_flow_handler_t handle)
 {
 	irq_set_chip_and_handler_name(irq, chip, handle, NULL);
@@ -803,7 +804,7 @@ static inline void irq_set_percpu_devid_flags(unsigned int irq)
 }
 
 /* Set/get chip/data for an IRQ: */
-extern int irq_set_chip(unsigned int irq, struct irq_chip *chip);
+extern int irq_set_chip(unsigned int irq, const struct irq_chip *chip);
 extern int irq_set_handler_data(unsigned int irq, void *data);
 extern int irq_set_chip_data(unsigned int irq, void *data);
 extern int irq_set_irq_type(unsigned int irq, unsigned int type);
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 24b6f2b40e5e9..54af0deb239b8 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -38,7 +38,7 @@ struct irqaction chained_action = {
  *	@irq:	irq number
  *	@chip:	pointer to irq chip description structure
  */
-int irq_set_chip(unsigned int irq, struct irq_chip *chip)
+int irq_set_chip(unsigned int irq, const struct irq_chip *chip)
 {
 	unsigned long flags;
 	struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0);
@@ -46,10 +46,7 @@ int irq_set_chip(unsigned int irq, struct irq_chip *chip)
 	if (!desc)
 		return -EINVAL;
 
-	if (!chip)
-		chip = &no_irq_chip;
-
-	desc->irq_data.chip = chip;
+	desc->irq_data.chip = (struct irq_chip *)(chip ?: &no_irq_chip);
 	irq_put_desc_unlock(desc, flags);
 	/*
 	 * For !CONFIG_SPARSE_IRQ make the irq show up in
@@ -1073,7 +1070,7 @@ irq_set_chained_handler_and_data(unsigned int irq, irq_flow_handler_t handle,
 EXPORT_SYMBOL_GPL(irq_set_chained_handler_and_data);
 
 void
-irq_set_chip_and_handler_name(unsigned int irq, struct irq_chip *chip,
+irq_set_chip_and_handler_name(unsigned int irq, const struct irq_chip *chip,
 			      irq_flow_handler_t handle, const char *name)
 {
 	irq_set_chip(irq, chip);
-- 
GitLab


From 0a25cb5544f4f01d2e7c06164555fd9cd6eb64fd Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Thu, 10 Feb 2022 09:27:21 +0000
Subject: [PATCH 0403/1586] genirq/debugfs: Use irq_print_chip() when provided
 by irqchip

Since irqchips have the option to implement irq_print_chip, use this
when available to output the irqchip name in debugfs.

Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220215112154.1360040-1-maz@kernel.org
---
 kernel/irq/debugfs.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/kernel/irq/debugfs.c b/kernel/irq/debugfs.c
index e4cff358b437e..2b43f5f5033d1 100644
--- a/kernel/irq/debugfs.c
+++ b/kernel/irq/debugfs.c
@@ -69,8 +69,12 @@ irq_debug_show_chip(struct seq_file *m, struct irq_data *data, int ind)
 		seq_printf(m, "chip: None\n");
 		return;
 	}
-	seq_printf(m, "%*schip:    %s\n", ind, "", chip->name);
-	seq_printf(m, "%*sflags:   0x%lx\n", ind + 1, "", chip->flags);
+	seq_printf(m, "%*schip:    ", ind, "");
+	if (chip->irq_print_chip)
+		chip->irq_print_chip(data, m);
+	else
+		seq_printf(m, "%s", chip->name);
+	seq_printf(m, "\n%*sflags:   0x%lx\n", ind + 1, "", chip->flags);
 	irq_debug_show_bits(m, ind, chip->flags, irqchip_flags,
 			    ARRAY_SIZE(irqchip_flags));
 }
-- 
GitLab


From 745f1fb91fe51b4a36bc1c3dbccdbbbb978f93d2 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Wed, 9 Feb 2022 16:26:00 +0000
Subject: [PATCH 0404/1586] irqchip/gic: Switch to dynamic chip name output

The last dynamic aspect of the GIC's irq_chip structure is the
name that is associated to it.

Move the output of that name to the relevant callback, which
allows us to do a bit of cleanup and mark the structures const.

Signed-off-by: Marc Zyngier <maz@kernel.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20220209162607.1118325-4-maz@kernel.org
---
 drivers/irqchip/irq-gic.c | 102 ++++++++++++++++++++------------------
 1 file changed, 54 insertions(+), 48 deletions(-)

diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index fb741b42ca2d2..58ba835bee1f3 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -34,6 +34,7 @@
 #include <linux/irqdomain.h>
 #include <linux/interrupt.h>
 #include <linux/percpu.h>
+#include <linux/seq_file.h>
 #include <linux/slab.h>
 #include <linux/irqchip.h>
 #include <linux/irqchip/chained_irq.h>
@@ -66,7 +67,6 @@ union gic_base {
 };
 
 struct gic_chip_data {
-	struct irq_chip chip;
 	union gic_base dist_base;
 	union gic_base cpu_base;
 	void __iomem *raw_dist_base;
@@ -397,18 +397,15 @@ static void gic_handle_cascade_irq(struct irq_desc *desc)
 	chained_irq_exit(chip, desc);
 }
 
-static const struct irq_chip gic_chip = {
-	.irq_mask		= gic_mask_irq,
-	.irq_unmask		= gic_unmask_irq,
-	.irq_eoi		= gic_eoi_irq,
-	.irq_set_type		= gic_set_type,
-	.irq_retrigger          = gic_retrigger,
-	.irq_get_irqchip_state	= gic_irq_get_irqchip_state,
-	.irq_set_irqchip_state	= gic_irq_set_irqchip_state,
-	.flags			= IRQCHIP_SET_TYPE_MASKED |
-				  IRQCHIP_SKIP_SET_WAKE |
-				  IRQCHIP_MASK_ON_SUSPEND,
-};
+static void gic_irq_print_chip(struct irq_data *d, struct seq_file *p)
+{
+	struct gic_chip_data *gic = irq_data_get_irq_chip_data(d);
+
+	if (gic->domain->dev)
+		seq_printf(p, gic->domain->dev->of_node->name);
+	else
+		seq_printf(p, "GIC-%d", (int)(gic - &gic_data[0]));
+}
 
 void __init gic_cascade_irq(unsigned int gic_nr, unsigned int irq)
 {
@@ -799,8 +796,12 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val,
 			    bool force)
 {
 	void __iomem *reg = gic_dist_base(d) + GIC_DIST_TARGET + gic_irq(d);
+	struct gic_chip_data *gic = irq_data_get_irq_chip_data(d);
 	unsigned int cpu;
 
+	if (unlikely(gic != &gic_data[0]))
+		return -EINVAL;
+
 	if (!force)
 		cpu = cpumask_any_and(mask_val, cpu_online_mask);
 	else
@@ -880,6 +881,39 @@ static __init void gic_smp_init(void)
 #define gic_ipi_send_mask	NULL
 #endif
 
+static const struct irq_chip gic_chip = {
+	.irq_mask		= gic_mask_irq,
+	.irq_unmask		= gic_unmask_irq,
+	.irq_eoi		= gic_eoi_irq,
+	.irq_set_type		= gic_set_type,
+	.irq_retrigger          = gic_retrigger,
+	.irq_set_affinity	= gic_set_affinity,
+	.ipi_send_mask		= gic_ipi_send_mask,
+	.irq_get_irqchip_state	= gic_irq_get_irqchip_state,
+	.irq_set_irqchip_state	= gic_irq_set_irqchip_state,
+	.irq_print_chip		= gic_irq_print_chip,
+	.flags			= IRQCHIP_SET_TYPE_MASKED |
+				  IRQCHIP_SKIP_SET_WAKE |
+				  IRQCHIP_MASK_ON_SUSPEND,
+};
+
+static const struct irq_chip gic_chip_mode1 = {
+	.name			= "GICv2",
+	.irq_mask		= gic_eoimode1_mask_irq,
+	.irq_unmask		= gic_unmask_irq,
+	.irq_eoi		= gic_eoimode1_eoi_irq,
+	.irq_set_type		= gic_set_type,
+	.irq_retrigger          = gic_retrigger,
+	.irq_set_affinity	= gic_set_affinity,
+	.ipi_send_mask		= gic_ipi_send_mask,
+	.irq_get_irqchip_state	= gic_irq_get_irqchip_state,
+	.irq_set_irqchip_state	= gic_irq_set_irqchip_state,
+	.irq_set_vcpu_affinity	= gic_irq_set_vcpu_affinity,
+	.flags			= IRQCHIP_SET_TYPE_MASKED |
+				  IRQCHIP_SKIP_SET_WAKE |
+				  IRQCHIP_MASK_ON_SUSPEND,
+};
+
 #ifdef CONFIG_BL_SWITCHER
 /*
  * gic_send_sgi - send a SGI directly to given CPU interface number
@@ -1024,15 +1058,19 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
 {
 	struct gic_chip_data *gic = d->host_data;
 	struct irq_data *irqd = irq_desc_get_irq_data(irq_to_desc(irq));
+	const struct irq_chip *chip;
+
+	chip = (static_branch_likely(&supports_deactivate_key) &&
+		gic == &gic_data[0]) ? &gic_chip_mode1 : &gic_chip;
 
 	switch (hw) {
 	case 0 ... 31:
 		irq_set_percpu_devid(irq);
-		irq_domain_set_info(d, irq, hw, &gic->chip, d->host_data,
+		irq_domain_set_info(d, irq, hw, chip, d->host_data,
 				    handle_percpu_devid_irq, NULL, NULL);
 		break;
 	default:
-		irq_domain_set_info(d, irq, hw, &gic->chip, d->host_data,
+		irq_domain_set_info(d, irq, hw, chip, d->host_data,
 				    handle_fasteoi_irq, NULL, NULL);
 		irq_set_probe(irq);
 		irqd_set_single_target(irqd);
@@ -1127,25 +1165,6 @@ static const struct irq_domain_ops gic_irq_domain_ops = {
 	.unmap = gic_irq_domain_unmap,
 };
 
-static void gic_init_chip(struct gic_chip_data *gic, const char *name,
-			  bool use_eoimode1)
-{
-	/* Initialize irq_chip */
-	gic->chip = gic_chip;
-	gic->chip.name = name;
-
-	if (use_eoimode1) {
-		gic->chip.irq_mask = gic_eoimode1_mask_irq;
-		gic->chip.irq_eoi = gic_eoimode1_eoi_irq;
-		gic->chip.irq_set_vcpu_affinity = gic_irq_set_vcpu_affinity;
-	}
-
-	if (gic == &gic_data[0]) {
-		gic->chip.irq_set_affinity = gic_set_affinity;
-		gic->chip.ipi_send_mask = gic_ipi_send_mask;
-	}
-}
-
 static int gic_init_bases(struct gic_chip_data *gic,
 			  struct fwnode_handle *handle)
 {
@@ -1245,7 +1264,6 @@ error:
 static int __init __gic_init_bases(struct gic_chip_data *gic,
 				   struct fwnode_handle *handle)
 {
-	char *name;
 	int i, ret;
 
 	if (WARN_ON(!gic || gic->domain))
@@ -1265,18 +1283,8 @@ static int __init __gic_init_bases(struct gic_chip_data *gic,
 			pr_info("GIC: Using split EOI/Deactivate mode\n");
 	}
 
-	if (static_branch_likely(&supports_deactivate_key) && gic == &gic_data[0]) {
-		name = kasprintf(GFP_KERNEL, "GICv2");
-		gic_init_chip(gic, name, true);
-	} else {
-		name = kasprintf(GFP_KERNEL, "GIC-%d", (int)(gic-&gic_data[0]));
-		gic_init_chip(gic, name, false);
-	}
-
 	ret = gic_init_bases(gic, handle);
-	if (ret)
-		kfree(name);
-	else if (gic == &gic_data[0])
+	if (gic == &gic_data[0])
 		gic_smp_init();
 
 	return ret;
@@ -1459,8 +1467,6 @@ int gic_of_init_child(struct device *dev, struct gic_chip_data **gic, int irq)
 	if (!*gic)
 		return -ENOMEM;
 
-	gic_init_chip(*gic, dev->of_node->name, false);
-
 	ret = gic_of_setup(*gic, dev->of_node);
 	if (ret)
 		return ret;
-- 
GitLab


From 365550239f87342e3d29802fce0f5e9a044e8070 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Wed, 9 Feb 2022 16:26:01 +0000
Subject: [PATCH 0405/1586] irqchip/lpc32xx: Switch to dynamic chip name output

Instead of overriding the name field with the device name, use
the relevant callback. This allows us to make the irq_chip structure
const.

Signed-off-by: Marc Zyngier <maz@kernel.org>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20220209162607.1118325-5-maz@kernel.org
---
 drivers/irqchip/irq-lpc32xx.c | 34 ++++++++++++++++++++++------------
 1 file changed, 22 insertions(+), 12 deletions(-)

diff --git a/drivers/irqchip/irq-lpc32xx.c b/drivers/irqchip/irq-lpc32xx.c
index a29357f394506..4d70a857133f4 100644
--- a/drivers/irqchip/irq-lpc32xx.c
+++ b/drivers/irqchip/irq-lpc32xx.c
@@ -11,6 +11,7 @@
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
 #include <linux/of_platform.h>
+#include <linux/seq_file.h>
 #include <linux/slab.h>
 #include <asm/exception.h>
 
@@ -25,8 +26,8 @@
 
 struct lpc32xx_irq_chip {
 	void __iomem *base;
+	phys_addr_t addr;
 	struct irq_domain *domain;
-	struct irq_chip chip;
 };
 
 static struct lpc32xx_irq_chip *lpc32xx_mic_irqc;
@@ -118,6 +119,24 @@ static int lpc32xx_irq_set_type(struct irq_data *d, unsigned int type)
 	return 0;
 }
 
+static void lpc32xx_irq_print_chip(struct irq_data *d, struct seq_file *p)
+{
+	struct lpc32xx_irq_chip *ic = irq_data_get_irq_chip_data(d);
+
+	if (ic == lpc32xx_mic_irqc)
+		seq_printf(p, "%08x.mic", ic->addr);
+	else
+		seq_printf(p, "%08x.sic", ic->addr);
+}
+
+static const struct irq_chip lpc32xx_chip = {
+	.irq_ack	= lpc32xx_irq_ack,
+	.irq_mask	= lpc32xx_irq_mask,
+	.irq_unmask	= lpc32xx_irq_unmask,
+	.irq_set_type	= lpc32xx_irq_set_type,
+	.irq_print_chip	= lpc32xx_irq_print_chip,
+};
+
 static void __exception_irq_entry lpc32xx_handle_irq(struct pt_regs *regs)
 {
 	struct lpc32xx_irq_chip *ic = lpc32xx_mic_irqc;
@@ -153,7 +172,7 @@ static int lpc32xx_irq_domain_map(struct irq_domain *id, unsigned int virq,
 	struct lpc32xx_irq_chip *ic = id->host_data;
 
 	irq_set_chip_data(virq, ic);
-	irq_set_chip_and_handler(virq, &ic->chip, handle_level_irq);
+	irq_set_chip_and_handler(virq, &lpc32xx_chip, handle_level_irq);
 	irq_set_status_flags(virq, IRQ_LEVEL);
 	irq_set_noprobe(virq);
 
@@ -183,6 +202,7 @@ static int __init lpc32xx_of_ic_init(struct device_node *node,
 	if (!irqc)
 		return -ENOMEM;
 
+	irqc->addr = addr;
 	irqc->base = of_iomap(node, 0);
 	if (!irqc->base) {
 		pr_err("%pOF: unable to map registers\n", node);
@@ -190,21 +210,11 @@ static int __init lpc32xx_of_ic_init(struct device_node *node,
 		return -EINVAL;
 	}
 
-	irqc->chip.irq_ack = lpc32xx_irq_ack;
-	irqc->chip.irq_mask = lpc32xx_irq_mask;
-	irqc->chip.irq_unmask = lpc32xx_irq_unmask;
-	irqc->chip.irq_set_type = lpc32xx_irq_set_type;
-	if (is_mic)
-		irqc->chip.name = kasprintf(GFP_KERNEL, "%08x.mic", addr);
-	else
-		irqc->chip.name = kasprintf(GFP_KERNEL, "%08x.sic", addr);
-
 	irqc->domain = irq_domain_add_linear(node, NR_LPC32XX_IC_IRQS,
 					     &lpc32xx_irq_domain_ops, irqc);
 	if (!irqc->domain) {
 		pr_err("unable to add irq domain\n");
 		iounmap(irqc->base);
-		kfree(irqc->chip.name);
 		kfree(irqc);
 		return -ENODEV;
 	}
-- 
GitLab


From 421f16238a9da88ce1f2e3a103866de095cc15f6 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Wed, 9 Feb 2022 16:26:02 +0000
Subject: [PATCH 0406/1586] irqchip/mvebu-pic: Switch to dynamic chip name
 output

Instead of overriding the name field, track the corresponding device
and use the relevant callback to output its name.

This allows us to make the irq_chip structure const.

Signed-off-by: Marc Zyngier <maz@kernel.org>
Reviewed-by: Gregory CLEMENT <gregory.clement@bootlin.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20220209162607.1118325-6-maz@kernel.org
---
 drivers/irqchip/irq-mvebu-pic.c | 28 ++++++++++++++++++----------
 1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/drivers/irqchip/irq-mvebu-pic.c b/drivers/irqchip/irq-mvebu-pic.c
index 870f9866b8da4..ef3d3646ccc2f 100644
--- a/drivers/irqchip/irq-mvebu-pic.c
+++ b/drivers/irqchip/irq-mvebu-pic.c
@@ -18,6 +18,7 @@
 #include <linux/module.h>
 #include <linux/of_irq.h>
 #include <linux/platform_device.h>
+#include <linux/seq_file.h>
 
 #define PIC_CAUSE	       0x0
 #define PIC_MASK	       0x4
@@ -29,7 +30,7 @@ struct mvebu_pic {
 	void __iomem *base;
 	u32 parent_irq;
 	struct irq_domain *domain;
-	struct irq_chip irq_chip;
+	struct platform_device *pdev;
 };
 
 static void mvebu_pic_reset(struct mvebu_pic *pic)
@@ -66,6 +67,20 @@ static void mvebu_pic_unmask_irq(struct irq_data *d)
 	writel(reg, pic->base + PIC_MASK);
 }
 
+static void mvebu_pic_print_chip(struct irq_data *d, struct seq_file *p)
+{
+	struct mvebu_pic *pic = irq_data_get_irq_chip_data(d);
+
+	seq_printf(p, dev_name(&pic->pdev->dev));
+}
+
+static const struct irq_chip mvebu_pic_chip = {
+	.irq_mask	= mvebu_pic_mask_irq,
+	.irq_unmask	= mvebu_pic_unmask_irq,
+	.irq_eoi	= mvebu_pic_eoi_irq,
+	.irq_print_chip	= mvebu_pic_print_chip,
+};
+
 static int mvebu_pic_irq_map(struct irq_domain *domain, unsigned int virq,
 			     irq_hw_number_t hwirq)
 {
@@ -73,8 +88,7 @@ static int mvebu_pic_irq_map(struct irq_domain *domain, unsigned int virq,
 
 	irq_set_percpu_devid(virq);
 	irq_set_chip_data(virq, pic);
-	irq_set_chip_and_handler(virq, &pic->irq_chip,
-				 handle_percpu_devid_irq);
+	irq_set_chip_and_handler(virq, &mvebu_pic_chip, handle_percpu_devid_irq);
 	irq_set_status_flags(virq, IRQ_LEVEL);
 	irq_set_probe(virq);
 
@@ -120,22 +134,16 @@ static int mvebu_pic_probe(struct platform_device *pdev)
 {
 	struct device_node *node = pdev->dev.of_node;
 	struct mvebu_pic *pic;
-	struct irq_chip *irq_chip;
 
 	pic = devm_kzalloc(&pdev->dev, sizeof(struct mvebu_pic), GFP_KERNEL);
 	if (!pic)
 		return -ENOMEM;
 
+	pic->pdev = pdev;
 	pic->base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(pic->base))
 		return PTR_ERR(pic->base);
 
-	irq_chip = &pic->irq_chip;
-	irq_chip->name = dev_name(&pdev->dev);
-	irq_chip->irq_mask = mvebu_pic_mask_irq;
-	irq_chip->irq_unmask = mvebu_pic_unmask_irq;
-	irq_chip->irq_eoi = mvebu_pic_eoi_irq;
-
 	pic->parent_irq = irq_of_parse_and_map(node, 0);
 	if (pic->parent_irq <= 0) {
 		dev_err(&pdev->dev, "Failed to parse parent interrupt\n");
-- 
GitLab


From 3344265a2692414831c15964dd27a5b4adc4ed83 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Wed, 9 Feb 2022 16:26:03 +0000
Subject: [PATCH 0407/1586] irqchip/ts4800: Switch to dynamic chip name output

Signed-off-by: Marc Zyngier <maz@kernel.org>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20220209162607.1118325-7-maz@kernel.org
---
 drivers/irqchip/irq-ts4800.c | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/drivers/irqchip/irq-ts4800.c b/drivers/irqchip/irq-ts4800.c
index f032db23b30f5..b2d61d4f6fe6f 100644
--- a/drivers/irqchip/irq-ts4800.c
+++ b/drivers/irqchip/irq-ts4800.c
@@ -19,14 +19,15 @@
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
 #include <linux/platform_device.h>
+#include <linux/seq_file.h>
 
 #define IRQ_MASK        0x4
 #define IRQ_STATUS      0x8
 
 struct ts4800_irq_data {
 	void __iomem            *base;
+	struct platform_device	*pdev;
 	struct irq_domain       *domain;
-	struct irq_chip         irq_chip;
 };
 
 static void ts4800_irq_mask(struct irq_data *d)
@@ -47,12 +48,25 @@ static void ts4800_irq_unmask(struct irq_data *d)
 	writew(reg & ~mask, data->base + IRQ_MASK);
 }
 
+static void ts4800_irq_print_chip(struct irq_data *d, struct seq_file *p)
+{
+	struct ts4800_irq_data *data = irq_data_get_irq_chip_data(d);
+
+	seq_printf(p, "%s", dev_name(&data->pdev->dev));
+}
+
+static const struct irq_chip ts4800_chip = {
+	.irq_mask	= ts4800_irq_mask,
+	.irq_unmask	= ts4800_irq_unmask,
+	.irq_print_chip	= ts4800_irq_print_chip,
+};
+
 static int ts4800_irqdomain_map(struct irq_domain *d, unsigned int irq,
 				irq_hw_number_t hwirq)
 {
 	struct ts4800_irq_data *data = d->host_data;
 
-	irq_set_chip_and_handler(irq, &data->irq_chip, handle_simple_irq);
+	irq_set_chip_and_handler(irq, &ts4800_chip, handle_simple_irq);
 	irq_set_chip_data(irq, data);
 	irq_set_noprobe(irq);
 
@@ -92,13 +106,13 @@ static int ts4800_ic_probe(struct platform_device *pdev)
 {
 	struct device_node *node = pdev->dev.of_node;
 	struct ts4800_irq_data *data;
-	struct irq_chip *irq_chip;
 	int parent_irq;
 
 	data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL);
 	if (!data)
 		return -ENOMEM;
 
+	data->pdev = pdev;
 	data->base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(data->base))
 		return PTR_ERR(data->base);
@@ -111,11 +125,6 @@ static int ts4800_ic_probe(struct platform_device *pdev)
 		return -EINVAL;
 	}
 
-	irq_chip = &data->irq_chip;
-	irq_chip->name = dev_name(&pdev->dev);
-	irq_chip->irq_mask = ts4800_irq_mask;
-	irq_chip->irq_unmask = ts4800_irq_unmask;
-
 	data->domain = irq_domain_add_linear(node, 8, &ts4800_ic_ops, data);
 	if (!data->domain) {
 		dev_err(&pdev->dev, "cannot add IRQ domain\n");
-- 
GitLab


From 3fb212a042fbd8eccbb2af1852e03ed7757b9600 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Wed, 9 Feb 2022 16:26:04 +0000
Subject: [PATCH 0408/1586] irqchip/versatile-fpga: Switch to dynamic chip name
 output

Move the name output to the relevant callback, which allows us
some nice cleanups (mostly owing to the fact that the driver is
now DT only.

We also drop a random include directive from the ftintc010 driver.

Signed-off-by: Marc Zyngier <maz@kernel.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20220209162607.1118325-8-maz@kernel.org
---
 drivers/irqchip/irq-ftintc010.c        |  1 -
 drivers/irqchip/irq-versatile-fpga.c   | 46 +++++++++++++++-----------
 include/linux/irqchip/versatile-fpga.h | 14 --------
 3 files changed, 26 insertions(+), 35 deletions(-)
 delete mode 100644 include/linux/irqchip/versatile-fpga.h

diff --git a/drivers/irqchip/irq-ftintc010.c b/drivers/irqchip/irq-ftintc010.c
index 5cc268880f8e1..46a3aa60e50e3 100644
--- a/drivers/irqchip/irq-ftintc010.c
+++ b/drivers/irqchip/irq-ftintc010.c
@@ -11,7 +11,6 @@
 #include <linux/irq.h>
 #include <linux/io.h>
 #include <linux/irqchip.h>
-#include <linux/irqchip/versatile-fpga.h>
 #include <linux/irqdomain.h>
 #include <linux/module.h>
 #include <linux/of.h>
diff --git a/drivers/irqchip/irq-versatile-fpga.c b/drivers/irqchip/irq-versatile-fpga.c
index f2757b6aecc81..ba543ed9c1543 100644
--- a/drivers/irqchip/irq-versatile-fpga.c
+++ b/drivers/irqchip/irq-versatile-fpga.c
@@ -7,12 +7,12 @@
 #include <linux/io.h>
 #include <linux/irqchip.h>
 #include <linux/irqchip/chained_irq.h>
-#include <linux/irqchip/versatile-fpga.h>
 #include <linux/irqdomain.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
+#include <linux/seq_file.h>
 
 #include <asm/exception.h>
 #include <asm/mach/irq.h>
@@ -34,14 +34,12 @@
 /**
  * struct fpga_irq_data - irq data container for the FPGA IRQ controller
  * @base: memory offset in virtual memory
- * @chip: chip container for this instance
  * @domain: IRQ domain for this instance
  * @valid: mask for valid IRQs on this controller
  * @used_irqs: number of active IRQs on this controller
  */
 struct fpga_irq_data {
 	void __iomem *base;
-	struct irq_chip chip;
 	u32 valid;
 	struct irq_domain *domain;
 	u8 used_irqs;
@@ -67,6 +65,20 @@ static void fpga_irq_unmask(struct irq_data *d)
 	writel(mask, f->base + IRQ_ENABLE_SET);
 }
 
+static void fpga_irq_print_chip(struct irq_data *d, struct seq_file *p)
+{
+	struct fpga_irq_data *f = irq_data_get_irq_chip_data(d);
+
+	seq_printf(p, irq_domain_get_of_node(f->domain)->name);
+}
+
+static const struct irq_chip fpga_chip = {
+	.irq_ack	= fpga_irq_mask,
+	.irq_mask	= fpga_irq_mask,
+	.irq_unmask	= fpga_irq_unmask,
+	.irq_print_chip	= fpga_irq_print_chip,
+};
+
 static void fpga_irq_handle(struct irq_desc *desc)
 {
 	struct irq_chip *chip = irq_desc_get_chip(desc);
@@ -116,7 +128,7 @@ static int handle_one_fpga(struct fpga_irq_data *f, struct pt_regs *regs)
  * Keep iterating over all registered FPGA IRQ controllers until there are
  * no pending interrupts.
  */
-asmlinkage void __exception_irq_entry fpga_handle_irq(struct pt_regs *regs)
+static asmlinkage void __exception_irq_entry fpga_handle_irq(struct pt_regs *regs)
 {
 	int i, handled;
 
@@ -135,8 +147,7 @@ static int fpga_irqdomain_map(struct irq_domain *d, unsigned int irq,
 	if (!(f->valid & BIT(hwirq)))
 		return -EPERM;
 	irq_set_chip_data(irq, f);
-	irq_set_chip_and_handler(irq, &f->chip,
-				handle_level_irq);
+	irq_set_chip_and_handler(irq, &fpga_chip, handle_level_irq);
 	irq_set_probe(irq);
 	return 0;
 }
@@ -146,8 +157,8 @@ static const struct irq_domain_ops fpga_irqdomain_ops = {
 	.xlate = irq_domain_xlate_onetwocell,
 };
 
-void __init fpga_irq_init(void __iomem *base, const char *name, int irq_start,
-			  int parent_irq, u32 valid, struct device_node *node)
+static void __init fpga_irq_init(void __iomem *base, int parent_irq,
+				 u32 valid, struct device_node *node)
 {
 	struct fpga_irq_data *f;
 	int i;
@@ -158,10 +169,6 @@ void __init fpga_irq_init(void __iomem *base, const char *name, int irq_start,
 	}
 	f = &fpga_irq_devices[fpga_irq_id];
 	f->base = base;
-	f->chip.name = name;
-	f->chip.irq_ack = fpga_irq_mask;
-	f->chip.irq_mask = fpga_irq_mask;
-	f->chip.irq_unmask = fpga_irq_unmask;
 	f->valid = valid;
 
 	if (parent_irq != -1) {
@@ -169,20 +176,19 @@ void __init fpga_irq_init(void __iomem *base, const char *name, int irq_start,
 						 f);
 	}
 
-	/* This will also allocate irq descriptors */
-	f->domain = irq_domain_add_simple(node, fls(valid), irq_start,
+	f->domain = irq_domain_add_linear(node, fls(valid),
 					  &fpga_irqdomain_ops, f);
 
 	/* This will allocate all valid descriptors in the linear case */
 	for (i = 0; i < fls(valid); i++)
 		if (valid & BIT(i)) {
-			if (!irq_start)
-				irq_create_mapping(f->domain, i);
+			/* Is this still required? */
+			irq_create_mapping(f->domain, i);
 			f->used_irqs++;
 		}
 
 	pr_info("FPGA IRQ chip %d \"%s\" @ %p, %u irqs",
-		fpga_irq_id, name, base, f->used_irqs);
+		fpga_irq_id, node->name, base, f->used_irqs);
 	if (parent_irq != -1)
 		pr_cont(", parent IRQ: %d\n", parent_irq);
 	else
@@ -192,8 +198,8 @@ void __init fpga_irq_init(void __iomem *base, const char *name, int irq_start,
 }
 
 #ifdef CONFIG_OF
-int __init fpga_irq_of_init(struct device_node *node,
-			    struct device_node *parent)
+static int __init fpga_irq_of_init(struct device_node *node,
+				   struct device_node *parent)
 {
 	void __iomem *base;
 	u32 clear_mask;
@@ -222,7 +228,7 @@ int __init fpga_irq_of_init(struct device_node *node,
 		parent_irq = -1;
 	}
 
-	fpga_irq_init(base, node->name, 0, parent_irq, valid_mask, node);
+	fpga_irq_init(base, parent_irq, valid_mask, node);
 
 	/*
 	 * On Versatile AB/PB, some secondary interrupts have a direct
diff --git a/include/linux/irqchip/versatile-fpga.h b/include/linux/irqchip/versatile-fpga.h
deleted file mode 100644
index a978fc8c79965..0000000000000
--- a/include/linux/irqchip/versatile-fpga.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef PLAT_FPGA_IRQ_H
-#define PLAT_FPGA_IRQ_H
-
-struct device_node;
-struct pt_regs;
-
-void fpga_handle_irq(struct pt_regs *regs);
-void fpga_irq_init(void __iomem *, const char *, int, int, u32,
-		struct device_node *node);
-int fpga_irq_of_init(struct device_node *node,
-		     struct device_node *parent);
-
-#endif
-- 
GitLab


From 95ed57c73bbcffa02cbb1d61c19484c2ec6de094 Mon Sep 17 00:00:00 2001
From: Yury Norov <yury.norov@gmail.com>
Date: Thu, 10 Feb 2022 14:48:56 -0800
Subject: [PATCH 0409/1586] perf: replace bitmap_weight with bitmap_empty where
 appropriate

In some places, drivers/perf code calls bitmap_weight() to check if any
bit of a given bitmap is set. It's better to use bitmap_empty() in that
case because bitmap_empty() stops traversing the bitmap as soon as it
finds first set bit, while bitmap_weight() counts all bits unconditionally.

Signed-off-by: Yury Norov <yury.norov@gmail.com>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/r/20220210224933.379149-13-yury.norov@gmail.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/arm-cci.c                   | 2 +-
 drivers/perf/arm_pmu.c                   | 4 ++--
 drivers/perf/hisilicon/hisi_uncore_pmu.c | 2 +-
 drivers/perf/xgene_pmu.c                 | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c
index 54aca3a628147..96e09fa409095 100644
--- a/drivers/perf/arm-cci.c
+++ b/drivers/perf/arm-cci.c
@@ -1096,7 +1096,7 @@ static void cci_pmu_enable(struct pmu *pmu)
 {
 	struct cci_pmu *cci_pmu = to_cci_pmu(pmu);
 	struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events;
-	int enabled = bitmap_weight(hw_events->used_mask, cci_pmu->num_cntrs);
+	bool enabled = !bitmap_empty(hw_events->used_mask, cci_pmu->num_cntrs);
 	unsigned long flags;
 
 	if (!enabled)
diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index 295cc7952d0ed..a31b302b0aded 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -524,7 +524,7 @@ static void armpmu_enable(struct pmu *pmu)
 {
 	struct arm_pmu *armpmu = to_arm_pmu(pmu);
 	struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
-	int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events);
+	bool enabled = !bitmap_empty(hw_events->used_mask, armpmu->num_events);
 
 	/* For task-bound events we may be called on other CPUs */
 	if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
@@ -785,7 +785,7 @@ static int cpu_pm_pmu_notify(struct notifier_block *b, unsigned long cmd,
 {
 	struct arm_pmu *armpmu = container_of(b, struct arm_pmu, cpu_pm_nb);
 	struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
-	int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events);
+	bool enabled = !bitmap_empty(hw_events->used_mask, armpmu->num_events);
 
 	if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
 		return NOTIFY_DONE;
diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c
index a738aeab5c049..358e4e284a629 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c
@@ -393,7 +393,7 @@ EXPORT_SYMBOL_GPL(hisi_uncore_pmu_read);
 void hisi_uncore_pmu_enable(struct pmu *pmu)
 {
 	struct hisi_pmu *hisi_pmu = to_hisi_pmu(pmu);
-	int enabled = bitmap_weight(hisi_pmu->pmu_events.used_mask,
+	bool enabled = !bitmap_empty(hisi_pmu->pmu_events.used_mask,
 				    hisi_pmu->num_counters);
 
 	if (!enabled)
diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c
index 5283608dc055b..0c32dffc7edef 100644
--- a/drivers/perf/xgene_pmu.c
+++ b/drivers/perf/xgene_pmu.c
@@ -867,7 +867,7 @@ static void xgene_perf_pmu_enable(struct pmu *pmu)
 {
 	struct xgene_pmu_dev *pmu_dev = to_pmu_dev(pmu);
 	struct xgene_pmu *xgene_pmu = pmu_dev->parent;
-	int enabled = bitmap_weight(pmu_dev->cntr_assign_mask,
+	bool enabled = !bitmap_empty(pmu_dev->cntr_assign_mask,
 			pmu_dev->max_counters);
 
 	if (!enabled)
-- 
GitLab


From 69591a402d18fd60c8c95b061279f17bf0b78129 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 15 Feb 2022 09:10:47 +0100
Subject: [PATCH 0410/1586] block: remove biodoc.rst

This document is completely out of date and extremely misleading. In
general the existing kerneldoc comment serve as a much better
documentation of the still existing functionality, while the history
blurbs are pretty much irrelevant today.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20220215081047.3693582-1-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 Documentation/block/biodoc.rst | 1155 --------------------------------
 Documentation/block/index.rst  |    1 -
 2 files changed, 1156 deletions(-)
 delete mode 100644 Documentation/block/biodoc.rst

diff --git a/Documentation/block/biodoc.rst b/Documentation/block/biodoc.rst
deleted file mode 100644
index fdebc0fd8468f..0000000000000
--- a/Documentation/block/biodoc.rst
+++ /dev/null
@@ -1,1155 +0,0 @@
-=====================================================
-Notes on the Generic Block Layer Rewrite in Linux 2.5
-=====================================================
-
-.. note::
-
-	It seems that there are lot of outdated stuff here. This seems
-	to be written somewhat as a task list. Yet, eventually, something
-	here might still be useful.
-
-Notes Written on Jan 15, 2002:
-
-	- Jens Axboe <jens.axboe@oracle.com>
-	- Suparna Bhattacharya <suparna@in.ibm.com>
-
-Last Updated May 2, 2002
-
-September 2003: Updated I/O Scheduler portions
-	- Nick Piggin <npiggin@kernel.dk>
-
-Introduction
-============
-
-These are some notes describing some aspects of the 2.5 block layer in the
-context of the bio rewrite. The idea is to bring out some of the key
-changes and a glimpse of the rationale behind those changes.
-
-Please mail corrections & suggestions to suparna@in.ibm.com.
-
-Credits
-=======
-
-2.5 bio rewrite:
-	- Jens Axboe <jens.axboe@oracle.com>
-
-Many aspects of the generic block layer redesign were driven by and evolved
-over discussions, prior patches and the collective experience of several
-people. See sections 8 and 9 for a list of some related references.
-
-The following people helped with review comments and inputs for this
-document:
-
-	- Christoph Hellwig <hch@infradead.org>
-	- Arjan van de Ven <arjanv@redhat.com>
-	- Randy Dunlap <rdunlap@xenotime.net>
-	- Andre Hedrick <andre@linux-ide.org>
-
-The following people helped with fixes/contributions to the bio patches
-while it was still work-in-progress:
-
-	- David S. Miller <davem@redhat.com>
-
-
-.. Description of Contents:
-
-   1. Scope for tuning of logic to various needs
-     1.1 Tuning based on device or low level driver capabilities
-	- Per-queue parameters
-	- Highmem I/O support
-	- I/O scheduler modularization
-     1.2 Tuning based on high level requirements/capabilities
-	1.2.1 Request Priority/Latency
-     1.3 Direct access/bypass to lower layers for diagnostics and special
-	 device operations
-	1.3.1 Pre-built commands
-   2. New flexible and generic but minimalist i/o structure or descriptor
-      (instead of using buffer heads at the i/o layer)
-     2.1 Requirements/Goals addressed
-     2.2 The bio struct in detail (multi-page io unit)
-     2.3 Changes in the request structure
-   3. Using bios
-     3.1 Setup/teardown (allocation, splitting)
-     3.2 Generic bio helper routines
-       3.2.1 Traversing segments and completion units in a request
-       3.2.2 Setting up DMA scatterlists
-       3.2.3 I/O completion
-       3.2.4 Implications for drivers that do not interpret bios (don't handle
-	  multiple segments)
-     3.3 I/O submission
-   4. The I/O scheduler
-   5. Scalability related changes
-     5.1 Granular locking: Removal of io_request_lock
-     5.2 Prepare for transition to 64 bit sector_t
-   6. Other Changes/Implications
-     6.1 Partition re-mapping handled by the generic block layer
-   7. A few tips on migration of older drivers
-   8. A list of prior/related/impacted patches/ideas
-   9. Other References/Discussion Threads
-
-
-Bio Notes
-=========
-
-Let us discuss the changes in the context of how some overall goals for the
-block layer are addressed.
-
-1. Scope for tuning the generic logic to satisfy various requirements
-=====================================================================
-
-The block layer design supports adaptable abstractions to handle common
-processing with the ability to tune the logic to an appropriate extent
-depending on the nature of the device and the requirements of the caller.
-One of the objectives of the rewrite was to increase the degree of tunability
-and to enable higher level code to utilize underlying device/driver
-capabilities to the maximum extent for better i/o performance. This is
-important especially in the light of ever improving hardware capabilities
-and application/middleware software designed to take advantage of these
-capabilities.
-
-1.1 Tuning based on low level device / driver capabilities
-----------------------------------------------------------
-
-Sophisticated devices with large built-in caches, intelligent i/o scheduling
-optimizations, high memory DMA support, etc may find some of the
-generic processing an overhead, while for less capable devices the
-generic functionality is essential for performance or correctness reasons.
-Knowledge of some of the capabilities or parameters of the device should be
-used at the generic block layer to take the right decisions on
-behalf of the driver.
-
-How is this achieved ?
-
-Tuning at a per-queue level:
-
-i. Per-queue limits/values exported to the generic layer by the driver
-
-Various parameters that the generic i/o scheduler logic uses are set at
-a per-queue level (e.g maximum request size, maximum number of segments in
-a scatter-gather list, logical block size)
-
-Some parameters that were earlier available as global arrays indexed by
-major/minor are now directly associated with the queue. Some of these may
-move into the block device structure in the future. Some characteristics
-have been incorporated into a queue flags field rather than separate fields
-in themselves.  There are blk_queue_xxx functions to set the parameters,
-rather than update the fields directly
-
-Some new queue property settings:
-
-	blk_queue_bounce_limit(q, u64 dma_address)
-		Enable I/O to highmem pages, dma_address being the
-		limit. No highmem default.
-
-	blk_queue_max_sectors(q, max_sectors)
-		Sets two variables that limit the size of the request.
-
-		- The request queue's max_sectors, which is a soft size in
-		  units of 512 byte sectors, and could be dynamically varied
-		  by the core kernel.
-
-		- The request queue's max_hw_sectors, which is a hard limit
-		  and reflects the maximum size request a driver can handle
-		  in units of 512 byte sectors.
-
-		The default for both max_sectors and max_hw_sectors is
-		255. The upper limit of max_sectors is 1024.
-
-	blk_queue_max_phys_segments(q, max_segments)
-		Maximum physical segments you can handle in a request. 128
-		default (driver limit). (See 3.2.2)
-
-	blk_queue_max_hw_segments(q, max_segments)
-		Maximum dma segments the hardware can handle in a request. 128
-		default (host adapter limit, after dma remapping).
-		(See 3.2.2)
-
-	blk_queue_max_segment_size(q, max_seg_size)
-		Maximum size of a clustered segment, 64kB default.
-
-	blk_queue_logical_block_size(q, logical_block_size)
-		Lowest possible sector size that the hardware can operate
-		on, 512 bytes default.
-
-New queue flags:
-
-	- QUEUE_FLAG_CLUSTER (see 3.2.2)
-	- QUEUE_FLAG_QUEUED (see 3.2.4)
-
-
-ii. High-mem i/o capabilities are now considered the default
-
-The generic bounce buffer logic, present in 2.4, where the block layer would
-by default copyin/out i/o requests on high-memory buffers to low-memory buffers
-assuming that the driver wouldn't be able to handle it directly, has been
-changed in 2.5. The bounce logic is now applied only for memory ranges
-for which the device cannot handle i/o. A driver can specify this by
-setting the queue bounce limit for the request queue for the device
-(blk_queue_bounce_limit()). This avoids the inefficiencies of the copyin/out
-where a device is capable of handling high memory i/o.
-
-In order to enable high-memory i/o where the device is capable of supporting
-it, the pci dma mapping routines and associated data structures have now been
-modified to accomplish a direct page -> bus translation, without requiring
-a virtual address mapping (unlike the earlier scheme of virtual address
--> bus translation). So this works uniformly for high-memory pages (which
-do not have a corresponding kernel virtual address space mapping) and
-low-memory pages.
-
-Note: Please refer to Documentation/core-api/dma-api-howto.rst for a discussion
-on PCI high mem DMA aspects and mapping of scatter gather lists, and support
-for 64 bit PCI.
-
-Special handling is required only for cases where i/o needs to happen on
-pages at physical memory addresses beyond what the device can support. In these
-cases, a bounce bio representing a buffer from the supported memory range
-is used for performing the i/o with copyin/copyout as needed depending on
-the type of the operation.  For example, in case of a read operation, the
-data read has to be copied to the original buffer on i/o completion, so a
-callback routine is set up to do this, while for write, the data is copied
-from the original buffer to the bounce buffer prior to issuing the
-operation. Since an original buffer may be in a high memory area that's not
-mapped in kernel virtual addr, a kmap operation may be required for
-performing the copy, and special care may be needed in the completion path
-as it may not be in irq context. Special care is also required (by way of
-GFP flags) when allocating bounce buffers, to avoid certain highmem
-deadlock possibilities.
-
-It is also possible that a bounce buffer may be allocated from high-memory
-area that's not mapped in kernel virtual addr, but within the range that the
-device can use directly; so the bounce page may need to be kmapped during
-copy operations. [Note: This does not hold in the current implementation,
-though]
-
-There are some situations when pages from high memory may need to
-be kmapped, even if bounce buffers are not necessary. For example a device
-may need to abort DMA operations and revert to PIO for the transfer, in
-which case a virtual mapping of the page is required. For SCSI it is also
-done in some scenarios where the low level driver cannot be trusted to
-handle a single sg entry correctly. The driver is expected to perform the
-kmaps as needed on such occasions as appropriate. A driver could also use
-the blk_queue_bounce() routine on its own to bounce highmem i/o to low
-memory for specific requests if so desired.
-
-iii. The i/o scheduler algorithm itself can be replaced/set as appropriate
-
-As in 2.4, it is possible to plugin a brand new i/o scheduler for a particular
-queue or pick from (copy) existing generic schedulers and replace/override
-certain portions of it. The 2.5 rewrite provides improved modularization
-of the i/o scheduler. There are more pluggable callbacks, e.g for init,
-add request, extract request, which makes it possible to abstract specific
-i/o scheduling algorithm aspects and details outside of the generic loop.
-It also makes it possible to completely hide the implementation details of
-the i/o scheduler from block drivers.
-
-I/O scheduler wrappers are to be used instead of accessing the queue directly.
-See section 4. The I/O scheduler for details.
-
-1.2 Tuning Based on High level code capabilities
-------------------------------------------------
-
-i. Application capabilities for raw i/o
-
-This comes from some of the high-performance database/middleware
-requirements where an application prefers to make its own i/o scheduling
-decisions based on an understanding of the access patterns and i/o
-characteristics
-
-ii. High performance filesystems or other higher level kernel code's
-capabilities
-
-Kernel components like filesystems could also take their own i/o scheduling
-decisions for optimizing performance. Journalling filesystems may need
-some control over i/o ordering.
-
-What kind of support exists at the generic block layer for this ?
-
-The flags and rw fields in the bio structure can be used for some tuning
-from above e.g indicating that an i/o is just a readahead request, or priority
-settings (currently unused). As far as user applications are concerned they
-would need an additional mechanism either via open flags or ioctls, or some
-other upper level mechanism to communicate such settings to block.
-
-1.2.1 Request Priority/Latency
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-Todo/Under discussion::
-
-  Arjan's proposed request priority scheme allows higher levels some broad
-  control (high/med/low) over the priority  of an i/o request vs other pending
-  requests in the queue. For example it allows reads for bringing in an
-  executable page on demand to be given a higher priority over pending write
-  requests which haven't aged too much on the queue. Potentially this priority
-  could even be exposed to applications in some manner, providing higher level
-  tunability. Time based aging avoids starvation of lower priority
-  requests. Some bits in the bi_opf flags field in the bio structure are
-  intended to be used for this priority information.
-
-
-1.3 Direct Access to Low level Device/Driver Capabilities (Bypass mode)
------------------------------------------------------------------------
-
-(e.g Diagnostics, Systems Management)
-
-There are situations where high-level code needs to have direct access to
-the low level device capabilities or requires the ability to issue commands
-to the device bypassing some of the intermediate i/o layers.
-These could, for example, be special control commands issued through ioctl
-interfaces, or could be raw read/write commands that stress the drive's
-capabilities for certain kinds of fitness tests. Having direct interfaces at
-multiple levels without having to pass through upper layers makes
-it possible to perform bottom up validation of the i/o path, layer by
-layer, starting from the media.
-
-The normal i/o submission interfaces, e.g submit_bio, could be bypassed
-for specially crafted requests which such ioctl or diagnostics
-interfaces would typically use, and the elevator add_request routine
-can instead be used to directly insert such requests in the queue or preferably
-the blk_do_rq routine can be used to place the request on the queue and
-wait for completion. Alternatively, sometimes the caller might just
-invoke a lower level driver specific interface with the request as a
-parameter.
-
-If the request is a means for passing on special information associated with
-the command, then such information is associated with the request->special
-field (rather than misuse the request->buffer field which is meant for the
-request data buffer's virtual mapping).
-
-For passing request data, the caller must build up a bio descriptor
-representing the concerned memory buffer if the underlying driver interprets
-bio segments or uses the block layer end*request* functions for i/o
-completion. Alternatively one could directly use the request->buffer field to
-specify the virtual address of the buffer, if the driver expects buffer
-addresses passed in this way and ignores bio entries for the request type
-involved. In the latter case, the driver would modify and manage the
-request->buffer, request->sector and request->nr_sectors or
-request->current_nr_sectors fields itself rather than using the block layer
-end_request or end_that_request_first completion interfaces.
-(See 2.3 or Documentation/block/request.rst for a brief explanation of
-the request structure fields)
-
-::
-
-  [TBD: end_that_request_last should be usable even in this case;
-  Perhaps an end_that_direct_request_first routine could be implemented to make
-  handling direct requests easier for such drivers; Also for drivers that
-  expect bios, a helper function could be provided for setting up a bio
-  corresponding to a data buffer]
-
-  <JENS: I dont understand the above, why is end_that_request_first() not
-  usable? Or _last for that matter. I must be missing something>
-
-  <SUP: What I meant here was that if the request doesn't have a bio, then
-   end_that_request_first doesn't modify nr_sectors or current_nr_sectors,
-   and hence can't be used for advancing request state settings on the
-   completion of partial transfers. The driver has to modify these fields
-   directly by hand.
-   This is because end_that_request_first only iterates over the bio list,
-   and always returns 0 if there are none associated with the request.
-   _last works OK in this case, and is not a problem, as I mentioned earlier
-  >
-
-1.3.1 Pre-built Commands
-^^^^^^^^^^^^^^^^^^^^^^^^
-
-A request can be created with a pre-built custom command  to be sent directly
-to the device. The cmd block in the request structure has room for filling
-in the command bytes. (i.e rq->cmd is now 16 bytes in size, and meant for
-command pre-building, and the type of the request is now indicated
-through rq->flags instead of via rq->cmd)
-
-The request structure flags can be set up to indicate the type of request
-in such cases (REQ_PC: direct packet command passed to driver, REQ_BLOCK_PC:
-packet command issued via blk_do_rq, REQ_SPECIAL: special request).
-
-It can help to pre-build device commands for requests in advance.
-Drivers can now specify a request prepare function (q->prep_rq_fn) that the
-block layer would invoke to pre-build device commands for a given request,
-or perform other preparatory processing for the request. This is routine is
-called by elv_next_request(), i.e. typically just before servicing a request.
-(The prepare function would not be called for requests that have RQF_DONTPREP
-enabled)
-
-Aside:
-  Pre-building could possibly even be done early, i.e before placing the
-  request on the queue, rather than construct the command on the fly in the
-  driver while servicing the request queue when it may affect latencies in
-  interrupt context or responsiveness in general. One way to add early
-  pre-building would be to do it whenever we fail to merge on a request.
-  Now REQ_NOMERGE is set in the request flags to skip this one in the future,
-  which means that it will not change before we feed it to the device. So
-  the pre-builder hook can be invoked there.
-
-
-2. Flexible and generic but minimalist i/o structure/descriptor
-===============================================================
-
-2.1 Reason for a new structure and requirements addressed
----------------------------------------------------------
-
-Prior to 2.5, buffer heads were used as the unit of i/o at the generic block
-layer, and the low level request structure was associated with a chain of
-buffer heads for a contiguous i/o request. This led to certain inefficiencies
-when it came to large i/o requests and readv/writev style operations, as it
-forced such requests to be broken up into small chunks before being passed
-on to the generic block layer, only to be merged by the i/o scheduler
-when the underlying device was capable of handling the i/o in one shot.
-Also, using the buffer head as an i/o structure for i/os that didn't originate
-from the buffer cache unnecessarily added to the weight of the descriptors
-which were generated for each such chunk.
-
-The following were some of the goals and expectations considered in the
-redesign of the block i/o data structure in 2.5.
-
-1.  Should be appropriate as a descriptor for both raw and buffered i/o  -
-    avoid cache related fields which are irrelevant in the direct/page i/o path,
-    or filesystem block size alignment restrictions which may not be relevant
-    for raw i/o.
-2.  Ability to represent high-memory buffers (which do not have a virtual
-    address mapping in kernel address space).
-3.  Ability to represent large i/os w/o unnecessarily breaking them up (i.e
-    greater than PAGE_SIZE chunks in one shot)
-4.  At the same time, ability to retain independent identity of i/os from
-    different sources or i/o units requiring individual completion (e.g. for
-    latency reasons)
-5.  Ability to represent an i/o involving multiple physical memory segments
-    (including non-page aligned page fragments, as specified via readv/writev)
-    without unnecessarily breaking it up, if the underlying device is capable of
-    handling it.
-6.  Preferably should be based on a memory descriptor structure that can be
-    passed around different types of subsystems or layers, maybe even
-    networking, without duplication or extra copies of data/descriptor fields
-    themselves in the process
-7.  Ability to handle the possibility of splits/merges as the structure passes
-    through layered drivers (lvm, md, evms), with minimal overhead.
-
-The solution was to define a new structure (bio)  for the block layer,
-instead of using the buffer head structure (bh) directly, the idea being
-avoidance of some associated baggage and limitations. The bio structure
-is uniformly used for all i/o at the block layer ; it forms a part of the
-bh structure for buffered i/o, and in the case of raw/direct i/o kiobufs are
-mapped to bio structures.
-
-2.2 The bio struct
-------------------
-
-The bio structure uses a vector representation pointing to an array of tuples
-of <page, offset, len> to describe the i/o buffer, and has various other
-fields describing i/o parameters and state that needs to be maintained for
-performing the i/o.
-
-Notice that this representation means that a bio has no virtual address
-mapping at all (unlike buffer heads).
-
-::
-
-  struct bio_vec {
-       struct page     *bv_page;
-       unsigned short  bv_len;
-       unsigned short  bv_offset;
-  };
-
-  /*
-   * main unit of I/O for the block layer and lower layers (ie drivers)
-   */
-  struct bio {
-       struct bio          *bi_next;    /* request queue link */
-       struct block_device *bi_bdev;	/* target device */
-       unsigned long       bi_flags;    /* status, command, etc */
-       unsigned long       bi_opf;       /* low bits: r/w, high: priority */
-
-       unsigned int	bi_vcnt;     /* how may bio_vec's */
-       struct bvec_iter	bi_iter;	/* current index into bio_vec array */
-
-       unsigned int	bi_size;     /* total size in bytes */
-       unsigned short	bi_hw_segments; /* segments after DMA remapping */
-       unsigned int	bi_max;	     /* max bio_vecs we can hold
-                                        used as index into pool */
-       struct bio_vec   *bi_io_vec;  /* the actual vec list */
-       bio_end_io_t	*bi_end_io;  /* bi_end_io (bio) */
-       atomic_t		bi_cnt;	     /* pin count: free when it hits zero */
-       void             *bi_private;
-  };
-
-With this multipage bio design:
-
-- Large i/os can be sent down in one go using a bio_vec list consisting
-  of an array of <page, offset, len> fragments (similar to the way fragments
-  are represented in the zero-copy network code)
-- Splitting of an i/o request across multiple devices (as in the case of
-  lvm or raid) is achieved by cloning the bio (where the clone points to
-  the same bi_io_vec array, but with the index and size accordingly modified)
-- A linked list of bios is used as before for unrelated merges [#]_ - this
-  avoids reallocs and makes independent completions easier to handle.
-- Code that traverses the req list can find all the segments of a bio
-  by using rq_for_each_segment.  This handles the fact that a request
-  has multiple bios, each of which can have multiple segments.
-- Drivers which can't process a large bio in one shot can use the bi_iter
-  field to keep track of the next bio_vec entry to process.
-  (e.g a 1MB bio_vec needs to be handled in max 128kB chunks for IDE)
-  [TBD: Should preferably also have a bi_voffset and bi_vlen to avoid modifying
-  bi_offset an len fields]
-
-.. [#]
-
-	unrelated merges -- a request ends up containing two or more bios that
-	didn't originate from the same place.
-
-bi_end_io() i/o callback gets called on i/o completion of the entire bio.
-
-At a lower level, drivers build a scatter gather list from the merged bios.
-The scatter gather list is in the form of an array of <page, offset, len>
-entries with their corresponding dma address mappings filled in at the
-appropriate time. As an optimization, contiguous physical pages can be
-covered by a single entry where <page> refers to the first page and <len>
-covers the range of pages (up to 16 contiguous pages could be covered this
-way). There is a helper routine (blk_rq_map_sg) which drivers can use to build
-the sg list.
-
-Note: Right now the only user of bios with more than one page is ll_rw_kio,
-which in turn means that only raw I/O uses it (direct i/o may not work
-right now). The intent however is to enable clustering of pages etc to
-become possible. The pagebuf abstraction layer from SGI also uses multi-page
-bios, but that is currently not included in the stock development kernels.
-The same is true of Andrew Morton's work-in-progress multipage bio writeout
-and readahead patches.
-
-2.3 Changes in the Request Structure
-------------------------------------
-
-The request structure is the structure that gets passed down to low level
-drivers. The block layer make_request function builds up a request structure,
-places it on the queue and invokes the drivers request_fn. The driver makes
-use of block layer helper routine elv_next_request to pull the next request
-off the queue. Control or diagnostic functions might bypass block and directly
-invoke underlying driver entry points passing in a specially constructed
-request structure.
-
-Only some relevant fields (mainly those which changed or may be referred
-to in some of the discussion here) are listed below, not necessarily in
-the order in which they occur in the structure (see include/linux/blkdev.h)
-Refer to Documentation/block/request.rst for details about all the request
-structure fields and a quick reference about the layers which are
-supposed to use or modify those fields::
-
-  struct request {
-	struct list_head queuelist;  /* Not meant to be directly accessed by
-					the driver.
-					Used by q->elv_next_request_fn
-					rq->queue is gone
-					*/
-	.
-	.
-	unsigned char cmd[16]; /* prebuilt command data block */
-	unsigned long flags;   /* also includes earlier rq->cmd settings */
-	.
-	.
-	sector_t sector; /* this field is now of type sector_t instead of int
-			    preparation for 64 bit sectors */
-	.
-	.
-
-	/* Number of scatter-gather DMA addr+len pairs after
-	 * physical address coalescing is performed.
-	 */
-	unsigned short nr_phys_segments;
-
-	/* Number of scatter-gather addr+len pairs after
-	 * physical and DMA remapping hardware coalescing is performed.
-	 * This is the number of scatter-gather entries the driver
-	 * will actually have to deal with after DMA mapping is done.
-	 */
-	unsigned short nr_hw_segments;
-
-	/* Various sector counts */
-	unsigned long nr_sectors;  /* no. of sectors left: driver modifiable */
-	unsigned long hard_nr_sectors;  /* block internal copy of above */
-	unsigned int current_nr_sectors; /* no. of sectors left in the
-					   current segment:driver modifiable */
-	unsigned long hard_cur_sectors; /* block internal copy of the above */
-	.
-	.
-	int tag;	/* command tag associated with request */
-	void *special;  /* same as before */
-	char *buffer;   /* valid only for low memory buffers up to
-			 current_nr_sectors */
-	.
-	.
-	struct bio *bio, *biotail;  /* bio list instead of bh */
-	struct request_list *rl;
-  }
-
-See the req_ops and req_flag_bits definitions for an explanation of the various
-flags available. Some bits are used by the block layer or i/o scheduler.
-
-The behaviour of the various sector counts are almost the same as before,
-except that since we have multi-segment bios, current_nr_sectors refers
-to the numbers of sectors in the current segment being processed which could
-be one of the many segments in the current bio (i.e i/o completion unit).
-The nr_sectors value refers to the total number of sectors in the whole
-request that remain to be transferred (no change). The purpose of the
-hard_xxx values is for block to remember these counts every time it hands
-over the request to the driver. These values are updated by block on
-end_that_request_first, i.e. every time the driver completes a part of the
-transfer and invokes block end*request helpers to mark this. The
-driver should not modify these values. The block layer sets up the
-nr_sectors and current_nr_sectors fields (based on the corresponding
-hard_xxx values and the number of bytes transferred) and updates it on
-every transfer that invokes end_that_request_first. It does the same for the
-buffer, bio, bio->bi_iter fields too.
-
-The buffer field is just a virtual address mapping of the current segment
-of the i/o buffer in cases where the buffer resides in low-memory. For high
-memory i/o, this field is not valid and must not be used by drivers.
-
-Code that sets up its own request structures and passes them down to
-a driver needs to be careful about interoperation with the block layer helper
-functions which the driver uses. (Section 1.3)
-
-3. Using bios
-=============
-
-3.1 Setup/Teardown
-------------------
-
-There are routines for managing the allocation, and reference counting, and
-freeing of bios (bio_alloc, bio_get, bio_put).
-
-This makes use of Ingo Molnar's mempool implementation, which enables
-subsystems like bio to maintain their own reserve memory pools for guaranteed
-deadlock-free allocations during extreme VM load. For example, the VM
-subsystem makes use of the block layer to writeout dirty pages in order to be
-able to free up memory space, a case which needs careful handling. The
-allocation logic draws from the preallocated emergency reserve in situations
-where it cannot allocate through normal means. If the pool is empty and it
-can wait, then it would trigger action that would help free up memory or
-replenish the pool (without deadlocking) and wait for availability in the pool.
-If it is in IRQ context, and hence not in a position to do this, allocation
-could fail if the pool is empty. In general mempool always first tries to
-perform allocation without having to wait, even if it means digging into the
-pool as long it is not less that 50% full.
-
-On a free, memory is released to the pool or directly freed depending on
-the current availability in the pool. The mempool interface lets the
-subsystem specify the routines to be used for normal alloc and free. In the
-case of bio, these routines make use of the standard slab allocator.
-
-The caller of bio_alloc is expected to taken certain steps to avoid
-deadlocks, e.g. avoid trying to allocate more memory from the pool while
-already holding memory obtained from the pool.
-
-::
-
-  [TBD: This is a potential issue, though a rare possibility
-   in the bounce bio allocation that happens in the current code, since
-   it ends up allocating a second bio from the same pool while
-   holding the original bio ]
-
-Memory allocated from the pool should be released back within a limited
-amount of time (in the case of bio, that would be after the i/o is completed).
-This ensures that if part of the pool has been used up, some work (in this
-case i/o) must already be in progress and memory would be available when it
-is over. If allocating from multiple pools in the same code path, the order
-or hierarchy of allocation needs to be consistent, just the way one deals
-with multiple locks.
-
-The bio_alloc routine also needs to allocate the bio_vec_list (bvec_alloc())
-for a non-clone bio. There are the 6 pools setup for different size biovecs,
-so bio_alloc(gfp_mask, nr_iovecs) will allocate a vec_list of the
-given size from these slabs.
-
-The bio_get() routine may be used to hold an extra reference on a bio prior
-to i/o submission, if the bio fields are likely to be accessed after the
-i/o is issued (since the bio may otherwise get freed in case i/o completion
-happens in the meantime).
-
-3.2 Generic bio helper Routines
--------------------------------
-
-3.2.1 Traversing segments and completion units in a request
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-The macro rq_for_each_segment() should be used for traversing the bios
-in the request list (drivers should avoid directly trying to do it
-themselves). Using these helpers should also make it easier to cope
-with block changes in the future.
-
-::
-
-	struct req_iterator iter;
-	rq_for_each_segment(bio_vec, rq, iter)
-		/* bio_vec is now current segment */
-
-I/O completion callbacks are per-bio rather than per-segment, so drivers
-that traverse bio chains on completion need to keep that in mind. Drivers
-which don't make a distinction between segments and completion units would
-need to be reorganized to support multi-segment bios.
-
-3.2.2 Setting up DMA scatterlists
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-The blk_rq_map_sg() helper routine would be used for setting up scatter
-gather lists from a request, so a driver need not do it on its own.
-
-	nr_segments = blk_rq_map_sg(q, rq, scatterlist);
-
-The helper routine provides a level of abstraction which makes it easier
-to modify the internals of request to scatterlist conversion down the line
-without breaking drivers. The blk_rq_map_sg routine takes care of several
-things like collapsing physically contiguous segments (if QUEUE_FLAG_CLUSTER
-is set) and correct segment accounting to avoid exceeding the limits which
-the i/o hardware can handle, based on various queue properties.
-
-- Prevents a clustered segment from crossing a 4GB mem boundary
-- Avoids building segments that would exceed the number of physical
-  memory segments that the driver can handle (phys_segments) and the
-  number that the underlying hardware can handle at once, accounting for
-  DMA remapping (hw_segments)  (i.e. IOMMU aware limits).
-
-Routines which the low level driver can use to set up the segment limits:
-
-blk_queue_max_hw_segments() : Sets an upper limit of the maximum number of
-hw data segments in a request (i.e. the maximum number of address/length
-pairs the host adapter can actually hand to the device at once)
-
-blk_queue_max_phys_segments() : Sets an upper limit on the maximum number
-of physical data segments in a request (i.e. the largest sized scatter list
-a driver could handle)
-
-3.2.3 I/O completion
-^^^^^^^^^^^^^^^^^^^^
-
-The existing generic block layer helper routines end_request,
-end_that_request_first and end_that_request_last can be used for i/o
-completion (and setting things up so the rest of the i/o or the next
-request can be kicked of) as before. With the introduction of multi-page
-bio support, end_that_request_first requires an additional argument indicating
-the number of sectors completed.
-
-3.2.4 Implications for drivers that do not interpret bios
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-(don't handle multiple segments)
-
-Drivers that do not interpret bios e.g those which do not handle multiple
-segments and do not support i/o into high memory addresses (require bounce
-buffers) and expect only virtually mapped buffers, can access the rq->buffer
-field. As before the driver should use current_nr_sectors to determine the
-size of remaining data in the current segment (that is the maximum it can
-transfer in one go unless it interprets segments), and rely on the block layer
-end_request, or end_that_request_first/last to take care of all accounting
-and transparent mapping of the next bio segment when a segment boundary
-is crossed on completion of a transfer. (The end*request* functions should
-be used if only if the request has come down from block/bio path, not for
-direct access requests which only specify rq->buffer without a valid rq->bio)
-
-3.3 I/O Submission
-------------------
-
-The routine submit_bio() is used to submit a single io. Higher level i/o
-routines make use of this:
-
-(a) Buffered i/o:
-
-The routine submit_bh() invokes submit_bio() on a bio corresponding to the
-bh, allocating the bio if required. ll_rw_block() uses submit_bh() as before.
-
-(b) Kiobuf i/o (for raw/direct i/o):
-
-The ll_rw_kio() routine breaks up the kiobuf into page sized chunks and
-maps the array to one or more multi-page bios, issuing submit_bio() to
-perform the i/o on each of these.
-
-The embedded bh array in the kiobuf structure has been removed and no
-preallocation of bios is done for kiobufs. [The intent is to remove the
-blocks array as well, but it's currently in there to kludge around direct i/o.]
-Thus kiobuf allocation has switched back to using kmalloc rather than vmalloc.
-
-Todo/Observation:
-
- A single kiobuf structure is assumed to correspond to a contiguous range
- of data, so brw_kiovec() invokes ll_rw_kio for each kiobuf in a kiovec.
- So right now it wouldn't work for direct i/o on non-contiguous blocks.
- This is to be resolved.  The eventual direction is to replace kiobuf
- by kvec's.
-
- Badari Pulavarty has a patch to implement direct i/o correctly using
- bio and kvec.
-
-
-(c) Page i/o:
-
-Todo/Under discussion:
-
- Andrew Morton's multi-page bio patches attempt to issue multi-page
- writeouts (and reads) from the page cache, by directly building up
- large bios for submission completely bypassing the usage of buffer
- heads. This work is still in progress.
-
- Christoph Hellwig had some code that uses bios for page-io (rather than
- bh). This isn't included in bio as yet. Christoph was also working on a
- design for representing virtual/real extents as an entity and modifying
- some of the address space ops interfaces to utilize this abstraction rather
- than buffer_heads. (This is somewhat along the lines of the SGI XFS pagebuf
- abstraction, but intended to be as lightweight as possible).
-
-(d) Direct access i/o:
-
-Direct access requests that do not contain bios would be submitted differently
-as discussed earlier in section 1.3.
-
-Aside:
-
-  Kvec i/o:
-
-  Ben LaHaise's aio code uses a slightly different structure instead
-  of kiobufs, called a kvec_cb. This contains an array of <page, offset, len>
-  tuples (very much like the networking code), together with a callback function
-  and data pointer. This is embedded into a brw_cb structure when passed
-  to brw_kvec_async().
-
-  Now it should be possible to directly map these kvecs to a bio. Just as while
-  cloning, in this case rather than PRE_BUILT bio_vecs, we set the bi_io_vec
-  array pointer to point to the veclet array in kvecs.
-
-  TBD: In order for this to work, some changes are needed in the way multi-page
-  bios are handled today. The values of the tuples in such a vector passed in
-  from higher level code should not be modified by the block layer in the course
-  of its request processing, since that would make it hard for the higher layer
-  to continue to use the vector descriptor (kvec) after i/o completes. Instead,
-  all such transient state should either be maintained in the request structure,
-  and passed on in some way to the endio completion routine.
-
-
-4. The I/O scheduler
-====================
-
-I/O scheduler, a.k.a. elevator, is implemented in two layers.  Generic dispatch
-queue and specific I/O schedulers.  Unless stated otherwise, elevator is used
-to refer to both parts and I/O scheduler to specific I/O schedulers.
-
-Block layer implements generic dispatch queue in `block/*.c`.
-The generic dispatch queue is responsible for requeueing, handling non-fs
-requests and all other subtleties.
-
-Specific I/O schedulers are responsible for ordering normal filesystem
-requests.  They can also choose to delay certain requests to improve
-throughput or whatever purpose.  As the plural form indicates, there are
-multiple I/O schedulers.  They can be built as modules but at least one should
-be built inside the kernel.  Each queue can choose different one and can also
-change to another one dynamically.
-
-A block layer call to the i/o scheduler follows the convention elv_xxx(). This
-calls elevator_xxx_fn in the elevator switch (block/elevator.c). Oh, xxx
-and xxx might not match exactly, but use your imagination. If an elevator
-doesn't implement a function, the switch does nothing or some minimal house
-keeping work.
-
-4.1. I/O scheduler API
-----------------------
-
-The functions an elevator may implement are: (* are mandatory)
-
-=============================== ================================================
-elevator_merge_fn		called to query requests for merge with a bio
-
-elevator_merge_req_fn		called when two requests get merged. the one
-				which gets merged into the other one will be
-				never seen by I/O scheduler again. IOW, after
-				being merged, the request is gone.
-
-elevator_merged_fn		called when a request in the scheduler has been
-				involved in a merge. It is used in the deadline
-				scheduler for example, to reposition the request
-				if its sorting order has changed.
-
-elevator_allow_merge_fn		called whenever the block layer determines
-				that a bio can be merged into an existing
-				request safely. The io scheduler may still
-				want to stop a merge at this point if it
-				results in some sort of conflict internally,
-				this hook allows it to do that. Note however
-				that two *requests* can still be merged at later
-				time. Currently the io scheduler has no way to
-				prevent that. It can only learn about the fact
-				from elevator_merge_req_fn callback.
-
-elevator_dispatch_fn*		fills the dispatch queue with ready requests.
-				I/O schedulers are free to postpone requests by
-				not filling the dispatch queue unless @force
-				is non-zero.  Once dispatched, I/O schedulers
-				are not allowed to manipulate the requests -
-				they belong to generic dispatch queue.
-
-elevator_add_req_fn*		called to add a new request into the scheduler
-
-elevator_former_req_fn
-elevator_latter_req_fn		These return the request before or after the
-				one specified in disk sort order. Used by the
-				block layer to find merge possibilities.
-
-elevator_completed_req_fn	called when a request is completed.
-
-elevator_set_req_fn
-elevator_put_req_fn		Must be used to allocate and free any elevator
-				specific storage for a request.
-
-elevator_activate_req_fn	Called when device driver first sees a request.
-				I/O schedulers can use this callback to
-				determine when actual execution of a request
-				starts.
-elevator_deactivate_req_fn	Called when device driver decides to delay
-				a request by requeueing it.
-
-elevator_init_fn*
-elevator_exit_fn		Allocate and free any elevator specific storage
-				for a queue.
-=============================== ================================================
-
-4.2 Request flows seen by I/O schedulers
-----------------------------------------
-
-All requests seen by I/O schedulers strictly follow one of the following three
-flows.
-
- set_req_fn ->
-
- i.   add_req_fn -> (merged_fn ->)* -> dispatch_fn -> activate_req_fn ->
-      (deactivate_req_fn -> activate_req_fn ->)* -> completed_req_fn
- ii.  add_req_fn -> (merged_fn ->)* -> merge_req_fn
- iii. [none]
-
- -> put_req_fn
-
-4.3 I/O scheduler implementation
---------------------------------
-
-The generic i/o scheduler algorithm attempts to sort/merge/batch requests for
-optimal disk scan and request servicing performance (based on generic
-principles and device capabilities), optimized for:
-
-i.   improved throughput
-ii.  improved latency
-iii. better utilization of h/w & CPU time
-
-Characteristics:
-
-i. Binary tree
-AS and deadline i/o schedulers use red black binary trees for disk position
-sorting and searching, and a fifo linked list for time-based searching. This
-gives good scalability and good availability of information. Requests are
-almost always dispatched in disk sort order, so a cache is kept of the next
-request in sort order to prevent binary tree lookups.
-
-This arrangement is not a generic block layer characteristic however, so
-elevators may implement queues as they please.
-
-ii. Merge hash
-AS and deadline use a hash table indexed by the last sector of a request. This
-enables merging code to quickly look up "back merge" candidates, even when
-multiple I/O streams are being performed at once on one disk.
-
-"Front merges", a new request being merged at the front of an existing request,
-are far less common than "back merges" due to the nature of most I/O patterns.
-Front merges are handled by the binary trees in AS and deadline schedulers.
-
-iii. Plugging the queue to batch requests in anticipation of opportunities for
-     merge/sort optimizations
-
-Plugging is an approach that the current i/o scheduling algorithm resorts to so
-that it collects up enough requests in the queue to be able to take
-advantage of the sorting/merging logic in the elevator. If the
-queue is empty when a request comes in, then it plugs the request queue
-(sort of like plugging the bath tub of a vessel to get fluid to build up)
-till it fills up with a few more requests, before starting to service
-the requests. This provides an opportunity to merge/sort the requests before
-passing them down to the device. There are various conditions when the queue is
-unplugged (to open up the flow again), either through a scheduled task or
-could be on demand.
-
-Aside:
-  This is kind of controversial territory, as it's not clear if plugging is
-  always the right thing to do. Devices typically have their own queues,
-  and allowing a big queue to build up in software, while letting the device be
-  idle for a while may not always make sense. The trick is to handle the fine
-  balance between when to plug and when to open up. Also now that we have
-  multi-page bios being queued in one shot, we may not need to wait to merge
-  a big request from the broken up pieces coming by.
-
-4.4 I/O contexts
-----------------
-
-I/O contexts provide a dynamically allocated per process data area. They may
-be used in I/O schedulers, and in the block layer (could be used for IO statis,
-priorities for example). See `*io_context` in block/ll_rw_blk.c, and as-iosched.c
-for an example of usage in an i/o scheduler.
-
-
-5. Scalability related changes
-==============================
-
-5.1 Granular Locking: io_request_lock replaced by a per-queue lock
-------------------------------------------------------------------
-
-The global io_request_lock has been removed as of 2.5, to avoid
-the scalability bottleneck it was causing, and has been replaced by more
-granular locking. The request queue structure has a pointer to the
-lock to be used for that queue. As a result, locking can now be
-per-queue, with a provision for sharing a lock across queues if
-necessary (e.g the scsi layer sets the queue lock pointers to the
-corresponding adapter lock, which results in a per host locking
-granularity). The locking semantics are the same, i.e. locking is
-still imposed by the block layer, grabbing the lock before
-request_fn execution which it means that lots of older drivers
-should still be SMP safe. Drivers are free to drop the queue
-lock themselves, if required. Drivers that explicitly used the
-io_request_lock for serialization need to be modified accordingly.
-Usually it's as easy as adding a global lock::
-
-	static DEFINE_SPINLOCK(my_driver_lock);
-
-and passing the address to that lock to blk_init_queue().
-
-5.2 64 bit sector numbers (sector_t prepares for 64 bit support)
-----------------------------------------------------------------
-
-The sector number used in the bio structure has been changed to sector_t,
-which could be defined as 64 bit in preparation for 64 bit sector support.
-
-6. Other Changes/Implications
-=============================
-
-6.1 Partition re-mapping handled by the generic block layer
------------------------------------------------------------
-
-In 2.5 some of the gendisk/partition related code has been reorganized.
-Now the generic block layer performs partition-remapping early and thus
-provides drivers with a sector number relative to whole device, rather than
-having to take partition number into account in order to arrive at the true
-sector number. The routine blk_partition_remap() is invoked by
-submit_bio_noacct even before invoking the queue specific ->submit_bio,
-so the i/o scheduler also gets to operate on whole disk sector numbers. This
-should typically not require changes to block drivers, it just never gets
-to invoke its own partition sector offset calculations since all bios
-sent are offset from the beginning of the device.
-
-
-7. A Few Tips on Migration of older drivers
-===========================================
-
-Old-style drivers that just use CURRENT and ignores clustered requests,
-may not need much change.  The generic layer will automatically handle
-clustered requests, multi-page bios, etc for the driver.
-
-For a low performance driver or hardware that is PIO driven or just doesn't
-support scatter-gather changes should be minimal too.
-
-The following are some points to keep in mind when converting old drivers
-to bio.
-
-Drivers should use elv_next_request to pick up requests and are no longer
-supposed to handle looping directly over the request list.
-(struct request->queue has been removed)
-
-Now end_that_request_first takes an additional number_of_sectors argument.
-It used to handle always just the first buffer_head in a request, now
-it will loop and handle as many sectors (on a bio-segment granularity)
-as specified.
-
-Now bh->b_end_io is replaced by bio->bi_end_io, but most of the time the
-right thing to use is bio_endio(bio) instead.
-
-If the driver is dropping the io_request_lock from its request_fn strategy,
-then it just needs to replace that with q->queue_lock instead.
-
-As described in Sec 1.1, drivers can set max sector size, max segment size
-etc per queue now. Drivers that used to define their own merge functions i
-to handle things like this can now just use the blk_queue_* functions at
-blk_init_queue time.
-
-Drivers no longer have to map a {partition, sector offset} into the
-correct absolute location anymore, this is done by the block layer, so
-where a driver received a request ala this before::
-
-	rq->rq_dev = mk_kdev(3, 5);	/* /dev/hda5 */
-	rq->sector = 0;			/* first sector on hda5 */
-
-it will now see::
-
-	rq->rq_dev = mk_kdev(3, 0);	/* /dev/hda */
-	rq->sector = 123128;		/* offset from start of disk */
-
-As mentioned, there is no virtual mapping of a bio. For DMA, this is
-not a problem as the driver probably never will need a virtual mapping.
-Instead it needs a bus mapping (dma_map_page for a single segment or
-use dma_map_sg for scatter gather) to be able to ship it to the driver. For
-PIO drivers (or drivers that need to revert to PIO transfer once in a
-while (IDE for example)), where the CPU is doing the actual data
-transfer a virtual mapping is needed. If the driver supports highmem I/O,
-(Sec 1.1, (ii) ) it needs to use kmap_atomic or similar to temporarily map
-a bio into the virtual address space.
-
-
-8. Prior/Related/Impacted patches
-=================================
-
-8.1. Earlier kiobuf patches (sct/axboe/chait/hch/mkp)
------------------------------------------------------
-
-- orig kiobuf & raw i/o patches (now in 2.4 tree)
-- direct kiobuf based i/o to devices (no intermediate bh's)
-- page i/o using kiobuf
-- kiobuf splitting for lvm (mkp)
-- elevator support for kiobuf request merging (axboe)
-
-8.2. Zero-copy networking (Dave Miller)
----------------------------------------
-
-8.3. SGI XFS - pagebuf patches - use of kiobufs
------------------------------------------------
-8.4. Multi-page pioent patch for bio (Christoph Hellwig)
---------------------------------------------------------
-8.5. Direct i/o implementation (Andrea Arcangeli) since 2.4.10-pre11
---------------------------------------------------------------------
-8.6. Async i/o implementation patch (Ben LaHaise)
--------------------------------------------------
-8.7. EVMS layering design (IBM EVMS team)
------------------------------------------
-8.8. Larger page cache size patch (Ben LaHaise) and Large page size (Daniel Phillips)
--------------------------------------------------------------------------------------
-
-    => larger contiguous physical memory buffers
-
-8.9. VM reservations patch (Ben LaHaise)
-----------------------------------------
-8.10. Write clustering patches ? (Marcelo/Quintela/Riel ?)
-----------------------------------------------------------
-8.11. Block device in page cache patch (Andrea Archangeli) - now in 2.4.10+
----------------------------------------------------------------------------
-8.12. Multiple block-size transfers for faster raw i/o (Shailabh Nagar, Badari)
--------------------------------------------------------------------------------
-8.13  Priority based i/o scheduler - prepatches (Arjan van de Ven)
-------------------------------------------------------------------
-8.14  IDE Taskfile i/o patch (Andre Hedrick)
---------------------------------------------
-8.15  Multi-page writeout and readahead patches (Andrew Morton)
----------------------------------------------------------------
-8.16  Direct i/o patches for 2.5 using kvec and bio (Badari Pulavarthy)
------------------------------------------------------------------------
-
-9. Other References
-===================
-
-9.1 The Splice I/O Model
-------------------------
-
-Larry McVoy (and subsequent discussions on lkml, and Linus' comments - Jan 2001
-
-9.2 Discussions about kiobuf and bh design
-------------------------------------------
-
-On lkml between sct, linus, alan et al - Feb-March 2001 (many of the
-initial thoughts that led to bio were brought up in this discussion thread)
-
-9.3 Discussions on mempool on lkml - Dec 2001.
-----------------------------------------------
diff --git a/Documentation/block/index.rst b/Documentation/block/index.rst
index 3a41495dd77b5..68f115f2b1c6b 100644
--- a/Documentation/block/index.rst
+++ b/Documentation/block/index.rst
@@ -8,7 +8,6 @@ Block
    :maxdepth: 1
 
    bfq-iosched
-   biodoc
    biovecs
    blk-mq
    capability
-- 
GitLab


From ec9fd2a13d743ae129bd1b5c6edea0d65260ce10 Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <kch@nvidia.com>
Date: Tue, 15 Feb 2022 03:52:47 -0800
Subject: [PATCH 0411/1586] blk-lib: don't check bdev_get_queue() NULL check

Based on the comment present in the bdev_get_queue()
bdev->bd_queue can never be NULL. Remove the NULL check for the local
variable q that is set from bdev_get_queue() for discard, write_same,
and write_zeroes.

Signed-off-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20220215115247.11717-2-kch@nvidia.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-lib.c | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/block/blk-lib.c b/block/blk-lib.c
index 1b8ced45e4e55..fc6ea52e74824 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -19,9 +19,6 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 	unsigned int op;
 	sector_t bs_mask, part_offset = 0;
 
-	if (!q)
-		return -ENXIO;
-
 	if (bdev_read_only(bdev))
 		return -EPERM;
 
@@ -156,9 +153,6 @@ static int __blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
 	struct bio *bio = *biop;
 	sector_t bs_mask;
 
-	if (!q)
-		return -ENXIO;
-
 	if (bdev_read_only(bdev))
 		return -EPERM;
 
@@ -232,10 +226,6 @@ static int __blkdev_issue_write_zeroes(struct block_device *bdev,
 {
 	struct bio *bio = *biop;
 	unsigned int max_write_zeroes_sectors;
-	struct request_queue *q = bdev_get_queue(bdev);
-
-	if (!q)
-		return -ENXIO;
 
 	if (bdev_read_only(bdev))
 		return -EPERM;
@@ -284,14 +274,10 @@ static int __blkdev_issue_zero_pages(struct block_device *bdev,
 		sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
 		struct bio **biop)
 {
-	struct request_queue *q = bdev_get_queue(bdev);
 	struct bio *bio = *biop;
 	int bi_size = 0;
 	unsigned int sz;
 
-	if (!q)
-		return -ENXIO;
-
 	if (bdev_read_only(bdev))
 		return -EPERM;
 
-- 
GitLab


From b8ac4ee08d48d4bb46669a2deef10454313e1a00 Mon Sep 17 00:00:00 2001
From: Andre Przywara <andre.przywara@arm.com>
Date: Fri, 7 Jan 2022 16:00:55 +0000
Subject: [PATCH 0412/1586] arm64: booting.rst: Clarify on requiring non-secure
 EL2

The ARMv8.4 architecture revision introduced the EL2 exception level
to the secure world. Clarify the existing wording to make sure that
Linux relies on being executed in the non-secure state.

Signed-off-by: Andre Przywara <andre.przywara@arm.com>
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/r/20220107160056.322141-2-andre.przywara@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 Documentation/arm64/booting.rst | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/Documentation/arm64/booting.rst b/Documentation/arm64/booting.rst
index 52d060caf8bba..29884b261aa9c 100644
--- a/Documentation/arm64/booting.rst
+++ b/Documentation/arm64/booting.rst
@@ -10,9 +10,9 @@ This document is based on the ARM booting document by Russell King and
 is relevant to all public releases of the AArch64 Linux kernel.
 
 The AArch64 exception model is made up of a number of exception levels
-(EL0 - EL3), with EL0 and EL1 having a secure and a non-secure
-counterpart.  EL2 is the hypervisor level and exists only in non-secure
-mode. EL3 is the highest priority level and exists only in secure mode.
+(EL0 - EL3), with EL0, EL1 and EL2 having a secure and a non-secure
+counterpart.  EL2 is the hypervisor level, EL3 is the highest priority
+level and exists only in secure mode. Both are architecturally optional.
 
 For the purposes of this document, we will use the term `boot loader`
 simply to define all software that executes on the CPU(s) before control
@@ -167,8 +167,8 @@ Before jumping into the kernel, the following conditions must be met:
 
   All forms of interrupts must be masked in PSTATE.DAIF (Debug, SError,
   IRQ and FIQ).
-  The CPU must be in either EL2 (RECOMMENDED in order to have access to
-  the virtualisation extensions) or non-secure EL1.
+  The CPU must be in non-secure state, either in EL2 (RECOMMENDED in order
+  to have access to the virtualisation extensions), or in EL1.
 
 - Caches, MMUs
 
-- 
GitLab


From 35bde68bba5413592d88864eced79f8a0482bb4f Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Thu, 13 Jan 2022 14:12:39 +0100
Subject: [PATCH 0413/1586] arm64: random: implement arch_get_random_int/_long
 based on RNDR

When support for RNDR/RNDRRS was introduced, we elected to only
implement arch_get_random_seed_int/_long(), and back them by RNDR
instead of RNDRRS. This was needed to prevent potential performance
and/or starvation issues resulting from the fact that the /dev/random
driver used to invoke these routines on various hot paths.

These issues have all been addressed now [0] [1], and so we can wire up
this API more straight-forwardly:

- map arch_get_random_int/_long() onto RNDR, which returns the output of
  a DRBG that is reseeded at an implemented defined rate;
- map arch_get_random_seed_int/_long() onto the TRNG firmware service,
  which returns true, conditioned entropy, or onto RNDRRS if the TRNG
  service is unavailable, which returns the output of a DRBG that is
  reseeded every time it is used.

[0] 390596c9959c random: avoid arch_get_random_seed_long() when collecting IRQ randomness
[1] 2ee25b6968b1 random: avoid superfluous call to RDRAND in CRNG extraction

Cc: Andre Przywara <andre.przywara@arm.com>
Cc: Mark Brown <broonie@kernel.org>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Acked-by: Jason A. Donenfeld <Jason@zx2c4.com>
Reviewed-by: Andre Przywara <andre.przywara@arm.com>
Reviewed-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20220113131239.1610455-1-ardb@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/archrandom.h | 45 +++++++++++++++++++++++++----
 1 file changed, 39 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/include/asm/archrandom.h b/arch/arm64/include/asm/archrandom.h
index 09e43272ccb0a..d1bb5e71df256 100644
--- a/arch/arm64/include/asm/archrandom.h
+++ b/arch/arm64/include/asm/archrandom.h
@@ -42,13 +42,47 @@ static inline bool __arm64_rndr(unsigned long *v)
 	return ok;
 }
 
+static inline bool __arm64_rndrrs(unsigned long *v)
+{
+	bool ok;
+
+	/*
+	 * Reads of RNDRRS set PSTATE.NZCV to 0b0000 on success,
+	 * and set PSTATE.NZCV to 0b0100 otherwise.
+	 */
+	asm volatile(
+		__mrs_s("%0", SYS_RNDRRS_EL0) "\n"
+	"	cset %w1, ne\n"
+	: "=r" (*v), "=r" (ok)
+	:
+	: "cc");
+
+	return ok;
+}
+
 static inline bool __must_check arch_get_random_long(unsigned long *v)
 {
+	/*
+	 * Only support the generic interface after we have detected
+	 * the system wide capability, avoiding complexity with the
+	 * cpufeature code and with potential scheduling between CPUs
+	 * with and without the feature.
+	 */
+	if (cpus_have_const_cap(ARM64_HAS_RNG) && __arm64_rndr(v))
+		return true;
 	return false;
 }
 
 static inline bool __must_check arch_get_random_int(unsigned int *v)
 {
+	if (cpus_have_const_cap(ARM64_HAS_RNG)) {
+		unsigned long val;
+
+		if (__arm64_rndr(&val)) {
+			*v = val;
+			return true;
+		}
+	}
 	return false;
 }
 
@@ -71,12 +105,11 @@ static inline bool __must_check arch_get_random_seed_long(unsigned long *v)
 	}
 
 	/*
-	 * Only support the generic interface after we have detected
-	 * the system wide capability, avoiding complexity with the
-	 * cpufeature code and with potential scheduling between CPUs
-	 * with and without the feature.
+	 * RNDRRS is not backed by an entropy source but by a DRBG that is
+	 * reseeded after each invocation. This is not a 100% fit but good
+	 * enough to implement this API if no other entropy source exists.
 	 */
-	if (cpus_have_const_cap(ARM64_HAS_RNG) && __arm64_rndr(v))
+	if (cpus_have_const_cap(ARM64_HAS_RNG) && __arm64_rndrrs(v))
 		return true;
 
 	return false;
@@ -96,7 +129,7 @@ static inline bool __must_check arch_get_random_seed_int(unsigned int *v)
 	}
 
 	if (cpus_have_const_cap(ARM64_HAS_RNG)) {
-		if (__arm64_rndr(&val)) {
+		if (__arm64_rndrrs(&val)) {
 			*v = val;
 			return true;
 		}
-- 
GitLab


From 3352a5556f52bb49b82c0258c0c67f7371ba1f80 Mon Sep 17 00:00:00 2001
From: He Ying <heying24@huawei.com>
Date: Tue, 11 Jan 2022 22:24:10 -0500
Subject: [PATCH 0414/1586] arm64: entry: Save some nops when
 CONFIG_ARM64_PSEUDO_NMI is not set

Arm64 pseudo-NMI feature code brings some additional nops
when CONFIG_ARM64_PSEUDO_NMI is not set, which is not
necessary. So add necessary ifdeffery to avoid it.

Signed-off-by: He Ying <heying24@huawei.com>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/r/20220112032410.29231-1-heying24@huawei.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/entry.S | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 772ec2ecf4888..eb59621d6c6a2 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -300,6 +300,7 @@ alternative_else_nop_endif
 	str	w21, [sp, #S_SYSCALLNO]
 	.endif
 
+#ifdef CONFIG_ARM64_PSEUDO_NMI
 	/* Save pmr */
 alternative_if ARM64_HAS_IRQ_PRIO_MASKING
 	mrs_s	x20, SYS_ICC_PMR_EL1
@@ -307,6 +308,7 @@ alternative_if ARM64_HAS_IRQ_PRIO_MASKING
 	mov	x20, #GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET
 	msr_s	SYS_ICC_PMR_EL1, x20
 alternative_else_nop_endif
+#endif
 
 	/* Re-enable tag checking (TCO set on exception entry) */
 #ifdef CONFIG_ARM64_MTE
@@ -330,6 +332,7 @@ alternative_else_nop_endif
 	disable_daif
 	.endif
 
+#ifdef CONFIG_ARM64_PSEUDO_NMI
 	/* Restore pmr */
 alternative_if ARM64_HAS_IRQ_PRIO_MASKING
 	ldr	x20, [sp, #S_PMR_SAVE]
@@ -339,6 +342,7 @@ alternative_if ARM64_HAS_IRQ_PRIO_MASKING
 	dsb	sy				// Ensure priority change is seen by redistributor
 .L__skip_pmr_sync\@:
 alternative_else_nop_endif
+#endif
 
 	ldp	x21, x22, [sp, #S_PC]		// load ELR, SPSR
 
-- 
GitLab


From e921da6bc7cac5f0e8458fe5df18ae08eb538f54 Mon Sep 17 00:00:00 2001
From: Anshuman Khandual <anshuman.khandual@arm.com>
Date: Tue, 25 Jan 2022 20:08:33 +0530
Subject: [PATCH 0415/1586] arm64/mm: Consolidate TCR_EL1 fields

This renames and moves SYS_TCR_EL1_TCMA1 and SYS_TCR_EL1_TCMA0 definitions
into pgtable-hwdef.h thus consolidating all TCR fields in a single header.
This does not cause any functional change.

Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
Link: https://lore.kernel.org/r/1643121513-21854-1-git-send-email-anshuman.khandual@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/pgtable-hwdef.h | 2 ++
 arch/arm64/include/asm/sysreg.h        | 4 ----
 arch/arm64/mm/proc.S                   | 2 +-
 3 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index 40085e53f573d..66671ff051835 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -273,6 +273,8 @@
 #define TCR_NFD1		(UL(1) << 54)
 #define TCR_E0PD0		(UL(1) << 55)
 #define TCR_E0PD1		(UL(1) << 56)
+#define TCR_TCMA0		(UL(1) << 57)
+#define TCR_TCMA1		(UL(1) << 58)
 
 /*
  * TTBR.
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 898bee0004aee..34800d264f690 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -1101,10 +1101,6 @@
 #define CPACR_EL1_ZEN_EL0EN	(BIT(17)) /* enable EL0 access, if EL1EN set */
 #define CPACR_EL1_ZEN		(CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN)
 
-/* TCR EL1 Bit Definitions */
-#define SYS_TCR_EL1_TCMA1	(BIT(58))
-#define SYS_TCR_EL1_TCMA0	(BIT(57))
-
 /* GCR_EL1 Definitions */
 #define SYS_GCR_EL1_RRND	(BIT(16))
 #define SYS_GCR_EL1_EXCL_MASK	0xffffUL
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index d35c90d2e47ad..50bbed947bec7 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -46,7 +46,7 @@
 #endif
 
 #ifdef CONFIG_KASAN_HW_TAGS
-#define TCR_MTE_FLAGS SYS_TCR_EL1_TCMA1 | TCR_TBI1 | TCR_TBID1
+#define TCR_MTE_FLAGS TCR_TCMA1 | TCR_TBI1 | TCR_TBID1
 #else
 /*
  * The mte_zero_clear_page_tags() implementation uses DC GZVA, which relies on
-- 
GitLab


From ee017ee353506fcec58e481673e4331ff198a80e Mon Sep 17 00:00:00 2001
From: Jianyong Wu <jianyong.wu@arm.com>
Date: Tue, 1 Feb 2022 19:44:00 +0800
Subject: [PATCH 0416/1586] arm64/mm: avoid fixmap race condition when create
 pud mapping

The 'fixmap' is a global resource and is used recursively by
create pud mapping(), leading to a potential race condition in the
presence of a concurrent call to alloc_init_pud():

kernel_init thread                          virtio-mem workqueue thread
==================                          ===========================

  alloc_init_pud(...)                       alloc_init_pud(...)
  pudp = pud_set_fixmap_offset(...)         pudp = pud_set_fixmap_offset(...)
  READ_ONCE(*pudp)
  pud_clear_fixmap(...)
                                            READ_ONCE(*pudp) // CRASH!

As kernel may sleep during creating pud mapping, introduce a mutex lock to
serialise use of the fixmap entries by alloc_init_pud(). However, there is
no need for locking in early boot stage and it doesn't work well with
KASLR enabled when early boot. So, enable lock when system_state doesn't
equal to "SYSTEM_BOOTING".

Signed-off-by: Jianyong Wu <jianyong.wu@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Fixes: f4710445458c ("arm64: mm: use fixmap when creating page tables")
Link: https://lore.kernel.org/r/20220201114400.56885-1-jianyong.wu@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/mm/mmu.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index acfae9b41cc8c..1681430ecab73 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -63,6 +63,7 @@ static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss __maybe_unused;
 static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss __maybe_unused;
 
 static DEFINE_SPINLOCK(swapper_pgdir_lock);
+static DEFINE_MUTEX(fixmap_lock);
 
 void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd)
 {
@@ -329,6 +330,12 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end,
 	}
 	BUG_ON(p4d_bad(p4d));
 
+	/*
+	 * No need for locking during early boot. And it doesn't work as
+	 * expected with KASLR enabled.
+	 */
+	if (system_state != SYSTEM_BOOTING)
+		mutex_lock(&fixmap_lock);
 	pudp = pud_set_fixmap_offset(p4dp, addr);
 	do {
 		pud_t old_pud = READ_ONCE(*pudp);
@@ -359,6 +366,8 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end,
 	} while (pudp++, addr = next, addr != end);
 
 	pud_clear_fixmap();
+	if (system_state != SYSTEM_BOOTING)
+		mutex_unlock(&fixmap_lock);
 }
 
 static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
-- 
GitLab


From a6aab018829948c1818bed656656df9ae321408b Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Thu, 27 Jan 2022 16:21:27 +0000
Subject: [PATCH 0417/1586] arm64: insn: Generate 64 bit mask immediates
 correctly

When the insn framework is used to encode an AND/ORR/EOR instruction,
aarch64_encode_immediate() is used to pick the immr imms values.

If the immediate is a 64bit mask, with bit 63 set, and zeros in any
of the upper 32 bits, the immr value is incorrectly calculated meaning
the wrong mask is generated.
For example, 0x8000000000000001 should have an immr of 1, but 32 is used,
meaning the resulting mask is 0x0000000300000000.

It would appear eBPF is unable to hit these cases, as build_insn()'s
imm value is a s32, so when used with BPF_ALU64, the sign-extended
u64 immediate would always have all-1s or all-0s in the upper 32 bits.

KVM does not generate a va_mask with any of the top bits set as these
VA wouldn't be usable with TTBR0_EL2.

This happens because the rotation is calculated from fls(~imm), which
takes an unsigned int, but the immediate may be 64bit.

Use fls64() so the 64bit mask doesn't get truncated to a u32.

Signed-off-by: James Morse <james.morse@arm.com>
Brown-paper-bag-for: Marc Zyngier <maz@kernel.org>
Acked-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220127162127.2391947-4-james.morse@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/lib/insn.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/lib/insn.c b/arch/arm64/lib/insn.c
index fccfe363e5679..e485cd735261c 100644
--- a/arch/arm64/lib/insn.c
+++ b/arch/arm64/lib/insn.c
@@ -1379,7 +1379,7 @@ static u32 aarch64_encode_immediate(u64 imm,
 		 * Compute the rotation to get a continuous set of
 		 * ones, with the first bit set at position 0
 		 */
-		ror = fls(~imm);
+		ror = fls64(~imm);
 	}
 
 	/*
-- 
GitLab


From 3673d4b9cf68164678c6bb8a380bfb9eebb49432 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Mon, 24 Jan 2022 17:17:48 +0000
Subject: [PATCH 0418/1586] kselftest/arm64: Remove local ARRAY_SIZE()
 definitions

An ARRAY_SIZE() has been added to kselftest.h so remove the local versions
in some of the arm64 selftests.

Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20220124171748.2195875-1-broonie@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 tools/testing/selftests/arm64/abi/syscall-abi.c | 1 -
 tools/testing/selftests/arm64/fp/sve-ptrace.c   | 2 --
 2 files changed, 3 deletions(-)

diff --git a/tools/testing/selftests/arm64/abi/syscall-abi.c b/tools/testing/selftests/arm64/abi/syscall-abi.c
index d8eeeafb50dcb..1e13b7523918e 100644
--- a/tools/testing/selftests/arm64/abi/syscall-abi.c
+++ b/tools/testing/selftests/arm64/abi/syscall-abi.c
@@ -18,7 +18,6 @@
 
 #include "../../kselftest.h"
 
-#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0]))
 #define NUM_VL ((SVE_VQ_MAX - SVE_VQ_MIN) + 1)
 
 extern void do_syscall(int sve_vl);
diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c
index a3c1e67441f9e..4bd333768cc45 100644
--- a/tools/testing/selftests/arm64/fp/sve-ptrace.c
+++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c
@@ -21,8 +21,6 @@
 
 #include "../../kselftest.h"
 
-#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0]))
-
 /* <linux/elf.h> and <sys/auxv.h> don't like each other, so: */
 #ifndef NT_ARM_SVE
 #define NT_ARM_SVE 0x405
-- 
GitLab


From 396520759bd3bf4a557e4edba9a63afc13cc5773 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Wed, 26 Jan 2022 17:44:21 +0000
Subject: [PATCH 0419/1586] kselftest/arm64: Remove local definitions of MTE
 prctls

The GCR EL1 test unconditionally includes local definitions of the prctls
it tests. Since not only will the kselftest build infrastructure ensure
that the in tree uapi headers are available but the toolchain being used to
build kselftest may ensure that system uapi headers with MTE support are
available this causes the compiler to warn about duplicate definitions.
Remove these duplicate definitions.

Signed-off-by: Mark Brown <broonie@kernel.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Shuah Khan <skhan@linuxfoundation.org>
Link: https://lore.kernel.org/r/20220126174421.1712795-1-broonie@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 .../selftests/arm64/mte/check_gcr_el1_cswitch.c       | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/tools/testing/selftests/arm64/mte/check_gcr_el1_cswitch.c b/tools/testing/selftests/arm64/mte/check_gcr_el1_cswitch.c
index a876db1f096ab..325bca0de0f6e 100644
--- a/tools/testing/selftests/arm64/mte/check_gcr_el1_cswitch.c
+++ b/tools/testing/selftests/arm64/mte/check_gcr_el1_cswitch.c
@@ -19,17 +19,6 @@
 #include "kselftest.h"
 #include "mte_common_util.h"
 
-#define PR_SET_TAGGED_ADDR_CTRL 55
-#define PR_GET_TAGGED_ADDR_CTRL 56
-# define PR_TAGGED_ADDR_ENABLE  (1UL << 0)
-# define PR_MTE_TCF_SHIFT	1
-# define PR_MTE_TCF_NONE	(0UL << PR_MTE_TCF_SHIFT)
-# define PR_MTE_TCF_SYNC	(1UL << PR_MTE_TCF_SHIFT)
-# define PR_MTE_TCF_ASYNC	(2UL << PR_MTE_TCF_SHIFT)
-# define PR_MTE_TCF_MASK	(3UL << PR_MTE_TCF_SHIFT)
-# define PR_MTE_TAG_SHIFT	3
-# define PR_MTE_TAG_MASK	(0xffffUL << PR_MTE_TAG_SHIFT)
-
 #include "mte_def.h"
 
 #define NUM_ITERATIONS		1024
-- 
GitLab


From 4c022f57ad954e1ad6f838bd3b7d54e459745eeb Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Thu, 27 Jan 2022 19:03:23 +0000
Subject: [PATCH 0420/1586] arm64/mte: Clarify mode reported by
 PR_GET_TAGGED_ADDR_CTRL

With the current wording readers might infer that PR_GET_TAGGED_ADDR_CTRL
will report the mode currently active in the thread however this is not the
actual behaviour, instead all modes currently selected by the process will
be reported with the mode used depending on the combination of the
requested modes and the default set for the current CPU. This has been the
case since 433c38f40f6a81 ("arm64: mte: change ASYNC and SYNC TCF settings
into bitfields"), before that we did not allow more than one mode to be
requested simultaneously.

Update the documentation to more clearly reflect current behaviour.

Signed-off-by: Mark Brown <broonie@kernel.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20220127190324.660405-1-broonie@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 Documentation/arm64/memory-tagging-extension.rst | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/Documentation/arm64/memory-tagging-extension.rst b/Documentation/arm64/memory-tagging-extension.rst
index 7b99c8f428eb6..a0ab2f65b235c 100644
--- a/Documentation/arm64/memory-tagging-extension.rst
+++ b/Documentation/arm64/memory-tagging-extension.rst
@@ -91,8 +91,9 @@ mode is specified, the program will run in that mode. If multiple
 modes are specified, the mode is selected as described in the "Per-CPU
 preferred tag checking modes" section below.
 
-The current tag check fault mode can be read using the
-``prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0)`` system call.
+The current tag check fault configuration can be read using the
+``prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0)`` system call. If
+multiple modes were requested then all will be reported.
 
 Tag checking can also be disabled for a user thread by setting the
 ``PSTATE.TCO`` bit with ``MSR TCO, #1``.
-- 
GitLab


From 65603435599f6425eadf51201956c88a03606ca7 Mon Sep 17 00:00:00 2001
From: Austin Kim <austindh.kim@gmail.com>
Date: Wed, 24 Nov 2021 21:44:18 +0000
Subject: [PATCH 0421/1586] ima: Fix trivial typos in the comments

There are a few minor typos in the comments. Fix these.

Signed-off-by: Austin Kim <austindh.kim@gmail.com>
Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
---
 security/integrity/ima/ima_api.c          | 2 +-
 security/integrity/ima/ima_main.c         | 2 +-
 security/integrity/ima/ima_policy.c       | 2 +-
 security/integrity/ima/ima_template_lib.c | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/security/integrity/ima/ima_api.c b/security/integrity/ima/ima_api.c
index a64fb0130b015..5b220a2fe573d 100644
--- a/security/integrity/ima/ima_api.c
+++ b/security/integrity/ima/ima_api.c
@@ -238,7 +238,7 @@ int ima_collect_measurement(struct integrity_iint_cache *iint,
 		goto out;
 
 	/*
-	 * Dectecting file change is based on i_version. On filesystems
+	 * Detecting file change is based on i_version. On filesystems
 	 * which do not support i_version, support is limited to an initial
 	 * measurement/appraisal/audit.
 	 */
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index 8c6e4514d4944..8ed6da428328b 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -764,7 +764,7 @@ int ima_post_read_file(struct file *file, void *buf, loff_t size,
  *	      call to ima_post_load_data().
  *
  * Callers of this LSM hook can not measure, appraise, or audit the
- * data provided by userspace.  Enforce policy rules requring a file
+ * data provided by userspace.  Enforce policy rules requiring a file
  * signature (eg. kexec'ed kernel image).
  *
  * For permission return 0, otherwise return -EACCES.
diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c
index 2a1f6418b10a6..90f528558adc2 100644
--- a/security/integrity/ima/ima_policy.c
+++ b/security/integrity/ima/ima_policy.c
@@ -429,7 +429,7 @@ static int ima_lsm_update_rule(struct ima_rule_entry *entry)
 	/*
 	 * ima_lsm_copy_rule() shallow copied all references, except for the
 	 * LSM references, from entry to nentry so we only want to free the LSM
-	 * references and the entry itself. All other memory refrences will now
+	 * references and the entry itself. All other memory references will now
 	 * be owned by nentry.
 	 */
 	ima_lsm_free_rule(entry);
diff --git a/security/integrity/ima/ima_template_lib.c b/security/integrity/ima/ima_template_lib.c
index ca017cae73eb3..5a5d462ab36db 100644
--- a/security/integrity/ima/ima_template_lib.c
+++ b/security/integrity/ima/ima_template_lib.c
@@ -272,7 +272,7 @@ static int ima_eventdigest_init_common(const u8 *digest, u32 digestsize,
 	 * digest formats:
 	 *  - DATA_FMT_DIGEST: digest
 	 *  - DATA_FMT_DIGEST_WITH_ALGO: [<hash algo>] + ':' + '\0' + digest,
-	 *    where <hash algo> is provided if the hash algoritm is not
+	 *    where <hash algo> is provided if the hash algorithm is not
 	 *    SHA1 or MD5
 	 */
 	u8 buffer[CRYPTO_MAX_ALG_NAME + 2 + IMA_MAX_DIGEST_SIZE] = { 0 };
-- 
GitLab


From 18848c7191320ae1e5f0afdda7fb99f25daadc75 Mon Sep 17 00:00:00 2001
From: Mimi Zohar <zohar@linux.ibm.com>
Date: Mon, 17 Jan 2022 09:03:29 -0500
Subject: [PATCH 0422/1586] MAINTAINERS: add missing "security/integrity"
 directory

Update the IMA and EVM records to include the "security/integrity/"
directory.

Reviewed-by: Petr Vorel <pvorel@suse.cz>
Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
---
 MAINTAINERS | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index fca970a46e77a..fdf0420ba477e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7273,7 +7273,9 @@ Extended Verification Module (EVM)
 M:	Mimi Zohar <zohar@linux.ibm.com>
 L:	linux-integrity@vger.kernel.org
 S:	Supported
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/zohar/linux-integrity.git
 F:	security/integrity/evm/
+F:	security/integrity/
 
 EXTENSIBLE FIRMWARE INTERFACE (EFI)
 M:	Ard Biesheuvel <ardb@kernel.org>
@@ -9521,6 +9523,7 @@ L:	linux-integrity@vger.kernel.org
 S:	Supported
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/zohar/linux-integrity.git
 F:	security/integrity/ima/
+F:	security/integrity/
 
 INTEL 810/815 FRAMEBUFFER DRIVER
 M:	Antonino Daplas <adaplas@gmail.com>
-- 
GitLab


From e4e071baea41e43aebd7f17ed1ffceeca6aa9868 Mon Sep 17 00:00:00 2001
From: Stefan Berger <stefanb@linux.ibm.com>
Date: Tue, 1 Feb 2022 15:37:11 -0500
Subject: [PATCH 0423/1586] ima: Return error code obtained from securityfs
 functions

If an error occurs when creating a securityfs file, return the exact
error code to the caller.

Signed-off-by: Stefan Berger <stefanb@linux.ibm.com>
Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
---
 security/integrity/ima/ima_fs.c | 31 +++++++++++++++++++++++--------
 1 file changed, 23 insertions(+), 8 deletions(-)

diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c
index 3ad8f7734208b..cd1683dad3bf0 100644
--- a/security/integrity/ima/ima_fs.c
+++ b/security/integrity/ima/ima_fs.c
@@ -452,47 +452,61 @@ static const struct file_operations ima_measure_policy_ops = {
 
 int __init ima_fs_init(void)
 {
+	int ret;
+
 	ima_dir = securityfs_create_dir("ima", integrity_dir);
 	if (IS_ERR(ima_dir))
-		return -1;
+		return PTR_ERR(ima_dir);
 
 	ima_symlink = securityfs_create_symlink("ima", NULL, "integrity/ima",
 						NULL);
-	if (IS_ERR(ima_symlink))
+	if (IS_ERR(ima_symlink)) {
+		ret = PTR_ERR(ima_symlink);
 		goto out;
+	}
 
 	binary_runtime_measurements =
 	    securityfs_create_file("binary_runtime_measurements",
 				   S_IRUSR | S_IRGRP, ima_dir, NULL,
 				   &ima_measurements_ops);
-	if (IS_ERR(binary_runtime_measurements))
+	if (IS_ERR(binary_runtime_measurements)) {
+		ret = PTR_ERR(binary_runtime_measurements);
 		goto out;
+	}
 
 	ascii_runtime_measurements =
 	    securityfs_create_file("ascii_runtime_measurements",
 				   S_IRUSR | S_IRGRP, ima_dir, NULL,
 				   &ima_ascii_measurements_ops);
-	if (IS_ERR(ascii_runtime_measurements))
+	if (IS_ERR(ascii_runtime_measurements)) {
+		ret = PTR_ERR(ascii_runtime_measurements);
 		goto out;
+	}
 
 	runtime_measurements_count =
 	    securityfs_create_file("runtime_measurements_count",
 				   S_IRUSR | S_IRGRP, ima_dir, NULL,
 				   &ima_measurements_count_ops);
-	if (IS_ERR(runtime_measurements_count))
+	if (IS_ERR(runtime_measurements_count)) {
+		ret = PTR_ERR(runtime_measurements_count);
 		goto out;
+	}
 
 	violations =
 	    securityfs_create_file("violations", S_IRUSR | S_IRGRP,
 				   ima_dir, NULL, &ima_htable_violations_ops);
-	if (IS_ERR(violations))
+	if (IS_ERR(violations)) {
+		ret = PTR_ERR(violations);
 		goto out;
+	}
 
 	ima_policy = securityfs_create_file("policy", POLICY_FILE_FLAGS,
 					    ima_dir, NULL,
 					    &ima_measure_policy_ops);
-	if (IS_ERR(ima_policy))
+	if (IS_ERR(ima_policy)) {
+		ret = PTR_ERR(ima_policy);
 		goto out;
+	}
 
 	return 0;
 out:
@@ -503,5 +517,6 @@ out:
 	securityfs_remove(binary_runtime_measurements);
 	securityfs_remove(ima_symlink);
 	securityfs_remove(ima_dir);
-	return -1;
+
+	return ret;
 }
-- 
GitLab


From aae6ccbd826d26730a6fd9bc01884f0a0a9cbb25 Mon Sep 17 00:00:00 2001
From: Mimi Zohar <zohar@linux.ibm.com>
Date: Tue, 28 Dec 2021 09:53:14 -0500
Subject: [PATCH 0424/1586] ima: rename IMA_ACTION_FLAGS to IMA_NONACTION_FLAGS

Simple policy rule options, such as fowner, uid, or euid, can be checked
immediately, while other policy rule options, such as requiring a file
signature, need to be deferred.

The 'flags' field in the integrity_iint_cache struct contains the policy
action', 'subaction', and non action/subaction.

action: measure/measured, appraise/appraised, (collect)/collected,
        audit/audited
subaction: appraise status for each hook (e.g. file, mmap, bprm, read,
        creds)
non action/subaction: deferred policy rule options and state

Rename the IMA_ACTION_FLAGS to IMA_NONACTION_FLAGS.

Reviewed-by: Stefan Berger <stefanb@linux.ibm.com>
Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
---
 security/integrity/ima/ima_main.c   | 2 +-
 security/integrity/ima/ima_policy.c | 2 +-
 security/integrity/integrity.h      | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index 8ed6da428328b..7c80dfe2c7a54 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -263,7 +263,7 @@ static int process_measurement(struct file *file, const struct cred *cred,
 		/* reset appraisal flags if ima_inode_post_setattr was called */
 		iint->flags &= ~(IMA_APPRAISE | IMA_APPRAISED |
 				 IMA_APPRAISE_SUBMASK | IMA_APPRAISED_SUBMASK |
-				 IMA_ACTION_FLAGS);
+				 IMA_NONACTION_FLAGS);
 
 	/*
 	 * Re-evaulate the file if either the xattr has changed or the
diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c
index 90f528558adc2..a0f3775cbd82b 100644
--- a/security/integrity/ima/ima_policy.c
+++ b/security/integrity/ima/ima_policy.c
@@ -712,7 +712,7 @@ int ima_match_policy(struct user_namespace *mnt_userns, struct inode *inode,
 				     func, mask, func_data))
 			continue;
 
-		action |= entry->flags & IMA_ACTION_FLAGS;
+		action |= entry->flags & IMA_NONACTION_FLAGS;
 
 		action |= entry->action & IMA_DO_MASK;
 		if (entry->action & IMA_APPRAISE) {
diff --git a/security/integrity/integrity.h b/security/integrity/integrity.h
index 547425c20e117..d045dccd415af 100644
--- a/security/integrity/integrity.h
+++ b/security/integrity/integrity.h
@@ -30,8 +30,8 @@
 #define IMA_HASH		0x00000100
 #define IMA_HASHED		0x00000200
 
-/* iint cache flags */
-#define IMA_ACTION_FLAGS	0xff000000
+/* iint policy rule cache flags */
+#define IMA_NONACTION_FLAGS	0xff000000
 #define IMA_DIGSIG_REQUIRED	0x01000000
 #define IMA_PERMIT_DIRECTIO	0x02000000
 #define IMA_NEW_FILE		0x04000000
-- 
GitLab


From 8c54135e2e6da677291012813a26a5f1b2c8a90a Mon Sep 17 00:00:00 2001
From: Mimi Zohar <zohar@linux.ibm.com>
Date: Mon, 24 Jan 2022 14:26:23 -0500
Subject: [PATCH 0425/1586] ima: define ima_max_digest_data struct without a
 flexible array variable

To support larger hash digests in the 'iint' cache, instead of defining
the 'digest' field as the maximum digest size, the 'digest' field was
defined as a flexible array variable.  The "ima_digest_data" struct was
wrapped inside a local structure with the maximum digest size.  But
before adding the record to the iint cache, memory for the exact digest
size was dynamically allocated.

The original reason for defining the 'digest' field as a flexible array
variable is still valid for the 'iint' cache use case.  Instead of
wrapping the 'ima_digest_data' struct in a local structure define
'ima_max_digest_data' struct.

Reviewed-by: Stefan Berger <stefanb@linux.ibm.com>
Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
---
 security/integrity/ima/ima_api.c          | 10 ++++------
 security/integrity/ima/ima_init.c         |  5 +----
 security/integrity/ima/ima_main.c         |  5 +----
 security/integrity/ima/ima_template_lib.c |  5 +----
 security/integrity/integrity.h            | 10 ++++++++++
 5 files changed, 17 insertions(+), 18 deletions(-)

diff --git a/security/integrity/ima/ima_api.c b/security/integrity/ima/ima_api.c
index 5b220a2fe573d..c6805af462118 100644
--- a/security/integrity/ima/ima_api.c
+++ b/security/integrity/ima/ima_api.c
@@ -217,14 +217,11 @@ int ima_collect_measurement(struct integrity_iint_cache *iint,
 	const char *audit_cause = "failed";
 	struct inode *inode = file_inode(file);
 	const char *filename = file->f_path.dentry->d_name.name;
+	struct ima_max_digest_data hash;
 	int result = 0;
 	int length;
 	void *tmpbuf;
 	u64 i_version;
-	struct {
-		struct ima_digest_data hdr;
-		char digest[IMA_MAX_DIGEST_SIZE];
-	} hash;
 
 	/*
 	 * Always collect the modsig, because IMA might have already collected
@@ -239,8 +236,9 @@ int ima_collect_measurement(struct integrity_iint_cache *iint,
 
 	/*
 	 * Detecting file change is based on i_version. On filesystems
-	 * which do not support i_version, support is limited to an initial
-	 * measurement/appraisal/audit.
+	 * which do not support i_version, support was originally limited
+	 * to an initial measurement/appraisal/audit, but was modified to
+	 * assume the file changed.
 	 */
 	i_version = inode_query_iversion(inode);
 	hash.hdr.algo = algo;
diff --git a/security/integrity/ima/ima_init.c b/security/integrity/ima/ima_init.c
index b26fa67476b41..63979aefc95f7 100644
--- a/security/integrity/ima/ima_init.c
+++ b/security/integrity/ima/ima_init.c
@@ -47,12 +47,9 @@ static int __init ima_add_boot_aggregate(void)
 	struct integrity_iint_cache tmp_iint, *iint = &tmp_iint;
 	struct ima_event_data event_data = { .iint = iint,
 					     .filename = boot_aggregate_name };
+	struct ima_max_digest_data hash;
 	int result = -ENOMEM;
 	int violation = 0;
-	struct {
-		struct ima_digest_data hdr;
-		char digest[TPM_MAX_DIGEST_SIZE];
-	} hash;
 
 	memset(iint, 0, sizeof(*iint));
 	memset(&hash, 0, sizeof(hash));
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index 7c80dfe2c7a54..c6412dec3810b 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -874,10 +874,7 @@ int process_buffer_measurement(struct user_namespace *mnt_userns,
 					    .buf = buf,
 					    .buf_len = size};
 	struct ima_template_desc *template;
-	struct {
-		struct ima_digest_data hdr;
-		char digest[IMA_MAX_DIGEST_SIZE];
-	} hash = {};
+	struct ima_max_digest_data hash;
 	char digest_hash[IMA_MAX_DIGEST_SIZE];
 	int digest_hash_len = hash_digest_size[ima_hash_algo];
 	int violation = 0;
diff --git a/security/integrity/ima/ima_template_lib.c b/security/integrity/ima/ima_template_lib.c
index 5a5d462ab36db..7155d17a3b75f 100644
--- a/security/integrity/ima/ima_template_lib.c
+++ b/security/integrity/ima/ima_template_lib.c
@@ -307,10 +307,7 @@ static int ima_eventdigest_init_common(const u8 *digest, u32 digestsize,
 int ima_eventdigest_init(struct ima_event_data *event_data,
 			 struct ima_field_data *field_data)
 {
-	struct {
-		struct ima_digest_data hdr;
-		char digest[IMA_MAX_DIGEST_SIZE];
-	} hash;
+	struct ima_max_digest_data hash;
 	u8 *cur_digest = NULL;
 	u32 cur_digestsize = 0;
 	struct inode *inode;
diff --git a/security/integrity/integrity.h b/security/integrity/integrity.h
index d045dccd415af..daf49894fd7d0 100644
--- a/security/integrity/integrity.h
+++ b/security/integrity/integrity.h
@@ -15,6 +15,7 @@
 #include <linux/types.h>
 #include <linux/integrity.h>
 #include <crypto/sha1.h>
+#include <crypto/hash.h>
 #include <linux/key.h>
 #include <linux/audit.h>
 
@@ -110,6 +111,15 @@ struct ima_digest_data {
 	u8 digest[];
 } __packed;
 
+/*
+ * Instead of wrapping the ima_digest_data struct inside a local structure
+ * with the maximum hash size, define ima_max_digest_data struct.
+ */
+struct ima_max_digest_data {
+	struct ima_digest_data hdr;
+	u8 digest[HASH_MAX_DIGESTSIZE];
+} __packed;
+
 /*
  * signature format v2 - for using with asymmetric keys
  */
-- 
GitLab


From d53f8f8dbe97e4ed7d52e57581d1a8f6e62a7643 Mon Sep 17 00:00:00 2001
From: Joey Gouly <joey.gouly@arm.com>
Date: Wed, 9 Feb 2022 15:22:35 +0000
Subject: [PATCH 0426/1586] kselftest/arm64: mte: user_mem: introduce
 tag_offset and tag_len

These can be used to place an MTE tag at an address that is not at a
page size boundary.

The kernel prior to 295cf156231c ("arm64: Avoid premature usercopy failure"),
would infinite loop if an MTE tag was placed not at a PAGE_SIZE boundary.
This is because the kernel checked if the pages were readable by checking the
first byte of each page, but would then fault in the middle of the page due
to the MTE tag.

Signed-off-by: Joey Gouly <joey.gouly@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Mark Brown <broonie@kernel.org>
Cc: Shuah Khan <shuah@kernel.org>
Reviewed-by: Mark Brown <broonie@kernel.org>
Tested-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20220209152240.52788-2-joey.gouly@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 .../selftests/arm64/mte/check_user_mem.c      | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/tools/testing/selftests/arm64/mte/check_user_mem.c b/tools/testing/selftests/arm64/mte/check_user_mem.c
index 1de7a0abd0ae3..5a5a7e1f57890 100644
--- a/tools/testing/selftests/arm64/mte/check_user_mem.c
+++ b/tools/testing/selftests/arm64/mte/check_user_mem.c
@@ -19,7 +19,8 @@
 
 static size_t page_sz;
 
-static int check_usermem_access_fault(int mem_type, int mode, int mapping)
+static int check_usermem_access_fault(int mem_type, int mode, int mapping,
+                                      int tag_offset, int tag_len)
 {
 	int fd, i, err;
 	char val = 'A';
@@ -54,10 +55,12 @@ static int check_usermem_access_fault(int mem_type, int mode, int mapping)
 	if (i < len)
 		goto usermem_acc_err;
 
-	/* Tag the next half of memory with different value */
-	ptr_next = (void *)((unsigned long)ptr + page_sz);
+	if (!tag_len)
+		tag_len = len - tag_offset;
+	/* Tag a part of memory with different value */
+	ptr_next = (void *)((unsigned long)ptr + tag_offset);
 	ptr_next = mte_insert_new_tag(ptr_next);
-	mte_set_tag_address_range(ptr_next, page_sz);
+	mte_set_tag_address_range(ptr_next, tag_len);
 
 	lseek(fd, 0, 0);
 	/* Copy from file into buffer with invalid tag */
@@ -100,14 +103,14 @@ int main(int argc, char *argv[])
 	/* Set test plan */
 	ksft_set_plan(4);
 
-	evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE),
+	evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, page_sz, 0),
 		"Check memory access from kernel in sync mode, private mapping and mmap memory\n");
-	evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED),
+	evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED, page_sz, 0),
 		"Check memory access from kernel in sync mode, shared mapping and mmap memory\n");
 
-	evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE),
+	evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE, page_sz, 0),
 		"Check memory access from kernel in async mode, private mapping and mmap memory\n");
-	evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED),
+	evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED, page_sz, 0),
 		"Check memory access from kernel in async mode, shared mapping and mmap memory\n");
 
 	mte_restore_setup();
-- 
GitLab


From ff0b9aba30aeca68de09b784093f4482108586a9 Mon Sep 17 00:00:00 2001
From: Joey Gouly <joey.gouly@arm.com>
Date: Wed, 9 Feb 2022 15:22:36 +0000
Subject: [PATCH 0427/1586] kselftest/arm64: mte: user_mem: rework error
 handling

Future commits will have multiple iterations of tests in this function,
so make the error handling assume it will pass and then bail out if there
is an error.

Signed-off-by: Joey Gouly <joey.gouly@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Mark Brown <broonie@kernel.org>
Cc: Shuah Khan <shuah@kernel.org>
Reviewed-by: Mark Brown <broonie@kernel.org>
Tested-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20220209152240.52788-3-joey.gouly@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 .../selftests/arm64/mte/check_user_mem.c      | 22 +++++++++++++------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/tools/testing/selftests/arm64/mte/check_user_mem.c b/tools/testing/selftests/arm64/mte/check_user_mem.c
index 5a5a7e1f57890..2afcc9fb9ae82 100644
--- a/tools/testing/selftests/arm64/mte/check_user_mem.c
+++ b/tools/testing/selftests/arm64/mte/check_user_mem.c
@@ -27,7 +27,7 @@ static int check_usermem_access_fault(int mem_type, int mode, int mapping,
 	size_t len, read_len;
 	void *ptr, *ptr_next;
 
-	err = KSFT_FAIL;
+	err = KSFT_PASS;
 	len = 2 * page_sz;
 	mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
 	fd = create_temp_file();
@@ -71,14 +71,22 @@ static int check_usermem_access_fault(int mem_type, int mode, int mapping,
 	 * mode without fault but may not fail in async mode as per the
 	 * implemented MTE userspace support in Arm64 kernel.
 	 */
-	if (mode == MTE_SYNC_ERR &&
-	    !cur_mte_cxt.fault_valid && read_len < len) {
-		err = KSFT_PASS;
-	} else if (mode == MTE_ASYNC_ERR &&
-		   !cur_mte_cxt.fault_valid && read_len == len) {
-		err = KSFT_PASS;
+	if (cur_mte_cxt.fault_valid)
+		goto usermem_acc_err;
+
+	if (mode == MTE_SYNC_ERR && read_len < len) {
+		/* test passed */
+	} else if (mode == MTE_ASYNC_ERR && read_len == len) {
+		/* test passed */
+	} else {
+		goto usermem_acc_err;
 	}
+
+	goto exit;
+
 usermem_acc_err:
+	err = KSFT_FAIL;
+exit:
 	mte_free_memory((void *)ptr, len, mem_type, true);
 	close(fd);
 	return err;
-- 
GitLab


From 682b064bae871deb213ed2e97fe4a5d4a5132e37 Mon Sep 17 00:00:00 2001
From: Joey Gouly <joey.gouly@arm.com>
Date: Wed, 9 Feb 2022 15:22:37 +0000
Subject: [PATCH 0428/1586] kselftest/arm64: mte: user_mem: check different
 offsets and sizes

To check there are no assumptions in the kernel about buffer sizes or alignments of
user space pointers, expand the test to cover different sizes and offsets.

Signed-off-by: Joey Gouly <joey.gouly@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Mark Brown <broonie@kernel.org>
Cc: Shuah Khan <shuah@kernel.org>
Reviewed-by: Mark Brown <broonie@kernel.org>
Tested-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20220209152240.52788-4-joey.gouly@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 .../selftests/arm64/mte/check_user_mem.c      | 45 +++++++++++--------
 1 file changed, 27 insertions(+), 18 deletions(-)

diff --git a/tools/testing/selftests/arm64/mte/check_user_mem.c b/tools/testing/selftests/arm64/mte/check_user_mem.c
index 2afcc9fb9ae82..89c861ee68fa1 100644
--- a/tools/testing/selftests/arm64/mte/check_user_mem.c
+++ b/tools/testing/selftests/arm64/mte/check_user_mem.c
@@ -26,6 +26,8 @@ static int check_usermem_access_fault(int mem_type, int mode, int mapping,
 	char val = 'A';
 	size_t len, read_len;
 	void *ptr, *ptr_next;
+	int fileoff, ptroff, size;
+	int sizes[] = {1, 2, 3, 8, 16, 32, 4096, page_sz};
 
 	err = KSFT_PASS;
 	len = 2 * page_sz;
@@ -62,24 +64,31 @@ static int check_usermem_access_fault(int mem_type, int mode, int mapping,
 	ptr_next = mte_insert_new_tag(ptr_next);
 	mte_set_tag_address_range(ptr_next, tag_len);
 
-	lseek(fd, 0, 0);
-	/* Copy from file into buffer with invalid tag */
-	read_len = read(fd, ptr, len);
-	mte_wait_after_trig();
-	/*
-	 * Accessing user memory in kernel with invalid tag should fail in sync
-	 * mode without fault but may not fail in async mode as per the
-	 * implemented MTE userspace support in Arm64 kernel.
-	 */
-	if (cur_mte_cxt.fault_valid)
-		goto usermem_acc_err;
-
-	if (mode == MTE_SYNC_ERR && read_len < len) {
-		/* test passed */
-	} else if (mode == MTE_ASYNC_ERR && read_len == len) {
-		/* test passed */
-	} else {
-		goto usermem_acc_err;
+	for (fileoff = 0; fileoff < 16; fileoff++) {
+		for (ptroff = 0; ptroff < 16; ptroff++) {
+			for (i = 0; i < ARRAY_SIZE(sizes); i++) {
+				size = sizes[i];
+				lseek(fd, 0, 0);
+				/* Copy from file into buffer with invalid tag */
+				read_len = read(fd, ptr + ptroff, size);
+				mte_wait_after_trig();
+				/*
+				 * Accessing user memory in kernel with invalid tag should fail in sync
+				 * mode without fault but may not fail in async mode as per the
+				 * implemented MTE userspace support in Arm64 kernel.
+				 */
+				if (cur_mte_cxt.fault_valid) {
+					goto usermem_acc_err;
+				}
+				if (mode == MTE_SYNC_ERR && read_len < len) {
+					/* test passed */
+				} else if (mode == MTE_ASYNC_ERR && read_len == size) {
+					/* test passed */
+				} else {
+					goto usermem_acc_err;
+				}
+			}
+		}
 	}
 
 	goto exit;
-- 
GitLab


From b9fc700176f1cc3d9aef7dd51423150cc1567a9a Mon Sep 17 00:00:00 2001
From: Joey Gouly <joey.gouly@arm.com>
Date: Wed, 9 Feb 2022 15:22:38 +0000
Subject: [PATCH 0429/1586] kselftest/arm64: mte: user_mem: add test type enum

The test is currently hardcoded to use the `read` syscall, this commit adds
a test_type enum to support expanding the test coverage to other syscalls.

Signed-off-by: Joey Gouly <joey.gouly@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Mark Brown <broonie@kernel.org>
Cc: Shuah Khan <shuah@kernel.org>
Reviewed-by: Mark Brown <broonie@kernel.org>
Tested-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20220209152240.52788-5-joey.gouly@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 .../selftests/arm64/mte/check_user_mem.c      | 38 +++++++++++++------
 1 file changed, 26 insertions(+), 12 deletions(-)

diff --git a/tools/testing/selftests/arm64/mte/check_user_mem.c b/tools/testing/selftests/arm64/mte/check_user_mem.c
index 89c861ee68fa1..58b1b272ca80f 100644
--- a/tools/testing/selftests/arm64/mte/check_user_mem.c
+++ b/tools/testing/selftests/arm64/mte/check_user_mem.c
@@ -19,12 +19,18 @@
 
 static size_t page_sz;
 
+enum test_type {
+	READ_TEST,
+	LAST_TEST,
+};
+
 static int check_usermem_access_fault(int mem_type, int mode, int mapping,
-                                      int tag_offset, int tag_len)
+                                      int tag_offset, int tag_len,
+                                      enum test_type test_type)
 {
 	int fd, i, err;
 	char val = 'A';
-	size_t len, read_len;
+	ssize_t len, syscall_len;
 	void *ptr, *ptr_next;
 	int fileoff, ptroff, size;
 	int sizes[] = {1, 2, 3, 8, 16, 32, 4096, page_sz};
@@ -46,9 +52,9 @@ static int check_usermem_access_fault(int mem_type, int mode, int mapping,
 	}
 	mte_initialize_current_context(mode, (uintptr_t)ptr, len);
 	/* Copy from file into buffer with valid tag */
-	read_len = read(fd, ptr, len);
+	syscall_len = read(fd, ptr, len);
 	mte_wait_after_trig();
-	if (cur_mte_cxt.fault_valid || read_len < len)
+	if (cur_mte_cxt.fault_valid || syscall_len < len)
 		goto usermem_acc_err;
 	/* Verify same pattern is read */
 	for (i = 0; i < len; i++)
@@ -69,8 +75,16 @@ static int check_usermem_access_fault(int mem_type, int mode, int mapping,
 			for (i = 0; i < ARRAY_SIZE(sizes); i++) {
 				size = sizes[i];
 				lseek(fd, 0, 0);
-				/* Copy from file into buffer with invalid tag */
-				read_len = read(fd, ptr + ptroff, size);
+
+				/* perform file operation on buffer with invalid tag */
+				switch (test_type) {
+				case READ_TEST:
+					syscall_len = read(fd, ptr + ptroff, size);
+					break;
+				case LAST_TEST:
+					goto usermem_acc_err;
+				}
+
 				mte_wait_after_trig();
 				/*
 				 * Accessing user memory in kernel with invalid tag should fail in sync
@@ -80,9 +94,9 @@ static int check_usermem_access_fault(int mem_type, int mode, int mapping,
 				if (cur_mte_cxt.fault_valid) {
 					goto usermem_acc_err;
 				}
-				if (mode == MTE_SYNC_ERR && read_len < len) {
+				if (mode == MTE_SYNC_ERR && syscall_len < len) {
 					/* test passed */
-				} else if (mode == MTE_ASYNC_ERR && read_len == size) {
+				} else if (mode == MTE_ASYNC_ERR && syscall_len == size) {
 					/* test passed */
 				} else {
 					goto usermem_acc_err;
@@ -120,14 +134,14 @@ int main(int argc, char *argv[])
 	/* Set test plan */
 	ksft_set_plan(4);
 
-	evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, page_sz, 0),
+	evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, page_sz, 0, READ_TEST),
 		"Check memory access from kernel in sync mode, private mapping and mmap memory\n");
-	evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED, page_sz, 0),
+	evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED, page_sz, 0, READ_TEST),
 		"Check memory access from kernel in sync mode, shared mapping and mmap memory\n");
 
-	evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE, page_sz, 0),
+	evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE, page_sz, 0, READ_TEST),
 		"Check memory access from kernel in async mode, private mapping and mmap memory\n");
-	evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED, page_sz, 0),
+	evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED, page_sz, 0, READ_TEST),
 		"Check memory access from kernel in async mode, shared mapping and mmap memory\n");
 
 	mte_restore_setup();
-- 
GitLab


From e8d3974f34fa8ac38915c307677657b4d6acc619 Mon Sep 17 00:00:00 2001
From: Joey Gouly <joey.gouly@arm.com>
Date: Wed, 9 Feb 2022 15:22:39 +0000
Subject: [PATCH 0430/1586] kselftest/arm64: mte: user_mem: add more test types

To expand the test coverage for MTE tags in userspace memory,
also perform the test with `write`, `readv` and `writev` syscalls.

Signed-off-by: Joey Gouly <joey.gouly@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Mark Brown <broonie@kernel.org>
Cc: Shuah Khan <shuah@kernel.org>
Reviewed-by: Mark Brown <broonie@kernel.org>
Tested-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20220209152240.52788-6-joey.gouly@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 .../selftests/arm64/mte/check_user_mem.c      | 21 +++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/tools/testing/selftests/arm64/mte/check_user_mem.c b/tools/testing/selftests/arm64/mte/check_user_mem.c
index 58b1b272ca80f..bb4974c437f8a 100644
--- a/tools/testing/selftests/arm64/mte/check_user_mem.c
+++ b/tools/testing/selftests/arm64/mte/check_user_mem.c
@@ -11,6 +11,7 @@
 #include <string.h>
 #include <ucontext.h>
 #include <unistd.h>
+#include <sys/uio.h>
 #include <sys/mman.h>
 
 #include "kselftest.h"
@@ -21,6 +22,9 @@ static size_t page_sz;
 
 enum test_type {
 	READ_TEST,
+	WRITE_TEST,
+	READV_TEST,
+	WRITEV_TEST,
 	LAST_TEST,
 };
 
@@ -81,6 +85,23 @@ static int check_usermem_access_fault(int mem_type, int mode, int mapping,
 				case READ_TEST:
 					syscall_len = read(fd, ptr + ptroff, size);
 					break;
+				case WRITE_TEST:
+					syscall_len = write(fd, ptr + ptroff, size);
+					break;
+				case READV_TEST: {
+					struct iovec iov[1];
+					iov[0].iov_base = ptr + ptroff;
+					iov[0].iov_len = size;
+					syscall_len = readv(fd, iov, 1);
+					break;
+				}
+				case WRITEV_TEST: {
+					struct iovec iov[1];
+					iov[0].iov_base = ptr + ptroff;
+					iov[0].iov_len = size;
+					syscall_len = writev(fd, iov, 1);
+					break;
+				}
 				case LAST_TEST:
 					goto usermem_acc_err;
 				}
-- 
GitLab


From 0a775ccb81207413d07214ac6eaed75d0e4376b1 Mon Sep 17 00:00:00 2001
From: Joey Gouly <joey.gouly@arm.com>
Date: Wed, 9 Feb 2022 15:22:40 +0000
Subject: [PATCH 0431/1586] kselftest/arm64: mte: user_mem: test a wider range
 of values

Instead of hard coding a small amount of tests, generate a wider
range of tests to try catch any corner cases that could show up.

These new tests test different MTE tag lengths and offsets, which
previously would have caused infinite loops in the kernel. This was
fixed by 295cf156231c ("arm64: Avoid premature usercopy failure"),
so these are regressions tests for that corner case.

Signed-off-by: Joey Gouly <joey.gouly@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Mark Brown <broonie@kernel.org>
Cc: Shuah Khan <shuah@kernel.org>
Reviewed-by: Mark Brown <broonie@kernel.org>
Tested-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20220209152240.52788-7-joey.gouly@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 .../selftests/arm64/mte/check_user_mem.c      | 94 ++++++++++++++++---
 1 file changed, 83 insertions(+), 11 deletions(-)

diff --git a/tools/testing/selftests/arm64/mte/check_user_mem.c b/tools/testing/selftests/arm64/mte/check_user_mem.c
index bb4974c437f8a..f4ae5f87a3b77 100644
--- a/tools/testing/selftests/arm64/mte/check_user_mem.c
+++ b/tools/testing/selftests/arm64/mte/check_user_mem.c
@@ -3,6 +3,7 @@
 
 #define _GNU_SOURCE
 
+#include <assert.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <signal.h>
@@ -20,6 +21,8 @@
 
 static size_t page_sz;
 
+#define TEST_NAME_MAX 100
+
 enum test_type {
 	READ_TEST,
 	WRITE_TEST,
@@ -136,9 +139,67 @@ exit:
 	return err;
 }
 
+void format_test_name(char* name, int name_len, int type, int sync, int map, int len, int offset) {
+	const char* test_type;
+	const char* mte_type;
+	const char* map_type;
+
+	switch (type) {
+	case READ_TEST:
+		test_type = "read";
+		break;
+	case WRITE_TEST:
+		test_type = "write";
+		break;
+	case READV_TEST:
+		test_type = "readv";
+		break;
+	case WRITEV_TEST:
+		test_type = "writev";
+		break;
+	default:
+		assert(0);
+		break;
+	}
+
+	switch (sync) {
+	case MTE_SYNC_ERR:
+		mte_type = "MTE_SYNC_ERR";
+		break;
+	case MTE_ASYNC_ERR:
+		mte_type = "MTE_ASYNC_ERR";
+		break;
+	default:
+		assert(0);
+		break;
+	}
+
+	switch (map) {
+	case MAP_SHARED:
+		map_type = "MAP_SHARED";
+		break;
+	case MAP_PRIVATE:
+		map_type = "MAP_PRIVATE";
+		break;
+	default:
+		assert(0);
+		break;
+	}
+
+	snprintf(name, name_len,
+	         "test type: %s, %s, %s, tag len: %d, tag offset: %d\n",
+	         test_type, mte_type, map_type, len, offset);
+}
+
 int main(int argc, char *argv[])
 {
 	int err;
+	int t, s, m, l, o;
+	int mte_sync[] = {MTE_SYNC_ERR, MTE_ASYNC_ERR};
+	int maps[] = {MAP_SHARED, MAP_PRIVATE};
+	int tag_lens[] = {0, MT_GRANULE_SIZE};
+	int tag_offsets[] = {page_sz, MT_GRANULE_SIZE};
+	char test_name[TEST_NAME_MAX];
 
 	page_sz = getpagesize();
 	if (!page_sz) {
@@ -153,17 +214,28 @@ int main(int argc, char *argv[])
 	mte_register_signal(SIGSEGV, mte_default_handler);
 
 	/* Set test plan */
-	ksft_set_plan(4);
-
-	evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, page_sz, 0, READ_TEST),
-		"Check memory access from kernel in sync mode, private mapping and mmap memory\n");
-	evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED, page_sz, 0, READ_TEST),
-		"Check memory access from kernel in sync mode, shared mapping and mmap memory\n");
-
-	evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE, page_sz, 0, READ_TEST),
-		"Check memory access from kernel in async mode, private mapping and mmap memory\n");
-	evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED, page_sz, 0, READ_TEST),
-		"Check memory access from kernel in async mode, shared mapping and mmap memory\n");
+	ksft_set_plan(64);
+
+	for (t = 0; t < LAST_TEST; t++) {
+		for (s = 0; s < ARRAY_SIZE(mte_sync); s++) {
+			for (m = 0; m < ARRAY_SIZE(maps); m++) {
+				for (l = 0; l < ARRAY_SIZE(tag_lens); l++) {
+					for (o = 0; o < ARRAY_SIZE(tag_offsets); o++) {
+						int sync = mte_sync[s];
+						int map = maps[m];
+						int offset = tag_offsets[o];
+						int tag_len = tag_lens[l];
+						int res = check_usermem_access_fault(USE_MMAP, sync,
+						                                     map, offset,
+						                                     tag_len, t);
+						format_test_name(test_name, TEST_NAME_MAX,
+						                 t, sync, map, tag_len, offset);
+						evaluate_test(res, test_name);
+					}
+				}
+			}
+		}
+	}
 
 	mte_restore_setup();
 	ksft_print_cnts();
-- 
GitLab


From 16860a209cf1ad20a3b454b1c56d64c9ea9532ac Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Fri, 4 Feb 2022 10:44:39 +0000
Subject: [PATCH 0432/1586] arm64: atomics: remove redundant static branch

Due to a historical oversight, we emit a redundant static branch for
each atomic/atomic64 operation when CONFIG_ARM64_LSE_ATOMICS is
selected. We can safely remove this, making the kernel Image reasonably
smaller.

When CONFIG_ARM64_LSE_ATOMICS is selected, every LSE atomic operation
has two preceding static branches with the same target, e.g.

	b	f7c <kernel_init_freeable+0xa4>
	b	f7c <kernel_init_freeable+0xa4>
	mov	w0, #0x1                   	// #1
	ldadd	w0, w0, [x19]

This is because the __lse_ll_sc_body() wrapper uses
system_uses_lse_atomics(), which checks both `arm64_const_caps_ready`
and `cpu_hwcap_keys[ARM64_HAS_LSE_ATOMICS]`, each of which emits a
static branch. This has been the case since commit:

  addfc38672c73efd ("arm64: atomics: avoid out-of-line ll/sc atomics")

However, there was never a need to check `arm64_const_caps_ready`, which
was itself introduced in commit:

  63a1e1c95e60e798 ("arm64/cpufeature: don't use mutex in bringup path")

... so that cpus_have_const_cap() could fall back to checking the
`cpu_hwcaps` bitmap prior to the static keys for individual caps
becoming enabled. As system_uses_lse_atomics() doesn't check
`cpu_hwcaps`, and doesn't need to as we can safely use the LL/SC atomics
prior to enabling the `ARM64_HAS_LSE_ATOMICS` static key, it doesn't
need to check `arm64_const_caps_ready`.

This patch removes the `arm64_const_caps_ready` check from
system_uses_lse_atomics(). As the arch_atomic_* routines are meant to be
safely usable in noinstr code, I've also marked
system_uses_lse_atomics() as __always_inline.

This results in one fewer static branch per atomic operation, with the
prior example becoming:

	b	f78 <kernel_init_freeable+0xa0>
	mov	w0, #0x1                   	// #1
	ldadd	w0, w0, [x19]

Each static branch consists of the branch itself and an associated
__jump_table entry. Removing these has a reasonable impact on the Image
size, with a GCC 11.1.0 defconfig v5.17-rc2 Image being reduced by
128KiB:

| [mark@lakrids:~/src/linux]% ls -al Image*
| -rw-r--r-- 1 mark mark 34619904 Feb  3 18:24 Image.baseline
| -rw-r--r-- 1 mark mark 34488832 Feb  3 18:33 Image.onebranch

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Suzuki Poulose <suzuki.poulose@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20220204104439.270567-1-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/lse.h | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/include/asm/lse.h b/arch/arm64/include/asm/lse.h
index 5d10051c3e62e..29c85810ae690 100644
--- a/arch/arm64/include/asm/lse.h
+++ b/arch/arm64/include/asm/lse.h
@@ -17,12 +17,10 @@
 #include <asm/cpucaps.h>
 
 extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS];
-extern struct static_key_false arm64_const_caps_ready;
 
-static inline bool system_uses_lse_atomics(void)
+static __always_inline bool system_uses_lse_atomics(void)
 {
-	return (static_branch_likely(&arm64_const_caps_ready)) &&
-		static_branch_likely(&cpu_hwcap_keys[ARM64_HAS_LSE_ATOMICS]);
+	return static_branch_likely(&cpu_hwcap_keys[ARM64_HAS_LSE_ATOMICS]);
 }
 
 #define __lse_ll_sc_body(op, ...)					\
-- 
GitLab


From 5e50f5d4ff31e95599d695df1f0a4e7d2d6fef99 Mon Sep 17 00:00:00 2001
From: Ondrej Mosnacek <omosnace@redhat.com>
Date: Sat, 12 Feb 2022 18:59:21 +0100
Subject: [PATCH 0433/1586] security: add sctp_assoc_established hook

security_sctp_assoc_established() is added to replace
security_inet_conn_established() called in
sctp_sf_do_5_1E_ca(), so that asoc can be accessed in security
subsystem and save the peer secid to asoc->peer_secid.

Fixes: 72e89f50084c ("security: Add support for SCTP security hooks")
Reported-by: Prashanth Prahlad <pprahlad@redhat.com>
Based-on-patch-by: Xin Long <lucien.xin@gmail.com>
Reviewed-by: Xin Long <lucien.xin@gmail.com>
Tested-by: Richard Haines <richard_c_haines@btinternet.com>
Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 Documentation/security/SCTP.rst | 22 ++++++++++------------
 include/linux/lsm_hook_defs.h   |  2 ++
 include/linux/lsm_hooks.h       |  5 +++++
 include/linux/security.h        |  8 ++++++++
 net/sctp/sm_statefuns.c         |  8 +++++---
 security/security.c             |  7 +++++++
 6 files changed, 37 insertions(+), 15 deletions(-)

diff --git a/Documentation/security/SCTP.rst b/Documentation/security/SCTP.rst
index d5fd6ccc3dcbd..406cc68b88087 100644
--- a/Documentation/security/SCTP.rst
+++ b/Documentation/security/SCTP.rst
@@ -15,10 +15,7 @@ For security module support, three SCTP specific hooks have been implemented::
     security_sctp_assoc_request()
     security_sctp_bind_connect()
     security_sctp_sk_clone()
-
-Also the following security hook has been utilised::
-
-    security_inet_conn_established()
+    security_sctp_assoc_established()
 
 The usage of these hooks are described below with the SELinux implementation
 described in the `SCTP SELinux Support`_ chapter.
@@ -122,11 +119,12 @@ calls **sctp_peeloff**\(3).
     @newsk - pointer to new sock structure.
 
 
-security_inet_conn_established()
+security_sctp_assoc_established()
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Called when a COOKIE ACK is received::
+Called when a COOKIE ACK is received, and the peer secid will be
+saved into ``@asoc->peer_secid`` for client::
 
-    @sk  - pointer to sock structure.
+    @asoc - pointer to sctp association structure.
     @skb - pointer to skbuff of the COOKIE ACK packet.
 
 
@@ -134,7 +132,7 @@ Security Hooks used for Association Establishment
 -------------------------------------------------
 
 The following diagram shows the use of ``security_sctp_bind_connect()``,
-``security_sctp_assoc_request()``, ``security_inet_conn_established()`` when
+``security_sctp_assoc_request()``, ``security_sctp_assoc_established()`` when
 establishing an association.
 ::
 
@@ -172,7 +170,7 @@ establishing an association.
           <------------------------------------------- COOKIE ACK
           |                                               |
     sctp_sf_do_5_1E_ca                                    |
- Call security_inet_conn_established()                    |
+ Call security_sctp_assoc_established()                   |
  to set the peer label.                                   |
           |                                               |
           |                               If SCTP_SOCKET_TCP or peeled off
@@ -198,7 +196,7 @@ hooks with the SELinux specifics expanded below::
     security_sctp_assoc_request()
     security_sctp_bind_connect()
     security_sctp_sk_clone()
-    security_inet_conn_established()
+    security_sctp_assoc_established()
 
 
 security_sctp_assoc_request()
@@ -271,12 +269,12 @@ sockets sid and peer sid to that contained in the ``@asoc sid`` and
     @newsk - pointer to new sock structure.
 
 
-security_inet_conn_established()
+security_sctp_assoc_established()
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Called when a COOKIE ACK is received where it sets the connection's peer sid
 to that in ``@skb``::
 
-    @sk  - pointer to sock structure.
+    @asoc - pointer to sctp association structure.
     @skb - pointer to skbuff of the COOKIE ACK packet.
 
 
diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index a5a724c308d8d..45931d81ccc3f 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -332,6 +332,8 @@ LSM_HOOK(int, 0, sctp_bind_connect, struct sock *sk, int optname,
 	 struct sockaddr *address, int addrlen)
 LSM_HOOK(void, LSM_RET_VOID, sctp_sk_clone, struct sctp_association *asoc,
 	 struct sock *sk, struct sock *newsk)
+LSM_HOOK(int, 0, sctp_assoc_established, struct sctp_association *asoc,
+	 struct sk_buff *skb)
 #endif /* CONFIG_SECURITY_NETWORK */
 
 #ifdef CONFIG_SECURITY_INFINIBAND
diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index 3bf5c658bc448..419b5febc3ca5 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -1046,6 +1046,11 @@
  *	@asoc pointer to current sctp association structure.
  *	@sk pointer to current sock structure.
  *	@newsk pointer to new sock structure.
+ * @sctp_assoc_established:
+ *	Passes the @asoc and @chunk->skb of the association COOKIE_ACK packet
+ *	to the security module.
+ *	@asoc pointer to sctp association structure.
+ *	@skb pointer to skbuff of association packet.
  *
  * Security hooks for Infiniband
  *
diff --git a/include/linux/security.h b/include/linux/security.h
index 6d72772182c82..25b3ef71f495e 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1422,6 +1422,8 @@ int security_sctp_bind_connect(struct sock *sk, int optname,
 			       struct sockaddr *address, int addrlen);
 void security_sctp_sk_clone(struct sctp_association *asoc, struct sock *sk,
 			    struct sock *newsk);
+int security_sctp_assoc_established(struct sctp_association *asoc,
+				    struct sk_buff *skb);
 
 #else	/* CONFIG_SECURITY_NETWORK */
 static inline int security_unix_stream_connect(struct sock *sock,
@@ -1641,6 +1643,12 @@ static inline void security_sctp_sk_clone(struct sctp_association *asoc,
 					  struct sock *newsk)
 {
 }
+
+static inline int security_sctp_assoc_established(struct sctp_association *asoc,
+						  struct sk_buff *skb)
+{
+	return 0;
+}
 #endif	/* CONFIG_SECURITY_NETWORK */
 
 #ifdef CONFIG_SECURITY_INFINIBAND
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index cc544a97c4afd..7f342bc127358 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -930,6 +930,11 @@ enum sctp_disposition sctp_sf_do_5_1E_ca(struct net *net,
 	if (!sctp_vtag_verify(chunk, asoc))
 		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
+	/* Set peer label for connection. */
+	if (security_sctp_assoc_established((struct sctp_association *)asoc,
+					    chunk->skb))
+		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+
 	/* Verify that the chunk length for the COOKIE-ACK is OK.
 	 * If we don't do this, any bundled chunks may be junked.
 	 */
@@ -945,9 +950,6 @@ enum sctp_disposition sctp_sf_do_5_1E_ca(struct net *net,
 	 */
 	sctp_add_cmd_sf(commands, SCTP_CMD_INIT_COUNTER_RESET, SCTP_NULL());
 
-	/* Set peer label for connection. */
-	security_inet_conn_established(ep->base.sk, chunk->skb);
-
 	/* RFC 2960 5.1 Normal Establishment of an Association
 	 *
 	 * E) Upon reception of the COOKIE ACK, endpoint "A" will move
diff --git a/security/security.c b/security/security.c
index e649c8691be27..9663ffcca4b01 100644
--- a/security/security.c
+++ b/security/security.c
@@ -2393,6 +2393,13 @@ void security_sctp_sk_clone(struct sctp_association *asoc, struct sock *sk,
 }
 EXPORT_SYMBOL(security_sctp_sk_clone);
 
+int security_sctp_assoc_established(struct sctp_association *asoc,
+				    struct sk_buff *skb)
+{
+	return call_int_hook(sctp_assoc_established, 0, asoc, skb);
+}
+EXPORT_SYMBOL(security_sctp_assoc_established);
+
 #endif	/* CONFIG_SECURITY_NETWORK */
 
 #ifdef CONFIG_SECURITY_INFINIBAND
-- 
GitLab


From 3eb8eaf2ca3e98d4f6e52bed6148ee8fe3069a3d Mon Sep 17 00:00:00 2001
From: Ondrej Mosnacek <omosnace@redhat.com>
Date: Sat, 12 Feb 2022 18:59:22 +0100
Subject: [PATCH 0434/1586] security: implement sctp_assoc_established hook in
 selinux

Do this by extracting the peer labeling per-association logic from
selinux_sctp_assoc_request() into a new helper
selinux_sctp_process_new_assoc() and use this helper in both
selinux_sctp_assoc_request() and selinux_sctp_assoc_established(). This
ensures that the peer labeling behavior as documented in
Documentation/security/SCTP.rst is applied both on the client and server
side:
"""
An SCTP socket will only have one peer label assigned to it. This will be
assigned during the establishment of the first association. Any further
associations on this socket will have their packet peer label compared to
the sockets peer label, and only if they are different will the
``association`` permission be validated. This is validated by checking the
socket peer sid against the received packets peer sid to determine whether
the association should be allowed or denied.
"""

At the same time, it also ensures that the peer label of the association
is set to the correct value, such that if it is peeled off into a new
socket, the socket's peer label  will then be set to the association's
peer label, same as it already works on the server side.

While selinux_inet_conn_established() (which we are replacing by
selinux_sctp_assoc_established() for SCTP) only deals with assigning a
peer label to the connection (socket), in case of SCTP we need to also
copy the (local) socket label to the association, so that
selinux_sctp_sk_clone() can then pick it up for the new socket in case
of SCTP peeloff.

Careful readers will notice that the selinux_sctp_process_new_assoc()
helper also includes the "IPv4 packet received over an IPv6 socket"
check, even though it hadn't been in selinux_sctp_assoc_request()
before. While such check is not necessary in
selinux_inet_conn_request() (because struct request_sock's family field
is already set according to the skb's family), here it is needed, as we
don't have request_sock and we take the initial family from the socket.
In selinux_sctp_assoc_established() it is similarly needed as well (and
also selinux_inet_conn_established() already has it).

Fixes: 72e89f50084c ("security: Add support for SCTP security hooks")
Reported-by: Prashanth Prahlad <pprahlad@redhat.com>
Based-on-patch-by: Xin Long <lucien.xin@gmail.com>
Reviewed-by: Xin Long <lucien.xin@gmail.com>
Tested-by: Richard Haines <richard_c_haines@btinternet.com>
Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 security/selinux/hooks.c | 90 +++++++++++++++++++++++++++++-----------
 1 file changed, 66 insertions(+), 24 deletions(-)

diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index ab32303e66181..dafabb4dcc640 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -5238,37 +5238,38 @@ static void selinux_sock_graft(struct sock *sk, struct socket *parent)
 	sksec->sclass = isec->sclass;
 }
 
-/* Called whenever SCTP receives an INIT chunk. This happens when an incoming
- * connect(2), sctp_connectx(3) or sctp_sendmsg(3) (with no association
- * already present).
+/*
+ * Determines peer_secid for the asoc and updates socket's peer label
+ * if it's the first association on the socket.
  */
-static int selinux_sctp_assoc_request(struct sctp_association *asoc,
-				      struct sk_buff *skb)
+static int selinux_sctp_process_new_assoc(struct sctp_association *asoc,
+					  struct sk_buff *skb)
 {
-	struct sk_security_struct *sksec = asoc->base.sk->sk_security;
+	struct sock *sk = asoc->base.sk;
+	u16 family = sk->sk_family;
+	struct sk_security_struct *sksec = sk->sk_security;
 	struct common_audit_data ad;
 	struct lsm_network_audit net = {0,};
-	u8 peerlbl_active;
-	u32 peer_sid = SECINITSID_UNLABELED;
-	u32 conn_sid;
-	int err = 0;
+	int err;
 
-	if (!selinux_policycap_extsockclass())
-		return 0;
+	/* handle mapped IPv4 packets arriving via IPv6 sockets */
+	if (family == PF_INET6 && skb->protocol == htons(ETH_P_IP))
+		family = PF_INET;
 
-	peerlbl_active = selinux_peerlbl_enabled();
+	if (selinux_peerlbl_enabled()) {
+		asoc->peer_secid = SECSID_NULL;
 
-	if (peerlbl_active) {
 		/* This will return peer_sid = SECSID_NULL if there are
 		 * no peer labels, see security_net_peersid_resolve().
 		 */
-		err = selinux_skb_peerlbl_sid(skb, asoc->base.sk->sk_family,
-					      &peer_sid);
+		err = selinux_skb_peerlbl_sid(skb, family, &asoc->peer_secid);
 		if (err)
 			return err;
 
-		if (peer_sid == SECSID_NULL)
-			peer_sid = SECINITSID_UNLABELED;
+		if (asoc->peer_secid == SECSID_NULL)
+			asoc->peer_secid = SECINITSID_UNLABELED;
+	} else {
+		asoc->peer_secid = SECINITSID_UNLABELED;
 	}
 
 	if (sksec->sctp_assoc_state == SCTP_ASSOC_UNSET) {
@@ -5279,8 +5280,8 @@ static int selinux_sctp_assoc_request(struct sctp_association *asoc,
 		 * then it is approved by policy and used as the primary
 		 * peer SID for getpeercon(3).
 		 */
-		sksec->peer_sid = peer_sid;
-	} else if  (sksec->peer_sid != peer_sid) {
+		sksec->peer_sid = asoc->peer_secid;
+	} else if (sksec->peer_sid != asoc->peer_secid) {
 		/* Other association peer SIDs are checked to enforce
 		 * consistency among the peer SIDs.
 		 */
@@ -5288,11 +5289,32 @@ static int selinux_sctp_assoc_request(struct sctp_association *asoc,
 		ad.u.net = &net;
 		ad.u.net->sk = asoc->base.sk;
 		err = avc_has_perm(&selinux_state,
-				   sksec->peer_sid, peer_sid, sksec->sclass,
-				   SCTP_SOCKET__ASSOCIATION, &ad);
+				   sksec->peer_sid, asoc->peer_secid,
+				   sksec->sclass, SCTP_SOCKET__ASSOCIATION,
+				   &ad);
 		if (err)
 			return err;
 	}
+	return 0;
+}
+
+/* Called whenever SCTP receives an INIT or COOKIE ECHO chunk. This
+ * happens on an incoming connect(2), sctp_connectx(3) or
+ * sctp_sendmsg(3) (with no association already present).
+ */
+static int selinux_sctp_assoc_request(struct sctp_association *asoc,
+				      struct sk_buff *skb)
+{
+	struct sk_security_struct *sksec = asoc->base.sk->sk_security;
+	u32 conn_sid;
+	int err;
+
+	if (!selinux_policycap_extsockclass())
+		return 0;
+
+	err = selinux_sctp_process_new_assoc(asoc, skb);
+	if (err)
+		return err;
 
 	/* Compute the MLS component for the connection and store
 	 * the information in asoc. This will be used by SCTP TCP type
@@ -5300,17 +5322,36 @@ static int selinux_sctp_assoc_request(struct sctp_association *asoc,
 	 * socket to be generated. selinux_sctp_sk_clone() will then
 	 * plug this into the new socket.
 	 */
-	err = selinux_conn_sid(sksec->sid, peer_sid, &conn_sid);
+	err = selinux_conn_sid(sksec->sid, asoc->peer_secid, &conn_sid);
 	if (err)
 		return err;
 
 	asoc->secid = conn_sid;
-	asoc->peer_secid = peer_sid;
 
 	/* Set any NetLabel labels including CIPSO/CALIPSO options. */
 	return selinux_netlbl_sctp_assoc_request(asoc, skb);
 }
 
+/* Called when SCTP receives a COOKIE ACK chunk as the final
+ * response to an association request (initited by us).
+ */
+static int selinux_sctp_assoc_established(struct sctp_association *asoc,
+					  struct sk_buff *skb)
+{
+	struct sk_security_struct *sksec = asoc->base.sk->sk_security;
+
+	if (!selinux_policycap_extsockclass())
+		return 0;
+
+	/* Inherit secid from the parent socket - this will be picked up
+	 * by selinux_sctp_sk_clone() if the association gets peeled off
+	 * into a new socket.
+	 */
+	asoc->secid = sksec->sid;
+
+	return selinux_sctp_process_new_assoc(asoc, skb);
+}
+
 /* Check if sctp IPv4/IPv6 addresses are valid for binding or connecting
  * based on their @optname.
  */
@@ -7131,6 +7172,7 @@ static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = {
 	LSM_HOOK_INIT(sctp_assoc_request, selinux_sctp_assoc_request),
 	LSM_HOOK_INIT(sctp_sk_clone, selinux_sctp_sk_clone),
 	LSM_HOOK_INIT(sctp_bind_connect, selinux_sctp_bind_connect),
+	LSM_HOOK_INIT(sctp_assoc_established, selinux_sctp_assoc_established),
 	LSM_HOOK_INIT(inet_conn_request, selinux_inet_conn_request),
 	LSM_HOOK_INIT(inet_csk_clone, selinux_inet_csk_clone),
 	LSM_HOOK_INIT(inet_conn_established, selinux_inet_conn_established),
-- 
GitLab


From f122d103b564e5fb7c82de902c6f8f6cbdf50ec9 Mon Sep 17 00:00:00 2001
From: Chengming Zhou <zhouchengming@bytedance.com>
Date: Sun, 13 Feb 2022 16:59:02 +0800
Subject: [PATCH 0435/1586] blk-cgroup: set blkg iostat after percpu stat
 aggregation

Don't need to do blkg_iostat_set for top blkg iostat on each CPU,
so move it after percpu stat aggregation.

Fixes: ef45fe470e1e ("blk-cgroup: show global disk stats in root cgroup io.stat")
Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com>
Acked-by: Tejun Heo <tj@kernel.org>
Link: https://lore.kernel.org/r/20220213085902.88884-1-zhouchengming@bytedance.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-cgroup.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 4108d445c73af..fa063c6c0338e 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -856,11 +856,11 @@ static void blkcg_fill_root_iostats(void)
 			blk_queue_root_blkg(bdev_get_queue(bdev));
 		struct blkg_iostat tmp;
 		int cpu;
+		unsigned long flags;
 
 		memset(&tmp, 0, sizeof(tmp));
 		for_each_possible_cpu(cpu) {
 			struct disk_stats *cpu_dkstats;
-			unsigned long flags;
 
 			cpu_dkstats = per_cpu_ptr(bdev->bd_stats, cpu);
 			tmp.ios[BLKG_IOSTAT_READ] +=
@@ -876,11 +876,11 @@ static void blkcg_fill_root_iostats(void)
 				cpu_dkstats->sectors[STAT_WRITE] << 9;
 			tmp.bytes[BLKG_IOSTAT_DISCARD] +=
 				cpu_dkstats->sectors[STAT_DISCARD] << 9;
-
-			flags = u64_stats_update_begin_irqsave(&blkg->iostat.sync);
-			blkg_iostat_set(&blkg->iostat.cur, &tmp);
-			u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags);
 		}
+
+		flags = u64_stats_update_begin_irqsave(&blkg->iostat.sync);
+		blkg_iostat_set(&blkg->iostat.cur, &tmp);
+		u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags);
 	}
 }
 
-- 
GitLab


From b62a8486de3ab1d7c2353ec422b9cca3abfcfbcd Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Mon, 31 Jan 2022 16:54:52 +0000
Subject: [PATCH 0436/1586] elfcore: Replace CONFIG_{IA64, UML} checks with a
 new option

As arm64 is about to introduce MTE-specific phdrs in the core dump, add
a common CONFIG_ARCH_BINFMT_ELF_EXTRA_PHDRS option currently selectable
by UML_X86 and IA64.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Eric Biederman <ebiederm@xmission.com>
Link: https://lore.kernel.org/r/20220131165456.2160675-2-catalin.marinas@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/ia64/Kconfig       | 1 +
 arch/x86/um/Kconfig     | 1 +
 fs/Kconfig.binfmt       | 3 +++
 include/linux/elfcore.h | 4 ++--
 4 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index a7e01573abd83..e003b2473c64d 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -8,6 +8,7 @@ menu "Processor type and features"
 
 config IA64
 	bool
+	select ARCH_BINFMT_ELF_EXTRA_PHDRS
 	select ARCH_HAS_DMA_MARK_CLEAN
 	select ARCH_HAS_STRNCPY_FROM_USER
 	select ARCH_HAS_STRNLEN_USER
diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig
index 40d6a06e41c81..ead7e5b3a9757 100644
--- a/arch/x86/um/Kconfig
+++ b/arch/x86/um/Kconfig
@@ -8,6 +8,7 @@ endmenu
 
 config UML_X86
 	def_bool y
+	select ARCH_BINFMT_ELF_EXTRA_PHDRS if X86_32
 
 config 64BIT
 	bool "64-bit kernel" if "$(SUBARCH)" = "x86"
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index 4d5ae61580aae..68e5862837649 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -36,6 +36,9 @@ config COMPAT_BINFMT_ELF
 config ARCH_BINFMT_ELF_STATE
 	bool
 
+config ARCH_BINFMT_ELF_EXTRA_PHDRS
+	bool
+
 config ARCH_HAVE_ELF_PROT
 	bool
 
diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h
index 746e081879a5a..f8e206e82476c 100644
--- a/include/linux/elfcore.h
+++ b/include/linux/elfcore.h
@@ -114,7 +114,7 @@ static inline int elf_core_copy_task_fpregs(struct task_struct *t, struct pt_reg
 #endif
 }
 
-#if (defined(CONFIG_UML) && defined(CONFIG_X86_32)) || defined(CONFIG_IA64)
+#ifdef CONFIG_ARCH_BINFMT_ELF_EXTRA_PHDRS
 /*
  * These functions parameterize elf_core_dump in fs/binfmt_elf.c to write out
  * extra segments containing the gate DSO contents.  Dumping its
@@ -149,6 +149,6 @@ static inline size_t elf_core_extra_data_size(void)
 {
 	return 0;
 }
-#endif
+#endif /* CONFIG_ARCH_BINFMT_ELF_EXTRA_PHDRS */
 
 #endif /* _LINUX_ELFCORE_H */
-- 
GitLab


From 761b9b366cec0c81a1cd80930f00611d86521d1b Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Mon, 31 Jan 2022 16:54:53 +0000
Subject: [PATCH 0437/1586] elf: Introduce the ARM MTE ELF segment type

Memory tags will be dumped in the core file as segments with their own
type. Discussions with the binutils and the generic ABI community
settled on using new definitions in the PT_*PROC space (and to be
documented in the processor-specific ABIs).

Introduce PT_ARM_MEMTAG_MTE as (PT_LOPROC + 0x1). Not included in this
patch since there is no upstream support but the CHERI/BSD community
will also reserve:

  #define PT_ARM_MEMTAG_CHERI    (PT_LOPROC + 0x2)
  #define PT_RISCV_MEMTAG_CHERI  (PT_LOPROC + 0x3)

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Luis Machado <luis.machado@linaro.org>
Link: https://lore.kernel.org/r/20220131165456.2160675-3-catalin.marinas@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 include/uapi/linux/elf.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h
index 61bf4774b8f2a..fe8e5b74cb39e 100644
--- a/include/uapi/linux/elf.h
+++ b/include/uapi/linux/elf.h
@@ -40,6 +40,9 @@ typedef __s64	Elf64_Sxword;
 
 #define PT_GNU_STACK	(PT_LOOS + 0x474e551)
 
+/* ARM MTE memory tag segment type */
+#define PT_ARM_MEMTAG_MTE	(PT_LOPROC + 0x1)
+
 /*
  * Extended Numbering
  *
-- 
GitLab


From ab1e435ca7913e384ed801210418633eee43a71b Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Mon, 31 Jan 2022 16:54:54 +0000
Subject: [PATCH 0438/1586] arm64: mte: Define the number of bytes for storing
 the tags in a page

Rather than explicitly calculating the number of bytes for a compact tag
storage format corresponding to a page, just add a MTE_PAGE_TAG_STORAGE
macro. With the current MTE implementation of 4 bits per tag, we store
2 tags in a byte.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Luis Machado <luis.machado@linaro.org>
Link: https://lore.kernel.org/r/20220131165456.2160675-4-catalin.marinas@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/mte-def.h | 1 +
 arch/arm64/lib/mte.S             | 4 ++--
 arch/arm64/mm/mteswap.c          | 2 +-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/include/asm/mte-def.h b/arch/arm64/include/asm/mte-def.h
index 626d359b396e5..14ee86b019c2e 100644
--- a/arch/arm64/include/asm/mte-def.h
+++ b/arch/arm64/include/asm/mte-def.h
@@ -11,6 +11,7 @@
 #define MTE_TAG_SHIFT		56
 #define MTE_TAG_SIZE		4
 #define MTE_TAG_MASK		GENMASK((MTE_TAG_SHIFT + (MTE_TAG_SIZE - 1)), MTE_TAG_SHIFT)
+#define MTE_PAGE_TAG_STORAGE	(MTE_GRANULES_PER_PAGE * MTE_TAG_SIZE / 8)
 
 #define __MTE_PREAMBLE		ARM64_ASM_PREAMBLE ".arch_extension memtag\n"
 
diff --git a/arch/arm64/lib/mte.S b/arch/arm64/lib/mte.S
index f531dcb95174a..8590af3c98c0b 100644
--- a/arch/arm64/lib/mte.S
+++ b/arch/arm64/lib/mte.S
@@ -134,7 +134,7 @@ SYM_FUNC_END(mte_copy_tags_to_user)
 /*
  * Save the tags in a page
  *   x0 - page address
- *   x1 - tag storage
+ *   x1 - tag storage, MTE_PAGE_TAG_STORAGE bytes
  */
 SYM_FUNC_START(mte_save_page_tags)
 	multitag_transfer_size x7, x5
@@ -158,7 +158,7 @@ SYM_FUNC_END(mte_save_page_tags)
 /*
  * Restore the tags in a page
  *   x0 - page address
- *   x1 - tag storage
+ *   x1 - tag storage, MTE_PAGE_TAG_STORAGE bytes
  */
 SYM_FUNC_START(mte_restore_page_tags)
 	multitag_transfer_size x7, x5
diff --git a/arch/arm64/mm/mteswap.c b/arch/arm64/mm/mteswap.c
index 7c4ef56265ee1..a9e50e930484a 100644
--- a/arch/arm64/mm/mteswap.c
+++ b/arch/arm64/mm/mteswap.c
@@ -12,7 +12,7 @@ static DEFINE_XARRAY(mte_pages);
 void *mte_allocate_tag_storage(void)
 {
 	/* tags granule is 16 bytes, 2 tags stored per byte */
-	return kmalloc(PAGE_SIZE / 16 / 2, GFP_KERNEL);
+	return kmalloc(MTE_PAGE_TAG_STORAGE, GFP_KERNEL);
 }
 
 void mte_free_tag_storage(char *storage)
-- 
GitLab


From 6dd8b1a0b6cb3ed93d24110e02e67ff9d006610a Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Mon, 31 Jan 2022 16:54:55 +0000
Subject: [PATCH 0439/1586] arm64: mte: Dump the MTE tags in the core file

For each vma mapped with PROT_MTE (the VM_MTE flag set), generate a
PT_ARM_MEMTAG_MTE segment in the core file and dump the corresponding
tags. The in-file size for such segments is 128 bytes per page.

For pages in a VM_MTE vma which are not present in the user page tables
or don't have the PG_mte_tagged flag set (e.g. execute-only), just write
zeros in the core file.

An example of program headers for two vmas, one 2-page, the other 4-page
long:

  Type           Offset   VirtAddr           PhysAddr           FileSiz  MemSiz   Flg Align
  ...
  LOAD           0x030000 0x0000ffff80034000 0x0000000000000000 0x000000 0x002000 RW  0x1000
  LOAD           0x030000 0x0000ffff80036000 0x0000000000000000 0x004000 0x004000 RW  0x1000
  ...
  LOPROC+0x1     0x05b000 0x0000ffff80034000 0x0000000000000000 0x000100 0x002000     0
  LOPROC+0x1     0x05b100 0x0000ffff80036000 0x0000000000000000 0x000200 0x004000     0

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Luis Machado <luis.machado@linaro.org>
Reviewed-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20220131165456.2160675-5-catalin.marinas@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/Kconfig          |   1 +
 arch/arm64/kernel/Makefile  |   1 +
 arch/arm64/kernel/elfcore.c | 123 ++++++++++++++++++++++++++++++++++++
 3 files changed, 125 insertions(+)
 create mode 100644 arch/arm64/kernel/elfcore.c

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index cbcd42decb2ad..b55c11796fad2 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -10,6 +10,7 @@ config ARM64
 	select ACPI_SPCR_TABLE if ACPI
 	select ACPI_PPTT if ACPI
 	select ARCH_HAS_DEBUG_WX
+	select ARCH_BINFMT_ELF_EXTRA_PHDRS
 	select ARCH_BINFMT_ELF_STATE
 	select ARCH_CORRECT_STACKTRACE_ON_KRETPROBE
 	select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 88b3e2a214084..986837d7ec82d 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -61,6 +61,7 @@ obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL)	+= acpi_parking_protocol.o
 obj-$(CONFIG_PARAVIRT)			+= paravirt.o
 obj-$(CONFIG_RANDOMIZE_BASE)		+= kaslr.o
 obj-$(CONFIG_HIBERNATION)		+= hibernate.o hibernate-asm.o
+obj-$(CONFIG_ELF_CORE)			+= elfcore.o
 obj-$(CONFIG_KEXEC_CORE)		+= machine_kexec.o relocate_kernel.o	\
 					   cpu-reset.o
 obj-$(CONFIG_KEXEC_FILE)		+= machine_kexec_file.o kexec_image.o
diff --git a/arch/arm64/kernel/elfcore.c b/arch/arm64/kernel/elfcore.c
new file mode 100644
index 0000000000000..3455ee4acc04c
--- /dev/null
+++ b/arch/arm64/kernel/elfcore.c
@@ -0,0 +1,123 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/coredump.h>
+#include <linux/elfcore.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+
+#include <asm/cpufeature.h>
+#include <asm/mte.h>
+
+#define for_each_mte_vma(tsk, vma)					\
+	if (system_supports_mte())					\
+		for (vma = tsk->mm->mmap; vma; vma = vma->vm_next)	\
+			if (vma->vm_flags & VM_MTE)
+
+static unsigned long mte_vma_tag_dump_size(struct vm_area_struct *vma)
+{
+	if (vma->vm_flags & VM_DONTDUMP)
+		return 0;
+
+	return vma_pages(vma) * MTE_PAGE_TAG_STORAGE;
+}
+
+/* Derived from dump_user_range(); start/end must be page-aligned */
+static int mte_dump_tag_range(struct coredump_params *cprm,
+			      unsigned long start, unsigned long end)
+{
+	unsigned long addr;
+
+	for (addr = start; addr < end; addr += PAGE_SIZE) {
+		char tags[MTE_PAGE_TAG_STORAGE];
+		struct page *page = get_dump_page(addr);
+
+		/*
+		 * get_dump_page() returns NULL when encountering an empty
+		 * page table entry that would otherwise have been filled with
+		 * the zero page. Skip the equivalent tag dump which would
+		 * have been all zeros.
+		 */
+		if (!page) {
+			dump_skip(cprm, MTE_PAGE_TAG_STORAGE);
+			continue;
+		}
+
+		/*
+		 * Pages mapped in user space as !pte_access_permitted() (e.g.
+		 * PROT_EXEC only) may not have the PG_mte_tagged flag set.
+		 */
+		if (!test_bit(PG_mte_tagged, &page->flags)) {
+			put_page(page);
+			dump_skip(cprm, MTE_PAGE_TAG_STORAGE);
+			continue;
+		}
+
+		mte_save_page_tags(page_address(page), tags);
+		put_page(page);
+		if (!dump_emit(cprm, tags, MTE_PAGE_TAG_STORAGE))
+			return 0;
+	}
+
+	return 1;
+}
+
+Elf_Half elf_core_extra_phdrs(void)
+{
+	struct vm_area_struct *vma;
+	int vma_count = 0;
+
+	for_each_mte_vma(current, vma)
+		vma_count++;
+
+	return vma_count;
+}
+
+int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset)
+{
+	struct vm_area_struct *vma;
+
+	for_each_mte_vma(current, vma) {
+		struct elf_phdr phdr;
+
+		phdr.p_type = PT_ARM_MEMTAG_MTE;
+		phdr.p_offset = offset;
+		phdr.p_vaddr = vma->vm_start;
+		phdr.p_paddr = 0;
+		phdr.p_filesz = mte_vma_tag_dump_size(vma);
+		phdr.p_memsz = vma->vm_end - vma->vm_start;
+		offset += phdr.p_filesz;
+		phdr.p_flags = 0;
+		phdr.p_align = 0;
+
+		if (!dump_emit(cprm, &phdr, sizeof(phdr)))
+			return 0;
+	}
+
+	return 1;
+}
+
+size_t elf_core_extra_data_size(void)
+{
+	struct vm_area_struct *vma;
+	size_t data_size = 0;
+
+	for_each_mte_vma(current, vma)
+		data_size += mte_vma_tag_dump_size(vma);
+
+	return data_size;
+}
+
+int elf_core_write_extra_data(struct coredump_params *cprm)
+{
+	struct vm_area_struct *vma;
+
+	for_each_mte_vma(current, vma) {
+		if (vma->vm_flags & VM_DONTDUMP)
+			continue;
+
+		if (!mte_dump_tag_range(cprm, vma->vm_start, vma->vm_end))
+			return 0;
+	}
+
+	return 1;
+}
-- 
GitLab


From 731451ab3c0c6fe88142dbc73a74c71bd92a5cff Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Mon, 31 Jan 2022 16:54:56 +0000
Subject: [PATCH 0440/1586] arm64: mte: Document the core dump file format

Add the program header definition and data layout for the
PT_ARM_MEMTAG_MTE segments.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Luis Machado <luis.machado@linaro.org>
Link: https://lore.kernel.org/r/20220131165456.2160675-6-catalin.marinas@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 .../arm64/memory-tagging-extension.rst        | 23 +++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/Documentation/arm64/memory-tagging-extension.rst b/Documentation/arm64/memory-tagging-extension.rst
index 7b99c8f428eb6..5a70d7a3ca129 100644
--- a/Documentation/arm64/memory-tagging-extension.rst
+++ b/Documentation/arm64/memory-tagging-extension.rst
@@ -213,6 +213,29 @@ address ABI control and MTE configuration of a process as per the
 Documentation/arm64/tagged-address-abi.rst and above. The corresponding
 ``regset`` is 1 element of 8 bytes (``sizeof(long))``).
 
+Core dump support
+-----------------
+
+The allocation tags for user memory mapped with ``PROT_MTE`` are dumped
+in the core file as additional ``PT_ARM_MEMTAG_MTE`` segments. The
+program header for such segment is defined as:
+
+:``p_type``: ``PT_ARM_MEMTAG_MTE``
+:``p_flags``: 0
+:``p_offset``: segment file offset
+:``p_vaddr``: segment virtual address, same as the corresponding
+  ``PT_LOAD`` segment
+:``p_paddr``: 0
+:``p_filesz``: segment size in file, calculated as ``p_mem_sz / 32``
+  (two 4-bit tags cover 32 bytes of memory)
+:``p_memsz``: segment size in memory, same as the corresponding
+  ``PT_LOAD`` segment
+:``p_align``: 0
+
+The tags are stored in the core file at ``p_offset`` as two 4-bit tags
+in a byte. With the tag granule of 16 bytes, a 4K page requires 128
+bytes in the core file.
+
 Example of correct usage
 ========================
 
-- 
GitLab


From a693396fd569c0a33e4b0f82398fbd6410a7beec Mon Sep 17 00:00:00 2001
From: Frank Rowand <frank.rowand@sony.com>
Date: Thu, 10 Feb 2022 17:36:30 -0600
Subject: [PATCH 0441/1586] Documentation: dev-tools: clarify KTAP
 specification wording

Add the spec version to the title line.

Explain likely source of "Unknown lines".

"Unknown lines" in nested tests are optionally indented.

Add "Unknown lines" items to differences between TAP & KTAP list

Convert "Major differences between TAP and KTAP" from a bullet list
to a table.  The bullet list was being formatted as a single
paragraph.

Reviewed-by: Tim Bird <Tim.Bird@sony.com>
Reviewed-by: David Gow <davidgow@google.com>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Signed-off-by: Frank Rowand <frank.rowand@sony.com>
Reviewed-by: Brendan Higgins <brendanhiggins@google.com>
Link: https://lore.kernel.org/r/20220210233630.3304495-1-frowand.list@gmail.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/dev-tools/ktap.rst | 31 +++++++++++++++++++++++--------
 1 file changed, 23 insertions(+), 8 deletions(-)

diff --git a/Documentation/dev-tools/ktap.rst b/Documentation/dev-tools/ktap.rst
index 878530cb9c271..d7fe05de40b44 100644
--- a/Documentation/dev-tools/ktap.rst
+++ b/Documentation/dev-tools/ktap.rst
@@ -1,8 +1,8 @@
 .. SPDX-License-Identifier: GPL-2.0
 
-========================================
-The Kernel Test Anything Protocol (KTAP)
-========================================
+===================================================
+The Kernel Test Anything Protocol (KTAP), version 1
+===================================================
 
 TAP, or the Test Anything Protocol is a format for specifying test results used
 by a number of projects. It's website and specification are found at this `link
@@ -174,6 +174,13 @@ There may be lines within KTAP output that do not follow the format of one of
 the four formats for lines described above. This is allowed, however, they will
 not influence the status of the tests.
 
+This is an important difference from TAP.  Kernel tests may print messages
+to the system console or a log file.  Both of these destinations may contain
+messages either from unrelated kernel or userspace activity, or kernel
+messages from non-test code that is invoked by the test.  The kernel code
+invoked by the test likely is not aware that a test is in progress and
+thus can not print the message as a diagnostic message.
+
 Nested tests
 ------------
 
@@ -186,10 +193,13 @@ starting with another KTAP version line and test plan, and end with the overall
 result. If one of the subtests fail, for example, the parent test should also
 fail.
 
-Additionally, all result lines in a subtest should be indented. One level of
+Additionally, all lines in a subtest should be indented. One level of
 indentation is two spaces: "  ". The indentation should begin at the version
 line and should end before the parent test's result line.
 
+"Unknown lines" are not considered to be lines in a subtest and thus are
+allowed to be either indented or not indented.
+
 An example of a test with two nested subtests:
 
 .. code-block::
@@ -224,10 +234,15 @@ An example format with multiple levels of nested testing:
 Major differences between TAP and KTAP
 --------------------------------------
 
-Note the major differences between the TAP and KTAP specification:
-- yaml and json are not recommended in diagnostic messages
-- TODO directive not recognized
-- KTAP allows for an arbitrary number of tests to be nested
+==================================================   =========  ===============
+Feature                                              TAP        KTAP
+==================================================   =========  ===============
+yaml and json in diagnosic message                   ok         not recommended
+TODO directive                                       ok         not recognized
+allows an arbitrary number of tests to be nested     no         yes
+"Unknown lines" are in category of "Anything else"   yes        no
+"Unknown lines" are                                  incorrect  allowed
+==================================================   =========  ===============
 
 The TAP14 specification does permit nested tests, but instead of using another
 nested version line, uses a line of the form
-- 
GitLab


From 013ebb6d822a51ccc8ad162e7c8d02fbd7e90dd1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <pali@kernel.org>
Date: Thu, 10 Feb 2022 20:22:00 +0100
Subject: [PATCH 0442/1586] Documentation: Fix links for udftools project and
 pktcdvd tool
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Pali Rohár <pali@kernel.org>
Link: https://lore.kernel.org/r/20220210192200.30828-1-pali@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/cdrom/packet-writing.rst             | 4 ++--
 Documentation/userspace-api/ioctl/ioctl-number.rst | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/Documentation/cdrom/packet-writing.rst b/Documentation/cdrom/packet-writing.rst
index c5c957195a5a7..43db58c50d292 100644
--- a/Documentation/cdrom/packet-writing.rst
+++ b/Documentation/cdrom/packet-writing.rst
@@ -11,7 +11,7 @@ Getting started quick
 - Compile and install kernel and modules, reboot.
 
 - You need the udftools package (pktsetup, mkudffs, cdrwtool).
-  Download from http://sourceforge.net/projects/linux-udf/
+  Download from https://github.com/pali/udftools
 
 - Grab a new CD-RW disc and format it (assuming CD-RW is hdc, substitute
   as appropriate)::
@@ -102,7 +102,7 @@ Using the pktcdvd sysfs interface
 
 Since Linux 2.6.20, the pktcdvd module has a sysfs interface
 and can be controlled by it. For example the "pktcdvd" tool uses
-this interface. (see http://tom.ist-im-web.de/download/pktcdvd )
+this interface. (see http://tom.ist-im-web.de/linux/software/pktcdvd )
 
 "pktcdvd" works similar to "pktsetup", e.g.::
 
diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst
index 687efcf245c13..559c436276c6a 100644
--- a/Documentation/userspace-api/ioctl/ioctl-number.rst
+++ b/Documentation/userspace-api/ioctl/ioctl-number.rst
@@ -253,7 +253,7 @@ Code  Seq#    Include File                                           Comments
 'l'   00-3F  linux/tcfs_fs.h                                         transparent cryptographic file system
                                                                      <http://web.archive.org/web/%2A/http://mikonos.dia.unisa.it/tcfs>
 'l'   40-7F  linux/udf_fs_i.h                                        in development:
-                                                                     <http://sourceforge.net/projects/linux-udf/>
+                                                                     <https://github.com/pali/udftools>
 'm'   00-09  linux/mmtimer.h                                         conflict!
 'm'   all    linux/mtio.h                                            conflict!
 'm'   all    linux/soundcard.h                                       conflict!
-- 
GitLab


From 339cf5a2c6fb8559f30b9d4bd82c1dc4d7a16468 Mon Sep 17 00:00:00 2001
From: Tang Yizhou <tangyizhou@huawei.com>
Date: Tue, 8 Feb 2022 21:37:16 +0800
Subject: [PATCH 0443/1586] docs/zh_CN: Add energy-model Chinese translation

Translate power/energy-model.rst into Chinese.

Signed-off-by: Tang Yizhou <tangyizhou@huawei.com>
Reviewed-by: Alex Shi <alexs@kernel.org>
Link: https://lore.kernel.org/r/20220208133716.24070-1-tangyizhou@huawei.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../translations/zh_CN/power/energy-model.rst | 190 ++++++++++++++++++
 .../translations/zh_CN/power/index.rst        |   2 +-
 2 files changed, 191 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/translations/zh_CN/power/energy-model.rst

diff --git a/Documentation/translations/zh_CN/power/energy-model.rst b/Documentation/translations/zh_CN/power/energy-model.rst
new file mode 100644
index 0000000000000..c7da1b6aefeee
--- /dev/null
+++ b/Documentation/translations/zh_CN/power/energy-model.rst
@@ -0,0 +1,190 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/power/energy-model.rst
+
+:翻译:
+
+  唐艺舟 Tang Yizhou <tangyeechou@gmail.com>
+
+============
+设备能量模型
+============
+
+1. 概述
+-------
+
+能量模型（EM）框架是一种驱动程序与内核子系统之间的接口。其中驱动程序了解不同
+性能层级的设备所消耗的功率，而内核子系统愿意使用该信息做出能量感知决策。
+
+设备所消耗的功率的信息来源在不同的平台上可能有很大的不同。这些功率成本在某些
+情况下可以使用设备树数据来估算。在其它情况下，固件会更清楚。或者，用户空间可能
+是最清楚的。以此类推。为了避免每一个客户端子系统对每一种可能的信息源自己重新
+实现支持，EM框架作为一个抽象层介入，它在内核中对功率成本表的格式进行标准化，
+因此能够避免多余的工作。
+
+功率值可以用毫瓦或“抽象刻度”表示。多个子系统可能使用EM，由系统集成商来检查
+功率值刻度类型的要求是否满足。可以在能量感知调度器的文档中找到一个例子
+Documentation/scheduler/sched-energy.rst。对于一些子系统，比如热能或
+powercap，用“抽象刻度”描述功率值可能会导致问题。这些子系统对过去使用的功率的
+估算值更感兴趣，因此可能需要真实的毫瓦。这些要求的一个例子可以在智能功率分配
+Documentation/driver-api/thermal/power_allocator.rst文档中找到。
+
+内核子系统可能（基于EM内部标志位）实现了对EM注册设备是否具有不一致刻度的自动
+检查。要记住的重要事情是，当功率值以“抽象刻度”表示时，从中推导以毫焦耳为单位
+的真实能量消耗是不可能的。
+
+下图描述了一个驱动的例子（这里是针对Arm的，但该方法适用于任何体系结构），它
+向EM框架提供了功率成本，感兴趣的客户端可从中读取数据::
+
+       +---------------+  +-----------------+  +---------------+
+       | Thermal (IPA) |  | Scheduler (EAS) |  |     Other     |
+       +---------------+  +-----------------+  +---------------+
+               |                   | em_cpu_energy()   |
+               |                   | em_cpu_get()      |
+               +---------+         |         +---------+
+                         |         |         |
+                         v         v         v
+                        +---------------------+
+                        |    Energy Model     |
+                        |     Framework       |
+                        +---------------------+
+                           ^       ^       ^
+                           |       |       | em_dev_register_perf_domain()
+                +----------+       |       +---------+
+                |                  |                 |
+        +---------------+  +---------------+  +--------------+
+        |  cpufreq-dt   |  |   arm_scmi    |  |    Other     |
+        +---------------+  +---------------+  +--------------+
+                ^                  ^                 ^
+                |                  |                 |
+        +--------------+   +---------------+  +--------------+
+        | Device Tree  |   |   Firmware    |  |      ?       |
+        +--------------+   +---------------+  +--------------+
+
+对于CPU设备，EM框架管理着系统中每个“性能域”的功率成本表。一个性能域是一组
+性能一起伸缩的CPU。性能域通常与CPUFreq策略具有1对1映射。一个性能域中的
+所有CPU要求具有相同的微架构。不同性能域中的CPU可以有不同的微架构。
+
+
+2. 核心API
+----------
+
+2.1 配置选项
+^^^^^^^^^^^^
+
+必须使能CONFIG_ENERGY_MODEL才能使用EM框架。
+
+
+2.2 性能域的注册
+^^^^^^^^^^^^^^^^
+
+“高级”EM的注册
+~~~~~~~~~~~~~~~~
+
+“高级”EM因它允许驱动提供更精确的功率模型而得名。它并不受限于框架中的一些已
+实现的数学公式（就像“简单”EM那样）。它可以更好地反映每个性能状态的实际功率
+测量。因此，在EM静态功率（漏电流功率）是重要的情况下，应该首选这种注册方式。
+
+驱动程序应通过以下API将性能域注册到EM框架中::
+
+  int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states,
+		struct em_data_callback *cb, cpumask_t *cpus, bool milliwatts);
+
+驱动程序必须提供一个回调函数，为每个性能状态返回<频率,功率>元组。驱动程序
+提供的回调函数可以自由地从任何相关位置（DT、固件......）以及以任何被认为是
+必要的方式获取数据。只有对于CPU设备，驱动程序必须使用cpumask指定性能域的CPU。
+对于CPU以外的其他设备，最后一个参数必须被设置为NULL。
+
+最后一个参数“milliwatts”（毫瓦）设置成正确的值是很重要的，使用EM的内核
+子系统可能会依赖这个标志来检查所有的EM设备是否使用相同的刻度。如果有不同的
+刻度，这些子系统可能决定：返回警告/错误，停止工作或崩溃（panic）。
+
+关于实现这个回调函数的驱动程序的例子，参见第3节。或者在第2.4节阅读这个API
+的更多文档。
+
+
+“简单”EM的注册
+~~~~~~~~~~~~~~~~
+
+“简单”EM是用框架的辅助函数cpufreq_register_em_with_opp()注册的。它实现了
+一个和以下数学公式紧密相关的功率模型::
+
+	Power = C * V^2 * f
+
+使用这种方法注册的EM可能无法正确反映真实设备的物理特性，例如当静态功率
+（漏电流功率）很重要时。
+
+
+2.3 访问性能域
+^^^^^^^^^^^^^^
+
+有两个API函数提供对能量模型的访问。em_cpu_get()以CPU id为参数，em_pd_get()
+以设备指针为参数。使用哪个接口取决于子系统，但对于CPU设备来说，这两个函数都返
+回相同的性能域。
+
+对CPU的能量模型感兴趣的子系统可以通过em_cpu_get() API检索它。在创建性能域时
+分配一次能量模型表，它保存在内存中不被修改。
+
+一个性能域所消耗的能量可以使用em_cpu_energy() API来估算。该估算假定CPU设备
+使用的CPUfreq监管器是schedutil。当前该计算不能提供给其它类型的设备。
+
+关于上述API的更多细节可以在 ``<linux/energy_model.h>`` 或第2.4节中找到。
+
+
+2.4 API的细节描述
+^^^^^^^^^^^^^^^^^
+参见 include/linux/energy_model.h 和 kernel/power/energy_model.c 的kernel doc。
+
+3. 驱动示例
+-----------
+
+CPUFreq框架支持专用的回调函数，用于为指定的CPU（们）注册EM：
+cpufreq_driver::register_em()。这个回调必须为每个特定的驱动程序正确实现，
+因为框架会在设置过程中适时地调用它。本节提供了一个简单的例子，展示CPUFreq驱动
+在能量模型框架中使用（假的）“foo”协议注册性能域。该驱动实现了一个est_power()
+函数提供给EM框架::
+
+  -> drivers/cpufreq/foo_cpufreq.c
+
+  01	static int est_power(unsigned long *mW, unsigned long *KHz,
+  02			struct device *dev)
+  03	{
+  04		long freq, power;
+  05
+  06		/* 使用“foo”协议设置频率上限 */
+  07		freq = foo_get_freq_ceil(dev, *KHz);
+  08		if (freq < 0);
+  09			return freq;
+  10
+  11		/* 估算相关频率下设备的功率成本 */
+  12		power = foo_estimate_power(dev, freq);
+  13		if (power < 0);
+  14			return power;
+  15
+  16		/* 将这些值返回给EM框架 */
+  17		*mW = power;
+  18		*KHz = freq;
+  19
+  20		return 0;
+  21	}
+  22
+  23	static void foo_cpufreq_register_em(struct cpufreq_policy *policy)
+  24	{
+  25		struct em_data_callback em_cb = EM_DATA_CB(est_power);
+  26		struct device *cpu_dev;
+  27		int nr_opp;
+  28
+  29		cpu_dev = get_cpu_device(cpumask_first(policy->cpus));
+  30
+  31     	/* 查找该策略支持的OPP数量 */
+  32     	nr_opp = foo_get_nr_opp(policy);
+  33
+  34     	/* 并注册新的性能域 */
+  35     	em_dev_register_perf_domain(cpu_dev, nr_opp, &em_cb, policy->cpus,
+  36					    true);
+  37	}
+  38
+  39	static struct cpufreq_driver foo_cpufreq_driver = {
+  40		.register_em = foo_cpufreq_register_em,
+  41	};
diff --git a/Documentation/translations/zh_CN/power/index.rst b/Documentation/translations/zh_CN/power/index.rst
index ad80a9e80b7cd..bc54983ba5156 100644
--- a/Documentation/translations/zh_CN/power/index.rst
+++ b/Documentation/translations/zh_CN/power/index.rst
@@ -14,6 +14,7 @@
 .. toctree::
     :maxdepth: 1
 
+    energy-model
     opp
 
 TODOList:
@@ -22,7 +23,6 @@ TODOList:
     * basic-pm-debugging
     * charger-manager
     * drivers-testing
-    * energy-model
     * freezing-of-tasks
     * pci
     * pm_qos_interface
-- 
GitLab


From d535e6c25a8131324aef4534e14d09b4d81c98a9 Mon Sep 17 00:00:00 2001
From: Tang Yizhou <tangyizhou@huawei.com>
Date: Tue, 8 Feb 2022 10:01:05 +0800
Subject: [PATCH 0444/1586] docs/zh_CN: Add sched-energy Chinese translation

Translate scheduler/sched-energy.rst into Chinese.

Signed-off-by: Tang Yizhou <tangyizhou@huawei.com>
Reviewed-by: Alex Shi <alexs@kernel.org>
Reviewed-by: Yanteng Si <siyanteng@loongson.cn>
Link: https://lore.kernel.org/r/20220208020105.14117-1-tangyizhou@huawei.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../translations/zh_CN/scheduler/index.rst    |   4 +-
 .../zh_CN/scheduler/sched-energy.rst          | 351 ++++++++++++++++++
 2 files changed, 353 insertions(+), 2 deletions(-)
 create mode 100644 Documentation/translations/zh_CN/scheduler/sched-energy.rst

diff --git a/Documentation/translations/zh_CN/scheduler/index.rst b/Documentation/translations/zh_CN/scheduler/index.rst
index f8f8f35d53c71..ab79259802668 100644
--- a/Documentation/translations/zh_CN/scheduler/index.rst
+++ b/Documentation/translations/zh_CN/scheduler/index.rst
@@ -5,6 +5,7 @@
 :翻译:
 
  司延腾 Yanteng Si <siyanteng@loongson.cn>
+ 唐艺舟 Tang Yizhou <tangyeechou@gmail.com>
 
 :校译:
 
@@ -23,13 +24,12 @@ Linux调度器
     sched-design-CFS
     sched-domains
     sched-capacity
+    sched-energy
 
 
 TODOList:
 
-    sched-bwc
     sched-deadline
-    sched-energy
     sched-nice-design
     sched-rt-group
     sched-stats
diff --git a/Documentation/translations/zh_CN/scheduler/sched-energy.rst b/Documentation/translations/zh_CN/scheduler/sched-energy.rst
new file mode 100644
index 0000000000000..fdbf6cfeea933
--- /dev/null
+++ b/Documentation/translations/zh_CN/scheduler/sched-energy.rst
@@ -0,0 +1,351 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/scheduler/sched-energy.rst
+
+:翻译:
+
+  唐艺舟 Tang Yizhou <tangyeechou@gmail.com>
+
+============
+能量感知调度
+============
+
+1. 简介
+-------
+
+能量感知调度（EAS）使调度器有能力预测其决策对CPU所消耗的能量的影响。EAS依靠
+一个能量模型（EM）来为每个任务选择一个节能的CPU，同时最小化对吞吐率的影响。
+本文档致力于介绍介绍EAS是如何工作的，它背后的主要设计决策是什么，以及使其运行
+所需的条件细节。
+
+在进一步阅读之前，请注意，在撰写本文时::
+
+   /!\ EAS不支持对称CPU拓扑的平台 /!\
+
+EAS只在异构CPU拓扑结构（如Arm大小核，big.LITTLE）上运行。因为在这种情况下，
+通过调度来节约能量的潜力是最大的。
+
+EAS实际使用的EM不是由调度器维护的，而是一个专门的框架。关于这个框架的细节和
+它提供的内容，请参考其文档（见Documentation/power/energy-model.rst）。
+
+
+2. 背景和术语
+-------------
+
+从一开始就说清楚定义:
+ - 能量 = [焦耳] （比如供电设备上的电池提供的资源）
+ - 功率 = 能量/时间 = [焦耳/秒] = [瓦特]
+
+ EAS的目标是最小化能量消耗，同时仍能将工作完成。也就是说，我们要最大化::
+
+	性能 [指令数/秒]
+	----------------
+	   功率 [瓦特]
+
+它等效于最小化::
+
+	能量 [焦耳]
+	-----------
+	   指令数
+
+同时仍然获得“良好”的性能。当前调度器只考虑性能目标，因此该式子本质上是一个
+可选的优化目标，它同时考虑了两个目标：能量效率和性能。
+
+引入EM的想法是为了让调度器评估其决策的影响，而不是盲目地应用可能仅在部分
+平台有正面效果的节能技术。同时，EM必须尽可能的简单，以最小化调度器的时延
+影响。
+
+简而言之，EAS改变了CFS任务分配给CPU的方式。当调度器决定一个任务应该在哪里
+运行时（在唤醒期间），EM被用来在不损害系统吞吐率的情况下，从几个较好的候选
+CPU中挑选一个经预测能量消耗最优的CPU。EAS的预测依赖于对平台拓扑结构特定元素
+的了解，包括CPU的“算力”，以及它们各自的能量成本。
+
+
+3. 拓扑信息
+-----------
+
+EAS（以及调度器的剩余部分）使用“算力”的概念来区分不同计算吞吐率的CPU。一个
+CPU的“算力”代表了它在最高频率下运行时能完成的工作量，且这个值是相对系统中
+算力最大的CPU而言的。算力值被归一化为1024以内，并且可与由实体负载跟踪
+（PELT）机制算出的利用率信号做对比。由于有算力值和利用率值，EAS能够估计一个
+任务/CPU有多大/有多忙，并在评估性能与能量时将其考虑在内。CPU算力由特定体系
+结构实现的arch_scale_cpu_capacity()回调函数提供。
+
+EAS使用的其余平台信息是直接从能量模型（EM）框架中读取的。一个平台的EM是一张
+表，表中每项代表系统中一个“性能域”的功率成本。（若要了解更多关于性能域的细节，
+见Documentation/power/energy-model.rst）
+
+当调度域被建立或重新建立时，调度器管理对拓扑代码中EM对象的引用。对于每个根域
+（rd），调度器维护一个与当前rd->span相交的所有性能域的单向链表。链表中的每个
+节点都包含一个指向EM框架所提供的结构体em_perf_domain的指针。
+
+链表被附加在根域上，以应对独占的cpuset的配置。由于独占的cpuset的边界不一定与
+性能域的边界一致，不同根域的链表可能包含重复的元素。
+
+示例1
+    让我们考虑一个有12个CPU的平台，分成3个性能域，（pd0，pd4和pd8），按以下
+    方式组织::
+
+	          CPUs:   0 1 2 3 4 5 6 7 8 9 10 11
+	          PDs:   |--pd0--|--pd4--|---pd8---|
+	          RDs:   |----rd1----|-----rd2-----|
+
+    现在，考虑用户空间决定将系统分成两个独占的cpusets，因此创建了两个独立的根域，
+    每个根域包含6个CPU。这两个根域在上图中被表示为rd1和rd2。由于pd4与rd1和rd2
+    都有交集，它将同时出现于附加在这两个根域的“->pd”链表中:
+
+       * rd1->pd: pd0 -> pd4
+       * rd2->pd: pd4 -> pd8
+
+    请注意，调度器将为pd4创建两个重复的链表节点（每个链表中各一个）。然而这
+    两个节点持有指向同一个EM框架的共享数据结构的指针。
+
+由于对这些链表的访问可能与热插拔及其它事件并发发生，因此它们受RCU锁保护，就像
+被调度器操控的拓扑结构体中剩下字段一样。
+
+EAS同样维护了一个静态键（sched_energy_present），当至少有一个根域满足EAS
+启动的所有条件时，这个键就会被启动。在第6节中总结了这些条件。
+
+
+4. 能量感知任务放置
+-------------------
+
+EAS覆盖了CFS的任务唤醒平衡代码。在唤醒平衡时，它使用平台的EM和PELT信号来选择节能
+的目标CPU。当EAS被启用时，select_task_rq_fair()调用find_energy_efficient_cpu()
+来做任务放置决定。这个函数寻找在每个性能域中寻找具有最高剩余算力（CPU算力 - CPU
+利用率）的CPU，因为它能让我们保持最低的频率。然后，该函数检查将任务放在新CPU相较
+依然放在之前活动的prev_cpu是否可以节省能量。
+
+如果唤醒的任务被迁移，find_energy_efficient_cpu()使用compute_energy()来估算
+系统将消耗多少能量。compute_energy()检查各CPU当前的利用率情况，并尝试调整来
+“模拟”任务迁移。EM框架提供了API em_pd_energy()计算每个性能域在给定的利用率条件
+下的预期能量消耗。
+
+下面详细介绍一个优化能量消耗的任务放置决策的例子。
+
+示例2
+    让我们考虑一个有两个独立性能域的（伪）平台，每个性能域含有2个CPU。CPU0和CPU1
+    是小核，CPU2和CPU3是大核。
+
+    调度器必须决定将任务P放在哪个CPU上，这个任务的util_avg = 200（平均利用率），
+    prev_cpu = 0（上一次运行在CPU0）。
+
+    目前CPU的利用率情况如下图所示。CPU 0-3的util_avg分别为400、100、600和500。
+    每个性能域有三个操作性能值（OPP）。与每个OPP相关的CPU算力和功率成本列在能量
+    模型表中。P的util_avg在图中显示为"PP"::
+
+     CPU util.
+      1024                 - - - - - - -              Energy Model
+                                               +-----------+-------------+
+                                               |  Little   |     Big     |
+       768                 =============       +-----+-----+------+------+
+                                               | Cap | Pwr | Cap  | Pwr  |
+                                               +-----+-----+------+------+
+       512  ===========    - ##- - - - -       | 170 | 50  | 512  | 400  |
+                             ##     ##         | 341 | 150 | 768  | 800  |
+       341  -PP - - - -      ##     ##         | 512 | 300 | 1024 | 1700 |
+             PP              ##     ##         +-----+-----+------+------+
+       170  -## - - - -      ##     ##
+             ##     ##       ##     ##
+           ------------    -------------
+            CPU0   CPU1     CPU2   CPU3
+
+      Current OPP: =====       Other OPP: - - -     util_avg (100 each): ##
+
+
+    find_energy_efficient_cpu()将首先在两个性能域中寻找具有最大剩余算力的CPU。
+    在这个例子中是CPU1和CPU3。然后，它将估算，当P被放在它们中的任意一个时，系统的
+    能耗，并检查这样做是否会比把P放在CPU0上节省一些能量。EAS假定OPPs遵循利用率
+    （这与CPUFreq监管器schedutil的行为一致。关于这个问题的更多细节，见第6节）。
+
+    **情况1. P被迁移到CPU1**::
+
+      1024                 - - - - - - -
+
+                                            Energy calculation:
+       768                 =============     * CPU0: 200 / 341 * 150 = 88
+                                             * CPU1: 300 / 341 * 150 = 131
+                                             * CPU2: 600 / 768 * 800 = 625
+       512  - - - - - -    - ##- - - - -     * CPU3: 500 / 768 * 800 = 520
+                             ##     ##          => total_energy = 1364
+       341  ===========      ##     ##
+                    PP       ##     ##
+       170  -## - - PP-      ##     ##
+             ##     ##       ##     ##
+           ------------    -------------
+            CPU0   CPU1     CPU2   CPU3
+
+
+    **情况2. P被迁移到CPU3**::
+
+      1024                 - - - - - - -
+
+                                            Energy calculation:
+       768                 =============     * CPU0: 200 / 341 * 150 = 88
+                                             * CPU1: 100 / 341 * 150 = 43
+                                    PP       * CPU2: 600 / 768 * 800 = 625
+       512  - - - - - -    - ##- - -PP -     * CPU3: 700 / 768 * 800 = 729
+                             ##     ##          => total_energy = 1485
+       341  ===========      ##     ##
+                             ##     ##
+       170  -## - - - -      ##     ##
+             ##     ##       ##     ##
+           ------------    -------------
+            CPU0   CPU1     CPU2   CPU3
+
+    **情况3. P依旧留在prev_cpu/CPU0**::
+
+      1024                 - - - - - - -
+
+                                            Energy calculation:
+       768                 =============     * CPU0: 400 / 512 * 300 = 234
+                                             * CPU1: 100 / 512 * 300 = 58
+                                             * CPU2: 600 / 768 * 800 = 625
+       512  ===========    - ##- - - - -     * CPU3: 500 / 768 * 800 = 520
+                             ##     ##          => total_energy = 1437
+       341  -PP - - - -      ##     ##
+             PP              ##     ##
+       170  -## - - - -      ##     ##
+             ##     ##       ##     ##
+           ------------    -------------
+            CPU0   CPU1     CPU2   CPU3
+
+    从这些计算结果来看，情况1的总能量最低。所以从节约能量的角度看，CPU1是最佳候选
+    者。
+
+大核通常比小核更耗电，因此主要在任务不适合在小核运行时使用。然而，小核并不总是比
+大核节能。举例来说，对于某些系统，小核的高OPP可能比大核的低OPP能量消耗更高。因此，
+如果小核在某一特定时间点刚好有足够的利用率，在此刻被唤醒的小任务放在大核执行可能
+会更节能，尽管它在小核上运行也是合适的。
+
+即使在大核所有OPP都不如小核OPP节能的情况下，在某些特定条件下，令小任务运行在大核
+上依然可能节能。事实上，将一个任务放在一个小核上可能导致整个性能域的OPP提高，这将
+增加已经在该性能域运行的任务的能量成本。如果唤醒的任务被放在一个大核上，它的执行
+成本可能比放在小核上更高，但这不会影响小核上的其它任务，这些任务将继续以较低的OPP
+运行。因此，当考虑CPU消耗的总能量时，在大核上运行一个任务的额外成本可能小于为所有
+其它运行在小核的任务提高OPP的成本。
+
+上面的例子几乎不可能以一种通用的方式得到正确的结果；同时，对于所有平台，在不知道
+系统所有CPU每个不同OPP的运行成本时，也无法得到正确的结果。得益于基于EM的设计，
+EAS应该能够正确处理这些问题而不会引发太多麻烦。然而，为了确保对高利用率场景的
+吞吐率造成的影响最小化，EAS同样实现了另外一种叫“过度利用率”的机制。
+
+
+5. 过度利用率
+-------------
+
+从一般的角度来看，EAS能提供最大帮助的是那些涉及低、中CPU利用率的使用场景。每当CPU
+密集型的长任务运行，它们将需要所有的可用CPU算力，调度器将没有什么办法来节省能量同时
+又不损害吞吐率。为了避免EAS损害性能，一旦CPU被使用的算力超过80%，它将被标记为“过度
+利用”。只要根域中没有CPU是过度利用状态，负载均衡被禁用，而EAS将覆盖唤醒平衡代码。
+EAS很可能将负载放置在系统中能量效率最高的CPU而不是其它CPU上，只要不损害吞吐率。
+因此，负载均衡器被禁用以防止它打破EAS发现的节能任务放置。当系统未处于过度利用状态时，
+这样做是安全的，因为低于80%的临界点意味着：
+
+    a. 所有的CPU都有一些空闲时间，所以EAS使用的利用率信号很可能准确地代表各种任务
+       的“大小”。
+    b. 所有任务，不管它们的nice值是多大，都应该被提供了足够多的CPU算力。
+    c. 既然有多余的算力，那么所有的任务都必须定期阻塞/休眠，在唤醒时进行平衡就足够
+       了。
+
+只要一个CPU利用率超过80%的临界点，上述三个假设中至少有一个是不正确的。在这种情况下，
+整个根域的“过度利用”标志被设置，EAS被禁用，负载均衡器被重新启用。通过这样做，调度器
+又回到了在CPU密集的条件下基于负载的算法做负载均衡。这更好地尊重了任务的nice值。
+
+由于过度利用率的概念在很大程度上依赖于检测系统中是否有一些空闲时间，所以必须考虑
+（比CFS）更高优先级的调度类（以及中断）“窃取”的CPU算力。像这样，对过度使用率的检测
+不仅要考虑CFS任务使用的算力，还需要考虑其它调度类和中断。
+
+
+6. EAS的依赖和要求
+------------------
+
+能量感知调度依赖系统的CPU具有特定的硬件属性，以及内核中的其它特性被启用。本节列出
+了这些依赖，并对如何满足这些依赖提供了提示。
+
+
+6.1 - 非对称CPU拓扑
+^^^^^^^^^^^^^^^^^^^
+
+
+如简介所提，目前只有非对称CPU拓扑结构的平台支持EAS。通过在运行时查询
+SD_ASYM_CPUCAPACITY_FULL标志位是否在创建调度域时已设置来检查这一要求是否满足。
+
+参阅Documentation/scheduler/sched-capacity.rst以了解在sched_domain层次结构
+中设置此标志位所需满足的要求。
+
+请注意，EAS并非从根本上与SMP不兼容，但在SMP平台上还没有观察到明显的节能。这一
+限制可以在将来进行修改，如果被证明不是这样的话。
+
+
+6.2 - 当前的能量模型
+^^^^^^^^^^^^^^^^^^^^
+
+EAS使用一个平台的EM来估算调度决策对能量的影响。因此，你的平台必须向EM框架提供
+能量成本表，以启动EAS。要做到这一点，请参阅文档
+Documentation/power/energy-model.rst中的独立EM框架部分。
+
+另请注意，调度域需要在EM注册后重建，以便启动EAS。
+
+EAS使用EM对能量使用率进行预测决策，因此它在检查任务放置的可能选项时更加注重
+差异。对于EAS来说，EM的功率值是以毫瓦还是以“抽象刻度”为单位表示并不重要。
+
+
+
+6.3 - 能量模型复杂度
+^^^^^^^^^^^^^^^^^^^^
+
+任务唤醒路径是时延敏感的。当一个平台的EM太复杂（太多CPU，太多性能域，太多状态
+等），在唤醒路径中使用它的成本就会升高到不可接受。能量感知唤醒算法的复杂度为：
+
+	C = Nd * (Nc + Ns)
+
+其中：Nd是性能域的数量；Nc是CPU的数量；Ns是OPP的总数（例如：对于两个性能域，
+每个域有4个OPP，则Ns = 8）。
+
+当调度域建立时，复杂性检查是在根域上进行的。如果一个根域的复杂度C恰好高于完全
+主观设定的EM_MAX_COMPLEXITY阈值（在本文写作时，是2048），则EAS不会在此根域
+启动。
+
+如果你的平台的能量模型的复杂度太高，EAS无法在这个根域上使用，但你真的想用，
+那么你就只剩下两个可能的选择：
+
+    1. 将你的系统拆分成分离的、较小的、使用独占cpuset的根域，并在每个根域局部
+       启用EAS。这个方案的好处是开箱即用，但缺点是无法在根域之间实现负载均衡，
+       这可能会导致总体系统负载不均衡。
+    2. 提交补丁以降低EAS唤醒算法的复杂度，从而使其能够在合理的时间内处理更大
+       的EM。
+
+
+6.4 - Schedutil监管器
+^^^^^^^^^^^^^^^^^^^^^
+
+EAS试图预测CPU在不久的将来会在哪个OPP下运行，以估算它们的能量消耗。为了做到
+这一点，它假定CPU的OPP跟随CPU利用率变化而变化。
+
+尽管在实践中很难对这一假设的准确性提供硬性保证（因为，举例来说硬件可能不会做
+它被要求做的事情），相对于其他CPUFreq监管器，schedutil至少_请求_使用利用率
+信号计算的频率。因此，与EAS一起使用的唯一合理的监管器是schedutil，因为它是
+唯一一个在频率请求和能量预测之间提供某种程度的一致性的监管器。
+
+不支持将EAS与schedutil之外的任何其它监管器一起使用。
+
+
+6.5 刻度不变性使用率信号
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+为了对不同的CPU和所有的性能状态做出准确的预测，EAS需要频率不变的和CPU不变的
+PELT信号。这些信号可以通过每个体系结构定义的arch_scale{cpu,freq}_capacity()
+回调函数获取。
+
+不支持在没有实现这两个回调函数的平台上使用EAS。
+
+
+6.6 多线程（SMT）
+^^^^^^^^^^^^^^^^^
+
+当前实现的EAS是不感知SMT的，因此无法利用多线程硬件节约能量。EAS认为线程是独立的
+CPU，这实际上对性能和能量消耗都是不利的。
+
+不支持在SMT上使用EAS。
-- 
GitLab


From 4fbe7b19a9485db0a53efc018fe88db25846c89f Mon Sep 17 00:00:00 2001
From: Ethan Dye <mrtops03@gmail.com>
Date: Mon, 7 Feb 2022 16:54:42 -0700
Subject: [PATCH 0445/1586] docs: Fix wording in optional zram feature docs

This fixes some simple grammar errors in the documentation for zram,
specifically errors in the optional feature section of the zram
documentation.

Signed-off-by: Ethan Dye <mrtops03@gmail.com>
Link: https://lore.kernel.org/r/20220207235442.95090-1-mrtops03@gmail.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/admin-guide/blockdev/zram.rst | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/Documentation/admin-guide/blockdev/zram.rst b/Documentation/admin-guide/blockdev/zram.rst
index 3e11926a4df95..54fe63745ed8e 100644
--- a/Documentation/admin-guide/blockdev/zram.rst
+++ b/Documentation/admin-guide/blockdev/zram.rst
@@ -315,8 +315,8 @@ To use the feature, admin should set up backing device via::
 
 	echo /dev/sda5 > /sys/block/zramX/backing_dev
 
-before disksize setting. It supports only partition at this moment.
-If admin wants to use incompressible page writeback, they could do via::
+before disksize setting. It supports only partitions at this moment.
+If admin wants to use incompressible page writeback, they could do it via::
 
 	echo huge > /sys/block/zramX/writeback
 
@@ -341,9 +341,9 @@ Admin can request writeback of those idle pages at right timing via::
 
 	echo idle > /sys/block/zramX/writeback
 
-With the command, zram writeback idle pages from memory to the storage.
+With the command, zram will writeback idle pages from memory to the storage.
 
-If admin want to write a specific page in zram device to backing device,
+If an admin wants to write a specific page in zram device to the backing device,
 they could write a page index into the interface.
 
 	echo "page_index=1251" > /sys/block/zramX/writeback
@@ -354,7 +354,7 @@ to guarantee storage health for entire product life.
 
 To overcome the concern, zram supports "writeback_limit" feature.
 The "writeback_limit_enable"'s default value is 0 so that it doesn't limit
-any writeback. IOW, if admin wants to apply writeback budget, he should
+any writeback. IOW, if admin wants to apply writeback budget, they should
 enable writeback_limit_enable via::
 
 	$ echo 1 > /sys/block/zramX/writeback_limit_enable
@@ -365,7 +365,7 @@ until admin sets the budget via /sys/block/zramX/writeback_limit.
 (If admin doesn't enable writeback_limit_enable, writeback_limit's value
 assigned via /sys/block/zramX/writeback_limit is meaningless.)
 
-If admin want to limit writeback as per-day 400M, he could do it
+If admin wants to limit writeback as per-day 400M, they could do it
 like below::
 
 	$ MB_SHIFT=20
@@ -375,16 +375,16 @@ like below::
 	$ echo 1 > /sys/block/zram0/writeback_limit_enable
 
 If admins want to allow further write again once the budget is exhausted,
-he could do it like below::
+they could do it like below::
 
 	$ echo $((400<<MB_SHIFT>>4K_SHIFT)) > \
 		/sys/block/zram0/writeback_limit
 
-If admin wants to see remaining writeback budget since last set::
+If an admin wants to see the remaining writeback budget since last set::
 
 	$ cat /sys/block/zramX/writeback_limit
 
-If admin want to disable writeback limit, he could do::
+If an admin wants to disable writeback limit, they could do::
 
 	$ echo 0 > /sys/block/zramX/writeback_limit_enable
 
@@ -393,7 +393,7 @@ system reboot, echo 1 > /sys/block/zramX/reset) so keeping how many of
 writeback happened until you reset the zram to allocate extra writeback
 budget in next setting is user's job.
 
-If admin wants to measure writeback count in a certain period, he could
+If admin wants to measure writeback count in a certain period, they could
 know it via /sys/block/zram0/bd_stat's 3rd column.
 
 memory tracking
-- 
GitLab


From 8716ef413aa55d9a25481f84eb54f9d571a1f421 Mon Sep 17 00:00:00 2001
From: Akira Yokosawa <akiyks@gmail.com>
Date: Tue, 1 Feb 2022 08:59:58 +0900
Subject: [PATCH 0446/1586] docs: pdfdocs: Tweak width params of TOC

Sphinx has its own set of width parameters of Table of Contents (TOC)
for LaTeX defined in its class definition of sphinxmanual.cls.
It also inherits parameters for chapter entries from report.cls of
original LaTeX base.

However, they are optimized assuming small documents with tens of
pages and chapters/sections of less than 10.

To cope with some of kernel-doc documents with more than 1000
pages and several tens of chapters/sections, definitions of those
parameters need to be adjusted.

Unfortunately, those parameters are hard coded in the class
definitions and need low-level LaTeX coding tricks to redefine.

As Sphinx 1.7.9 does not have \sphinxtableofcontentshook,
which defines those parameters in later Sphinx versions,
for compatibility with both pre-1.8 and later Sphinx versions,
empty the hook altogether and redefine \@pnumwidth, \l@chapter,
\l@section, and \@subsection commands originally defined in
report.cls.

Summary of parameter changes:

  Width of page number (\@pnumwidth): 1.55em  -> 2.7em
  Width of chapter number:  1.5em -> 1.8em
  Indent of section number: 1.5em -> 1.8em
  Width of section number:  2.6em -> 3.2em
  Indent of subsection number: 4.1em -> 5em
  Width of subsection number:  3.5em -> 4.3em

Notes:
1. Parameters for subsection become relevant only when
   ":maxdepth: 3" is specified under "toctree::" (e.g., RCU/index.rst).
   They can hold subsection numbers up to 5 digits such as "18.7.13"
   (in RCU.pdf).

2. Number of chapters in driver-api.pdf is getting closer to 100.
   When it reaches 100, another set of tweaks will be necessary.

3. The low-level LaTeX trick is mentioned in "Unofficial LaTeX2e
   reference manual" at:
        http://latexref.xyz/Table-of-contents-etc_002e.html

Signed-off-by: Akira Yokosawa <akiyks@gmail.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Link: https://lore.kernel.org/r/e52b4718-7909-25be-fbc1-76800aa62ae3@gmail.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/conf.py | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/Documentation/conf.py b/Documentation/conf.py
index f07f2e9b9f2c8..e5c13dee2de80 100644
--- a/Documentation/conf.py
+++ b/Documentation/conf.py
@@ -409,6 +409,37 @@ latex_elements = {
 
     # Additional stuff for the LaTeX preamble.
     'preamble': '''
+	% Custom width parameters for TOC --- Redefine low-level commands
+	% defined in report.cls
+	\\makeatletter
+	%% Redefine \\@pnumwidth (page number width)
+	\\renewcommand*\\@pnumwidth{2.7em}
+	%% Redefine \\l@chapter (chapter list entry)
+	\\renewcommand*\\l@chapter[2]{%
+	  \\ifnum \\c@tocdepth >\\m@ne
+	    \\addpenalty{-\\@highpenalty}%
+	    \\vskip 1.0em \\@plus\\p@
+	    \\setlength\\@tempdima{1.8em}%
+	    \\begingroup
+	      \\parindent \\z@ \\rightskip \\@pnumwidth
+	      \\parfillskip -\\@pnumwidth
+	      \\leavevmode \\bfseries
+	      \\advance\\leftskip\\@tempdima
+	      \\hskip -\\leftskip
+	      #1\\nobreak\\hfil
+	      \\nobreak\\hb@xt@\\@pnumwidth{\\hss #2%
+	                                 \\kern-\\p@\\kern\\p@}\\par
+	      \\penalty\\@highpenalty
+	    \\endgroup
+	  \\fi}
+	%% Redefine \\l@section and \\l@subsection
+	\\renewcommand*\\l@section{\\@dottedtocline{1}{1.8em}{3.2em}}
+	\\renewcommand*\\l@subsection{\\@dottedtocline{2}{5em}{4.3em}}
+	\\makeatother
+	%% Sphinx < 1.8 doesn't have \\sphinxtableofcontentshook
+	\\providecommand{\\sphinxtableofcontentshook}{}
+	%% Undefine it for compatibility with Sphinx 1.7.9
+	\\renewcommand{\\sphinxtableofcontentshook}{} % Empty the hook
 	% Prevent column squeezing of tabulary.
 	\\setlength{\\tymin}{20em}
         % Use some font with UTF-8 support with XeLaTeX
-- 
GitLab


From 66939df53948bbf66753100266076f6a3357b54c Mon Sep 17 00:00:00 2001
From: Akira Yokosawa <akiyks@gmail.com>
Date: Tue, 1 Feb 2022 09:02:14 +0900
Subject: [PATCH 0447/1586] docs: pdfdocs: Switch default CJK font to KR
 variants

xeCJK is enabled in Table of Contents (TOC) so that translations.pdf
built by top-level "make pdfdocs" can have its TOC typeset properly.

This causes quotation marks and apostrophe symbols appear too wide in
Latin-script docs.
This is because (1) Sphinx converts ASCII symbols into multi-byte
UTF-8 ones in LaTeX and (2) in the SC variant of "Noto CJK" font
families, those UTF-8 symbols have full-width glyph.

The KR variant of the font families has half-width glyph for those
symbols and TOC pages should look nicer when it is used instead.

Switch the default CJK font families to the KR variant and teach
xeCJK of those symbols' widths.
To compensate the switch, teach xeCJK of the width in the SC and
TC variants.

Signed-off-by: Akira Yokosawa <akiyks@gmail.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Link: https://lore.kernel.org/r/0c8ea878-0a6f-ea01-ab45-4e66c5facee9@gmail.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/conf.py | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/Documentation/conf.py b/Documentation/conf.py
index e5c13dee2de80..e70aa5fd969f3 100644
--- a/Documentation/conf.py
+++ b/Documentation/conf.py
@@ -460,13 +460,15 @@ latex_elements['preamble']  += '''
     \\IfFontExistsTF{Noto Sans CJK SC}{
 	% This is needed for translations
 	\\usepackage{xeCJK}
-	\\IfFontExistsTF{Noto Serif CJK SC}{
-	    \\setCJKmainfont{Noto Serif CJK SC}[AutoFakeSlant]
+	\\IfFontExistsTF{Noto Serif CJK KR}{
+	    \\setCJKmainfont{Noto Serif CJK KR}[AutoFakeSlant]
 	}{
-	    \\setCJKmainfont{Noto Sans CJK SC}[AutoFakeSlant]
+	    \\setCJKmainfont{Noto Sans CJK KR}[AutoFakeSlant]
 	}
-	\\setCJKsansfont{Noto Sans CJK SC}[AutoFakeSlant]
-	\\setCJKmonofont{Noto Sans Mono CJK SC}[AutoFakeSlant]
+	\\setCJKsansfont{Noto Sans CJK KR}[AutoFakeSlant]
+	\\setCJKmonofont{Noto Sans Mono CJK KR}[AutoFakeSlant]
+	\\xeCJKDeclareCharClass{HalfLeft}{`“,`‘}
+	\\xeCJKDeclareCharClass{HalfRight}{`”,`’}
 	% CJK Language-specific font choices
 	\\IfFontExistsTF{Noto Serif CJK SC}{
 	    \\newCJKfontfamily[SCmain]\\scmain{Noto Serif CJK SC}[AutoFakeSlant]
@@ -513,11 +515,18 @@ latex_elements['preamble']  += '''
 	\\newcommand{\\kerneldocBeginSC}{%
 	    \\begingroup%
 	    \\scmain%
+	    \\xeCJKDeclareCharClass{FullLeft}{`“,`‘}%
+	    \\xeCJKDeclareCharClass{FullRight}{`”,`’}%
+	    \\renewcommand{\\CJKrmdefault}{SCserif}%
+	    \\renewcommand{\\CJKsfdefault}{SCsans}%
+	    \\renewcommand{\\CJKttdefault}{SCmono}%
 	}
 	\\newcommand{\\kerneldocEndSC}{\\endgroup}
 	\\newcommand{\\kerneldocBeginTC}{%
 	    \\begingroup%
 	    \\tcmain%
+	    \\xeCJKDeclareCharClass{FullLeft}{`“,`‘}%
+	    \\xeCJKDeclareCharClass{FullRight}{`”,`’}%
 	    \\renewcommand{\\CJKrmdefault}{TCserif}%
 	    \\renewcommand{\\CJKsfdefault}{TCsans}%
 	    \\renewcommand{\\CJKttdefault}{TCmono}%
@@ -525,8 +534,6 @@ latex_elements['preamble']  += '''
 	\\newcommand{\\kerneldocEndTC}{\\endgroup}
 	\\newcommand{\\kerneldocBeginKR}{%
 	    \\begingroup%
-	    \\xeCJKDeclareCharClass{HalfLeft}{`“,`‘}%
-	    \\xeCJKDeclareCharClass{HalfRight}{`”,`’}%
 	    \\krmain%
 	    \\renewcommand{\\CJKrmdefault}{KRserif}%
 	    \\renewcommand{\\CJKsfdefault}{KRsans}%
@@ -536,8 +543,6 @@ latex_elements['preamble']  += '''
 	\\newcommand{\\kerneldocEndKR}{\\endgroup}
 	\\newcommand{\\kerneldocBeginJP}{%
 	    \\begingroup%
-	    \\xeCJKDeclareCharClass{HalfLeft}{`“,`‘}%
-	    \\xeCJKDeclareCharClass{HalfRight}{`”,`’}%
 	    \\jpmain%
 	    \\renewcommand{\\CJKrmdefault}{JPserif}%
 	    \\renewcommand{\\CJKsfdefault}{JPsans}%
-- 
GitLab


From 7b686a2ea1e41e75c35ff2ec333d68b2b8c032d6 Mon Sep 17 00:00:00 2001
From: Akira Yokosawa <akiyks@gmail.com>
Date: Tue, 1 Feb 2022 09:03:16 +0900
Subject: [PATCH 0448/1586] docs: pdfdocs: Enable CJKspace in TOC for Korean
 titles

Korean (Hangul) titles in Table of Contents of translations.pdf
don't have inter-phrase spaces.

This is because the CJKspace option of xeCJK is disabled by
default.

Restore the spaces by enabling the option at the beginning of every
document and disable it in the \kerneldocBegin{SC|TC|JP} commands.

Signed-off-by: Akira Yokosawa <akiyks@gmail.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Link: https://lore.kernel.org/r/19141b3e-01d9-1f6d-5020-42fbda784831@gmail.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/conf.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/Documentation/conf.py b/Documentation/conf.py
index e70aa5fd969f3..ded49b8e9bf63 100644
--- a/Documentation/conf.py
+++ b/Documentation/conf.py
@@ -520,6 +520,7 @@ latex_elements['preamble']  += '''
 	    \\renewcommand{\\CJKrmdefault}{SCserif}%
 	    \\renewcommand{\\CJKsfdefault}{SCsans}%
 	    \\renewcommand{\\CJKttdefault}{SCmono}%
+	    \\xeCJKsetup{CJKspace = false}%
 	}
 	\\newcommand{\\kerneldocEndSC}{\\endgroup}
 	\\newcommand{\\kerneldocBeginTC}{%
@@ -530,6 +531,7 @@ latex_elements['preamble']  += '''
 	    \\renewcommand{\\CJKrmdefault}{TCserif}%
 	    \\renewcommand{\\CJKsfdefault}{TCsans}%
 	    \\renewcommand{\\CJKttdefault}{TCmono}%
+	    \\xeCJKsetup{CJKspace = false}%
 	}
 	\\newcommand{\\kerneldocEndTC}{\\endgroup}
 	\\newcommand{\\kerneldocBeginKR}{%
@@ -538,7 +540,7 @@ latex_elements['preamble']  += '''
 	    \\renewcommand{\\CJKrmdefault}{KRserif}%
 	    \\renewcommand{\\CJKsfdefault}{KRsans}%
 	    \\renewcommand{\\CJKttdefault}{KRmono}%
-	    \\xeCJKsetup{CJKspace = true} % For inter-phrase space
+	    % \\xeCJKsetup{CJKspace = true} % true by default
 	}
 	\\newcommand{\\kerneldocEndKR}{\\endgroup}
 	\\newcommand{\\kerneldocBeginJP}{%
@@ -547,6 +549,7 @@ latex_elements['preamble']  += '''
 	    \\renewcommand{\\CJKrmdefault}{JPserif}%
 	    \\renewcommand{\\CJKsfdefault}{JPsans}%
 	    \\renewcommand{\\CJKttdefault}{JPmono}%
+	    \\xeCJKsetup{CJKspace = false}%
 	}
 	\\newcommand{\\kerneldocEndJP}{\\endgroup}
 	% Single spacing in literal blocks
@@ -555,6 +558,7 @@ latex_elements['preamble']  += '''
 	\\usepackage{etoolbox}
 	% Inactivate CJK after tableofcontents
 	\\apptocmd{\\sphinxtableofcontents}{\\kerneldocCJKoff}{}{}
+	\\xeCJKsetup{CJKspace = true} % For inter-phrase space of Korean TOC
     }{ % No CJK font found
 	% Custom macros to on/off CJK (Dummy)
 	\\newcommand{\\kerneldocCJKon}{}
-- 
GitLab


From 5d9158e3c762f0bf1753501d8e64eb6fe19dc437 Mon Sep 17 00:00:00 2001
From: Akira Yokosawa <akiyks@gmail.com>
Date: Tue, 1 Feb 2022 09:04:40 +0900
Subject: [PATCH 0449/1586] docs/translations: Skip CJK contents if suitable
 fonts not found

On systems without "Noto Sans CJK" fonts, CJK chapters in
translations.pdf are full of "TOFU" boxes, with a long build time and
a large log file containing lots of missing-font warnings.

Avoid such waste of time and resources by skipping CJK chapters when
CJK fonts are not available.

To skip whole chapters, change the definition of
\kerneldocBegin{SC|TC|KR|JP} commands so that they can have an argument
to be ignored.
This works as far as the argument (#1) is not used in the command.
In place of skipped contents, put a note on skipped contents at the
beginning of the PDF.

Change the call sites in index.rst of CJK translations accordingly.

When CJK fonts are available, existing command definitions with
no argument just work.  LaTeX engine will see additional pairs of
"{" and "}", which add a level of grouping without having any effect
on typesetting.

Signed-off-by: Akira Yokosawa <akiyks@gmail.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Link: https://lore.kernel.org/r/3359ca41-b81d-b2c7-e437-7618efbe241d@gmail.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/conf.py                      | 13 +++++++++----
 Documentation/translations/ja_JP/index.rst |  4 ++--
 Documentation/translations/ko_KR/index.rst |  5 ++---
 Documentation/translations/zh_CN/index.rst |  4 ++--
 Documentation/translations/zh_TW/index.rst |  4 ++--
 5 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/Documentation/conf.py b/Documentation/conf.py
index ded49b8e9bf63..62cd0e472b3b3 100644
--- a/Documentation/conf.py
+++ b/Documentation/conf.py
@@ -563,13 +563,18 @@ latex_elements['preamble']  += '''
 	% Custom macros to on/off CJK (Dummy)
 	\\newcommand{\\kerneldocCJKon}{}
 	\\newcommand{\\kerneldocCJKoff}{}
-	\\newcommand{\\kerneldocBeginSC}{}
+	\\newcommand{\\kerneldocBeginSC}[1]{%
+	    \\begin{sphinxadmonition}{note}{Note:}
+		``Noto Sans CJK'' fonts are not found while building this PDF\\@.
+		Translations of zh\\_CN, zh\\_TW, ko\\_KR, and ja\\_JP are
+		skipped.
+	    \\end{sphinxadmonition}}
 	\\newcommand{\\kerneldocEndSC}{}
-	\\newcommand{\\kerneldocBeginTC}{}
+	\\newcommand{\\kerneldocBeginTC}[1]{}
 	\\newcommand{\\kerneldocEndTC}{}
-	\\newcommand{\\kerneldocBeginKR}{}
+	\\newcommand{\\kerneldocBeginKR}[1]{}
 	\\newcommand{\\kerneldocEndKR}{}
-	\\newcommand{\\kerneldocBeginJP}{}
+	\\newcommand{\\kerneldocBeginJP}[1]{}
 	\\newcommand{\\kerneldocEndJP}{}
     }
 '''
diff --git a/Documentation/translations/ja_JP/index.rst b/Documentation/translations/ja_JP/index.rst
index 88d4d98eed15a..20738c931d025 100644
--- a/Documentation/translations/ja_JP/index.rst
+++ b/Documentation/translations/ja_JP/index.rst
@@ -3,7 +3,7 @@
 	\renewcommand\thesection*
 	\renewcommand\thesubsection*
 	\kerneldocCJKon
-	\kerneldocBeginJP
+	\kerneldocBeginJP{
 
 Japanese translations
 =====================
@@ -15,4 +15,4 @@ Japanese translations
 
 .. raw:: latex
 
-	\kerneldocEndJP
+	}\kerneldocEndJP
diff --git a/Documentation/translations/ko_KR/index.rst b/Documentation/translations/ko_KR/index.rst
index f636b482fb4c1..4add6b2fe1f21 100644
--- a/Documentation/translations/ko_KR/index.rst
+++ b/Documentation/translations/ko_KR/index.rst
@@ -3,7 +3,7 @@
 	\renewcommand\thesection*
 	\renewcommand\thesubsection*
 	\kerneldocCJKon
-	\kerneldocBeginKR
+	\kerneldocBeginKR{
 
 한국어 번역
 ===========
@@ -26,5 +26,4 @@
 
 .. raw:: latex
 
-    \normalsize
-    \kerneldocEndKR
+    }\kerneldocEndKR
diff --git a/Documentation/translations/zh_CN/index.rst b/Documentation/translations/zh_CN/index.rst
index f831887eacfb1..23f8f4c68e83c 100644
--- a/Documentation/translations/zh_CN/index.rst
+++ b/Documentation/translations/zh_CN/index.rst
@@ -5,7 +5,7 @@
 	\renewcommand\thesection*
 	\renewcommand\thesubsection*
 	\kerneldocCJKon
-	\kerneldocBeginSC
+	\kerneldocBeginSC{
 
 .. _linux_doc_zh:
 
@@ -198,4 +198,4 @@ TODOList:
 
 .. raw:: latex
 
-	\kerneldocEndSC
+	}\kerneldocEndSC
diff --git a/Documentation/translations/zh_TW/index.rst b/Documentation/translations/zh_TW/index.rst
index f56f78ba78609..e1ce9d8c06f8f 100644
--- a/Documentation/translations/zh_TW/index.rst
+++ b/Documentation/translations/zh_TW/index.rst
@@ -5,7 +5,7 @@
 	\renewcommand\thesection*
 	\renewcommand\thesubsection*
 	\kerneldocCJKon
-	\kerneldocBeginTC
+	\kerneldocBeginTC{
 
 .. _linux_doc_zh_tw:
 
@@ -174,4 +174,4 @@ TODOList:
 
 .. raw:: latex
 
-	\kerneldocEndTC
+	}\kerneldocEndTC
-- 
GitLab


From b774cc46313b3d7c9139f29df67818a8b858c558 Mon Sep 17 00:00:00 2001
From: Akira Yokosawa <akiyks@gmail.com>
Date: Tue, 1 Feb 2022 09:05:40 +0900
Subject: [PATCH 0450/1586] docs: pdfdocs: Move CJK monospace font setting to
 main conf.py

As LaTeX macros for CJK font settings can have Latin-script font
settings as well, settings under Documentation/translations/ can
be moved to the main conf.py.

By this change, translations.pdf built by top-level "make pdfdocs"
can have properly aligned ascii-art diagrams except for Korean
ones.

For the reason of remaining misalignment in Korean diagrams, see
changelog of commit a90dad8f610a ("docs: pdfdocs: Add conf.py
local to translations for ascii-art alignment").

Signed-off-by: Akira Yokosawa <akiyks@gmail.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Link: https://lore.kernel.org/r/eb87790a-03f4-9f29-c8a3-ef2c3e78ca18@gmail.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/conf.py              |  8 ++++++++
 Documentation/translations/conf.py | 12 ------------
 2 files changed, 8 insertions(+), 12 deletions(-)
 delete mode 100644 Documentation/translations/conf.py

diff --git a/Documentation/conf.py b/Documentation/conf.py
index 62cd0e472b3b3..fb8f69fc4d38c 100644
--- a/Documentation/conf.py
+++ b/Documentation/conf.py
@@ -521,6 +521,8 @@ latex_elements['preamble']  += '''
 	    \\renewcommand{\\CJKsfdefault}{SCsans}%
 	    \\renewcommand{\\CJKttdefault}{SCmono}%
 	    \\xeCJKsetup{CJKspace = false}%
+	    % For CJK ascii-art alignment
+	    \\setmonofont{Noto Sans Mono CJK SC}[AutoFakeSlant]%
 	}
 	\\newcommand{\\kerneldocEndSC}{\\endgroup}
 	\\newcommand{\\kerneldocBeginTC}{%
@@ -532,6 +534,8 @@ latex_elements['preamble']  += '''
 	    \\renewcommand{\\CJKsfdefault}{TCsans}%
 	    \\renewcommand{\\CJKttdefault}{TCmono}%
 	    \\xeCJKsetup{CJKspace = false}%
+	    % For CJK ascii-art alignment
+	    \\setmonofont{Noto Sans Mono CJK TC}[AutoFakeSlant]%
 	}
 	\\newcommand{\\kerneldocEndTC}{\\endgroup}
 	\\newcommand{\\kerneldocBeginKR}{%
@@ -541,6 +545,8 @@ latex_elements['preamble']  += '''
 	    \\renewcommand{\\CJKsfdefault}{KRsans}%
 	    \\renewcommand{\\CJKttdefault}{KRmono}%
 	    % \\xeCJKsetup{CJKspace = true} % true by default
+	    % For CJK ascii-art alignment (still misaligned for Hangul)
+	    \\setmonofont{Noto Sans Mono CJK KR}[AutoFakeSlant]%
 	}
 	\\newcommand{\\kerneldocEndKR}{\\endgroup}
 	\\newcommand{\\kerneldocBeginJP}{%
@@ -550,6 +556,8 @@ latex_elements['preamble']  += '''
 	    \\renewcommand{\\CJKsfdefault}{JPsans}%
 	    \\renewcommand{\\CJKttdefault}{JPmono}%
 	    \\xeCJKsetup{CJKspace = false}%
+	    % For CJK ascii-art alignment
+	    \\setmonofont{Noto Sans Mono CJK JP}[AutoFakeSlant]%
 	}
 	\\newcommand{\\kerneldocEndJP}{\\endgroup}
 	% Single spacing in literal blocks
diff --git a/Documentation/translations/conf.py b/Documentation/translations/conf.py
deleted file mode 100644
index 92cdbba742299..0000000000000
--- a/Documentation/translations/conf.py
+++ /dev/null
@@ -1,12 +0,0 @@
-# -*- coding: utf-8 -*-
-# SPDX-License-Identifier: GPL-2.0
-
-# -- Additinal options for LaTeX output ----------------------------------
-# font config for ascii-art alignment
-
-latex_elements['preamble']  += '''
-    \\IfFontExistsTF{Noto Sans CJK SC}{
-	% For CJK ascii-art alignment
-	\\setmonofont{Noto Sans Mono CJK SC}[AutoFakeSlant]
-    }{}
-'''
-- 
GitLab


From 10720e120e2b1d66172dccb06eb4f346a665cca6 Mon Sep 17 00:00:00 2001
From: Peng Fan <peng.fan@nxp.com>
Date: Wed, 5 Jan 2022 20:43:04 +0800
Subject: [PATCH 0451/1586] clocksource/drivers/imx-tpm: Exclude sched clock
 for ARM64

For ARM64 platform such as i.MX8ULP which has ARMv8 generic timer as sched
clock, which is much faster compared with tpm sched clock. Reading the
tpm count register in i.MX8ULP requires about 290ns, this is slow and
introduce scheduler latency. So exclude tpm sched clock for ARM64
platform.

Signed-off-by: Peng Fan <peng.fan@nxp.com>
Link: https://lore.kernel.org/r/20220105124304.3567629-1-peng.fan@oss.nxp.com
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/clocksource/timer-imx-tpm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clocksource/timer-imx-tpm.c b/drivers/clocksource/timer-imx-tpm.c
index df8064122b10c..60cefc247b715 100644
--- a/drivers/clocksource/timer-imx-tpm.c
+++ b/drivers/clocksource/timer-imx-tpm.c
@@ -150,10 +150,10 @@ static int __init tpm_clocksource_init(void)
 	tpm_delay_timer.read_current_timer = &tpm_read_current_timer;
 	tpm_delay_timer.freq = timer_of_rate(&to_tpm) >> 3;
 	register_current_timer_delay(&tpm_delay_timer);
-#endif
 
 	sched_clock_register(tpm_read_sched_clock, counter_width,
 			     timer_of_rate(&to_tpm) >> 3);
+#endif
 
 	return clocksource_mmio_init(timer_base + TPM_CNT,
 				     "imx-tpm",
-- 
GitLab


From 47b34f495b8b75475952f12c521c4c1fc2fa09b4 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 15 Feb 2022 15:51:39 +0200
Subject: [PATCH 0452/1586] spi: intel-pci: Add support for Intel Ice Lake-N
 SPI serial flash

Intel Ice Lake-N has the same SPI serial flash controller as Ice Lake-LP.
Add Ice Lake-N PCI ID to the driver list of supported devices.

The device can be found on MacBookPro16,2 [1].

[1]: https://linux-hardware.org/?probe=f1c5cf0c43

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Link: https://lore.kernel.org/r/20220215135139.4328-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-intel-pci.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/spi/spi-intel-pci.c b/drivers/spi/spi-intel-pci.c
index a9cb4d77ffe39..a5ef7a526a7fc 100644
--- a/drivers/spi/spi-intel-pci.c
+++ b/drivers/spi/spi-intel-pci.c
@@ -66,6 +66,7 @@ static const struct pci_device_id intel_spi_pci_ids[] = {
 	{ PCI_VDEVICE(INTEL, 0x19e0), (unsigned long)&bxt_info },
 	{ PCI_VDEVICE(INTEL, 0x1bca), (unsigned long)&bxt_info },
 	{ PCI_VDEVICE(INTEL, 0x34a4), (unsigned long)&bxt_info },
+	{ PCI_VDEVICE(INTEL, 0x38a4), (unsigned long)&bxt_info },
 	{ PCI_VDEVICE(INTEL, 0x43a4), (unsigned long)&cnl_info },
 	{ PCI_VDEVICE(INTEL, 0x4b24), (unsigned long)&bxt_info },
 	{ PCI_VDEVICE(INTEL, 0x4da4), (unsigned long)&bxt_info },
-- 
GitLab


From 2b993ab79b5dc83eb699e747bfac6c04f4f5fc70 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Almeida?= <andrealmeid@collabora.com>
Date: Wed, 16 Feb 2022 13:27:19 -0300
Subject: [PATCH 0453/1586] spi: amd: Fix building without ACPI enabled
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit 209043554915 ("spi: amd: Add support for version AMDI0062")
removed the cast ACPI_PTR() for no good reason. This wrapper is
important to make sure that the driver can be compiled with or without
CONFIG_ACPI enabled, useful for compiling test. Give back the cast so
compilation works again.

Fixes: 209043554915 ("spi: amd: Add support for version AMDI0062")
Signed-off-by: André Almeida <andrealmeid@collabora.com>
Link: https://lore.kernel.org/r/20220216162719.116062-1-andrealmeid@collabora.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-amd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/spi/spi-amd.c b/drivers/spi/spi-amd.c
index d909afac6e219..cba6a4486c24c 100644
--- a/drivers/spi/spi-amd.c
+++ b/drivers/spi/spi-amd.c
@@ -330,7 +330,7 @@ MODULE_DEVICE_TABLE(acpi, spi_acpi_match);
 static struct platform_driver amd_spi_driver = {
 	.driver = {
 		.name = "amd_spi",
-		.acpi_match_table = spi_acpi_match,
+		.acpi_match_table = ACPI_PTR(spi_acpi_match),
 	},
 	.probe = amd_spi_probe,
 };
-- 
GitLab


From 54d0fd06e2bd52d3b17648de787157a7c0625adb Mon Sep 17 00:00:00 2001
From: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Date: Wed, 16 Feb 2022 11:13:17 +0200
Subject: [PATCH 0454/1586] spi: pxa2xx: Add support for Intel Raptor Lake
 PCH-S

Add support for LPSS SPI on Intel Raptor Lake PCH-S. It has four
controllers each having two chip selects.

Signed-off-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Link: https://lore.kernel.org/r/20220216091317.1302254-1-jarkko.nikula@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-pxa2xx.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c
index abb9f0ffd3773..edb42d08857d0 100644
--- a/drivers/spi/spi-pxa2xx.c
+++ b/drivers/spi/spi-pxa2xx.c
@@ -1394,6 +1394,11 @@ static const struct pci_device_id pxa2xx_spi_pci_compound_match[] = {
 	{ PCI_VDEVICE(INTEL, 0x5ac2), LPSS_BXT_SSP },
 	{ PCI_VDEVICE(INTEL, 0x5ac4), LPSS_BXT_SSP },
 	{ PCI_VDEVICE(INTEL, 0x5ac6), LPSS_BXT_SSP },
+	/* RPL-S */
+	{ PCI_VDEVICE(INTEL, 0x7a2a), LPSS_CNL_SSP },
+	{ PCI_VDEVICE(INTEL, 0x7a2b), LPSS_CNL_SSP },
+	{ PCI_VDEVICE(INTEL, 0x7a79), LPSS_CNL_SSP },
+	{ PCI_VDEVICE(INTEL, 0x7a7b), LPSS_CNL_SSP },
 	/* ADL-S */
 	{ PCI_VDEVICE(INTEL, 0x7aaa), LPSS_CNL_SSP },
 	{ PCI_VDEVICE(INTEL, 0x7aab), LPSS_CNL_SSP },
-- 
GitLab


From 08f253ec3767bcfafc5d32617a92cee57c63968e Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Tue, 15 Feb 2022 21:44:46 -0600
Subject: [PATCH 0455/1586] x86/cpu: Clear SME feature flag when not in use

Currently, the SME CPU feature flag is reflective of whether the CPU
supports the feature but not whether it has been activated by the
kernel.

Change this around to clear the SME feature flag if the kernel is not
using it so userspace can determine if it is available and in use
from /proc/cpuinfo.

As the feature flag is cleared on systems where SME isn't active, use
CPUID 0x8000001f to confirm SME availability before calling
native_wbinvd().

Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Tom Lendacky <thomas.lendacky@amd.com>
Link: https://lore.kernel.org/r/20220216034446.2430634-1-mario.limonciello@amd.com
---
 arch/x86/kernel/cpu/amd.c | 5 +++++
 arch/x86/kernel/process.c | 5 ++++-
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index bad0fa4c17799..0c0b09796ced3 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -556,6 +556,8 @@ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
 	 *	      the SME physical address space reduction value.
 	 *	      If BIOS has not enabled SME then don't advertise the
 	 *	      SME feature (set in scattered.c).
+	 *	      If the kernel has not enabled SME via any means then
+	 *	      don't advertise the SME feature.
 	 *   For SEV: If BIOS has not enabled SEV then don't advertise the
 	 *            SEV and SEV_ES feature (set in scattered.c).
 	 *
@@ -578,6 +580,9 @@ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
 		if (IS_ENABLED(CONFIG_X86_32))
 			goto clear_all;
 
+		if (!sme_me_mask)
+			setup_clear_cpu_cap(X86_FEATURE_SME);
+
 		rdmsrl(MSR_K7_HWCR, msr);
 		if (!(msr & MSR_K7_HWCR_SMMLOCK))
 			goto clear_sev;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 81d8ef036637c..e131d71b3cae9 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -765,8 +765,11 @@ void stop_this_cpu(void *dummy)
 	 * without the encryption bit, they don't race each other when flushed
 	 * and potentially end up with the wrong entry being committed to
 	 * memory.
+	 *
+	 * Test the CPUID bit directly because the machine might've cleared
+	 * X86_FEATURE_SME due to cmdline options.
 	 */
-	if (boot_cpu_has(X86_FEATURE_SME))
+	if (cpuid_eax(0x8000001f) & BIT(0))
 		native_wbinvd();
 	for (;;) {
 		/*
-- 
GitLab


From 3f51aa9e296fe4af785d5761bb12556fb2494761 Mon Sep 17 00:00:00 2001
From: Ye Bin <yebin10@huawei.com>
Date: Wed, 9 Feb 2022 19:29:51 +0800
Subject: [PATCH 0456/1586] PM: hibernate: fix load_image_and_restore() error
 path

As 'swsusp_check' open 'hib_resume_bdev', if call 'create_basic_memory_bitmaps'
failed, we need to close 'hib_resume_bdev' in 'load_image_and_restore' function.

Signed-off-by: Ye Bin <yebin10@huawei.com>
[ rjw: Subject ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 kernel/power/hibernate.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index e6af502c2fd77..49d1df0218cb8 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -689,8 +689,10 @@ static int load_image_and_restore(void)
 
 	lock_device_hotplug();
 	error = create_basic_memory_bitmaps();
-	if (error)
+	if (error) {
+		swsusp_close(FMODE_READ | FMODE_EXCL);
 		goto Unlock;
+	}
 
 	error = swsusp_read(&flags);
 	swsusp_close(FMODE_READ | FMODE_EXCL);
-- 
GitLab


From e1be43d9b5d0d1310dbd90185a8e5c7145dde40f Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Sat, 18 Sep 2021 15:17:53 -0700
Subject: [PATCH 0457/1586] overflow: Implement size_t saturating arithmetic
 helpers

In order to perform more open-coded replacements of common allocation
size arithmetic, the kernel needs saturating (SIZE_MAX) helpers for
multiplication, addition, and subtraction. For example, it is common in
allocators, especially on realloc, to add to an existing size:

    p = krealloc(map->patch,
                 sizeof(struct reg_sequence) * (map->patch_regs + num_regs),
                 GFP_KERNEL);

There is no existing saturating replacement for this calculation, and
just leaving the addition open coded inside array_size() could
potentially overflow as well. For example, an overflow in an expression
for a size_t argument might wrap to zero:

    array_size(anything, something_at_size_max + 1) == 0

Introduce size_mul(), size_add(), and size_sub() helpers that
implicitly promote arguments to size_t and saturated calculations for
use in allocations. With these helpers it is also possible to redefine
array_size(), array3_size(), flex_array_size(), and struct_size() in
terms of the new helpers.

As with the check_*_overflow() helpers, the new helpers use __must_check,
though what is really desired is a way to make sure that assignment is
only to a size_t lvalue. Without this, it's still possible to introduce
overflow/underflow via type conversion (i.e. from size_t to int).
Enforcing this will currently need to be left to static analysis or
future use of -Wconversion.

Additionally update the overflow unit tests to force runtime evaluation
for the pathological cases.

Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Gustavo A. R. Silva <gustavoars@kernel.org>
Cc: Nathan Chancellor <nathan@kernel.org>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Leon Romanovsky <leon@kernel.org>
Cc: Keith Busch <kbusch@kernel.org>
Cc: Len Baker <len.baker@gmx.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 Documentation/process/deprecated.rst |  20 ++++-
 include/linux/overflow.h             | 110 +++++++++++++++++----------
 lib/test_overflow.c                  |  98 ++++++++++++++++++++++++
 3 files changed, 184 insertions(+), 44 deletions(-)

diff --git a/Documentation/process/deprecated.rst b/Documentation/process/deprecated.rst
index 388cb19f5dbb6..a6e36d9c3d14b 100644
--- a/Documentation/process/deprecated.rst
+++ b/Documentation/process/deprecated.rst
@@ -71,6 +71,9 @@ Instead, the 2-factor form of the allocator should be used::
 
 	foo = kmalloc_array(count, size, GFP_KERNEL);
 
+Specifically, kmalloc() can be replaced with kmalloc_array(), and
+kzalloc() can be replaced with kcalloc().
+
 If no 2-factor form is available, the saturate-on-overflow helpers should
 be used::
 
@@ -91,9 +94,20 @@ Instead, use the helper::
         array usage and switch to a `flexible array member
         <#zero-length-and-one-element-arrays>`_ instead.
 
-See array_size(), array3_size(), and struct_size(),
-for more details as well as the related check_add_overflow() and
-check_mul_overflow() family of functions.
+For other calculations, please compose the use of the size_mul(),
+size_add(), and size_sub() helpers. For example, in the case of::
+
+	foo = krealloc(current_size + chunk_size * (count - 3), GFP_KERNEL);
+
+Instead, use the helpers::
+
+	foo = krealloc(size_add(current_size,
+				size_mul(chunk_size,
+					 size_sub(count, 3))), GFP_KERNEL);
+
+For more details, also see array3_size() and flex_array_size(),
+as well as the related check_mul_overflow(), check_add_overflow(),
+check_sub_overflow(), and check_shl_overflow() family of functions.
 
 simple_strtol(), simple_strtoll(), simple_strtoul(), simple_strtoull()
 ----------------------------------------------------------------------
diff --git a/include/linux/overflow.h b/include/linux/overflow.h
index 4669632bd72bc..59d7228104d02 100644
--- a/include/linux/overflow.h
+++ b/include/linux/overflow.h
@@ -118,81 +118,94 @@ static inline bool __must_check __must_check_overflow(bool overflow)
 }))
 
 /**
- * array_size() - Calculate size of 2-dimensional array.
- *
- * @a: dimension one
- * @b: dimension two
+ * size_mul() - Calculate size_t multiplication with saturation at SIZE_MAX
  *
- * Calculates size of 2-dimensional array: @a * @b.
+ * @factor1: first factor
+ * @factor2: second factor
  *
- * Returns: number of bytes needed to represent the array or SIZE_MAX on
- * overflow.
+ * Returns: calculate @factor1 * @factor2, both promoted to size_t,
+ * with any overflow causing the return value to be SIZE_MAX. The
+ * lvalue must be size_t to avoid implicit type conversion.
  */
-static inline __must_check size_t array_size(size_t a, size_t b)
+static inline size_t __must_check size_mul(size_t factor1, size_t factor2)
 {
 	size_t bytes;
 
-	if (check_mul_overflow(a, b, &bytes))
+	if (check_mul_overflow(factor1, factor2, &bytes))
 		return SIZE_MAX;
 
 	return bytes;
 }
 
 /**
- * array3_size() - Calculate size of 3-dimensional array.
+ * size_add() - Calculate size_t addition with saturation at SIZE_MAX
  *
- * @a: dimension one
- * @b: dimension two
- * @c: dimension three
- *
- * Calculates size of 3-dimensional array: @a * @b * @c.
+ * @addend1: first addend
+ * @addend2: second addend
  *
- * Returns: number of bytes needed to represent the array or SIZE_MAX on
- * overflow.
+ * Returns: calculate @addend1 + @addend2, both promoted to size_t,
+ * with any overflow causing the return value to be SIZE_MAX. The
+ * lvalue must be size_t to avoid implicit type conversion.
  */
-static inline __must_check size_t array3_size(size_t a, size_t b, size_t c)
+static inline size_t __must_check size_add(size_t addend1, size_t addend2)
 {
 	size_t bytes;
 
-	if (check_mul_overflow(a, b, &bytes))
-		return SIZE_MAX;
-	if (check_mul_overflow(bytes, c, &bytes))
+	if (check_add_overflow(addend1, addend2, &bytes))
 		return SIZE_MAX;
 
 	return bytes;
 }
 
-/*
- * Compute a*b+c, returning SIZE_MAX on overflow. Internal helper for
- * struct_size() below.
+/**
+ * size_sub() - Calculate size_t subtraction with saturation at SIZE_MAX
+ *
+ * @minuend: value to subtract from
+ * @subtrahend: value to subtract from @minuend
+ *
+ * Returns: calculate @minuend - @subtrahend, both promoted to size_t,
+ * with any overflow causing the return value to be SIZE_MAX. For
+ * composition with the size_add() and size_mul() helpers, neither
+ * argument may be SIZE_MAX (or the result with be forced to SIZE_MAX).
+ * The lvalue must be size_t to avoid implicit type conversion.
  */
-static inline __must_check size_t __ab_c_size(size_t a, size_t b, size_t c)
+static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend)
 {
 	size_t bytes;
 
-	if (check_mul_overflow(a, b, &bytes))
-		return SIZE_MAX;
-	if (check_add_overflow(bytes, c, &bytes))
+	if (minuend == SIZE_MAX || subtrahend == SIZE_MAX ||
+	    check_sub_overflow(minuend, subtrahend, &bytes))
 		return SIZE_MAX;
 
 	return bytes;
 }
 
 /**
- * struct_size() - Calculate size of structure with trailing array.
- * @p: Pointer to the structure.
- * @member: Name of the array member.
- * @count: Number of elements in the array.
+ * array_size() - Calculate size of 2-dimensional array.
  *
- * Calculates size of memory needed for structure @p followed by an
- * array of @count number of @member elements.
+ * @a: dimension one
+ * @b: dimension two
  *
- * Return: number of bytes needed or SIZE_MAX on overflow.
+ * Calculates size of 2-dimensional array: @a * @b.
+ *
+ * Returns: number of bytes needed to represent the array or SIZE_MAX on
+ * overflow.
  */
-#define struct_size(p, member, count)					\
-	__ab_c_size(count,						\
-		    sizeof(*(p)->member) + __must_be_array((p)->member),\
-		    sizeof(*(p)))
+#define array_size(a, b)	size_mul(a, b)
+
+/**
+ * array3_size() - Calculate size of 3-dimensional array.
+ *
+ * @a: dimension one
+ * @b: dimension two
+ * @c: dimension three
+ *
+ * Calculates size of 3-dimensional array: @a * @b * @c.
+ *
+ * Returns: number of bytes needed to represent the array or SIZE_MAX on
+ * overflow.
+ */
+#define array3_size(a, b, c)	size_mul(size_mul(a, b), c)
 
 /**
  * flex_array_size() - Calculate size of a flexible array member
@@ -208,7 +221,22 @@ static inline __must_check size_t __ab_c_size(size_t a, size_t b, size_t c)
  * Return: number of bytes needed or SIZE_MAX on overflow.
  */
 #define flex_array_size(p, member, count)				\
-	array_size(count,						\
-		    sizeof(*(p)->member) + __must_be_array((p)->member))
+	size_mul(count,							\
+		 sizeof(*(p)->member) + __must_be_array((p)->member))
+
+/**
+ * struct_size() - Calculate size of structure with trailing flexible array.
+ *
+ * @p: Pointer to the structure.
+ * @member: Name of the array member.
+ * @count: Number of elements in the array.
+ *
+ * Calculates size of memory needed for structure @p followed by an
+ * array of @count number of @member elements.
+ *
+ * Return: number of bytes needed or SIZE_MAX on overflow.
+ */
+#define struct_size(p, member, count)					\
+	size_add(sizeof(*(p)), flex_array_size(p, member, count))
 
 #endif /* __LINUX_OVERFLOW_H */
diff --git a/lib/test_overflow.c b/lib/test_overflow.c
index cea37ae826153..712fb2351c271 100644
--- a/lib/test_overflow.c
+++ b/lib/test_overflow.c
@@ -594,12 +594,110 @@ static int __init test_overflow_allocation(void)
 	return err;
 }
 
+struct __test_flex_array {
+	unsigned long flags;
+	size_t count;
+	unsigned long data[];
+};
+
+static int __init test_overflow_size_helpers(void)
+{
+	struct __test_flex_array *obj;
+	int count = 0;
+	int err = 0;
+	int var;
+
+#define check_one_size_helper(expected, func, args...)	({	\
+	bool __failure = false;					\
+	size_t _r;						\
+								\
+	_r = func(args);					\
+	if (_r != (expected)) {					\
+		pr_warn("expected " #func "(" #args ") "	\
+			"to return %zu but got %zu instead\n",	\
+			(size_t)(expected), _r);		\
+		__failure = true;				\
+	}							\
+	count++;						\
+	__failure;						\
+})
+
+	var = 4;
+	err |= check_one_size_helper(20,       size_mul, var++, 5);
+	err |= check_one_size_helper(20,       size_mul, 4, var++);
+	err |= check_one_size_helper(0,	       size_mul, 0, 3);
+	err |= check_one_size_helper(0,	       size_mul, 3, 0);
+	err |= check_one_size_helper(6,	       size_mul, 2, 3);
+	err |= check_one_size_helper(SIZE_MAX, size_mul, SIZE_MAX,  1);
+	err |= check_one_size_helper(SIZE_MAX, size_mul, SIZE_MAX,  3);
+	err |= check_one_size_helper(SIZE_MAX, size_mul, SIZE_MAX, -3);
+
+	var = 4;
+	err |= check_one_size_helper(9,        size_add, var++, 5);
+	err |= check_one_size_helper(9,        size_add, 4, var++);
+	err |= check_one_size_helper(9,	       size_add, 9, 0);
+	err |= check_one_size_helper(9,	       size_add, 0, 9);
+	err |= check_one_size_helper(5,	       size_add, 2, 3);
+	err |= check_one_size_helper(SIZE_MAX, size_add, SIZE_MAX,  1);
+	err |= check_one_size_helper(SIZE_MAX, size_add, SIZE_MAX,  3);
+	err |= check_one_size_helper(SIZE_MAX, size_add, SIZE_MAX, -3);
+
+	var = 4;
+	err |= check_one_size_helper(1,        size_sub, var--, 3);
+	err |= check_one_size_helper(1,        size_sub, 4, var--);
+	err |= check_one_size_helper(1,        size_sub, 3, 2);
+	err |= check_one_size_helper(9,	       size_sub, 9, 0);
+	err |= check_one_size_helper(SIZE_MAX, size_sub, 9, -3);
+	err |= check_one_size_helper(SIZE_MAX, size_sub, 0, 9);
+	err |= check_one_size_helper(SIZE_MAX, size_sub, 2, 3);
+	err |= check_one_size_helper(SIZE_MAX, size_sub, SIZE_MAX,  0);
+	err |= check_one_size_helper(SIZE_MAX, size_sub, SIZE_MAX, 10);
+	err |= check_one_size_helper(SIZE_MAX, size_sub, 0,  SIZE_MAX);
+	err |= check_one_size_helper(SIZE_MAX, size_sub, 14, SIZE_MAX);
+	err |= check_one_size_helper(SIZE_MAX - 2, size_sub, SIZE_MAX - 1,  1);
+	err |= check_one_size_helper(SIZE_MAX - 4, size_sub, SIZE_MAX - 1,  3);
+	err |= check_one_size_helper(1,		size_sub, SIZE_MAX - 1, -3);
+
+	var = 4;
+	err |= check_one_size_helper(4 * sizeof(*obj->data),
+				     flex_array_size, obj, data, var++);
+	err |= check_one_size_helper(5 * sizeof(*obj->data),
+				     flex_array_size, obj, data, var++);
+	err |= check_one_size_helper(0, flex_array_size, obj, data, 0);
+	err |= check_one_size_helper(sizeof(*obj->data),
+				     flex_array_size, obj, data, 1);
+	err |= check_one_size_helper(7 * sizeof(*obj->data),
+				     flex_array_size, obj, data, 7);
+	err |= check_one_size_helper(SIZE_MAX,
+				     flex_array_size, obj, data, -1);
+	err |= check_one_size_helper(SIZE_MAX,
+				     flex_array_size, obj, data, SIZE_MAX - 4);
+
+	var = 4;
+	err |= check_one_size_helper(sizeof(*obj) + (4 * sizeof(*obj->data)),
+				     struct_size, obj, data, var++);
+	err |= check_one_size_helper(sizeof(*obj) + (5 * sizeof(*obj->data)),
+				     struct_size, obj, data, var++);
+	err |= check_one_size_helper(sizeof(*obj), struct_size, obj, data, 0);
+	err |= check_one_size_helper(sizeof(*obj) + sizeof(*obj->data),
+				     struct_size, obj, data, 1);
+	err |= check_one_size_helper(SIZE_MAX,
+				     struct_size, obj, data, -3);
+	err |= check_one_size_helper(SIZE_MAX,
+				     struct_size, obj, data, SIZE_MAX - 3);
+
+	pr_info("%d overflow size helper tests finished\n", count);
+
+	return err;
+}
+
 static int __init test_module_init(void)
 {
 	int err = 0;
 
 	err |= test_overflow_calculation();
 	err |= test_overflow_shift();
+	err |= test_overflow_size_helpers();
 	err |= test_overflow_allocation();
 
 	if (err) {
-- 
GitLab


From 230f6fa2c1db6a3f3e668cfe95995ac8e6eee212 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 9 Feb 2022 16:40:41 -0800
Subject: [PATCH 0458/1586] overflow: Provide constant expression struct_size

There have been cases where struct_size() (or flex_array_size()) needs
to be calculated for an initializer, which requires it be a constant
expression. This is possible when the "count" argument is a constant
expression, so provide this ability for the helpers.

Cc: Gustavo A. R. Silva <gustavoars@kernel.org>
Cc: Nathan Chancellor <nathan@kernel.org>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Tested-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Link: https://lore.kernel.org/lkml/20220210010407.GA701603@embeddedor
---
 include/linux/overflow.h | 10 +++++++---
 lib/test_overflow.c      | 26 +++++++++++++++++---------
 2 files changed, 24 insertions(+), 12 deletions(-)

diff --git a/include/linux/overflow.h b/include/linux/overflow.h
index 59d7228104d02..f1221d11f8e57 100644
--- a/include/linux/overflow.h
+++ b/include/linux/overflow.h
@@ -4,6 +4,7 @@
 
 #include <linux/compiler.h>
 #include <linux/limits.h>
+#include <linux/const.h>
 
 /*
  * We need to compute the minimum and maximum values representable in a given
@@ -221,8 +222,9 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend)
  * Return: number of bytes needed or SIZE_MAX on overflow.
  */
 #define flex_array_size(p, member, count)				\
-	size_mul(count,							\
-		 sizeof(*(p)->member) + __must_be_array((p)->member))
+	__builtin_choose_expr(__is_constexpr(count),			\
+		(count) * sizeof(*(p)->member) + __must_be_array((p)->member),	\
+		size_mul(count, sizeof(*(p)->member) + __must_be_array((p)->member)))
 
 /**
  * struct_size() - Calculate size of structure with trailing flexible array.
@@ -237,6 +239,8 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend)
  * Return: number of bytes needed or SIZE_MAX on overflow.
  */
 #define struct_size(p, member, count)					\
-	size_add(sizeof(*(p)), flex_array_size(p, member, count))
+	__builtin_choose_expr(__is_constexpr(count),			\
+		sizeof(*(p)) + flex_array_size(p, member, count),	\
+		size_add(sizeof(*(p)), flex_array_size(p, member, count)))
 
 #endif /* __LINUX_OVERFLOW_H */
diff --git a/lib/test_overflow.c b/lib/test_overflow.c
index 712fb2351c271..f6530fce799db 100644
--- a/lib/test_overflow.c
+++ b/lib/test_overflow.c
@@ -602,10 +602,18 @@ struct __test_flex_array {
 
 static int __init test_overflow_size_helpers(void)
 {
+	/* Make sure struct_size() can be used in a constant expression. */
+	u8 ce_array[struct_size((struct __test_flex_array *)0, data, 55)];
 	struct __test_flex_array *obj;
 	int count = 0;
 	int err = 0;
 	int var;
+	volatile int unconst = 0;
+
+	/* Verify constant expression against runtime version. */
+	var = 55;
+	OPTIMIZER_HIDE_VAR(var);
+	err |= sizeof(ce_array) != struct_size(obj, data, var);
 
 #define check_one_size_helper(expected, func, args...)	({	\
 	bool __failure = false;					\
@@ -663,28 +671,28 @@ static int __init test_overflow_size_helpers(void)
 				     flex_array_size, obj, data, var++);
 	err |= check_one_size_helper(5 * sizeof(*obj->data),
 				     flex_array_size, obj, data, var++);
-	err |= check_one_size_helper(0, flex_array_size, obj, data, 0);
+	err |= check_one_size_helper(0, flex_array_size, obj, data, 0 + unconst);
 	err |= check_one_size_helper(sizeof(*obj->data),
-				     flex_array_size, obj, data, 1);
+				     flex_array_size, obj, data, 1 + unconst);
 	err |= check_one_size_helper(7 * sizeof(*obj->data),
-				     flex_array_size, obj, data, 7);
+				     flex_array_size, obj, data, 7 + unconst);
 	err |= check_one_size_helper(SIZE_MAX,
-				     flex_array_size, obj, data, -1);
+				     flex_array_size, obj, data, -1 + unconst);
 	err |= check_one_size_helper(SIZE_MAX,
-				     flex_array_size, obj, data, SIZE_MAX - 4);
+				     flex_array_size, obj, data, SIZE_MAX - 4 + unconst);
 
 	var = 4;
 	err |= check_one_size_helper(sizeof(*obj) + (4 * sizeof(*obj->data)),
 				     struct_size, obj, data, var++);
 	err |= check_one_size_helper(sizeof(*obj) + (5 * sizeof(*obj->data)),
 				     struct_size, obj, data, var++);
-	err |= check_one_size_helper(sizeof(*obj), struct_size, obj, data, 0);
+	err |= check_one_size_helper(sizeof(*obj), struct_size, obj, data, 0 + unconst);
 	err |= check_one_size_helper(sizeof(*obj) + sizeof(*obj->data),
-				     struct_size, obj, data, 1);
+				     struct_size, obj, data, 1 + unconst);
 	err |= check_one_size_helper(SIZE_MAX,
-				     struct_size, obj, data, -3);
+				     struct_size, obj, data, -3 + unconst);
 	err |= check_one_size_helper(SIZE_MAX,
-				     struct_size, obj, data, SIZE_MAX - 3);
+				     struct_size, obj, data, SIZE_MAX - 3 + unconst);
 
 	pr_info("%d overflow size helper tests finished\n", count);
 
-- 
GitLab


From 248c793359daacd826a7507a258ffe41653efef7 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 15 Feb 2022 11:05:36 +0100
Subject: [PATCH 0459/1586] blk-mq: make the blk-mq stacking code optional

The code to stack blk-mq drivers is only used by dm-multipath, and
will preferably stay that way.  Make it optional and only selected
by device mapper, so that the buildbots more easily catch abuses
like the one that slipped in in the ufs driver in the last merged
window.  Another positive side effects is that kernel builds without
device mapper shrink a little bit as well.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Mike Snitzer <snitzer@redhat.com>
Link: https://lore.kernel.org/r/20220215100540.3892965-2-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/Kconfig      | 3 +++
 block/blk-mq.c     | 2 ++
 drivers/md/Kconfig | 1 +
 3 files changed, 6 insertions(+)

diff --git a/block/Kconfig b/block/Kconfig
index 205f8d01c6952..168b873eb666d 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -230,6 +230,9 @@ config BLK_PM
 config BLOCK_HOLDER_DEPRECATED
 	bool
 
+config BLK_MQ_STACKING
+	bool
+
 source "block/Kconfig.iosched"
 
 endif # BLOCK
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 6c59ffe765fde..db62d34afb637 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2840,6 +2840,7 @@ void blk_mq_submit_bio(struct bio *bio)
 				blk_mq_try_issue_directly(rq->mq_hctx, rq));
 }
 
+#ifdef CONFIG_BLK_MQ_STACKING
 /**
  * blk_cloned_rq_check_limits - Helper function to check a cloned request
  *                              for the new queue limits
@@ -3017,6 +3018,7 @@ free_and_out:
 	return -ENOMEM;
 }
 EXPORT_SYMBOL_GPL(blk_rq_prep_clone);
+#endif /* CONFIG_BLK_MQ_STACKING */
 
 /*
  * Steal bios from a request and add them to a bio list.
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index b5ea378e66cb1..998a5cfdbc4e9 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -204,6 +204,7 @@ config BLK_DEV_DM
 	tristate "Device mapper support"
 	select BLOCK_HOLDER_DEPRECATED if SYSFS
 	select BLK_DEV_DM_BUILTIN
+	select BLK_MQ_STACKING
 	depends on DAX || DAX=n
 	help
 	  Device-mapper is a low level volume manager.  It works by allowing
-- 
GitLab


From a5efda3c46a1db9a579d953667906933d5037bf9 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 15 Feb 2022 11:05:37 +0100
Subject: [PATCH 0460/1586] blk-mq: fold blk_cloned_rq_check_limits into
 blk_insert_cloned_request

Fold blk_cloned_rq_check_limits into its only caller.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Mike Snitzer <snitzer@redhat.com>
Link: https://lore.kernel.org/r/20220215100540.3892965-3-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c | 38 +++++---------------------------------
 1 file changed, 5 insertions(+), 33 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index db62d34afb637..fc132933397fb 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2842,26 +2842,14 @@ void blk_mq_submit_bio(struct bio *bio)
 
 #ifdef CONFIG_BLK_MQ_STACKING
 /**
- * blk_cloned_rq_check_limits - Helper function to check a cloned request
- *                              for the new queue limits
- * @q:  the queue
- * @rq: the request being checked
- *
- * Description:
- *    @rq may have been made based on weaker limitations of upper-level queues
- *    in request stacking drivers, and it may violate the limitation of @q.
- *    Since the block layer and the underlying device driver trust @rq
- *    after it is inserted to @q, it should be checked against @q before
- *    the insertion using this generic function.
- *
- *    Request stacking drivers like request-based dm may change the queue
- *    limits when retrying requests on other queues. Those requests need
- *    to be checked against the new queue limits again during dispatch.
+ * blk_insert_cloned_request - Helper for stacking drivers to submit a request
+ * @q:  the queue to submit the request
+ * @rq: the request being queued
  */
-static blk_status_t blk_cloned_rq_check_limits(struct request_queue *q,
-				      struct request *rq)
+blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq)
 {
 	unsigned int max_sectors = blk_queue_get_max_sectors(q, req_op(rq));
+	blk_status_t ret;
 
 	if (blk_rq_sectors(rq) > max_sectors) {
 		/*
@@ -2893,22 +2881,6 @@ static blk_status_t blk_cloned_rq_check_limits(struct request_queue *q,
 		return BLK_STS_IOERR;
 	}
 
-	return BLK_STS_OK;
-}
-
-/**
- * blk_insert_cloned_request - Helper for stacking drivers to submit a request
- * @q:  the queue to submit the request
- * @rq: the request being queued
- */
-blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq)
-{
-	blk_status_t ret;
-
-	ret = blk_cloned_rq_check_limits(q, rq);
-	if (ret != BLK_STS_OK)
-		return ret;
-
 	if (rq->q->disk &&
 	    should_fail_request(rq->q->disk->part0, blk_rq_bytes(rq)))
 		return BLK_STS_IOERR;
-- 
GitLab


From 28db4711bf48303814dcfd8d41a41106e90bc374 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 15 Feb 2022 11:05:38 +0100
Subject: [PATCH 0461/1586] blk-mq: remove the request_queue argument to
 blk_insert_cloned_request

The request must be submitted to the queue it was allocated for, so
remove the extra request_queue argument.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Mike Snitzer <snitzer@redhat.com>
Link: https://lore.kernel.org/r/20220215100540.3892965-4-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c         | 9 ++++-----
 drivers/md/dm-rq.c     | 2 +-
 include/linux/blk-mq.h | 3 +--
 3 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index fc132933397fb..886836a54064c 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2843,11 +2843,11 @@ void blk_mq_submit_bio(struct bio *bio)
 #ifdef CONFIG_BLK_MQ_STACKING
 /**
  * blk_insert_cloned_request - Helper for stacking drivers to submit a request
- * @q:  the queue to submit the request
  * @rq: the request being queued
  */
-blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq)
+blk_status_t blk_insert_cloned_request(struct request *rq)
 {
+	struct request_queue *q = rq->q;
 	unsigned int max_sectors = blk_queue_get_max_sectors(q, req_op(rq));
 	blk_status_t ret;
 
@@ -2881,8 +2881,7 @@ blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *
 		return BLK_STS_IOERR;
 	}
 
-	if (rq->q->disk &&
-	    should_fail_request(rq->q->disk->part0, blk_rq_bytes(rq)))
+	if (q->disk && should_fail_request(q->disk->part0, blk_rq_bytes(rq)))
 		return BLK_STS_IOERR;
 
 	if (blk_crypto_insert_cloned_request(rq))
@@ -2895,7 +2894,7 @@ blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *
 	 * bypass a potential scheduler on the bottom device for
 	 * insert.
 	 */
-	blk_mq_run_dispatch_ops(rq->q,
+	blk_mq_run_dispatch_ops(q,
 			ret = blk_mq_request_issue_directly(rq, true));
 	if (ret)
 		blk_account_io_done(rq, ktime_get_ns());
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index 579ab6183d4d8..2fcc9b7f391b3 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -311,7 +311,7 @@ static blk_status_t dm_dispatch_clone_request(struct request *clone, struct requ
 		clone->rq_flags |= RQF_IO_STAT;
 
 	clone->start_time_ns = ktime_get_ns();
-	r = blk_insert_cloned_request(clone->q, clone);
+	r = blk_insert_cloned_request(clone);
 	if (r != BLK_STS_OK && r != BLK_STS_RESOURCE && r != BLK_STS_DEV_RESOURCE)
 		/* must complete clone in terms of original request */
 		dm_complete_request(rq, r);
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index d319ffa59354a..3a41d50b85d3a 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -952,8 +952,7 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
 		struct bio_set *bs, gfp_t gfp_mask,
 		int (*bio_ctr)(struct bio *, struct bio *, void *), void *data);
 void blk_rq_unprep_clone(struct request *rq);
-blk_status_t blk_insert_cloned_request(struct request_queue *q,
-		struct request *rq);
+blk_status_t blk_insert_cloned_request(struct request *rq);
 
 struct rq_map_data {
 	struct page **pages;
-- 
GitLab


From 8803c89f365b344859decd5b3074e9bb3b65caa1 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 15 Feb 2022 11:05:39 +0100
Subject: [PATCH 0462/1586] dm: remove useless code from
 dm_dispatch_clone_request

Both ->start_time_ns and the RQF_IO_STAT are set when the request is
allocated using blk_mq_alloc_request by dm-mpath in blk_mq_rq_ctx_init.
The block layer also ensures ->start_time_ns is only set when actually
needed.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Mike Snitzer <snitzer@redhat.com>
Link: https://lore.kernel.org/r/20220215100540.3892965-5-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/md/dm-rq.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index 2fcc9b7f391b3..8f6117342d322 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -307,10 +307,6 @@ static blk_status_t dm_dispatch_clone_request(struct request *clone, struct requ
 {
 	blk_status_t r;
 
-	if (blk_queue_io_stat(clone->q))
-		clone->rq_flags |= RQF_IO_STAT;
-
-	clone->start_time_ns = ktime_get_ns();
 	r = blk_insert_cloned_request(clone);
 	if (r != BLK_STS_OK && r != BLK_STS_RESOURCE && r != BLK_STS_DEV_RESOURCE)
 		/* must complete clone in terms of original request */
-- 
GitLab


From 9f9adea7187ec1978bd3863f59b7fe27ccf33519 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 15 Feb 2022 11:05:40 +0100
Subject: [PATCH 0463/1586] dm: remove dm_dispatch_clone_request

Fold dm_dispatch_clone_request into it's only caller, and use a switch
statement to single dispatch for the handling of the different return
values from blk_insert_cloned_request.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Mike Snitzer <snitzer@redhat.com>
Link: https://lore.kernel.org/r/20220215100540.3892965-6-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/md/dm-rq.c | 22 +++++++++-------------
 1 file changed, 9 insertions(+), 13 deletions(-)

diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index 8f6117342d322..6948d5db90925 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -303,17 +303,6 @@ static void end_clone_request(struct request *clone, blk_status_t error)
 	dm_complete_request(tio->orig, error);
 }
 
-static blk_status_t dm_dispatch_clone_request(struct request *clone, struct request *rq)
-{
-	blk_status_t r;
-
-	r = blk_insert_cloned_request(clone);
-	if (r != BLK_STS_OK && r != BLK_STS_RESOURCE && r != BLK_STS_DEV_RESOURCE)
-		/* must complete clone in terms of original request */
-		dm_complete_request(rq, r);
-	return r;
-}
-
 static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig,
 				 void *data)
 {
@@ -394,13 +383,20 @@ static int map_request(struct dm_rq_target_io *tio)
 		/* The target has remapped the I/O so dispatch it */
 		trace_block_rq_remap(clone, disk_devt(dm_disk(md)),
 				     blk_rq_pos(rq));
-		ret = dm_dispatch_clone_request(clone, rq);
-		if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) {
+		ret = blk_insert_cloned_request(clone);
+		switch (ret) {
+		case BLK_STS_OK:
+			break;
+		case BLK_STS_RESOURCE:
+		case BLK_STS_DEV_RESOURCE:
 			blk_rq_unprep_clone(clone);
 			blk_mq_cleanup_rq(clone);
 			tio->ti->type->release_clone_rq(clone, &tio->info);
 			tio->clone = NULL;
 			return DM_MAPIO_REQUEUE;
+		default:
+			/* must complete clone in terms of original request */
+			dm_complete_request(rq, ret);
 		}
 		break;
 	case DM_MAPIO_REQUEUE:
-- 
GitLab


From a650628bde77f6ac5b1d532092346feff7b58c52 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Wed, 16 Feb 2022 12:45:07 +0800
Subject: [PATCH 0464/1586] block: move submit_bio_checks() into
 submit_bio_noacct

It is more clean & readable to check bio when starting to submit it,
instead of just before calling ->submit_bio() or blk_mq_submit_bio().

Also it provides us chance to optimize bio submission without checking
bio.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Link: https://lore.kernel.org/r/20220216044514.2903784-2-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 5a4a590416298..d4a023667ac11 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -797,9 +797,6 @@ static void __submit_bio(struct bio *bio)
 {
 	struct gendisk *disk = bio->bi_bdev->bd_disk;
 
-	if (unlikely(!submit_bio_checks(bio)))
-		return;
-
 	if (!disk->fops->submit_bio)
 		blk_mq_submit_bio(bio);
 	else
@@ -893,6 +890,9 @@ static void __submit_bio_noacct_mq(struct bio *bio)
  */
 void submit_bio_noacct(struct bio *bio)
 {
+	if (unlikely(!submit_bio_checks(bio)))
+		return;
+
 	/*
 	 * We only want one ->submit_bio to be active at a time, else stack
 	 * usage with stacked devices could be a problem.  Use current->bio_list
-- 
GitLab


From 7f36b7d02a287ed18d02ae821868aa07b0235521 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Wed, 16 Feb 2022 12:45:08 +0800
Subject: [PATCH 0465/1586] block: move blk_crypto_bio_prep() out of blk-mq.c

blk_crypto_bio_prep() is called for both bio based and blk-mq drivers,
so move it out of blk-mq.c, then we can unify this kind of handling.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Link: https://lore.kernel.org/r/20220216044514.2903784-3-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c | 21 ++++++++-------------
 block/blk-mq.c   |  3 ---
 2 files changed, 8 insertions(+), 16 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index d4a023667ac11..f03fff1fa3919 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -783,24 +783,19 @@ end_io:
 	return false;
 }
 
-static void __submit_bio_fops(struct gendisk *disk, struct bio *bio)
-{
-	if (blk_crypto_bio_prep(&bio)) {
-		if (likely(bio_queue_enter(bio) == 0)) {
-			disk->fops->submit_bio(bio);
-			blk_queue_exit(disk->queue);
-		}
-	}
-}
-
 static void __submit_bio(struct bio *bio)
 {
 	struct gendisk *disk = bio->bi_bdev->bd_disk;
 
-	if (!disk->fops->submit_bio)
+	if (unlikely(!blk_crypto_bio_prep(&bio)))
+		return;
+
+	if (!disk->fops->submit_bio) {
 		blk_mq_submit_bio(bio);
-	else
-		__submit_bio_fops(disk, bio);
+	} else if (likely(bio_queue_enter(bio) == 0)) {
+		disk->fops->submit_bio(bio);
+		blk_queue_exit(disk->queue);
+	}
 }
 
 /*
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 886836a54064c..7ca0b47246a61 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2788,9 +2788,6 @@ void blk_mq_submit_bio(struct bio *bio)
 	unsigned int nr_segs = 1;
 	blk_status_t ret;
 
-	if (unlikely(!blk_crypto_bio_prep(&bio)))
-		return;
-
 	blk_queue_bounce(q, &bio);
 	if (blk_may_split(q, bio))
 		__blk_queue_split(q, &bio, &nr_segs);
-- 
GitLab


From 29ff23624e21c89d3321d6429dec8ad3847b534a Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Wed, 16 Feb 2022 12:45:09 +0800
Subject: [PATCH 0466/1586] block: don't declare submit_bio_checks in local
 header

submit_bio_checks() won't be called outside of block/blk-core.c any more
since commit 9d497e2941c3 ("block: don't protect submit_bio_checks by
q_usage_counter"), so mark it as one local helper.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Link: https://lore.kernel.org/r/20220216044514.2903784-4-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c | 2 +-
 block/blk.h      | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index f03fff1fa3919..5248b94d276bd 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -676,7 +676,7 @@ static inline blk_status_t blk_check_zone_append(struct request_queue *q,
 	return BLK_STS_OK;
 }
 
-noinline_for_stack bool submit_bio_checks(struct bio *bio)
+static noinline_for_stack bool submit_bio_checks(struct bio *bio)
 {
 	struct block_device *bdev = bio->bi_bdev;
 	struct request_queue *q = bdev_get_queue(bdev);
diff --git a/block/blk.h b/block/blk.h
index abb663a2a147b..b2516cb4f98e6 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -46,7 +46,6 @@ void blk_freeze_queue(struct request_queue *q);
 void __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic);
 void blk_queue_start_drain(struct request_queue *q);
 int __bio_queue_enter(struct request_queue *q, struct bio *bio);
-bool submit_bio_checks(struct bio *bio);
 
 static inline bool blk_try_enter_queue(struct request_queue *q, bool pm)
 {
-- 
GitLab


From 3f98c753717c600eb5708e9b78b3eba6664bddf1 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Wed, 16 Feb 2022 12:45:10 +0800
Subject: [PATCH 0467/1586] block: don't check bio in
 blk_throtl_dispatch_work_fn

The bio has been checked already before throttling, so no need to check
it again before dispatching it from throttle queue.

Add a helper of submit_bio_noacct_nocheck() for this purpose.

Signed-off-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220216044514.2903784-5-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c     | 30 +++++++++++++++++-------------
 block/blk-throttle.c |  2 +-
 block/blk.h          |  1 +
 3 files changed, 19 insertions(+), 14 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 5248b94d276bd..72b7b2214c708 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -874,20 +874,8 @@ static void __submit_bio_noacct_mq(struct bio *bio)
 	current->bio_list = NULL;
 }
 
-/**
- * submit_bio_noacct - re-submit a bio to the block device layer for I/O
- * @bio:  The bio describing the location in memory and on the device.
- *
- * This is a version of submit_bio() that shall only be used for I/O that is
- * resubmitted to lower level drivers by stacking block drivers.  All file
- * systems and other upper level users of the block layer should use
- * submit_bio() instead.
- */
-void submit_bio_noacct(struct bio *bio)
+void submit_bio_noacct_nocheck(struct bio *bio)
 {
-	if (unlikely(!submit_bio_checks(bio)))
-		return;
-
 	/*
 	 * We only want one ->submit_bio to be active at a time, else stack
 	 * usage with stacked devices could be a problem.  Use current->bio_list
@@ -901,6 +889,22 @@ void submit_bio_noacct(struct bio *bio)
 	else
 		__submit_bio_noacct(bio);
 }
+
+/**
+ * submit_bio_noacct - re-submit a bio to the block device layer for I/O
+ * @bio:  The bio describing the location in memory and on the device.
+ *
+ * This is a version of submit_bio() that shall only be used for I/O that is
+ * resubmitted to lower level drivers by stacking block drivers.  All file
+ * systems and other upper level users of the block layer should use
+ * submit_bio() instead.
+ */
+void submit_bio_noacct(struct bio *bio)
+{
+	if (unlikely(!submit_bio_checks(bio)))
+		return;
+	submit_bio_noacct_nocheck(bio);
+}
 EXPORT_SYMBOL(submit_bio_noacct);
 
 /**
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 73640d80e99ec..8770768f10009 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -1218,7 +1218,7 @@ static void blk_throtl_dispatch_work_fn(struct work_struct *work)
 	if (!bio_list_empty(&bio_list_on_stack)) {
 		blk_start_plug(&plug);
 		while ((bio = bio_list_pop(&bio_list_on_stack)))
-			submit_bio_noacct(bio);
+			submit_bio_noacct_nocheck(bio);
 		blk_finish_plug(&plug);
 	}
 }
diff --git a/block/blk.h b/block/blk.h
index b2516cb4f98e6..ebaa59ca46ca6 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -46,6 +46,7 @@ void blk_freeze_queue(struct request_queue *q);
 void __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic);
 void blk_queue_start_drain(struct request_queue *q);
 int __bio_queue_enter(struct request_queue *q, struct bio *bio);
+void submit_bio_noacct_nocheck(struct bio *bio);
 
 static inline bool blk_try_enter_queue(struct request_queue *q, bool pm)
 {
-- 
GitLab


From d24c670ec1f9f1dc320e59004e61f3491ae24546 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Wed, 16 Feb 2022 12:45:11 +0800
Subject: [PATCH 0468/1586] block: merge submit_bio_checks() into
 submit_bio_noacct

Now submit_bio_checks() is only called by submit_bio_noacct(), so merge
it into submit_bio_noacct().

Suggested-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Link: https://lore.kernel.org/r/20220216044514.2903784-6-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c | 209 +++++++++++++++++++++++------------------------
 1 file changed, 101 insertions(+), 108 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 72b7b2214c708..94bf37f8e61d2 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -676,113 +676,6 @@ static inline blk_status_t blk_check_zone_append(struct request_queue *q,
 	return BLK_STS_OK;
 }
 
-static noinline_for_stack bool submit_bio_checks(struct bio *bio)
-{
-	struct block_device *bdev = bio->bi_bdev;
-	struct request_queue *q = bdev_get_queue(bdev);
-	blk_status_t status = BLK_STS_IOERR;
-	struct blk_plug *plug;
-
-	might_sleep();
-
-	plug = blk_mq_plug(q, bio);
-	if (plug && plug->nowait)
-		bio->bi_opf |= REQ_NOWAIT;
-
-	/*
-	 * For a REQ_NOWAIT based request, return -EOPNOTSUPP
-	 * if queue does not support NOWAIT.
-	 */
-	if ((bio->bi_opf & REQ_NOWAIT) && !blk_queue_nowait(q))
-		goto not_supported;
-
-	if (should_fail_bio(bio))
-		goto end_io;
-	if (unlikely(bio_check_ro(bio)))
-		goto end_io;
-	if (!bio_flagged(bio, BIO_REMAPPED)) {
-		if (unlikely(bio_check_eod(bio)))
-			goto end_io;
-		if (bdev->bd_partno && unlikely(blk_partition_remap(bio)))
-			goto end_io;
-	}
-
-	/*
-	 * Filter flush bio's early so that bio based drivers without flush
-	 * support don't have to worry about them.
-	 */
-	if (op_is_flush(bio->bi_opf) &&
-	    !test_bit(QUEUE_FLAG_WC, &q->queue_flags)) {
-		bio->bi_opf &= ~(REQ_PREFLUSH | REQ_FUA);
-		if (!bio_sectors(bio)) {
-			status = BLK_STS_OK;
-			goto end_io;
-		}
-	}
-
-	if (!test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
-		bio_clear_polled(bio);
-
-	switch (bio_op(bio)) {
-	case REQ_OP_DISCARD:
-		if (!blk_queue_discard(q))
-			goto not_supported;
-		break;
-	case REQ_OP_SECURE_ERASE:
-		if (!blk_queue_secure_erase(q))
-			goto not_supported;
-		break;
-	case REQ_OP_WRITE_SAME:
-		if (!q->limits.max_write_same_sectors)
-			goto not_supported;
-		break;
-	case REQ_OP_ZONE_APPEND:
-		status = blk_check_zone_append(q, bio);
-		if (status != BLK_STS_OK)
-			goto end_io;
-		break;
-	case REQ_OP_ZONE_RESET:
-	case REQ_OP_ZONE_OPEN:
-	case REQ_OP_ZONE_CLOSE:
-	case REQ_OP_ZONE_FINISH:
-		if (!blk_queue_is_zoned(q))
-			goto not_supported;
-		break;
-	case REQ_OP_ZONE_RESET_ALL:
-		if (!blk_queue_is_zoned(q) || !blk_queue_zone_resetall(q))
-			goto not_supported;
-		break;
-	case REQ_OP_WRITE_ZEROES:
-		if (!q->limits.max_write_zeroes_sectors)
-			goto not_supported;
-		break;
-	default:
-		break;
-	}
-
-	if (blk_throtl_bio(bio))
-		return false;
-
-	blk_cgroup_bio_start(bio);
-	blkcg_bio_issue_init(bio);
-
-	if (!bio_flagged(bio, BIO_TRACE_COMPLETION)) {
-		trace_block_bio_queue(bio);
-		/* Now that enqueuing has been traced, we need to trace
-		 * completion as well.
-		 */
-		bio_set_flag(bio, BIO_TRACE_COMPLETION);
-	}
-	return true;
-
-not_supported:
-	status = BLK_STS_NOTSUPP;
-end_io:
-	bio->bi_status = status;
-	bio_endio(bio);
-	return false;
-}
-
 static void __submit_bio(struct bio *bio)
 {
 	struct gendisk *disk = bio->bi_bdev->bd_disk;
@@ -901,9 +794,109 @@ void submit_bio_noacct_nocheck(struct bio *bio)
  */
 void submit_bio_noacct(struct bio *bio)
 {
-	if (unlikely(!submit_bio_checks(bio)))
+	struct block_device *bdev = bio->bi_bdev;
+	struct request_queue *q = bdev_get_queue(bdev);
+	blk_status_t status = BLK_STS_IOERR;
+	struct blk_plug *plug;
+
+	might_sleep();
+
+	plug = blk_mq_plug(q, bio);
+	if (plug && plug->nowait)
+		bio->bi_opf |= REQ_NOWAIT;
+
+	/*
+	 * For a REQ_NOWAIT based request, return -EOPNOTSUPP
+	 * if queue does not support NOWAIT.
+	 */
+	if ((bio->bi_opf & REQ_NOWAIT) && !blk_queue_nowait(q))
+		goto not_supported;
+
+	if (should_fail_bio(bio))
+		goto end_io;
+	if (unlikely(bio_check_ro(bio)))
+		goto end_io;
+	if (!bio_flagged(bio, BIO_REMAPPED)) {
+		if (unlikely(bio_check_eod(bio)))
+			goto end_io;
+		if (bdev->bd_partno && unlikely(blk_partition_remap(bio)))
+			goto end_io;
+	}
+
+	/*
+	 * Filter flush bio's early so that bio based drivers without flush
+	 * support don't have to worry about them.
+	 */
+	if (op_is_flush(bio->bi_opf) &&
+	    !test_bit(QUEUE_FLAG_WC, &q->queue_flags)) {
+		bio->bi_opf &= ~(REQ_PREFLUSH | REQ_FUA);
+		if (!bio_sectors(bio)) {
+			status = BLK_STS_OK;
+			goto end_io;
+		}
+	}
+
+	if (!test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
+		bio_clear_polled(bio);
+
+	switch (bio_op(bio)) {
+	case REQ_OP_DISCARD:
+		if (!blk_queue_discard(q))
+			goto not_supported;
+		break;
+	case REQ_OP_SECURE_ERASE:
+		if (!blk_queue_secure_erase(q))
+			goto not_supported;
+		break;
+	case REQ_OP_WRITE_SAME:
+		if (!q->limits.max_write_same_sectors)
+			goto not_supported;
+		break;
+	case REQ_OP_ZONE_APPEND:
+		status = blk_check_zone_append(q, bio);
+		if (status != BLK_STS_OK)
+			goto end_io;
+		break;
+	case REQ_OP_ZONE_RESET:
+	case REQ_OP_ZONE_OPEN:
+	case REQ_OP_ZONE_CLOSE:
+	case REQ_OP_ZONE_FINISH:
+		if (!blk_queue_is_zoned(q))
+			goto not_supported;
+		break;
+	case REQ_OP_ZONE_RESET_ALL:
+		if (!blk_queue_is_zoned(q) || !blk_queue_zone_resetall(q))
+			goto not_supported;
+		break;
+	case REQ_OP_WRITE_ZEROES:
+		if (!q->limits.max_write_zeroes_sectors)
+			goto not_supported;
+		break;
+	default:
+		break;
+	}
+
+	if (blk_throtl_bio(bio))
 		return;
+
+	blk_cgroup_bio_start(bio);
+	blkcg_bio_issue_init(bio);
+
+	if (!bio_flagged(bio, BIO_TRACE_COMPLETION)) {
+		trace_block_bio_queue(bio);
+		/* Now that enqueuing has been traced, we need to trace
+		 * completion as well.
+		 */
+		bio_set_flag(bio, BIO_TRACE_COMPLETION);
+	}
 	submit_bio_noacct_nocheck(bio);
+	return;
+
+not_supported:
+	status = BLK_STS_NOTSUPP;
+end_io:
+	bio->bi_status = status;
+	bio_endio(bio);
 }
 EXPORT_SYMBOL(submit_bio_noacct);
 
-- 
GitLab


From 9f5ede3c01f9951b0ae7d68b28762ad51d9bacc8 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Wed, 16 Feb 2022 12:45:12 +0800
Subject: [PATCH 0469/1586] block: throttle split bio in case of iops limit

Commit 111be8839817 ("block-throttle: avoid double charge") marks bio as
BIO_THROTTLED unconditionally if __blk_throtl_bio() is called on this bio,
then this bio won't be called into __blk_throtl_bio() any more. This way
is to avoid double charge in case of bio splitting. It is reasonable for
read/write throughput limit, but not reasonable for IOPS limit because
block layer provides io accounting against split bio.

Chunguang Xu has already observed this issue and fixed it in commit
4f1e9630afe6 ("blk-throtl: optimize IOPS throttle for large IO scenarios").
However, that patch only covers bio splitting in __blk_queue_split(), and
we have other kind of bio splitting, such as bio_split() &
submit_bio_noacct() and other ways.

This patch tries to fix the issue in one generic way by always charging
the bio for iops limit in blk_throtl_bio(). This way is reasonable:
re-submission & fast-cloned bio is charged if it is submitted to same
disk/queue, and BIO_THROTTLED will be cleared if bio->bi_bdev is changed.

This new approach can get much more smooth/stable iops limit compared with
commit 4f1e9630afe6 ("blk-throtl: optimize IOPS throttle for large IO
scenarios") since that commit can't throttle current split bios actually.

Also this way won't cause new double bio iops charge in
blk_throtl_dispatch_work_fn() in which blk_throtl_bio() won't be called
any more.

Reported-by: Ning Li <lining2020x@163.com>
Acked-by: Tejun Heo <tj@kernel.org>
Cc: Chunguang Xu <brookxu@tencent.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20220216044514.2903784-7-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-merge.c    |  2 --
 block/blk-throttle.c | 10 +++++++---
 block/blk-throttle.h |  2 --
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/block/blk-merge.c b/block/blk-merge.c
index 4de34a332c9fd..f5255991b773c 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -368,8 +368,6 @@ void __blk_queue_split(struct request_queue *q, struct bio **bio,
 		trace_block_split(split, (*bio)->bi_iter.bi_sector);
 		submit_bio_noacct(*bio);
 		*bio = split;
-
-		blk_throtl_charge_bio_split(*bio);
 	}
 }
 
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 8770768f10009..c7aa26d52e845 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -807,7 +807,8 @@ static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio,
 	unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd;
 	unsigned int bio_size = throtl_bio_data_size(bio);
 
-	if (bps_limit == U64_MAX) {
+	/* no need to throttle if this bio's bytes have been accounted */
+	if (bps_limit == U64_MAX || bio_flagged(bio, BIO_THROTTLED)) {
 		if (wait)
 			*wait = 0;
 		return true;
@@ -919,9 +920,12 @@ static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio)
 	unsigned int bio_size = throtl_bio_data_size(bio);
 
 	/* Charge the bio to the group */
-	tg->bytes_disp[rw] += bio_size;
+	if (!bio_flagged(bio, BIO_THROTTLED)) {
+		tg->bytes_disp[rw] += bio_size;
+		tg->last_bytes_disp[rw] += bio_size;
+	}
+
 	tg->io_disp[rw]++;
-	tg->last_bytes_disp[rw] += bio_size;
 	tg->last_io_disp[rw]++;
 
 	/*
diff --git a/block/blk-throttle.h b/block/blk-throttle.h
index 175f03abd9e41..cb43f4417d6ea 100644
--- a/block/blk-throttle.h
+++ b/block/blk-throttle.h
@@ -170,8 +170,6 @@ static inline bool blk_throtl_bio(struct bio *bio)
 {
 	struct throtl_grp *tg = blkg_to_tg(bio->bi_blkg);
 
-	if (bio_flagged(bio, BIO_THROTTLED))
-		return false;
 	if (!tg->has_rules[bio_data_dir(bio)])
 		return false;
 
-- 
GitLab


From 5a93b6027eb4ef5db60a4bc5bdbeba5fb9f29384 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Wed, 16 Feb 2022 12:45:13 +0800
Subject: [PATCH 0470/1586] block: don't try to throttle split bio if iops
 limit isn't set

We need to throttle split bio in case of IOPS limit even though the
split bio has been marked as BIO_THROTTLED since block layer
accounts split bio actually.

If only throughput throttle is setup, no need to throttle any more
if BIO_THROTTLED is set since we have accounted & considered the
whole bio bytes already.

Add one flag of THROTL_TG_HAS_IOPS_LIMIT for serving this purpose.

Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20220216044514.2903784-8-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-throttle.c | 21 ++++++++++++++-------
 block/blk-throttle.h | 11 +++++++++++
 2 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index c7aa26d52e845..ec72eced24d2c 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -41,11 +41,6 @@
 /* A workqueue to queue throttle related work */
 static struct workqueue_struct *kthrotld_workqueue;
 
-enum tg_state_flags {
-	THROTL_TG_PENDING	= 1 << 0,	/* on parent's pending tree */
-	THROTL_TG_WAS_EMPTY	= 1 << 1,	/* bio_lists[] became non-empty */
-};
-
 #define rb_entry_tg(node)	rb_entry((node), struct throtl_grp, rb_node)
 
 /* We measure latency for request size from <= 4k to >= 1M */
@@ -425,12 +420,24 @@ static void tg_update_has_rules(struct throtl_grp *tg)
 	struct throtl_grp *parent_tg = sq_to_tg(tg->service_queue.parent_sq);
 	struct throtl_data *td = tg->td;
 	int rw;
+	int has_iops_limit = 0;
+
+	for (rw = READ; rw <= WRITE; rw++) {
+		unsigned int iops_limit = tg_iops_limit(tg, rw);
 
-	for (rw = READ; rw <= WRITE; rw++)
 		tg->has_rules[rw] = (parent_tg && parent_tg->has_rules[rw]) ||
 			(td->limit_valid[td->limit_index] &&
 			 (tg_bps_limit(tg, rw) != U64_MAX ||
-			  tg_iops_limit(tg, rw) != UINT_MAX));
+			  iops_limit != UINT_MAX));
+
+		if (iops_limit != UINT_MAX)
+			has_iops_limit = 1;
+	}
+
+	if (has_iops_limit)
+		tg->flags |= THROTL_TG_HAS_IOPS_LIMIT;
+	else
+		tg->flags &= ~THROTL_TG_HAS_IOPS_LIMIT;
 }
 
 static void throtl_pd_online(struct blkg_policy_data *pd)
diff --git a/block/blk-throttle.h b/block/blk-throttle.h
index cb43f4417d6ea..c996a15f290e8 100644
--- a/block/blk-throttle.h
+++ b/block/blk-throttle.h
@@ -52,6 +52,12 @@ struct throtl_service_queue {
 	struct timer_list	pending_timer;	/* fires on first_pending_disptime */
 };
 
+enum tg_state_flags {
+	THROTL_TG_PENDING	= 1 << 0,	/* on parent's pending tree */
+	THROTL_TG_WAS_EMPTY	= 1 << 1,	/* bio_lists[] became non-empty */
+	THROTL_TG_HAS_IOPS_LIMIT = 1 << 2,	/* tg has iops limit */
+};
+
 enum {
 	LIMIT_LOW,
 	LIMIT_MAX,
@@ -170,6 +176,11 @@ static inline bool blk_throtl_bio(struct bio *bio)
 {
 	struct throtl_grp *tg = blkg_to_tg(bio->bi_blkg);
 
+	/* no need to throttle bps any more if the bio has been throttled */
+	if (bio_flagged(bio, BIO_THROTTLED) &&
+	    !(tg->flags & THROTL_TG_HAS_IOPS_LIMIT))
+		return false;
+
 	if (!tg->has_rules[bio_data_dir(bio)])
 		return false;
 
-- 
GitLab


From 34841e6fb125aa3f0e33e4eaac9f5eb86b2bb34b Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Wed, 16 Feb 2022 12:45:14 +0800
Subject: [PATCH 0471/1586] block: revert 4f1e9630afe6 ("blk-throtl: optimize
 IOPS throttle for large IO scenarios")

Revert commit 4f1e9630afe6 ("blk-throtl: optimize IOPS throttle for large
IO scenarios") since we have another easier way to address this issue and
get better iops throttling result.

Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20220216044514.2903784-9-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-throttle.c | 28 ----------------------------
 block/blk-throttle.h |  5 -----
 2 files changed, 33 deletions(-)

diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index ec72eced24d2c..a3b3ebc72dd47 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -640,8 +640,6 @@ static inline void throtl_start_new_slice_with_credit(struct throtl_grp *tg,
 	tg->bytes_disp[rw] = 0;
 	tg->io_disp[rw] = 0;
 
-	atomic_set(&tg->io_split_cnt[rw], 0);
-
 	/*
 	 * Previous slice has expired. We must have trimmed it after last
 	 * bio dispatch. That means since start of last slice, we never used
@@ -665,8 +663,6 @@ static inline void throtl_start_new_slice(struct throtl_grp *tg, bool rw)
 	tg->slice_start[rw] = jiffies;
 	tg->slice_end[rw] = jiffies + tg->td->throtl_slice;
 
-	atomic_set(&tg->io_split_cnt[rw], 0);
-
 	throtl_log(&tg->service_queue,
 		   "[%c] new slice start=%lu end=%lu jiffies=%lu",
 		   rw == READ ? 'R' : 'W', tg->slice_start[rw],
@@ -900,9 +896,6 @@ static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio,
 				jiffies + tg->td->throtl_slice);
 	}
 
-	if (iops_limit != UINT_MAX)
-		tg->io_disp[rw] += atomic_xchg(&tg->io_split_cnt[rw], 0);
-
 	if (tg_with_in_bps_limit(tg, bio, bps_limit, &bps_wait) &&
 	    tg_with_in_iops_limit(tg, bio, iops_limit, &iops_wait)) {
 		if (wait)
@@ -1927,14 +1920,12 @@ static void throtl_downgrade_check(struct throtl_grp *tg)
 	}
 
 	if (tg->iops[READ][LIMIT_LOW]) {
-		tg->last_io_disp[READ] += atomic_xchg(&tg->last_io_split_cnt[READ], 0);
 		iops = tg->last_io_disp[READ] * HZ / elapsed_time;
 		if (iops >= tg->iops[READ][LIMIT_LOW])
 			tg->last_low_overflow_time[READ] = now;
 	}
 
 	if (tg->iops[WRITE][LIMIT_LOW]) {
-		tg->last_io_disp[WRITE] += atomic_xchg(&tg->last_io_split_cnt[WRITE], 0);
 		iops = tg->last_io_disp[WRITE] * HZ / elapsed_time;
 		if (iops >= tg->iops[WRITE][LIMIT_LOW])
 			tg->last_low_overflow_time[WRITE] = now;
@@ -2053,25 +2044,6 @@ static inline void throtl_update_latency_buckets(struct throtl_data *td)
 }
 #endif
 
-void blk_throtl_charge_bio_split(struct bio *bio)
-{
-	struct blkcg_gq *blkg = bio->bi_blkg;
-	struct throtl_grp *parent = blkg_to_tg(blkg);
-	struct throtl_service_queue *parent_sq;
-	bool rw = bio_data_dir(bio);
-
-	do {
-		if (!parent->has_rules[rw])
-			break;
-
-		atomic_inc(&parent->io_split_cnt[rw]);
-		atomic_inc(&parent->last_io_split_cnt[rw]);
-
-		parent_sq = parent->service_queue.parent_sq;
-		parent = sq_to_tg(parent_sq);
-	} while (parent);
-}
-
 bool __blk_throtl_bio(struct bio *bio)
 {
 	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
diff --git a/block/blk-throttle.h b/block/blk-throttle.h
index c996a15f290e8..b23a9f3abb824 100644
--- a/block/blk-throttle.h
+++ b/block/blk-throttle.h
@@ -138,9 +138,6 @@ struct throtl_grp {
 	unsigned int bad_bio_cnt; /* bios exceeding latency threshold */
 	unsigned long bio_cnt_reset_time;
 
-	atomic_t io_split_cnt[2];
-	atomic_t last_io_split_cnt[2];
-
 	struct blkg_rwstat stat_bytes;
 	struct blkg_rwstat stat_ios;
 };
@@ -164,13 +161,11 @@ static inline struct throtl_grp *blkg_to_tg(struct blkcg_gq *blkg)
 static inline int blk_throtl_init(struct request_queue *q) { return 0; }
 static inline void blk_throtl_exit(struct request_queue *q) { }
 static inline void blk_throtl_register_queue(struct request_queue *q) { }
-static inline void blk_throtl_charge_bio_split(struct bio *bio) { }
 static inline bool blk_throtl_bio(struct bio *bio) { return false; }
 #else /* CONFIG_BLK_DEV_THROTTLING */
 int blk_throtl_init(struct request_queue *q);
 void blk_throtl_exit(struct request_queue *q);
 void blk_throtl_register_queue(struct request_queue *q);
-void blk_throtl_charge_bio_split(struct bio *bio);
 bool __blk_throtl_bio(struct bio *bio);
 static inline bool blk_throtl_bio(struct bio *bio)
 {
-- 
GitLab


From 76792055c4c8b2472ca1ae48e0ddaf8497529f08 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 15 Feb 2022 10:45:10 +0100
Subject: [PATCH 0472/1586] block: add a ->free_disk method

Add a method to notify the driver that the gendisk is about to be freed.
This allows drivers to tie the lifetime of their private data to that of
the gendisk and thus deal with device removal races without expensive
synchronization and boilerplate code.

A new flag is added so that ->free_disk is only called after a successful
call to add_disk, which significantly simplifies the error handling path
during probing.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220215094514.3828912-2-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/genhd.c          | 5 +++++
 include/linux/blkdev.h | 2 ++
 2 files changed, 7 insertions(+)

diff --git a/block/genhd.c b/block/genhd.c
index 9589d1d59afab..e351fac41bf25 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -526,6 +526,7 @@ int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
 
 	disk_update_readahead(disk);
 	disk_add_events(disk);
+	set_bit(GD_ADDED, &disk->state);
 	return 0;
 
 out_unregister_bdi:
@@ -1119,6 +1120,10 @@ static void disk_release(struct device *dev)
 	xa_destroy(&disk->part_tbl);
 	disk->queue->disk = NULL;
 	blk_put_queue(disk->queue);
+
+	if (test_bit(GD_ADDED, &disk->state) && disk->fops->free_disk)
+		disk->fops->free_disk(disk);
+
 	iput(disk->part0->bd_inode);	/* frees the disk */
 }
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 3bfc75a2a4509..f757f9c2871f8 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -146,6 +146,7 @@ struct gendisk {
 #define GD_READ_ONLY			1
 #define GD_DEAD				2
 #define GD_NATIVE_CAPACITY		3
+#define GD_ADDED			4
 
 	struct mutex open_mutex;	/* open/close mutex */
 	unsigned open_partitions;	/* number of open partitions */
@@ -1464,6 +1465,7 @@ struct block_device_operations {
 	void (*unlock_native_capacity) (struct gendisk *);
 	int (*getgeo)(struct block_device *, struct hd_geometry *);
 	int (*set_read_only)(struct block_device *bdev, bool ro);
+	void (*free_disk)(struct gendisk *disk);
 	/* this callback is with swap_lock and sometimes page table lock held */
 	void (*swap_slot_free_notify) (struct block_device *, unsigned long);
 	int (*report_zones)(struct gendisk *, sector_t sector,
-- 
GitLab


From e2efa0796607efe60c708271be483c3a2b0128de Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 15 Feb 2022 10:45:11 +0100
Subject: [PATCH 0473/1586] memstick/ms_block: simplify refcounting

Implement the ->free_disk method to free the msb_data structure only once
the last gendisk reference goes away instead of keeping a local refcount.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220215094514.3828912-3-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/memstick/core/ms_block.c | 64 ++++++++------------------------
 drivers/memstick/core/ms_block.h |  1 -
 2 files changed, 15 insertions(+), 50 deletions(-)

diff --git a/drivers/memstick/core/ms_block.c b/drivers/memstick/core/ms_block.c
index 0cda6c6baefc3..3993bdd4b519c 100644
--- a/drivers/memstick/core/ms_block.c
+++ b/drivers/memstick/core/ms_block.c
@@ -1943,22 +1943,6 @@ static void msb_io_work(struct work_struct *work)
 static DEFINE_IDR(msb_disk_idr); /*set of used disk numbers */
 static DEFINE_MUTEX(msb_disk_lock); /* protects against races in open/release */
 
-static int msb_bd_open(struct block_device *bdev, fmode_t mode)
-{
-	struct gendisk *disk = bdev->bd_disk;
-	struct msb_data *msb = disk->private_data;
-
-	dbg_verbose("block device open");
-
-	mutex_lock(&msb_disk_lock);
-
-	if (msb && msb->card)
-		msb->usage_count++;
-
-	mutex_unlock(&msb_disk_lock);
-	return 0;
-}
-
 static void msb_data_clear(struct msb_data *msb)
 {
 	kfree(msb->boot_page);
@@ -1968,33 +1952,6 @@ static void msb_data_clear(struct msb_data *msb)
 	msb->card = NULL;
 }
 
-static int msb_disk_release(struct gendisk *disk)
-{
-	struct msb_data *msb = disk->private_data;
-
-	dbg_verbose("block device release");
-	mutex_lock(&msb_disk_lock);
-
-	if (msb) {
-		if (msb->usage_count)
-			msb->usage_count--;
-
-		if (!msb->usage_count) {
-			disk->private_data = NULL;
-			idr_remove(&msb_disk_idr, msb->disk_id);
-			put_disk(disk);
-			kfree(msb);
-		}
-	}
-	mutex_unlock(&msb_disk_lock);
-	return 0;
-}
-
-static void msb_bd_release(struct gendisk *disk, fmode_t mode)
-{
-	msb_disk_release(disk);
-}
-
 static int msb_bd_getgeo(struct block_device *bdev,
 				 struct hd_geometry *geo)
 {
@@ -2003,6 +1960,17 @@ static int msb_bd_getgeo(struct block_device *bdev,
 	return 0;
 }
 
+static void msb_bd_free_disk(struct gendisk *disk)
+{
+	struct msb_data *msb = disk->private_data;
+
+	mutex_lock(&msb_disk_lock);
+	idr_remove(&msb_disk_idr, msb->disk_id);
+	mutex_unlock(&msb_disk_lock);
+
+	kfree(msb);
+}
+
 static blk_status_t msb_queue_rq(struct blk_mq_hw_ctx *hctx,
 				 const struct blk_mq_queue_data *bd)
 {
@@ -2096,10 +2064,9 @@ static void msb_start(struct memstick_dev *card)
 }
 
 static const struct block_device_operations msb_bdops = {
-	.open    = msb_bd_open,
-	.release = msb_bd_release,
-	.getgeo  = msb_bd_getgeo,
-	.owner   = THIS_MODULE
+	.owner		= THIS_MODULE,
+	.getgeo		= msb_bd_getgeo,
+	.free_disk	= msb_bd_free_disk, 
 };
 
 static const struct blk_mq_ops msb_mq_ops = {
@@ -2147,7 +2114,6 @@ static int msb_init_disk(struct memstick_dev *card)
 	set_capacity(msb->disk, capacity);
 	dbg("Set total disk size to %lu sectors", capacity);
 
-	msb->usage_count = 1;
 	msb->io_queue = alloc_ordered_workqueue("ms_block", WQ_MEM_RECLAIM);
 	INIT_WORK(&msb->io_work, msb_io_work);
 	sg_init_table(msb->prealloc_sg, MS_BLOCK_MAX_SEGS+1);
@@ -2229,7 +2195,7 @@ static void msb_remove(struct memstick_dev *card)
 	msb_data_clear(msb);
 	mutex_unlock(&msb_disk_lock);
 
-	msb_disk_release(msb->disk);
+	put_disk(msb->disk);
 	memstick_set_drvdata(card, NULL);
 }
 
diff --git a/drivers/memstick/core/ms_block.h b/drivers/memstick/core/ms_block.h
index 122e1a8a8bd5b..7058f9aefeb92 100644
--- a/drivers/memstick/core/ms_block.h
+++ b/drivers/memstick/core/ms_block.h
@@ -143,7 +143,6 @@ struct ms_boot_page {
 } __packed;
 
 struct msb_data {
-	unsigned int			usage_count;
 	struct memstick_dev		*card;
 	struct gendisk			*disk;
 	struct request_queue		*queue;
-- 
GitLab


From 6dab421bfe06a59bf8f212a72e34673e8acf2018 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 15 Feb 2022 10:45:12 +0100
Subject: [PATCH 0474/1586] memstick/mspro_block: fix handling of read-only
 devices

Use set_disk_ro to propagate the read-only state to the block layer
instead of checking for it in ->open and leaking a reference in case
of a read-only device.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220215094514.3828912-4-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/memstick/core/mspro_block.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c
index c0450397b6735..7ea312f0840e0 100644
--- a/drivers/memstick/core/mspro_block.c
+++ b/drivers/memstick/core/mspro_block.c
@@ -186,13 +186,8 @@ static int mspro_block_bd_open(struct block_device *bdev, fmode_t mode)
 
 	mutex_lock(&mspro_block_disk_lock);
 
-	if (msb && msb->card) {
+	if (msb && msb->card)
 		msb->usage_count++;
-		if ((mode & FMODE_WRITE) && msb->read_only)
-			rc = -EROFS;
-		else
-			rc = 0;
-	}
 
 	mutex_unlock(&mspro_block_disk_lock);
 
@@ -1239,6 +1234,9 @@ static int mspro_block_init_disk(struct memstick_dev *card)
 	set_capacity(msb->disk, capacity);
 	dev_dbg(&card->dev, "capacity set %ld\n", capacity);
 
+	if (msb->read_only)
+		set_disk_ro(msb->disk, true);
+
 	rc = device_add_disk(&card->dev, msb->disk, NULL);
 	if (rc)
 		goto out_cleanup_disk;
-- 
GitLab


From 185ed423d1898ead071c18f6161959cd3cab2dde Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 15 Feb 2022 10:45:13 +0100
Subject: [PATCH 0475/1586] memstick/mspro_block: simplify refcounting

Implement the ->free_disk method to free the msb_data structure only once
the last gendisk reference goes away instead of keeping a local
refcount.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220215094514.3828912-5-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/memstick/core/mspro_block.c | 49 +++++------------------------
 1 file changed, 7 insertions(+), 42 deletions(-)

diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c
index 7ea312f0840e0..725ba74ded308 100644
--- a/drivers/memstick/core/mspro_block.c
+++ b/drivers/memstick/core/mspro_block.c
@@ -133,7 +133,6 @@ struct mspro_devinfo {
 
 struct mspro_block_data {
 	struct memstick_dev   *card;
-	unsigned int          usage_count;
 	unsigned int          caps;
 	struct gendisk        *disk;
 	struct request_queue  *queue;
@@ -178,48 +177,16 @@ static int mspro_block_complete_req(struct memstick_dev *card, int error);
 
 /*** Block device ***/
 
-static int mspro_block_bd_open(struct block_device *bdev, fmode_t mode)
-{
-	struct gendisk *disk = bdev->bd_disk;
-	struct mspro_block_data *msb = disk->private_data;
-	int rc = -ENXIO;
-
-	mutex_lock(&mspro_block_disk_lock);
-
-	if (msb && msb->card)
-		msb->usage_count++;
-
-	mutex_unlock(&mspro_block_disk_lock);
-
-	return rc;
-}
-
-
-static void mspro_block_disk_release(struct gendisk *disk)
+static void mspro_block_bd_free_disk(struct gendisk *disk)
 {
 	struct mspro_block_data *msb = disk->private_data;
 	int disk_id = MINOR(disk_devt(disk)) >> MSPRO_BLOCK_PART_SHIFT;
 
 	mutex_lock(&mspro_block_disk_lock);
-
-	if (msb) {
-		if (msb->usage_count)
-			msb->usage_count--;
-
-		if (!msb->usage_count) {
-			kfree(msb);
-			disk->private_data = NULL;
-			idr_remove(&mspro_block_disk_idr, disk_id);
-			put_disk(disk);
-		}
-	}
-
+	idr_remove(&mspro_block_disk_idr, disk_id);
 	mutex_unlock(&mspro_block_disk_lock);
-}
 
-static void mspro_block_bd_release(struct gendisk *disk, fmode_t mode)
-{
-	mspro_block_disk_release(disk);
+	kfree(msb);
 }
 
 static int mspro_block_bd_getgeo(struct block_device *bdev,
@@ -235,10 +202,9 @@ static int mspro_block_bd_getgeo(struct block_device *bdev,
 }
 
 static const struct block_device_operations ms_block_bdops = {
-	.open    = mspro_block_bd_open,
-	.release = mspro_block_bd_release,
-	.getgeo  = mspro_block_bd_getgeo,
-	.owner   = THIS_MODULE
+	.owner		= THIS_MODULE,
+	.getgeo		= mspro_block_bd_getgeo,
+	.free_disk	= mspro_block_bd_free_disk,
 };
 
 /*** Information ***/
@@ -1221,7 +1187,6 @@ static int mspro_block_init_disk(struct memstick_dev *card)
 	msb->disk->first_minor = disk_id << MSPRO_BLOCK_PART_SHIFT;
 	msb->disk->minors = 1 << MSPRO_BLOCK_PART_SHIFT;
 	msb->disk->fops = &ms_block_bdops;
-	msb->usage_count = 1;
 	msb->disk->private_data = msb;
 
 	sprintf(msb->disk->disk_name, "mspblk%d", disk_id);
@@ -1339,7 +1304,7 @@ static void mspro_block_remove(struct memstick_dev *card)
 	mspro_block_data_clear(msb);
 	mutex_unlock(&mspro_block_disk_lock);
 
-	mspro_block_disk_release(msb->disk);
+	put_disk(msb->disk);
 	memstick_set_drvdata(card, NULL);
 }
 
-- 
GitLab


From 24b45e6c25173abcf8d5e82285212b47f2b0f86b Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 15 Feb 2022 10:45:14 +0100
Subject: [PATCH 0476/1586] virtio_blk: simplify refcounting

Implement the ->free_disk method to free the virtio_blk structure only
once the last gendisk reference goes away instead of keeping a local
refcount.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Link: https://lore.kernel.org/r/20220215094514.3828912-6-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/virtio_blk.c | 66 ++++++++------------------------------
 1 file changed, 14 insertions(+), 52 deletions(-)

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index c443cd64fc9b4..5c636ca7f1a7f 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -69,13 +69,6 @@ struct virtio_blk {
 	/* Process context for config space updates */
 	struct work_struct config_work;
 
-	/*
-	 * Tracks references from block_device_operations open/release and
-	 * virtio_driver probe/remove so this object can be freed once no
-	 * longer in use.
-	 */
-	refcount_t refs;
-
 	/* What host tells us, plus 2 for header & tailer. */
 	unsigned int sg_elems;
 
@@ -391,43 +384,6 @@ out:
 	return err;
 }
 
-static void virtblk_get(struct virtio_blk *vblk)
-{
-	refcount_inc(&vblk->refs);
-}
-
-static void virtblk_put(struct virtio_blk *vblk)
-{
-	if (refcount_dec_and_test(&vblk->refs)) {
-		ida_simple_remove(&vd_index_ida, vblk->index);
-		mutex_destroy(&vblk->vdev_mutex);
-		kfree(vblk);
-	}
-}
-
-static int virtblk_open(struct block_device *bd, fmode_t mode)
-{
-	struct virtio_blk *vblk = bd->bd_disk->private_data;
-	int ret = 0;
-
-	mutex_lock(&vblk->vdev_mutex);
-
-	if (vblk->vdev)
-		virtblk_get(vblk);
-	else
-		ret = -ENXIO;
-
-	mutex_unlock(&vblk->vdev_mutex);
-	return ret;
-}
-
-static void virtblk_release(struct gendisk *disk, fmode_t mode)
-{
-	struct virtio_blk *vblk = disk->private_data;
-
-	virtblk_put(vblk);
-}
-
 /* We provide getgeo only to please some old bootloader/partitioning tools */
 static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo)
 {
@@ -460,11 +416,19 @@ out:
 	return ret;
 }
 
+static void virtblk_free_disk(struct gendisk *disk)
+{
+	struct virtio_blk *vblk = disk->private_data;
+
+	ida_simple_remove(&vd_index_ida, vblk->index);
+	mutex_destroy(&vblk->vdev_mutex);
+	kfree(vblk);
+}
+
 static const struct block_device_operations virtblk_fops = {
-	.owner  = THIS_MODULE,
-	.open = virtblk_open,
-	.release = virtblk_release,
-	.getgeo = virtblk_getgeo,
+	.owner  	= THIS_MODULE,
+	.getgeo		= virtblk_getgeo,
+	.free_disk	= virtblk_free_disk,
 };
 
 static int index_to_minor(int index)
@@ -791,8 +755,6 @@ static int virtblk_probe(struct virtio_device *vdev)
 		goto out_free_index;
 	}
 
-	/* This reference is dropped in virtblk_remove(). */
-	refcount_set(&vblk->refs, 1);
 	mutex_init(&vblk->vdev_mutex);
 
 	vblk->vdev = vdev;
@@ -970,7 +932,7 @@ static void virtblk_remove(struct virtio_device *vdev)
 	flush_work(&vblk->config_work);
 
 	del_gendisk(vblk->disk);
-	blk_cleanup_disk(vblk->disk);
+	blk_cleanup_queue(vblk->disk->queue);
 	blk_mq_free_tag_set(&vblk->tag_set);
 
 	mutex_lock(&vblk->vdev_mutex);
@@ -986,7 +948,7 @@ static void virtblk_remove(struct virtio_device *vdev)
 
 	mutex_unlock(&vblk->vdev_mutex);
 
-	virtblk_put(vblk);
+	put_disk(vblk->disk);
 }
 
 #ifdef CONFIG_PM_SLEEP
-- 
GitLab


From 8f5fea65b06de1cc51d4fc23fb4d378d1abd6ed7 Mon Sep 17 00:00:00 2001
From: David Jeffery <djeffery@redhat.com>
Date: Mon, 31 Jan 2022 15:33:37 -0500
Subject: [PATCH 0477/1586] blk-mq: avoid extending delays of active hctx from
 blk_mq_delay_run_hw_queues

When blk_mq_delay_run_hw_queues sets an hctx to run in the future, it can
reset the delay length for an already pending delayed work run_work. This
creates a scenario where multiple hctx may have their queues set to run,
but if one runs first and finds nothing to do, it can reset the delay of
another hctx and stall the other hctx's ability to run requests.

To avoid this I/O stall when an hctx's run_work is already pending,
leave it untouched to run at its current designated time rather than
extending its delay. The work will still run which keeps closed the race
calling blk_mq_delay_run_hw_queues is needed for while also avoiding the
I/O stall.

Signed-off-by: David Jeffery <djeffery@redhat.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20220131203337.GA17666@redhat
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 7ca0b47246a61..a05ce77250316 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2179,6 +2179,14 @@ void blk_mq_delay_run_hw_queues(struct request_queue *q, unsigned long msecs)
 	queue_for_each_hw_ctx(q, hctx, i) {
 		if (blk_mq_hctx_stopped(hctx))
 			continue;
+		/*
+		 * If there is already a run_work pending, leave the
+		 * pending delay untouched. Otherwise, a hctx can stall
+		 * if another hctx is re-delaying the other's work
+		 * before the work executes.
+		 */
+		if (delayed_work_pending(&hctx->run_work))
+			continue;
 		/*
 		 * Dispatch from this hctx either if there's no hctx preferred
 		 * by IO scheduler or if it has requests that bypass the
-- 
GitLab


From bcd2be763252f3a4d5fc4d6008d4d96c601ee74b Mon Sep 17 00:00:00 2001
From: Yahu Gao <gaoyahu19@gmail.com>
Date: Fri, 7 Jan 2022 14:58:59 +0800
Subject: [PATCH 0478/1586] block/bfq_wf2q: correct weight to ioprio

The return value is ioprio * BFQ_WEIGHT_CONVERSION_COEFF or 0.
What we want is ioprio or 0.
Correct this by changing the calculation.

Signed-off-by: Yahu Gao <gaoyahu19@gmail.com>
Acked-by: Paolo Valente <paolo.valente@linaro.org>
Link: https://lore.kernel.org/r/20220107065859.25689-1-gaoyahu19@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bfq-wf2q.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c
index b74cc0da118ec..709b901de3ca9 100644
--- a/block/bfq-wf2q.c
+++ b/block/bfq-wf2q.c
@@ -519,7 +519,7 @@ unsigned short bfq_ioprio_to_weight(int ioprio)
 static unsigned short bfq_weight_to_ioprio(int weight)
 {
 	return max_t(int, 0,
-		     IOPRIO_NR_LEVELS * BFQ_WEIGHT_CONVERSION_COEFF - weight);
+		     IOPRIO_NR_LEVELS - weight / BFQ_WEIGHT_CONVERSION_COEFF);
 }
 
 static void bfq_get_entity(struct bfq_entity *entity)
-- 
GitLab


From 869f2c94db92f0f1d6acd0dff1c1ebb8160f5e29 Mon Sep 17 00:00:00 2001
From: Jon Lin <jon.lin@rock-chips.com>
Date: Wed, 16 Feb 2022 09:40:25 +0800
Subject: [PATCH 0479/1586] spi: rockchip: Stop spi slave dma receiver when cs
 inactive

The spi which's version is higher than ver 2 will automatically
enable this feature.

If the length of master transmission is uncertain, the RK spi slave
is better to automatically stop after cs inactive instead of waiting
for xfer_completion forever.

Signed-off-by: Jon Lin <jon.lin@rock-chips.com>
Link: https://lore.kernel.org/r/20220216014028.8123-4-jon.lin@rock-chips.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-rockchip.c | 81 ++++++++++++++++++++++++++++++++++----
 1 file changed, 73 insertions(+), 8 deletions(-)

diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c
index c6a1bb09be056..5ecd0692cca1c 100644
--- a/drivers/spi/spi-rockchip.c
+++ b/drivers/spi/spi-rockchip.c
@@ -133,7 +133,8 @@
 #define INT_TF_OVERFLOW				(1 << 1)
 #define INT_RF_UNDERFLOW			(1 << 2)
 #define INT_RF_OVERFLOW				(1 << 3)
-#define INT_RF_FULL					(1 << 4)
+#define INT_RF_FULL				(1 << 4)
+#define INT_CS_INACTIVE				(1 << 6)
 
 /* Bit fields in ICR, 4bit */
 #define ICR_MASK					0x0f
@@ -194,6 +195,8 @@ struct rockchip_spi {
 	bool cs_asserted[ROCKCHIP_SPI_MAX_CS_NUM];
 
 	bool slave_abort;
+	bool cs_inactive; /* spi slave tansmition stop when cs inactive */
+	struct spi_transfer *xfer; /* Store xfer temporarily */
 };
 
 static inline void spi_enable_chip(struct rockchip_spi *rs, bool enable)
@@ -343,6 +346,15 @@ static irqreturn_t rockchip_spi_isr(int irq, void *dev_id)
 	struct spi_controller *ctlr = dev_id;
 	struct rockchip_spi *rs = spi_controller_get_devdata(ctlr);
 
+	/* When int_cs_inactive comes, spi slave abort */
+	if (rs->cs_inactive && readl_relaxed(rs->regs + ROCKCHIP_SPI_IMR) & INT_CS_INACTIVE) {
+		ctlr->slave_abort(ctlr);
+		writel_relaxed(0, rs->regs + ROCKCHIP_SPI_IMR);
+		writel_relaxed(0xffffffff, rs->regs + ROCKCHIP_SPI_ICR);
+
+		return IRQ_HANDLED;
+	}
+
 	if (rs->tx_left)
 		rockchip_spi_pio_writer(rs);
 
@@ -350,6 +362,7 @@ static irqreturn_t rockchip_spi_isr(int irq, void *dev_id)
 	if (!rs->rx_left) {
 		spi_enable_chip(rs, false);
 		writel_relaxed(0, rs->regs + ROCKCHIP_SPI_IMR);
+		writel_relaxed(0xffffffff, rs->regs + ROCKCHIP_SPI_ICR);
 		spi_finalize_current_transfer(ctlr);
 	}
 
@@ -357,14 +370,18 @@ static irqreturn_t rockchip_spi_isr(int irq, void *dev_id)
 }
 
 static int rockchip_spi_prepare_irq(struct rockchip_spi *rs,
-		struct spi_transfer *xfer)
+				    struct spi_controller *ctlr,
+				    struct spi_transfer *xfer)
 {
 	rs->tx = xfer->tx_buf;
 	rs->rx = xfer->rx_buf;
 	rs->tx_left = rs->tx ? xfer->len / rs->n_bytes : 0;
 	rs->rx_left = xfer->len / rs->n_bytes;
 
-	writel_relaxed(INT_RF_FULL, rs->regs + ROCKCHIP_SPI_IMR);
+	if (rs->cs_inactive)
+		writel_relaxed(INT_RF_FULL | INT_CS_INACTIVE, rs->regs + ROCKCHIP_SPI_IMR);
+	else
+		writel_relaxed(INT_RF_FULL, rs->regs + ROCKCHIP_SPI_IMR);
 	spi_enable_chip(rs, true);
 
 	if (rs->tx_left)
@@ -383,6 +400,9 @@ static void rockchip_spi_dma_rxcb(void *data)
 	if (state & TXDMA && !rs->slave_abort)
 		return;
 
+	if (rs->cs_inactive)
+		writel_relaxed(0, rs->regs + ROCKCHIP_SPI_IMR);
+
 	spi_enable_chip(rs, false);
 	spi_finalize_current_transfer(ctlr);
 }
@@ -423,14 +443,16 @@ static int rockchip_spi_prepare_dma(struct rockchip_spi *rs,
 
 	atomic_set(&rs->state, 0);
 
+	rs->tx = xfer->tx_buf;
+	rs->rx = xfer->rx_buf;
+
 	rxdesc = NULL;
 	if (xfer->rx_buf) {
 		struct dma_slave_config rxconf = {
 			.direction = DMA_DEV_TO_MEM,
 			.src_addr = rs->dma_addr_rx,
 			.src_addr_width = rs->n_bytes,
-			.src_maxburst = rockchip_spi_calc_burst_size(xfer->len /
-								     rs->n_bytes),
+			.src_maxburst = rockchip_spi_calc_burst_size(xfer->len / rs->n_bytes),
 		};
 
 		dmaengine_slave_config(ctlr->dma_rx, &rxconf);
@@ -474,10 +496,13 @@ static int rockchip_spi_prepare_dma(struct rockchip_spi *rs,
 	/* rx must be started before tx due to spi instinct */
 	if (rxdesc) {
 		atomic_or(RXDMA, &rs->state);
-		dmaengine_submit(rxdesc);
+		ctlr->dma_rx->cookie = dmaengine_submit(rxdesc);
 		dma_async_issue_pending(ctlr->dma_rx);
 	}
 
+	if (rs->cs_inactive)
+		writel_relaxed(INT_CS_INACTIVE, rs->regs + ROCKCHIP_SPI_IMR);
+
 	spi_enable_chip(rs, true);
 
 	if (txdesc) {
@@ -584,7 +609,42 @@ static size_t rockchip_spi_max_transfer_size(struct spi_device *spi)
 static int rockchip_spi_slave_abort(struct spi_controller *ctlr)
 {
 	struct rockchip_spi *rs = spi_controller_get_devdata(ctlr);
+	u32 rx_fifo_left;
+	struct dma_tx_state state;
+	enum dma_status status;
+
+	/* Get current dma rx point */
+	if (atomic_read(&rs->state) & RXDMA) {
+		dmaengine_pause(ctlr->dma_rx);
+		status = dmaengine_tx_status(ctlr->dma_rx, ctlr->dma_rx->cookie, &state);
+		if (status == DMA_ERROR) {
+			rs->rx = rs->xfer->rx_buf;
+			rs->xfer->len = 0;
+			rx_fifo_left = readl_relaxed(rs->regs + ROCKCHIP_SPI_RXFLR);
+			for (; rx_fifo_left; rx_fifo_left--)
+				readl_relaxed(rs->regs + ROCKCHIP_SPI_RXDR);
+			goto out;
+		} else {
+			rs->rx += rs->xfer->len - rs->n_bytes * state.residue;
+		}
+	}
 
+	/* Get the valid data left in rx fifo and set rs->xfer->len real rx size */
+	if (rs->rx) {
+		rx_fifo_left = readl_relaxed(rs->regs + ROCKCHIP_SPI_RXFLR);
+		for (; rx_fifo_left; rx_fifo_left--) {
+			u32 rxw = readl_relaxed(rs->regs + ROCKCHIP_SPI_RXDR);
+
+			if (rs->n_bytes == 1)
+				*(u8 *)rs->rx = (u8)rxw;
+			else
+				*(u16 *)rs->rx = (u16)rxw;
+			rs->rx += rs->n_bytes;
+		}
+		rs->xfer->len = (unsigned int)(rs->rx - rs->xfer->rx_buf);
+	}
+
+out:
 	if (atomic_read(&rs->state) & RXDMA)
 		dmaengine_terminate_sync(ctlr->dma_rx);
 	if (atomic_read(&rs->state) & TXDMA)
@@ -626,7 +686,7 @@ static int rockchip_spi_transfer_one(
 	}
 
 	rs->n_bytes = xfer->bits_per_word <= 8 ? 1 : 2;
-
+	rs->xfer = xfer;
 	use_dma = ctlr->can_dma ? ctlr->can_dma(ctlr, spi, xfer) : false;
 
 	ret = rockchip_spi_config(rs, spi, xfer, use_dma, ctlr->slave);
@@ -636,7 +696,7 @@ static int rockchip_spi_transfer_one(
 	if (use_dma)
 		return rockchip_spi_prepare_dma(rs, ctlr, xfer);
 
-	return rockchip_spi_prepare_irq(rs, xfer);
+	return rockchip_spi_prepare_irq(rs, ctlr, xfer);
 }
 
 static bool rockchip_spi_can_dma(struct spi_controller *ctlr,
@@ -815,8 +875,13 @@ static int rockchip_spi_probe(struct platform_device *pdev)
 	switch (readl_relaxed(rs->regs + ROCKCHIP_SPI_VERSION)) {
 	case ROCKCHIP_SPI_VER2_TYPE2:
 		ctlr->mode_bits |= SPI_CS_HIGH;
+		if (ctlr->can_dma && slave_mode)
+			rs->cs_inactive = true;
+		else
+			rs->cs_inactive = false;
 		break;
 	default:
+		rs->cs_inactive = false;
 		break;
 	}
 
-- 
GitLab


From 3a4bf922d42efa4e9a3dc803d1fd786d43e8a501 Mon Sep 17 00:00:00 2001
From: Jon Lin <jon.lin@rock-chips.com>
Date: Wed, 16 Feb 2022 09:40:26 +0800
Subject: [PATCH 0480/1586] spi: rockchip: Preset cs-high and clk polarity in
 setup progress

After power up, the cs and clock is in default status, and the cs-high
and clock polarity dts property configuration will take no effect until
the calling of rockchip_spi_config in the first transmission.
So preset them to make sure a correct voltage before the first
transmission coming.

Signed-off-by: Jon Lin <jon.lin@rock-chips.com>
Link: https://lore.kernel.org/r/20220216014028.8123-5-jon.lin@rock-chips.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-rockchip.c | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c
index 5ecd0692cca1c..83da8fdb3c022 100644
--- a/drivers/spi/spi-rockchip.c
+++ b/drivers/spi/spi-rockchip.c
@@ -713,6 +713,29 @@ static bool rockchip_spi_can_dma(struct spi_controller *ctlr,
 	return xfer->len / bytes_per_word >= rs->fifo_len;
 }
 
+static int rockchip_spi_setup(struct spi_device *spi)
+{
+	struct rockchip_spi *rs = spi_controller_get_devdata(spi->controller);
+	u32 cr0;
+
+	pm_runtime_get_sync(rs->dev);
+
+	cr0 = readl_relaxed(rs->regs + ROCKCHIP_SPI_CTRLR0);
+
+	cr0 &= ~(0x3 << CR0_SCPH_OFFSET);
+	cr0 |= ((spi->mode & 0x3) << CR0_SCPH_OFFSET);
+	if (spi->mode & SPI_CS_HIGH && spi->chip_select <= 1)
+		cr0 |= BIT(spi->chip_select) << CR0_SOI_OFFSET;
+	else if (spi->chip_select <= 1)
+		cr0 &= ~(BIT(spi->chip_select) << CR0_SOI_OFFSET);
+
+	writel_relaxed(cr0, rs->regs + ROCKCHIP_SPI_CTRLR0);
+
+	pm_runtime_put(rs->dev);
+
+	return 0;
+}
+
 static int rockchip_spi_probe(struct platform_device *pdev)
 {
 	int ret;
@@ -840,6 +863,7 @@ static int rockchip_spi_probe(struct platform_device *pdev)
 	ctlr->min_speed_hz = rs->freq / BAUDR_SCKDV_MAX;
 	ctlr->max_speed_hz = min(rs->freq / BAUDR_SCKDV_MIN, MAX_SCLK_OUT);
 
+	ctlr->setup = rockchip_spi_setup;
 	ctlr->set_cs = rockchip_spi_set_cs;
 	ctlr->transfer_one = rockchip_spi_transfer_one;
 	ctlr->max_transfer_size = rockchip_spi_max_transfer_size;
-- 
GitLab


From e882575efc771f130a24322377dc1033551da11d Mon Sep 17 00:00:00 2001
From: shengfei Xu <xsf@rock-chips.com>
Date: Wed, 16 Feb 2022 09:40:27 +0800
Subject: [PATCH 0481/1586] spi: rockchip: Suspend and resume the bus during
 NOIRQ_SYSTEM_SLEEP_PM ops

the wakeup interrupt handler which is guaranteed not to run while
@resume noirq() is being executed. the patch can help to avoid the
wakeup source try to access spi when the spi is in suspend mode.

Signed-off-by: shengfei Xu <xsf@rock-chips.com>
Signed-off-by: Jon Lin <jon.lin@rock-chips.com>
Link: https://lore.kernel.org/r/20220216014028.8123-6-jon.lin@rock-chips.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-rockchip.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c
index 83da8fdb3c022..8b4d56ee21938 100644
--- a/drivers/spi/spi-rockchip.c
+++ b/drivers/spi/spi-rockchip.c
@@ -964,14 +964,14 @@ static int rockchip_spi_suspend(struct device *dev)
 {
 	int ret;
 	struct spi_controller *ctlr = dev_get_drvdata(dev);
+	struct rockchip_spi *rs = spi_controller_get_devdata(ctlr);
 
 	ret = spi_controller_suspend(ctlr);
 	if (ret < 0)
 		return ret;
 
-	ret = pm_runtime_force_suspend(dev);
-	if (ret < 0)
-		return ret;
+	clk_disable_unprepare(rs->spiclk);
+	clk_disable_unprepare(rs->apb_pclk);
 
 	pinctrl_pm_select_sleep_state(dev);
 
@@ -986,10 +986,14 @@ static int rockchip_spi_resume(struct device *dev)
 
 	pinctrl_pm_select_default_state(dev);
 
-	ret = pm_runtime_force_resume(dev);
+	ret = clk_prepare_enable(rs->apb_pclk);
 	if (ret < 0)
 		return ret;
 
+	ret = clk_prepare_enable(rs->spiclk);
+	if (ret < 0)
+		clk_disable_unprepare(rs->apb_pclk);
+
 	ret = spi_controller_resume(ctlr);
 	if (ret < 0) {
 		clk_disable_unprepare(rs->spiclk);
@@ -1031,7 +1035,7 @@ static int rockchip_spi_runtime_resume(struct device *dev)
 #endif /* CONFIG_PM */
 
 static const struct dev_pm_ops rockchip_spi_pm = {
-	SET_SYSTEM_SLEEP_PM_OPS(rockchip_spi_suspend, rockchip_spi_resume)
+	SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(rockchip_spi_suspend, rockchip_spi_resume)
 	SET_RUNTIME_PM_OPS(rockchip_spi_runtime_suspend,
 			   rockchip_spi_runtime_resume, NULL)
 };
-- 
GitLab


From 2fcdde56c44fe1cd13ce328128f509bbda2cdb41 Mon Sep 17 00:00:00 2001
From: Jon Lin <jon.lin@rock-chips.com>
Date: Wed, 16 Feb 2022 09:40:28 +0800
Subject: [PATCH 0482/1586] spi: rockchip: clear interrupt status in error
 handler

The interrupt status bit of the previous error data transmition will
affect the next operation and cause continuous SPI transmission failure.

Signed-off-by: Jon Lin <jon.lin@rock-chips.com>
Link: https://lore.kernel.org/r/20220216014028.8123-7-jon.lin@rock-chips.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-rockchip.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c
index 8b4d56ee21938..cdc16eecaf6b5 100644
--- a/drivers/spi/spi-rockchip.c
+++ b/drivers/spi/spi-rockchip.c
@@ -278,8 +278,9 @@ static void rockchip_spi_handle_err(struct spi_controller *ctlr,
 	 */
 	spi_enable_chip(rs, false);
 
-	/* make sure all interrupts are masked */
+	/* make sure all interrupts are masked and status cleared */
 	writel_relaxed(0, rs->regs + ROCKCHIP_SPI_IMR);
+	writel_relaxed(0xffffffff, rs->regs + ROCKCHIP_SPI_ICR);
 
 	if (atomic_read(&rs->state) & TXDMA)
 		dmaengine_terminate_async(ctlr->dma_tx);
-- 
GitLab


From 7fd786dfbd2c55ddee3b87f33c82f1c58bdb1dd6 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Tue, 18 Jan 2022 15:02:57 -0800
Subject: [PATCH 0483/1586] tools/power/x86/intel-speed-select: OOB daemon mode

It is possible that some out of band agent changed config level. In this
case CPUs need to be online/offline to support this config change. Add
a command line option --oob, so that this tool can run as daemon and poll
for config level change and take action. The poll interval is configurable
in seconds using config option --poll-interval.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
---
 tools/power/x86/intel-speed-select/Build      |   2 +-
 .../x86/intel-speed-select/isst-config.c      |  51 +++-
 .../x86/intel-speed-select/isst-daemon.c      | 239 ++++++++++++++++++
 tools/power/x86/intel-speed-select/isst.h     |  11 +
 4 files changed, 292 insertions(+), 11 deletions(-)
 create mode 100644 tools/power/x86/intel-speed-select/isst-daemon.c

diff --git a/tools/power/x86/intel-speed-select/Build b/tools/power/x86/intel-speed-select/Build
index b61456d75190f..86fb9020cca2b 100644
--- a/tools/power/x86/intel-speed-select/Build
+++ b/tools/power/x86/intel-speed-select/Build
@@ -1 +1 @@
-intel-speed-select-y +=  isst-config.o isst-core.o isst-display.o
+intel-speed-select-y +=  isst-config.o isst-core.o isst-display.o isst-daemon.o
diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c
index efe72fa48224a..de5029b54050d 100644
--- a/tools/power/x86/intel-speed-select/isst-config.c
+++ b/tools/power/x86/intel-speed-select/isst-config.c
@@ -368,7 +368,7 @@ int get_topo_max_cpus(void)
 	return topo_max_cpus;
 }
 
-static void set_cpu_online_offline(int cpu, int state)
+void set_cpu_online_offline(int cpu, int state)
 {
 	char buffer[128];
 	int fd, ret;
@@ -409,12 +409,10 @@ static void force_all_cpus_online(void)
 	unlink("/var/run/isst_cpu_topology.dat");
 }
 
-#define MAX_PACKAGE_COUNT 8
-#define MAX_DIE_PER_PACKAGE 2
-static void for_each_online_package_in_set(void (*callback)(int, void *, void *,
-							    void *, void *),
-					   void *arg1, void *arg2, void *arg3,
-					   void *arg4)
+void for_each_online_package_in_set(void (*callback)(int, void *, void *,
+						     void *, void *),
+				    void *arg1, void *arg2, void *arg3,
+				    void *arg4)
 {
 	int max_packages[MAX_PACKAGE_COUNT * MAX_PACKAGE_COUNT];
 	int pkg_index = 0, i;
@@ -2803,7 +2801,9 @@ static void usage(void)
 	printf("\t[-p|--pause] : Delay between two mail box commands in milliseconds\n");
 	printf("\t[-r|--retry] : Retry count for mail box commands on failure, default 3\n");
 	printf("\t[-v|--version] : Print version\n");
-
+	printf("\t[-b|--oob : Start a daemon to process HFI events for perf profile change from Out of Band agent.\n");
+	printf("\t[-n|--no-daemon : Don't run as daemon. By default --oob will turn on daemon mode\n");
+	printf("\t[-w|--delay : Delay for reading config level state change in OOB poll mode.\n");
 	printf("\nResult format\n");
 	printf("\tResult display uses a common format for each command:\n");
 	printf("\tResults are formatted in text/JSON with\n");
@@ -2837,6 +2837,9 @@ static void cmdline(int argc, char **argv)
 	int opt, force_cpus_online = 0;
 	int option_index = 0;
 	int ret;
+	int oob_mode = 0;
+	int poll_interval = -1;
+	int no_daemon = 0;
 
 	static struct option long_options[] = {
 		{ "all-cpus-online", no_argument, 0, 'a' },
@@ -2849,6 +2852,9 @@ static void cmdline(int argc, char **argv)
 		{ "out", required_argument, 0, 'o' },
 		{ "retry", required_argument, 0, 'r' },
 		{ "version", no_argument, 0, 'v' },
+		{ "oob", no_argument, 0, 'b' },
+		{ "no-daemon", no_argument, 0, 'n' },
+		{ "poll-interval", required_argument, 0, 'w' },
 		{ 0, 0, 0, 0 }
 	};
 
@@ -2875,7 +2881,7 @@ static void cmdline(int argc, char **argv)
 	}
 
 	progname = argv[0];
-	while ((opt = getopt_long_only(argc, argv, "+c:df:hio:va", long_options,
+	while ((opt = getopt_long_only(argc, argv, "+c:df:hio:vabw:n", long_options,
 				       &option_index)) != -1) {
 		switch (opt) {
 		case 'a':
@@ -2920,12 +2926,26 @@ static void cmdline(int argc, char **argv)
 		case 'v':
 			print_version();
 			break;
+		case 'b':
+			oob_mode = 1;
+			break;
+		case 'n':
+			no_daemon = 1;
+			break;
+		case 'w':
+			ret = strtol(optarg, &ptr, 10);
+			if (!ret) {
+				fprintf(stderr, "Invalid poll interval count\n");
+				exit(0);
+			}
+			poll_interval = ret;
+			break;
 		default:
 			usage();
 		}
 	}
 
-	if (optind > (argc - 2)) {
+	if (optind > (argc - 2) && !oob_mode) {
 		usage();
 		exit(0);
 	}
@@ -2936,6 +2956,17 @@ static void cmdline(int argc, char **argv)
 	set_cpu_present_cpu_mask();
 	set_cpu_target_cpu_mask();
 
+	if (oob_mode) {
+		create_cpu_map();
+		if (debug_flag)
+			fprintf(stderr, "OOB mode is enabled in debug mode\n");
+
+		ret = isst_daemon(debug_flag, poll_interval, no_daemon);
+		if (ret)
+			fprintf(stderr, "OOB mode enable failed\n");
+		goto out;
+	}
+
 	if (!is_clx_n_platform()) {
 		ret = isst_fill_platform_info();
 		if (ret)
diff --git a/tools/power/x86/intel-speed-select/isst-daemon.c b/tools/power/x86/intel-speed-select/isst-daemon.c
new file mode 100644
index 0000000000000..15a70bba8d760
--- /dev/null
+++ b/tools/power/x86/intel-speed-select/isst-daemon.c
@@ -0,0 +1,239 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Intel Speed Select -- Allow speed select to daemonize
+ * Copyright (c) 2022 Intel Corporation.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/file.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <errno.h>
+#include <getopt.h>
+#include <signal.h>
+#include <time.h>
+
+#include "isst.h"
+
+static int per_package_levels_info[MAX_PACKAGE_COUNT][MAX_DIE_PER_PACKAGE];
+static time_t per_package_levels_tm[MAX_PACKAGE_COUNT][MAX_DIE_PER_PACKAGE];
+
+static void init_levels(void)
+{
+	int i, j;
+
+	for (i = 0; i < MAX_PACKAGE_COUNT; ++i)
+		for (j = 0; j < MAX_DIE_PER_PACKAGE; ++j)
+			per_package_levels_info[i][j] = -1;
+}
+
+void process_level_change(int cpu)
+{
+	struct isst_pkg_ctdp_level_info ctdp_level;
+	int pkg_id = get_physical_package_id(cpu);
+	int die_id = get_physical_die_id(cpu);
+	struct isst_pkg_ctdp pkg_dev;
+	time_t tm;
+	int ret;
+
+	if (pkg_id >= MAX_PACKAGE_COUNT || die_id > MAX_DIE_PER_PACKAGE) {
+		debug_printf("Invalid package/die info for cpu:%d\n", cpu);
+		return;
+	}
+
+	tm = time(NULL);
+	if (tm - per_package_levels_tm[pkg_id][die_id] < 2 )
+		return;
+
+	per_package_levels_tm[pkg_id][die_id] = tm;
+
+	ret = isst_get_ctdp_levels(cpu, &pkg_dev);
+	if (ret) {
+		debug_printf("Can't get tdp levels for cpu:%d\n", cpu);
+		return;
+	}
+
+	debug_printf("Get Config level %d pkg:%d die:%d current_level:%d \n", cpu,
+		      pkg_id, die_id, pkg_dev.current_level);
+
+	if (pkg_dev.locked) {
+		debug_printf("config TDP s locked \n");
+		return;
+	}
+
+	if (per_package_levels_info[pkg_id][die_id] == pkg_dev.current_level)
+		return;
+
+	debug_printf("**Config level change for cpu:%d pkg:%d die:%d from %d to %d\n",
+		      cpu, pkg_id, die_id, per_package_levels_info[pkg_id][die_id],
+		      pkg_dev.current_level);
+
+	per_package_levels_info[pkg_id][die_id] = pkg_dev.current_level;
+
+	ctdp_level.core_cpumask_size =
+		alloc_cpu_set(&ctdp_level.core_cpumask);
+	ret = isst_get_coremask_info(cpu, pkg_dev.current_level, &ctdp_level);
+	if (ret) {
+		free_cpu_set(ctdp_level.core_cpumask);
+		debug_printf("Can't get core_mask:%d\n", cpu);
+		return;
+	}
+
+	if (ctdp_level.cpu_count) {
+		int i, max_cpus = get_topo_max_cpus();
+		for (i = 0; i < max_cpus; ++i) {
+			if (pkg_id != get_physical_package_id(i) || die_id != get_physical_die_id(i))
+				continue;
+			if (CPU_ISSET_S(i, ctdp_level.core_cpumask_size, ctdp_level.core_cpumask)) {
+				fprintf(stderr, "online cpu %d\n", i);
+				set_cpu_online_offline(i, 1);
+			} else {
+				fprintf(stderr, "offline cpu %d\n", i);
+				set_cpu_online_offline(i, 0);
+			}
+		}
+	}
+
+	free_cpu_set(ctdp_level.core_cpumask);
+}
+
+static void _poll_for_config_change(int cpu, void *arg1, void *arg2,
+				    void *arg3, void *arg4)
+{
+	process_level_change(cpu);
+}
+
+static void poll_for_config_change(void)
+{
+	for_each_online_package_in_set(_poll_for_config_change, NULL, NULL,
+				       NULL, NULL);
+}
+
+static int done = 0;
+static int pid_file_handle;
+
+static void signal_handler(int sig)
+{
+	switch (sig) {
+	case SIGINT:
+	case SIGTERM:
+		done = 1;
+		exit(0);
+		break;
+	default:
+		break;
+	}
+}
+
+static void daemonize(char *rundir, char *pidfile)
+{
+	int pid, sid, i;
+	char str[10];
+	struct sigaction sig_actions;
+	sigset_t sig_set;
+	int ret;
+
+	if (getppid() == 1)
+		return;
+
+	sigemptyset(&sig_set);
+	sigaddset(&sig_set, SIGCHLD);
+	sigaddset(&sig_set, SIGTSTP);
+	sigaddset(&sig_set, SIGTTOU);
+	sigaddset(&sig_set, SIGTTIN);
+	sigprocmask(SIG_BLOCK, &sig_set, NULL);
+
+	sig_actions.sa_handler = signal_handler;
+	sigemptyset(&sig_actions.sa_mask);
+	sig_actions.sa_flags = 0;
+
+	sigaction(SIGHUP, &sig_actions, NULL);
+	sigaction(SIGTERM, &sig_actions, NULL);
+	sigaction(SIGINT, &sig_actions, NULL);
+
+	pid = fork();
+	if (pid < 0) {
+		/* Could not fork */
+		exit(EXIT_FAILURE);
+	}
+	if (pid > 0)
+		exit(EXIT_SUCCESS);
+
+	umask(027);
+
+	sid = setsid();
+	if (sid < 0)
+		exit(EXIT_FAILURE);
+
+	/* close all descriptors */
+	for (i = getdtablesize(); i >= 0; --i)
+		close(i);
+
+	i = open("/dev/null", O_RDWR);
+	ret = dup(i);
+	if (ret == -1)
+		exit(EXIT_FAILURE);
+
+	ret = dup(i);
+	if (ret == -1)
+		exit(EXIT_FAILURE);
+
+	ret = chdir(rundir);
+	if (ret == -1)
+		exit(EXIT_FAILURE);
+
+	pid_file_handle = open(pidfile, O_RDWR | O_CREAT, 0600);
+	if (pid_file_handle == -1) {
+		/* Couldn't open lock file */
+		exit(1);
+	}
+	/* Try to lock file */
+#ifdef LOCKF_SUPPORT
+	if (lockf(pid_file_handle, F_TLOCK, 0) == -1) {
+#else
+	if (flock(pid_file_handle, LOCK_EX|LOCK_NB) < 0) {
+#endif
+		/* Couldn't get lock on lock file */
+		fprintf(stderr, "Couldn't get lock file %d\n", getpid());
+		exit(1);
+	}
+	snprintf(str, sizeof(str), "%d\n", getpid());
+	ret = write(pid_file_handle, str, strlen(str));
+	if (ret == -1)
+		exit(EXIT_FAILURE);
+
+	close(i);
+}
+
+int isst_daemon(int debug_mode, int poll_interval, int no_daemon)
+{
+	int ret;
+
+	if (!no_daemon && poll_interval < 0 && !debug_mode) {
+		fprintf(stderr, "OOB mode is enabled and will run as daemon\n");
+		daemonize((char *) "/tmp/",
+				(char *)"/tmp/hfi-events.pid");
+	} else {
+		signal(SIGINT, signal_handler);
+	}
+
+	init_levels();
+
+	if (poll_interval < 0) {
+		fprintf(stderr, "Must specify poll-interval\n");
+		return ret;
+	}
+
+	debug_printf("Starting loop\n");
+	while (!done) {
+		sleep(poll_interval);
+		poll_for_config_change();
+	}
+
+	return 0;
+}
diff --git a/tools/power/x86/intel-speed-select/isst.h b/tools/power/x86/intel-speed-select/isst.h
index 1aa15d5ea57ce..b33f2c68d2ce0 100644
--- a/tools/power/x86/intel-speed-select/isst.h
+++ b/tools/power/x86/intel-speed-select/isst.h
@@ -76,6 +76,9 @@
 
 #define DISP_FREQ_MULTIPLIER 100
 
+#define MAX_PACKAGE_COUNT 8
+#define MAX_DIE_PER_PACKAGE 2
+
 struct isst_clos_config {
 	int pkg_id;
 	int die_id;
@@ -260,4 +263,12 @@ extern int is_skx_based_platform(void);
 extern int is_spr_platform(void);
 extern int is_icx_platform(void);
 extern void isst_trl_display_information(int cpu, FILE *outf, unsigned long long trl);
+
+extern void set_cpu_online_offline(int cpu, int state);
+extern void for_each_online_package_in_set(void (*callback)(int, void *, void *,
+							    void *, void *),
+					   void *arg1, void *arg2, void *arg3,
+					   void *arg4);
+extern int isst_daemon(int debug_mode, int poll_interval, int no_daemon);
+extern void process_level_change(int cpu);
 #endif
-- 
GitLab


From 7d440da009b6cd2a559cdb63d97e2cb569357dbc Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Tue, 18 Jan 2022 16:44:20 -0800
Subject: [PATCH 0484/1586] tools/power/x86/intel-speed-select: HFI support

Read HFI (Hardware Feedback Interface) events to process config level
changes in oob mode. When HFI is supported there is no need for polling
to check config level change.

Subscribe to Linux thermal netlink messages and process message:
THERMAL_GENL_EVENT_CPU_CAPABILITY_CHANGE.
This message contains cpu number, performance and energy efficiency.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
---
 tools/power/x86/intel-speed-select/Build      |   2 +-
 tools/power/x86/intel-speed-select/Makefile   |  10 +-
 .../power/x86/intel-speed-select/hfi-events.c | 309 ++++++++++++++++++
 .../x86/intel-speed-select/isst-daemon.c      |   5 +
 tools/power/x86/intel-speed-select/isst.h     |   2 +
 5 files changed, 324 insertions(+), 4 deletions(-)
 create mode 100644 tools/power/x86/intel-speed-select/hfi-events.c

diff --git a/tools/power/x86/intel-speed-select/Build b/tools/power/x86/intel-speed-select/Build
index 86fb9020cca2b..81e36bd578b1c 100644
--- a/tools/power/x86/intel-speed-select/Build
+++ b/tools/power/x86/intel-speed-select/Build
@@ -1 +1 @@
-intel-speed-select-y +=  isst-config.o isst-core.o isst-display.o isst-daemon.o
+intel-speed-select-y +=  isst-config.o isst-core.o isst-display.o isst-daemon.o hfi-events.o
diff --git a/tools/power/x86/intel-speed-select/Makefile b/tools/power/x86/intel-speed-select/Makefile
index 12c6939dca2a4..d2fba1297d964 100644
--- a/tools/power/x86/intel-speed-select/Makefile
+++ b/tools/power/x86/intel-speed-select/Makefile
@@ -13,8 +13,8 @@ endif
 # Do not use make's built-in rules
 # (this improves performance and avoids hard-to-debug behaviour);
 MAKEFLAGS += -r
-
-override CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include
+override CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include -I/usr/include/libnl3
+override LDFLAGS += -lnl-genl-3 -lnl-3
 
 ALL_TARGETS := intel-speed-select
 ALL_PROGRAMS := $(patsubst %,$(OUTPUT)%,$(ALL_TARGETS))
@@ -31,7 +31,11 @@ $(OUTPUT)include/linux/isst_if.h: ../../../../include/uapi/linux/isst_if.h
 	mkdir -p $(OUTPUT)include/linux 2>&1 || true
 	ln -sf $(CURDIR)/../../../../include/uapi/linux/isst_if.h $@
 
-prepare: $(OUTPUT)include/linux/isst_if.h
+$(OUTPUT)include/linux/thermal.h: ../../../../include/uapi/linux/thermal.h
+	mkdir -p $(OUTPUT)include/linux 2>&1 || true
+	ln -sf $(CURDIR)/../../../../include/uapi/linux/thermal.h $@
+
+prepare: $(OUTPUT)include/linux/isst_if.h $(OUTPUT)include/linux/thermal.h
 
 ISST_IN := $(OUTPUT)intel-speed-select-in.o
 
diff --git a/tools/power/x86/intel-speed-select/hfi-events.c b/tools/power/x86/intel-speed-select/hfi-events.c
new file mode 100644
index 0000000000000..e856767113725
--- /dev/null
+++ b/tools/power/x86/intel-speed-select/hfi-events.c
@@ -0,0 +1,309 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Intel Speed Select -- Read HFI events for OOB
+ * Copyright (c) 2022 Intel Corporation.
+ */
+
+/*
+ * This file incorporates work covered by the following copyright and
+ * permission notice:
+
+ * WPA Supplicant - driver interaction with Linux nl80211/cfg80211
+ * Copyright (c) 2003-2008, Jouni Malinen <j@w1.fi>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Alternatively, this software may be distributed under the terms of
+ * BSD license.
+ *
+ * Requires
+ * libnl-genl-3-dev
+ *
+ * For Fedora/CenOS
+ * dnf install libnl3-devel
+ * For Ubuntu
+ * apt install libnl-3-dev libnl-genl-3-dev
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/file.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <errno.h>
+#include <getopt.h>
+#include <signal.h>
+#include <netlink/genl/genl.h>
+#include <netlink/genl/family.h>
+#include <netlink/genl/ctrl.h>
+
+#include <linux/thermal.h>
+#include "isst.h"
+
+struct hfi_event_data {
+	struct nl_sock *nl_handle;
+	struct nl_cb *nl_cb;
+};
+
+struct hfi_event_data drv;
+
+static int ack_handler(struct nl_msg *msg, void *arg)
+{
+	int *err = arg;
+	*err = 0;
+	return NL_STOP;
+}
+
+static int finish_handler(struct nl_msg *msg, void *arg)
+{
+	int *ret = arg;
+	*ret = 0;
+	return NL_SKIP;
+}
+
+static int error_handler(struct sockaddr_nl *nla, struct nlmsgerr *err,
+			 void *arg)
+{
+	int *ret = arg;
+	*ret = err->error;
+	return NL_SKIP;
+}
+
+static int seq_check_handler(struct nl_msg *msg, void *arg)
+{
+	return NL_OK;
+}
+
+static int send_and_recv_msgs(struct hfi_event_data *drv,
+			      struct nl_msg *msg,
+			      int (*valid_handler)(struct nl_msg *, void *),
+			      void *valid_data)
+{
+	struct nl_cb *cb;
+	int err = -ENOMEM;
+
+	cb = nl_cb_clone(drv->nl_cb);
+	if (!cb)
+		goto out;
+
+	err = nl_send_auto_complete(drv->nl_handle, msg);
+	if (err < 0)
+		goto out;
+
+	err = 1;
+
+	nl_cb_err(cb, NL_CB_CUSTOM, error_handler, &err);
+	nl_cb_set(cb, NL_CB_FINISH, NL_CB_CUSTOM, finish_handler, &err);
+	nl_cb_set(cb, NL_CB_ACK, NL_CB_CUSTOM, ack_handler, &err);
+
+	if (valid_handler)
+		nl_cb_set(cb, NL_CB_VALID, NL_CB_CUSTOM,
+			  valid_handler, valid_data);
+
+	while (err > 0)
+		nl_recvmsgs(drv->nl_handle, cb);
+ out:
+	nl_cb_put(cb);
+	nlmsg_free(msg);
+	return err;
+}
+
+struct family_data {
+	const char *group;
+	int id;
+};
+
+static int family_handler(struct nl_msg *msg, void *arg)
+{
+	struct family_data *res = arg;
+	struct nlattr *tb[CTRL_ATTR_MAX + 1];
+	struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg));
+	struct nlattr *mcgrp;
+	int i;
+
+	nla_parse(tb, CTRL_ATTR_MAX, genlmsg_attrdata(gnlh, 0),
+		  genlmsg_attrlen(gnlh, 0), NULL);
+	if (!tb[CTRL_ATTR_MCAST_GROUPS])
+		return NL_SKIP;
+
+	nla_for_each_nested(mcgrp, tb[CTRL_ATTR_MCAST_GROUPS], i) {
+		struct nlattr *tb2[CTRL_ATTR_MCAST_GRP_MAX + 1];
+		nla_parse(tb2, CTRL_ATTR_MCAST_GRP_MAX, nla_data(mcgrp),
+			  nla_len(mcgrp), NULL);
+		if (!tb2[CTRL_ATTR_MCAST_GRP_NAME] ||
+		    !tb2[CTRL_ATTR_MCAST_GRP_ID] ||
+		    strncmp(nla_data(tb2[CTRL_ATTR_MCAST_GRP_NAME]),
+				res->group,
+				nla_len(tb2[CTRL_ATTR_MCAST_GRP_NAME])) != 0)
+			continue;
+		res->id = nla_get_u32(tb2[CTRL_ATTR_MCAST_GRP_ID]);
+		break;
+	};
+
+	return 0;
+}
+
+static int nl_get_multicast_id(struct hfi_event_data *drv,
+			       const char *family, const char *group)
+{
+	struct nl_msg *msg;
+	int ret = -1;
+	struct family_data res = { group, -ENOENT };
+
+	msg = nlmsg_alloc();
+	if (!msg)
+		return -ENOMEM;
+	genlmsg_put(msg, 0, 0, genl_ctrl_resolve(drv->nl_handle, "nlctrl"),
+		    0, 0, CTRL_CMD_GETFAMILY, 0);
+	NLA_PUT_STRING(msg, CTRL_ATTR_FAMILY_NAME, family);
+
+	ret = send_and_recv_msgs(drv, msg, family_handler, &res);
+	msg = NULL;
+	if (ret == 0)
+		ret = res.id;
+
+nla_put_failure:
+	nlmsg_free(msg);
+	return ret;
+}
+
+struct perf_cap {
+	int cpu;
+	int perf;
+	int eff;
+};
+
+static void process_hfi_event(struct perf_cap *perf_cap)
+{
+	process_level_change(perf_cap->cpu);
+}
+
+static int handle_event(struct nl_msg *n, void *arg)
+{
+	struct nlmsghdr *nlh = nlmsg_hdr(n);
+	struct genlmsghdr *genlhdr = genlmsg_hdr(nlh);
+	struct nlattr *attrs[THERMAL_GENL_ATTR_MAX + 1];
+	int ret;
+	struct perf_cap perf_cap;
+
+	ret = genlmsg_parse(nlh, 0, attrs, THERMAL_GENL_ATTR_MAX, NULL);
+
+	debug_printf("Received event %d parse_rer:%d\n", genlhdr->cmd, ret);
+	if (genlhdr->cmd == THERMAL_GENL_EVENT_CPU_CAPABILITY_CHANGE) {
+		struct nlattr *cap;
+		int j, index = 0;
+
+		debug_printf("THERMAL_GENL_EVENT_CPU_CAPABILITY_CHANGE\n");
+		nla_for_each_nested(cap, attrs[THERMAL_GENL_ATTR_CPU_CAPABILITY], j) {
+			switch (index) {
+			case 0:
+				perf_cap.cpu = nla_get_u32(cap);
+				break;
+			case 1:
+				perf_cap.perf = nla_get_u32(cap);
+				break;
+			case 2:
+				perf_cap.eff = nla_get_u32(cap);
+				break;
+			default:
+				break;
+			}
+			++index;
+			if (index == 3) {
+				index = 0;
+				process_hfi_event(&perf_cap);
+			}
+		}
+	}
+
+	return 0;
+}
+
+static int _hfi_exit;
+
+static int check_hf_suport(void)
+{
+	unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;
+
+	__cpuid(6, eax, ebx, ecx, edx);
+	if (eax & BIT(19))
+		return 1;
+
+	return 0;
+}
+
+int hfi_main(void)
+{
+	struct nl_sock *sock;
+	struct nl_cb *cb;
+	int err = 0;
+	int mcast_id;
+	int no_block = 0;
+
+	if (!check_hf_suport()) {
+		fprintf(stderr, "CPU Doesn't support HFI\n");
+		return -1;
+	}
+
+	sock = nl_socket_alloc();
+	if (!sock) {
+		fprintf(stderr, "nl_socket_alloc failed\n");
+		return -1;
+	}
+
+	if (genl_connect(sock)) {
+		fprintf(stderr, "genl_connect(sk_event) failed\n");
+		goto free_sock;
+	}
+
+	drv.nl_handle = sock;
+	drv.nl_cb = cb = nl_cb_alloc(NL_CB_DEFAULT);
+	if (drv.nl_cb == NULL) {
+		printf("Failed to allocate netlink callbacks");
+		goto free_sock;
+	}
+
+	mcast_id = nl_get_multicast_id(&drv, THERMAL_GENL_FAMILY_NAME,
+				   THERMAL_GENL_EVENT_GROUP_NAME);
+	if (mcast_id < 0) {
+		fprintf(stderr, "nl_get_multicast_id failed\n");
+		goto free_sock;
+	}
+
+	if (nl_socket_add_membership(sock, mcast_id)) {
+		fprintf(stderr, "nl_socket_add_membership failed");
+		goto free_sock;
+	}
+
+	nl_cb_set(cb, NL_CB_SEQ_CHECK, NL_CB_CUSTOM, seq_check_handler, 0);
+	nl_cb_set(cb, NL_CB_VALID, NL_CB_CUSTOM, handle_event, NULL);
+
+	if (no_block)
+		nl_socket_set_nonblocking(sock);
+
+	debug_printf("hfi is initialized\n");
+
+	while (!_hfi_exit && !err) {
+		err = nl_recvmsgs(sock, cb);
+		debug_printf("nl_recv_message err:%d\n", err);
+	}
+
+	return 0;
+
+	/* Netlink library doesn't have calls to dealloc cb or disconnect */
+free_sock:
+	nl_socket_free(sock);
+
+	return -1;
+}
+
+void hfi_exit(void)
+{
+	_hfi_exit = 1;
+}
diff --git a/tools/power/x86/intel-speed-select/isst-daemon.c b/tools/power/x86/intel-speed-select/isst-daemon.c
index 15a70bba8d760..dd372924bc826 100644
--- a/tools/power/x86/intel-speed-select/isst-daemon.c
+++ b/tools/power/x86/intel-speed-select/isst-daemon.c
@@ -123,6 +123,7 @@ static void signal_handler(int sig)
 	case SIGINT:
 	case SIGTERM:
 		done = 1;
+		hfi_exit();
 		exit(0);
 		break;
 	default:
@@ -225,6 +226,10 @@ int isst_daemon(int debug_mode, int poll_interval, int no_daemon)
 	init_levels();
 
 	if (poll_interval < 0) {
+		ret = hfi_main();
+		if (ret) {
+			fprintf(stderr, "HFI initialization failed\n");
+		}
 		fprintf(stderr, "Must specify poll-interval\n");
 		return ret;
 	}
diff --git a/tools/power/x86/intel-speed-select/isst.h b/tools/power/x86/intel-speed-select/isst.h
index b33f2c68d2ce0..0796d8c6a8827 100644
--- a/tools/power/x86/intel-speed-select/isst.h
+++ b/tools/power/x86/intel-speed-select/isst.h
@@ -271,4 +271,6 @@ extern void for_each_online_package_in_set(void (*callback)(int, void *, void *,
 					   void *arg4);
 extern int isst_daemon(int debug_mode, int poll_interval, int no_daemon);
 extern void process_level_change(int cpu);
+extern int hfi_main(void);
+extern void hfi_exit(void);
 #endif
-- 
GitLab


From f3874e96fad596cd2c2441802be7e1d0c9fa8677 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Tue, 18 Jan 2022 16:52:26 -0800
Subject: [PATCH 0485/1586] tools/power/x86/intel-speed-select: v1.12 release

This version allows out of band SST support, where some remote agent
changes SST profiles via some Board Management Controller.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
---
 tools/power/x86/intel-speed-select/isst-config.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c
index de5029b54050d..060390e88e374 100644
--- a/tools/power/x86/intel-speed-select/isst-config.c
+++ b/tools/power/x86/intel-speed-select/isst-config.c
@@ -15,7 +15,8 @@ struct process_cmd_struct {
 	int arg;
 };
 
-static const char *version_str = "v1.11";
+static const char *version_str = "v1.12";
+
 static const int supported_api_ver = 1;
 static struct isst_if_platform_info isst_platform_info;
 static char *progname;
-- 
GitLab


From b06e15ebd5bfb670f93c7f11a29b8299c1178bc6 Mon Sep 17 00:00:00 2001
From: Muhammad Usama Anjum <usama.anjum@collabora.com>
Date: Mon, 14 Feb 2022 23:41:08 +0500
Subject: [PATCH 0486/1586] selftests/x86: Add validity check and allow field
 splitting

Add check to test if CC has a string. CC can have multiple sub-strings
like "ccache gcc". Erorr pops up if it is treated as single string and
double quotes are used around it. This can be fixed by removing the
quotes and not treating CC as a single string.

Fixes: e9886ace222e ("selftests, x86: Rework x86 target architecture detection")
Reported-by: "kernelci.org bot" <bot@kernelci.org>
Signed-off-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Link: https://lkml.kernel.org/r/20220214184109.3739179-2-usama.anjum@collabora.com
---
 tools/testing/selftests/x86/check_cc.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/x86/check_cc.sh b/tools/testing/selftests/x86/check_cc.sh
index 3e2089c8cf549..8c669c0d662ee 100755
--- a/tools/testing/selftests/x86/check_cc.sh
+++ b/tools/testing/selftests/x86/check_cc.sh
@@ -7,7 +7,7 @@ CC="$1"
 TESTPROG="$2"
 shift 2
 
-if "$CC" -o /dev/null "$TESTPROG" -O0 "$@" 2>/dev/null; then
+if [ -n "$CC" ] && $CC -o /dev/null "$TESTPROG" -O0 "$@" 2>/dev/null; then
     echo 1
 else
     echo 0
-- 
GitLab


From 6170abb21e2380477080b25145da9747ad467d3d Mon Sep 17 00:00:00 2001
From: Muhammad Usama Anjum <usama.anjum@collabora.com>
Date: Mon, 14 Feb 2022 23:41:09 +0500
Subject: [PATCH 0487/1586] selftests/sgx: Treat CC as one argument

CC can have multiple sub-strings like "ccache gcc". For check_cc.sh,
CC needs to be treated like one argument. Put double quotes around it to
make CC one string and hence one argument.

Fixes: 2adcba79e69d ("selftests/x86: Add a selftest for SGX")
Reported-by: "kernelci.org bot" <bot@kernelci.org>
Signed-off-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Link: https://lkml.kernel.org/r/20220214184109.3739179-3-usama.anjum@collabora.com
---
 tools/testing/selftests/sgx/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/sgx/Makefile b/tools/testing/selftests/sgx/Makefile
index 2956584e1e37f..75af864e07b65 100644
--- a/tools/testing/selftests/sgx/Makefile
+++ b/tools/testing/selftests/sgx/Makefile
@@ -4,7 +4,7 @@ include ../lib.mk
 
 .PHONY: all clean
 
-CAN_BUILD_X86_64 := $(shell ../x86/check_cc.sh $(CC) \
+CAN_BUILD_X86_64 := $(shell ../x86/check_cc.sh "$(CC)" \
 			    ../x86/trivial_64bit_program.c)
 
 ifndef OBJCOPY
-- 
GitLab


From 4ba31cdd88c9008777a48d3ac2b045dce5634389 Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Wed, 9 Feb 2022 10:21:58 +0000
Subject: [PATCH 0488/1586] crypto: cavium/nitrox - fix typo on crypto

crypto had a typo, fix it.

Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/cavium/nitrox/nitrox_req.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/cavium/nitrox/nitrox_req.h b/drivers/crypto/cavium/nitrox/nitrox_req.h
index ed174883c8e30..6bf088bcdd117 100644
--- a/drivers/crypto/cavium/nitrox/nitrox_req.h
+++ b/drivers/crypto/cavium/nitrox/nitrox_req.h
@@ -440,7 +440,7 @@ struct aqmq_command_s {
 /**
  * struct ctx_hdr - Book keeping data about the crypto context
  * @pool: Pool used to allocate crypto context
- * @dma: Base DMA address of the cypto context
+ * @dma: Base DMA address of the crypto context
  * @ctx_dma: Actual usable crypto context for NITROX
  */
 struct ctx_hdr {
-- 
GitLab


From fffe799b6cc980f967df2f3773dc53639dd68d7e Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 9 Feb 2022 20:00:47 +0200
Subject: [PATCH 0489/1586] crypto: qat - don't cast parameter in bit
 operations

While in this particular case it would not be a (critical) issue,
the pattern itself is bad and error prone in case the location
of the parameter is changed.

Don't cast parameter to unsigned long pointer in the bit operations.
Instead copy to a local variable on stack of a proper type and use.

Fixes: b4b7e67c917f ("crypto: qat - Intel(R) QAT ucode part of fw loader")
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/qat/qat_common/qat_uclo.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/crypto/qat/qat_common/qat_uclo.c b/drivers/crypto/qat/qat_common/qat_uclo.c
index 2026cc6be8f0e..6356402a2c9e8 100644
--- a/drivers/crypto/qat/qat_common/qat_uclo.c
+++ b/drivers/crypto/qat/qat_common/qat_uclo.c
@@ -387,7 +387,9 @@ static int qat_uclo_init_ustore(struct icp_qat_fw_loader_handle *handle,
 	page = image->page;
 
 	for_each_set_bit(ae, &ae_mask, handle->hal_handle->ae_max_num) {
-		if (!test_bit(ae, (unsigned long *)&uof_image->ae_assigned))
+		unsigned long ae_assigned = uof_image->ae_assigned;
+
+		if (!test_bit(ae, &ae_assigned))
 			continue;
 
 		if (!test_bit(ae, &cfg_ae_mask))
@@ -664,8 +666,9 @@ static int qat_uclo_map_ae(struct icp_qat_fw_loader_handle *handle, int max_ae)
 			continue;
 
 		for (i = 0; i < obj_handle->uimage_num; i++) {
-			if (!test_bit(ae, (unsigned long *)
-			&obj_handle->ae_uimage[i].img_ptr->ae_assigned))
+			unsigned long ae_assigned = obj_handle->ae_uimage[i].img_ptr->ae_assigned;
+
+			if (!test_bit(ae, &ae_assigned))
 				continue;
 			mflag = 1;
 			if (qat_uclo_init_ae_data(obj_handle, ae, i))
-- 
GitLab


From dfe085d8dcd0bb1fe20cc2327e81c8064cead441 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 10 Feb 2022 13:09:40 +1100
Subject: [PATCH 0490/1586] crypto: xts - Add softdep on ecb

The xts module needs ecb to be present as it's meant to work
on top of ecb.  This patch adds a softdep so ecb can be included
automatically into the initramfs.

Reported-by: rftc <rftc@gmx.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/xts.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/crypto/xts.c b/crypto/xts.c
index 6c12f30dbdd6d..63c85b9e64e08 100644
--- a/crypto/xts.c
+++ b/crypto/xts.c
@@ -466,3 +466,4 @@ MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("XTS block cipher mode");
 MODULE_ALIAS_CRYPTO("xts");
 MODULE_IMPORT_NS(CRYPTO_INTERNAL);
+MODULE_SOFTDEP("pre: ecb");
-- 
GitLab


From f60bbbbe8039e59341055e827bb404c14a2688a0 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 10 Feb 2022 13:31:13 +1100
Subject: [PATCH 0491/1586] crypto: lrw - Add dependency on ecb

The lrw template relies on ecb to work.  So we need to declare
a Kconfig dependency as well as a module softdep on it.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/Kconfig | 1 +
 crypto/lrw.c   | 1 +
 2 files changed, 2 insertions(+)

diff --git a/crypto/Kconfig b/crypto/Kconfig
index fa1741bb568f9..d9573b3f081ff 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -425,6 +425,7 @@ config CRYPTO_LRW
 	select CRYPTO_SKCIPHER
 	select CRYPTO_MANAGER
 	select CRYPTO_GF128MUL
+	select CRYPTO_ECB
 	help
 	  LRW: Liskov Rivest Wagner, a tweakable, non malleable, non movable
 	  narrow block cipher mode for dm-crypt.  Use it with cipher
diff --git a/crypto/lrw.c b/crypto/lrw.c
index bcf09fbc750af..8d59a66b65255 100644
--- a/crypto/lrw.c
+++ b/crypto/lrw.c
@@ -428,3 +428,4 @@ module_exit(lrw_module_exit);
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("LRW block cipher mode");
 MODULE_ALIAS_CRYPTO("lrw");
+MODULE_SOFTDEP("pre: ecb");
-- 
GitLab


From 605b84ae0beb8eef078b3d55e548b1dd6e75aeb1 Mon Sep 17 00:00:00 2001
From: Wojciech Ziemba <wojciech.ziemba@intel.com>
Date: Thu, 10 Feb 2022 13:38:25 +0000
Subject: [PATCH 0492/1586] crypto: qat - add misc workqueue

In an effort to reduce the amount of workqueues, scattered across
the QAT driver, introduce the misc workqueue. This queue will be used
to handle bottom halves, Power Management and more in the future.

The function adf_misc_wq_queue_work() has been added to simplify
the enqueuing of jobs.

Signed-off-by: Wojciech Ziemba <wojciech.ziemba@intel.com>
Reviewed-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Reviewed-by: Marco Chiappero <marco.chiappero@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 .../crypto/qat/qat_common/adf_common_drv.h    |  3 ++
 drivers/crypto/qat/qat_common/adf_ctl_drv.c   |  6 ++++
 drivers/crypto/qat/qat_common/adf_isr.c       | 28 +++++++++++++++++++
 3 files changed, 37 insertions(+)

diff --git a/drivers/crypto/qat/qat_common/adf_common_drv.h b/drivers/crypto/qat/qat_common/adf_common_drv.h
index 76f4f96ec5eb0..0775491768797 100644
--- a/drivers/crypto/qat/qat_common/adf_common_drv.h
+++ b/drivers/crypto/qat/qat_common/adf_common_drv.h
@@ -188,6 +188,9 @@ int qat_uclo_map_obj(struct icp_qat_fw_loader_handle *handle,
 		     void *addr_ptr, u32 mem_size, char *obj_name);
 int qat_uclo_set_cfg_ae_mask(struct icp_qat_fw_loader_handle *handle,
 			     unsigned int cfg_ae_mask);
+int adf_init_misc_wq(void);
+void adf_exit_misc_wq(void);
+bool adf_misc_wq_queue_work(struct work_struct *work);
 #if defined(CONFIG_PCI_IOV)
 int adf_sriov_configure(struct pci_dev *pdev, int numvfs);
 void adf_disable_sriov(struct adf_accel_dev *accel_dev);
diff --git a/drivers/crypto/qat/qat_common/adf_ctl_drv.c b/drivers/crypto/qat/qat_common/adf_ctl_drv.c
index 6f64aa6931461..e8ac932bbaab6 100644
--- a/drivers/crypto/qat/qat_common/adf_ctl_drv.c
+++ b/drivers/crypto/qat/qat_common/adf_ctl_drv.c
@@ -419,6 +419,9 @@ static int __init adf_register_ctl_device_driver(void)
 	if (adf_chr_drv_create())
 		goto err_chr_dev;
 
+	if (adf_init_misc_wq())
+		goto err_misc_wq;
+
 	if (adf_init_aer())
 		goto err_aer;
 
@@ -440,6 +443,8 @@ err_vf_wq:
 err_pf_wq:
 	adf_exit_aer();
 err_aer:
+	adf_exit_misc_wq();
+err_misc_wq:
 	adf_chr_drv_destroy();
 err_chr_dev:
 	mutex_destroy(&adf_ctl_lock);
@@ -449,6 +454,7 @@ err_chr_dev:
 static void __exit adf_unregister_ctl_device_driver(void)
 {
 	adf_chr_drv_destroy();
+	adf_exit_misc_wq();
 	adf_exit_aer();
 	adf_exit_vf_wq();
 	adf_exit_pf_wq();
diff --git a/drivers/crypto/qat/qat_common/adf_isr.c b/drivers/crypto/qat/qat_common/adf_isr.c
index 4ca482aa69f7c..803b89ba9670c 100644
--- a/drivers/crypto/qat/qat_common/adf_isr.c
+++ b/drivers/crypto/qat/qat_common/adf_isr.c
@@ -16,6 +16,7 @@
 #include "adf_transport_internal.h"
 
 #define ADF_MAX_NUM_VFS	32
+static struct workqueue_struct *adf_misc_wq;
 
 static int adf_enable_msix(struct adf_accel_dev *accel_dev)
 {
@@ -341,3 +342,30 @@ err_out:
 	return ret;
 }
 EXPORT_SYMBOL_GPL(adf_isr_resource_alloc);
+
+/**
+ * adf_init_misc_wq() - Init misc workqueue
+ *
+ * Function init workqueue 'qat_misc_wq' for general purpose.
+ *
+ * Return: 0 on success, error code otherwise.
+ */
+int __init adf_init_misc_wq(void)
+{
+	adf_misc_wq = alloc_workqueue("qat_misc_wq", WQ_MEM_RECLAIM, 0);
+
+	return !adf_misc_wq ? -ENOMEM : 0;
+}
+
+void adf_exit_misc_wq(void)
+{
+	if (adf_misc_wq)
+		destroy_workqueue(adf_misc_wq);
+
+	adf_misc_wq = NULL;
+}
+
+bool adf_misc_wq_queue_work(struct work_struct *work)
+{
+	return queue_work(adf_misc_wq, work);
+}
-- 
GitLab


From f734409c77d7e422bc759c53ad234e6af9b56938 Mon Sep 17 00:00:00 2001
From: Wojciech Ziemba <wojciech.ziemba@intel.com>
Date: Thu, 10 Feb 2022 13:38:26 +0000
Subject: [PATCH 0493/1586] crypto: qat - move and rename GEN4 error register
 definitions

Move error source related CSRs from 4xxx to the wider GEN4 header file.

Signed-off-by: Wojciech Ziemba <wojciech.ziemba@intel.com>
Reviewed-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Reviewed-by: Marco Chiappero <marco.chiappero@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c   |  6 +++---
 drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.h   | 14 --------------
 drivers/crypto/qat/qat_common/adf_gen4_hw_data.h | 14 ++++++++++++++
 3 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c
index 68d39c833332e..69fb271c85dd6 100644
--- a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c
+++ b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c
@@ -229,7 +229,7 @@ static void adf_enable_error_correction(struct adf_accel_dev *accel_dev)
 	void __iomem *csr = misc_bar->virt_addr;
 
 	/* Enable all in errsou3 except VFLR notification on host */
-	ADF_CSR_WR(csr, ADF_4XXX_ERRMSK3, ADF_4XXX_VFLNOTIFY);
+	ADF_CSR_WR(csr, ADF_GEN4_ERRMSK3, ADF_GEN4_VFLNOTIFY);
 }
 
 static void adf_enable_ints(struct adf_accel_dev *accel_dev)
@@ -256,9 +256,9 @@ static int adf_init_device(struct adf_accel_dev *accel_dev)
 	addr = (&GET_BARS(accel_dev)[ADF_4XXX_PMISC_BAR])->virt_addr;
 
 	/* Temporarily mask PM interrupt */
-	csr = ADF_CSR_RD(addr, ADF_4XXX_ERRMSK2);
+	csr = ADF_CSR_RD(addr, ADF_GEN4_ERRMSK2);
 	csr |= ADF_4XXX_PM_SOU;
-	ADF_CSR_WR(addr, ADF_4XXX_ERRMSK2, csr);
+	ADF_CSR_WR(addr, ADF_GEN4_ERRMSK2, csr);
 
 	/* Set DRV_ACTIVE bit to power up the device */
 	ADF_CSR_WR(addr, ADF_4XXX_PM_INTERRUPT, ADF_4XXX_PM_DRV_ACTIVE);
diff --git a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.h b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.h
index 12e4fb9b40cef..857b93a3c032d 100644
--- a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.h
+++ b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.h
@@ -39,20 +39,6 @@
 #define ADF_4XXX_NUM_RINGS_PER_BANK	2
 #define ADF_4XXX_NUM_BANKS_PER_VF	4
 
-/* Error source registers */
-#define ADF_4XXX_ERRSOU0	(0x41A200)
-#define ADF_4XXX_ERRSOU1	(0x41A204)
-#define ADF_4XXX_ERRSOU2	(0x41A208)
-#define ADF_4XXX_ERRSOU3	(0x41A20C)
-
-/* Error source mask registers */
-#define ADF_4XXX_ERRMSK0	(0x41A210)
-#define ADF_4XXX_ERRMSK1	(0x41A214)
-#define ADF_4XXX_ERRMSK2	(0x41A218)
-#define ADF_4XXX_ERRMSK3	(0x41A21C)
-
-#define ADF_4XXX_VFLNOTIFY	BIT(7)
-
 /* Arbiter configuration */
 #define ADF_4XXX_ARB_CONFIG			(BIT(31) | BIT(6) | BIT(0))
 #define ADF_4XXX_ARB_OFFSET			(0x0)
diff --git a/drivers/crypto/qat/qat_common/adf_gen4_hw_data.h b/drivers/crypto/qat/qat_common/adf_gen4_hw_data.h
index f0f71ca44ca36..43b8f864806bd 100644
--- a/drivers/crypto/qat/qat_common/adf_gen4_hw_data.h
+++ b/drivers/crypto/qat/qat_common/adf_gen4_hw_data.h
@@ -122,6 +122,20 @@ do { \
 #define ADF_WQM_CSR_RPRESETSTS_STATUS	BIT(0)
 #define ADF_WQM_CSR_RPRESETSTS(bank)	(ADF_WQM_CSR_RPRESETCTL(bank) + 4)
 
+/* Error source registers */
+#define ADF_GEN4_ERRSOU0	(0x41A200)
+#define ADF_GEN4_ERRSOU1	(0x41A204)
+#define ADF_GEN4_ERRSOU2	(0x41A208)
+#define ADF_GEN4_ERRSOU3	(0x41A20C)
+
+/* Error source mask registers */
+#define ADF_GEN4_ERRMSK0	(0x41A210)
+#define ADF_GEN4_ERRMSK1	(0x41A214)
+#define ADF_GEN4_ERRMSK2	(0x41A218)
+#define ADF_GEN4_ERRMSK3	(0x41A21C)
+
+#define ADF_GEN4_VFLNOTIFY	BIT(7)
+
 void adf_gen4_set_ssm_wdtimer(struct adf_accel_dev *accel_dev);
 void adf_gen4_init_hw_csr_ops(struct adf_hw_csr_ops *csr_ops);
 int adf_gen4_ring_pair_reset(struct adf_accel_dev *accel_dev, u32 bank_number);
-- 
GitLab


From e5745f34113b758b45d134dec04a7df94dc67131 Mon Sep 17 00:00:00 2001
From: Wojciech Ziemba <wojciech.ziemba@intel.com>
Date: Thu, 10 Feb 2022 13:38:27 +0000
Subject: [PATCH 0494/1586] crypto: qat - enable power management for QAT GEN4

Add support for HW QAT Power Management (PM) feature.
This feature is enabled at init time (1) by sending an admin message to
the firmware, targeting the admin AE, that sets the idle time before
the device changes state and (2) by unmasking the PM source of interrupt
in ERRMSK2.

The interrupt handler is extended to handle a PM interrupt which
is triggered by HW when a PM transition occurs. In this case, the
driver responds acknowledging the transaction using the HOST_MSG
mailbox.

Signed-off-by: Wojciech Ziemba <wojciech.ziemba@intel.com>
Co-developed-by: Marcinx Malinowski <marcinx.malinowski@intel.com>
Signed-off-by: Marcinx Malinowski <marcinx.malinowski@intel.com>
Reviewed-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Reviewed-by: Marco Chiappero <marco.chiappero@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 .../crypto/qat/qat_4xxx/adf_4xxx_hw_data.c    |  15 +-
 .../crypto/qat/qat_4xxx/adf_4xxx_hw_data.h    |  10 --
 drivers/crypto/qat/qat_common/Makefile        |   1 +
 .../crypto/qat/qat_common/adf_accel_devices.h |   2 +
 drivers/crypto/qat/qat_common/adf_admin.c     |  37 +++++
 .../crypto/qat/qat_common/adf_common_drv.h    |   1 +
 drivers/crypto/qat/qat_common/adf_gen4_pm.c   | 137 ++++++++++++++++++
 drivers/crypto/qat/qat_common/adf_gen4_pm.h   |  44 ++++++
 drivers/crypto/qat/qat_common/adf_init.c      |   6 +
 drivers/crypto/qat/qat_common/adf_isr.c       |  14 ++
 .../qat/qat_common/icp_qat_fw_init_admin.h    |   1 +
 11 files changed, 252 insertions(+), 16 deletions(-)
 create mode 100644 drivers/crypto/qat/qat_common/adf_gen4_pm.c
 create mode 100644 drivers/crypto/qat/qat_common/adf_gen4_pm.h

diff --git a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c
index 69fb271c85dd6..fb5970a684844 100644
--- a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c
+++ b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c
@@ -6,6 +6,7 @@
 #include <adf_common_drv.h>
 #include <adf_gen4_hw_data.h>
 #include <adf_gen4_pfvf.h>
+#include <adf_gen4_pm.h>
 #include "adf_4xxx_hw_data.h"
 #include "icp_qat_hw.h"
 
@@ -257,18 +258,18 @@ static int adf_init_device(struct adf_accel_dev *accel_dev)
 
 	/* Temporarily mask PM interrupt */
 	csr = ADF_CSR_RD(addr, ADF_GEN4_ERRMSK2);
-	csr |= ADF_4XXX_PM_SOU;
+	csr |= ADF_GEN4_PM_SOU;
 	ADF_CSR_WR(addr, ADF_GEN4_ERRMSK2, csr);
 
 	/* Set DRV_ACTIVE bit to power up the device */
-	ADF_CSR_WR(addr, ADF_4XXX_PM_INTERRUPT, ADF_4XXX_PM_DRV_ACTIVE);
+	ADF_CSR_WR(addr, ADF_GEN4_PM_INTERRUPT, ADF_GEN4_PM_DRV_ACTIVE);
 
 	/* Poll status register to make sure the device is powered up */
 	ret = read_poll_timeout(ADF_CSR_RD, status,
-				status & ADF_4XXX_PM_INIT_STATE,
-				ADF_4XXX_PM_POLL_DELAY_US,
-				ADF_4XXX_PM_POLL_TIMEOUT_US, true, addr,
-				ADF_4XXX_PM_STATUS);
+				status & ADF_GEN4_PM_INIT_STATE,
+				ADF_GEN4_PM_POLL_DELAY_US,
+				ADF_GEN4_PM_POLL_TIMEOUT_US, true, addr,
+				ADF_GEN4_PM_STATUS);
 	if (ret)
 		dev_err(&GET_DEV(accel_dev), "Failed to power up the device\n");
 
@@ -354,6 +355,8 @@ void adf_init_hw_data_4xxx(struct adf_hw_device_data *hw_data)
 	hw_data->set_ssm_wdtimer = adf_gen4_set_ssm_wdtimer;
 	hw_data->disable_iov = adf_disable_sriov;
 	hw_data->ring_pair_reset = adf_gen4_ring_pair_reset;
+	hw_data->enable_pm = adf_gen4_enable_pm;
+	hw_data->handle_pm_interrupt = adf_gen4_handle_pm_interrupt;
 
 	adf_gen4_init_hw_csr_ops(&hw_data->csr_ops);
 	adf_gen4_init_pf_pfvf_ops(&hw_data->pfvf_ops);
diff --git a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.h b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.h
index 857b93a3c032d..1034752845ca2 100644
--- a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.h
+++ b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.h
@@ -49,16 +49,6 @@
 #define ADF_4XXX_ADMINMSGLR_OFFSET	(0x500578)
 #define ADF_4XXX_MAILBOX_BASE_OFFSET	(0x600970)
 
-/* Power management */
-#define ADF_4XXX_PM_POLL_DELAY_US	20
-#define ADF_4XXX_PM_POLL_TIMEOUT_US	USEC_PER_SEC
-#define ADF_4XXX_PM_STATUS		(0x50A00C)
-#define ADF_4XXX_PM_INTERRUPT		(0x50A028)
-#define ADF_4XXX_PM_DRV_ACTIVE		BIT(20)
-#define ADF_4XXX_PM_INIT_STATE		BIT(21)
-/* Power management source in ERRSOU2 and ERRMSK2 */
-#define ADF_4XXX_PM_SOU			BIT(18)
-
 /* Firmware Binaries */
 #define ADF_4XXX_FW		"qat_4xxx.bin"
 #define ADF_4XXX_MMP		"qat_4xxx_mmp.bin"
diff --git a/drivers/crypto/qat/qat_common/Makefile b/drivers/crypto/qat/qat_common/Makefile
index 7e191a42a5c7e..f25a6c8edfc73 100644
--- a/drivers/crypto/qat/qat_common/Makefile
+++ b/drivers/crypto/qat/qat_common/Makefile
@@ -12,6 +12,7 @@ intel_qat-objs := adf_cfg.o \
 	adf_hw_arbiter.o \
 	adf_gen2_hw_data.o \
 	adf_gen4_hw_data.o \
+	adf_gen4_pm.o \
 	qat_crypto.o \
 	qat_algs.o \
 	qat_asym_algs.o \
diff --git a/drivers/crypto/qat/qat_common/adf_accel_devices.h b/drivers/crypto/qat/qat_common/adf_accel_devices.h
index 2d4cd7c7cf33b..a03c6cf723312 100644
--- a/drivers/crypto/qat/qat_common/adf_accel_devices.h
+++ b/drivers/crypto/qat/qat_common/adf_accel_devices.h
@@ -184,6 +184,8 @@ struct adf_hw_device_data {
 	void (*exit_arb)(struct adf_accel_dev *accel_dev);
 	const u32 *(*get_arb_mapping)(void);
 	int (*init_device)(struct adf_accel_dev *accel_dev);
+	int (*enable_pm)(struct adf_accel_dev *accel_dev);
+	bool (*handle_pm_interrupt)(struct adf_accel_dev *accel_dev);
 	void (*disable_iov)(struct adf_accel_dev *accel_dev);
 	void (*configure_iov_threads)(struct adf_accel_dev *accel_dev,
 				      bool enable);
diff --git a/drivers/crypto/qat/qat_common/adf_admin.c b/drivers/crypto/qat/qat_common/adf_admin.c
index 498eb6f690e37..3b6184c350811 100644
--- a/drivers/crypto/qat/qat_common/adf_admin.c
+++ b/drivers/crypto/qat/qat_common/adf_admin.c
@@ -251,6 +251,43 @@ int adf_send_admin_init(struct adf_accel_dev *accel_dev)
 }
 EXPORT_SYMBOL_GPL(adf_send_admin_init);
 
+/**
+ * adf_init_admin_pm() - Function sends PM init message to FW
+ * @accel_dev: Pointer to acceleration device.
+ * @idle_delay: QAT HW idle time before power gating is initiated.
+ *		000 - 64us
+ *		001 - 128us
+ *		010 - 256us
+ *		011 - 512us
+ *		100 - 1ms
+ *		101 - 2ms
+ *		110 - 4ms
+ *		111 - 8ms
+ *
+ * Function sends to the FW the admin init message for the PM state
+ * configuration.
+ *
+ * Return: 0 on success, error code otherwise.
+ */
+int adf_init_admin_pm(struct adf_accel_dev *accel_dev, u32 idle_delay)
+{
+	struct adf_hw_device_data *hw_data = accel_dev->hw_device;
+	struct icp_qat_fw_init_admin_resp resp = {0};
+	struct icp_qat_fw_init_admin_req req = {0};
+	u32 ae_mask = hw_data->admin_ae_mask;
+
+	if (!accel_dev->admin) {
+		dev_err(&GET_DEV(accel_dev), "adf_admin is not available\n");
+		return -EFAULT;
+	}
+
+	req.cmd_id = ICP_QAT_FW_PM_STATE_CONFIG;
+	req.idle_filter = idle_delay;
+
+	return adf_send_admin(accel_dev, &req, &resp, ae_mask);
+}
+EXPORT_SYMBOL_GPL(adf_init_admin_pm);
+
 int adf_init_admin_comms(struct adf_accel_dev *accel_dev)
 {
 	struct adf_admin_comms *admin;
diff --git a/drivers/crypto/qat/qat_common/adf_common_drv.h b/drivers/crypto/qat/qat_common/adf_common_drv.h
index 0775491768797..e8c9b77c0d66b 100644
--- a/drivers/crypto/qat/qat_common/adf_common_drv.h
+++ b/drivers/crypto/qat/qat_common/adf_common_drv.h
@@ -102,6 +102,7 @@ void adf_exit_aer(void);
 int adf_init_admin_comms(struct adf_accel_dev *accel_dev);
 void adf_exit_admin_comms(struct adf_accel_dev *accel_dev);
 int adf_send_admin_init(struct adf_accel_dev *accel_dev);
+int adf_init_admin_pm(struct adf_accel_dev *accel_dev, u32 idle_delay);
 int adf_init_arb(struct adf_accel_dev *accel_dev);
 void adf_exit_arb(struct adf_accel_dev *accel_dev);
 void adf_update_ring_arb(struct adf_etr_ring_data *ring);
diff --git a/drivers/crypto/qat/qat_common/adf_gen4_pm.c b/drivers/crypto/qat/qat_common/adf_gen4_pm.c
new file mode 100644
index 0000000000000..7037c0892a8a2
--- /dev/null
+++ b/drivers/crypto/qat/qat_common/adf_gen4_pm.c
@@ -0,0 +1,137 @@
+// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only)
+/* Copyright(c) 2022 Intel Corporation */
+#include <linux/bitfield.h>
+#include <linux/iopoll.h>
+#include "adf_accel_devices.h"
+#include "adf_common_drv.h"
+#include "adf_gen4_pm.h"
+#include "adf_cfg_strings.h"
+#include "icp_qat_fw_init_admin.h"
+#include "adf_gen4_hw_data.h"
+#include "adf_cfg.h"
+
+enum qat_pm_host_msg {
+	PM_NO_CHANGE = 0,
+	PM_SET_MIN,
+};
+
+struct adf_gen4_pm_data {
+	struct work_struct pm_irq_work;
+	struct adf_accel_dev *accel_dev;
+	u32 pm_int_sts;
+};
+
+static int send_host_msg(struct adf_accel_dev *accel_dev)
+{
+	void __iomem *pmisc = adf_get_pmisc_base(accel_dev);
+	u32 msg;
+
+	msg = ADF_CSR_RD(pmisc, ADF_GEN4_PM_HOST_MSG);
+	if (msg & ADF_GEN4_PM_MSG_PENDING)
+		return -EBUSY;
+
+	/* Send HOST_MSG */
+	msg = FIELD_PREP(ADF_GEN4_PM_MSG_PAYLOAD_BIT_MASK, PM_SET_MIN);
+	msg |= ADF_GEN4_PM_MSG_PENDING;
+	ADF_CSR_WR(pmisc, ADF_GEN4_PM_HOST_MSG, msg);
+
+	/* Poll status register to make sure the HOST_MSG has been processed */
+	return read_poll_timeout(ADF_CSR_RD, msg,
+				!(msg & ADF_GEN4_PM_MSG_PENDING),
+				ADF_GEN4_PM_MSG_POLL_DELAY_US,
+				ADF_GEN4_PM_POLL_TIMEOUT_US, true, pmisc,
+				ADF_GEN4_PM_HOST_MSG);
+}
+
+static void pm_bh_handler(struct work_struct *work)
+{
+	struct adf_gen4_pm_data *pm_data =
+		container_of(work, struct adf_gen4_pm_data, pm_irq_work);
+	struct adf_accel_dev *accel_dev = pm_data->accel_dev;
+	void __iomem *pmisc = adf_get_pmisc_base(accel_dev);
+	u32 pm_int_sts = pm_data->pm_int_sts;
+	u32 val;
+
+	/* PM Idle interrupt */
+	if (pm_int_sts & ADF_GEN4_PM_IDLE_STS) {
+		/* Issue host message to FW */
+		if (send_host_msg(accel_dev))
+			dev_warn_ratelimited(&GET_DEV(accel_dev),
+					     "Failed to send host msg to FW\n");
+	}
+
+	/* Clear interrupt status */
+	ADF_CSR_WR(pmisc, ADF_GEN4_PM_INTERRUPT, pm_int_sts);
+
+	/* Reenable PM interrupt */
+	val = ADF_CSR_RD(pmisc, ADF_GEN4_ERRMSK2);
+	val &= ~ADF_GEN4_PM_SOU;
+	ADF_CSR_WR(pmisc, ADF_GEN4_ERRMSK2, val);
+
+	kfree(pm_data);
+}
+
+bool adf_gen4_handle_pm_interrupt(struct adf_accel_dev *accel_dev)
+{
+	void __iomem *pmisc = adf_get_pmisc_base(accel_dev);
+	struct adf_gen4_pm_data *pm_data = NULL;
+	u32 errsou2;
+	u32 errmsk2;
+	u32 val;
+
+	/* Only handle the interrupt triggered by PM */
+	errmsk2 = ADF_CSR_RD(pmisc, ADF_GEN4_ERRMSK2);
+	if (errmsk2 & ADF_GEN4_PM_SOU)
+		return false;
+
+	errsou2 = ADF_CSR_RD(pmisc, ADF_GEN4_ERRSOU2);
+	if (!(errsou2 & ADF_GEN4_PM_SOU))
+		return false;
+
+	/* Disable interrupt */
+	val = ADF_CSR_RD(pmisc, ADF_GEN4_ERRMSK2);
+	val |= ADF_GEN4_PM_SOU;
+	ADF_CSR_WR(pmisc, ADF_GEN4_ERRMSK2, val);
+
+	val = ADF_CSR_RD(pmisc, ADF_GEN4_PM_INTERRUPT);
+
+	pm_data = kzalloc(sizeof(*pm_data), GFP_ATOMIC);
+	if (!pm_data)
+		return false;
+
+	pm_data->pm_int_sts = val;
+	pm_data->accel_dev = accel_dev;
+
+	INIT_WORK(&pm_data->pm_irq_work, pm_bh_handler);
+	adf_misc_wq_queue_work(&pm_data->pm_irq_work);
+
+	return true;
+}
+EXPORT_SYMBOL_GPL(adf_gen4_handle_pm_interrupt);
+
+int adf_gen4_enable_pm(struct adf_accel_dev *accel_dev)
+{
+	void __iomem *pmisc = adf_get_pmisc_base(accel_dev);
+	int ret;
+	u32 val;
+
+	ret = adf_init_admin_pm(accel_dev, ADF_GEN4_PM_DEFAULT_IDLE_FILTER);
+	if (ret)
+		return ret;
+
+	/* Enable default PM interrupts: IDLE, THROTTLE */
+	val = ADF_CSR_RD(pmisc, ADF_GEN4_PM_INTERRUPT);
+	val |= ADF_GEN4_PM_INT_EN_DEFAULT;
+
+	/* Clear interrupt status */
+	val |= ADF_GEN4_PM_INT_STS_MASK;
+	ADF_CSR_WR(pmisc, ADF_GEN4_PM_INTERRUPT, val);
+
+	/* Unmask PM Interrupt */
+	val = ADF_CSR_RD(pmisc, ADF_GEN4_ERRMSK2);
+	val &= ~ADF_GEN4_PM_SOU;
+	ADF_CSR_WR(pmisc, ADF_GEN4_ERRMSK2, val);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(adf_gen4_enable_pm);
diff --git a/drivers/crypto/qat/qat_common/adf_gen4_pm.h b/drivers/crypto/qat/qat_common/adf_gen4_pm.h
new file mode 100644
index 0000000000000..f8f8a9ee29e5b
--- /dev/null
+++ b/drivers/crypto/qat/qat_common/adf_gen4_pm.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only) */
+/* Copyright(c) 2022 Intel Corporation */
+#ifndef ADF_GEN4_PM_H
+#define ADF_GEN4_PM_H
+
+#include "adf_accel_devices.h"
+
+/* Power management registers */
+#define ADF_GEN4_PM_HOST_MSG (0x50A01C)
+
+/* Power management */
+#define ADF_GEN4_PM_POLL_DELAY_US	20
+#define ADF_GEN4_PM_POLL_TIMEOUT_US	USEC_PER_SEC
+#define ADF_GEN4_PM_MSG_POLL_DELAY_US	(10 * USEC_PER_MSEC)
+#define ADF_GEN4_PM_STATUS		(0x50A00C)
+#define ADF_GEN4_PM_INTERRUPT		(0x50A028)
+
+/* Power management source in ERRSOU2 and ERRMSK2 */
+#define ADF_GEN4_PM_SOU			BIT(18)
+
+#define ADF_GEN4_PM_IDLE_INT_EN		BIT(18)
+#define ADF_GEN4_PM_THROTTLE_INT_EN	BIT(19)
+#define ADF_GEN4_PM_DRV_ACTIVE		BIT(20)
+#define ADF_GEN4_PM_INIT_STATE		BIT(21)
+#define ADF_GEN4_PM_INT_EN_DEFAULT	(ADF_GEN4_PM_IDLE_INT_EN | \
+					ADF_GEN4_PM_THROTTLE_INT_EN)
+
+#define ADF_GEN4_PM_THR_STS	BIT(0)
+#define ADF_GEN4_PM_IDLE_STS	BIT(1)
+#define ADF_GEN4_PM_FW_INT_STS	BIT(2)
+#define ADF_GEN4_PM_INT_STS_MASK (ADF_GEN4_PM_THR_STS | \
+				 ADF_GEN4_PM_IDLE_STS | \
+				 ADF_GEN4_PM_FW_INT_STS)
+
+#define ADF_GEN4_PM_MSG_PENDING			BIT(0)
+#define ADF_GEN4_PM_MSG_PAYLOAD_BIT_MASK	GENMASK(28, 1)
+
+#define ADF_GEN4_PM_DEFAULT_IDLE_FILTER		(0x0)
+#define ADF_GEN4_PM_MAX_IDLE_FILTER		(0x7)
+
+int adf_gen4_enable_pm(struct adf_accel_dev *accel_dev);
+bool adf_gen4_handle_pm_interrupt(struct adf_accel_dev *accel_dev);
+
+#endif
diff --git a/drivers/crypto/qat/qat_common/adf_init.c b/drivers/crypto/qat/qat_common/adf_init.c
index 2edc63c6b6caa..c2c718f1b4895 100644
--- a/drivers/crypto/qat/qat_common/adf_init.c
+++ b/drivers/crypto/qat/qat_common/adf_init.c
@@ -181,6 +181,12 @@ int adf_dev_start(struct adf_accel_dev *accel_dev)
 	if (hw_data->set_ssm_wdtimer)
 		hw_data->set_ssm_wdtimer(accel_dev);
 
+	/* Enable Power Management */
+	if (hw_data->enable_pm && hw_data->enable_pm(accel_dev)) {
+		dev_err(&GET_DEV(accel_dev), "Failed to configure Power Management\n");
+		return -EFAULT;
+	}
+
 	list_for_each(list_itr, &service_table) {
 		service = list_entry(list_itr, struct service_hndl, list);
 		if (service->event_hld(accel_dev, ADF_EVENT_START)) {
diff --git a/drivers/crypto/qat/qat_common/adf_isr.c b/drivers/crypto/qat/qat_common/adf_isr.c
index 803b89ba9670c..a35149f8bf1ee 100644
--- a/drivers/crypto/qat/qat_common/adf_isr.c
+++ b/drivers/crypto/qat/qat_common/adf_isr.c
@@ -124,6 +124,17 @@ static bool adf_handle_vf2pf_int(struct adf_accel_dev *accel_dev)
 }
 #endif /* CONFIG_PCI_IOV */
 
+static bool adf_handle_pm_int(struct adf_accel_dev *accel_dev)
+{
+	struct adf_hw_device_data *hw_data = accel_dev->hw_device;
+
+	if (hw_data->handle_pm_interrupt &&
+	    hw_data->handle_pm_interrupt(accel_dev))
+		return true;
+
+	return false;
+}
+
 static irqreturn_t adf_msix_isr_ae(int irq, void *dev_ptr)
 {
 	struct adf_accel_dev *accel_dev = dev_ptr;
@@ -134,6 +145,9 @@ static irqreturn_t adf_msix_isr_ae(int irq, void *dev_ptr)
 		return IRQ_HANDLED;
 #endif /* CONFIG_PCI_IOV */
 
+	if (adf_handle_pm_int(accel_dev))
+		return IRQ_HANDLED;
+
 	dev_dbg(&GET_DEV(accel_dev), "qat_dev%d spurious AE interrupt\n",
 		accel_dev->accel_id);
 
diff --git a/drivers/crypto/qat/qat_common/icp_qat_fw_init_admin.h b/drivers/crypto/qat/qat_common/icp_qat_fw_init_admin.h
index afe59a7684ac5..56cb827f93ea3 100644
--- a/drivers/crypto/qat/qat_common/icp_qat_fw_init_admin.h
+++ b/drivers/crypto/qat/qat_common/icp_qat_fw_init_admin.h
@@ -16,6 +16,7 @@ enum icp_qat_fw_init_admin_cmd_id {
 	ICP_QAT_FW_HEARTBEAT_SYNC = 7,
 	ICP_QAT_FW_HEARTBEAT_GET = 8,
 	ICP_QAT_FW_COMP_CAPABILITY_GET = 9,
+	ICP_QAT_FW_PM_STATE_CONFIG = 128,
 };
 
 enum icp_qat_fw_init_admin_resp_status {
-- 
GitLab


From 882f6c602b65cd384bec2cea4fbfc091a7bbfc50 Mon Sep 17 00:00:00 2001
From: Rikard Falkeborn <rikard.falkeborn@gmail.com>
Date: Thu, 10 Feb 2022 21:28:03 +0100
Subject: [PATCH 0495/1586] crypto: omap-aes - Constify static attribute_group

The only usage of omap_aes_attr_group is to pass its address to
sysfs_{create,remove}_group(), which takes pointers to const struct
attribute_group. Make it const to allow the compiler to put it in
read-only memory.

Signed-off-by: Rikard Falkeborn <rikard.falkeborn@gmail.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/omap-aes.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c
index a196bb8b17010..581211a926283 100644
--- a/drivers/crypto/omap-aes.c
+++ b/drivers/crypto/omap-aes.c
@@ -1093,7 +1093,7 @@ static struct attribute *omap_aes_attrs[] = {
 	NULL,
 };
 
-static struct attribute_group omap_aes_attr_group = {
+static const struct attribute_group omap_aes_attr_group = {
 	.attrs = omap_aes_attrs,
 };
 
-- 
GitLab


From 83b5a23b6604028ef635a72465b23a85a425b695 Mon Sep 17 00:00:00 2001
From: Rikard Falkeborn <rikard.falkeborn@gmail.com>
Date: Thu, 10 Feb 2022 21:28:04 +0100
Subject: [PATCH 0496/1586] crypto: omap-sham - Constify static attribute_group

The only usage of omap_sham_attr_group is to pass its address to
sysfs_{create,remove}_group(), which takes pointers to const struct
attribute_group. Make it const to allow the compiler to put it in
read-only memory.

Signed-off-by: Rikard Falkeborn <rikard.falkeborn@gmail.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/omap-sham.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c
index f6bf53c00b614..4b37dc69a50ce 100644
--- a/drivers/crypto/omap-sham.c
+++ b/drivers/crypto/omap-sham.c
@@ -2045,7 +2045,7 @@ static struct attribute *omap_sham_attrs[] = {
 	NULL,
 };
 
-static struct attribute_group omap_sham_attr_group = {
+static const struct attribute_group omap_sham_attr_group = {
 	.attrs = omap_sham_attrs,
 };
 
-- 
GitLab


From bd75b4ef4977f567c7a567cf8f48dc122a097aa9 Mon Sep 17 00:00:00 2001
From: Rikard Falkeborn <rikard.falkeborn@gmail.com>
Date: Thu, 10 Feb 2022 21:28:05 +0100
Subject: [PATCH 0497/1586] crypto: nx - Constify static attribute_group
 structs

The only usage of these is to pass their address to
sysfs_{create,remove}_group(), which takes pointers to const struct
attribute_group. Make them const to allow the compiler to put them in
read-only memory.

Signed-off-by: Rikard Falkeborn <rikard.falkeborn@gmail.com>
Reviewed-by: Daniel Axtens <dja@axtens.net>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/nx/nx-common-pseries.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/nx/nx-common-pseries.c b/drivers/crypto/nx/nx-common-pseries.c
index 4e304f6081e47..7584a34ba88c2 100644
--- a/drivers/crypto/nx/nx-common-pseries.c
+++ b/drivers/crypto/nx/nx-common-pseries.c
@@ -962,7 +962,7 @@ static struct attribute *nx842_sysfs_entries[] = {
 	NULL,
 };
 
-static struct attribute_group nx842_attribute_group = {
+static const struct attribute_group nx842_attribute_group = {
 	.name = NULL,		/* put in device directory */
 	.attrs = nx842_sysfs_entries,
 };
@@ -992,7 +992,7 @@ static struct attribute *nxcop_caps_sysfs_entries[] = {
 	NULL,
 };
 
-static struct attribute_group nxcop_caps_attr_group = {
+static const struct attribute_group nxcop_caps_attr_group = {
 	.name	=	"nx_gzip_caps",
 	.attrs	=	nxcop_caps_sysfs_entries,
 };
-- 
GitLab


From 142be74078a2cf24f9694093e21216b2d8740d17 Mon Sep 17 00:00:00 2001
From: Julia Lawall <Julia.Lawall@inria.fr>
Date: Thu, 10 Feb 2022 21:42:18 +0100
Subject: [PATCH 0498/1586] crypto: ux500 - use GFP_KERNEL

Platform_driver probe functions aren't called with locks held
and thus don't need GFP_ATOMIC. Use GFP_KERNEL instead.

Problem found with Coccinelle.

Signed-off-by: Julia Lawall <Julia.Lawall@inria.fr>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/ux500/cryp/cryp_core.c | 2 +-
 drivers/crypto/ux500/hash/hash_core.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/ux500/cryp/cryp_core.c b/drivers/crypto/ux500/cryp/cryp_core.c
index 97277b7150cb4..5a57c9afd8c88 100644
--- a/drivers/crypto/ux500/cryp/cryp_core.c
+++ b/drivers/crypto/ux500/cryp/cryp_core.c
@@ -1264,7 +1264,7 @@ static int ux500_cryp_probe(struct platform_device *pdev)
 	struct device *dev = &pdev->dev;
 
 	dev_dbg(dev, "[%s]", __func__);
-	device_data = devm_kzalloc(dev, sizeof(*device_data), GFP_ATOMIC);
+	device_data = devm_kzalloc(dev, sizeof(*device_data), GFP_KERNEL);
 	if (!device_data) {
 		ret = -ENOMEM;
 		goto out;
diff --git a/drivers/crypto/ux500/hash/hash_core.c b/drivers/crypto/ux500/hash/hash_core.c
index 51a6e1a424349..5157c118d642c 100644
--- a/drivers/crypto/ux500/hash/hash_core.c
+++ b/drivers/crypto/ux500/hash/hash_core.c
@@ -1658,7 +1658,7 @@ static int ux500_hash_probe(struct platform_device *pdev)
 	struct hash_device_data *device_data;
 	struct device		*dev = &pdev->dev;
 
-	device_data = devm_kzalloc(dev, sizeof(*device_data), GFP_ATOMIC);
+	device_data = devm_kzalloc(dev, sizeof(*device_data), GFP_KERNEL);
 	if (!device_data) {
 		ret = -ENOMEM;
 		goto out;
-- 
GitLab


From aec01cc8d119b453b26da9ba45ec60ac2b395e18 Mon Sep 17 00:00:00 2001
From: Kai Ye <yekai13@huawei.com>
Date: Fri, 11 Feb 2022 17:07:58 +0800
Subject: [PATCH 0499/1586] crypto: hisilicon/sec - add the register
 configuration for HW V3

Added the register configuration of the SVA mode for HW V3.

Signed-off-by: Kai Ye <yekai13@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/sec2/sec_main.c | 51 +++++++++++++++++++-----
 1 file changed, 40 insertions(+), 11 deletions(-)

diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c
index 26d3ab1d308ba..45d2b27da9ad3 100644
--- a/drivers/crypto/hisilicon/sec2/sec_main.c
+++ b/drivers/crypto/hisilicon/sec2/sec_main.c
@@ -90,6 +90,10 @@
 					SEC_USER1_WB_DATA_SSV)
 #define SEC_USER1_SMMU_SVA		(SEC_USER1_SMMU_NORMAL | SEC_USER1_SVA_SET)
 #define SEC_USER1_SMMU_MASK		(~SEC_USER1_SVA_SET)
+#define SEC_INTERFACE_USER_CTRL0_REG_V3	0x302220
+#define SEC_INTERFACE_USER_CTRL1_REG_V3	0x302224
+#define SEC_USER1_SMMU_NORMAL_V3	(BIT(23) | BIT(17) | BIT(11) | BIT(5))
+#define SEC_USER1_SMMU_MASK_V3		0xFF79E79E
 #define SEC_CORE_INT_STATUS_M_ECC	BIT(2)
 
 #define SEC_PREFETCH_CFG		0x301130
@@ -335,6 +339,41 @@ static void sec_set_endian(struct hisi_qm *qm)
 	writel_relaxed(reg, qm->io_base + SEC_CONTROL_REG);
 }
 
+static void sec_engine_sva_config(struct hisi_qm *qm)
+{
+	u32 reg;
+
+	if (qm->ver > QM_HW_V2) {
+		reg = readl_relaxed(qm->io_base +
+				SEC_INTERFACE_USER_CTRL0_REG_V3);
+		reg |= SEC_USER0_SMMU_NORMAL;
+		writel_relaxed(reg, qm->io_base +
+				SEC_INTERFACE_USER_CTRL0_REG_V3);
+
+		reg = readl_relaxed(qm->io_base +
+				SEC_INTERFACE_USER_CTRL1_REG_V3);
+		reg &= SEC_USER1_SMMU_MASK_V3;
+		reg |= SEC_USER1_SMMU_NORMAL_V3;
+		writel_relaxed(reg, qm->io_base +
+				SEC_INTERFACE_USER_CTRL1_REG_V3);
+	} else {
+		reg = readl_relaxed(qm->io_base +
+				SEC_INTERFACE_USER_CTRL0_REG);
+		reg |= SEC_USER0_SMMU_NORMAL;
+		writel_relaxed(reg, qm->io_base +
+				SEC_INTERFACE_USER_CTRL0_REG);
+		reg = readl_relaxed(qm->io_base +
+				SEC_INTERFACE_USER_CTRL1_REG);
+		reg &= SEC_USER1_SMMU_MASK;
+		if (qm->use_sva)
+			reg |= SEC_USER1_SMMU_SVA;
+		else
+			reg |= SEC_USER1_SMMU_NORMAL;
+		writel_relaxed(reg, qm->io_base +
+				SEC_INTERFACE_USER_CTRL1_REG);
+	}
+}
+
 static void sec_open_sva_prefetch(struct hisi_qm *qm)
 {
 	u32 val;
@@ -426,17 +465,7 @@ static int sec_engine_init(struct hisi_qm *qm)
 	reg |= (0x1 << SEC_TRNG_EN_SHIFT);
 	writel_relaxed(reg, qm->io_base + SEC_CONTROL_REG);
 
-	reg = readl_relaxed(qm->io_base + SEC_INTERFACE_USER_CTRL0_REG);
-	reg |= SEC_USER0_SMMU_NORMAL;
-	writel_relaxed(reg, qm->io_base + SEC_INTERFACE_USER_CTRL0_REG);
-
-	reg = readl_relaxed(qm->io_base + SEC_INTERFACE_USER_CTRL1_REG);
-	reg &= SEC_USER1_SMMU_MASK;
-	if (qm->use_sva && qm->ver == QM_HW_V2)
-		reg |= SEC_USER1_SMMU_SVA;
-	else
-		reg |= SEC_USER1_SMMU_NORMAL;
-	writel_relaxed(reg, qm->io_base + SEC_INTERFACE_USER_CTRL1_REG);
+	sec_engine_sva_config(qm);
 
 	writel(SEC_SINGLE_PORT_MAX_TRANS,
 	       qm->io_base + AM_CFG_SINGLE_PORT_MAX_TRANS);
-- 
GitLab


From f8a2652826444d13181061840b96a5d975d5b6c6 Mon Sep 17 00:00:00 2001
From: Kai Ye <yekai13@huawei.com>
Date: Fri, 11 Feb 2022 17:08:18 +0800
Subject: [PATCH 0500/1586] crypto: hisilicon/sec - not need to enable sm4
 extra mode at HW V3

It is not need to enable sm4 extra mode in at HW V3. Here is fix it.

Signed-off-by: Kai Ye <yekai13@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/sec2/sec_main.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c
index 45d2b27da9ad3..0b9906ff69e3c 100644
--- a/drivers/crypto/hisilicon/sec2/sec_main.c
+++ b/drivers/crypto/hisilicon/sec2/sec_main.c
@@ -472,9 +472,11 @@ static int sec_engine_init(struct hisi_qm *qm)
 
 	writel(SEC_SAA_ENABLE, qm->io_base + SEC_SAA_EN_REG);
 
-	/* Enable sm4 extra mode, as ctr/ecb */
-	writel_relaxed(SEC_BD_ERR_CHK_EN0,
-		       qm->io_base + SEC_BD_ERR_CHK_EN_REG0);
+	/* HW V2 enable sm4 extra mode, as ctr/ecb */
+	if (qm->ver < QM_HW_V3)
+		writel_relaxed(SEC_BD_ERR_CHK_EN0,
+			       qm->io_base + SEC_BD_ERR_CHK_EN_REG0);
+
 	/* Enable sm4 xts mode multiple iv */
 	writel_relaxed(SEC_BD_ERR_CHK_EN1,
 		       qm->io_base + SEC_BD_ERR_CHK_EN_REG1);
-- 
GitLab


From 973d74e93820d99d8ea203882631c76edab699c9 Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Fri, 11 Feb 2022 12:16:17 +0000
Subject: [PATCH 0501/1586] crypto: rockchip - ECB does not need IV

When loading rockchip crypto module, testmgr complains that ivsize of ecb-des3-ede-rk
is not the same than generic implementation.
In fact ECB does not use an IV.

Fixes: ce0183cb6464b ("crypto: rockchip - switch to skcipher API")
Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/rockchip/rk3288_crypto_skcipher.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
index 1cece1a7d3f00..5bbf0d2722e11 100644
--- a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
+++ b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
@@ -506,7 +506,6 @@ struct rk_crypto_tmp rk_ecb_des3_ede_alg = {
 		.exit			= rk_ablk_exit_tfm,
 		.min_keysize		= DES3_EDE_KEY_SIZE,
 		.max_keysize		= DES3_EDE_KEY_SIZE,
-		.ivsize			= DES_BLOCK_SIZE,
 		.setkey			= rk_tdes_setkey,
 		.encrypt		= rk_des3_ede_ecb_encrypt,
 		.decrypt		= rk_des3_ede_ecb_decrypt,
-- 
GitLab


From c90e453916bd4d74297303d000f011cdb47a7d94 Mon Sep 17 00:00:00 2001
From: Dominik Brodowski <linux@dominikbrodowski.net>
Date: Sun, 13 Feb 2022 21:46:28 +0100
Subject: [PATCH 0502/1586] hwrng: core - do not bother to order list of
 devices by quality

There is no real reason why this list needs to be kept ordered by
the driver-provided quality value -- a value which is set only by
a handful of hw_random devices anyway.

Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/core.c | 36 ++++++++++++++++-------------------
 1 file changed, 16 insertions(+), 20 deletions(-)

diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index f327f7493585e..6f09f4e5af20f 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -31,7 +31,7 @@ static struct hwrng *current_rng;
 /* the current rng has been explicitly chosen by user via sysfs */
 static int cur_rng_set_by_user;
 static struct task_struct *hwrng_fill;
-/* list of registered rngs, sorted decending by quality */
+/* list of registered rngs */
 static LIST_HEAD(rng_list);
 /* Protects rng_list and current_rng */
 static DEFINE_MUTEX(rng_mutex);
@@ -297,24 +297,28 @@ static struct miscdevice rng_miscdev = {
 
 static int enable_best_rng(void)
 {
+	struct hwrng *rng, *new_rng = NULL;
 	int ret = -ENODEV;
 
 	BUG_ON(!mutex_is_locked(&rng_mutex));
 
-	/* rng_list is sorted by quality, use the best (=first) one */
-	if (!list_empty(&rng_list)) {
-		struct hwrng *new_rng;
-
-		new_rng = list_entry(rng_list.next, struct hwrng, list);
-		ret = ((new_rng == current_rng) ? 0 : set_current_rng(new_rng));
-		if (!ret)
-			cur_rng_set_by_user = 0;
-	} else {
+	/* no rng to use? */
+	if (list_empty(&rng_list)) {
 		drop_current_rng();
 		cur_rng_set_by_user = 0;
-		ret = 0;
+		return 0;
 	}
 
+	/* use the rng which offers the best quality */
+	list_for_each_entry(rng, &rng_list, list) {
+		if (!new_rng || rng->quality > new_rng->quality)
+			new_rng = rng;
+	}
+
+	ret = ((new_rng == current_rng) ? 0 : set_current_rng(new_rng));
+	if (!ret)
+		cur_rng_set_by_user = 0;
+
 	return ret;
 }
 
@@ -475,7 +479,6 @@ int hwrng_register(struct hwrng *rng)
 {
 	int err = -EINVAL;
 	struct hwrng *tmp;
-	struct list_head *rng_list_ptr;
 	bool is_new_current = false;
 
 	if (!rng->name || (!rng->data_read && !rng->read))
@@ -489,18 +492,11 @@ int hwrng_register(struct hwrng *rng)
 		if (strcmp(tmp->name, rng->name) == 0)
 			goto out_unlock;
 	}
+	list_add_tail(&rng->list, &rng_list);
 
 	init_completion(&rng->cleanup_done);
 	complete(&rng->cleanup_done);
 
-	/* rng_list is sorted by decreasing quality */
-	list_for_each(rng_list_ptr, &rng_list) {
-		tmp = list_entry(rng_list_ptr, struct hwrng, list);
-		if (tmp->quality < rng->quality)
-			break;
-	}
-	list_add_tail(&rng->list, rng_list_ptr);
-
 	if (!current_rng ||
 	    (!cur_rng_set_by_user && rng->quality > current_rng->quality)) {
 		/*
-- 
GitLab


From 077bb7a1baec75bd2d7d08e2cbdeb5b72344a4ad Mon Sep 17 00:00:00 2001
From: Dominik Brodowski <linux@dominikbrodowski.net>
Date: Sun, 13 Feb 2022 21:46:29 +0100
Subject: [PATCH 0503/1586] hwrng: core - start and stop in-kernel rngd in
 separate function

Extract the start/stop logic for the in-kernel rngd thread to
a separate function.

Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/core.c | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index 6f09f4e5af20f..29febf55b0d46 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -51,7 +51,7 @@ MODULE_PARM_DESC(default_quality,
 
 static void drop_current_rng(void);
 static int hwrng_init(struct hwrng *rng);
-static void start_khwrngd(void);
+static void hwrng_manage_rngd(void);
 
 static inline int rng_get_data(struct hwrng *rng, u8 *buffer, size_t size,
 			       int wait);
@@ -164,10 +164,7 @@ skip_init:
 	if (current_quality > 1024)
 		current_quality = 1024;
 
-	if (current_quality == 0 && hwrng_fill)
-		kthread_stop(hwrng_fill);
-	if (current_quality > 0 && !hwrng_fill)
-		start_khwrngd();
+	hwrng_manage_rngd();
 
 	return 0;
 }
@@ -466,12 +463,19 @@ static int hwrng_fillfn(void *unused)
 	return 0;
 }
 
-static void start_khwrngd(void)
+static void hwrng_manage_rngd(void)
 {
-	hwrng_fill = kthread_run(hwrng_fillfn, NULL, "hwrng");
-	if (IS_ERR(hwrng_fill)) {
-		pr_err("hwrng_fill thread creation failed\n");
-		hwrng_fill = NULL;
+	if (WARN_ON(!mutex_is_locked(&rng_mutex)))
+		return;
+
+	if (current_quality == 0 && hwrng_fill)
+		kthread_stop(hwrng_fill);
+	if (current_quality > 0 && !hwrng_fill) {
+		hwrng_fill = kthread_run(hwrng_fillfn, NULL, "hwrng");
+		if (IS_ERR(hwrng_fill)) {
+			pr_err("hwrng_fill thread creation failed\n");
+			hwrng_fill = NULL;
+		}
 	}
 }
 
-- 
GitLab


From f0fb6953b39e015acfecb8b5158642614e6ce364 Mon Sep 17 00:00:00 2001
From: Dominik Brodowski <linux@dominikbrodowski.net>
Date: Sun, 13 Feb 2022 21:46:30 +0100
Subject: [PATCH 0504/1586] hwrng: core - use per-rng quality value instead of
 global setting

The current_quality variable exposed as a module parameter is
fundamentally broken: If it is set at boot time, it is overwritten once
the first hw rng device is loaded; if it is set at runtime, it is
without effect if the hw rng device had its quality value set to 0 (and
no default_quality was set); and if a new rng is selected, it gets
overwritten. Therefore, mark it as obsolete, and replace it by the
per-rng quality setting.

Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/core.c | 33 ++++++++++++++++++++-------------
 1 file changed, 20 insertions(+), 13 deletions(-)

diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index 29febf55b0d46..8df102b39b35d 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -44,14 +44,14 @@ static unsigned short default_quality; /* = 0; default to "off" */
 
 module_param(current_quality, ushort, 0644);
 MODULE_PARM_DESC(current_quality,
-		 "current hwrng entropy estimation per 1024 bits of input");
+		 "current hwrng entropy estimation per 1024 bits of input -- obsolete");
 module_param(default_quality, ushort, 0644);
 MODULE_PARM_DESC(default_quality,
 		 "default entropy content of hwrng per 1024 bits of input");
 
 static void drop_current_rng(void);
 static int hwrng_init(struct hwrng *rng);
-static void hwrng_manage_rngd(void);
+static void hwrng_manage_rngd(struct hwrng *rng);
 
 static inline int rng_get_data(struct hwrng *rng, u8 *buffer, size_t size,
 			       int wait);
@@ -160,11 +160,13 @@ static int hwrng_init(struct hwrng *rng)
 	reinit_completion(&rng->cleanup_done);
 
 skip_init:
-	current_quality = rng->quality ? : default_quality;
-	if (current_quality > 1024)
-		current_quality = 1024;
+	if (!rng->quality)
+		rng->quality = default_quality;
+	if (rng->quality > 1024)
+		rng->quality = 1024;
+	current_quality = rng->quality; /* obsolete */
 
-	hwrng_manage_rngd();
+	hwrng_manage_rngd(rng);
 
 	return 0;
 }
@@ -429,19 +431,24 @@ static int hwrng_fillfn(void *unused)
 	long rc;
 
 	while (!kthread_should_stop()) {
+		unsigned short quality;
 		struct hwrng *rng;
 
-		if (!current_quality)
-			break;
-
 		rng = get_current_rng();
 		if (IS_ERR(rng) || !rng)
 			break;
 		mutex_lock(&reading_mutex);
 		rc = rng_get_data(rng, rng_fillbuf,
 				  rng_buffer_size(), 1);
+		if (current_quality != rng->quality)
+			rng->quality = current_quality; /* obsolete */
+		quality = rng->quality;
 		mutex_unlock(&reading_mutex);
 		put_rng(rng);
+
+		if (!quality)
+			break;
+
 		if (rc <= 0) {
 			pr_warn("hwrng: no data available\n");
 			msleep_interruptible(10000);
@@ -451,7 +458,7 @@ static int hwrng_fillfn(void *unused)
 		/* If we cannot credit at least one bit of entropy,
 		 * keep track of the remainder for the next iteration
 		 */
-		entropy = rc * current_quality * 8 + entropy_credit;
+		entropy = rc * quality * 8 + entropy_credit;
 		if ((entropy >> 10) == 0)
 			entropy_credit = entropy;
 
@@ -463,14 +470,14 @@ static int hwrng_fillfn(void *unused)
 	return 0;
 }
 
-static void hwrng_manage_rngd(void)
+static void hwrng_manage_rngd(struct hwrng *rng)
 {
 	if (WARN_ON(!mutex_is_locked(&rng_mutex)))
 		return;
 
-	if (current_quality == 0 && hwrng_fill)
+	if (rng->quality == 0 && hwrng_fill)
 		kthread_stop(hwrng_fill);
-	if (current_quality > 0 && !hwrng_fill) {
+	if (rng->quality > 0 && !hwrng_fill) {
 		hwrng_fill = kthread_run(hwrng_fillfn, NULL, "hwrng");
 		if (IS_ERR(hwrng_fill)) {
 			pr_err("hwrng_fill thread creation failed\n");
-- 
GitLab


From 8208285632f950d2bfd489b10148e05134b7119e Mon Sep 17 00:00:00 2001
From: Dominik Brodowski <linux@dominikbrodowski.net>
Date: Sun, 13 Feb 2022 21:46:31 +0100
Subject: [PATCH 0505/1586] hwrng: core - introduce rng_quality sysfs attribute

The rng_quality sysfs attribute returns the quality setting for the
currently active hw_random device, in entropy bits per 1024 bits of
input. Storing a value between 0 and 1024 to this file updates this
estimate accordingly.

Based on the updates to the quality setting, the rngd kernel thread
may be stopped (if no hw_random device is trusted to return entropy),
may be started (if the quality setting is increased from zero), or
may use a different hw_random source (if that has higher quality
output).

Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/core.c | 64 ++++++++++++++++++++++++++++++++++-
 1 file changed, 63 insertions(+), 1 deletion(-)

diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index 8df102b39b35d..71eafcc451e12 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -44,7 +44,7 @@ static unsigned short default_quality; /* = 0; default to "off" */
 
 module_param(current_quality, ushort, 0644);
 MODULE_PARM_DESC(current_quality,
-		 "current hwrng entropy estimation per 1024 bits of input -- obsolete");
+		 "current hwrng entropy estimation per 1024 bits of input -- obsolete, use rng_quality instead");
 module_param(default_quality, ushort, 0644);
 MODULE_PARM_DESC(default_quality,
 		 "default entropy content of hwrng per 1024 bits of input");
@@ -402,14 +402,76 @@ static ssize_t rng_selected_show(struct device *dev,
 	return sysfs_emit(buf, "%d\n", cur_rng_set_by_user);
 }
 
+static ssize_t rng_quality_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	ssize_t ret;
+	struct hwrng *rng;
+
+	rng = get_current_rng();
+	if (IS_ERR(rng))
+		return PTR_ERR(rng);
+
+	if (!rng) /* no need to put_rng */
+		return -ENODEV;
+
+	ret = sysfs_emit(buf, "%hu\n", rng->quality);
+	put_rng(rng);
+
+	return ret;
+}
+
+static ssize_t rng_quality_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t len)
+{
+	u16 quality;
+	int ret = -EINVAL;
+
+	if (len < 2)
+		return -EINVAL;
+
+	ret = mutex_lock_interruptible(&rng_mutex);
+	if (ret)
+		return -ERESTARTSYS;
+
+	ret = kstrtou16(buf, 0, &quality);
+	if (ret || quality > 1024) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (!current_rng) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	current_rng->quality = quality;
+	current_quality = quality; /* obsolete */
+
+	/* the best available RNG may have changed */
+	ret = enable_best_rng();
+
+	/* start/stop rngd if necessary */
+	if (current_rng)
+		hwrng_manage_rngd(current_rng);
+
+out:
+	mutex_unlock(&rng_mutex);
+	return ret ? ret : len;
+}
+
 static DEVICE_ATTR_RW(rng_current);
 static DEVICE_ATTR_RO(rng_available);
 static DEVICE_ATTR_RO(rng_selected);
+static DEVICE_ATTR_RW(rng_quality);
 
 static struct attribute *rng_dev_attrs[] = {
 	&dev_attr_rng_current.attr,
 	&dev_attr_rng_available.attr,
 	&dev_attr_rng_selected.attr,
+	&dev_attr_rng_quality.attr,
 	NULL
 };
 
-- 
GitLab


From 43a4b1fee098bd38eed9c334d0e0df221ecdf719 Mon Sep 17 00:00:00 2001
From: Yu Kuai <yukuai3@huawei.com>
Date: Sat, 29 Jan 2022 09:59:22 +0800
Subject: [PATCH 0506/1586] block, bfq: cleanup bfq_bfqq_to_bfqg()

Use bfq_group() instead, which do the same thing.

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Acked-by: Paolo Valente <paolo.valente@linaro.org>
Link: https://lore.kernel.org/r/20220129015924.3958918-2-yukuai3@huawei.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bfq-iosched.c |  4 ++--
 block/bfq-iosched.h |  1 -
 block/bfq-wf2q.c    | 15 ---------------
 3 files changed, 2 insertions(+), 18 deletions(-)

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 0c612a9116967..2f2b97cad9802 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -774,7 +774,7 @@ bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq)
 	if (!bfqq->next_rq)
 		return;
 
-	bfqq->pos_root = &bfq_bfqq_to_bfqg(bfqq)->rq_pos_tree;
+	bfqq->pos_root = &bfqq_group(bfqq)->rq_pos_tree;
 	__bfqq = bfq_rq_pos_tree_lookup(bfqd, bfqq->pos_root,
 			blk_rq_pos(bfqq->next_rq), &parent, &p);
 	if (!__bfqq) {
@@ -2669,7 +2669,7 @@ static struct bfq_queue *bfqq_find_close(struct bfq_data *bfqd,
 					 struct bfq_queue *bfqq,
 					 sector_t sector)
 {
-	struct rb_root *root = &bfq_bfqq_to_bfqg(bfqq)->rq_pos_tree;
+	struct rb_root *root = &bfqq_group(bfqq)->rq_pos_tree;
 	struct rb_node *parent, *node;
 	struct bfq_queue *__bfqq;
 
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index 72255ec44f8f8..3b83e3d1c2e58 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -1050,7 +1050,6 @@ extern struct blkcg_policy blkcg_policy_bfq;
 	for (parent = NULL; entity ; entity = parent)
 #endif /* CONFIG_BFQ_GROUP_IOSCHED */
 
-struct bfq_group *bfq_bfqq_to_bfqg(struct bfq_queue *bfqq);
 struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity);
 unsigned int bfq_tot_busy_queues(struct bfq_data *bfqd);
 struct bfq_service_tree *bfq_entity_service_tree(struct bfq_entity *entity);
diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c
index 709b901de3ca9..f8eb340381cf1 100644
--- a/block/bfq-wf2q.c
+++ b/block/bfq-wf2q.c
@@ -142,16 +142,6 @@ static bool bfq_update_next_in_service(struct bfq_sched_data *sd,
 
 #ifdef CONFIG_BFQ_GROUP_IOSCHED
 
-struct bfq_group *bfq_bfqq_to_bfqg(struct bfq_queue *bfqq)
-{
-	struct bfq_entity *group_entity = bfqq->entity.parent;
-
-	if (!group_entity)
-		group_entity = &bfqq->bfqd->root_group->entity;
-
-	return container_of(group_entity, struct bfq_group, entity);
-}
-
 /*
  * Returns true if this budget changes may let next_in_service->parent
  * become the next_in_service entity for its parent entity.
@@ -230,11 +220,6 @@ static bool bfq_no_longer_next_in_service(struct bfq_entity *entity)
 
 #else /* CONFIG_BFQ_GROUP_IOSCHED */
 
-struct bfq_group *bfq_bfqq_to_bfqg(struct bfq_queue *bfqq)
-{
-	return bfqq->bfqd->root_group;
-}
-
 static bool bfq_update_parent_budget(struct bfq_entity *next_in_service)
 {
 	return false;
-- 
GitLab


From c5e4cb0fcbbaa5ad853818c4a2383e9bd147fad6 Mon Sep 17 00:00:00 2001
From: Yu Kuai <yukuai3@huawei.com>
Date: Sat, 29 Jan 2022 09:59:23 +0800
Subject: [PATCH 0507/1586] block, bfq: avoid moving bfqq to it's parent bfqg

Moving bfqq to it's parent bfqg is pointless.

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Link: https://lore.kernel.org/r/20220129015924.3958918-3-yukuai3@huawei.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bfq-cgroup.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index 24a5c5329bcd0..9783c11561591 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -645,6 +645,14 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 		   struct bfq_group *bfqg)
 {
 	struct bfq_entity *entity = &bfqq->entity;
+	struct bfq_group *old_parent = bfqq_group(bfqq);
+
+	/*
+	 * No point to move bfqq to the same group, which can happen when
+	 * root group is offlined
+	 */
+	if (old_parent == bfqg)
+		return;
 
 	/*
 	 * Get extra reference to prevent bfqq from being freed in
@@ -666,7 +674,7 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 		bfq_deactivate_bfqq(bfqd, bfqq, false, false);
 	else if (entity->on_st_or_in_serv)
 		bfq_put_idle_entity(bfq_entity_service_tree(entity), entity);
-	bfqg_and_blkg_put(bfqq_group(bfqq));
+	bfqg_and_blkg_put(old_parent);
 
 	if (entity->parent &&
 	    entity->parent->last_bfqq_created == bfqq)
-- 
GitLab


From 8410f70977734f21b8ed45c37e925d311dfda2e7 Mon Sep 17 00:00:00 2001
From: Yu Kuai <yukuai3@huawei.com>
Date: Sat, 29 Jan 2022 09:59:24 +0800
Subject: [PATCH 0508/1586] block, bfq: don't move oom_bfqq

Our test report a UAF:

[ 2073.019181] ==================================================================
[ 2073.019188] BUG: KASAN: use-after-free in __bfq_put_async_bfqq+0xa0/0x168
[ 2073.019191] Write of size 8 at addr ffff8000ccf64128 by task rmmod/72584
[ 2073.019192]
[ 2073.019196] CPU: 0 PID: 72584 Comm: rmmod Kdump: loaded Not tainted 4.19.90-yk #5
[ 2073.019198] Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015
[ 2073.019200] Call trace:
[ 2073.019203]  dump_backtrace+0x0/0x310
[ 2073.019206]  show_stack+0x28/0x38
[ 2073.019210]  dump_stack+0xec/0x15c
[ 2073.019216]  print_address_description+0x68/0x2d0
[ 2073.019220]  kasan_report+0x238/0x2f0
[ 2073.019224]  __asan_store8+0x88/0xb0
[ 2073.019229]  __bfq_put_async_bfqq+0xa0/0x168
[ 2073.019233]  bfq_put_async_queues+0xbc/0x208
[ 2073.019236]  bfq_pd_offline+0x178/0x238
[ 2073.019240]  blkcg_deactivate_policy+0x1f0/0x420
[ 2073.019244]  bfq_exit_queue+0x128/0x178
[ 2073.019249]  blk_mq_exit_sched+0x12c/0x160
[ 2073.019252]  elevator_exit+0xc8/0xd0
[ 2073.019256]  blk_exit_queue+0x50/0x88
[ 2073.019259]  blk_cleanup_queue+0x228/0x3d8
[ 2073.019267]  null_del_dev+0xfc/0x1e0 [null_blk]
[ 2073.019274]  null_exit+0x90/0x114 [null_blk]
[ 2073.019278]  __arm64_sys_delete_module+0x358/0x5a0
[ 2073.019282]  el0_svc_common+0xc8/0x320
[ 2073.019287]  el0_svc_handler+0xf8/0x160
[ 2073.019290]  el0_svc+0x10/0x218
[ 2073.019291]
[ 2073.019294] Allocated by task 14163:
[ 2073.019301]  kasan_kmalloc+0xe0/0x190
[ 2073.019305]  kmem_cache_alloc_node_trace+0x1cc/0x418
[ 2073.019308]  bfq_pd_alloc+0x54/0x118
[ 2073.019313]  blkcg_activate_policy+0x250/0x460
[ 2073.019317]  bfq_create_group_hierarchy+0x38/0x110
[ 2073.019321]  bfq_init_queue+0x6d0/0x948
[ 2073.019325]  blk_mq_init_sched+0x1d8/0x390
[ 2073.019330]  elevator_switch_mq+0x88/0x170
[ 2073.019334]  elevator_switch+0x140/0x270
[ 2073.019338]  elv_iosched_store+0x1a4/0x2a0
[ 2073.019342]  queue_attr_store+0x90/0xe0
[ 2073.019348]  sysfs_kf_write+0xa8/0xe8
[ 2073.019351]  kernfs_fop_write+0x1f8/0x378
[ 2073.019359]  __vfs_write+0xe0/0x360
[ 2073.019363]  vfs_write+0xf0/0x270
[ 2073.019367]  ksys_write+0xdc/0x1b8
[ 2073.019371]  __arm64_sys_write+0x50/0x60
[ 2073.019375]  el0_svc_common+0xc8/0x320
[ 2073.019380]  el0_svc_handler+0xf8/0x160
[ 2073.019383]  el0_svc+0x10/0x218
[ 2073.019385]
[ 2073.019387] Freed by task 72584:
[ 2073.019391]  __kasan_slab_free+0x120/0x228
[ 2073.019394]  kasan_slab_free+0x10/0x18
[ 2073.019397]  kfree+0x94/0x368
[ 2073.019400]  bfqg_put+0x64/0xb0
[ 2073.019404]  bfqg_and_blkg_put+0x90/0xb0
[ 2073.019408]  bfq_put_queue+0x220/0x228
[ 2073.019413]  __bfq_put_async_bfqq+0x98/0x168
[ 2073.019416]  bfq_put_async_queues+0xbc/0x208
[ 2073.019420]  bfq_pd_offline+0x178/0x238
[ 2073.019424]  blkcg_deactivate_policy+0x1f0/0x420
[ 2073.019429]  bfq_exit_queue+0x128/0x178
[ 2073.019433]  blk_mq_exit_sched+0x12c/0x160
[ 2073.019437]  elevator_exit+0xc8/0xd0
[ 2073.019440]  blk_exit_queue+0x50/0x88
[ 2073.019443]  blk_cleanup_queue+0x228/0x3d8
[ 2073.019451]  null_del_dev+0xfc/0x1e0 [null_blk]
[ 2073.019459]  null_exit+0x90/0x114 [null_blk]
[ 2073.019462]  __arm64_sys_delete_module+0x358/0x5a0
[ 2073.019467]  el0_svc_common+0xc8/0x320
[ 2073.019471]  el0_svc_handler+0xf8/0x160
[ 2073.019474]  el0_svc+0x10/0x218
[ 2073.019475]
[ 2073.019479] The buggy address belongs to the object at ffff8000ccf63f00
 which belongs to the cache kmalloc-1024 of size 1024
[ 2073.019484] The buggy address is located 552 bytes inside of
 1024-byte region [ffff8000ccf63f00, ffff8000ccf64300)
[ 2073.019486] The buggy address belongs to the page:
[ 2073.019492] page:ffff7e000333d800 count:1 mapcount:0 mapping:ffff8000c0003a00 index:0x0 compound_mapcount: 0
[ 2073.020123] flags: 0x7ffff0000008100(slab|head)
[ 2073.020403] raw: 07ffff0000008100 ffff7e0003334c08 ffff7e00001f5a08 ffff8000c0003a00
[ 2073.020409] raw: 0000000000000000 00000000001c001c 00000001ffffffff 0000000000000000
[ 2073.020411] page dumped because: kasan: bad access detected
[ 2073.020412]
[ 2073.020414] Memory state around the buggy address:
[ 2073.020420]  ffff8000ccf64000: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[ 2073.020424]  ffff8000ccf64080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[ 2073.020428] >ffff8000ccf64100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[ 2073.020430]                                   ^
[ 2073.020434]  ffff8000ccf64180: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[ 2073.020438]  ffff8000ccf64200: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[ 2073.020439] ==================================================================

The same problem exist in mainline as well.

This is because oom_bfqq is moved to a non-root group, thus root_group
is freed earlier.

Thus fix the problem by don't move oom_bfqq.

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Acked-by: Paolo Valente <paolo.valente@linaro.org>
Link: https://lore.kernel.org/r/20220129015924.3958918-4-yukuai3@huawei.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bfq-cgroup.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index 9783c11561591..420eda2589c0e 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -654,6 +654,12 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 	if (old_parent == bfqg)
 		return;
 
+	/*
+	 * oom_bfqq is not allowed to move, oom_bfqq will hold ref to root_group
+	 * until elevator exit.
+	 */
+	if (bfqq == &bfqd->oom_bfqq)
+		return;
 	/*
 	 * Get extra reference to prevent bfqq from being freed in
 	 * next possible expire or deactivate.
-- 
GitLab


From 07025ceaac9f4f7a9e1a3285c3216469bf066320 Mon Sep 17 00:00:00 2001
From: Yang Li <yang.lee@linux.alibaba.com>
Date: Thu, 17 Feb 2022 09:00:24 +0800
Subject: [PATCH 0509/1586] spi: clean up some inconsistent indenting

Eliminate the follow smatch warning:
drivers/spi/spi-sunplus-sp7021.c:379 sp7021_spi_slave_transfer_one()
warn: inconsistent indenting

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
Link: https://lore.kernel.org/r/20220217010024.111904-1-yang.lee@linux.alibaba.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-sunplus-sp7021.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/spi/spi-sunplus-sp7021.c b/drivers/spi/spi-sunplus-sp7021.c
index ade7a0fca8cb3..f989f7b992961 100644
--- a/drivers/spi/spi-sunplus-sp7021.c
+++ b/drivers/spi/spi-sunplus-sp7021.c
@@ -376,8 +376,8 @@ static int sp7021_spi_slave_transfer_one(struct spi_controller *ctlr, struct spi
 					      xfer->len, DMA_TO_DEVICE);
 		if (dma_mapping_error(dev, xfer->tx_dma))
 			return -ENOMEM;
-		 ret = sp7021_spi_slave_tx(spi, xfer);
-		 dma_unmap_single(dev, xfer->tx_dma, xfer->len, DMA_TO_DEVICE);
+		ret = sp7021_spi_slave_tx(spi, xfer);
+		dma_unmap_single(dev, xfer->tx_dma, xfer->len, DMA_TO_DEVICE);
 	} else if (xfer->rx_buf && !xfer->tx_buf) {
 		xfer->rx_dma = dma_map_single(dev, xfer->rx_buf, xfer->len,
 					      DMA_FROM_DEVICE);
-- 
GitLab


From 043786303b175977e515d4e99cf6b5f886b136dc Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Fri, 18 Feb 2022 14:58:35 +0100
Subject: [PATCH 0510/1586] spi: use sysfs_emit() for printing statistics and
 add trailing newline

Use dedicated function sysfs_emit() that does some extra checking,
e.g. to ensure that no more than PAGESIZE bytes are written.
In addition add a trailing newline to the output, that makes it
better readable from the console.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Link: https://lore.kernel.org/r/56e1588d-d53b-73e9-fdc8-7fe30bf91f11@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 85f8ae4cc0c0d..cd4dc3131e17f 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -143,7 +143,7 @@ static ssize_t spi_statistics_##name##_show(struct spi_statistics *stat, \
 	unsigned long flags;						\
 	ssize_t len;							\
 	spin_lock_irqsave(&stat->lock, flags);				\
-	len = sprintf(buf, format_string, stat->field);			\
+	len = sysfs_emit(buf, format_string "\n", stat->field);		\
 	spin_unlock_irqrestore(&stat->lock, flags);			\
 	return len;							\
 }									\
-- 
GitLab


From 5ea33af9d430cd1dbfada1b839e0d317ed77bfac Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20G=C3=B6ttsche?= <cgzones@googlemail.com>
Date: Thu, 17 Feb 2022 15:21:29 +0100
Subject: [PATCH 0511/1586] selinux: drop return statement at end of void
 functions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Those return statements at the end of a void function are redundant.

Reported by clang-tidy [readability-redundant-control-flow]

Signed-off-by: Christian Göttsche <cgzones@googlemail.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 security/selinux/hooks.c          | 2 --
 security/selinux/ss/conditional.c | 2 --
 security/selinux/ss/ebitmap.c     | 1 -
 security/selinux/ss/mls.c         | 1 -
 security/selinux/ss/services.c    | 2 --
 5 files changed, 8 deletions(-)

diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index dafabb4dcc640..1e69f88eb326a 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -3284,8 +3284,6 @@ static void selinux_inode_post_setxattr(struct dentry *dentry, const char *name,
 	isec->sid = newsid;
 	isec->initialized = LABEL_INITIALIZED;
 	spin_unlock(&isec->lock);
-
-	return;
 }
 
 static int selinux_inode_getxattr(struct dentry *dentry, const char *name)
diff --git a/security/selinux/ss/conditional.c b/security/selinux/ss/conditional.c
index 2ec6e5cd25d9b..c46c419af512e 100644
--- a/security/selinux/ss/conditional.c
+++ b/security/selinux/ss/conditional.c
@@ -566,8 +566,6 @@ void cond_compute_xperms(struct avtab *ctab, struct avtab_key *key,
 		if (node->key.specified & AVTAB_ENABLED)
 			services_compute_xperms_decision(xpermd, node);
 	}
-	return;
-
 }
 /* Determine whether additional permissions are granted by the conditional
  * av table, and if so, add them to the result
diff --git a/security/selinux/ss/ebitmap.c b/security/selinux/ss/ebitmap.c
index 61fcbb8d0f880..abde349c83217 100644
--- a/security/selinux/ss/ebitmap.c
+++ b/security/selinux/ss/ebitmap.c
@@ -359,7 +359,6 @@ void ebitmap_destroy(struct ebitmap *e)
 
 	e->highbit = 0;
 	e->node = NULL;
-	return;
 }
 
 int ebitmap_read(struct ebitmap *e, void *fp)
diff --git a/security/selinux/ss/mls.c b/security/selinux/ss/mls.c
index 3f5fd124342c9..99571b19d4a9e 100644
--- a/security/selinux/ss/mls.c
+++ b/security/selinux/ss/mls.c
@@ -156,7 +156,6 @@ void mls_sid_to_context(struct policydb *p,
 	}
 
 	*scontext = scontextp;
-	return;
 }
 
 int mls_level_isvalid(struct policydb *p, struct mls_level *l)
diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index 2f8db93e53b29..6901dc07680de 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -529,8 +529,6 @@ out:
 	/* release scontext/tcontext */
 	kfree(tcontext_name);
 	kfree(scontext_name);
-
-	return;
 }
 
 /*
-- 
GitLab


From b97df7c098c531010e445da88d02b7bf7bf59ef6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20G=C3=B6ttsche?= <cgzones@googlemail.com>
Date: Thu, 17 Feb 2022 15:21:25 +0100
Subject: [PATCH 0512/1586] selinux: use correct type for context length
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

security_sid_to_context() expects a pointer to an u32 as the address
where to store the length of the computed context.

Reported by sparse:

    security/selinux/xfrm.c:359:39: warning: incorrect type in arg 4
                                    (different signedness)
    security/selinux/xfrm.c:359:39:    expected unsigned int
                                       [usertype] *scontext_len
    security/selinux/xfrm.c:359:39:    got int *

Signed-off-by: Christian Göttsche <cgzones@googlemail.com>
[PM: wrapped commit description]
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 security/selinux/xfrm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c
index 90697317895fb..c576832febc67 100644
--- a/security/selinux/xfrm.c
+++ b/security/selinux/xfrm.c
@@ -347,7 +347,7 @@ int selinux_xfrm_state_alloc_acquire(struct xfrm_state *x,
 	int rc;
 	struct xfrm_sec_ctx *ctx;
 	char *ctx_str = NULL;
-	int str_len;
+	u32 str_len;
 
 	if (!polsec)
 		return 0;
-- 
GitLab


From ad2f3b08d1ab17e481df753aae221f085fe1cc8a Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Thu, 10 Feb 2022 21:05:33 +0100
Subject: [PATCH 0513/1586] ACPI: scan: Use ida_alloc() instead of
 ida_simple_get()

As recommended in include/linux/idr.h, use ida_alloc() instead of
ida_simple_get() for creating unique device object names and for
symmetry replace ida_simple_remove() with ida_free() (and fix up
the related overly long code line while at it).

Also drop the ACPI_MAX_DEVICE_INSTANCES limit that is not necessary
any more and may not be sufficient for future platforms.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 drivers/acpi/internal.h | 2 --
 drivers/acpi/scan.c     | 5 +++--
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h
index 457e11d851b8d..628bf8f181304 100644
--- a/drivers/acpi/internal.h
+++ b/drivers/acpi/internal.h
@@ -96,8 +96,6 @@ void acpi_scan_table_notify(void);
 
 extern struct list_head acpi_bus_id_list;
 
-#define ACPI_MAX_DEVICE_INSTANCES	4096
-
 struct acpi_device_bus_id {
 	const char *bus_id;
 	struct ida instance_ida;
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 1331756d4cfce..5fa34f5316f11 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -477,7 +477,8 @@ static void acpi_device_del(struct acpi_device *device)
 	list_for_each_entry(acpi_device_bus_id, &acpi_bus_id_list, node)
 		if (!strcmp(acpi_device_bus_id->bus_id,
 			    acpi_device_hid(device))) {
-			ida_simple_remove(&acpi_device_bus_id->instance_ida, device->pnp.instance_no);
+			ida_free(&acpi_device_bus_id->instance_ida,
+				 device->pnp.instance_no);
 			if (ida_is_empty(&acpi_device_bus_id->instance_ida)) {
 				list_del(&acpi_device_bus_id->node);
 				kfree_const(acpi_device_bus_id->bus_id);
@@ -642,7 +643,7 @@ static int acpi_device_set_name(struct acpi_device *device,
 	struct ida *instance_ida = &acpi_device_bus_id->instance_ida;
 	int result;
 
-	result = ida_simple_get(instance_ida, 0, ACPI_MAX_DEVICE_INSTANCES, GFP_KERNEL);
+	result = ida_alloc(instance_ida, GFP_KERNEL);
 	if (result < 0)
 		return result;
 
-- 
GitLab


From 9978f446d406a7dae080ca4a682bac927c9b8773 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 9 Feb 2022 16:53:18 +0200
Subject: [PATCH 0514/1586] ACPI: property: Get rid of redundant 'else'

In the snippets like the following

	if (...)
		return / goto / break / continue ...;
	else
		...

the 'else' is redundant. Get rid of it.

While at it, replace conditional '<= 0' for unsigned type by '== 0'
in acpi_data_prop_read(); update comment in the
__acpi_node_get_property_reference() on how we parse the reference.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/property.c | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c
index 3fceb4681ec9f..12bbfe8336095 100644
--- a/drivers/acpi/property.c
+++ b/drivers/acpi/property.c
@@ -541,7 +541,8 @@ acpi_device_data_of_node(const struct fwnode_handle *fwnode)
 	if (is_acpi_device_node(fwnode)) {
 		const struct acpi_device *adev = to_acpi_device_node(fwnode);
 		return &adev->data;
-	} else if (is_acpi_data_node(fwnode)) {
+	}
+	if (is_acpi_data_node(fwnode)) {
 		const struct acpi_data_node *dn = to_acpi_data_node(fwnode);
 		return &dn->data;
 	}
@@ -739,14 +740,19 @@ int __acpi_node_get_property_reference(const struct fwnode_handle *fwnode,
 					return -EINVAL;
 			}
 
-			/* assume following integer elements are all args */
+			/*
+			 * Assume the following integer elements are all args.
+			 * Stop counting on the first reference or end of the
+			 * package arguments. In case of neither reference,
+			 * nor integer, return an error, we can't parse it.
+			 */
 			for (i = 0; element + i < end && i < num_args; i++) {
 				int type = element[i].type;
 
+				if (type == ACPI_TYPE_LOCAL_REFERENCE)
+					break;
 				if (type == ACPI_TYPE_INTEGER)
 					nargs++;
-				else if (type == ACPI_TYPE_LOCAL_REFERENCE)
-					break;
 				else
 					return -EINVAL;
 			}
@@ -950,7 +956,7 @@ static int acpi_data_prop_read(const struct acpi_device_data *data,
 
 	if (proptype != DEV_PROP_STRING && nval > obj->package.count)
 		return -EOVERFLOW;
-	else if (nval <= 0)
+	if (nval == 0)
 		return -EINVAL;
 
 	items = obj->package.elements;
@@ -1012,14 +1018,10 @@ struct fwnode_handle *acpi_get_next_subnode(const struct fwnode_handle *fwnode,
 	const struct list_head *head;
 	struct list_head *next;
 
-	if (!child || is_acpi_device_node(child)) {
+	if ((!child || is_acpi_device_node(child)) && adev) {
 		struct acpi_device *child_adev;
 
-		if (adev)
-			head = &adev->children;
-		else
-			goto nondev;
-
+		head = &adev->children;
 		if (list_empty(head))
 			goto nondev;
 
@@ -1089,7 +1091,8 @@ acpi_node_get_parent(const struct fwnode_handle *fwnode)
 	if (is_acpi_data_node(fwnode)) {
 		/* All data nodes have parent pointer so just return that */
 		return to_acpi_data_node(fwnode)->parent;
-	} else if (is_acpi_device_node(fwnode)) {
+	}
+	if (is_acpi_device_node(fwnode)) {
 		struct device *dev = to_acpi_device_node(fwnode)->dev.parent;
 
 		if (dev)
-- 
GitLab


From 509853f9e1e7b1490dc79f735a5dbafc9298f40d Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 11 Feb 2022 19:14:54 +0100
Subject: [PATCH 0515/1586] genirq: Provide generic_handle_irq_safe()

Provide generic_handle_irq_safe() which can used from any context.

Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Oleksandr Natalenko <oleksandr@natalenko.name>
Reviewed-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Link: https://lore.kernel.org/r/20220211181500.1856198-2-bigeasy@linutronix.de
---
 include/linux/irqdesc.h |  1 +
 kernel/irq/irqdesc.c    | 23 +++++++++++++++++++++++
 2 files changed, 24 insertions(+)

diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index 93d270ca0c567..a77584593f7d1 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -160,6 +160,7 @@ static inline void generic_handle_irq_desc(struct irq_desc *desc)
 
 int handle_irq_desc(struct irq_desc *desc);
 int generic_handle_irq(unsigned int irq);
+int generic_handle_irq_safe(unsigned int irq);
 
 #ifdef CONFIG_IRQ_DOMAIN
 /*
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 2267e6527db3c..346d283d2da14 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -662,6 +662,29 @@ int generic_handle_irq(unsigned int irq)
 }
 EXPORT_SYMBOL_GPL(generic_handle_irq);
 
+/**
+ * generic_handle_irq_safe - Invoke the handler for a particular irq from any
+ *			     context.
+ * @irq:	The irq number to handle
+ *
+ * Returns:	0 on success, a negative value on error.
+ *
+ * This function can be called from any context (IRQ or process context). It
+ * will report an error if not invoked from IRQ context and the irq has been
+ * marked to enforce IRQ-context only.
+ */
+int generic_handle_irq_safe(unsigned int irq)
+{
+	unsigned long flags;
+	int ret;
+
+	local_irq_save(flags);
+	ret = handle_irq_desc(irq_to_desc(irq));
+	local_irq_restore(flags);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(generic_handle_irq_safe);
+
 #ifdef CONFIG_IRQ_DOMAIN
 /**
  * generic_handle_domain_irq - Invoke the handler for a HW irq belonging
-- 
GitLab


From 91d7b75a5888c1824c87bded439db6e3e3ad7e3c Mon Sep 17 00:00:00 2001
From: Laurent Vivier <laurent@vivier.eu>
Date: Fri, 21 Jan 2022 21:07:34 +0100
Subject: [PATCH 0516/1586] m68k: Add asm/config.h

To avoid 'warning: no previous prototype for' errors, declare all the
parse_bootinfo and config function prototypes into asm/config.h and
include it in arch/m68k/kernel/setup_mm.c and arch/m68k/*/config.c.

Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Link: https://lore.kernel.org/r/20220121200738.2577697-2-laurent@vivier.eu
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 arch/m68k/amiga/config.c       |  1 +
 arch/m68k/apollo/config.c      |  1 +
 arch/m68k/atari/config.c       |  1 +
 arch/m68k/bvme6000/config.c    |  1 +
 arch/m68k/hp300/config.c       |  1 +
 arch/m68k/include/asm/config.h | 33 +++++++++++++++++++++++++++++++++
 arch/m68k/kernel/setup_mm.c    | 23 +----------------------
 arch/m68k/mac/config.c         |  1 +
 arch/m68k/mvme147/config.c     |  1 +
 arch/m68k/mvme16x/config.c     |  1 +
 arch/m68k/q40/config.c         |  1 +
 11 files changed, 43 insertions(+), 22 deletions(-)
 create mode 100644 arch/m68k/include/asm/config.h

diff --git a/arch/m68k/amiga/config.c b/arch/m68k/amiga/config.c
index be2dfab48fd42..3137b45750dfc 100644
--- a/arch/m68k/amiga/config.c
+++ b/arch/m68k/amiga/config.c
@@ -37,6 +37,7 @@
 #include <asm/irq.h>
 #include <asm/machdep.h>
 #include <asm/io.h>
+#include <asm/config.h>
 
 static unsigned long amiga_model;
 
diff --git a/arch/m68k/apollo/config.c b/arch/m68k/apollo/config.c
index 581a5f68d1029..42a8b8e2b6642 100644
--- a/arch/m68k/apollo/config.c
+++ b/arch/m68k/apollo/config.c
@@ -16,6 +16,7 @@
 #include <asm/apollohw.h>
 #include <asm/irq.h>
 #include <asm/machdep.h>
+#include <asm/config.h>
 
 u_long sio01_physaddr;
 u_long sio23_physaddr;
diff --git a/arch/m68k/atari/config.c b/arch/m68k/atari/config.c
index 261a0f57cc9ac..38a7c05781059 100644
--- a/arch/m68k/atari/config.c
+++ b/arch/m68k/atari/config.c
@@ -46,6 +46,7 @@
 #include <asm/machdep.h>
 #include <asm/hwtest.h>
 #include <asm/io.h>
+#include <asm/config.h>
 
 u_long atari_mch_cookie;
 EXPORT_SYMBOL(atari_mch_cookie);
diff --git a/arch/m68k/bvme6000/config.c b/arch/m68k/bvme6000/config.c
index 0c6feafbbd110..9b060d466e036 100644
--- a/arch/m68k/bvme6000/config.c
+++ b/arch/m68k/bvme6000/config.c
@@ -36,6 +36,7 @@
 #include <asm/traps.h>
 #include <asm/machdep.h>
 #include <asm/bvme6000hw.h>
+#include <asm/config.h>
 
 static void bvme6000_get_model(char *model);
 extern void bvme6000_sched_init(void);
diff --git a/arch/m68k/hp300/config.c b/arch/m68k/hp300/config.c
index ce1eb3d3d55d2..2c92843397c34 100644
--- a/arch/m68k/hp300/config.c
+++ b/arch/m68k/hp300/config.c
@@ -22,6 +22,7 @@
 #include <asm/blinken.h>
 #include <asm/io.h>                               /* readb() and writeb() */
 #include <asm/hp300hw.h>
+#include <asm/config.h>
 
 #include "time.h"
 
diff --git a/arch/m68k/include/asm/config.h b/arch/m68k/include/asm/config.h
new file mode 100644
index 0000000000000..e73ffa23c4f56
--- /dev/null
+++ b/arch/m68k/include/asm/config.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * This file contains prototypes provided by each m68k machine
+ * to parse bootinfo data structures and to configure the machine
+ */
+
+#ifndef _M68K_CONFIG_H
+#define _M68K_CONFIG_H
+
+extern int amiga_parse_bootinfo(const struct bi_record *record);
+extern int apollo_parse_bootinfo(const struct bi_record *record);
+extern int atari_parse_bootinfo(const struct bi_record *record);
+extern int bvme6000_parse_bootinfo(const struct bi_record *record);
+extern int hp300_parse_bootinfo(const struct bi_record *record);
+extern int mac_parse_bootinfo(const struct bi_record *record);
+extern int mvme147_parse_bootinfo(const struct bi_record *record);
+extern int mvme16x_parse_bootinfo(const struct bi_record *record);
+extern int q40_parse_bootinfo(const struct bi_record *record);
+
+extern void config_amiga(void);
+extern void config_apollo(void);
+extern void config_atari(void);
+extern void config_bvme6000(void);
+extern void config_hp300(void);
+extern void config_mac(void);
+extern void config_mvme147(void);
+extern void config_mvme16x(void);
+extern void config_q40(void);
+extern void config_sun3(void);
+extern void config_sun3x(void);
+
+#endif /* _M68K_CONFIG_H */
diff --git a/arch/m68k/kernel/setup_mm.c b/arch/m68k/kernel/setup_mm.c
index 49e573b943268..8228275aae3e8 100644
--- a/arch/m68k/kernel/setup_mm.c
+++ b/arch/m68k/kernel/setup_mm.c
@@ -47,6 +47,7 @@
 #endif
 #include <asm/macintosh.h>
 #include <asm/natfeat.h>
+#include <asm/config.h>
 
 #if !FPSTATESIZE || !NR_IRQS
 #warning No CPU/platform type selected, your kernel will not work!
@@ -113,28 +114,6 @@ EXPORT_SYMBOL(isa_type);
 EXPORT_SYMBOL(isa_sex);
 #endif
 
-extern int amiga_parse_bootinfo(const struct bi_record *);
-extern int atari_parse_bootinfo(const struct bi_record *);
-extern int mac_parse_bootinfo(const struct bi_record *);
-extern int q40_parse_bootinfo(const struct bi_record *);
-extern int bvme6000_parse_bootinfo(const struct bi_record *);
-extern int mvme16x_parse_bootinfo(const struct bi_record *);
-extern int mvme147_parse_bootinfo(const struct bi_record *);
-extern int hp300_parse_bootinfo(const struct bi_record *);
-extern int apollo_parse_bootinfo(const struct bi_record *);
-
-extern void config_amiga(void);
-extern void config_atari(void);
-extern void config_mac(void);
-extern void config_sun3(void);
-extern void config_apollo(void);
-extern void config_mvme147(void);
-extern void config_mvme16x(void);
-extern void config_bvme6000(void);
-extern void config_hp300(void);
-extern void config_q40(void);
-extern void config_sun3x(void);
-
 #define MASK_256K 0xfffc0000
 
 extern void paging_init(void);
diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c
index 5d16f9b47aa90..65d124ec80bb6 100644
--- a/arch/m68k/mac/config.c
+++ b/arch/m68k/mac/config.c
@@ -47,6 +47,7 @@
 #include <asm/mac_via.h>
 #include <asm/mac_oss.h>
 #include <asm/mac_psc.h>
+#include <asm/config.h>
 
 /* Mac bootinfo struct */
 struct mac_booter_data mac_bi_data;
diff --git a/arch/m68k/mvme147/config.c b/arch/m68k/mvme147/config.c
index dfd6202fd403e..b96ea7c76a197 100644
--- a/arch/m68k/mvme147/config.c
+++ b/arch/m68k/mvme147/config.c
@@ -34,6 +34,7 @@
 #include <asm/traps.h>
 #include <asm/machdep.h>
 #include <asm/mvme147hw.h>
+#include <asm/config.h>
 
 
 static void mvme147_get_model(char *model);
diff --git a/arch/m68k/mvme16x/config.c b/arch/m68k/mvme16x/config.c
index b4422c2dfbbf4..88cbdc10925b9 100644
--- a/arch/m68k/mvme16x/config.c
+++ b/arch/m68k/mvme16x/config.c
@@ -37,6 +37,7 @@
 #include <asm/traps.h>
 #include <asm/machdep.h>
 #include <asm/mvme16xhw.h>
+#include <asm/config.h>
 
 extern t_bdid mvme_bdid;
 
diff --git a/arch/m68k/q40/config.c b/arch/m68k/q40/config.c
index 5caf1e5be1c2b..9237243077cee 100644
--- a/arch/m68k/q40/config.c
+++ b/arch/m68k/q40/config.c
@@ -34,6 +34,7 @@
 #include <asm/traps.h>
 #include <asm/machdep.h>
 #include <asm/q40_master.h>
+#include <asm/config.h>
 
 extern void q40_init_IRQ(void);
 static void q40_get_model(char *model);
-- 
GitLab


From c4d5b6eef2581dd893bea4edb374e480e5a90ce6 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Fri, 28 Jan 2022 18:30:06 +0100
Subject: [PATCH 0517/1586] m68k: mm: Remove check for VM_IO to fix deferred
 I/O

When an application accesses a mapped frame buffer backed by deferred
I/O, it receives a segmentation fault.  Fix this by removing the check
for VM_IO in do_page_fault().

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Tested-by: Michael Schmitz <schmitzmic@gmail.com>
Link: https://lore.kernel.org/r/20220128173006.1713210-1-geert@linux-m68k.org
---
 arch/m68k/mm/fault.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c
index 1493cf5eac1e7..71aa9f6315dc8 100644
--- a/arch/m68k/mm/fault.c
+++ b/arch/m68k/mm/fault.c
@@ -93,8 +93,6 @@ retry:
 	vma = find_vma(mm, address);
 	if (!vma)
 		goto map_err;
-	if (vma->vm_flags & VM_IO)
-		goto acc_err;
 	if (vma->vm_start <= address)
 		goto good_area;
 	if (!(vma->vm_flags & VM_GROWSDOWN))
-- 
GitLab


From b252ada293d5d30566121c61fa7552e74396d533 Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Wed, 26 Jan 2022 12:26:05 +0100
Subject: [PATCH 0518/1586] dt-bindings: mtd: spi-nor: Allow two CS per device

The Xilinx QSPI controller has two advanced modes which allow the
controller to behave differently and consider two flashes as one single
storage.

One of these two modes is quite complex to support from a binding point
of view and is the dual parallel memories. In this mode, each byte of
data is stored in both devices: the even bits in one, the odd bits in
the other. The split is automatically handled by the QSPI controller and
is transparent for the user.

The other mode is simpler to support, it is called dual stacked
memories. The controller shares the same SPI bus but each of the devices
contain half of the data. Once in this mode, the controller does not
follow CS requests but instead internally wires the two CS levels with
the value of the most significant address bit.

Supporting these two modes will involve core changes which include the
possibility of providing two CS for a single SPI device

Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Acked-by: Rob Herring <robh@kernel.org>
Reviewed-by: Pratyush Yadav <p.yadav@ti.com>
Link: https://lore.kernel.org/r/20220126112608.955728-2-miquel.raynal@bootlin.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 Documentation/devicetree/bindings/mtd/jedec,spi-nor.yaml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/mtd/jedec,spi-nor.yaml b/Documentation/devicetree/bindings/mtd/jedec,spi-nor.yaml
index 39421f7233e4c..4abfb4cfc1571 100644
--- a/Documentation/devicetree/bindings/mtd/jedec,spi-nor.yaml
+++ b/Documentation/devicetree/bindings/mtd/jedec,spi-nor.yaml
@@ -47,7 +47,8 @@ properties:
       identified by the JEDEC READ ID opcode (0x9F).
 
   reg:
-    maxItems: 1
+    minItems: 1
+    maxItems: 2
 
   spi-max-frequency: true
   spi-rx-bus-width: true
-- 
GitLab


From e2edd1b64f1c79e8abda365149ed62a2a9a494b4 Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Wed, 26 Jan 2022 12:26:06 +0100
Subject: [PATCH 0519/1586] spi: dt-bindings: Describe stacked/parallel
 memories modes

Describe two new memories modes:
- A stacked mode when the bus is common but the address space extended
  with an additinals wires.
- A parallel mode with parallel busses accessing parallel flashes where
  the data is spread.

Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Acked-by: Pratyush Yadav <p.yadav@ti.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20220126112608.955728-3-miquel.raynal@bootlin.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../bindings/spi/spi-peripheral-props.yaml    | 25 +++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/Documentation/devicetree/bindings/spi/spi-peripheral-props.yaml b/Documentation/devicetree/bindings/spi/spi-peripheral-props.yaml
index 5dd209206e880..fedb7ae98ff6f 100644
--- a/Documentation/devicetree/bindings/spi/spi-peripheral-props.yaml
+++ b/Documentation/devicetree/bindings/spi/spi-peripheral-props.yaml
@@ -82,6 +82,31 @@ properties:
     description:
       Delay, in microseconds, after a write transfer.
 
+  stacked-memories:
+    description: Several SPI memories can be wired in stacked mode.
+      This basically means that either a device features several chip
+      selects, or that different devices must be seen as a single
+      bigger chip. This basically doubles (or more) the total address
+      space with only a single additional wire, while still needing
+      to repeat the commands when crossing a chip boundary. The size of
+      each chip should be provided as members of the array.
+    $ref: /schemas/types.yaml#/definitions/uint64-array
+    minItems: 2
+    maxItems: 4
+
+  parallel-memories:
+    description: Several SPI memories can be wired in parallel mode.
+      The devices are physically on a different buses but will always
+      act synchronously as each data word is spread across the
+      different memories (eg. even bits are stored in one memory, odd
+      bits in the other). This basically doubles the address space and
+      the throughput while greatly complexifying the wiring because as
+      many busses as devices must be wired. The size of each chip should
+      be provided as members of the array.
+    $ref: /schemas/types.yaml#/definitions/uint64-array
+    minItems: 2
+    maxItems: 4
+
 # The controller specific properties go here.
 allOf:
   - $ref: cdns,qspi-nor-peripheral-props.yaml#
-- 
GitLab


From eba5368503b4291db7819512600fa014ea17c5a8 Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Wed, 26 Jan 2022 12:26:07 +0100
Subject: [PATCH 0520/1586] spi: dt-bindings: Add an example with two stacked
 flashes

Provide an example of how to describe two flashes in eg. stacked mode.

Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20220126112608.955728-4-miquel.raynal@bootlin.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 Documentation/devicetree/bindings/spi/spi-controller.yaml | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/Documentation/devicetree/bindings/spi/spi-controller.yaml b/Documentation/devicetree/bindings/spi/spi-controller.yaml
index 36b72518f5654..0f4d40218400e 100644
--- a/Documentation/devicetree/bindings/spi/spi-controller.yaml
+++ b/Documentation/devicetree/bindings/spi/spi-controller.yaml
@@ -139,4 +139,11 @@ examples:
             spi-max-frequency = <100000>;
             reg = <1>;
         };
+
+        flash@2 {
+          compatible = "jedec,spi-nor";
+          spi-max-frequency = <50000000>;
+          reg = <2>, <3>;
+          stacked-memories = /bits/ 64 <0x10000000 0x10000000>;
+        };
     };
-- 
GitLab


From c5a3106aa4923bec979c2a76667a493cb5d134fd Mon Sep 17 00:00:00 2001
From: "Minghao Chi (CGEL ZTE)" <chi.minghao@zte.com.cn>
Date: Mon, 21 Feb 2022 02:02:33 +0000
Subject: [PATCH 0521/1586] spi: Use of_device_get_match_data()

Use of_device_get_match_data() to simplify the code.

Reported-by: Zeal Robot <zealci@zte.com.cn>
Signed-off-by: Minghao Chi (CGEL ZTE) <chi.minghao@zte.com.cn>
Link: https://lore.kernel.org/r/20220221020233.1925154-1-chi.minghao@zte.com.cn
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-lantiq-ssc.c | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/drivers/spi/spi-lantiq-ssc.c b/drivers/spi/spi-lantiq-ssc.c
index bcb52601804a9..aae26f62ea87a 100644
--- a/drivers/spi/spi-lantiq-ssc.c
+++ b/drivers/spi/spi-lantiq-ssc.c
@@ -906,17 +906,11 @@ static int lantiq_ssc_probe(struct platform_device *pdev)
 	struct spi_master *master;
 	struct lantiq_ssc_spi *spi;
 	const struct lantiq_ssc_hwcfg *hwcfg;
-	const struct of_device_id *match;
 	u32 id, supports_dma, revision;
 	unsigned int num_cs;
 	int err;
 
-	match = of_match_device(lantiq_ssc_match, dev);
-	if (!match) {
-		dev_err(dev, "no device match\n");
-		return -EINVAL;
-	}
-	hwcfg = match->data;
+	hwcfg = of_device_get_match_data(dev);
 
 	master = spi_alloc_master(dev, sizeof(struct lantiq_ssc_spi));
 	if (!master)
-- 
GitLab


From 6e8ec2552c7d13991148e551e3325a624d73fac6 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Sun, 16 Jan 2022 14:23:10 +0100
Subject: [PATCH 0522/1586] random: use computational hash for entropy
 extraction

The current 4096-bit LFSR used for entropy collection had a few
desirable attributes for the context in which it was created. For
example, the state was huge, which meant that /dev/random would be able
to output quite a bit of accumulated entropy before blocking. It was
also, in its time, quite fast at accumulating entropy byte-by-byte,
which matters given the varying contexts in which mix_pool_bytes() is
called. And its diffusion was relatively high, which meant that changes
would ripple across several words of state rather quickly.

However, it also suffers from a few security vulnerabilities. In
particular, inputs learned by an attacker can be undone, but moreover,
if the state of the pool leaks, its contents can be controlled and
entirely zeroed out. I've demonstrated this attack with this SMT2
script, <https://xn--4db.cc/5o9xO8pb>, which Boolector/CaDiCal solves in
a matter of seconds on a single core of my laptop, resulting in little
proof of concept C demonstrators such as <https://xn--4db.cc/jCkvvIaH/c>.

For basically all recent formal models of RNGs, these attacks represent
a significant cryptographic flaw. But how does this manifest
practically? If an attacker has access to the system to such a degree
that he can learn the internal state of the RNG, arguably there are
other lower hanging vulnerabilities -- side-channel, infoleak, or
otherwise -- that might have higher priority. On the other hand, seed
files are frequently used on systems that have a hard time generating
much entropy on their own, and these seed files, being files, often leak
or are duplicated and distributed accidentally, or are even seeded over
the Internet intentionally, where their contents might be recorded or
tampered with. Seen this way, an otherwise quasi-implausible
vulnerability is a bit more practical than initially thought.

Another aspect of the current mix_pool_bytes() function is that, while
its performance was arguably competitive for the time in which it was
created, it's no longer considered so. This patch improves performance
significantly: on a high-end CPU, an i7-11850H, it improves performance
of mix_pool_bytes() by 225%, and on a low-end CPU, a Cortex-A7, it
improves performance by 103%.

This commit replaces the LFSR of mix_pool_bytes() with a straight-
forward cryptographic hash function, BLAKE2s, which is already in use
for pool extraction. Universal hashing with a secret seed was considered
too, something along the lines of <https://eprint.iacr.org/2013/338>,
but the requirement for a secret seed makes for a chicken & egg problem.
Instead we go with a formally proven scheme using a computational hash
function, described in sections 5.1, 6.4, and B.1.8 of
<https://eprint.iacr.org/2019/198>.

BLAKE2s outputs 256 bits, which should give us an appropriate amount of
min-entropy accumulation, and a wide enough margin of collision
resistance against active attacks. mix_pool_bytes() becomes a simple
call to blake2s_update(), for accumulation, while the extraction step
becomes a blake2s_final() to generate a seed, with which we can then do
a HKDF-like or BLAKE2X-like expansion, the first part of which we fold
back as an init key for subsequent blake2s_update()s, and the rest we
produce to the caller. This then is provided to our CRNG like usual. In
that expansion step, we make opportunistic use of 32 bytes of RDRAND
output, just as before. We also always reseed the crng with 32 bytes,
unconditionally, or not at all, rather than sometimes with 16 as before,
as we don't win anything by limiting beyond the 16 byte threshold.

Going for a hash function as an entropy collector is a conservative,
proven approach. The result of all this is a much simpler and much less
bespoke construction than what's there now, which not only plugs a
vulnerability but also improves performance considerably.

Cc: Theodore Ts'o <tytso@mit.edu>
Cc: Dominik Brodowski <linux@dominikbrodowski.net>
Reviewed-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Jean-Philippe Aumasson <jeanphilippe.aumasson@gmail.com>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/char/random.c | 304 ++++++++----------------------------------
 1 file changed, 55 insertions(+), 249 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 3404a91edf292..882f78829a24c 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -42,61 +42,6 @@
  */
 
 /*
- * (now, with legal B.S. out of the way.....)
- *
- * This routine gathers environmental noise from device drivers, etc.,
- * and returns good random numbers, suitable for cryptographic use.
- * Besides the obvious cryptographic uses, these numbers are also good
- * for seeding TCP sequence numbers, and other places where it is
- * desirable to have numbers which are not only random, but hard to
- * predict by an attacker.
- *
- * Theory of operation
- * ===================
- *
- * Computers are very predictable devices.  Hence it is extremely hard
- * to produce truly random numbers on a computer --- as opposed to
- * pseudo-random numbers, which can easily generated by using a
- * algorithm.  Unfortunately, it is very easy for attackers to guess
- * the sequence of pseudo-random number generators, and for some
- * applications this is not acceptable.  So instead, we must try to
- * gather "environmental noise" from the computer's environment, which
- * must be hard for outside attackers to observe, and use that to
- * generate random numbers.  In a Unix environment, this is best done
- * from inside the kernel.
- *
- * Sources of randomness from the environment include inter-keyboard
- * timings, inter-interrupt timings from some interrupts, and other
- * events which are both (a) non-deterministic and (b) hard for an
- * outside observer to measure.  Randomness from these sources are
- * added to an "entropy pool", which is mixed using a CRC-like function.
- * This is not cryptographically strong, but it is adequate assuming
- * the randomness is not chosen maliciously, and it is fast enough that
- * the overhead of doing it on every interrupt is very reasonable.
- * As random bytes are mixed into the entropy pool, the routines keep
- * an *estimate* of how many bits of randomness have been stored into
- * the random number generator's internal state.
- *
- * When random bytes are desired, they are obtained by taking the BLAKE2s
- * hash of the contents of the "entropy pool".  The BLAKE2s hash avoids
- * exposing the internal state of the entropy pool.  It is believed to
- * be computationally infeasible to derive any useful information
- * about the input of BLAKE2s from its output.  Even if it is possible to
- * analyze BLAKE2s in some clever way, as long as the amount of data
- * returned from the generator is less than the inherent entropy in
- * the pool, the output data is totally unpredictable.  For this
- * reason, the routine decreases its internal estimate of how many
- * bits of "true randomness" are contained in the entropy pool as it
- * outputs random numbers.
- *
- * If this estimate goes to zero, the routine can still generate
- * random numbers; however, an attacker may (at least in theory) be
- * able to infer the future output of the generator from prior
- * outputs.  This requires successful cryptanalysis of BLAKE2s, which is
- * not believed to be feasible, but there is a remote possibility.
- * Nonetheless, these numbers should be useful for the vast majority
- * of purposes.
- *
  * Exported interfaces ---- output
  * ===============================
  *
@@ -298,23 +243,6 @@
  *
  *	mknod /dev/random c 1 8
  *	mknod /dev/urandom c 1 9
- *
- * Acknowledgements:
- * =================
- *
- * Ideas for constructing this random number generator were derived
- * from Pretty Good Privacy's random number generator, and from private
- * discussions with Phil Karn.  Colin Plumb provided a faster random
- * number generator, which speed up the mixing function of the entropy
- * pool, taken from PGPfone.  Dale Worley has also contributed many
- * useful ideas and suggestions to improve this driver.
- *
- * Any flaws in the design are solely my responsibility, and should
- * not be attributed to the Phil, Colin, or any of authors of PGP.
- *
- * Further background information on this topic may be obtained from
- * RFC 1750, "Randomness Recommendations for Security", by Donald
- * Eastlake, Steve Crocker, and Jeff Schiller.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -358,79 +286,15 @@
 
 /* #define ADD_INTERRUPT_BENCH */
 
-/*
- * If the entropy count falls under this number of bits, then we
- * should wake up processes which are selecting or polling on write
- * access to /dev/random.
- */
-static int random_write_wakeup_bits = 28 * (1 << 5);
-
-/*
- * Originally, we used a primitive polynomial of degree .poolwords
- * over GF(2).  The taps for various sizes are defined below.  They
- * were chosen to be evenly spaced except for the last tap, which is 1
- * to get the twisting happening as fast as possible.
- *
- * For the purposes of better mixing, we use the CRC-32 polynomial as
- * well to make a (modified) twisted Generalized Feedback Shift
- * Register.  (See M. Matsumoto & Y. Kurita, 1992.  Twisted GFSR
- * generators.  ACM Transactions on Modeling and Computer Simulation
- * 2(3):179-194.  Also see M. Matsumoto & Y. Kurita, 1994.  Twisted
- * GFSR generators II.  ACM Transactions on Modeling and Computer
- * Simulation 4:254-266)
- *
- * Thanks to Colin Plumb for suggesting this.
- *
- * The mixing operation is much less sensitive than the output hash,
- * where we use BLAKE2s.  All that we want of mixing operation is that
- * it be a good non-cryptographic hash; i.e. it not produce collisions
- * when fed "random" data of the sort we expect to see.  As long as
- * the pool state differs for different inputs, we have preserved the
- * input entropy and done a good job.  The fact that an intelligent
- * attacker can construct inputs that will produce controlled
- * alterations to the pool's state is not important because we don't
- * consider such inputs to contribute any randomness.  The only
- * property we need with respect to them is that the attacker can't
- * increase his/her knowledge of the pool's state.  Since all
- * additions are reversible (knowing the final state and the input,
- * you can reconstruct the initial state), if an attacker has any
- * uncertainty about the initial state, he/she can only shuffle that
- * uncertainty about, but never cause any collisions (which would
- * decrease the uncertainty).
- *
- * Our mixing functions were analyzed by Lacharme, Roeck, Strubel, and
- * Videau in their paper, "The Linux Pseudorandom Number Generator
- * Revisited" (see: http://eprint.iacr.org/2012/251.pdf).  In their
- * paper, they point out that we are not using a true Twisted GFSR,
- * since Matsumoto & Kurita used a trinomial feedback polynomial (that
- * is, with only three taps, instead of the six that we are using).
- * As a result, the resulting polynomial is neither primitive nor
- * irreducible, and hence does not have a maximal period over
- * GF(2**32).  They suggest a slight change to the generator
- * polynomial which improves the resulting TGFSR polynomial to be
- * irreducible, which we have made here.
- */
 enum poolinfo {
-	POOL_WORDS = 128,
-	POOL_WORDMASK = POOL_WORDS - 1,
-	POOL_BYTES = POOL_WORDS * sizeof(u32),
-	POOL_BITS = POOL_BYTES * 8,
+	POOL_BITS = BLAKE2S_HASH_SIZE * 8,
 	POOL_BITSHIFT = ilog2(POOL_BITS),
 
 	/* To allow fractional bits to be tracked, the entropy_count field is
 	 * denominated in units of 1/8th bits. */
 	POOL_ENTROPY_SHIFT = 3,
 #define POOL_ENTROPY_BITS() (input_pool.entropy_count >> POOL_ENTROPY_SHIFT)
-	POOL_FRACBITS = POOL_BITS << POOL_ENTROPY_SHIFT,
-
-	/* x^128 + x^104 + x^76 + x^51 +x^25 + x + 1 */
-	POOL_TAP1 = 104,
-	POOL_TAP2 = 76,
-	POOL_TAP3 = 51,
-	POOL_TAP4 = 25,
-	POOL_TAP5 = 1,
-
-	EXTRACT_SIZE = BLAKE2S_HASH_SIZE / 2
+	POOL_FRACBITS = POOL_BITS << POOL_ENTROPY_SHIFT
 };
 
 /*
@@ -438,6 +302,12 @@ enum poolinfo {
  */
 static DECLARE_WAIT_QUEUE_HEAD(random_write_wait);
 static struct fasync_struct *fasync;
+/*
+ * If the entropy count falls under this number of bits, then we
+ * should wake up processes which are selecting or polling on write
+ * access to /dev/random.
+ */
+static int random_write_wakeup_bits = POOL_BITS * 3 / 4;
 
 static DEFINE_SPINLOCK(random_ready_list_lock);
 static LIST_HEAD(random_ready_list);
@@ -493,73 +363,31 @@ MODULE_PARM_DESC(ratelimit_disable, "Disable random ratelimit suppression");
  *
  **********************************************************************/
 
-static u32 input_pool_data[POOL_WORDS] __latent_entropy;
-
 static struct {
+	struct blake2s_state hash;
 	spinlock_t lock;
-	u16 add_ptr;
-	u16 input_rotate;
 	int entropy_count;
 } input_pool = {
+	.hash.h = { BLAKE2S_IV0 ^ (0x01010000 | BLAKE2S_HASH_SIZE),
+		    BLAKE2S_IV1, BLAKE2S_IV2, BLAKE2S_IV3, BLAKE2S_IV4,
+		    BLAKE2S_IV5, BLAKE2S_IV6, BLAKE2S_IV7 },
+	.hash.outlen = BLAKE2S_HASH_SIZE,
 	.lock = __SPIN_LOCK_UNLOCKED(input_pool.lock),
 };
 
-static ssize_t extract_entropy(void *buf, size_t nbytes, int min);
-static ssize_t _extract_entropy(void *buf, size_t nbytes);
+static bool extract_entropy(void *buf, size_t nbytes, int min);
+static void _extract_entropy(void *buf, size_t nbytes);
 
 static void crng_reseed(struct crng_state *crng, bool use_input_pool);
 
-static const u32 twist_table[8] = {
-	0x00000000, 0x3b6e20c8, 0x76dc4190, 0x4db26158,
-	0xedb88320, 0xd6d6a3e8, 0x9b64c2b0, 0xa00ae278 };
-
 /*
  * This function adds bytes into the entropy "pool".  It does not
  * update the entropy estimate.  The caller should call
  * credit_entropy_bits if this is appropriate.
- *
- * The pool is stirred with a primitive polynomial of the appropriate
- * degree, and then twisted.  We twist by three bits at a time because
- * it's cheap to do so and helps slightly in the expected case where
- * the entropy is concentrated in the low-order bits.
  */
 static void _mix_pool_bytes(const void *in, int nbytes)
 {
-	unsigned long i;
-	int input_rotate;
-	const u8 *bytes = in;
-	u32 w;
-
-	input_rotate = input_pool.input_rotate;
-	i = input_pool.add_ptr;
-
-	/* mix one byte at a time to simplify size handling and churn faster */
-	while (nbytes--) {
-		w = rol32(*bytes++, input_rotate);
-		i = (i - 1) & POOL_WORDMASK;
-
-		/* XOR in the various taps */
-		w ^= input_pool_data[i];
-		w ^= input_pool_data[(i + POOL_TAP1) & POOL_WORDMASK];
-		w ^= input_pool_data[(i + POOL_TAP2) & POOL_WORDMASK];
-		w ^= input_pool_data[(i + POOL_TAP3) & POOL_WORDMASK];
-		w ^= input_pool_data[(i + POOL_TAP4) & POOL_WORDMASK];
-		w ^= input_pool_data[(i + POOL_TAP5) & POOL_WORDMASK];
-
-		/* Mix the result back in with a twist */
-		input_pool_data[i] = (w >> 3) ^ twist_table[w & 7];
-
-		/*
-		 * Normally, we add 7 bits of rotation to the pool.
-		 * At the beginning of the pool, add an extra 7 bits
-		 * rotation, so that successive passes spread the
-		 * input bits across the pool evenly.
-		 */
-		input_rotate = (input_rotate + (i ? 7 : 14)) & 31;
-	}
-
-	input_pool.input_rotate = input_rotate;
-	input_pool.add_ptr = i;
+	blake2s_update(&input_pool.hash, in, nbytes);
 }
 
 static void __mix_pool_bytes(const void *in, int nbytes)
@@ -953,15 +781,14 @@ static int crng_slow_load(const u8 *cp, size_t len)
 static void crng_reseed(struct crng_state *crng, bool use_input_pool)
 {
 	unsigned long flags;
-	int i, num;
+	int i;
 	union {
 		u8 block[CHACHA_BLOCK_SIZE];
 		u32 key[8];
 	} buf;
 
 	if (use_input_pool) {
-		num = extract_entropy(&buf, 32, 16);
-		if (num == 0)
+		if (!extract_entropy(&buf, 32, 16))
 			return;
 	} else {
 		_extract_crng(&primary_crng, buf.block);
@@ -1329,74 +1156,48 @@ retry:
 }
 
 /*
- * This function does the actual extraction for extract_entropy.
- *
- * Note: we assume that .poolwords is a multiple of 16 words.
+ * This is an HKDF-like construction for using the hashed collected entropy
+ * as a PRF key, that's then expanded block-by-block.
  */
-static void extract_buf(u8 *out)
+static void _extract_entropy(void *buf, size_t nbytes)
 {
-	struct blake2s_state state __aligned(__alignof__(unsigned long));
-	u8 hash[BLAKE2S_HASH_SIZE];
-	unsigned long *salt;
 	unsigned long flags;
-
-	blake2s_init(&state, sizeof(hash));
-
-	/*
-	 * If we have an architectural hardware random number
-	 * generator, use it for BLAKE2's salt & personal fields.
-	 */
-	for (salt = (unsigned long *)&state.h[4];
-	     salt < (unsigned long *)&state.h[8]; ++salt) {
-		unsigned long v;
-		if (!arch_get_random_long(&v))
-			break;
-		*salt ^= v;
+	u8 seed[BLAKE2S_HASH_SIZE], next_key[BLAKE2S_HASH_SIZE];
+	struct {
+		unsigned long rdrand[32 / sizeof(long)];
+		size_t counter;
+	} block;
+	size_t i;
+
+	for (i = 0; i < ARRAY_SIZE(block.rdrand); ++i) {
+		if (!arch_get_random_long(&block.rdrand[i]))
+			block.rdrand[i] = random_get_entropy();
 	}
 
-	/* Generate a hash across the pool */
 	spin_lock_irqsave(&input_pool.lock, flags);
-	blake2s_update(&state, (const u8 *)input_pool_data, POOL_BYTES);
-	blake2s_final(&state, hash); /* final zeros out state */
 
-	/*
-	 * We mix the hash back into the pool to prevent backtracking
-	 * attacks (where the attacker knows the state of the pool
-	 * plus the current outputs, and attempts to find previous
-	 * outputs), unless the hash function can be inverted. By
-	 * mixing at least a hash worth of hash data back, we make
-	 * brute-forcing the feedback as hard as brute-forcing the
-	 * hash.
-	 */
-	__mix_pool_bytes(hash, sizeof(hash));
-	spin_unlock_irqrestore(&input_pool.lock, flags);
+	/* seed = HASHPRF(last_key, entropy_input) */
+	blake2s_final(&input_pool.hash, seed);
 
-	/* Note that EXTRACT_SIZE is half of hash size here, because above
-	 * we've dumped the full length back into mixer. By reducing the
-	 * amount that we emit, we retain a level of forward secrecy.
-	 */
-	memcpy(out, hash, EXTRACT_SIZE);
-	memzero_explicit(hash, sizeof(hash));
-}
+	/* next_key = HASHPRF(seed, RDRAND || 0) */
+	block.counter = 0;
+	blake2s(next_key, (u8 *)&block, seed, sizeof(next_key), sizeof(block), sizeof(seed));
+	blake2s_init_key(&input_pool.hash, BLAKE2S_HASH_SIZE, next_key, sizeof(next_key));
 
-static ssize_t _extract_entropy(void *buf, size_t nbytes)
-{
-	ssize_t ret = 0, i;
-	u8 tmp[EXTRACT_SIZE];
+	spin_unlock_irqrestore(&input_pool.lock, flags);
+	memzero_explicit(next_key, sizeof(next_key));
 
 	while (nbytes) {
-		extract_buf(tmp);
-		i = min_t(int, nbytes, EXTRACT_SIZE);
-		memcpy(buf, tmp, i);
+		i = min_t(size_t, nbytes, BLAKE2S_HASH_SIZE);
+		/* output = HASHPRF(seed, RDRAND || ++counter) */
+		++block.counter;
+		blake2s(buf, (u8 *)&block, seed, i, sizeof(block), sizeof(seed));
 		nbytes -= i;
 		buf += i;
-		ret += i;
 	}
 
-	/* Wipe data just returned from memory */
-	memzero_explicit(tmp, sizeof(tmp));
-
-	return ret;
+	memzero_explicit(seed, sizeof(seed));
+	memzero_explicit(&block, sizeof(block));
 }
 
 /*
@@ -1404,13 +1205,18 @@ static ssize_t _extract_entropy(void *buf, size_t nbytes)
  * returns it in a buffer.
  *
  * The min parameter specifies the minimum amount we can pull before
- * failing to avoid races that defeat catastrophic reseeding.
+ * failing to avoid races that defeat catastrophic reseeding. If we
+ * have less than min entropy available, we return false and buf is
+ * not filled.
  */
-static ssize_t extract_entropy(void *buf, size_t nbytes, int min)
+static bool extract_entropy(void *buf, size_t nbytes, int min)
 {
 	trace_extract_entropy(nbytes, POOL_ENTROPY_BITS(), _RET_IP_);
-	nbytes = account(nbytes, min);
-	return _extract_entropy(buf, nbytes);
+	if (account(nbytes, min)) {
+		_extract_entropy(buf, nbytes);
+		return true;
+	}
+	return false;
 }
 
 #define warn_unseeded_randomness(previous) \
@@ -1674,7 +1480,7 @@ static void __init init_std_data(void)
 	unsigned long rv;
 
 	mix_pool_bytes(&now, sizeof(now));
-	for (i = POOL_BYTES; i > 0; i -= sizeof(rv)) {
+	for (i = BLAKE2S_BLOCK_SIZE; i > 0; i -= sizeof(rv)) {
 		if (!arch_get_random_seed_long(&rv) &&
 		    !arch_get_random_long(&rv))
 			rv = random_get_entropy();
-- 
GitLab


From 9c07f57869e90140080cfc282cc628d123e27704 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Wed, 2 Feb 2022 13:30:03 +0100
Subject: [PATCH 0523/1586] random: simplify entropy debiting

Our pool is 256 bits, and we only ever use all of it or don't use it at
all, which is decided by whether or not it has at least 128 bits in it.
So we can drastically simplify the accounting and cmpxchg loop to do
exactly this.  While we're at it, we move the minimum bit size into a
constant so it can be shared between the two places where it matters.

The reason we want any of this is for the case in which an attacker has
compromised the current state, and then bruteforces small amounts of
entropy added to it. By demanding a particular minimum amount of entropy
be present before reseeding, we make that bruteforcing difficult.

Note that this rationale no longer includes anything about /dev/random
blocking at the right moment, since /dev/random no longer blocks (except
for at ~boot), but rather uses the crng. In a former life, /dev/random
was different and therefore required a more nuanced account(), but this
is no longer.

Behaviorally, nothing changes here. This is just a simplification of
the code.

Cc: Theodore Ts'o <tytso@mit.edu>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/char/random.c         | 91 ++++++++---------------------------
 include/trace/events/random.h | 30 +++---------
 2 files changed, 27 insertions(+), 94 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 882f78829a24c..9014fd54da647 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -289,12 +289,14 @@
 enum poolinfo {
 	POOL_BITS = BLAKE2S_HASH_SIZE * 8,
 	POOL_BITSHIFT = ilog2(POOL_BITS),
+	POOL_MIN_BITS = POOL_BITS / 2,
 
 	/* To allow fractional bits to be tracked, the entropy_count field is
 	 * denominated in units of 1/8th bits. */
 	POOL_ENTROPY_SHIFT = 3,
 #define POOL_ENTROPY_BITS() (input_pool.entropy_count >> POOL_ENTROPY_SHIFT)
-	POOL_FRACBITS = POOL_BITS << POOL_ENTROPY_SHIFT
+	POOL_FRACBITS = POOL_BITS << POOL_ENTROPY_SHIFT,
+	POOL_MIN_FRACBITS = POOL_MIN_BITS << POOL_ENTROPY_SHIFT
 };
 
 /*
@@ -375,8 +377,7 @@ static struct {
 	.lock = __SPIN_LOCK_UNLOCKED(input_pool.lock),
 };
 
-static bool extract_entropy(void *buf, size_t nbytes, int min);
-static void _extract_entropy(void *buf, size_t nbytes);
+static void extract_entropy(void *buf, size_t nbytes);
 
 static void crng_reseed(struct crng_state *crng, bool use_input_pool);
 
@@ -467,7 +468,7 @@ static void process_random_ready_list(void)
  */
 static void credit_entropy_bits(int nbits)
 {
-	int entropy_count, entropy_bits, orig;
+	int entropy_count, orig;
 	int nfrac = nbits << POOL_ENTROPY_SHIFT;
 
 	/* Ensure that the multiplication can avoid being 64 bits wide. */
@@ -527,8 +528,7 @@ retry:
 
 	trace_credit_entropy_bits(nbits, entropy_count >> POOL_ENTROPY_SHIFT, _RET_IP_);
 
-	entropy_bits = entropy_count >> POOL_ENTROPY_SHIFT;
-	if (crng_init < 2 && entropy_bits >= 128)
+	if (crng_init < 2 && entropy_count >= POOL_MIN_FRACBITS)
 		crng_reseed(&primary_crng, true);
 }
 
@@ -618,7 +618,7 @@ static void crng_initialize_secondary(struct crng_state *crng)
 
 static void __init crng_initialize_primary(void)
 {
-	_extract_entropy(&primary_crng.state[4], sizeof(u32) * 12);
+	extract_entropy(&primary_crng.state[4], sizeof(u32) * 12);
 	if (crng_init_try_arch_early() && trust_cpu && crng_init < 2) {
 		invalidate_batched_entropy();
 		numa_crng_init();
@@ -788,8 +788,17 @@ static void crng_reseed(struct crng_state *crng, bool use_input_pool)
 	} buf;
 
 	if (use_input_pool) {
-		if (!extract_entropy(&buf, 32, 16))
-			return;
+		int entropy_count;
+		do {
+			entropy_count = READ_ONCE(input_pool.entropy_count);
+			if (entropy_count < POOL_MIN_FRACBITS)
+				return;
+		} while (cmpxchg(&input_pool.entropy_count, entropy_count, 0) != entropy_count);
+		extract_entropy(buf.key, sizeof(buf.key));
+		if (random_write_wakeup_bits) {
+			wake_up_interruptible(&random_write_wait);
+			kill_fasync(&fasync, SIGIO, POLL_OUT);
+		}
 	} else {
 		_extract_crng(&primary_crng, buf.block);
 		_crng_backtrack_protect(&primary_crng, buf.block,
@@ -1114,52 +1123,11 @@ EXPORT_SYMBOL_GPL(add_disk_randomness);
  *
  *********************************************************************/
 
-/*
- * This function decides how many bytes to actually take from the
- * given pool, and also debits the entropy count accordingly.
- */
-static size_t account(size_t nbytes, int min)
-{
-	int entropy_count, orig;
-	size_t ibytes, nfrac;
-
-	BUG_ON(input_pool.entropy_count > POOL_FRACBITS);
-
-	/* Can we pull enough? */
-retry:
-	entropy_count = orig = READ_ONCE(input_pool.entropy_count);
-	if (WARN_ON(entropy_count < 0)) {
-		pr_warn("negative entropy count: count %d\n", entropy_count);
-		entropy_count = 0;
-	}
-
-	/* never pull more than available */
-	ibytes = min_t(size_t, nbytes, entropy_count >> (POOL_ENTROPY_SHIFT + 3));
-	if (ibytes < min)
-		ibytes = 0;
-	nfrac = ibytes << (POOL_ENTROPY_SHIFT + 3);
-	if ((size_t)entropy_count > nfrac)
-		entropy_count -= nfrac;
-	else
-		entropy_count = 0;
-
-	if (cmpxchg(&input_pool.entropy_count, orig, entropy_count) != orig)
-		goto retry;
-
-	trace_debit_entropy(8 * ibytes);
-	if (ibytes && POOL_ENTROPY_BITS() < random_write_wakeup_bits) {
-		wake_up_interruptible(&random_write_wait);
-		kill_fasync(&fasync, SIGIO, POLL_OUT);
-	}
-
-	return ibytes;
-}
-
 /*
  * This is an HKDF-like construction for using the hashed collected entropy
  * as a PRF key, that's then expanded block-by-block.
  */
-static void _extract_entropy(void *buf, size_t nbytes)
+static void extract_entropy(void *buf, size_t nbytes)
 {
 	unsigned long flags;
 	u8 seed[BLAKE2S_HASH_SIZE], next_key[BLAKE2S_HASH_SIZE];
@@ -1169,6 +1137,8 @@ static void _extract_entropy(void *buf, size_t nbytes)
 	} block;
 	size_t i;
 
+	trace_extract_entropy(nbytes, POOL_ENTROPY_BITS());
+
 	for (i = 0; i < ARRAY_SIZE(block.rdrand); ++i) {
 		if (!arch_get_random_long(&block.rdrand[i]))
 			block.rdrand[i] = random_get_entropy();
@@ -1200,25 +1170,6 @@ static void _extract_entropy(void *buf, size_t nbytes)
 	memzero_explicit(&block, sizeof(block));
 }
 
-/*
- * This function extracts randomness from the "entropy pool", and
- * returns it in a buffer.
- *
- * The min parameter specifies the minimum amount we can pull before
- * failing to avoid races that defeat catastrophic reseeding. If we
- * have less than min entropy available, we return false and buf is
- * not filled.
- */
-static bool extract_entropy(void *buf, size_t nbytes, int min)
-{
-	trace_extract_entropy(nbytes, POOL_ENTROPY_BITS(), _RET_IP_);
-	if (account(nbytes, min)) {
-		_extract_entropy(buf, nbytes);
-		return true;
-	}
-	return false;
-}
-
 #define warn_unseeded_randomness(previous) \
 	_warn_unseeded_randomness(__func__, (void *)_RET_IP_, (previous))
 
diff --git a/include/trace/events/random.h b/include/trace/events/random.h
index a2d9aa16a5d7a..ad149aeaf42c5 100644
--- a/include/trace/events/random.h
+++ b/include/trace/events/random.h
@@ -79,22 +79,6 @@ TRACE_EVENT(credit_entropy_bits,
 		  __entry->bits, __entry->entropy_count, (void *)__entry->IP)
 );
 
-TRACE_EVENT(debit_entropy,
-	TP_PROTO(int debit_bits),
-
-	TP_ARGS( debit_bits),
-
-	TP_STRUCT__entry(
-		__field(	  int,	debit_bits		)
-	),
-
-	TP_fast_assign(
-		__entry->debit_bits	= debit_bits;
-	),
-
-	TP_printk("input pool: debit_bits %d", __entry->debit_bits)
-);
-
 TRACE_EVENT(add_input_randomness,
 	TP_PROTO(int input_bits),
 
@@ -161,31 +145,29 @@ DEFINE_EVENT(random__get_random_bytes, get_random_bytes_arch,
 );
 
 DECLARE_EVENT_CLASS(random__extract_entropy,
-	TP_PROTO(int nbytes, int entropy_count, unsigned long IP),
+	TP_PROTO(int nbytes, int entropy_count),
 
-	TP_ARGS(nbytes, entropy_count, IP),
+	TP_ARGS(nbytes, entropy_count),
 
 	TP_STRUCT__entry(
 		__field(	  int,	nbytes			)
 		__field(	  int,	entropy_count		)
-		__field(unsigned long,	IP			)
 	),
 
 	TP_fast_assign(
 		__entry->nbytes		= nbytes;
 		__entry->entropy_count	= entropy_count;
-		__entry->IP		= IP;
 	),
 
-	TP_printk("input pool: nbytes %d entropy_count %d caller %pS",
-		  __entry->nbytes, __entry->entropy_count, (void *)__entry->IP)
+	TP_printk("input pool: nbytes %d entropy_count %d",
+		  __entry->nbytes, __entry->entropy_count)
 );
 
 
 DEFINE_EVENT(random__extract_entropy, extract_entropy,
-	TP_PROTO(int nbytes, int entropy_count, unsigned long IP),
+	TP_PROTO(int nbytes, int entropy_count),
 
-	TP_ARGS(nbytes, entropy_count, IP)
+	TP_ARGS(nbytes, entropy_count)
 );
 
 TRACE_EVENT(urandom_read,
-- 
GitLab


From c570449094844527577c5c914140222cb1893e3f Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Thu, 3 Feb 2022 13:28:06 +0100
Subject: [PATCH 0524/1586] random: use linear min-entropy accumulation
 crediting
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

30e37ec516ae ("random: account for entropy loss due to overwrites")
assumed that adding new entropy to the LFSR pool probabilistically
cancelled out old entropy there, so entropy was credited asymptotically,
approximating Shannon entropy of independent sources (rather than a
stronger min-entropy notion) using 1/8th fractional bits and replacing
a constant 2-2/√𝑒 term (~0.786938) with 3/4 (0.75) to slightly
underestimate it. This wasn't superb, but it was perhaps better than
nothing, so that's what was done. Which entropy specifically was being
cancelled out and how much precisely each time is hard to tell, though
as I showed with the attack code in my previous commit, a motivated
adversary with sufficient information can actually cancel out
everything.

Since we're no longer using an LFSR for entropy accumulation, this
probabilistic cancellation is no longer relevant. Rather, we're now
using a computational hash function as the accumulator and we've
switched to working in the random oracle model, from which we can now
revisit the question of min-entropy accumulation, which is done in
detail in <https://eprint.iacr.org/2019/198>.

Consider a long input bit string that is built by concatenating various
smaller independent input bit strings. Each one of these inputs has a
designated min-entropy, which is what we're passing to
credit_entropy_bits(h). When we pass the concatenation of these to a
random oracle, it means that an adversary trying to receive back the
same reply as us would need to become certain about each part of the
concatenated bit string we passed in, which means becoming certain about
all of those h values. That means we can estimate the accumulation by
simply adding up the h values in calls to credit_entropy_bits(h);
there's no probabilistic cancellation at play like there was said to be
for the LFSR. Incidentally, this is also what other entropy accumulators
based on computational hash functions do as well.

So this commit replaces credit_entropy_bits(h) with essentially `total =
min(POOL_BITS, total + h)`, done with a cmpxchg loop as before.

What if we're wrong and the above is nonsense? It's not, but let's
assume we don't want the actual _behavior_ of the code to change much.
Currently that behavior is not extracting from the input pool until it
has 128 bits of entropy in it. With the old algorithm, we'd hit that
magic 128 number after roughly 256 calls to credit_entropy_bits(1). So,
we can retain more or less the old behavior by waiting to extract from
the input pool until it hits 256 bits of entropy using the new code. For
people concerned about this change, it means that there's not that much
practical behavioral change. And for folks actually trying to model
the behavior rigorously, it means that we have an even higher margin
against attacks.

Cc: Theodore Ts'o <tytso@mit.edu>
Cc: Dominik Brodowski <linux@dominikbrodowski.net>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Jean-Philippe Aumasson <jeanphilippe.aumasson@gmail.com>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/char/random.c | 114 ++++++++----------------------------------
 1 file changed, 20 insertions(+), 94 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 9014fd54da647..20538e9b1a2cf 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -286,17 +286,9 @@
 
 /* #define ADD_INTERRUPT_BENCH */
 
-enum poolinfo {
+enum {
 	POOL_BITS = BLAKE2S_HASH_SIZE * 8,
-	POOL_BITSHIFT = ilog2(POOL_BITS),
-	POOL_MIN_BITS = POOL_BITS / 2,
-
-	/* To allow fractional bits to be tracked, the entropy_count field is
-	 * denominated in units of 1/8th bits. */
-	POOL_ENTROPY_SHIFT = 3,
-#define POOL_ENTROPY_BITS() (input_pool.entropy_count >> POOL_ENTROPY_SHIFT)
-	POOL_FRACBITS = POOL_BITS << POOL_ENTROPY_SHIFT,
-	POOL_MIN_FRACBITS = POOL_MIN_BITS << POOL_ENTROPY_SHIFT
+	POOL_MIN_BITS = POOL_BITS /* No point in settling for less. */
 };
 
 /*
@@ -309,7 +301,7 @@ static struct fasync_struct *fasync;
  * should wake up processes which are selecting or polling on write
  * access to /dev/random.
  */
-static int random_write_wakeup_bits = POOL_BITS * 3 / 4;
+static int random_write_wakeup_bits = POOL_MIN_BITS;
 
 static DEFINE_SPINLOCK(random_ready_list_lock);
 static LIST_HEAD(random_ready_list);
@@ -469,66 +461,18 @@ static void process_random_ready_list(void)
 static void credit_entropy_bits(int nbits)
 {
 	int entropy_count, orig;
-	int nfrac = nbits << POOL_ENTROPY_SHIFT;
-
-	/* Ensure that the multiplication can avoid being 64 bits wide. */
-	BUILD_BUG_ON(2 * (POOL_ENTROPY_SHIFT + POOL_BITSHIFT) > 31);
 
 	if (!nbits)
 		return;
 
-retry:
-	entropy_count = orig = READ_ONCE(input_pool.entropy_count);
-	if (nfrac < 0) {
-		/* Debit */
-		entropy_count += nfrac;
-	} else {
-		/*
-		 * Credit: we have to account for the possibility of
-		 * overwriting already present entropy.	 Even in the
-		 * ideal case of pure Shannon entropy, new contributions
-		 * approach the full value asymptotically:
-		 *
-		 * entropy <- entropy + (pool_size - entropy) *
-		 *	(1 - exp(-add_entropy/pool_size))
-		 *
-		 * For add_entropy <= pool_size/2 then
-		 * (1 - exp(-add_entropy/pool_size)) >=
-		 *    (add_entropy/pool_size)*0.7869...
-		 * so we can approximate the exponential with
-		 * 3/4*add_entropy/pool_size and still be on the
-		 * safe side by adding at most pool_size/2 at a time.
-		 *
-		 * The use of pool_size-2 in the while statement is to
-		 * prevent rounding artifacts from making the loop
-		 * arbitrarily long; this limits the loop to log2(pool_size)*2
-		 * turns no matter how large nbits is.
-		 */
-		int pnfrac = nfrac;
-		const int s = POOL_BITSHIFT + POOL_ENTROPY_SHIFT + 2;
-		/* The +2 corresponds to the /4 in the denominator */
-
-		do {
-			unsigned int anfrac = min(pnfrac, POOL_FRACBITS / 2);
-			unsigned int add =
-				((POOL_FRACBITS - entropy_count) * anfrac * 3) >> s;
-
-			entropy_count += add;
-			pnfrac -= anfrac;
-		} while (unlikely(entropy_count < POOL_FRACBITS - 2 && pnfrac));
-	}
-
-	if (WARN_ON(entropy_count < 0)) {
-		pr_warn("negative entropy/overflow: count %d\n", entropy_count);
-		entropy_count = 0;
-	} else if (entropy_count > POOL_FRACBITS)
-		entropy_count = POOL_FRACBITS;
-	if (cmpxchg(&input_pool.entropy_count, orig, entropy_count) != orig)
-		goto retry;
+	do {
+		orig = READ_ONCE(input_pool.entropy_count);
+		entropy_count = min(POOL_BITS, orig + nbits);
+	} while (cmpxchg(&input_pool.entropy_count, orig, entropy_count) != orig);
 
-	trace_credit_entropy_bits(nbits, entropy_count >> POOL_ENTROPY_SHIFT, _RET_IP_);
+	trace_credit_entropy_bits(nbits, entropy_count, _RET_IP_);
 
-	if (crng_init < 2 && entropy_count >= POOL_MIN_FRACBITS)
+	if (crng_init < 2 && entropy_count >= POOL_MIN_BITS)
 		crng_reseed(&primary_crng, true);
 }
 
@@ -791,7 +735,7 @@ static void crng_reseed(struct crng_state *crng, bool use_input_pool)
 		int entropy_count;
 		do {
 			entropy_count = READ_ONCE(input_pool.entropy_count);
-			if (entropy_count < POOL_MIN_FRACBITS)
+			if (entropy_count < POOL_MIN_BITS)
 				return;
 		} while (cmpxchg(&input_pool.entropy_count, entropy_count, 0) != entropy_count);
 		extract_entropy(buf.key, sizeof(buf.key));
@@ -1014,7 +958,7 @@ void add_input_randomness(unsigned int type, unsigned int code,
 	last_value = value;
 	add_timer_randomness(&input_timer_state,
 			     (type << 4) ^ code ^ (code >> 4) ^ value);
-	trace_add_input_randomness(POOL_ENTROPY_BITS());
+	trace_add_input_randomness(input_pool.entropy_count);
 }
 EXPORT_SYMBOL_GPL(add_input_randomness);
 
@@ -1112,7 +1056,7 @@ void add_disk_randomness(struct gendisk *disk)
 		return;
 	/* first major is 1, so we get >= 0x200 here */
 	add_timer_randomness(disk->random, 0x100 + disk_devt(disk));
-	trace_add_disk_randomness(disk_devt(disk), POOL_ENTROPY_BITS());
+	trace_add_disk_randomness(disk_devt(disk), input_pool.entropy_count);
 }
 EXPORT_SYMBOL_GPL(add_disk_randomness);
 #endif
@@ -1137,7 +1081,7 @@ static void extract_entropy(void *buf, size_t nbytes)
 	} block;
 	size_t i;
 
-	trace_extract_entropy(nbytes, POOL_ENTROPY_BITS());
+	trace_extract_entropy(nbytes, input_pool.entropy_count);
 
 	for (i = 0; i < ARRAY_SIZE(block.rdrand); ++i) {
 		if (!arch_get_random_long(&block.rdrand[i]))
@@ -1486,9 +1430,9 @@ static ssize_t urandom_read_nowarn(struct file *file, char __user *buf,
 {
 	int ret;
 
-	nbytes = min_t(size_t, nbytes, INT_MAX >> (POOL_ENTROPY_SHIFT + 3));
+	nbytes = min_t(size_t, nbytes, INT_MAX >> 6);
 	ret = extract_crng_user(buf, nbytes);
-	trace_urandom_read(8 * nbytes, 0, POOL_ENTROPY_BITS());
+	trace_urandom_read(8 * nbytes, 0, input_pool.entropy_count);
 	return ret;
 }
 
@@ -1527,7 +1471,7 @@ static __poll_t random_poll(struct file *file, poll_table *wait)
 	mask = 0;
 	if (crng_ready())
 		mask |= EPOLLIN | EPOLLRDNORM;
-	if (POOL_ENTROPY_BITS() < random_write_wakeup_bits)
+	if (input_pool.entropy_count < random_write_wakeup_bits)
 		mask |= EPOLLOUT | EPOLLWRNORM;
 	return mask;
 }
@@ -1582,8 +1526,7 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
 	switch (cmd) {
 	case RNDGETENTCNT:
 		/* inherently racy, no point locking */
-		ent_count = POOL_ENTROPY_BITS();
-		if (put_user(ent_count, p))
+		if (put_user(input_pool.entropy_count, p))
 			return -EFAULT;
 		return 0;
 	case RNDADDTOENTCNT:
@@ -1734,23 +1677,6 @@ static int proc_do_uuid(struct ctl_table *table, int write, void *buffer,
 	return proc_dostring(&fake_table, write, buffer, lenp, ppos);
 }
 
-/*
- * Return entropy available scaled to integral bits
- */
-static int proc_do_entropy(struct ctl_table *table, int write, void *buffer,
-			   size_t *lenp, loff_t *ppos)
-{
-	struct ctl_table fake_table;
-	int entropy_count;
-
-	entropy_count = *(int *)table->data >> POOL_ENTROPY_SHIFT;
-
-	fake_table.data = &entropy_count;
-	fake_table.maxlen = sizeof(entropy_count);
-
-	return proc_dointvec(&fake_table, write, buffer, lenp, ppos);
-}
-
 static int sysctl_poolsize = POOL_BITS;
 static struct ctl_table random_table[] = {
 	{
@@ -1762,10 +1688,10 @@ static struct ctl_table random_table[] = {
 	},
 	{
 		.procname	= "entropy_avail",
+		.data		= &input_pool.entropy_count,
 		.maxlen		= sizeof(int),
 		.mode		= 0444,
-		.proc_handler	= proc_do_entropy,
-		.data		= &input_pool.entropy_count,
+		.proc_handler	= proc_dointvec,
 	},
 	{
 		.procname	= "write_wakeup_threshold",
@@ -1972,7 +1898,7 @@ void add_hwgenerator_randomness(const char *buffer, size_t count,
 	 */
 	wait_event_interruptible_timeout(random_write_wait,
 			!system_wq || kthread_should_stop() ||
-			POOL_ENTROPY_BITS() <= random_write_wakeup_bits,
+			input_pool.entropy_count <= random_write_wakeup_bits,
 			CRNG_RESEED_INTERVAL);
 	mix_pool_bytes(buffer, count);
 	credit_entropy_bits(entropy);
-- 
GitLab


From 489c7fc44b5740d377e8cfdbf0851036e493af00 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Sat, 5 Feb 2022 14:00:58 +0100
Subject: [PATCH 0525/1586] random: always wake up entropy writers after
 extraction

Now that POOL_BITS == POOL_MIN_BITS, we must unconditionally wake up
entropy writers after every extraction. Therefore there's no point of
write_wakeup_threshold, so we can move it to the dustbin of unused
compatibility sysctls. While we're at it, we can fix a small comparison
where we were waking up after <= min rather than < min.

Cc: Theodore Ts'o <tytso@mit.edu>
Suggested-by: Eric Biggers <ebiggers@kernel.org>
Reviewed-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 Documentation/admin-guide/sysctl/kernel.rst |  7 +++--
 drivers/char/random.c                       | 33 +++++++--------------
 2 files changed, 16 insertions(+), 24 deletions(-)

diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
index d359bcfadd39a..d3c6d9a501a9c 100644
--- a/Documentation/admin-guide/sysctl/kernel.rst
+++ b/Documentation/admin-guide/sysctl/kernel.rst
@@ -1029,14 +1029,17 @@ This is a directory, with the following entries:
 * ``poolsize``: the entropy pool size, in bits;
 
 * ``urandom_min_reseed_secs``: obsolete (used to determine the minimum
-  number of seconds between urandom pool reseeding).
+  number of seconds between urandom pool reseeding). This file is
+  writable for compatibility purposes, but writing to it has no effect
+  on any RNG behavior.
 
 * ``uuid``: a UUID generated every time this is retrieved (this can
   thus be used to generate UUIDs at will);
 
 * ``write_wakeup_threshold``: when the entropy count drops below this
   (as a number of bits), processes waiting to write to ``/dev/random``
-  are woken up.
+  are woken up. This file is writable for compatibility purposes, but
+  writing to it has no effect on any RNG behavior.
 
 If ``drivers/char/random.c`` is built with ``ADD_INTERRUPT_BENCH``
 defined, these additional entries are present:
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 20538e9b1a2cf..3b30e764eeef4 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -296,12 +296,6 @@ enum {
  */
 static DECLARE_WAIT_QUEUE_HEAD(random_write_wait);
 static struct fasync_struct *fasync;
-/*
- * If the entropy count falls under this number of bits, then we
- * should wake up processes which are selecting or polling on write
- * access to /dev/random.
- */
-static int random_write_wakeup_bits = POOL_MIN_BITS;
 
 static DEFINE_SPINLOCK(random_ready_list_lock);
 static LIST_HEAD(random_ready_list);
@@ -739,10 +733,8 @@ static void crng_reseed(struct crng_state *crng, bool use_input_pool)
 				return;
 		} while (cmpxchg(&input_pool.entropy_count, entropy_count, 0) != entropy_count);
 		extract_entropy(buf.key, sizeof(buf.key));
-		if (random_write_wakeup_bits) {
-			wake_up_interruptible(&random_write_wait);
-			kill_fasync(&fasync, SIGIO, POLL_OUT);
-		}
+		wake_up_interruptible(&random_write_wait);
+		kill_fasync(&fasync, SIGIO, POLL_OUT);
 	} else {
 		_extract_crng(&primary_crng, buf.block);
 		_crng_backtrack_protect(&primary_crng, buf.block,
@@ -1471,7 +1463,7 @@ static __poll_t random_poll(struct file *file, poll_table *wait)
 	mask = 0;
 	if (crng_ready())
 		mask |= EPOLLIN | EPOLLRDNORM;
-	if (input_pool.entropy_count < random_write_wakeup_bits)
+	if (input_pool.entropy_count < POOL_MIN_BITS)
 		mask |= EPOLLOUT | EPOLLWRNORM;
 	return mask;
 }
@@ -1556,7 +1548,7 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
 		 */
 		if (!capable(CAP_SYS_ADMIN))
 			return -EPERM;
-		if (xchg(&input_pool.entropy_count, 0) && random_write_wakeup_bits) {
+		if (xchg(&input_pool.entropy_count, 0)) {
 			wake_up_interruptible(&random_write_wait);
 			kill_fasync(&fasync, SIGIO, POLL_OUT);
 		}
@@ -1636,9 +1628,9 @@ SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count, unsigned int,
 
 #include <linux/sysctl.h>
 
-static int min_write_thresh;
-static int max_write_thresh = POOL_BITS;
 static int random_min_urandom_seed = 60;
+static int random_write_wakeup_bits = POOL_MIN_BITS;
+static int sysctl_poolsize = POOL_BITS;
 static char sysctl_bootid[16];
 
 /*
@@ -1677,7 +1669,6 @@ static int proc_do_uuid(struct ctl_table *table, int write, void *buffer,
 	return proc_dostring(&fake_table, write, buffer, lenp, ppos);
 }
 
-static int sysctl_poolsize = POOL_BITS;
 static struct ctl_table random_table[] = {
 	{
 		.procname	= "poolsize",
@@ -1698,9 +1689,7 @@ static struct ctl_table random_table[] = {
 		.data		= &random_write_wakeup_bits,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &min_write_thresh,
-		.extra2		= &max_write_thresh,
+		.proc_handler	= proc_dointvec,
 	},
 	{
 		.procname	= "urandom_min_reseed_secs",
@@ -1892,13 +1881,13 @@ void add_hwgenerator_randomness(const char *buffer, size_t count,
 	}
 
 	/* Throttle writing if we're above the trickle threshold.
-	 * We'll be woken up again once below random_write_wakeup_thresh,
-	 * when the calling thread is about to terminate, or once
-	 * CRNG_RESEED_INTERVAL has lapsed.
+	 * We'll be woken up again once below POOL_MIN_BITS, when
+	 * the calling thread is about to terminate, or once
+	 * CRNG_RESEED_INTERVAL has elapsed.
 	 */
 	wait_event_interruptible_timeout(random_write_wait,
 			!system_wq || kthread_should_stop() ||
-			input_pool.entropy_count <= random_write_wakeup_bits,
+			input_pool.entropy_count < POOL_MIN_BITS,
 			CRNG_RESEED_INTERVAL);
 	mix_pool_bytes(buffer, count);
 	credit_entropy_bits(entropy);
-- 
GitLab


From a49c010e61e1938be851f5e49ac219d49b704103 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Fri, 4 Feb 2022 01:45:53 +0100
Subject: [PATCH 0526/1586] random: make credit_entropy_bits() always safe

This is called from various hwgenerator drivers, so rather than having
one "safe" version for userspace and one "unsafe" version for the
kernel, just make everything safe; the checks are cheap and sensible to
have anyway.

Reported-by: Sultan Alsawaf <sultan@kerneltoast.com>
Reviewed-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/char/random.c | 29 +++++++++--------------------
 1 file changed, 9 insertions(+), 20 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 3b30e764eeef4..92aca0a539e0b 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -447,18 +447,15 @@ static void process_random_ready_list(void)
 	spin_unlock_irqrestore(&random_ready_list_lock, flags);
 }
 
-/*
- * Credit (or debit) the entropy store with n bits of entropy.
- * Use credit_entropy_bits_safe() if the value comes from userspace
- * or otherwise should be checked for extreme values.
- */
 static void credit_entropy_bits(int nbits)
 {
 	int entropy_count, orig;
 
-	if (!nbits)
+	if (nbits <= 0)
 		return;
 
+	nbits = min(nbits, POOL_BITS);
+
 	do {
 		orig = READ_ONCE(input_pool.entropy_count);
 		entropy_count = min(POOL_BITS, orig + nbits);
@@ -470,18 +467,6 @@ static void credit_entropy_bits(int nbits)
 		crng_reseed(&primary_crng, true);
 }
 
-static int credit_entropy_bits_safe(int nbits)
-{
-	if (nbits < 0)
-		return -EINVAL;
-
-	/* Cap the value to avoid overflows */
-	nbits = min(nbits, POOL_BITS);
-
-	credit_entropy_bits(nbits);
-	return 0;
-}
-
 /*********************************************************************
  *
  * CRNG using CHACHA20
@@ -1526,7 +1511,10 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
 			return -EPERM;
 		if (get_user(ent_count, p))
 			return -EFAULT;
-		return credit_entropy_bits_safe(ent_count);
+		if (ent_count < 0)
+			return -EINVAL;
+		credit_entropy_bits(ent_count);
+		return 0;
 	case RNDADDENTROPY:
 		if (!capable(CAP_SYS_ADMIN))
 			return -EPERM;
@@ -1539,7 +1527,8 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
 		retval = write_pool((const char __user *)p, size);
 		if (retval < 0)
 			return retval;
-		return credit_entropy_bits_safe(ent_count);
+		credit_entropy_bits(ent_count);
+		return 0;
 	case RNDZAPENTCNT:
 	case RNDCLEARPOOL:
 		/*
-- 
GitLab


From 5d58ea3a31cc98b9fa563f6921d3d043bf0103d1 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Fri, 4 Feb 2022 14:17:33 -0800
Subject: [PATCH 0527/1586] random: remove use_input_pool parameter from
 crng_reseed()

The primary_crng is always reseeded from the input_pool, while the NUMA
crngs are always reseeded from the primary_crng.  Remove the redundant
'use_input_pool' parameter from crng_reseed() and just directly check
whether the crng is the primary_crng.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/char/random.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 92aca0a539e0b..968c415d1f45a 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -365,7 +365,7 @@ static struct {
 
 static void extract_entropy(void *buf, size_t nbytes);
 
-static void crng_reseed(struct crng_state *crng, bool use_input_pool);
+static void crng_reseed(struct crng_state *crng);
 
 /*
  * This function adds bytes into the entropy "pool".  It does not
@@ -464,7 +464,7 @@ static void credit_entropy_bits(int nbits)
 	trace_credit_entropy_bits(nbits, entropy_count, _RET_IP_);
 
 	if (crng_init < 2 && entropy_count >= POOL_MIN_BITS)
-		crng_reseed(&primary_crng, true);
+		crng_reseed(&primary_crng);
 }
 
 /*********************************************************************
@@ -701,7 +701,7 @@ static int crng_slow_load(const u8 *cp, size_t len)
 	return 1;
 }
 
-static void crng_reseed(struct crng_state *crng, bool use_input_pool)
+static void crng_reseed(struct crng_state *crng)
 {
 	unsigned long flags;
 	int i;
@@ -710,7 +710,7 @@ static void crng_reseed(struct crng_state *crng, bool use_input_pool)
 		u32 key[8];
 	} buf;
 
-	if (use_input_pool) {
+	if (crng == &primary_crng) {
 		int entropy_count;
 		do {
 			entropy_count = READ_ONCE(input_pool.entropy_count);
@@ -748,7 +748,7 @@ static void _extract_crng(struct crng_state *crng, u8 out[CHACHA_BLOCK_SIZE])
 		init_time = READ_ONCE(crng->init_time);
 		if (time_after(READ_ONCE(crng_global_init_time), init_time) ||
 		    time_after(jiffies, init_time + CRNG_RESEED_INTERVAL))
-			crng_reseed(crng, crng == &primary_crng);
+			crng_reseed(crng);
 	}
 	spin_lock_irqsave(&crng->lock, flags);
 	chacha20_block(&crng->state[0], out);
@@ -1547,7 +1547,7 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
 			return -EPERM;
 		if (crng_init < 2)
 			return -ENODATA;
-		crng_reseed(&primary_crng, true);
+		crng_reseed(&primary_crng);
 		WRITE_ONCE(crng_global_init_time, jiffies - 1);
 		return 0;
 	default:
-- 
GitLab


From 77760fd7f7ae3dfd03668204e708d1568d75447d Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Fri, 28 Jan 2022 23:29:45 +0100
Subject: [PATCH 0528/1586] random: remove batched entropy locking
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Rather than use spinlocks to protect batched entropy, we can instead
disable interrupts locally, since we're dealing with per-cpu data, and
manage resets with a basic generation counter. At the same time, we
can't quite do this on PREEMPT_RT, where we still want spinlocks-as-
mutexes semantics. So we use a local_lock_t, which provides the right
behavior for each. Because this is a per-cpu lock, that generation
counter is still doing the necessary CPU-to-CPU communication.

This should improve performance a bit. It will also fix the linked splat
that Jonathan received with a PROVE_RAW_LOCK_NESTING=y.

Reviewed-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Reviewed-by: Dominik Brodowski <linux@dominikbrodowski.net>
Reviewed-by: Eric Biggers <ebiggers@google.com>
Suggested-by: Andy Lutomirski <luto@kernel.org>
Reported-by: Jonathan Neuschäfer <j.neuschaefer@gmx.net>
Tested-by: Jonathan Neuschäfer <j.neuschaefer@gmx.net>
Link: https://lore.kernel.org/lkml/YfMa0QgsjCVdRAvJ@latitude/
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/char/random.c | 55 ++++++++++++++++++++++---------------------
 1 file changed, 28 insertions(+), 27 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 968c415d1f45a..ceded1c4f73b4 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1731,13 +1731,16 @@ static int __init random_sysctls_init(void)
 device_initcall(random_sysctls_init);
 #endif	/* CONFIG_SYSCTL */
 
+static atomic_t batch_generation = ATOMIC_INIT(0);
+
 struct batched_entropy {
 	union {
 		u64 entropy_u64[CHACHA_BLOCK_SIZE / sizeof(u64)];
 		u32 entropy_u32[CHACHA_BLOCK_SIZE / sizeof(u32)];
 	};
+	local_lock_t lock;
 	unsigned int position;
-	spinlock_t batch_lock;
+	int generation;
 };
 
 /*
@@ -1749,7 +1752,7 @@ struct batched_entropy {
  * point prior.
  */
 static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u64) = {
-	.batch_lock = __SPIN_LOCK_UNLOCKED(batched_entropy_u64.lock),
+	.lock = INIT_LOCAL_LOCK(batched_entropy_u64.lock)
 };
 
 u64 get_random_u64(void)
@@ -1758,67 +1761,65 @@ u64 get_random_u64(void)
 	unsigned long flags;
 	struct batched_entropy *batch;
 	static void *previous;
+	int next_gen;
 
 	warn_unseeded_randomness(&previous);
 
+	local_lock_irqsave(&batched_entropy_u64.lock, flags);
 	batch = raw_cpu_ptr(&batched_entropy_u64);
-	spin_lock_irqsave(&batch->batch_lock, flags);
-	if (batch->position % ARRAY_SIZE(batch->entropy_u64) == 0) {
+
+	next_gen = atomic_read(&batch_generation);
+	if (batch->position % ARRAY_SIZE(batch->entropy_u64) == 0 ||
+	    next_gen != batch->generation) {
 		extract_crng((u8 *)batch->entropy_u64);
 		batch->position = 0;
+		batch->generation = next_gen;
 	}
+
 	ret = batch->entropy_u64[batch->position++];
-	spin_unlock_irqrestore(&batch->batch_lock, flags);
+	local_unlock_irqrestore(&batched_entropy_u64.lock, flags);
 	return ret;
 }
 EXPORT_SYMBOL(get_random_u64);
 
 static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u32) = {
-	.batch_lock = __SPIN_LOCK_UNLOCKED(batched_entropy_u32.lock),
+	.lock = INIT_LOCAL_LOCK(batched_entropy_u32.lock)
 };
+
 u32 get_random_u32(void)
 {
 	u32 ret;
 	unsigned long flags;
 	struct batched_entropy *batch;
 	static void *previous;
+	int next_gen;
 
 	warn_unseeded_randomness(&previous);
 
+	local_lock_irqsave(&batched_entropy_u32.lock, flags);
 	batch = raw_cpu_ptr(&batched_entropy_u32);
-	spin_lock_irqsave(&batch->batch_lock, flags);
-	if (batch->position % ARRAY_SIZE(batch->entropy_u32) == 0) {
+
+	next_gen = atomic_read(&batch_generation);
+	if (batch->position % ARRAY_SIZE(batch->entropy_u32) == 0 ||
+	    next_gen != batch->generation) {
 		extract_crng((u8 *)batch->entropy_u32);
 		batch->position = 0;
+		batch->generation = next_gen;
 	}
+
 	ret = batch->entropy_u32[batch->position++];
-	spin_unlock_irqrestore(&batch->batch_lock, flags);
+	local_unlock_irqrestore(&batched_entropy_u32.lock, flags);
 	return ret;
 }
 EXPORT_SYMBOL(get_random_u32);
 
 /* It's important to invalidate all potential batched entropy that might
  * be stored before the crng is initialized, which we can do lazily by
- * simply resetting the counter to zero so that it's re-extracted on the
- * next usage. */
+ * bumping the generation counter.
+ */
 static void invalidate_batched_entropy(void)
 {
-	int cpu;
-	unsigned long flags;
-
-	for_each_possible_cpu(cpu) {
-		struct batched_entropy *batched_entropy;
-
-		batched_entropy = per_cpu_ptr(&batched_entropy_u32, cpu);
-		spin_lock_irqsave(&batched_entropy->batch_lock, flags);
-		batched_entropy->position = 0;
-		spin_unlock(&batched_entropy->batch_lock);
-
-		batched_entropy = per_cpu_ptr(&batched_entropy_u64, cpu);
-		spin_lock(&batched_entropy->batch_lock);
-		batched_entropy->position = 0;
-		spin_unlock_irqrestore(&batched_entropy->batch_lock, flags);
-	}
+	atomic_inc(&batch_generation);
 }
 
 /**
-- 
GitLab


From 7c2fe2b32bf76441ff5b7a425b384e5f75aa530a Mon Sep 17 00:00:00 2001
From: Dominik Brodowski <linux@dominikbrodowski.net>
Date: Sat, 5 Feb 2022 11:34:57 +0100
Subject: [PATCH 0529/1586] random: fix locking in crng_fast_load()

crng_init is protected by primary_crng->lock, so keep holding that lock
when incrementing crng_init from 0 to 1 in crng_fast_load(). The call to
pr_notice() can wait until the lock is released; this code path cannot
be reached twice, as crng_fast_load() aborts early if crng_init > 0.

Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
Reviewed-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/char/random.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index ceded1c4f73b4..79cf577e5086b 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -647,12 +647,13 @@ static size_t crng_fast_load(const u8 *cp, size_t len)
 		p[crng_init_cnt % CHACHA_KEY_SIZE] ^= *cp;
 		cp++; crng_init_cnt++; len--; ret++;
 	}
-	spin_unlock_irqrestore(&primary_crng.lock, flags);
 	if (crng_init_cnt >= CRNG_INIT_CNT_THRESH) {
 		invalidate_batched_entropy();
 		crng_init = 1;
-		pr_notice("fast init done\n");
 	}
+	spin_unlock_irqrestore(&primary_crng.lock, flags);
+	if (crng_init == 1)
+		pr_notice("fast init done\n");
 	return ret;
 }
 
-- 
GitLab


From 28f425e573e906a4c15f8392cc2b1561ef448595 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Tue, 8 Feb 2022 12:18:33 +0100
Subject: [PATCH 0530/1586] random: use RDSEED instead of RDRAND in entropy
 extraction

When /dev/random was directly connected with entropy extraction, without
any expansion stage, extract_buf() was called for every 10 bytes of data
read from /dev/random. For that reason, RDRAND was used rather than
RDSEED. At the same time, crng_reseed() was still only called every 5
minutes, so there RDSEED made sense.

Those olden days were also a time when the entropy collector did not use
a cryptographic hash function, which meant most bets were off in terms
of real preimage resistance. For that reason too it didn't matter
_that_ much whether RDSEED was mixed in before or after entropy
extraction; both choices were sort of bad.

But now we have a cryptographic hash function at work, and with that we
get real preimage resistance. We also now only call extract_entropy()
every 5 minutes, rather than every 10 bytes. This allows us to do two
important things.

First, we can switch to using RDSEED in extract_entropy(), as Dominik
suggested. Second, we can ensure that RDSEED input always goes into the
cryptographic hash function with other things before being used
directly. This eliminates a category of attacks in which the CPU knows
the current state of the crng and knows that we're going to xor RDSEED
into it, and so it computes a malicious RDSEED. By going through our
hash function, it would require the CPU to compute a preimage on the
fly, which isn't going to happen.

Cc: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Dominik Brodowski <linux@dominikbrodowski.net>
Suggested-by: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/char/random.c | 22 +++++++++-------------
 1 file changed, 9 insertions(+), 13 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 79cf577e5086b..41f42d8875138 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -727,13 +727,8 @@ static void crng_reseed(struct crng_state *crng)
 					CHACHA_KEY_SIZE);
 	}
 	spin_lock_irqsave(&crng->lock, flags);
-	for (i = 0; i < 8; i++) {
-		unsigned long rv;
-		if (!arch_get_random_seed_long(&rv) &&
-		    !arch_get_random_long(&rv))
-			rv = random_get_entropy();
-		crng->state[i + 4] ^= buf.key[i] ^ rv;
-	}
+	for (i = 0; i < 8; i++)
+		crng->state[i + 4] ^= buf.key[i];
 	memzero_explicit(&buf, sizeof(buf));
 	WRITE_ONCE(crng->init_time, jiffies);
 	spin_unlock_irqrestore(&crng->lock, flags);
@@ -1054,16 +1049,17 @@ static void extract_entropy(void *buf, size_t nbytes)
 	unsigned long flags;
 	u8 seed[BLAKE2S_HASH_SIZE], next_key[BLAKE2S_HASH_SIZE];
 	struct {
-		unsigned long rdrand[32 / sizeof(long)];
+		unsigned long rdseed[32 / sizeof(long)];
 		size_t counter;
 	} block;
 	size_t i;
 
 	trace_extract_entropy(nbytes, input_pool.entropy_count);
 
-	for (i = 0; i < ARRAY_SIZE(block.rdrand); ++i) {
-		if (!arch_get_random_long(&block.rdrand[i]))
-			block.rdrand[i] = random_get_entropy();
+	for (i = 0; i < ARRAY_SIZE(block.rdseed); ++i) {
+		if (!arch_get_random_seed_long(&block.rdseed[i]) &&
+		    !arch_get_random_long(&block.rdseed[i]))
+			block.rdseed[i] = random_get_entropy();
 	}
 
 	spin_lock_irqsave(&input_pool.lock, flags);
@@ -1071,7 +1067,7 @@ static void extract_entropy(void *buf, size_t nbytes)
 	/* seed = HASHPRF(last_key, entropy_input) */
 	blake2s_final(&input_pool.hash, seed);
 
-	/* next_key = HASHPRF(seed, RDRAND || 0) */
+	/* next_key = HASHPRF(seed, RDSEED || 0) */
 	block.counter = 0;
 	blake2s(next_key, (u8 *)&block, seed, sizeof(next_key), sizeof(block), sizeof(seed));
 	blake2s_init_key(&input_pool.hash, BLAKE2S_HASH_SIZE, next_key, sizeof(next_key));
@@ -1081,7 +1077,7 @@ static void extract_entropy(void *buf, size_t nbytes)
 
 	while (nbytes) {
 		i = min_t(size_t, nbytes, BLAKE2S_HASH_SIZE);
-		/* output = HASHPRF(seed, RDRAND || ++counter) */
+		/* output = HASHPRF(seed, RDSEED || ++counter) */
 		++block.counter;
 		blake2s(buf, (u8 *)&block, seed, i, sizeof(block), sizeof(seed));
 		nbytes -= i;
-- 
GitLab


From a9412d510ab9a9ba411fea612903631d2e1f1601 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Sun, 6 Feb 2022 23:51:41 +0100
Subject: [PATCH 0531/1586] random: get rid of secondary crngs

As the comment said, this is indeed a "hack". Since it was introduced,
it's been a constant state machine nightmare, with lots of subtle early
boot issues and a wildly complex set of machinery to keep everything in
sync. Rather than continuing to play whack-a-mole with this approach,
this commit simply removes it entirely. This commit is preparation for
"random: use simpler fast key erasure flow on per-cpu keys" in this
series, which introduces a simpler (and faster) mechanism to accomplish
the same thing.

Cc: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/char/random.c | 225 ++++++++++--------------------------------
 1 file changed, 53 insertions(+), 172 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 41f42d8875138..be4922e09cc57 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -323,14 +323,11 @@ static struct crng_state primary_crng = {
  * its value (from 0->1->2).
  */
 static int crng_init = 0;
-static bool crng_need_final_init = false;
 #define crng_ready() (likely(crng_init > 1))
 static int crng_init_cnt = 0;
-static unsigned long crng_global_init_time = 0;
 #define CRNG_INIT_CNT_THRESH (2 * CHACHA_KEY_SIZE)
-static void _extract_crng(struct crng_state *crng, u8 out[CHACHA_BLOCK_SIZE]);
-static void _crng_backtrack_protect(struct crng_state *crng,
-				    u8 tmp[CHACHA_BLOCK_SIZE], int used);
+static void extract_crng(u8 out[CHACHA_BLOCK_SIZE]);
+static void crng_backtrack_protect(u8 tmp[CHACHA_BLOCK_SIZE], int used);
 static void process_random_ready_list(void);
 static void _get_random_bytes(void *buf, int nbytes);
 
@@ -365,7 +362,7 @@ static struct {
 
 static void extract_entropy(void *buf, size_t nbytes);
 
-static void crng_reseed(struct crng_state *crng);
+static void crng_reseed(void);
 
 /*
  * This function adds bytes into the entropy "pool".  It does not
@@ -464,7 +461,7 @@ static void credit_entropy_bits(int nbits)
 	trace_credit_entropy_bits(nbits, entropy_count, _RET_IP_);
 
 	if (crng_init < 2 && entropy_count >= POOL_MIN_BITS)
-		crng_reseed(&primary_crng);
+		crng_reseed();
 }
 
 /*********************************************************************
@@ -477,16 +474,7 @@ static void credit_entropy_bits(int nbits)
 
 static DECLARE_WAIT_QUEUE_HEAD(crng_init_wait);
 
-/*
- * Hack to deal with crazy userspace progams when they are all trying
- * to access /dev/urandom in parallel.  The programs are almost
- * certainly doing something terribly wrong, but we'll work around
- * their brain damage.
- */
-static struct crng_state **crng_node_pool __read_mostly;
-
 static void invalidate_batched_entropy(void);
-static void numa_crng_init(void);
 
 static bool trust_cpu __ro_after_init = IS_ENABLED(CONFIG_RANDOM_TRUST_CPU);
 static int __init parse_trust_cpu(char *arg)
@@ -495,24 +483,6 @@ static int __init parse_trust_cpu(char *arg)
 }
 early_param("random.trust_cpu", parse_trust_cpu);
 
-static bool crng_init_try_arch(struct crng_state *crng)
-{
-	int i;
-	bool arch_init = true;
-	unsigned long rv;
-
-	for (i = 4; i < 16; i++) {
-		if (!arch_get_random_seed_long(&rv) &&
-		    !arch_get_random_long(&rv)) {
-			rv = random_get_entropy();
-			arch_init = false;
-		}
-		crng->state[i] ^= rv;
-	}
-
-	return arch_init;
-}
-
 static bool __init crng_init_try_arch_early(void)
 {
 	int i;
@@ -531,100 +501,17 @@ static bool __init crng_init_try_arch_early(void)
 	return arch_init;
 }
 
-static void crng_initialize_secondary(struct crng_state *crng)
-{
-	chacha_init_consts(crng->state);
-	_get_random_bytes(&crng->state[4], sizeof(u32) * 12);
-	crng_init_try_arch(crng);
-	crng->init_time = jiffies - CRNG_RESEED_INTERVAL - 1;
-}
-
-static void __init crng_initialize_primary(void)
+static void __init crng_initialize(void)
 {
 	extract_entropy(&primary_crng.state[4], sizeof(u32) * 12);
 	if (crng_init_try_arch_early() && trust_cpu && crng_init < 2) {
 		invalidate_batched_entropy();
-		numa_crng_init();
 		crng_init = 2;
 		pr_notice("crng init done (trusting CPU's manufacturer)\n");
 	}
 	primary_crng.init_time = jiffies - CRNG_RESEED_INTERVAL - 1;
 }
 
-static void crng_finalize_init(void)
-{
-	if (!system_wq) {
-		/* We can't call numa_crng_init until we have workqueues,
-		 * so mark this for processing later. */
-		crng_need_final_init = true;
-		return;
-	}
-
-	invalidate_batched_entropy();
-	numa_crng_init();
-	crng_init = 2;
-	crng_need_final_init = false;
-	process_random_ready_list();
-	wake_up_interruptible(&crng_init_wait);
-	kill_fasync(&fasync, SIGIO, POLL_IN);
-	pr_notice("crng init done\n");
-	if (unseeded_warning.missed) {
-		pr_notice("%d get_random_xx warning(s) missed due to ratelimiting\n",
-			  unseeded_warning.missed);
-		unseeded_warning.missed = 0;
-	}
-	if (urandom_warning.missed) {
-		pr_notice("%d urandom warning(s) missed due to ratelimiting\n",
-			  urandom_warning.missed);
-		urandom_warning.missed = 0;
-	}
-}
-
-static void do_numa_crng_init(struct work_struct *work)
-{
-	int i;
-	struct crng_state *crng;
-	struct crng_state **pool;
-
-	pool = kcalloc(nr_node_ids, sizeof(*pool), GFP_KERNEL | __GFP_NOFAIL);
-	for_each_online_node(i) {
-		crng = kmalloc_node(sizeof(struct crng_state),
-				    GFP_KERNEL | __GFP_NOFAIL, i);
-		spin_lock_init(&crng->lock);
-		crng_initialize_secondary(crng);
-		pool[i] = crng;
-	}
-	/* pairs with READ_ONCE() in select_crng() */
-	if (cmpxchg_release(&crng_node_pool, NULL, pool) != NULL) {
-		for_each_node(i)
-			kfree(pool[i]);
-		kfree(pool);
-	}
-}
-
-static DECLARE_WORK(numa_crng_init_work, do_numa_crng_init);
-
-static void numa_crng_init(void)
-{
-	if (IS_ENABLED(CONFIG_NUMA))
-		schedule_work(&numa_crng_init_work);
-}
-
-static struct crng_state *select_crng(void)
-{
-	if (IS_ENABLED(CONFIG_NUMA)) {
-		struct crng_state **pool;
-		int nid = numa_node_id();
-
-		/* pairs with cmpxchg_release() in do_numa_crng_init() */
-		pool = READ_ONCE(crng_node_pool);
-		if (pool && pool[nid])
-			return pool[nid];
-	}
-
-	return &primary_crng;
-}
-
 /*
  * crng_fast_load() can be called by code in the interrupt service
  * path.  So we can't afford to dilly-dally. Returns the number of
@@ -702,68 +589,71 @@ static int crng_slow_load(const u8 *cp, size_t len)
 	return 1;
 }
 
-static void crng_reseed(struct crng_state *crng)
+static void crng_reseed(void)
 {
 	unsigned long flags;
-	int i;
+	int i, entropy_count;
 	union {
 		u8 block[CHACHA_BLOCK_SIZE];
 		u32 key[8];
 	} buf;
 
-	if (crng == &primary_crng) {
-		int entropy_count;
-		do {
-			entropy_count = READ_ONCE(input_pool.entropy_count);
-			if (entropy_count < POOL_MIN_BITS)
-				return;
-		} while (cmpxchg(&input_pool.entropy_count, entropy_count, 0) != entropy_count);
-		extract_entropy(buf.key, sizeof(buf.key));
-		wake_up_interruptible(&random_write_wait);
-		kill_fasync(&fasync, SIGIO, POLL_OUT);
-	} else {
-		_extract_crng(&primary_crng, buf.block);
-		_crng_backtrack_protect(&primary_crng, buf.block,
-					CHACHA_KEY_SIZE);
-	}
-	spin_lock_irqsave(&crng->lock, flags);
+	do {
+		entropy_count = READ_ONCE(input_pool.entropy_count);
+		if (entropy_count < POOL_MIN_BITS)
+			return;
+	} while (cmpxchg(&input_pool.entropy_count, entropy_count, 0) != entropy_count);
+	extract_entropy(buf.key, sizeof(buf.key));
+	wake_up_interruptible(&random_write_wait);
+	kill_fasync(&fasync, SIGIO, POLL_OUT);
+
+	spin_lock_irqsave(&primary_crng.lock, flags);
 	for (i = 0; i < 8; i++)
-		crng->state[i + 4] ^= buf.key[i];
+		primary_crng.state[i + 4] ^= buf.key[i];
 	memzero_explicit(&buf, sizeof(buf));
-	WRITE_ONCE(crng->init_time, jiffies);
-	spin_unlock_irqrestore(&crng->lock, flags);
-	if (crng == &primary_crng && crng_init < 2)
-		crng_finalize_init();
+	WRITE_ONCE(primary_crng.init_time, jiffies);
+	spin_unlock_irqrestore(&primary_crng.lock, flags);
+	if (crng_init < 2) {
+		invalidate_batched_entropy();
+		crng_init = 2;
+		process_random_ready_list();
+		wake_up_interruptible(&crng_init_wait);
+		kill_fasync(&fasync, SIGIO, POLL_IN);
+		pr_notice("crng init done\n");
+		if (unseeded_warning.missed) {
+			pr_notice("%d get_random_xx warning(s) missed due to ratelimiting\n",
+				  unseeded_warning.missed);
+			unseeded_warning.missed = 0;
+		}
+		if (urandom_warning.missed) {
+			pr_notice("%d urandom warning(s) missed due to ratelimiting\n",
+				  urandom_warning.missed);
+			urandom_warning.missed = 0;
+		}
+	}
 }
 
-static void _extract_crng(struct crng_state *crng, u8 out[CHACHA_BLOCK_SIZE])
+static void extract_crng(u8 out[CHACHA_BLOCK_SIZE])
 {
 	unsigned long flags, init_time;
 
 	if (crng_ready()) {
-		init_time = READ_ONCE(crng->init_time);
-		if (time_after(READ_ONCE(crng_global_init_time), init_time) ||
-		    time_after(jiffies, init_time + CRNG_RESEED_INTERVAL))
-			crng_reseed(crng);
+		init_time = READ_ONCE(primary_crng.init_time);
+		if (time_after(jiffies, init_time + CRNG_RESEED_INTERVAL))
+			crng_reseed();
 	}
-	spin_lock_irqsave(&crng->lock, flags);
-	chacha20_block(&crng->state[0], out);
-	if (crng->state[12] == 0)
-		crng->state[13]++;
-	spin_unlock_irqrestore(&crng->lock, flags);
-}
-
-static void extract_crng(u8 out[CHACHA_BLOCK_SIZE])
-{
-	_extract_crng(select_crng(), out);
+	spin_lock_irqsave(&primary_crng.lock, flags);
+	chacha20_block(&primary_crng.state[0], out);
+	if (primary_crng.state[12] == 0)
+		primary_crng.state[13]++;
+	spin_unlock_irqrestore(&primary_crng.lock, flags);
 }
 
 /*
  * Use the leftover bytes from the CRNG block output (if there is
  * enough) to mutate the CRNG key to provide backtracking protection.
  */
-static void _crng_backtrack_protect(struct crng_state *crng,
-				    u8 tmp[CHACHA_BLOCK_SIZE], int used)
+static void crng_backtrack_protect(u8 tmp[CHACHA_BLOCK_SIZE], int used)
 {
 	unsigned long flags;
 	u32 *s, *d;
@@ -774,17 +664,12 @@ static void _crng_backtrack_protect(struct crng_state *crng,
 		extract_crng(tmp);
 		used = 0;
 	}
-	spin_lock_irqsave(&crng->lock, flags);
+	spin_lock_irqsave(&primary_crng.lock, flags);
 	s = (u32 *)&tmp[used];
-	d = &crng->state[4];
+	d = &primary_crng.state[4];
 	for (i = 0; i < 8; i++)
 		*d++ ^= *s++;
-	spin_unlock_irqrestore(&crng->lock, flags);
-}
-
-static void crng_backtrack_protect(u8 tmp[CHACHA_BLOCK_SIZE], int used)
-{
-	_crng_backtrack_protect(select_crng(), tmp, used);
+	spin_unlock_irqrestore(&primary_crng.lock, flags);
 }
 
 static ssize_t extract_crng_user(void __user *buf, size_t nbytes)
@@ -1371,10 +1256,7 @@ static void __init init_std_data(void)
 int __init rand_initialize(void)
 {
 	init_std_data();
-	if (crng_need_final_init)
-		crng_finalize_init();
-	crng_initialize_primary();
-	crng_global_init_time = jiffies;
+	crng_initialize();
 	if (ratelimit_disable) {
 		urandom_warning.interval = 0;
 		unseeded_warning.interval = 0;
@@ -1544,8 +1426,7 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
 			return -EPERM;
 		if (crng_init < 2)
 			return -ENODATA;
-		crng_reseed(&primary_crng);
-		WRITE_ONCE(crng_global_init_time, jiffies - 1);
+		crng_reseed();
 		return 0;
 	default:
 		return -EINVAL;
-- 
GitLab


From 8566417221fcec51346ec164e920dacb979c6b5f Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Tue, 8 Feb 2022 12:40:14 +0100
Subject: [PATCH 0532/1586] random: inline leaves of rand_initialize()

This is a preparatory commit for the following one. We simply inline the
various functions that rand_initialize() calls that have no other
callers. The compiler was doing this anyway before. Doing this will
allow us to reorganize this after. We can then move the trust_cpu and
parse_trust_cpu definitions a bit closer to where they're actually used,
which makes the code easier to read.

Cc: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Dominik Brodowski <linux@dominikbrodowski.net>
Reviewed-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/char/random.c | 90 ++++++++++++++++---------------------------
 1 file changed, 33 insertions(+), 57 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index be4922e09cc57..a55a58b9807ae 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -476,42 +476,6 @@ static DECLARE_WAIT_QUEUE_HEAD(crng_init_wait);
 
 static void invalidate_batched_entropy(void);
 
-static bool trust_cpu __ro_after_init = IS_ENABLED(CONFIG_RANDOM_TRUST_CPU);
-static int __init parse_trust_cpu(char *arg)
-{
-	return kstrtobool(arg, &trust_cpu);
-}
-early_param("random.trust_cpu", parse_trust_cpu);
-
-static bool __init crng_init_try_arch_early(void)
-{
-	int i;
-	bool arch_init = true;
-	unsigned long rv;
-
-	for (i = 4; i < 16; i++) {
-		if (!arch_get_random_seed_long_early(&rv) &&
-		    !arch_get_random_long_early(&rv)) {
-			rv = random_get_entropy();
-			arch_init = false;
-		}
-		primary_crng.state[i] ^= rv;
-	}
-
-	return arch_init;
-}
-
-static void __init crng_initialize(void)
-{
-	extract_entropy(&primary_crng.state[4], sizeof(u32) * 12);
-	if (crng_init_try_arch_early() && trust_cpu && crng_init < 2) {
-		invalidate_batched_entropy();
-		crng_init = 2;
-		pr_notice("crng init done (trusting CPU's manufacturer)\n");
-	}
-	primary_crng.init_time = jiffies - CRNG_RESEED_INTERVAL - 1;
-}
-
 /*
  * crng_fast_load() can be called by code in the interrupt service
  * path.  So we can't afford to dilly-dally. Returns the number of
@@ -1220,17 +1184,28 @@ int __must_check get_random_bytes_arch(void *buf, int nbytes)
 }
 EXPORT_SYMBOL(get_random_bytes_arch);
 
+static bool trust_cpu __ro_after_init = IS_ENABLED(CONFIG_RANDOM_TRUST_CPU);
+static int __init parse_trust_cpu(char *arg)
+{
+	return kstrtobool(arg, &trust_cpu);
+}
+early_param("random.trust_cpu", parse_trust_cpu);
+
 /*
- * init_std_data - initialize pool with system data
- *
- * This function clears the pool's entropy count and mixes some system
- * data into the pool to prepare it for use. The pool is not cleared
- * as that can only decrease the entropy in the pool.
+ * Note that setup_arch() may call add_device_randomness()
+ * long before we get here. This allows seeding of the pools
+ * with some platform dependent data very early in the boot
+ * process. But it limits our options here. We must use
+ * statically allocated structures that already have all
+ * initializations complete at compile time. We should also
+ * take care not to overwrite the precious per platform data
+ * we were given.
  */
-static void __init init_std_data(void)
+int __init rand_initialize(void)
 {
 	int i;
 	ktime_t now = ktime_get_real();
+	bool arch_init = true;
 	unsigned long rv;
 
 	mix_pool_bytes(&now, sizeof(now));
@@ -1241,22 +1216,23 @@ static void __init init_std_data(void)
 		mix_pool_bytes(&rv, sizeof(rv));
 	}
 	mix_pool_bytes(utsname(), sizeof(*(utsname())));
-}
 
-/*
- * Note that setup_arch() may call add_device_randomness()
- * long before we get here. This allows seeding of the pools
- * with some platform dependent data very early in the boot
- * process. But it limits our options here. We must use
- * statically allocated structures that already have all
- * initializations complete at compile time. We should also
- * take care not to overwrite the precious per platform data
- * we were given.
- */
-int __init rand_initialize(void)
-{
-	init_std_data();
-	crng_initialize();
+	extract_entropy(&primary_crng.state[4], sizeof(u32) * 12);
+	for (i = 4; i < 16; i++) {
+		if (!arch_get_random_seed_long_early(&rv) &&
+		    !arch_get_random_long_early(&rv)) {
+			rv = random_get_entropy();
+			arch_init = false;
+		}
+		primary_crng.state[i] ^= rv;
+	}
+	if (arch_init && trust_cpu && crng_init < 2) {
+		invalidate_batched_entropy();
+		crng_init = 2;
+		pr_notice("crng init done (trusting CPU's manufacturer)\n");
+	}
+	primary_crng.init_time = jiffies - CRNG_RESEED_INTERVAL - 1;
+
 	if (ratelimit_disable) {
 		urandom_warning.interval = 0;
 		unseeded_warning.interval = 0;
-- 
GitLab


From a02cf3d0dd77244fd5333ac48d78871de459ae6d Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Tue, 8 Feb 2022 12:44:28 +0100
Subject: [PATCH 0533/1586] random: ensure early RDSEED goes through mixer on
 init

Continuing the reasoning of "random: use RDSEED instead of RDRAND in
entropy extraction" from this series, at init time we also don't want to
be xoring RDSEED directly into the crng. Instead it's safer to put it
into our entropy collector and then re-extract it, so that it goes
through a hash function with preimage resistance. As a matter of hygiene,
we also order these now so that the RDSEED byte are hashed in first,
followed by the bytes that are likely more predictable (e.g. utsname()).

Cc: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Dominik Brodowski <linux@dominikbrodowski.net>
Reviewed-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/char/random.c | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index a55a58b9807ae..21a067cf5b4c4 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1208,24 +1208,18 @@ int __init rand_initialize(void)
 	bool arch_init = true;
 	unsigned long rv;
 
-	mix_pool_bytes(&now, sizeof(now));
 	for (i = BLAKE2S_BLOCK_SIZE; i > 0; i -= sizeof(rv)) {
-		if (!arch_get_random_seed_long(&rv) &&
-		    !arch_get_random_long(&rv))
-			rv = random_get_entropy();
-		mix_pool_bytes(&rv, sizeof(rv));
-	}
-	mix_pool_bytes(utsname(), sizeof(*(utsname())));
-
-	extract_entropy(&primary_crng.state[4], sizeof(u32) * 12);
-	for (i = 4; i < 16; i++) {
 		if (!arch_get_random_seed_long_early(&rv) &&
 		    !arch_get_random_long_early(&rv)) {
 			rv = random_get_entropy();
 			arch_init = false;
 		}
-		primary_crng.state[i] ^= rv;
+		mix_pool_bytes(&rv, sizeof(rv));
 	}
+	mix_pool_bytes(&now, sizeof(now));
+	mix_pool_bytes(utsname(), sizeof(*(utsname())));
+
+	extract_entropy(&primary_crng.state[4], sizeof(u32) * 12);
 	if (arch_init && trust_cpu && crng_init < 2) {
 		invalidate_batched_entropy();
 		crng_init = 2;
-- 
GitLab


From 91c2afca290ed3034841c8c8532e69ed9e16cf34 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Tue, 8 Feb 2022 13:00:11 +0100
Subject: [PATCH 0534/1586] random: do not xor RDRAND when writing into
 /dev/random

Continuing the reasoning of "random: ensure early RDSEED goes through
mixer on init", we don't want RDRAND interacting with anything without
going through the mixer function, as a backdoored CPU could presumably
cancel out data during an xor, which it'd have a harder time doing when
being forced through a cryptographic hash function. There's actually no
need at all to be calling RDRAND in write_pool(), because before we
extract from the pool, we always do so with 32 bytes of RDSEED hashed in
at that stage. Xoring at this stage is needless and introduces a minor
liability.

Cc: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Dominik Brodowski <linux@dominikbrodowski.net>
Reviewed-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/char/random.c | 14 ++------------
 1 file changed, 2 insertions(+), 12 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 21a067cf5b4c4..d31b0b3afe2ed 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1305,25 +1305,15 @@ static __poll_t random_poll(struct file *file, poll_table *wait)
 static int write_pool(const char __user *buffer, size_t count)
 {
 	size_t bytes;
-	u32 t, buf[16];
+	u8 buf[BLAKE2S_BLOCK_SIZE];
 	const char __user *p = buffer;
 
 	while (count > 0) {
-		int b, i = 0;
-
 		bytes = min(count, sizeof(buf));
-		if (copy_from_user(&buf, p, bytes))
+		if (copy_from_user(buf, p, bytes))
 			return -EFAULT;
-
-		for (b = bytes; b > 0; b -= sizeof(u32), i++) {
-			if (!arch_get_random_int(&t))
-				break;
-			buf[i] ^= t;
-		}
-
 		count -= bytes;
 		p += bytes;
-
 		mix_pool_bytes(buf, bytes);
 		cond_resched();
 	}
-- 
GitLab


From 80e4e1f472889f31a4dcaea3a4eb7a565296f1f3 Mon Sep 17 00:00:00 2001
From: Barry Song <song.bao.hua@hisilicon.com>
Date: Sun, 20 Feb 2022 19:19:10 +1300
Subject: [PATCH 0535/1586] irqchip/gic-v3: Use dsb(ishst) to order writes with
 ICC_SGI1R_EL1 accesses

A dsb(ishst) barrier should be enough to order previous writes with
the system register generating the SGI, as we only need to guarantee
the visibility of data to other CPUs in the inner shareable domain
before we send the SGI.

A micro-benchmark is written to verify the performance impact on
kunpeng920 machine with 2 sockets, each socket has 2 dies, and
each die has 24 CPUs, so totally the system has 2 * 2 * 24 = 96
CPUs. ~2% performance improvement can be seen by this benchmark.

The code of benchmark module:

 #include <linux/module.h>
 #include <linux/timekeeping.h>

 volatile int data0 ____cacheline_aligned;
 volatile int data1 ____cacheline_aligned;
 volatile int data2 ____cacheline_aligned;
 volatile int data3 ____cacheline_aligned;
 volatile int data4 ____cacheline_aligned;
 volatile int data5 ____cacheline_aligned;
 volatile int data6 ____cacheline_aligned;

 static void ipi_latency_func(void *val)
 {
 }

 static int __init ipi_latency_init(void)
 {
 	ktime_t stime, etime, delta;
 	int cpu, i;
 	int start = smp_processor_id();

 	stime = ktime_get();
 	for ( i = 0; i < 1000; i++)
 		for (cpu = 0; cpu < 96; cpu++) {
 			data0 = data1 = data2 = data3 = data4 = data5 = data6 = cpu;
 			smp_call_function_single(cpu, ipi_latency_func, NULL, 1);
 		}
 	etime = ktime_get();

 	delta = ktime_sub(etime, stime);

 	printk("%s ipi from cpu%d to cpu0-95 delta of 1000times:%lld\n",
 			__func__, start, delta);

 	return 0;
 }
 module_init(ipi_latency_init);

 static void ipi_latency_exit(void)
 {
 }
 module_exit(ipi_latency_exit);

 MODULE_DESCRIPTION("IPI benchmark");
 MODULE_LICENSE("GPL");

run the below commands 10 times on both Vanilla and the kernel with this
patch:
 # taskset -c 0 insmod test.ko
 # rmmod test

The result on vanilla:
 ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:126757449
 ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:126784249
 ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:126177703
 ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:127022281
 ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:126184883
 ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:127374585
 ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:125778089
 ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:126974441
 ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:127357625
 ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:126228184

The result on the kernel with this patch:
 ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:124467401
 ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:123474209
 ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:123558497
 ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:122993951
 ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:122984223
 ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:123323609
 ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:124507583
 ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:123386963
 ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:123340664
 ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:123285324

Signed-off-by: Barry Song <song.bao.hua@hisilicon.com>
[maz: tidied up commit message]
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220220061910.6155-1-21cnbao@gmail.com
---
 drivers/irqchip/irq-gic-v3.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index 5e935d97207dc..0efe1a9a9f3b2 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -1211,7 +1211,7 @@ static void gic_ipi_send_mask(struct irq_data *d, const struct cpumask *mask)
 	 * Ensure that stores to Normal memory are visible to the
 	 * other CPUs before issuing the IPI.
 	 */
-	wmb();
+	dsb(ishst);
 
 	for_each_cpu(cpu, mask) {
 		u64 cluster_id = MPIDR_TO_SGI_CLUSTER_ID(cpu_logical_map(cpu));
-- 
GitLab


From a8ab8ef437d48173e166f1f44f202097e91bf81c Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Sun, 6 Feb 2022 06:58:03 -0800
Subject: [PATCH 0536/1586] ACPI: clean up double words in two comments

Remove the second 'on' and 'those'.

Signed-off-by: Tom Rix <trix@redhat.com>
[ rjw: Subject adjustments ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/Kconfig    | 2 +-
 drivers/acpi/pci_link.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index 273741dedfd20..1e34f846508fb 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -302,7 +302,7 @@ config ACPI_IPMI
 	help
 	  This driver enables the ACPI to access the BMC controller. And it
 	  uses the IPMI request/response message to communicate with BMC
-	  controller, which can be found on on the server.
+	  controller, which can be found on the server.
 
 	  To compile this driver as a module, choose M here:
 	  the module will be called as acpi_ipmi.
diff --git a/drivers/acpi/pci_link.c b/drivers/acpi/pci_link.c
index d54fb8e54671d..58647051c948a 100644
--- a/drivers/acpi/pci_link.c
+++ b/drivers/acpi/pci_link.c
@@ -185,7 +185,7 @@ static acpi_status acpi_pci_link_check_current(struct acpi_resource *resource,
 			if (!p || !p->interrupt_count) {
 				/*
 				 * IRQ descriptors may have no IRQ# bits set,
-				 * particularly those those w/ _STA disabled
+				 * particularly those w/ _STA disabled
 				 */
 				pr_debug("Blank _CRS IRQ resource\n");
 				return AE_OK;
-- 
GitLab


From c30c575db4858f0bbe5e315ff2e529c782f33a1f Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Wed, 9 Feb 2022 01:56:35 +0100
Subject: [PATCH 0537/1586] random: absorb fast pool into input pool after fast
 load

During crng_init == 0, we never credit entropy in add_interrupt_
randomness(), but instead dump it directly into the primary_crng. That's
fine, except for the fact that we then wind up throwing away that
entropy later when we switch to extracting from the input pool and
xoring into (and later in this series overwriting) the primary_crng key.
The two other early init sites -- add_hwgenerator_randomness()'s use
crng_fast_load() and add_device_ randomness()'s use of crng_slow_load()
-- always additionally give their inputs to the input pool. But not
add_interrupt_randomness().

This commit fixes that shortcoming by calling mix_pool_bytes() after
crng_fast_load() in add_interrupt_randomness(). That's partially
verboten on PREEMPT_RT, where it implies taking spinlock_t from an IRQ
handler. But this also only happens during early boot and then never
again after that. Plus it's a trylock so it has the same considerations
as calling crng_fast_load(), which we're already using.

Cc: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Dominik Brodowski <linux@dominikbrodowski.net>
Reviewed-by: Eric Biggers <ebiggers@google.com>
Suggested-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/char/random.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index d31b0b3afe2ed..f3179c67010b9 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -850,6 +850,10 @@ void add_interrupt_randomness(int irq)
 		    crng_fast_load((u8 *)fast_pool->pool, sizeof(fast_pool->pool)) > 0) {
 			fast_pool->count = 0;
 			fast_pool->last = now;
+			if (spin_trylock(&input_pool.lock)) {
+				_mix_pool_bytes(&fast_pool->pool, sizeof(fast_pool->pool));
+				spin_unlock(&input_pool.lock);
+			}
 		}
 		return;
 	}
-- 
GitLab


From 186873c549df11b63e17062f863654e1501e1524 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Mon, 7 Feb 2022 15:08:49 +0100
Subject: [PATCH 0538/1586] random: use simpler fast key erasure flow on
 per-cpu keys
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Rather than the clunky NUMA full ChaCha state system we had prior, this
commit is closer to the original "fast key erasure RNG" proposal from
<https://blog.cr.yp.to/20170723-random.html>, by simply treating ChaCha
keys on a per-cpu basis.

All entropy is extracted to a base crng key of 32 bytes. This base crng
has a birthdate and a generation counter. When we go to take bytes from
the crng, we first check if the birthdate is too old; if it is, we
reseed per usual. Then we start working on a per-cpu crng.

This per-cpu crng makes sure that it has the same generation counter as
the base crng. If it doesn't, it does fast key erasure with the base
crng key and uses the output as its new per-cpu key, and then updates
its local generation counter. Then, using this per-cpu state, we do
ordinary fast key erasure. Half of this first block is used to overwrite
the per-cpu crng key for the next call -- this is the fast key erasure
RNG idea -- and the other half, along with the ChaCha state, is returned
to the caller. If the caller desires more than this remaining half, it
can generate more ChaCha blocks, unlocked, using the now detached ChaCha
state that was just returned. Crypto-wise, this is more or less what we
were doing before, but this simply makes it more explicit and ensures
that we always have backtrack protection by not playing games with a
shared block counter.

The flow looks like this:

──extract()──► base_crng.key ◄──memcpy()───┐
                   │                       │
                   └──chacha()──────┬─► new_base_key
                                    └─► crngs[n].key ◄──memcpy()───┐
                                              │                    │
                                              └──chacha()───┬─► new_key
                                                            └─► random_bytes
                                                                      │
                                                                      └────►

There are a few hairy details around early init. Just as was done
before, prior to having gathered enough entropy, crng_fast_load() and
crng_slow_load() dump bytes directly into the base crng, and when we go
to take bytes from the crng, in that case, we're doing fast key erasure
with the base crng rather than the fast unlocked per-cpu crngs. This is
fine as that's only the state of affairs during very early boot; once
the crng initializes we never use these paths again.

In the process of all this, the APIs into the crng become a bit simpler:
we have get_random_bytes(buf, len) and get_random_bytes_user(buf, len),
which both do what you'd expect. All of the details of fast key erasure
and per-cpu selection happen only in a very short critical section of
crng_make_state(), which selects the right per-cpu key, does the fast
key erasure, and returns a local state to the caller's stack. So, we no
longer have a need for a separate backtrack function, as this happens
all at once here. The API then allows us to extend backtrack protection
to batched entropy without really having to do much at all.

The result is a bit simpler than before and has fewer foot guns. The
init time state machine also gets a lot simpler as we don't need to wait
for workqueues to come online and do deferred work. And the multi-core
performance should be increased significantly, by virtue of having hardly
any locking on the fast path.

Cc: Theodore Ts'o <tytso@mit.edu>
Cc: Dominik Brodowski <linux@dominikbrodowski.net>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Reviewed-by: Jann Horn <jannh@google.com>
Reviewed-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/char/random.c | 395 ++++++++++++++++++++++++------------------
 1 file changed, 229 insertions(+), 166 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index f3179c67010b9..538eaa469f5e4 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -67,63 +67,19 @@
  * Exported interfaces ---- kernel output
  * --------------------------------------
  *
- * The primary kernel interface is
+ * The primary kernel interfaces are:
  *
  *	void get_random_bytes(void *buf, int nbytes);
- *
- * This interface will return the requested number of random bytes,
- * and place it in the requested buffer.  This is equivalent to a
- * read from /dev/urandom.
- *
- * For less critical applications, there are the functions:
- *
  *	u32 get_random_u32()
  *	u64 get_random_u64()
  *	unsigned int get_random_int()
  *	unsigned long get_random_long()
  *
- * These are produced by a cryptographic RNG seeded from get_random_bytes,
- * and so do not deplete the entropy pool as much.  These are recommended
- * for most in-kernel operations *if the result is going to be stored in
- * the kernel*.
- *
- * Specifically, the get_random_int() family do not attempt to do
- * "anti-backtracking".  If you capture the state of the kernel (e.g.
- * by snapshotting the VM), you can figure out previous get_random_int()
- * return values.  But if the value is stored in the kernel anyway,
- * this is not a problem.
- *
- * It *is* safe to expose get_random_int() output to attackers (e.g. as
- * network cookies); given outputs 1..n, it's not feasible to predict
- * outputs 0 or n+1.  The only concern is an attacker who breaks into
- * the kernel later; the get_random_int() engine is not reseeded as
- * often as the get_random_bytes() one.
- *
- * get_random_bytes() is needed for keys that need to stay secret after
- * they are erased from the kernel.  For example, any key that will
- * be wrapped and stored encrypted.  And session encryption keys: we'd
- * like to know that after the session is closed and the keys erased,
- * the plaintext is unrecoverable to someone who recorded the ciphertext.
- *
- * But for network ports/cookies, stack canaries, PRNG seeds, address
- * space layout randomization, session *authentication* keys, or other
- * applications where the sensitive data is stored in the kernel in
- * plaintext for as long as it's sensitive, the get_random_int() family
- * is just fine.
- *
- * Consider ASLR.  We want to keep the address space secret from an
- * outside attacker while the process is running, but once the address
- * space is torn down, it's of no use to an attacker any more.  And it's
- * stored in kernel data structures as long as it's alive, so worrying
- * about an attacker's ability to extrapolate it from the get_random_int()
- * CRNG is silly.
- *
- * Even some cryptographic keys are safe to generate with get_random_int().
- * In particular, keys for SipHash are generally fine.  Here, knowledge
- * of the key authorizes you to do something to a kernel object (inject
- * packets to a network connection, or flood a hash table), and the
- * key is stored with the object being protected.  Once it goes away,
- * we no longer care if anyone knows the key.
+ * These interfaces will return the requested number of random bytes
+ * into the given buffer or as a return value. This is equivalent to a
+ * read from /dev/urandom. The get_random_{u32,u64,int,long}() family
+ * of functions may be higher performance for one-off random integers,
+ * because they do a bit of buffering.
  *
  * prandom_u32()
  * -------------
@@ -300,20 +256,6 @@ static struct fasync_struct *fasync;
 static DEFINE_SPINLOCK(random_ready_list_lock);
 static LIST_HEAD(random_ready_list);
 
-struct crng_state {
-	u32 state[16];
-	unsigned long init_time;
-	spinlock_t lock;
-};
-
-static struct crng_state primary_crng = {
-	.lock = __SPIN_LOCK_UNLOCKED(primary_crng.lock),
-	.state[0] = CHACHA_CONSTANT_EXPA,
-	.state[1] = CHACHA_CONSTANT_ND_3,
-	.state[2] = CHACHA_CONSTANT_2_BY,
-	.state[3] = CHACHA_CONSTANT_TE_K,
-};
-
 /*
  * crng_init =  0 --> Uninitialized
  *		1 --> Initialized
@@ -325,9 +267,6 @@ static struct crng_state primary_crng = {
 static int crng_init = 0;
 #define crng_ready() (likely(crng_init > 1))
 static int crng_init_cnt = 0;
-#define CRNG_INIT_CNT_THRESH (2 * CHACHA_KEY_SIZE)
-static void extract_crng(u8 out[CHACHA_BLOCK_SIZE]);
-static void crng_backtrack_protect(u8 tmp[CHACHA_BLOCK_SIZE], int used);
 static void process_random_ready_list(void);
 static void _get_random_bytes(void *buf, int nbytes);
 
@@ -470,7 +409,30 @@ static void credit_entropy_bits(int nbits)
  *
  *********************************************************************/
 
-#define CRNG_RESEED_INTERVAL (300 * HZ)
+enum {
+	CRNG_RESEED_INTERVAL = 300 * HZ,
+	CRNG_INIT_CNT_THRESH = 2 * CHACHA_KEY_SIZE
+};
+
+static struct {
+	u8 key[CHACHA_KEY_SIZE] __aligned(__alignof__(long));
+	unsigned long birth;
+	unsigned long generation;
+	spinlock_t lock;
+} base_crng = {
+	.lock = __SPIN_LOCK_UNLOCKED(base_crng.lock)
+};
+
+struct crng {
+	u8 key[CHACHA_KEY_SIZE];
+	unsigned long generation;
+	local_lock_t lock;
+};
+
+static DEFINE_PER_CPU(struct crng, crngs) = {
+	.generation = ULONG_MAX,
+	.lock = INIT_LOCAL_LOCK(crngs.lock),
+};
 
 static DECLARE_WAIT_QUEUE_HEAD(crng_init_wait);
 
@@ -487,22 +449,22 @@ static size_t crng_fast_load(const u8 *cp, size_t len)
 	u8 *p;
 	size_t ret = 0;
 
-	if (!spin_trylock_irqsave(&primary_crng.lock, flags))
+	if (!spin_trylock_irqsave(&base_crng.lock, flags))
 		return 0;
 	if (crng_init != 0) {
-		spin_unlock_irqrestore(&primary_crng.lock, flags);
+		spin_unlock_irqrestore(&base_crng.lock, flags);
 		return 0;
 	}
-	p = (u8 *)&primary_crng.state[4];
+	p = base_crng.key;
 	while (len > 0 && crng_init_cnt < CRNG_INIT_CNT_THRESH) {
-		p[crng_init_cnt % CHACHA_KEY_SIZE] ^= *cp;
+		p[crng_init_cnt % sizeof(base_crng.key)] ^= *cp;
 		cp++; crng_init_cnt++; len--; ret++;
 	}
 	if (crng_init_cnt >= CRNG_INIT_CNT_THRESH) {
 		invalidate_batched_entropy();
 		crng_init = 1;
 	}
-	spin_unlock_irqrestore(&primary_crng.lock, flags);
+	spin_unlock_irqrestore(&base_crng.lock, flags);
 	if (crng_init == 1)
 		pr_notice("fast init done\n");
 	return ret;
@@ -527,14 +489,14 @@ static int crng_slow_load(const u8 *cp, size_t len)
 	unsigned long flags;
 	static u8 lfsr = 1;
 	u8 tmp;
-	unsigned int i, max = CHACHA_KEY_SIZE;
+	unsigned int i, max = sizeof(base_crng.key);
 	const u8 *src_buf = cp;
-	u8 *dest_buf = (u8 *)&primary_crng.state[4];
+	u8 *dest_buf = base_crng.key;
 
-	if (!spin_trylock_irqsave(&primary_crng.lock, flags))
+	if (!spin_trylock_irqsave(&base_crng.lock, flags))
 		return 0;
 	if (crng_init != 0) {
-		spin_unlock_irqrestore(&primary_crng.lock, flags);
+		spin_unlock_irqrestore(&base_crng.lock, flags);
 		return 0;
 	}
 	if (len > max)
@@ -545,38 +507,50 @@ static int crng_slow_load(const u8 *cp, size_t len)
 		lfsr >>= 1;
 		if (tmp & 1)
 			lfsr ^= 0xE1;
-		tmp = dest_buf[i % CHACHA_KEY_SIZE];
-		dest_buf[i % CHACHA_KEY_SIZE] ^= src_buf[i % len] ^ lfsr;
+		tmp = dest_buf[i % sizeof(base_crng.key)];
+		dest_buf[i % sizeof(base_crng.key)] ^= src_buf[i % len] ^ lfsr;
 		lfsr += (tmp << 3) | (tmp >> 5);
 	}
-	spin_unlock_irqrestore(&primary_crng.lock, flags);
+	spin_unlock_irqrestore(&base_crng.lock, flags);
 	return 1;
 }
 
 static void crng_reseed(void)
 {
 	unsigned long flags;
-	int i, entropy_count;
-	union {
-		u8 block[CHACHA_BLOCK_SIZE];
-		u32 key[8];
-	} buf;
+	int entropy_count;
+	unsigned long next_gen;
+	u8 key[CHACHA_KEY_SIZE];
 
+	/*
+	 * First we make sure we have POOL_MIN_BITS of entropy in the pool,
+	 * and then we drain all of it. Only then can we extract a new key.
+	 */
 	do {
 		entropy_count = READ_ONCE(input_pool.entropy_count);
 		if (entropy_count < POOL_MIN_BITS)
 			return;
 	} while (cmpxchg(&input_pool.entropy_count, entropy_count, 0) != entropy_count);
-	extract_entropy(buf.key, sizeof(buf.key));
+	extract_entropy(key, sizeof(key));
 	wake_up_interruptible(&random_write_wait);
 	kill_fasync(&fasync, SIGIO, POLL_OUT);
 
-	spin_lock_irqsave(&primary_crng.lock, flags);
-	for (i = 0; i < 8; i++)
-		primary_crng.state[i + 4] ^= buf.key[i];
-	memzero_explicit(&buf, sizeof(buf));
-	WRITE_ONCE(primary_crng.init_time, jiffies);
-	spin_unlock_irqrestore(&primary_crng.lock, flags);
+	/*
+	 * We copy the new key into the base_crng, overwriting the old one,
+	 * and update the generation counter. We avoid hitting ULONG_MAX,
+	 * because the per-cpu crngs are initialized to ULONG_MAX, so this
+	 * forces new CPUs that come online to always initialize.
+	 */
+	spin_lock_irqsave(&base_crng.lock, flags);
+	memcpy(base_crng.key, key, sizeof(base_crng.key));
+	next_gen = base_crng.generation + 1;
+	if (next_gen == ULONG_MAX)
+		++next_gen;
+	WRITE_ONCE(base_crng.generation, next_gen);
+	WRITE_ONCE(base_crng.birth, jiffies);
+	spin_unlock_irqrestore(&base_crng.lock, flags);
+	memzero_explicit(key, sizeof(key));
+
 	if (crng_init < 2) {
 		invalidate_batched_entropy();
 		crng_init = 2;
@@ -597,77 +571,143 @@ static void crng_reseed(void)
 	}
 }
 
-static void extract_crng(u8 out[CHACHA_BLOCK_SIZE])
+/*
+ * The general form here is based on a "fast key erasure RNG" from
+ * <https://blog.cr.yp.to/20170723-random.html>. It generates a ChaCha
+ * block using the provided key, and then immediately overwites that
+ * key with half the block. It returns the resultant ChaCha state to the
+ * user, along with the second half of the block containing 32 bytes of
+ * random data that may be used; random_data_len may not be greater than
+ * 32.
+ */
+static void crng_fast_key_erasure(u8 key[CHACHA_KEY_SIZE],
+				  u32 chacha_state[CHACHA_STATE_WORDS],
+				  u8 *random_data, size_t random_data_len)
 {
-	unsigned long flags, init_time;
+	u8 first_block[CHACHA_BLOCK_SIZE];
 
-	if (crng_ready()) {
-		init_time = READ_ONCE(primary_crng.init_time);
-		if (time_after(jiffies, init_time + CRNG_RESEED_INTERVAL))
-			crng_reseed();
-	}
-	spin_lock_irqsave(&primary_crng.lock, flags);
-	chacha20_block(&primary_crng.state[0], out);
-	if (primary_crng.state[12] == 0)
-		primary_crng.state[13]++;
-	spin_unlock_irqrestore(&primary_crng.lock, flags);
+	BUG_ON(random_data_len > 32);
+
+	chacha_init_consts(chacha_state);
+	memcpy(&chacha_state[4], key, CHACHA_KEY_SIZE);
+	memset(&chacha_state[12], 0, sizeof(u32) * 4);
+	chacha20_block(chacha_state, first_block);
+
+	memcpy(key, first_block, CHACHA_KEY_SIZE);
+	memcpy(random_data, first_block + CHACHA_KEY_SIZE, random_data_len);
+	memzero_explicit(first_block, sizeof(first_block));
 }
 
 /*
- * Use the leftover bytes from the CRNG block output (if there is
- * enough) to mutate the CRNG key to provide backtracking protection.
+ * This function returns a ChaCha state that you may use for generating
+ * random data. It also returns up to 32 bytes on its own of random data
+ * that may be used; random_data_len may not be greater than 32.
  */
-static void crng_backtrack_protect(u8 tmp[CHACHA_BLOCK_SIZE], int used)
+static void crng_make_state(u32 chacha_state[CHACHA_STATE_WORDS],
+			    u8 *random_data, size_t random_data_len)
 {
 	unsigned long flags;
-	u32 *s, *d;
-	int i;
+	struct crng *crng;
 
-	used = round_up(used, sizeof(u32));
-	if (used + CHACHA_KEY_SIZE > CHACHA_BLOCK_SIZE) {
-		extract_crng(tmp);
-		used = 0;
+	BUG_ON(random_data_len > 32);
+
+	/*
+	 * For the fast path, we check whether we're ready, unlocked first, and
+	 * then re-check once locked later. In the case where we're really not
+	 * ready, we do fast key erasure with the base_crng directly, because
+	 * this is what crng_{fast,slow}_load mutate during early init.
+	 */
+	if (unlikely(!crng_ready())) {
+		bool ready;
+
+		spin_lock_irqsave(&base_crng.lock, flags);
+		ready = crng_ready();
+		if (!ready)
+			crng_fast_key_erasure(base_crng.key, chacha_state,
+					      random_data, random_data_len);
+		spin_unlock_irqrestore(&base_crng.lock, flags);
+		if (!ready)
+			return;
 	}
-	spin_lock_irqsave(&primary_crng.lock, flags);
-	s = (u32 *)&tmp[used];
-	d = &primary_crng.state[4];
-	for (i = 0; i < 8; i++)
-		*d++ ^= *s++;
-	spin_unlock_irqrestore(&primary_crng.lock, flags);
+
+	/*
+	 * If the base_crng is more than 5 minutes old, we reseed, which
+	 * in turn bumps the generation counter that we check below.
+	 */
+	if (unlikely(time_after(jiffies, READ_ONCE(base_crng.birth) + CRNG_RESEED_INTERVAL)))
+		crng_reseed();
+
+	local_lock_irqsave(&crngs.lock, flags);
+	crng = raw_cpu_ptr(&crngs);
+
+	/*
+	 * If our per-cpu crng is older than the base_crng, then it means
+	 * somebody reseeded the base_crng. In that case, we do fast key
+	 * erasure on the base_crng, and use its output as the new key
+	 * for our per-cpu crng. This brings us up to date with base_crng.
+	 */
+	if (unlikely(crng->generation != READ_ONCE(base_crng.generation))) {
+		spin_lock(&base_crng.lock);
+		crng_fast_key_erasure(base_crng.key, chacha_state,
+				      crng->key, sizeof(crng->key));
+		crng->generation = base_crng.generation;
+		spin_unlock(&base_crng.lock);
+	}
+
+	/*
+	 * Finally, when we've made it this far, our per-cpu crng has an up
+	 * to date key, and we can do fast key erasure with it to produce
+	 * some random data and a ChaCha state for the caller. All other
+	 * branches of this function are "unlikely", so most of the time we
+	 * should wind up here immediately.
+	 */
+	crng_fast_key_erasure(crng->key, chacha_state, random_data, random_data_len);
+	local_unlock_irqrestore(&crngs.lock, flags);
 }
 
-static ssize_t extract_crng_user(void __user *buf, size_t nbytes)
+static ssize_t get_random_bytes_user(void __user *buf, size_t nbytes)
 {
-	ssize_t ret = 0, i = CHACHA_BLOCK_SIZE;
-	u8 tmp[CHACHA_BLOCK_SIZE] __aligned(4);
-	int large_request = (nbytes > 256);
+	bool large_request = nbytes > 256;
+	ssize_t ret = 0, len;
+	u32 chacha_state[CHACHA_STATE_WORDS];
+	u8 output[CHACHA_BLOCK_SIZE];
+
+	if (!nbytes)
+		return 0;
+
+	len = min_t(ssize_t, 32, nbytes);
+	crng_make_state(chacha_state, output, len);
+
+	if (copy_to_user(buf, output, len))
+		return -EFAULT;
+	nbytes -= len;
+	buf += len;
+	ret += len;
 
 	while (nbytes) {
 		if (large_request && need_resched()) {
-			if (signal_pending(current)) {
-				if (ret == 0)
-					ret = -ERESTARTSYS;
+			if (signal_pending(current))
 				break;
-			}
 			schedule();
 		}
 
-		extract_crng(tmp);
-		i = min_t(int, nbytes, CHACHA_BLOCK_SIZE);
-		if (copy_to_user(buf, tmp, i)) {
+		chacha20_block(chacha_state, output);
+		if (unlikely(chacha_state[12] == 0))
+			++chacha_state[13];
+
+		len = min_t(ssize_t, nbytes, CHACHA_BLOCK_SIZE);
+		if (copy_to_user(buf, output, len)) {
 			ret = -EFAULT;
 			break;
 		}
 
-		nbytes -= i;
-		buf += i;
-		ret += i;
+		nbytes -= len;
+		buf += len;
+		ret += len;
 	}
-	crng_backtrack_protect(tmp, i);
-
-	/* Wipe data just written to memory */
-	memzero_explicit(tmp, sizeof(tmp));
 
+	memzero_explicit(chacha_state, sizeof(chacha_state));
+	memzero_explicit(output, sizeof(output));
 	return ret;
 }
 
@@ -976,23 +1016,36 @@ static void _warn_unseeded_randomness(const char *func_name, void *caller, void
  */
 static void _get_random_bytes(void *buf, int nbytes)
 {
-	u8 tmp[CHACHA_BLOCK_SIZE] __aligned(4);
+	u32 chacha_state[CHACHA_STATE_WORDS];
+	u8 tmp[CHACHA_BLOCK_SIZE];
+	ssize_t len;
 
 	trace_get_random_bytes(nbytes, _RET_IP_);
 
-	while (nbytes >= CHACHA_BLOCK_SIZE) {
-		extract_crng(buf);
-		buf += CHACHA_BLOCK_SIZE;
+	if (!nbytes)
+		return;
+
+	len = min_t(ssize_t, 32, nbytes);
+	crng_make_state(chacha_state, buf, len);
+	nbytes -= len;
+	buf += len;
+
+	while (nbytes) {
+		if (nbytes < CHACHA_BLOCK_SIZE) {
+			chacha20_block(chacha_state, tmp);
+			memcpy(buf, tmp, nbytes);
+			memzero_explicit(tmp, sizeof(tmp));
+			break;
+		}
+
+		chacha20_block(chacha_state, buf);
+		if (unlikely(chacha_state[12] == 0))
+			++chacha_state[13];
 		nbytes -= CHACHA_BLOCK_SIZE;
+		buf += CHACHA_BLOCK_SIZE;
 	}
 
-	if (nbytes > 0) {
-		extract_crng(tmp);
-		memcpy(buf, tmp, nbytes);
-		crng_backtrack_protect(tmp, nbytes);
-	} else
-		crng_backtrack_protect(tmp, CHACHA_BLOCK_SIZE);
-	memzero_explicit(tmp, sizeof(tmp));
+	memzero_explicit(chacha_state, sizeof(chacha_state));
 }
 
 void get_random_bytes(void *buf, int nbytes)
@@ -1223,13 +1276,12 @@ int __init rand_initialize(void)
 	mix_pool_bytes(&now, sizeof(now));
 	mix_pool_bytes(utsname(), sizeof(*(utsname())));
 
-	extract_entropy(&primary_crng.state[4], sizeof(u32) * 12);
+	extract_entropy(base_crng.key, sizeof(base_crng.key));
 	if (arch_init && trust_cpu && crng_init < 2) {
 		invalidate_batched_entropy();
 		crng_init = 2;
 		pr_notice("crng init done (trusting CPU's manufacturer)\n");
 	}
-	primary_crng.init_time = jiffies - CRNG_RESEED_INTERVAL - 1;
 
 	if (ratelimit_disable) {
 		urandom_warning.interval = 0;
@@ -1261,7 +1313,7 @@ static ssize_t urandom_read_nowarn(struct file *file, char __user *buf,
 	int ret;
 
 	nbytes = min_t(size_t, nbytes, INT_MAX >> 6);
-	ret = extract_crng_user(buf, nbytes);
+	ret = get_random_bytes_user(buf, nbytes);
 	trace_urandom_read(8 * nbytes, 0, input_pool.entropy_count);
 	return ret;
 }
@@ -1577,8 +1629,15 @@ static atomic_t batch_generation = ATOMIC_INIT(0);
 
 struct batched_entropy {
 	union {
-		u64 entropy_u64[CHACHA_BLOCK_SIZE / sizeof(u64)];
-		u32 entropy_u32[CHACHA_BLOCK_SIZE / sizeof(u32)];
+		/*
+		 * We make this 1.5x a ChaCha block, so that we get the
+		 * remaining 32 bytes from fast key erasure, plus one full
+		 * block from the detached ChaCha state. We can increase
+		 * the size of this later if needed so long as we keep the
+		 * formula of (integer_blocks + 0.5) * CHACHA_BLOCK_SIZE.
+		 */
+		u64 entropy_u64[CHACHA_BLOCK_SIZE * 3 / (2 * sizeof(u64))];
+		u32 entropy_u32[CHACHA_BLOCK_SIZE * 3 / (2 * sizeof(u32))];
 	};
 	local_lock_t lock;
 	unsigned int position;
@@ -1587,14 +1646,13 @@ struct batched_entropy {
 
 /*
  * Get a random word for internal kernel use only. The quality of the random
- * number is good as /dev/urandom, but there is no backtrack protection, with
- * the goal of being quite fast and not depleting entropy. In order to ensure
- * that the randomness provided by this function is okay, the function
- * wait_for_random_bytes() should be called and return 0 at least once at any
- * point prior.
+ * number is good as /dev/urandom. In order to ensure that the randomness
+ * provided by this function is okay, the function wait_for_random_bytes()
+ * should be called and return 0 at least once at any point prior.
  */
 static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u64) = {
-	.lock = INIT_LOCAL_LOCK(batched_entropy_u64.lock)
+	.lock = INIT_LOCAL_LOCK(batched_entropy_u64.lock),
+	.position = UINT_MAX
 };
 
 u64 get_random_u64(void)
@@ -1611,21 +1669,24 @@ u64 get_random_u64(void)
 	batch = raw_cpu_ptr(&batched_entropy_u64);
 
 	next_gen = atomic_read(&batch_generation);
-	if (batch->position % ARRAY_SIZE(batch->entropy_u64) == 0 ||
+	if (batch->position >= ARRAY_SIZE(batch->entropy_u64) ||
 	    next_gen != batch->generation) {
-		extract_crng((u8 *)batch->entropy_u64);
+		_get_random_bytes(batch->entropy_u64, sizeof(batch->entropy_u64));
 		batch->position = 0;
 		batch->generation = next_gen;
 	}
 
-	ret = batch->entropy_u64[batch->position++];
+	ret = batch->entropy_u64[batch->position];
+	batch->entropy_u64[batch->position] = 0;
+	++batch->position;
 	local_unlock_irqrestore(&batched_entropy_u64.lock, flags);
 	return ret;
 }
 EXPORT_SYMBOL(get_random_u64);
 
 static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u32) = {
-	.lock = INIT_LOCAL_LOCK(batched_entropy_u32.lock)
+	.lock = INIT_LOCAL_LOCK(batched_entropy_u32.lock),
+	.position = UINT_MAX
 };
 
 u32 get_random_u32(void)
@@ -1642,14 +1703,16 @@ u32 get_random_u32(void)
 	batch = raw_cpu_ptr(&batched_entropy_u32);
 
 	next_gen = atomic_read(&batch_generation);
-	if (batch->position % ARRAY_SIZE(batch->entropy_u32) == 0 ||
+	if (batch->position >= ARRAY_SIZE(batch->entropy_u32) ||
 	    next_gen != batch->generation) {
-		extract_crng((u8 *)batch->entropy_u32);
+		_get_random_bytes(batch->entropy_u32, sizeof(batch->entropy_u32));
 		batch->position = 0;
 		batch->generation = next_gen;
 	}
 
-	ret = batch->entropy_u32[batch->position++];
+	ret = batch->entropy_u32[batch->position];
+	batch->entropy_u32[batch->position] = 0;
+	++batch->position;
 	local_unlock_irqrestore(&batched_entropy_u32.lock, flags);
 	return ret;
 }
-- 
GitLab


From 66e4c2b9541503d721e936cc3898c9f25f4591ff Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Tue, 8 Feb 2022 19:23:17 +0100
Subject: [PATCH 0539/1586] random: use hash function for crng_slow_load()

Since we have a hash function that's really fast, and the goal of
crng_slow_load() is reportedly to "touch all of the crng's state", we
can just hash the old state together with the new state and call it a
day. This way we dont need to reason about another LFSR or worry about
various attacks there. This code is only ever used at early boot and
then never again.

Cc: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Dominik Brodowski <linux@dominikbrodowski.net>
Reviewed-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/char/random.c | 42 +++++++++++++++---------------------------
 1 file changed, 15 insertions(+), 27 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 538eaa469f5e4..630b9b9e7d256 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -477,42 +477,30 @@ static size_t crng_fast_load(const u8 *cp, size_t len)
  * all), and (2) it doesn't have the performance constraints of
  * crng_fast_load().
  *
- * So we do something more comprehensive which is guaranteed to touch
- * all of the primary_crng's state, and which uses a LFSR with a
- * period of 255 as part of the mixing algorithm.  Finally, we do
- * *not* advance crng_init_cnt since buffer we may get may be something
- * like a fixed DMI table (for example), which might very well be
- * unique to the machine, but is otherwise unvarying.
+ * So, we simply hash the contents in with the current key. Finally,
+ * we do *not* advance crng_init_cnt since buffer we may get may be
+ * something like a fixed DMI table (for example), which might very
+ * well be unique to the machine, but is otherwise unvarying.
  */
-static int crng_slow_load(const u8 *cp, size_t len)
+static void crng_slow_load(const u8 *cp, size_t len)
 {
 	unsigned long flags;
-	static u8 lfsr = 1;
-	u8 tmp;
-	unsigned int i, max = sizeof(base_crng.key);
-	const u8 *src_buf = cp;
-	u8 *dest_buf = base_crng.key;
+	struct blake2s_state hash;
+
+	blake2s_init(&hash, sizeof(base_crng.key));
 
 	if (!spin_trylock_irqsave(&base_crng.lock, flags))
-		return 0;
+		return;
 	if (crng_init != 0) {
 		spin_unlock_irqrestore(&base_crng.lock, flags);
-		return 0;
-	}
-	if (len > max)
-		max = len;
-
-	for (i = 0; i < max; i++) {
-		tmp = lfsr;
-		lfsr >>= 1;
-		if (tmp & 1)
-			lfsr ^= 0xE1;
-		tmp = dest_buf[i % sizeof(base_crng.key)];
-		dest_buf[i % sizeof(base_crng.key)] ^= src_buf[i % len] ^ lfsr;
-		lfsr += (tmp << 3) | (tmp >> 5);
+		return;
 	}
+
+	blake2s_update(&hash, base_crng.key, sizeof(base_crng.key));
+	blake2s_update(&hash, cp, len);
+	blake2s_final(&hash, base_crng.key);
+
 	spin_unlock_irqrestore(&base_crng.lock, flags);
-	return 1;
 }
 
 static void crng_reseed(void)
-- 
GitLab


From 04ec96b768c9dd43946b047c3da60dcc66431370 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Wed, 9 Feb 2022 14:43:25 +0100
Subject: [PATCH 0540/1586] random: make more consistent use of integer types

We've been using a flurry of int, unsigned int, size_t, and ssize_t.
Let's unify all of this into size_t where it makes sense, as it does in
most places, and leave ssize_t for return values with possible errors.

In addition, keeping with the convention of other functions in this
file, functions that are dealing with raw bytes now take void *
consistently instead of a mix of that and u8 *, because much of the time
we're actually passing some other structure that is then interpreted as
bytes by the function.

We also take the opportunity to fix the outdated and incorrect comment
in get_random_bytes_arch().

Cc: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Dominik Brodowski <linux@dominikbrodowski.net>
Reviewed-by: Jann Horn <jannh@google.com>
Reviewed-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/char/random.c         | 123 +++++++++++++++-------------------
 include/linux/hw_random.h     |   2 +-
 include/linux/random.h        |  10 +--
 include/trace/events/random.h |  79 +++++++++++-----------
 4 files changed, 99 insertions(+), 115 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 630b9b9e7d256..768dee5e081a5 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -69,7 +69,7 @@
  *
  * The primary kernel interfaces are:
  *
- *	void get_random_bytes(void *buf, int nbytes);
+ *	void get_random_bytes(void *buf, size_t nbytes);
  *	u32 get_random_u32()
  *	u64 get_random_u64()
  *	unsigned int get_random_int()
@@ -97,14 +97,14 @@
  * The current exported interfaces for gathering environmental noise
  * from the devices are:
  *
- *	void add_device_randomness(const void *buf, unsigned int size);
+ *	void add_device_randomness(const void *buf, size_t size);
  *	void add_input_randomness(unsigned int type, unsigned int code,
  *                                unsigned int value);
  *	void add_interrupt_randomness(int irq);
  *	void add_disk_randomness(struct gendisk *disk);
- *	void add_hwgenerator_randomness(const char *buffer, size_t count,
+ *	void add_hwgenerator_randomness(const void *buffer, size_t count,
  *					size_t entropy);
- *	void add_bootloader_randomness(const void *buf, unsigned int size);
+ *	void add_bootloader_randomness(const void *buf, size_t size);
  *
  * add_device_randomness() is for adding data to the random pool that
  * is likely to differ between two devices (or possibly even per boot).
@@ -268,7 +268,7 @@ static int crng_init = 0;
 #define crng_ready() (likely(crng_init > 1))
 static int crng_init_cnt = 0;
 static void process_random_ready_list(void);
-static void _get_random_bytes(void *buf, int nbytes);
+static void _get_random_bytes(void *buf, size_t nbytes);
 
 static struct ratelimit_state unseeded_warning =
 	RATELIMIT_STATE_INIT("warn_unseeded_randomness", HZ, 3);
@@ -290,7 +290,7 @@ MODULE_PARM_DESC(ratelimit_disable, "Disable random ratelimit suppression");
 static struct {
 	struct blake2s_state hash;
 	spinlock_t lock;
-	int entropy_count;
+	unsigned int entropy_count;
 } input_pool = {
 	.hash.h = { BLAKE2S_IV0 ^ (0x01010000 | BLAKE2S_HASH_SIZE),
 		    BLAKE2S_IV1, BLAKE2S_IV2, BLAKE2S_IV3, BLAKE2S_IV4,
@@ -308,18 +308,12 @@ static void crng_reseed(void);
  * update the entropy estimate.  The caller should call
  * credit_entropy_bits if this is appropriate.
  */
-static void _mix_pool_bytes(const void *in, int nbytes)
+static void _mix_pool_bytes(const void *in, size_t nbytes)
 {
 	blake2s_update(&input_pool.hash, in, nbytes);
 }
 
-static void __mix_pool_bytes(const void *in, int nbytes)
-{
-	trace_mix_pool_bytes_nolock(nbytes, _RET_IP_);
-	_mix_pool_bytes(in, nbytes);
-}
-
-static void mix_pool_bytes(const void *in, int nbytes)
+static void mix_pool_bytes(const void *in, size_t nbytes)
 {
 	unsigned long flags;
 
@@ -383,18 +377,18 @@ static void process_random_ready_list(void)
 	spin_unlock_irqrestore(&random_ready_list_lock, flags);
 }
 
-static void credit_entropy_bits(int nbits)
+static void credit_entropy_bits(size_t nbits)
 {
-	int entropy_count, orig;
+	unsigned int entropy_count, orig, add;
 
-	if (nbits <= 0)
+	if (!nbits)
 		return;
 
-	nbits = min(nbits, POOL_BITS);
+	add = min_t(size_t, nbits, POOL_BITS);
 
 	do {
 		orig = READ_ONCE(input_pool.entropy_count);
-		entropy_count = min(POOL_BITS, orig + nbits);
+		entropy_count = min_t(unsigned int, POOL_BITS, orig + add);
 	} while (cmpxchg(&input_pool.entropy_count, orig, entropy_count) != orig);
 
 	trace_credit_entropy_bits(nbits, entropy_count, _RET_IP_);
@@ -443,10 +437,10 @@ static void invalidate_batched_entropy(void);
  * path.  So we can't afford to dilly-dally. Returns the number of
  * bytes processed from cp.
  */
-static size_t crng_fast_load(const u8 *cp, size_t len)
+static size_t crng_fast_load(const void *cp, size_t len)
 {
 	unsigned long flags;
-	u8 *p;
+	const u8 *src = (const u8 *)cp;
 	size_t ret = 0;
 
 	if (!spin_trylock_irqsave(&base_crng.lock, flags))
@@ -455,10 +449,9 @@ static size_t crng_fast_load(const u8 *cp, size_t len)
 		spin_unlock_irqrestore(&base_crng.lock, flags);
 		return 0;
 	}
-	p = base_crng.key;
 	while (len > 0 && crng_init_cnt < CRNG_INIT_CNT_THRESH) {
-		p[crng_init_cnt % sizeof(base_crng.key)] ^= *cp;
-		cp++; crng_init_cnt++; len--; ret++;
+		base_crng.key[crng_init_cnt % sizeof(base_crng.key)] ^= *src;
+		src++; crng_init_cnt++; len--; ret++;
 	}
 	if (crng_init_cnt >= CRNG_INIT_CNT_THRESH) {
 		invalidate_batched_entropy();
@@ -482,7 +475,7 @@ static size_t crng_fast_load(const u8 *cp, size_t len)
  * something like a fixed DMI table (for example), which might very
  * well be unique to the machine, but is otherwise unvarying.
  */
-static void crng_slow_load(const u8 *cp, size_t len)
+static void crng_slow_load(const void *cp, size_t len)
 {
 	unsigned long flags;
 	struct blake2s_state hash;
@@ -656,14 +649,15 @@ static void crng_make_state(u32 chacha_state[CHACHA_STATE_WORDS],
 static ssize_t get_random_bytes_user(void __user *buf, size_t nbytes)
 {
 	bool large_request = nbytes > 256;
-	ssize_t ret = 0, len;
+	ssize_t ret = 0;
+	size_t len;
 	u32 chacha_state[CHACHA_STATE_WORDS];
 	u8 output[CHACHA_BLOCK_SIZE];
 
 	if (!nbytes)
 		return 0;
 
-	len = min_t(ssize_t, 32, nbytes);
+	len = min_t(size_t, 32, nbytes);
 	crng_make_state(chacha_state, output, len);
 
 	if (copy_to_user(buf, output, len))
@@ -683,7 +677,7 @@ static ssize_t get_random_bytes_user(void __user *buf, size_t nbytes)
 		if (unlikely(chacha_state[12] == 0))
 			++chacha_state[13];
 
-		len = min_t(ssize_t, nbytes, CHACHA_BLOCK_SIZE);
+		len = min_t(size_t, nbytes, CHACHA_BLOCK_SIZE);
 		if (copy_to_user(buf, output, len)) {
 			ret = -EFAULT;
 			break;
@@ -721,7 +715,7 @@ struct timer_rand_state {
  * the entropy pool having similar initial state across largely
  * identical devices.
  */
-void add_device_randomness(const void *buf, unsigned int size)
+void add_device_randomness(const void *buf, size_t size)
 {
 	unsigned long time = random_get_entropy() ^ jiffies;
 	unsigned long flags;
@@ -749,7 +743,7 @@ static struct timer_rand_state input_timer_state = INIT_TIMER_RAND_STATE;
  * keyboard scan codes, and 256 upwards for interrupts.
  *
  */
-static void add_timer_randomness(struct timer_rand_state *state, unsigned num)
+static void add_timer_randomness(struct timer_rand_state *state, unsigned int num)
 {
 	struct {
 		long jiffies;
@@ -793,7 +787,7 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned num)
 	 * Round down by 1 bit on general principles,
 	 * and limit entropy estimate to 12 bits.
 	 */
-	credit_entropy_bits(min_t(int, fls(delta >> 1), 11));
+	credit_entropy_bits(min_t(unsigned int, fls(delta >> 1), 11));
 }
 
 void add_input_randomness(unsigned int type, unsigned int code,
@@ -874,8 +868,8 @@ void add_interrupt_randomness(int irq)
 	add_interrupt_bench(cycles);
 
 	if (unlikely(crng_init == 0)) {
-		if ((fast_pool->count >= 64) &&
-		    crng_fast_load((u8 *)fast_pool->pool, sizeof(fast_pool->pool)) > 0) {
+		if (fast_pool->count >= 64 &&
+		    crng_fast_load(fast_pool->pool, sizeof(fast_pool->pool)) > 0) {
 			fast_pool->count = 0;
 			fast_pool->last = now;
 			if (spin_trylock(&input_pool.lock)) {
@@ -893,7 +887,7 @@ void add_interrupt_randomness(int irq)
 		return;
 
 	fast_pool->last = now;
-	__mix_pool_bytes(&fast_pool->pool, sizeof(fast_pool->pool));
+	_mix_pool_bytes(&fast_pool->pool, sizeof(fast_pool->pool));
 	spin_unlock(&input_pool.lock);
 
 	fast_pool->count = 0;
@@ -1002,18 +996,18 @@ static void _warn_unseeded_randomness(const char *func_name, void *caller, void
  * wait_for_random_bytes() should be called and return 0 at least once
  * at any point prior.
  */
-static void _get_random_bytes(void *buf, int nbytes)
+static void _get_random_bytes(void *buf, size_t nbytes)
 {
 	u32 chacha_state[CHACHA_STATE_WORDS];
 	u8 tmp[CHACHA_BLOCK_SIZE];
-	ssize_t len;
+	size_t len;
 
 	trace_get_random_bytes(nbytes, _RET_IP_);
 
 	if (!nbytes)
 		return;
 
-	len = min_t(ssize_t, 32, nbytes);
+	len = min_t(size_t, 32, nbytes);
 	crng_make_state(chacha_state, buf, len);
 	nbytes -= len;
 	buf += len;
@@ -1036,7 +1030,7 @@ static void _get_random_bytes(void *buf, int nbytes)
 	memzero_explicit(chacha_state, sizeof(chacha_state));
 }
 
-void get_random_bytes(void *buf, int nbytes)
+void get_random_bytes(void *buf, size_t nbytes)
 {
 	static void *previous;
 
@@ -1197,25 +1191,19 @@ EXPORT_SYMBOL(del_random_ready_callback);
 
 /*
  * This function will use the architecture-specific hardware random
- * number generator if it is available.  The arch-specific hw RNG will
- * almost certainly be faster than what we can do in software, but it
- * is impossible to verify that it is implemented securely (as
- * opposed, to, say, the AES encryption of a sequence number using a
- * key known by the NSA).  So it's useful if we need the speed, but
- * only if we're willing to trust the hardware manufacturer not to
- * have put in a back door.
- *
- * Return number of bytes filled in.
+ * number generator if it is available. It is not recommended for
+ * use. Use get_random_bytes() instead. It returns the number of
+ * bytes filled in.
  */
-int __must_check get_random_bytes_arch(void *buf, int nbytes)
+size_t __must_check get_random_bytes_arch(void *buf, size_t nbytes)
 {
-	int left = nbytes;
+	size_t left = nbytes;
 	u8 *p = buf;
 
 	trace_get_random_bytes_arch(left, _RET_IP_);
 	while (left) {
 		unsigned long v;
-		int chunk = min_t(int, left, sizeof(unsigned long));
+		size_t chunk = min_t(size_t, left, sizeof(unsigned long));
 
 		if (!arch_get_random_long(&v))
 			break;
@@ -1248,12 +1236,12 @@ early_param("random.trust_cpu", parse_trust_cpu);
  */
 int __init rand_initialize(void)
 {
-	int i;
+	size_t i;
 	ktime_t now = ktime_get_real();
 	bool arch_init = true;
 	unsigned long rv;
 
-	for (i = BLAKE2S_BLOCK_SIZE; i > 0; i -= sizeof(rv)) {
+	for (i = 0; i < BLAKE2S_BLOCK_SIZE; i += sizeof(rv)) {
 		if (!arch_get_random_seed_long_early(&rv) &&
 		    !arch_get_random_long_early(&rv)) {
 			rv = random_get_entropy();
@@ -1302,7 +1290,7 @@ static ssize_t urandom_read_nowarn(struct file *file, char __user *buf,
 
 	nbytes = min_t(size_t, nbytes, INT_MAX >> 6);
 	ret = get_random_bytes_user(buf, nbytes);
-	trace_urandom_read(8 * nbytes, 0, input_pool.entropy_count);
+	trace_urandom_read(nbytes, input_pool.entropy_count);
 	return ret;
 }
 
@@ -1346,19 +1334,18 @@ static __poll_t random_poll(struct file *file, poll_table *wait)
 	return mask;
 }
 
-static int write_pool(const char __user *buffer, size_t count)
+static int write_pool(const char __user *ubuf, size_t count)
 {
-	size_t bytes;
-	u8 buf[BLAKE2S_BLOCK_SIZE];
-	const char __user *p = buffer;
+	size_t len;
+	u8 block[BLAKE2S_BLOCK_SIZE];
 
-	while (count > 0) {
-		bytes = min(count, sizeof(buf));
-		if (copy_from_user(buf, p, bytes))
+	while (count) {
+		len = min(count, sizeof(block));
+		if (copy_from_user(block, ubuf, len))
 			return -EFAULT;
-		count -= bytes;
-		p += bytes;
-		mix_pool_bytes(buf, bytes);
+		count -= len;
+		ubuf += len;
+		mix_pool_bytes(block, len);
 		cond_resched();
 	}
 
@@ -1368,7 +1355,7 @@ static int write_pool(const char __user *buffer, size_t count)
 static ssize_t random_write(struct file *file, const char __user *buffer,
 			    size_t count, loff_t *ppos)
 {
-	size_t ret;
+	int ret;
 
 	ret = write_pool(buffer, count);
 	if (ret)
@@ -1464,8 +1451,6 @@ const struct file_operations urandom_fops = {
 SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count, unsigned int,
 		flags)
 {
-	int ret;
-
 	if (flags & ~(GRND_NONBLOCK | GRND_RANDOM | GRND_INSECURE))
 		return -EINVAL;
 
@@ -1480,6 +1465,8 @@ SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count, unsigned int,
 		count = INT_MAX;
 
 	if (!(flags & GRND_INSECURE) && !crng_ready()) {
+		int ret;
+
 		if (flags & GRND_NONBLOCK)
 			return -EAGAIN;
 		ret = wait_for_random_bytes();
@@ -1751,7 +1738,7 @@ unsigned long randomize_page(unsigned long start, unsigned long range)
  * Those devices may produce endless random bits and will be throttled
  * when our pool is full.
  */
-void add_hwgenerator_randomness(const char *buffer, size_t count,
+void add_hwgenerator_randomness(const void *buffer, size_t count,
 				size_t entropy)
 {
 	if (unlikely(crng_init == 0)) {
@@ -1782,7 +1769,7 @@ EXPORT_SYMBOL_GPL(add_hwgenerator_randomness);
  * it would be regarded as device data.
  * The decision is controlled by CONFIG_RANDOM_TRUST_BOOTLOADER.
  */
-void add_bootloader_randomness(const void *buf, unsigned int size)
+void add_bootloader_randomness(const void *buf, size_t size)
 {
 	if (IS_ENABLED(CONFIG_RANDOM_TRUST_BOOTLOADER))
 		add_hwgenerator_randomness(buf, size, size * 8);
diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h
index 8e6dd908da216..1a9fc38f8938c 100644
--- a/include/linux/hw_random.h
+++ b/include/linux/hw_random.h
@@ -61,6 +61,6 @@ extern int devm_hwrng_register(struct device *dev, struct hwrng *rng);
 extern void hwrng_unregister(struct hwrng *rng);
 extern void devm_hwrng_unregister(struct device *dve, struct hwrng *rng);
 /** Feed random bits into the pool. */
-extern void add_hwgenerator_randomness(const char *buffer, size_t count, size_t entropy);
+extern void add_hwgenerator_randomness(const void *buffer, size_t count, size_t entropy);
 
 #endif /* LINUX_HWRANDOM_H_ */
diff --git a/include/linux/random.h b/include/linux/random.h
index c45b2693e51fb..e92efb39779cc 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -20,8 +20,8 @@ struct random_ready_callback {
 	struct module *owner;
 };
 
-extern void add_device_randomness(const void *, unsigned int);
-extern void add_bootloader_randomness(const void *, unsigned int);
+extern void add_device_randomness(const void *, size_t);
+extern void add_bootloader_randomness(const void *, size_t);
 
 #if defined(LATENT_ENTROPY_PLUGIN) && !defined(__CHECKER__)
 static inline void add_latent_entropy(void)
@@ -37,13 +37,13 @@ extern void add_input_randomness(unsigned int type, unsigned int code,
 				 unsigned int value) __latent_entropy;
 extern void add_interrupt_randomness(int irq) __latent_entropy;
 
-extern void get_random_bytes(void *buf, int nbytes);
+extern void get_random_bytes(void *buf, size_t nbytes);
 extern int wait_for_random_bytes(void);
 extern int __init rand_initialize(void);
 extern bool rng_is_initialized(void);
 extern int add_random_ready_callback(struct random_ready_callback *rdy);
 extern void del_random_ready_callback(struct random_ready_callback *rdy);
-extern int __must_check get_random_bytes_arch(void *buf, int nbytes);
+extern size_t __must_check get_random_bytes_arch(void *buf, size_t nbytes);
 
 #ifndef MODULE
 extern const struct file_operations random_fops, urandom_fops;
@@ -87,7 +87,7 @@ static inline unsigned long get_random_canary(void)
 
 /* Calls wait_for_random_bytes() and then calls get_random_bytes(buf, nbytes).
  * Returns the result of the call to wait_for_random_bytes. */
-static inline int get_random_bytes_wait(void *buf, int nbytes)
+static inline int get_random_bytes_wait(void *buf, size_t nbytes)
 {
 	int ret = wait_for_random_bytes();
 	get_random_bytes(buf, nbytes);
diff --git a/include/trace/events/random.h b/include/trace/events/random.h
index ad149aeaf42c5..0609a2810a120 100644
--- a/include/trace/events/random.h
+++ b/include/trace/events/random.h
@@ -9,13 +9,13 @@
 #include <linux/tracepoint.h>
 
 TRACE_EVENT(add_device_randomness,
-	TP_PROTO(int bytes, unsigned long IP),
+	TP_PROTO(size_t bytes, unsigned long IP),
 
 	TP_ARGS(bytes, IP),
 
 	TP_STRUCT__entry(
-		__field(	  int,	bytes			)
-		__field(unsigned long,	IP			)
+		__field(size_t,		bytes	)
+		__field(unsigned long,	IP	)
 	),
 
 	TP_fast_assign(
@@ -23,18 +23,18 @@ TRACE_EVENT(add_device_randomness,
 		__entry->IP		= IP;
 	),
 
-	TP_printk("bytes %d caller %pS",
+	TP_printk("bytes %zu caller %pS",
 		__entry->bytes, (void *)__entry->IP)
 );
 
 DECLARE_EVENT_CLASS(random__mix_pool_bytes,
-	TP_PROTO(int bytes, unsigned long IP),
+	TP_PROTO(size_t bytes, unsigned long IP),
 
 	TP_ARGS(bytes, IP),
 
 	TP_STRUCT__entry(
-		__field(	  int,	bytes			)
-		__field(unsigned long,	IP			)
+		__field(size_t,		bytes	)
+		__field(unsigned long,	IP	)
 	),
 
 	TP_fast_assign(
@@ -42,12 +42,12 @@ DECLARE_EVENT_CLASS(random__mix_pool_bytes,
 		__entry->IP		= IP;
 	),
 
-	TP_printk("input pool: bytes %d caller %pS",
+	TP_printk("input pool: bytes %zu caller %pS",
 		  __entry->bytes, (void *)__entry->IP)
 );
 
 DEFINE_EVENT(random__mix_pool_bytes, mix_pool_bytes,
-	TP_PROTO(int bytes, unsigned long IP),
+	TP_PROTO(size_t bytes, unsigned long IP),
 
 	TP_ARGS(bytes, IP)
 );
@@ -59,13 +59,13 @@ DEFINE_EVENT(random__mix_pool_bytes, mix_pool_bytes_nolock,
 );
 
 TRACE_EVENT(credit_entropy_bits,
-	TP_PROTO(int bits, int entropy_count, unsigned long IP),
+	TP_PROTO(size_t bits, size_t entropy_count, unsigned long IP),
 
 	TP_ARGS(bits, entropy_count, IP),
 
 	TP_STRUCT__entry(
-		__field(	  int,	bits			)
-		__field(	  int,	entropy_count		)
+		__field(size_t,		bits			)
+		__field(size_t,		entropy_count		)
 		__field(unsigned long,	IP			)
 	),
 
@@ -75,34 +75,34 @@ TRACE_EVENT(credit_entropy_bits,
 		__entry->IP		= IP;
 	),
 
-	TP_printk("input pool: bits %d entropy_count %d caller %pS",
+	TP_printk("input pool: bits %zu entropy_count %zu caller %pS",
 		  __entry->bits, __entry->entropy_count, (void *)__entry->IP)
 );
 
 TRACE_EVENT(add_input_randomness,
-	TP_PROTO(int input_bits),
+	TP_PROTO(size_t input_bits),
 
 	TP_ARGS(input_bits),
 
 	TP_STRUCT__entry(
-		__field(	  int,	input_bits		)
+		__field(size_t,	input_bits		)
 	),
 
 	TP_fast_assign(
 		__entry->input_bits	= input_bits;
 	),
 
-	TP_printk("input_pool_bits %d", __entry->input_bits)
+	TP_printk("input_pool_bits %zu", __entry->input_bits)
 );
 
 TRACE_EVENT(add_disk_randomness,
-	TP_PROTO(dev_t dev, int input_bits),
+	TP_PROTO(dev_t dev, size_t input_bits),
 
 	TP_ARGS(dev, input_bits),
 
 	TP_STRUCT__entry(
-		__field(	dev_t,	dev			)
-		__field(	  int,	input_bits		)
+		__field(dev_t,		dev			)
+		__field(size_t,		input_bits		)
 	),
 
 	TP_fast_assign(
@@ -110,17 +110,17 @@ TRACE_EVENT(add_disk_randomness,
 		__entry->input_bits	= input_bits;
 	),
 
-	TP_printk("dev %d,%d input_pool_bits %d", MAJOR(__entry->dev),
+	TP_printk("dev %d,%d input_pool_bits %zu", MAJOR(__entry->dev),
 		  MINOR(__entry->dev), __entry->input_bits)
 );
 
 DECLARE_EVENT_CLASS(random__get_random_bytes,
-	TP_PROTO(int nbytes, unsigned long IP),
+	TP_PROTO(size_t nbytes, unsigned long IP),
 
 	TP_ARGS(nbytes, IP),
 
 	TP_STRUCT__entry(
-		__field(	  int,	nbytes			)
+		__field(size_t,		nbytes			)
 		__field(unsigned long,	IP			)
 	),
 
@@ -129,29 +129,29 @@ DECLARE_EVENT_CLASS(random__get_random_bytes,
 		__entry->IP		= IP;
 	),
 
-	TP_printk("nbytes %d caller %pS", __entry->nbytes, (void *)__entry->IP)
+	TP_printk("nbytes %zu caller %pS", __entry->nbytes, (void *)__entry->IP)
 );
 
 DEFINE_EVENT(random__get_random_bytes, get_random_bytes,
-	TP_PROTO(int nbytes, unsigned long IP),
+	TP_PROTO(size_t nbytes, unsigned long IP),
 
 	TP_ARGS(nbytes, IP)
 );
 
 DEFINE_EVENT(random__get_random_bytes, get_random_bytes_arch,
-	TP_PROTO(int nbytes, unsigned long IP),
+	TP_PROTO(size_t nbytes, unsigned long IP),
 
 	TP_ARGS(nbytes, IP)
 );
 
 DECLARE_EVENT_CLASS(random__extract_entropy,
-	TP_PROTO(int nbytes, int entropy_count),
+	TP_PROTO(size_t nbytes, size_t entropy_count),
 
 	TP_ARGS(nbytes, entropy_count),
 
 	TP_STRUCT__entry(
-		__field(	  int,	nbytes			)
-		__field(	  int,	entropy_count		)
+		__field(  size_t,	nbytes			)
+		__field(  size_t,	entropy_count		)
 	),
 
 	TP_fast_assign(
@@ -159,37 +159,34 @@ DECLARE_EVENT_CLASS(random__extract_entropy,
 		__entry->entropy_count	= entropy_count;
 	),
 
-	TP_printk("input pool: nbytes %d entropy_count %d",
+	TP_printk("input pool: nbytes %zu entropy_count %zu",
 		  __entry->nbytes, __entry->entropy_count)
 );
 
 
 DEFINE_EVENT(random__extract_entropy, extract_entropy,
-	TP_PROTO(int nbytes, int entropy_count),
+	TP_PROTO(size_t nbytes, size_t entropy_count),
 
 	TP_ARGS(nbytes, entropy_count)
 );
 
 TRACE_EVENT(urandom_read,
-	TP_PROTO(int got_bits, int pool_left, int input_left),
+	TP_PROTO(size_t nbytes, size_t entropy_count),
 
-	TP_ARGS(got_bits, pool_left, input_left),
+	TP_ARGS(nbytes, entropy_count),
 
 	TP_STRUCT__entry(
-		__field(	  int,	got_bits		)
-		__field(	  int,	pool_left		)
-		__field(	  int,	input_left		)
+		__field( size_t,	nbytes		)
+		__field( size_t,	entropy_count	)
 	),
 
 	TP_fast_assign(
-		__entry->got_bits	= got_bits;
-		__entry->pool_left	= pool_left;
-		__entry->input_left	= input_left;
+		__entry->nbytes		= nbytes;
+		__entry->entropy_count	= entropy_count;
 	),
 
-	TP_printk("got_bits %d nonblocking_pool_entropy_left %d "
-		  "input_entropy_left %d", __entry->got_bits,
-		  __entry->pool_left, __entry->input_left)
+	TP_printk("reading: nbytes %zu entropy_count %zu",
+		  __entry->nbytes, __entry->entropy_count)
 );
 
 TRACE_EVENT(prandom_u32,
-- 
GitLab


From 434537ae54ad37e93555de21b6ac8133d6d773a9 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Mon, 7 Feb 2022 23:37:13 +0100
Subject: [PATCH 0541/1586] random: remove outdated INT_MAX >> 6 check in
 urandom_read()

In 79a8468747c5 ("random: check for increase of entropy_count because of
signed conversion"), a number of checks were added around what values
were passed to account(), because account() was doing fancy fixed point
fractional arithmetic, and a user had some ability to pass large values
directly into it. One of things in that commit was limiting those values
to INT_MAX >> 6. The first >> 3 was for bytes to bits, and the next >> 3
was for bits to 1/8 fractional bits.

However, for several years now, urandom reads no longer touch entropy
accounting, and so this check serves no purpose. The current flow is:

urandom_read_nowarn()-->get_random_bytes_user()-->chacha20_block()

Of course, we don't want that size_t to be truncated when adding it into
the ssize_t. But we arrive at urandom_read_nowarn() in the first place
either via ordinary fops, which limits reads to MAX_RW_COUNT, or via
getrandom() which limits reads to INT_MAX.

Cc: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Dominik Brodowski <linux@dominikbrodowski.net>
Reviewed-by: Jann Horn <jannh@google.com>
Reviewed-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/char/random.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 768dee5e081a5..896ec54f8f5cc 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1286,9 +1286,8 @@ void rand_initialize_disk(struct gendisk *disk)
 static ssize_t urandom_read_nowarn(struct file *file, char __user *buf,
 				   size_t nbytes, loff_t *ppos)
 {
-	int ret;
+	ssize_t ret;
 
-	nbytes = min_t(size_t, nbytes, INT_MAX >> 6);
 	ret = get_random_bytes_user(buf, nbytes);
 	trace_urandom_read(nbytes, input_pool.entropy_count);
 	return ret;
-- 
GitLab


From 7b5164fb1279bf0251371848e40bae646b59b3a8 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Wed, 9 Feb 2022 18:42:13 +0100
Subject: [PATCH 0542/1586] random: zero buffer after reading entropy from
 userspace

This buffer may contain entropic data that shouldn't stick around longer
than needed, so zero out the temporary buffer at the end of write_pool().

Reviewed-by: Dominik Brodowski <linux@dominikbrodowski.net>
Reviewed-by: Jann Horn <jannh@google.com>
Reviewed-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/char/random.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 896ec54f8f5cc..8ae7d0b30280c 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1336,19 +1336,24 @@ static __poll_t random_poll(struct file *file, poll_table *wait)
 static int write_pool(const char __user *ubuf, size_t count)
 {
 	size_t len;
+	int ret = 0;
 	u8 block[BLAKE2S_BLOCK_SIZE];
 
 	while (count) {
 		len = min(count, sizeof(block));
-		if (copy_from_user(block, ubuf, len))
-			return -EFAULT;
+		if (copy_from_user(block, ubuf, len)) {
+			ret = -EFAULT;
+			goto out;
+		}
 		count -= len;
 		ubuf += len;
 		mix_pool_bytes(block, len);
 		cond_resched();
 	}
 
-	return 0;
+out:
+	memzero_explicit(block, sizeof(block));
+	return ret;
 }
 
 static ssize_t random_write(struct file *file, const char __user *buffer,
-- 
GitLab


From 7191c628fe07b70d3f37de736d173d1b115396ed Mon Sep 17 00:00:00 2001
From: Dominik Brodowski <linux@dominikbrodowski.net>
Date: Wed, 9 Feb 2022 19:57:06 +0100
Subject: [PATCH 0543/1586] random: fix locking for crng_init in crng_reseed()

crng_init is protected by primary_crng->lock. Therefore, we need
to hold this lock when increasing crng_init to 2. As we shouldn't
hold this lock for too long, only hold it for those parts which
require protection.

Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
Reviewed-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/char/random.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 8ae7d0b30280c..1ccd92efa8f41 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -502,6 +502,7 @@ static void crng_reseed(void)
 	int entropy_count;
 	unsigned long next_gen;
 	u8 key[CHACHA_KEY_SIZE];
+	bool finalize_init = false;
 
 	/*
 	 * First we make sure we have POOL_MIN_BITS of entropy in the pool,
@@ -529,12 +530,14 @@ static void crng_reseed(void)
 		++next_gen;
 	WRITE_ONCE(base_crng.generation, next_gen);
 	WRITE_ONCE(base_crng.birth, jiffies);
-	spin_unlock_irqrestore(&base_crng.lock, flags);
-	memzero_explicit(key, sizeof(key));
-
 	if (crng_init < 2) {
 		invalidate_batched_entropy();
 		crng_init = 2;
+		finalize_init = true;
+	}
+	spin_unlock_irqrestore(&base_crng.lock, flags);
+	memzero_explicit(key, sizeof(key));
+	if (finalize_init) {
 		process_random_ready_list();
 		wake_up_interruptible(&crng_init_wait);
 		kill_fasync(&fasync, SIGIO, POLL_IN);
-- 
GitLab


From 0791e8b655cc373718f0f58800fdc625a3447ac5 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Wed, 9 Feb 2022 22:46:48 +0100
Subject: [PATCH 0544/1586] random: tie batched entropy generation to base_crng
 generation

Now that we have an explicit base_crng generation counter, we don't need
a separate one for batched entropy. Rather, we can just move the
generation forward every time we change crng_init state or update the
base_crng key.

Cc: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/char/random.c | 29 ++++++++---------------------
 1 file changed, 8 insertions(+), 21 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 1ccd92efa8f41..c27ebf7073809 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -430,8 +430,6 @@ static DEFINE_PER_CPU(struct crng, crngs) = {
 
 static DECLARE_WAIT_QUEUE_HEAD(crng_init_wait);
 
-static void invalidate_batched_entropy(void);
-
 /*
  * crng_fast_load() can be called by code in the interrupt service
  * path.  So we can't afford to dilly-dally. Returns the number of
@@ -454,7 +452,7 @@ static size_t crng_fast_load(const void *cp, size_t len)
 		src++; crng_init_cnt++; len--; ret++;
 	}
 	if (crng_init_cnt >= CRNG_INIT_CNT_THRESH) {
-		invalidate_batched_entropy();
+		++base_crng.generation;
 		crng_init = 1;
 	}
 	spin_unlock_irqrestore(&base_crng.lock, flags);
@@ -531,7 +529,6 @@ static void crng_reseed(void)
 	WRITE_ONCE(base_crng.generation, next_gen);
 	WRITE_ONCE(base_crng.birth, jiffies);
 	if (crng_init < 2) {
-		invalidate_batched_entropy();
 		crng_init = 2;
 		finalize_init = true;
 	}
@@ -1256,8 +1253,9 @@ int __init rand_initialize(void)
 	mix_pool_bytes(utsname(), sizeof(*(utsname())));
 
 	extract_entropy(base_crng.key, sizeof(base_crng.key));
+	++base_crng.generation;
+
 	if (arch_init && trust_cpu && crng_init < 2) {
-		invalidate_batched_entropy();
 		crng_init = 2;
 		pr_notice("crng init done (trusting CPU's manufacturer)\n");
 	}
@@ -1607,8 +1605,6 @@ static int __init random_sysctls_init(void)
 device_initcall(random_sysctls_init);
 #endif	/* CONFIG_SYSCTL */
 
-static atomic_t batch_generation = ATOMIC_INIT(0);
-
 struct batched_entropy {
 	union {
 		/*
@@ -1622,8 +1618,8 @@ struct batched_entropy {
 		u32 entropy_u32[CHACHA_BLOCK_SIZE * 3 / (2 * sizeof(u32))];
 	};
 	local_lock_t lock;
+	unsigned long generation;
 	unsigned int position;
-	int generation;
 };
 
 /*
@@ -1643,14 +1639,14 @@ u64 get_random_u64(void)
 	unsigned long flags;
 	struct batched_entropy *batch;
 	static void *previous;
-	int next_gen;
+	unsigned long next_gen;
 
 	warn_unseeded_randomness(&previous);
 
 	local_lock_irqsave(&batched_entropy_u64.lock, flags);
 	batch = raw_cpu_ptr(&batched_entropy_u64);
 
-	next_gen = atomic_read(&batch_generation);
+	next_gen = READ_ONCE(base_crng.generation);
 	if (batch->position >= ARRAY_SIZE(batch->entropy_u64) ||
 	    next_gen != batch->generation) {
 		_get_random_bytes(batch->entropy_u64, sizeof(batch->entropy_u64));
@@ -1677,14 +1673,14 @@ u32 get_random_u32(void)
 	unsigned long flags;
 	struct batched_entropy *batch;
 	static void *previous;
-	int next_gen;
+	unsigned long next_gen;
 
 	warn_unseeded_randomness(&previous);
 
 	local_lock_irqsave(&batched_entropy_u32.lock, flags);
 	batch = raw_cpu_ptr(&batched_entropy_u32);
 
-	next_gen = atomic_read(&batch_generation);
+	next_gen = READ_ONCE(base_crng.generation);
 	if (batch->position >= ARRAY_SIZE(batch->entropy_u32) ||
 	    next_gen != batch->generation) {
 		_get_random_bytes(batch->entropy_u32, sizeof(batch->entropy_u32));
@@ -1700,15 +1696,6 @@ u32 get_random_u32(void)
 }
 EXPORT_SYMBOL(get_random_u32);
 
-/* It's important to invalidate all potential batched entropy that might
- * be stored before the crng is initialized, which we can do lazily by
- * bumping the generation counter.
- */
-static void invalidate_batched_entropy(void)
-{
-	atomic_inc(&batch_generation);
-}
-
 /**
  * randomize_page - Generate a random, page aligned address
  * @start:	The smallest acceptable address the caller will take.
-- 
GitLab


From 95e6060c20a7f5db60163274c5222a725ac118f9 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Thu, 10 Feb 2022 16:35:24 +0100
Subject: [PATCH 0545/1586] random: remove ifdef'd out interrupt bench

With tools like kbench9000 giving more finegrained responses, and this
basically never having been used ever since it was initially added,
let's just get rid of this. There *is* still work to be done on the
interrupt handler, but this really isn't the way it's being developed.

Cc: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 Documentation/admin-guide/sysctl/kernel.rst |  9 -----
 drivers/char/random.c                       | 40 ---------------------
 2 files changed, 49 deletions(-)

diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
index d3c6d9a501a9c..5dd660aac0aee 100644
--- a/Documentation/admin-guide/sysctl/kernel.rst
+++ b/Documentation/admin-guide/sysctl/kernel.rst
@@ -1041,15 +1041,6 @@ This is a directory, with the following entries:
   are woken up. This file is writable for compatibility purposes, but
   writing to it has no effect on any RNG behavior.
 
-If ``drivers/char/random.c`` is built with ``ADD_INTERRUPT_BENCH``
-defined, these additional entries are present:
-
-* ``add_interrupt_avg_cycles``: the average number of cycles between
-  interrupts used to feed the pool;
-
-* ``add_interrupt_avg_deviation``: the standard deviation seen on the
-  number of cycles between interrupts used to feed the pool.
-
 
 randomize_va_space
 ==================
diff --git a/drivers/char/random.c b/drivers/char/random.c
index c27ebf7073809..35c440a0d83cf 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -240,8 +240,6 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/random.h>
 
-/* #define ADD_INTERRUPT_BENCH */
-
 enum {
 	POOL_BITS = BLAKE2S_HASH_SIZE * 8,
 	POOL_MIN_BITS = POOL_BITS /* No point in settling for less. */
@@ -808,27 +806,6 @@ EXPORT_SYMBOL_GPL(add_input_randomness);
 
 static DEFINE_PER_CPU(struct fast_pool, irq_randomness);
 
-#ifdef ADD_INTERRUPT_BENCH
-static unsigned long avg_cycles, avg_deviation;
-
-#define AVG_SHIFT 8 /* Exponential average factor k=1/256 */
-#define FIXED_1_2 (1 << (AVG_SHIFT - 1))
-
-static void add_interrupt_bench(cycles_t start)
-{
-	long delta = random_get_entropy() - start;
-
-	/* Use a weighted moving average */
-	delta = delta - ((avg_cycles + FIXED_1_2) >> AVG_SHIFT);
-	avg_cycles += delta;
-	/* And average deviation */
-	delta = abs(delta) - ((avg_deviation + FIXED_1_2) >> AVG_SHIFT);
-	avg_deviation += delta;
-}
-#else
-#define add_interrupt_bench(x)
-#endif
-
 static u32 get_reg(struct fast_pool *f, struct pt_regs *regs)
 {
 	u32 *ptr = (u32 *)regs;
@@ -865,7 +842,6 @@ void add_interrupt_randomness(int irq)
 		(sizeof(ip) > 4) ? ip >> 32 : get_reg(fast_pool, regs);
 
 	fast_mix(fast_pool);
-	add_interrupt_bench(cycles);
 
 	if (unlikely(crng_init == 0)) {
 		if (fast_pool->count >= 64 &&
@@ -1574,22 +1550,6 @@ static struct ctl_table random_table[] = {
 		.mode		= 0444,
 		.proc_handler	= proc_do_uuid,
 	},
-#ifdef ADD_INTERRUPT_BENCH
-	{
-		.procname	= "add_interrupt_avg_cycles",
-		.data		= &avg_cycles,
-		.maxlen		= sizeof(avg_cycles),
-		.mode		= 0444,
-		.proc_handler	= proc_doulongvec_minmax,
-	},
-	{
-		.procname	= "add_interrupt_avg_deviation",
-		.data		= &avg_deviation,
-		.maxlen		= sizeof(avg_deviation),
-		.mode		= 0444,
-		.proc_handler	= proc_doulongvec_minmax,
-	},
-#endif
 	{ }
 };
 
-- 
GitLab


From 14c174633f349cb41ea90c2c0aaddac157012f74 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Thu, 10 Feb 2022 16:40:44 +0100
Subject: [PATCH 0546/1586] random: remove unused tracepoints

These explicit tracepoints aren't really used and show sign of aging.
It's work to keep these up to date, and before I attempted to keep them
up to date, they weren't up to date, which indicates that they're not
really used. These days there are better ways of introspecting anyway.

Cc: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Dominik Brodowski <linux@dominikbrodowski.net>
Reviewed-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/char/random.c         |  30 +----
 include/trace/events/random.h | 212 ----------------------------------
 lib/random32.c                |   2 -
 3 files changed, 3 insertions(+), 241 deletions(-)
 delete mode 100644 include/trace/events/random.h

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 35c440a0d83cf..055c42246b688 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -237,9 +237,6 @@
 #include <asm/irq_regs.h>
 #include <asm/io.h>
 
-#define CREATE_TRACE_POINTS
-#include <trace/events/random.h>
-
 enum {
 	POOL_BITS = BLAKE2S_HASH_SIZE * 8,
 	POOL_MIN_BITS = POOL_BITS /* No point in settling for less. */
@@ -315,7 +312,6 @@ static void mix_pool_bytes(const void *in, size_t nbytes)
 {
 	unsigned long flags;
 
-	trace_mix_pool_bytes(nbytes, _RET_IP_);
 	spin_lock_irqsave(&input_pool.lock, flags);
 	_mix_pool_bytes(in, nbytes);
 	spin_unlock_irqrestore(&input_pool.lock, flags);
@@ -389,8 +385,6 @@ static void credit_entropy_bits(size_t nbits)
 		entropy_count = min_t(unsigned int, POOL_BITS, orig + add);
 	} while (cmpxchg(&input_pool.entropy_count, orig, entropy_count) != orig);
 
-	trace_credit_entropy_bits(nbits, entropy_count, _RET_IP_);
-
 	if (crng_init < 2 && entropy_count >= POOL_MIN_BITS)
 		crng_reseed();
 }
@@ -721,7 +715,6 @@ void add_device_randomness(const void *buf, size_t size)
 	if (!crng_ready() && size)
 		crng_slow_load(buf, size);
 
-	trace_add_device_randomness(size, _RET_IP_);
 	spin_lock_irqsave(&input_pool.lock, flags);
 	_mix_pool_bytes(buf, size);
 	_mix_pool_bytes(&time, sizeof(time));
@@ -800,7 +793,6 @@ void add_input_randomness(unsigned int type, unsigned int code,
 	last_value = value;
 	add_timer_randomness(&input_timer_state,
 			     (type << 4) ^ code ^ (code >> 4) ^ value);
-	trace_add_input_randomness(input_pool.entropy_count);
 }
 EXPORT_SYMBOL_GPL(add_input_randomness);
 
@@ -880,7 +872,6 @@ void add_disk_randomness(struct gendisk *disk)
 		return;
 	/* first major is 1, so we get >= 0x200 here */
 	add_timer_randomness(disk->random, 0x100 + disk_devt(disk));
-	trace_add_disk_randomness(disk_devt(disk), input_pool.entropy_count);
 }
 EXPORT_SYMBOL_GPL(add_disk_randomness);
 #endif
@@ -905,8 +896,6 @@ static void extract_entropy(void *buf, size_t nbytes)
 	} block;
 	size_t i;
 
-	trace_extract_entropy(nbytes, input_pool.entropy_count);
-
 	for (i = 0; i < ARRAY_SIZE(block.rdseed); ++i) {
 		if (!arch_get_random_seed_long(&block.rdseed[i]) &&
 		    !arch_get_random_long(&block.rdseed[i]))
@@ -978,8 +967,6 @@ static void _get_random_bytes(void *buf, size_t nbytes)
 	u8 tmp[CHACHA_BLOCK_SIZE];
 	size_t len;
 
-	trace_get_random_bytes(nbytes, _RET_IP_);
-
 	if (!nbytes)
 		return;
 
@@ -1176,7 +1163,6 @@ size_t __must_check get_random_bytes_arch(void *buf, size_t nbytes)
 	size_t left = nbytes;
 	u8 *p = buf;
 
-	trace_get_random_bytes_arch(left, _RET_IP_);
 	while (left) {
 		unsigned long v;
 		size_t chunk = min_t(size_t, left, sizeof(unsigned long));
@@ -1260,16 +1246,6 @@ void rand_initialize_disk(struct gendisk *disk)
 }
 #endif
 
-static ssize_t urandom_read_nowarn(struct file *file, char __user *buf,
-				   size_t nbytes, loff_t *ppos)
-{
-	ssize_t ret;
-
-	ret = get_random_bytes_user(buf, nbytes);
-	trace_urandom_read(nbytes, input_pool.entropy_count);
-	return ret;
-}
-
 static ssize_t urandom_read(struct file *file, char __user *buf, size_t nbytes,
 			    loff_t *ppos)
 {
@@ -1282,7 +1258,7 @@ static ssize_t urandom_read(struct file *file, char __user *buf, size_t nbytes,
 				  current->comm, nbytes);
 	}
 
-	return urandom_read_nowarn(file, buf, nbytes, ppos);
+	return get_random_bytes_user(buf, nbytes);
 }
 
 static ssize_t random_read(struct file *file, char __user *buf, size_t nbytes,
@@ -1293,7 +1269,7 @@ static ssize_t random_read(struct file *file, char __user *buf, size_t nbytes,
 	ret = wait_for_random_bytes();
 	if (ret != 0)
 		return ret;
-	return urandom_read_nowarn(file, buf, nbytes, ppos);
+	return get_random_bytes_user(buf, nbytes);
 }
 
 static __poll_t random_poll(struct file *file, poll_table *wait)
@@ -1454,7 +1430,7 @@ SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count, unsigned int,
 		if (unlikely(ret))
 			return ret;
 	}
-	return urandom_read_nowarn(NULL, buf, count, NULL);
+	return get_random_bytes_user(buf, count);
 }
 
 /********************************************************************
diff --git a/include/trace/events/random.h b/include/trace/events/random.h
deleted file mode 100644
index 0609a2810a120..0000000000000
--- a/include/trace/events/random.h
+++ /dev/null
@@ -1,212 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM random
-
-#if !defined(_TRACE_RANDOM_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _TRACE_RANDOM_H
-
-#include <linux/writeback.h>
-#include <linux/tracepoint.h>
-
-TRACE_EVENT(add_device_randomness,
-	TP_PROTO(size_t bytes, unsigned long IP),
-
-	TP_ARGS(bytes, IP),
-
-	TP_STRUCT__entry(
-		__field(size_t,		bytes	)
-		__field(unsigned long,	IP	)
-	),
-
-	TP_fast_assign(
-		__entry->bytes		= bytes;
-		__entry->IP		= IP;
-	),
-
-	TP_printk("bytes %zu caller %pS",
-		__entry->bytes, (void *)__entry->IP)
-);
-
-DECLARE_EVENT_CLASS(random__mix_pool_bytes,
-	TP_PROTO(size_t bytes, unsigned long IP),
-
-	TP_ARGS(bytes, IP),
-
-	TP_STRUCT__entry(
-		__field(size_t,		bytes	)
-		__field(unsigned long,	IP	)
-	),
-
-	TP_fast_assign(
-		__entry->bytes		= bytes;
-		__entry->IP		= IP;
-	),
-
-	TP_printk("input pool: bytes %zu caller %pS",
-		  __entry->bytes, (void *)__entry->IP)
-);
-
-DEFINE_EVENT(random__mix_pool_bytes, mix_pool_bytes,
-	TP_PROTO(size_t bytes, unsigned long IP),
-
-	TP_ARGS(bytes, IP)
-);
-
-DEFINE_EVENT(random__mix_pool_bytes, mix_pool_bytes_nolock,
-	TP_PROTO(int bytes, unsigned long IP),
-
-	TP_ARGS(bytes, IP)
-);
-
-TRACE_EVENT(credit_entropy_bits,
-	TP_PROTO(size_t bits, size_t entropy_count, unsigned long IP),
-
-	TP_ARGS(bits, entropy_count, IP),
-
-	TP_STRUCT__entry(
-		__field(size_t,		bits			)
-		__field(size_t,		entropy_count		)
-		__field(unsigned long,	IP			)
-	),
-
-	TP_fast_assign(
-		__entry->bits		= bits;
-		__entry->entropy_count	= entropy_count;
-		__entry->IP		= IP;
-	),
-
-	TP_printk("input pool: bits %zu entropy_count %zu caller %pS",
-		  __entry->bits, __entry->entropy_count, (void *)__entry->IP)
-);
-
-TRACE_EVENT(add_input_randomness,
-	TP_PROTO(size_t input_bits),
-
-	TP_ARGS(input_bits),
-
-	TP_STRUCT__entry(
-		__field(size_t,	input_bits		)
-	),
-
-	TP_fast_assign(
-		__entry->input_bits	= input_bits;
-	),
-
-	TP_printk("input_pool_bits %zu", __entry->input_bits)
-);
-
-TRACE_EVENT(add_disk_randomness,
-	TP_PROTO(dev_t dev, size_t input_bits),
-
-	TP_ARGS(dev, input_bits),
-
-	TP_STRUCT__entry(
-		__field(dev_t,		dev			)
-		__field(size_t,		input_bits		)
-	),
-
-	TP_fast_assign(
-		__entry->dev		= dev;
-		__entry->input_bits	= input_bits;
-	),
-
-	TP_printk("dev %d,%d input_pool_bits %zu", MAJOR(__entry->dev),
-		  MINOR(__entry->dev), __entry->input_bits)
-);
-
-DECLARE_EVENT_CLASS(random__get_random_bytes,
-	TP_PROTO(size_t nbytes, unsigned long IP),
-
-	TP_ARGS(nbytes, IP),
-
-	TP_STRUCT__entry(
-		__field(size_t,		nbytes			)
-		__field(unsigned long,	IP			)
-	),
-
-	TP_fast_assign(
-		__entry->nbytes		= nbytes;
-		__entry->IP		= IP;
-	),
-
-	TP_printk("nbytes %zu caller %pS", __entry->nbytes, (void *)__entry->IP)
-);
-
-DEFINE_EVENT(random__get_random_bytes, get_random_bytes,
-	TP_PROTO(size_t nbytes, unsigned long IP),
-
-	TP_ARGS(nbytes, IP)
-);
-
-DEFINE_EVENT(random__get_random_bytes, get_random_bytes_arch,
-	TP_PROTO(size_t nbytes, unsigned long IP),
-
-	TP_ARGS(nbytes, IP)
-);
-
-DECLARE_EVENT_CLASS(random__extract_entropy,
-	TP_PROTO(size_t nbytes, size_t entropy_count),
-
-	TP_ARGS(nbytes, entropy_count),
-
-	TP_STRUCT__entry(
-		__field(  size_t,	nbytes			)
-		__field(  size_t,	entropy_count		)
-	),
-
-	TP_fast_assign(
-		__entry->nbytes		= nbytes;
-		__entry->entropy_count	= entropy_count;
-	),
-
-	TP_printk("input pool: nbytes %zu entropy_count %zu",
-		  __entry->nbytes, __entry->entropy_count)
-);
-
-
-DEFINE_EVENT(random__extract_entropy, extract_entropy,
-	TP_PROTO(size_t nbytes, size_t entropy_count),
-
-	TP_ARGS(nbytes, entropy_count)
-);
-
-TRACE_EVENT(urandom_read,
-	TP_PROTO(size_t nbytes, size_t entropy_count),
-
-	TP_ARGS(nbytes, entropy_count),
-
-	TP_STRUCT__entry(
-		__field( size_t,	nbytes		)
-		__field( size_t,	entropy_count	)
-	),
-
-	TP_fast_assign(
-		__entry->nbytes		= nbytes;
-		__entry->entropy_count	= entropy_count;
-	),
-
-	TP_printk("reading: nbytes %zu entropy_count %zu",
-		  __entry->nbytes, __entry->entropy_count)
-);
-
-TRACE_EVENT(prandom_u32,
-
-	TP_PROTO(unsigned int ret),
-
-	TP_ARGS(ret),
-
-	TP_STRUCT__entry(
-		__field(   unsigned int, ret)
-	),
-
-	TP_fast_assign(
-		__entry->ret = ret;
-	),
-
-	TP_printk("ret=%u" , __entry->ret)
-);
-
-#endif /* _TRACE_RANDOM_H */
-
-/* This part must be outside protection */
-#include <trace/define_trace.h>
diff --git a/lib/random32.c b/lib/random32.c
index a57a0e18819d0..3c19820796d04 100644
--- a/lib/random32.c
+++ b/lib/random32.c
@@ -41,7 +41,6 @@
 #include <linux/bitops.h>
 #include <linux/slab.h>
 #include <asm/unaligned.h>
-#include <trace/events/random.h>
 
 /**
  *	prandom_u32_state - seeded pseudo-random number generator.
@@ -387,7 +386,6 @@ u32 prandom_u32(void)
 	struct siprand_state *state = get_cpu_ptr(&net_rand_state);
 	u32 res = siprand_u32(state);
 
-	trace_prandom_u32(res);
 	put_cpu_ptr(&net_rand_state);
 	return res;
 }
-- 
GitLab


From a07fdae346c35c6ba286af1c88e0effcfa330bf9 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Thu, 10 Feb 2022 16:43:57 +0100
Subject: [PATCH 0547/1586] random: add proper SPDX header

Convert the current license into the SPDX notation of "(GPL-2.0 OR
BSD-3-Clause)". This infers GPL-2.0 from the text "ALTERNATIVELY, this
product may be distributed under the terms of the GNU General Public
License, in which case the provisions of the GPL are required INSTEAD OF
the above restrictions" and it infers BSD-3-Clause from the verbatim
BSD 3 clause license in the file.

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: Dominik Brodowski <linux@dominikbrodowski.net>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/char/random.c | 37 +------------------------------------
 1 file changed, 1 insertion(+), 36 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 055c42246b688..5373fae0aa632 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1,44 +1,9 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
 /*
- * random.c -- A strong random number generator
- *
  * Copyright (C) 2017-2022 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
- *
  * Copyright Matt Mackall <mpm@selenic.com>, 2003, 2004, 2005
- *
  * Copyright Theodore Ts'o, 1994, 1995, 1996, 1997, 1998, 1999.  All
  * rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, and the entire permission notice in its entirety,
- *    including the disclaimer of warranties.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. The name of the author may not be used to endorse or promote
- *    products derived from this software without specific prior
- *    written permission.
- *
- * ALTERNATIVELY, this product may be distributed under the terms of
- * the GNU General Public License, in which case the provisions of the GPL are
- * required INSTEAD OF the above restrictions.  (This clause is
- * necessary due to a potential bad interaction between the GPL and
- * the restrictions contained in a BSD-style copyright.)
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
- * WHICH ARE HEREBY DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
- * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
- * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
- * DAMAGE.
  */
 
 /*
-- 
GitLab


From b2f408fe403800c91a49f6589d95b6759ce1b30b Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Thu, 10 Feb 2022 17:01:27 +0100
Subject: [PATCH 0548/1586] random: deobfuscate irq u32/u64 contributions

In the irq handler, we fill out 16 bytes differently on 32-bit and
64-bit platforms, and for 32-bit vs 64-bit cycle counters, which doesn't
always correspond with the bitness of the platform. Whether or not you
like this strangeness, it is a matter of fact.  But it might not be a
fact you well realized until now, because the code that loaded the irq
info into 4 32-bit words was quite confusing.  Instead, this commit
makes everything explicit by having separate (compile-time) branches for
32-bit and 64-bit types.

Cc: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/char/random.c | 49 ++++++++++++++++++++++++-------------------
 1 file changed, 28 insertions(+), 21 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 5373fae0aa632..e7571de3d91a1 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -283,7 +283,10 @@ static void mix_pool_bytes(const void *in, size_t nbytes)
 }
 
 struct fast_pool {
-	u32 pool[4];
+	union {
+		u32 pool32[4];
+		u64 pool64[2];
+	};
 	unsigned long last;
 	u16 reg_idx;
 	u8 count;
@@ -294,10 +297,10 @@ struct fast_pool {
  * collector.  It's hardcoded for an 128 bit pool and assumes that any
  * locks that might be needed are taken by the caller.
  */
-static void fast_mix(struct fast_pool *f)
+static void fast_mix(u32 pool[4])
 {
-	u32 a = f->pool[0],	b = f->pool[1];
-	u32 c = f->pool[2],	d = f->pool[3];
+	u32 a = pool[0],	b = pool[1];
+	u32 c = pool[2],	d = pool[3];
 
 	a += b;			c += d;
 	b = rol32(b, 6);	d = rol32(d, 27);
@@ -315,9 +318,8 @@ static void fast_mix(struct fast_pool *f)
 	b = rol32(b, 16);	d = rol32(d, 14);
 	d ^= a;			b ^= c;
 
-	f->pool[0] = a;  f->pool[1] = b;
-	f->pool[2] = c;  f->pool[3] = d;
-	f->count++;
+	pool[0] = a;  pool[1] = b;
+	pool[2] = c;  pool[3] = d;
 }
 
 static void process_random_ready_list(void)
@@ -784,29 +786,34 @@ void add_interrupt_randomness(int irq)
 	struct pt_regs *regs = get_irq_regs();
 	unsigned long now = jiffies;
 	cycles_t cycles = random_get_entropy();
-	u32 c_high, j_high;
-	u64 ip;
 
 	if (cycles == 0)
 		cycles = get_reg(fast_pool, regs);
-	c_high = (sizeof(cycles) > 4) ? cycles >> 32 : 0;
-	j_high = (sizeof(now) > 4) ? now >> 32 : 0;
-	fast_pool->pool[0] ^= cycles ^ j_high ^ irq;
-	fast_pool->pool[1] ^= now ^ c_high;
-	ip = regs ? instruction_pointer(regs) : _RET_IP_;
-	fast_pool->pool[2] ^= ip;
-	fast_pool->pool[3] ^=
-		(sizeof(ip) > 4) ? ip >> 32 : get_reg(fast_pool, regs);
 
-	fast_mix(fast_pool);
+	if (sizeof(cycles) == 8)
+		fast_pool->pool64[0] ^= cycles ^ rol64(now, 32) ^ irq;
+	else {
+		fast_pool->pool32[0] ^= cycles ^ irq;
+		fast_pool->pool32[1] ^= now;
+	}
+
+	if (sizeof(unsigned long) == 8)
+		fast_pool->pool64[1] ^= regs ? instruction_pointer(regs) : _RET_IP_;
+	else {
+		fast_pool->pool32[2] ^= regs ? instruction_pointer(regs) : _RET_IP_;
+		fast_pool->pool32[3] ^= get_reg(fast_pool, regs);
+	}
+
+	fast_mix(fast_pool->pool32);
+	++fast_pool->count;
 
 	if (unlikely(crng_init == 0)) {
 		if (fast_pool->count >= 64 &&
-		    crng_fast_load(fast_pool->pool, sizeof(fast_pool->pool)) > 0) {
+		    crng_fast_load(fast_pool->pool32, sizeof(fast_pool->pool32)) > 0) {
 			fast_pool->count = 0;
 			fast_pool->last = now;
 			if (spin_trylock(&input_pool.lock)) {
-				_mix_pool_bytes(&fast_pool->pool, sizeof(fast_pool->pool));
+				_mix_pool_bytes(&fast_pool->pool32, sizeof(fast_pool->pool32));
 				spin_unlock(&input_pool.lock);
 			}
 		}
@@ -820,7 +827,7 @@ void add_interrupt_randomness(int irq)
 		return;
 
 	fast_pool->last = now;
-	_mix_pool_bytes(&fast_pool->pool, sizeof(fast_pool->pool));
+	_mix_pool_bytes(&fast_pool->pool32, sizeof(fast_pool->pool32));
 	spin_unlock(&input_pool.lock);
 
 	fast_pool->count = 0;
-- 
GitLab


From 246c03dd899164d0186b6d685d6387f228c28d93 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Fri, 11 Feb 2022 12:19:49 +0100
Subject: [PATCH 0549/1586] random: introduce drain_entropy() helper to
 declutter crng_reseed()

In preparation for separating responsibilities, break out the entropy
count management part of crng_reseed() into its own function.

No functional changes.

Cc: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Dominik Brodowski <linux@dominikbrodowski.net>
Reviewed-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/char/random.c | 36 +++++++++++++++++++++++-------------
 1 file changed, 23 insertions(+), 13 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index e7571de3d91a1..d9a31ac10daf5 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -260,6 +260,7 @@ static struct {
 };
 
 static void extract_entropy(void *buf, size_t nbytes);
+static bool drain_entropy(void *buf, size_t nbytes);
 
 static void crng_reseed(void);
 
@@ -456,23 +457,13 @@ static void crng_slow_load(const void *cp, size_t len)
 static void crng_reseed(void)
 {
 	unsigned long flags;
-	int entropy_count;
 	unsigned long next_gen;
 	u8 key[CHACHA_KEY_SIZE];
 	bool finalize_init = false;
 
-	/*
-	 * First we make sure we have POOL_MIN_BITS of entropy in the pool,
-	 * and then we drain all of it. Only then can we extract a new key.
-	 */
-	do {
-		entropy_count = READ_ONCE(input_pool.entropy_count);
-		if (entropy_count < POOL_MIN_BITS)
-			return;
-	} while (cmpxchg(&input_pool.entropy_count, entropy_count, 0) != entropy_count);
-	extract_entropy(key, sizeof(key));
-	wake_up_interruptible(&random_write_wait);
-	kill_fasync(&fasync, SIGIO, POLL_OUT);
+	/* Only reseed if we can, to prevent brute forcing a small amount of new bits. */
+	if (!drain_entropy(key, sizeof(key)))
+		return;
 
 	/*
 	 * We copy the new key into the base_crng, overwriting the old one,
@@ -900,6 +891,25 @@ static void extract_entropy(void *buf, size_t nbytes)
 	memzero_explicit(&block, sizeof(block));
 }
 
+/*
+ * First we make sure we have POOL_MIN_BITS of entropy in the pool, and then we
+ * set the entropy count to zero (but don't actually touch any data). Only then
+ * can we extract a new key with extract_entropy().
+ */
+static bool drain_entropy(void *buf, size_t nbytes)
+{
+	unsigned int entropy_count;
+	do {
+		entropy_count = READ_ONCE(input_pool.entropy_count);
+		if (entropy_count < POOL_MIN_BITS)
+			return false;
+	} while (cmpxchg(&input_pool.entropy_count, entropy_count, 0) != entropy_count);
+	extract_entropy(buf, nbytes);
+	wake_up_interruptible(&random_write_wait);
+	kill_fasync(&fasync, SIGIO, POLL_OUT);
+	return true;
+}
+
 #define warn_unseeded_randomness(previous) \
 	_warn_unseeded_randomness(__func__, (void *)_RET_IP_, (previous))
 
-- 
GitLab


From 6071a6c0fba2d747742cadcbb3ba26ed756ed73b Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Fri, 11 Feb 2022 12:28:33 +0100
Subject: [PATCH 0550/1586] random: remove useless header comment

This really adds nothing at all useful.

Cc: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Dominik Brodowski <linux@dominikbrodowski.net>
Reviewed-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 include/linux/random.h | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/include/linux/random.h b/include/linux/random.h
index e92efb39779cc..37e1e8c43d7ee 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -1,9 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*
- * include/linux/random.h
- *
- * Include file for the random number generator.
- */
+
 #ifndef _LINUX_RANDOM_H
 #define _LINUX_RANDOM_H
 
-- 
GitLab


From 87e7d5abad0cbc9312dea7f889a57d294c1a5fcc Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Fri, 11 Feb 2022 13:41:41 +0100
Subject: [PATCH 0551/1586] random: remove whitespace and reorder includes

This is purely cosmetic. Future work involves figuring out which of
these headers we need and which we don't.

Reviewed-by: Dominik Brodowski <linux@dominikbrodowski.net>
Reviewed-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/char/random.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index d9a31ac10daf5..ad112f9281820 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -193,11 +193,10 @@
 #include <linux/syscalls.h>
 #include <linux/completion.h>
 #include <linux/uuid.h>
+#include <linux/uaccess.h>
 #include <crypto/chacha.h>
 #include <crypto/blake2s.h>
-
 #include <asm/processor.h>
-#include <linux/uaccess.h>
 #include <asm/irq.h>
 #include <asm/irq_regs.h>
 #include <asm/io.h>
-- 
GitLab


From 5f1bb112006b104b3e2a1e1b39bbb9b2617581e6 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Fri, 11 Feb 2022 12:53:34 +0100
Subject: [PATCH 0552/1586] random: group initialization wait functions

This pulls all of the readiness waiting-focused functions into the first
labeled section.

No functional changes.

Cc: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Dominik Brodowski <linux@dominikbrodowski.net>
Reviewed-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/char/random.c | 333 ++++++++++++++++++++++--------------------
 1 file changed, 172 insertions(+), 161 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index ad112f9281820..dd2da0b123502 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -201,44 +201,197 @@
 #include <asm/irq_regs.h>
 #include <asm/io.h>
 
-enum {
-	POOL_BITS = BLAKE2S_HASH_SIZE * 8,
-	POOL_MIN_BITS = POOL_BITS /* No point in settling for less. */
-};
-
-/*
- * Static global variables
- */
-static DECLARE_WAIT_QUEUE_HEAD(random_write_wait);
-static struct fasync_struct *fasync;
-
-static DEFINE_SPINLOCK(random_ready_list_lock);
-static LIST_HEAD(random_ready_list);
+/*********************************************************************
+ *
+ * Initialization and readiness waiting.
+ *
+ * Much of the RNG infrastructure is devoted to various dependencies
+ * being able to wait until the RNG has collected enough entropy and
+ * is ready for safe consumption.
+ *
+ *********************************************************************/
 
 /*
  * crng_init =  0 --> Uninitialized
  *		1 --> Initialized
  *		2 --> Initialized from input_pool
  *
- * crng_init is protected by primary_crng->lock, and only increases
+ * crng_init is protected by base_crng->lock, and only increases
  * its value (from 0->1->2).
  */
 static int crng_init = 0;
 #define crng_ready() (likely(crng_init > 1))
-static int crng_init_cnt = 0;
-static void process_random_ready_list(void);
-static void _get_random_bytes(void *buf, size_t nbytes);
+/* Various types of waiters for crng_init->2 transition. */
+static DECLARE_WAIT_QUEUE_HEAD(crng_init_wait);
+static struct fasync_struct *fasync;
+static DEFINE_SPINLOCK(random_ready_list_lock);
+static LIST_HEAD(random_ready_list);
 
+/* Control how we warn userspace. */
 static struct ratelimit_state unseeded_warning =
 	RATELIMIT_STATE_INIT("warn_unseeded_randomness", HZ, 3);
 static struct ratelimit_state urandom_warning =
 	RATELIMIT_STATE_INIT("warn_urandom_randomness", HZ, 3);
-
 static int ratelimit_disable __read_mostly;
-
 module_param_named(ratelimit_disable, ratelimit_disable, int, 0644);
 MODULE_PARM_DESC(ratelimit_disable, "Disable random ratelimit suppression");
 
+/*
+ * Returns whether or not the input pool has been seeded and thus guaranteed
+ * to supply cryptographically secure random numbers. This applies to: the
+ * /dev/urandom device, the get_random_bytes function, and the get_random_{u32,
+ * ,u64,int,long} family of functions.
+ *
+ * Returns: true if the input pool has been seeded.
+ *          false if the input pool has not been seeded.
+ */
+bool rng_is_initialized(void)
+{
+	return crng_ready();
+}
+EXPORT_SYMBOL(rng_is_initialized);
+
+/* Used by wait_for_random_bytes(), and considered an entropy collector, below. */
+static void try_to_generate_entropy(void);
+
+/*
+ * Wait for the input pool to be seeded and thus guaranteed to supply
+ * cryptographically secure random numbers. This applies to: the /dev/urandom
+ * device, the get_random_bytes function, and the get_random_{u32,u64,int,long}
+ * family of functions. Using any of these functions without first calling
+ * this function forfeits the guarantee of security.
+ *
+ * Returns: 0 if the input pool has been seeded.
+ *          -ERESTARTSYS if the function was interrupted by a signal.
+ */
+int wait_for_random_bytes(void)
+{
+	if (likely(crng_ready()))
+		return 0;
+
+	do {
+		int ret;
+		ret = wait_event_interruptible_timeout(crng_init_wait, crng_ready(), HZ);
+		if (ret)
+			return ret > 0 ? 0 : ret;
+
+		try_to_generate_entropy();
+	} while (!crng_ready());
+
+	return 0;
+}
+EXPORT_SYMBOL(wait_for_random_bytes);
+
+/*
+ * Add a callback function that will be invoked when the input
+ * pool is initialised.
+ *
+ * returns: 0 if callback is successfully added
+ *	    -EALREADY if pool is already initialised (callback not called)
+ *	    -ENOENT if module for callback is not alive
+ */
+int add_random_ready_callback(struct random_ready_callback *rdy)
+{
+	struct module *owner;
+	unsigned long flags;
+	int err = -EALREADY;
+
+	if (crng_ready())
+		return err;
+
+	owner = rdy->owner;
+	if (!try_module_get(owner))
+		return -ENOENT;
+
+	spin_lock_irqsave(&random_ready_list_lock, flags);
+	if (crng_ready())
+		goto out;
+
+	owner = NULL;
+
+	list_add(&rdy->list, &random_ready_list);
+	err = 0;
+
+out:
+	spin_unlock_irqrestore(&random_ready_list_lock, flags);
+
+	module_put(owner);
+
+	return err;
+}
+EXPORT_SYMBOL(add_random_ready_callback);
+
+/*
+ * Delete a previously registered readiness callback function.
+ */
+void del_random_ready_callback(struct random_ready_callback *rdy)
+{
+	unsigned long flags;
+	struct module *owner = NULL;
+
+	spin_lock_irqsave(&random_ready_list_lock, flags);
+	if (!list_empty(&rdy->list)) {
+		list_del_init(&rdy->list);
+		owner = rdy->owner;
+	}
+	spin_unlock_irqrestore(&random_ready_list_lock, flags);
+
+	module_put(owner);
+}
+EXPORT_SYMBOL(del_random_ready_callback);
+
+static void process_random_ready_list(void)
+{
+	unsigned long flags;
+	struct random_ready_callback *rdy, *tmp;
+
+	spin_lock_irqsave(&random_ready_list_lock, flags);
+	list_for_each_entry_safe(rdy, tmp, &random_ready_list, list) {
+		struct module *owner = rdy->owner;
+
+		list_del_init(&rdy->list);
+		rdy->func(rdy);
+		module_put(owner);
+	}
+	spin_unlock_irqrestore(&random_ready_list_lock, flags);
+}
+
+#define warn_unseeded_randomness(previous) \
+	_warn_unseeded_randomness(__func__, (void *)_RET_IP_, (previous))
+
+static void _warn_unseeded_randomness(const char *func_name, void *caller, void **previous)
+{
+#ifdef CONFIG_WARN_ALL_UNSEEDED_RANDOM
+	const bool print_once = false;
+#else
+	static bool print_once __read_mostly;
+#endif
+
+	if (print_once || crng_ready() ||
+	    (previous && (caller == READ_ONCE(*previous))))
+		return;
+	WRITE_ONCE(*previous, caller);
+#ifndef CONFIG_WARN_ALL_UNSEEDED_RANDOM
+	print_once = true;
+#endif
+	if (__ratelimit(&unseeded_warning))
+		printk_deferred(KERN_NOTICE "random: %s called from %pS with crng_init=%d\n",
+				func_name, caller, crng_init);
+}
+
+
+enum {
+	POOL_BITS = BLAKE2S_HASH_SIZE * 8,
+	POOL_MIN_BITS = POOL_BITS /* No point in settling for less. */
+};
+
+/*
+ * Static global variables
+ */
+static DECLARE_WAIT_QUEUE_HEAD(random_write_wait);
+
+static int crng_init_cnt = 0;
+
 /**********************************************************************
  *
  * OS independent entropy store.   Here are the functions which handle
@@ -322,22 +475,6 @@ static void fast_mix(u32 pool[4])
 	pool[2] = c;  pool[3] = d;
 }
 
-static void process_random_ready_list(void)
-{
-	unsigned long flags;
-	struct random_ready_callback *rdy, *tmp;
-
-	spin_lock_irqsave(&random_ready_list_lock, flags);
-	list_for_each_entry_safe(rdy, tmp, &random_ready_list, list) {
-		struct module *owner = rdy->owner;
-
-		list_del_init(&rdy->list);
-		rdy->func(rdy);
-		module_put(owner);
-	}
-	spin_unlock_irqrestore(&random_ready_list_lock, flags);
-}
-
 static void credit_entropy_bits(size_t nbits)
 {
 	unsigned int entropy_count, orig, add;
@@ -387,8 +524,6 @@ static DEFINE_PER_CPU(struct crng, crngs) = {
 	.lock = INIT_LOCAL_LOCK(crngs.lock),
 };
 
-static DECLARE_WAIT_QUEUE_HEAD(crng_init_wait);
-
 /*
  * crng_fast_load() can be called by code in the interrupt service
  * path.  So we can't afford to dilly-dally. Returns the number of
@@ -909,29 +1044,6 @@ static bool drain_entropy(void *buf, size_t nbytes)
 	return true;
 }
 
-#define warn_unseeded_randomness(previous) \
-	_warn_unseeded_randomness(__func__, (void *)_RET_IP_, (previous))
-
-static void _warn_unseeded_randomness(const char *func_name, void *caller, void **previous)
-{
-#ifdef CONFIG_WARN_ALL_UNSEEDED_RANDOM
-	const bool print_once = false;
-#else
-	static bool print_once __read_mostly;
-#endif
-
-	if (print_once || crng_ready() ||
-	    (previous && (caller == READ_ONCE(*previous))))
-		return;
-	WRITE_ONCE(*previous, caller);
-#ifndef CONFIG_WARN_ALL_UNSEEDED_RANDOM
-	print_once = true;
-#endif
-	if (__ratelimit(&unseeded_warning))
-		printk_deferred(KERN_NOTICE "random: %s called from %pS with crng_init=%d\n",
-				func_name, caller, crng_init);
-}
-
 /*
  * This function is the exported kernel interface.  It returns some
  * number of good random numbers, suitable for key generation, seeding
@@ -1032,107 +1144,6 @@ static void try_to_generate_entropy(void)
 	mix_pool_bytes(&stack.now, sizeof(stack.now));
 }
 
-/*
- * Wait for the urandom pool to be seeded and thus guaranteed to supply
- * cryptographically secure random numbers. This applies to: the /dev/urandom
- * device, the get_random_bytes function, and the get_random_{u32,u64,int,long}
- * family of functions. Using any of these functions without first calling
- * this function forfeits the guarantee of security.
- *
- * Returns: 0 if the urandom pool has been seeded.
- *          -ERESTARTSYS if the function was interrupted by a signal.
- */
-int wait_for_random_bytes(void)
-{
-	if (likely(crng_ready()))
-		return 0;
-
-	do {
-		int ret;
-		ret = wait_event_interruptible_timeout(crng_init_wait, crng_ready(), HZ);
-		if (ret)
-			return ret > 0 ? 0 : ret;
-
-		try_to_generate_entropy();
-	} while (!crng_ready());
-
-	return 0;
-}
-EXPORT_SYMBOL(wait_for_random_bytes);
-
-/*
- * Returns whether or not the urandom pool has been seeded and thus guaranteed
- * to supply cryptographically secure random numbers. This applies to: the
- * /dev/urandom device, the get_random_bytes function, and the get_random_{u32,
- * ,u64,int,long} family of functions.
- *
- * Returns: true if the urandom pool has been seeded.
- *          false if the urandom pool has not been seeded.
- */
-bool rng_is_initialized(void)
-{
-	return crng_ready();
-}
-EXPORT_SYMBOL(rng_is_initialized);
-
-/*
- * Add a callback function that will be invoked when the nonblocking
- * pool is initialised.
- *
- * returns: 0 if callback is successfully added
- *	    -EALREADY if pool is already initialised (callback not called)
- *	    -ENOENT if module for callback is not alive
- */
-int add_random_ready_callback(struct random_ready_callback *rdy)
-{
-	struct module *owner;
-	unsigned long flags;
-	int err = -EALREADY;
-
-	if (crng_ready())
-		return err;
-
-	owner = rdy->owner;
-	if (!try_module_get(owner))
-		return -ENOENT;
-
-	spin_lock_irqsave(&random_ready_list_lock, flags);
-	if (crng_ready())
-		goto out;
-
-	owner = NULL;
-
-	list_add(&rdy->list, &random_ready_list);
-	err = 0;
-
-out:
-	spin_unlock_irqrestore(&random_ready_list_lock, flags);
-
-	module_put(owner);
-
-	return err;
-}
-EXPORT_SYMBOL(add_random_ready_callback);
-
-/*
- * Delete a previously registered readiness callback function.
- */
-void del_random_ready_callback(struct random_ready_callback *rdy)
-{
-	unsigned long flags;
-	struct module *owner = NULL;
-
-	spin_lock_irqsave(&random_ready_list_lock, flags);
-	if (!list_empty(&rdy->list)) {
-		list_del_init(&rdy->list);
-		owner = rdy->owner;
-	}
-	spin_unlock_irqrestore(&random_ready_list_lock, flags);
-
-	module_put(owner);
-}
-EXPORT_SYMBOL(del_random_ready_callback);
-
 /*
  * This function will use the architecture-specific hardware random
  * number generator if it is available. It is not recommended for
-- 
GitLab


From 3655adc7089da4f8ca74cec8fcef73ea5101430e Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Fri, 11 Feb 2022 12:53:34 +0100
Subject: [PATCH 0553/1586] random: group crng functions

This pulls all of the crng-focused functions into the second labeled
section.

No functional changes.

Cc: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Dominik Brodowski <linux@dominikbrodowski.net>
Reviewed-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/char/random.c | 862 ++++++++++++++++++++++--------------------
 1 file changed, 445 insertions(+), 417 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index dd2da0b123502..99b55e76b0043 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -380,122 +380,27 @@ static void _warn_unseeded_randomness(const char *func_name, void *caller, void
 }
 
 
-enum {
-	POOL_BITS = BLAKE2S_HASH_SIZE * 8,
-	POOL_MIN_BITS = POOL_BITS /* No point in settling for less. */
-};
-
-/*
- * Static global variables
- */
-static DECLARE_WAIT_QUEUE_HEAD(random_write_wait);
-
-static int crng_init_cnt = 0;
-
-/**********************************************************************
+/*********************************************************************
  *
- * OS independent entropy store.   Here are the functions which handle
- * storing entropy in an entropy pool.
+ * Fast key erasure RNG, the "crng".
  *
- **********************************************************************/
-
-static struct {
-	struct blake2s_state hash;
-	spinlock_t lock;
-	unsigned int entropy_count;
-} input_pool = {
-	.hash.h = { BLAKE2S_IV0 ^ (0x01010000 | BLAKE2S_HASH_SIZE),
-		    BLAKE2S_IV1, BLAKE2S_IV2, BLAKE2S_IV3, BLAKE2S_IV4,
-		    BLAKE2S_IV5, BLAKE2S_IV6, BLAKE2S_IV7 },
-	.hash.outlen = BLAKE2S_HASH_SIZE,
-	.lock = __SPIN_LOCK_UNLOCKED(input_pool.lock),
-};
-
-static void extract_entropy(void *buf, size_t nbytes);
-static bool drain_entropy(void *buf, size_t nbytes);
-
-static void crng_reseed(void);
-
-/*
- * This function adds bytes into the entropy "pool".  It does not
- * update the entropy estimate.  The caller should call
- * credit_entropy_bits if this is appropriate.
- */
-static void _mix_pool_bytes(const void *in, size_t nbytes)
-{
-	blake2s_update(&input_pool.hash, in, nbytes);
-}
-
-static void mix_pool_bytes(const void *in, size_t nbytes)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&input_pool.lock, flags);
-	_mix_pool_bytes(in, nbytes);
-	spin_unlock_irqrestore(&input_pool.lock, flags);
-}
-
-struct fast_pool {
-	union {
-		u32 pool32[4];
-		u64 pool64[2];
-	};
-	unsigned long last;
-	u16 reg_idx;
-	u8 count;
-};
-
-/*
- * This is a fast mixing routine used by the interrupt randomness
- * collector.  It's hardcoded for an 128 bit pool and assumes that any
- * locks that might be needed are taken by the caller.
- */
-static void fast_mix(u32 pool[4])
-{
-	u32 a = pool[0],	b = pool[1];
-	u32 c = pool[2],	d = pool[3];
-
-	a += b;			c += d;
-	b = rol32(b, 6);	d = rol32(d, 27);
-	d ^= a;			b ^= c;
-
-	a += b;			c += d;
-	b = rol32(b, 16);	d = rol32(d, 14);
-	d ^= a;			b ^= c;
-
-	a += b;			c += d;
-	b = rol32(b, 6);	d = rol32(d, 27);
-	d ^= a;			b ^= c;
-
-	a += b;			c += d;
-	b = rol32(b, 16);	d = rol32(d, 14);
-	d ^= a;			b ^= c;
-
-	pool[0] = a;  pool[1] = b;
-	pool[2] = c;  pool[3] = d;
-}
-
-static void credit_entropy_bits(size_t nbits)
-{
-	unsigned int entropy_count, orig, add;
-
-	if (!nbits)
-		return;
-
-	add = min_t(size_t, nbits, POOL_BITS);
-
-	do {
-		orig = READ_ONCE(input_pool.entropy_count);
-		entropy_count = min_t(unsigned int, POOL_BITS, orig + add);
-	} while (cmpxchg(&input_pool.entropy_count, orig, entropy_count) != orig);
-
-	if (crng_init < 2 && entropy_count >= POOL_MIN_BITS)
-		crng_reseed();
-}
-
-/*********************************************************************
+ * These functions expand entropy from the entropy extractor into
+ * long streams for external consumption using the "fast key erasure"
+ * RNG described at <https://blog.cr.yp.to/20170723-random.html>.
  *
- * CRNG using CHACHA20
+ * There are a few exported interfaces for use by other drivers:
+ *
+ *	void get_random_bytes(void *buf, size_t nbytes)
+ *	u32 get_random_u32()
+ *	u64 get_random_u64()
+ *	unsigned int get_random_int()
+ *	unsigned long get_random_long()
+ *
+ * These interfaces will return the requested number of random bytes
+ * into the given buffer or as a return value. This is equivalent to
+ * a read from /dev/urandom. The integer family of functions may be
+ * higher performance for one-off random integers, because they do a
+ * bit of buffering.
  *
  *********************************************************************/
 
@@ -524,70 +429,14 @@ static DEFINE_PER_CPU(struct crng, crngs) = {
 	.lock = INIT_LOCAL_LOCK(crngs.lock),
 };
 
-/*
- * crng_fast_load() can be called by code in the interrupt service
- * path.  So we can't afford to dilly-dally. Returns the number of
- * bytes processed from cp.
- */
-static size_t crng_fast_load(const void *cp, size_t len)
-{
-	unsigned long flags;
-	const u8 *src = (const u8 *)cp;
-	size_t ret = 0;
-
-	if (!spin_trylock_irqsave(&base_crng.lock, flags))
-		return 0;
-	if (crng_init != 0) {
-		spin_unlock_irqrestore(&base_crng.lock, flags);
-		return 0;
-	}
-	while (len > 0 && crng_init_cnt < CRNG_INIT_CNT_THRESH) {
-		base_crng.key[crng_init_cnt % sizeof(base_crng.key)] ^= *src;
-		src++; crng_init_cnt++; len--; ret++;
-	}
-	if (crng_init_cnt >= CRNG_INIT_CNT_THRESH) {
-		++base_crng.generation;
-		crng_init = 1;
-	}
-	spin_unlock_irqrestore(&base_crng.lock, flags);
-	if (crng_init == 1)
-		pr_notice("fast init done\n");
-	return ret;
-}
+/* Used by crng_reseed() to extract a new seed from the input pool. */
+static bool drain_entropy(void *buf, size_t nbytes);
 
 /*
- * crng_slow_load() is called by add_device_randomness, which has two
- * attributes.  (1) We can't trust the buffer passed to it is
- * guaranteed to be unpredictable (so it might not have any entropy at
- * all), and (2) it doesn't have the performance constraints of
- * crng_fast_load().
- *
- * So, we simply hash the contents in with the current key. Finally,
- * we do *not* advance crng_init_cnt since buffer we may get may be
- * something like a fixed DMI table (for example), which might very
- * well be unique to the machine, but is otherwise unvarying.
+ * This extracts a new crng key from the input pool, but only if there is a
+ * sufficient amount of entropy available, in order to mitigate bruteforcing
+ * of newly added bits.
  */
-static void crng_slow_load(const void *cp, size_t len)
-{
-	unsigned long flags;
-	struct blake2s_state hash;
-
-	blake2s_init(&hash, sizeof(base_crng.key));
-
-	if (!spin_trylock_irqsave(&base_crng.lock, flags))
-		return;
-	if (crng_init != 0) {
-		spin_unlock_irqrestore(&base_crng.lock, flags);
-		return;
-	}
-
-	blake2s_update(&hash, base_crng.key, sizeof(base_crng.key));
-	blake2s_update(&hash, cp, len);
-	blake2s_final(&hash, base_crng.key);
-
-	spin_unlock_irqrestore(&base_crng.lock, flags);
-}
-
 static void crng_reseed(void)
 {
 	unsigned long flags;
@@ -637,13 +486,11 @@ static void crng_reseed(void)
 }
 
 /*
- * The general form here is based on a "fast key erasure RNG" from
- * <https://blog.cr.yp.to/20170723-random.html>. It generates a ChaCha
- * block using the provided key, and then immediately overwites that
- * key with half the block. It returns the resultant ChaCha state to the
- * user, along with the second half of the block containing 32 bytes of
- * random data that may be used; random_data_len may not be greater than
- * 32.
+ * This generates a ChaCha block using the provided key, and then
+ * immediately overwites that key with half the block. It returns
+ * the resultant ChaCha state to the user, along with the second
+ * half of the block containing 32 bytes of random data that may
+ * be used; random_data_len may not be greater than 32.
  */
 static void crng_fast_key_erasure(u8 key[CHACHA_KEY_SIZE],
 				  u32 chacha_state[CHACHA_STATE_WORDS],
@@ -730,51 +577,433 @@ static void crng_make_state(u32 chacha_state[CHACHA_STATE_WORDS],
 	local_unlock_irqrestore(&crngs.lock, flags);
 }
 
-static ssize_t get_random_bytes_user(void __user *buf, size_t nbytes)
+/*
+ * This function is for crng_init == 0 only.
+ *
+ * crng_fast_load() can be called by code in the interrupt service
+ * path.  So we can't afford to dilly-dally. Returns the number of
+ * bytes processed from cp.
+ */
+static size_t crng_fast_load(const void *cp, size_t len)
+{
+	static int crng_init_cnt = 0;
+	unsigned long flags;
+	const u8 *src = (const u8 *)cp;
+	size_t ret = 0;
+
+	if (!spin_trylock_irqsave(&base_crng.lock, flags))
+		return 0;
+	if (crng_init != 0) {
+		spin_unlock_irqrestore(&base_crng.lock, flags);
+		return 0;
+	}
+	while (len > 0 && crng_init_cnt < CRNG_INIT_CNT_THRESH) {
+		base_crng.key[crng_init_cnt % sizeof(base_crng.key)] ^= *src;
+		src++; crng_init_cnt++; len--; ret++;
+	}
+	if (crng_init_cnt >= CRNG_INIT_CNT_THRESH) {
+		++base_crng.generation;
+		crng_init = 1;
+	}
+	spin_unlock_irqrestore(&base_crng.lock, flags);
+	if (crng_init == 1)
+		pr_notice("fast init done\n");
+	return ret;
+}
+
+/*
+ * This function is for crng_init == 0 only.
+ *
+ * crng_slow_load() is called by add_device_randomness, which has two
+ * attributes.  (1) We can't trust the buffer passed to it is
+ * guaranteed to be unpredictable (so it might not have any entropy at
+ * all), and (2) it doesn't have the performance constraints of
+ * crng_fast_load().
+ *
+ * So, we simply hash the contents in with the current key. Finally,
+ * we do *not* advance crng_init_cnt since buffer we may get may be
+ * something like a fixed DMI table (for example), which might very
+ * well be unique to the machine, but is otherwise unvarying.
+ */
+static void crng_slow_load(const void *cp, size_t len)
+{
+	unsigned long flags;
+	struct blake2s_state hash;
+
+	blake2s_init(&hash, sizeof(base_crng.key));
+
+	if (!spin_trylock_irqsave(&base_crng.lock, flags))
+		return;
+	if (crng_init != 0) {
+		spin_unlock_irqrestore(&base_crng.lock, flags);
+		return;
+	}
+
+	blake2s_update(&hash, base_crng.key, sizeof(base_crng.key));
+	blake2s_update(&hash, cp, len);
+	blake2s_final(&hash, base_crng.key);
+
+	spin_unlock_irqrestore(&base_crng.lock, flags);
+}
+
+static void _get_random_bytes(void *buf, size_t nbytes)
 {
-	bool large_request = nbytes > 256;
-	ssize_t ret = 0;
-	size_t len;
 	u32 chacha_state[CHACHA_STATE_WORDS];
-	u8 output[CHACHA_BLOCK_SIZE];
+	u8 tmp[CHACHA_BLOCK_SIZE];
+	size_t len;
+
+	if (!nbytes)
+		return;
+
+	len = min_t(size_t, 32, nbytes);
+	crng_make_state(chacha_state, buf, len);
+	nbytes -= len;
+	buf += len;
+
+	while (nbytes) {
+		if (nbytes < CHACHA_BLOCK_SIZE) {
+			chacha20_block(chacha_state, tmp);
+			memcpy(buf, tmp, nbytes);
+			memzero_explicit(tmp, sizeof(tmp));
+			break;
+		}
+
+		chacha20_block(chacha_state, buf);
+		if (unlikely(chacha_state[12] == 0))
+			++chacha_state[13];
+		nbytes -= CHACHA_BLOCK_SIZE;
+		buf += CHACHA_BLOCK_SIZE;
+	}
+
+	memzero_explicit(chacha_state, sizeof(chacha_state));
+}
+
+/*
+ * This function is the exported kernel interface.  It returns some
+ * number of good random numbers, suitable for key generation, seeding
+ * TCP sequence numbers, etc.  It does not rely on the hardware random
+ * number generator.  For random bytes direct from the hardware RNG
+ * (when available), use get_random_bytes_arch(). In order to ensure
+ * that the randomness provided by this function is okay, the function
+ * wait_for_random_bytes() should be called and return 0 at least once
+ * at any point prior.
+ */
+void get_random_bytes(void *buf, size_t nbytes)
+{
+	static void *previous;
+
+	warn_unseeded_randomness(&previous);
+	_get_random_bytes(buf, nbytes);
+}
+EXPORT_SYMBOL(get_random_bytes);
+
+static ssize_t get_random_bytes_user(void __user *buf, size_t nbytes)
+{
+	bool large_request = nbytes > 256;
+	ssize_t ret = 0;
+	size_t len;
+	u32 chacha_state[CHACHA_STATE_WORDS];
+	u8 output[CHACHA_BLOCK_SIZE];
+
+	if (!nbytes)
+		return 0;
+
+	len = min_t(size_t, 32, nbytes);
+	crng_make_state(chacha_state, output, len);
+
+	if (copy_to_user(buf, output, len))
+		return -EFAULT;
+	nbytes -= len;
+	buf += len;
+	ret += len;
+
+	while (nbytes) {
+		if (large_request && need_resched()) {
+			if (signal_pending(current))
+				break;
+			schedule();
+		}
+
+		chacha20_block(chacha_state, output);
+		if (unlikely(chacha_state[12] == 0))
+			++chacha_state[13];
+
+		len = min_t(size_t, nbytes, CHACHA_BLOCK_SIZE);
+		if (copy_to_user(buf, output, len)) {
+			ret = -EFAULT;
+			break;
+		}
+
+		nbytes -= len;
+		buf += len;
+		ret += len;
+	}
+
+	memzero_explicit(chacha_state, sizeof(chacha_state));
+	memzero_explicit(output, sizeof(output));
+	return ret;
+}
+
+/*
+ * Batched entropy returns random integers. The quality of the random
+ * number is good as /dev/urandom. In order to ensure that the randomness
+ * provided by this function is okay, the function wait_for_random_bytes()
+ * should be called and return 0 at least once at any point prior.
+ */
+struct batched_entropy {
+	union {
+		/*
+		 * We make this 1.5x a ChaCha block, so that we get the
+		 * remaining 32 bytes from fast key erasure, plus one full
+		 * block from the detached ChaCha state. We can increase
+		 * the size of this later if needed so long as we keep the
+		 * formula of (integer_blocks + 0.5) * CHACHA_BLOCK_SIZE.
+		 */
+		u64 entropy_u64[CHACHA_BLOCK_SIZE * 3 / (2 * sizeof(u64))];
+		u32 entropy_u32[CHACHA_BLOCK_SIZE * 3 / (2 * sizeof(u32))];
+	};
+	local_lock_t lock;
+	unsigned long generation;
+	unsigned int position;
+};
+
+
+static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u64) = {
+	.lock = INIT_LOCAL_LOCK(batched_entropy_u64.lock),
+	.position = UINT_MAX
+};
+
+u64 get_random_u64(void)
+{
+	u64 ret;
+	unsigned long flags;
+	struct batched_entropy *batch;
+	static void *previous;
+	unsigned long next_gen;
+
+	warn_unseeded_randomness(&previous);
+
+	local_lock_irqsave(&batched_entropy_u64.lock, flags);
+	batch = raw_cpu_ptr(&batched_entropy_u64);
+
+	next_gen = READ_ONCE(base_crng.generation);
+	if (batch->position >= ARRAY_SIZE(batch->entropy_u64) ||
+	    next_gen != batch->generation) {
+		_get_random_bytes(batch->entropy_u64, sizeof(batch->entropy_u64));
+		batch->position = 0;
+		batch->generation = next_gen;
+	}
+
+	ret = batch->entropy_u64[batch->position];
+	batch->entropy_u64[batch->position] = 0;
+	++batch->position;
+	local_unlock_irqrestore(&batched_entropy_u64.lock, flags);
+	return ret;
+}
+EXPORT_SYMBOL(get_random_u64);
+
+static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u32) = {
+	.lock = INIT_LOCAL_LOCK(batched_entropy_u32.lock),
+	.position = UINT_MAX
+};
+
+u32 get_random_u32(void)
+{
+	u32 ret;
+	unsigned long flags;
+	struct batched_entropy *batch;
+	static void *previous;
+	unsigned long next_gen;
+
+	warn_unseeded_randomness(&previous);
+
+	local_lock_irqsave(&batched_entropy_u32.lock, flags);
+	batch = raw_cpu_ptr(&batched_entropy_u32);
+
+	next_gen = READ_ONCE(base_crng.generation);
+	if (batch->position >= ARRAY_SIZE(batch->entropy_u32) ||
+	    next_gen != batch->generation) {
+		_get_random_bytes(batch->entropy_u32, sizeof(batch->entropy_u32));
+		batch->position = 0;
+		batch->generation = next_gen;
+	}
+
+	ret = batch->entropy_u32[batch->position];
+	batch->entropy_u32[batch->position] = 0;
+	++batch->position;
+	local_unlock_irqrestore(&batched_entropy_u32.lock, flags);
+	return ret;
+}
+EXPORT_SYMBOL(get_random_u32);
+
+/**
+ * randomize_page - Generate a random, page aligned address
+ * @start:	The smallest acceptable address the caller will take.
+ * @range:	The size of the area, starting at @start, within which the
+ *		random address must fall.
+ *
+ * If @start + @range would overflow, @range is capped.
+ *
+ * NOTE: Historical use of randomize_range, which this replaces, presumed that
+ * @start was already page aligned.  We now align it regardless.
+ *
+ * Return: A page aligned address within [start, start + range).  On error,
+ * @start is returned.
+ */
+unsigned long randomize_page(unsigned long start, unsigned long range)
+{
+	if (!PAGE_ALIGNED(start)) {
+		range -= PAGE_ALIGN(start) - start;
+		start = PAGE_ALIGN(start);
+	}
+
+	if (start > ULONG_MAX - range)
+		range = ULONG_MAX - start;
+
+	range >>= PAGE_SHIFT;
+
+	if (range == 0)
+		return start;
+
+	return start + (get_random_long() % range << PAGE_SHIFT);
+}
+
+/*
+ * This function will use the architecture-specific hardware random
+ * number generator if it is available. It is not recommended for
+ * use. Use get_random_bytes() instead. It returns the number of
+ * bytes filled in.
+ */
+size_t __must_check get_random_bytes_arch(void *buf, size_t nbytes)
+{
+	size_t left = nbytes;
+	u8 *p = buf;
+
+	while (left) {
+		unsigned long v;
+		size_t chunk = min_t(size_t, left, sizeof(unsigned long));
+
+		if (!arch_get_random_long(&v))
+			break;
+
+		memcpy(p, &v, chunk);
+		p += chunk;
+		left -= chunk;
+	}
+
+	return nbytes - left;
+}
+EXPORT_SYMBOL(get_random_bytes_arch);
+
+enum {
+	POOL_BITS = BLAKE2S_HASH_SIZE * 8,
+	POOL_MIN_BITS = POOL_BITS /* No point in settling for less. */
+};
+
+/*
+ * Static global variables
+ */
+static DECLARE_WAIT_QUEUE_HEAD(random_write_wait);
+
+/**********************************************************************
+ *
+ * OS independent entropy store.   Here are the functions which handle
+ * storing entropy in an entropy pool.
+ *
+ **********************************************************************/
+
+static struct {
+	struct blake2s_state hash;
+	spinlock_t lock;
+	unsigned int entropy_count;
+} input_pool = {
+	.hash.h = { BLAKE2S_IV0 ^ (0x01010000 | BLAKE2S_HASH_SIZE),
+		    BLAKE2S_IV1, BLAKE2S_IV2, BLAKE2S_IV3, BLAKE2S_IV4,
+		    BLAKE2S_IV5, BLAKE2S_IV6, BLAKE2S_IV7 },
+	.hash.outlen = BLAKE2S_HASH_SIZE,
+	.lock = __SPIN_LOCK_UNLOCKED(input_pool.lock),
+};
+
+static void extract_entropy(void *buf, size_t nbytes);
+static bool drain_entropy(void *buf, size_t nbytes);
+
+static void crng_reseed(void);
+
+/*
+ * This function adds bytes into the entropy "pool".  It does not
+ * update the entropy estimate.  The caller should call
+ * credit_entropy_bits if this is appropriate.
+ */
+static void _mix_pool_bytes(const void *in, size_t nbytes)
+{
+	blake2s_update(&input_pool.hash, in, nbytes);
+}
+
+static void mix_pool_bytes(const void *in, size_t nbytes)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&input_pool.lock, flags);
+	_mix_pool_bytes(in, nbytes);
+	spin_unlock_irqrestore(&input_pool.lock, flags);
+}
+
+struct fast_pool {
+	union {
+		u32 pool32[4];
+		u64 pool64[2];
+	};
+	unsigned long last;
+	u16 reg_idx;
+	u8 count;
+};
+
+/*
+ * This is a fast mixing routine used by the interrupt randomness
+ * collector.  It's hardcoded for an 128 bit pool and assumes that any
+ * locks that might be needed are taken by the caller.
+ */
+static void fast_mix(u32 pool[4])
+{
+	u32 a = pool[0],	b = pool[1];
+	u32 c = pool[2],	d = pool[3];
+
+	a += b;			c += d;
+	b = rol32(b, 6);	d = rol32(d, 27);
+	d ^= a;			b ^= c;
 
-	if (!nbytes)
-		return 0;
+	a += b;			c += d;
+	b = rol32(b, 16);	d = rol32(d, 14);
+	d ^= a;			b ^= c;
 
-	len = min_t(size_t, 32, nbytes);
-	crng_make_state(chacha_state, output, len);
+	a += b;			c += d;
+	b = rol32(b, 6);	d = rol32(d, 27);
+	d ^= a;			b ^= c;
 
-	if (copy_to_user(buf, output, len))
-		return -EFAULT;
-	nbytes -= len;
-	buf += len;
-	ret += len;
+	a += b;			c += d;
+	b = rol32(b, 16);	d = rol32(d, 14);
+	d ^= a;			b ^= c;
 
-	while (nbytes) {
-		if (large_request && need_resched()) {
-			if (signal_pending(current))
-				break;
-			schedule();
-		}
+	pool[0] = a;  pool[1] = b;
+	pool[2] = c;  pool[3] = d;
+}
 
-		chacha20_block(chacha_state, output);
-		if (unlikely(chacha_state[12] == 0))
-			++chacha_state[13];
+static void credit_entropy_bits(size_t nbits)
+{
+	unsigned int entropy_count, orig, add;
 
-		len = min_t(size_t, nbytes, CHACHA_BLOCK_SIZE);
-		if (copy_to_user(buf, output, len)) {
-			ret = -EFAULT;
-			break;
-		}
+	if (!nbits)
+		return;
 
-		nbytes -= len;
-		buf += len;
-		ret += len;
-	}
+	add = min_t(size_t, nbits, POOL_BITS);
 
-	memzero_explicit(chacha_state, sizeof(chacha_state));
-	memzero_explicit(output, sizeof(output));
-	return ret;
+	do {
+		orig = READ_ONCE(input_pool.entropy_count);
+		entropy_count = min_t(unsigned int, POOL_BITS, orig + add);
+	} while (cmpxchg(&input_pool.entropy_count, orig, entropy_count) != orig);
+
+	if (crng_init < 2 && entropy_count >= POOL_MIN_BITS)
+		crng_reseed();
 }
 
 /*********************************************************************
@@ -1044,57 +1273,6 @@ static bool drain_entropy(void *buf, size_t nbytes)
 	return true;
 }
 
-/*
- * This function is the exported kernel interface.  It returns some
- * number of good random numbers, suitable for key generation, seeding
- * TCP sequence numbers, etc.  It does not rely on the hardware random
- * number generator.  For random bytes direct from the hardware RNG
- * (when available), use get_random_bytes_arch(). In order to ensure
- * that the randomness provided by this function is okay, the function
- * wait_for_random_bytes() should be called and return 0 at least once
- * at any point prior.
- */
-static void _get_random_bytes(void *buf, size_t nbytes)
-{
-	u32 chacha_state[CHACHA_STATE_WORDS];
-	u8 tmp[CHACHA_BLOCK_SIZE];
-	size_t len;
-
-	if (!nbytes)
-		return;
-
-	len = min_t(size_t, 32, nbytes);
-	crng_make_state(chacha_state, buf, len);
-	nbytes -= len;
-	buf += len;
-
-	while (nbytes) {
-		if (nbytes < CHACHA_BLOCK_SIZE) {
-			chacha20_block(chacha_state, tmp);
-			memcpy(buf, tmp, nbytes);
-			memzero_explicit(tmp, sizeof(tmp));
-			break;
-		}
-
-		chacha20_block(chacha_state, buf);
-		if (unlikely(chacha_state[12] == 0))
-			++chacha_state[13];
-		nbytes -= CHACHA_BLOCK_SIZE;
-		buf += CHACHA_BLOCK_SIZE;
-	}
-
-	memzero_explicit(chacha_state, sizeof(chacha_state));
-}
-
-void get_random_bytes(void *buf, size_t nbytes)
-{
-	static void *previous;
-
-	warn_unseeded_randomness(&previous);
-	_get_random_bytes(buf, nbytes);
-}
-EXPORT_SYMBOL(get_random_bytes);
-
 /*
  * Each time the timer fires, we expect that we got an unpredictable
  * jump in the cycle counter. Even if the timer is running on another
@@ -1144,33 +1322,6 @@ static void try_to_generate_entropy(void)
 	mix_pool_bytes(&stack.now, sizeof(stack.now));
 }
 
-/*
- * This function will use the architecture-specific hardware random
- * number generator if it is available. It is not recommended for
- * use. Use get_random_bytes() instead. It returns the number of
- * bytes filled in.
- */
-size_t __must_check get_random_bytes_arch(void *buf, size_t nbytes)
-{
-	size_t left = nbytes;
-	u8 *p = buf;
-
-	while (left) {
-		unsigned long v;
-		size_t chunk = min_t(size_t, left, sizeof(unsigned long));
-
-		if (!arch_get_random_long(&v))
-			break;
-
-		memcpy(p, &v, chunk);
-		p += chunk;
-		left -= chunk;
-	}
-
-	return nbytes - left;
-}
-EXPORT_SYMBOL(get_random_bytes_arch);
-
 static bool trust_cpu __ro_after_init = IS_ENABLED(CONFIG_RANDOM_TRUST_CPU);
 static int __init parse_trust_cpu(char *arg)
 {
@@ -1533,129 +1684,6 @@ static int __init random_sysctls_init(void)
 device_initcall(random_sysctls_init);
 #endif	/* CONFIG_SYSCTL */
 
-struct batched_entropy {
-	union {
-		/*
-		 * We make this 1.5x a ChaCha block, so that we get the
-		 * remaining 32 bytes from fast key erasure, plus one full
-		 * block from the detached ChaCha state. We can increase
-		 * the size of this later if needed so long as we keep the
-		 * formula of (integer_blocks + 0.5) * CHACHA_BLOCK_SIZE.
-		 */
-		u64 entropy_u64[CHACHA_BLOCK_SIZE * 3 / (2 * sizeof(u64))];
-		u32 entropy_u32[CHACHA_BLOCK_SIZE * 3 / (2 * sizeof(u32))];
-	};
-	local_lock_t lock;
-	unsigned long generation;
-	unsigned int position;
-};
-
-/*
- * Get a random word for internal kernel use only. The quality of the random
- * number is good as /dev/urandom. In order to ensure that the randomness
- * provided by this function is okay, the function wait_for_random_bytes()
- * should be called and return 0 at least once at any point prior.
- */
-static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u64) = {
-	.lock = INIT_LOCAL_LOCK(batched_entropy_u64.lock),
-	.position = UINT_MAX
-};
-
-u64 get_random_u64(void)
-{
-	u64 ret;
-	unsigned long flags;
-	struct batched_entropy *batch;
-	static void *previous;
-	unsigned long next_gen;
-
-	warn_unseeded_randomness(&previous);
-
-	local_lock_irqsave(&batched_entropy_u64.lock, flags);
-	batch = raw_cpu_ptr(&batched_entropy_u64);
-
-	next_gen = READ_ONCE(base_crng.generation);
-	if (batch->position >= ARRAY_SIZE(batch->entropy_u64) ||
-	    next_gen != batch->generation) {
-		_get_random_bytes(batch->entropy_u64, sizeof(batch->entropy_u64));
-		batch->position = 0;
-		batch->generation = next_gen;
-	}
-
-	ret = batch->entropy_u64[batch->position];
-	batch->entropy_u64[batch->position] = 0;
-	++batch->position;
-	local_unlock_irqrestore(&batched_entropy_u64.lock, flags);
-	return ret;
-}
-EXPORT_SYMBOL(get_random_u64);
-
-static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u32) = {
-	.lock = INIT_LOCAL_LOCK(batched_entropy_u32.lock),
-	.position = UINT_MAX
-};
-
-u32 get_random_u32(void)
-{
-	u32 ret;
-	unsigned long flags;
-	struct batched_entropy *batch;
-	static void *previous;
-	unsigned long next_gen;
-
-	warn_unseeded_randomness(&previous);
-
-	local_lock_irqsave(&batched_entropy_u32.lock, flags);
-	batch = raw_cpu_ptr(&batched_entropy_u32);
-
-	next_gen = READ_ONCE(base_crng.generation);
-	if (batch->position >= ARRAY_SIZE(batch->entropy_u32) ||
-	    next_gen != batch->generation) {
-		_get_random_bytes(batch->entropy_u32, sizeof(batch->entropy_u32));
-		batch->position = 0;
-		batch->generation = next_gen;
-	}
-
-	ret = batch->entropy_u32[batch->position];
-	batch->entropy_u32[batch->position] = 0;
-	++batch->position;
-	local_unlock_irqrestore(&batched_entropy_u32.lock, flags);
-	return ret;
-}
-EXPORT_SYMBOL(get_random_u32);
-
-/**
- * randomize_page - Generate a random, page aligned address
- * @start:	The smallest acceptable address the caller will take.
- * @range:	The size of the area, starting at @start, within which the
- *		random address must fall.
- *
- * If @start + @range would overflow, @range is capped.
- *
- * NOTE: Historical use of randomize_range, which this replaces, presumed that
- * @start was already page aligned.  We now align it regardless.
- *
- * Return: A page aligned address within [start, start + range).  On error,
- * @start is returned.
- */
-unsigned long randomize_page(unsigned long start, unsigned long range)
-{
-	if (!PAGE_ALIGNED(start)) {
-		range -= PAGE_ALIGN(start) - start;
-		start = PAGE_ALIGN(start);
-	}
-
-	if (start > ULONG_MAX - range)
-		range = ULONG_MAX - start;
-
-	range >>= PAGE_SHIFT;
-
-	if (range == 0)
-		return start;
-
-	return start + (get_random_long() % range << PAGE_SHIFT);
-}
-
 /* Interface for in-kernel drivers of true hardware RNGs.
  * Those devices may produce endless random bits and will be throttled
  * when our pool is full.
-- 
GitLab


From a5ed7cb1a7732ef11959332d507889fbc39ebbb4 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Fri, 11 Feb 2022 12:53:34 +0100
Subject: [PATCH 0554/1586] random: group entropy extraction functions

This pulls all of the entropy extraction-focused functions into the
third labeled section.

No functional changes.

Cc: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/char/random.c | 216 +++++++++++++++++++++---------------------
 1 file changed, 109 insertions(+), 107 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 99b55e76b0043..ce530a44b4d07 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -895,23 +895,36 @@ size_t __must_check get_random_bytes_arch(void *buf, size_t nbytes)
 }
 EXPORT_SYMBOL(get_random_bytes_arch);
 
+
+/**********************************************************************
+ *
+ * Entropy accumulation and extraction routines.
+ *
+ * Callers may add entropy via:
+ *
+ *     static void mix_pool_bytes(const void *in, size_t nbytes)
+ *
+ * After which, if added entropy should be credited:
+ *
+ *     static void credit_entropy_bits(size_t nbits)
+ *
+ * Finally, extract entropy via these two, with the latter one
+ * setting the entropy count to zero and extracting only if there
+ * is POOL_MIN_BITS entropy credited prior:
+ *
+ *     static void extract_entropy(void *buf, size_t nbytes)
+ *     static bool drain_entropy(void *buf, size_t nbytes)
+ *
+ **********************************************************************/
+
 enum {
 	POOL_BITS = BLAKE2S_HASH_SIZE * 8,
 	POOL_MIN_BITS = POOL_BITS /* No point in settling for less. */
 };
 
-/*
- * Static global variables
- */
+/* For notifying userspace should write into /dev/random. */
 static DECLARE_WAIT_QUEUE_HEAD(random_write_wait);
 
-/**********************************************************************
- *
- * OS independent entropy store.   Here are the functions which handle
- * storing entropy in an entropy pool.
- *
- **********************************************************************/
-
 static struct {
 	struct blake2s_state hash;
 	spinlock_t lock;
@@ -924,28 +937,106 @@ static struct {
 	.lock = __SPIN_LOCK_UNLOCKED(input_pool.lock),
 };
 
-static void extract_entropy(void *buf, size_t nbytes);
-static bool drain_entropy(void *buf, size_t nbytes);
-
-static void crng_reseed(void);
+static void _mix_pool_bytes(const void *in, size_t nbytes)
+{
+	blake2s_update(&input_pool.hash, in, nbytes);
+}
 
 /*
  * This function adds bytes into the entropy "pool".  It does not
  * update the entropy estimate.  The caller should call
  * credit_entropy_bits if this is appropriate.
  */
-static void _mix_pool_bytes(const void *in, size_t nbytes)
+static void mix_pool_bytes(const void *in, size_t nbytes)
 {
-	blake2s_update(&input_pool.hash, in, nbytes);
+	unsigned long flags;
+
+	spin_lock_irqsave(&input_pool.lock, flags);
+	_mix_pool_bytes(in, nbytes);
+	spin_unlock_irqrestore(&input_pool.lock, flags);
 }
 
-static void mix_pool_bytes(const void *in, size_t nbytes)
+static void credit_entropy_bits(size_t nbits)
+{
+	unsigned int entropy_count, orig, add;
+
+	if (!nbits)
+		return;
+
+	add = min_t(size_t, nbits, POOL_BITS);
+
+	do {
+		orig = READ_ONCE(input_pool.entropy_count);
+		entropy_count = min_t(unsigned int, POOL_BITS, orig + add);
+	} while (cmpxchg(&input_pool.entropy_count, orig, entropy_count) != orig);
+
+	if (crng_init < 2 && entropy_count >= POOL_MIN_BITS)
+		crng_reseed();
+}
+
+/*
+ * This is an HKDF-like construction for using the hashed collected entropy
+ * as a PRF key, that's then expanded block-by-block.
+ */
+static void extract_entropy(void *buf, size_t nbytes)
 {
 	unsigned long flags;
+	u8 seed[BLAKE2S_HASH_SIZE], next_key[BLAKE2S_HASH_SIZE];
+	struct {
+		unsigned long rdseed[32 / sizeof(long)];
+		size_t counter;
+	} block;
+	size_t i;
+
+	for (i = 0; i < ARRAY_SIZE(block.rdseed); ++i) {
+		if (!arch_get_random_seed_long(&block.rdseed[i]) &&
+		    !arch_get_random_long(&block.rdseed[i]))
+			block.rdseed[i] = random_get_entropy();
+	}
 
 	spin_lock_irqsave(&input_pool.lock, flags);
-	_mix_pool_bytes(in, nbytes);
+
+	/* seed = HASHPRF(last_key, entropy_input) */
+	blake2s_final(&input_pool.hash, seed);
+
+	/* next_key = HASHPRF(seed, RDSEED || 0) */
+	block.counter = 0;
+	blake2s(next_key, (u8 *)&block, seed, sizeof(next_key), sizeof(block), sizeof(seed));
+	blake2s_init_key(&input_pool.hash, BLAKE2S_HASH_SIZE, next_key, sizeof(next_key));
+
 	spin_unlock_irqrestore(&input_pool.lock, flags);
+	memzero_explicit(next_key, sizeof(next_key));
+
+	while (nbytes) {
+		i = min_t(size_t, nbytes, BLAKE2S_HASH_SIZE);
+		/* output = HASHPRF(seed, RDSEED || ++counter) */
+		++block.counter;
+		blake2s(buf, (u8 *)&block, seed, i, sizeof(block), sizeof(seed));
+		nbytes -= i;
+		buf += i;
+	}
+
+	memzero_explicit(seed, sizeof(seed));
+	memzero_explicit(&block, sizeof(block));
+}
+
+/*
+ * First we make sure we have POOL_MIN_BITS of entropy in the pool, and then we
+ * set the entropy count to zero (but don't actually touch any data). Only then
+ * can we extract a new key with extract_entropy().
+ */
+static bool drain_entropy(void *buf, size_t nbytes)
+{
+	unsigned int entropy_count;
+	do {
+		entropy_count = READ_ONCE(input_pool.entropy_count);
+		if (entropy_count < POOL_MIN_BITS)
+			return false;
+	} while (cmpxchg(&input_pool.entropy_count, entropy_count, 0) != entropy_count);
+	extract_entropy(buf, nbytes);
+	wake_up_interruptible(&random_write_wait);
+	kill_fasync(&fasync, SIGIO, POLL_OUT);
+	return true;
 }
 
 struct fast_pool {
@@ -988,24 +1079,6 @@ static void fast_mix(u32 pool[4])
 	pool[2] = c;  pool[3] = d;
 }
 
-static void credit_entropy_bits(size_t nbits)
-{
-	unsigned int entropy_count, orig, add;
-
-	if (!nbits)
-		return;
-
-	add = min_t(size_t, nbits, POOL_BITS);
-
-	do {
-		orig = READ_ONCE(input_pool.entropy_count);
-		entropy_count = min_t(unsigned int, POOL_BITS, orig + add);
-	} while (cmpxchg(&input_pool.entropy_count, orig, entropy_count) != orig);
-
-	if (crng_init < 2 && entropy_count >= POOL_MIN_BITS)
-		crng_reseed();
-}
-
 /*********************************************************************
  *
  * Entropy input management
@@ -1202,77 +1275,6 @@ void add_disk_randomness(struct gendisk *disk)
 EXPORT_SYMBOL_GPL(add_disk_randomness);
 #endif
 
-/*********************************************************************
- *
- * Entropy extraction routines
- *
- *********************************************************************/
-
-/*
- * This is an HKDF-like construction for using the hashed collected entropy
- * as a PRF key, that's then expanded block-by-block.
- */
-static void extract_entropy(void *buf, size_t nbytes)
-{
-	unsigned long flags;
-	u8 seed[BLAKE2S_HASH_SIZE], next_key[BLAKE2S_HASH_SIZE];
-	struct {
-		unsigned long rdseed[32 / sizeof(long)];
-		size_t counter;
-	} block;
-	size_t i;
-
-	for (i = 0; i < ARRAY_SIZE(block.rdseed); ++i) {
-		if (!arch_get_random_seed_long(&block.rdseed[i]) &&
-		    !arch_get_random_long(&block.rdseed[i]))
-			block.rdseed[i] = random_get_entropy();
-	}
-
-	spin_lock_irqsave(&input_pool.lock, flags);
-
-	/* seed = HASHPRF(last_key, entropy_input) */
-	blake2s_final(&input_pool.hash, seed);
-
-	/* next_key = HASHPRF(seed, RDSEED || 0) */
-	block.counter = 0;
-	blake2s(next_key, (u8 *)&block, seed, sizeof(next_key), sizeof(block), sizeof(seed));
-	blake2s_init_key(&input_pool.hash, BLAKE2S_HASH_SIZE, next_key, sizeof(next_key));
-
-	spin_unlock_irqrestore(&input_pool.lock, flags);
-	memzero_explicit(next_key, sizeof(next_key));
-
-	while (nbytes) {
-		i = min_t(size_t, nbytes, BLAKE2S_HASH_SIZE);
-		/* output = HASHPRF(seed, RDSEED || ++counter) */
-		++block.counter;
-		blake2s(buf, (u8 *)&block, seed, i, sizeof(block), sizeof(seed));
-		nbytes -= i;
-		buf += i;
-	}
-
-	memzero_explicit(seed, sizeof(seed));
-	memzero_explicit(&block, sizeof(block));
-}
-
-/*
- * First we make sure we have POOL_MIN_BITS of entropy in the pool, and then we
- * set the entropy count to zero (but don't actually touch any data). Only then
- * can we extract a new key with extract_entropy().
- */
-static bool drain_entropy(void *buf, size_t nbytes)
-{
-	unsigned int entropy_count;
-	do {
-		entropy_count = READ_ONCE(input_pool.entropy_count);
-		if (entropy_count < POOL_MIN_BITS)
-			return false;
-	} while (cmpxchg(&input_pool.entropy_count, entropy_count, 0) != entropy_count);
-	extract_entropy(buf, nbytes);
-	wake_up_interruptible(&random_write_wait);
-	kill_fasync(&fasync, SIGIO, POLL_OUT);
-	return true;
-}
-
 /*
  * Each time the timer fires, we expect that we got an unpredictable
  * jump in the cycle counter. Even if the timer is running on another
-- 
GitLab


From 92c653cf14400946f376a29b828d6af7e01f38dd Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Fri, 11 Feb 2022 12:53:34 +0100
Subject: [PATCH 0555/1586] random: group entropy collection functions

This pulls all of the entropy collection-focused functions into the
fourth labeled section.

No functional changes.

Cc: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Dominik Brodowski <linux@dominikbrodowski.net>
Reviewed-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/char/random.c | 370 +++++++++++++++++++++++-------------------
 1 file changed, 206 insertions(+), 164 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index ce530a44b4d07..75e16e2f07e45 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1039,60 +1039,112 @@ static bool drain_entropy(void *buf, size_t nbytes)
 	return true;
 }
 
-struct fast_pool {
-	union {
-		u32 pool32[4];
-		u64 pool64[2];
-	};
-	unsigned long last;
-	u16 reg_idx;
-	u8 count;
-};
+
+/**********************************************************************
+ *
+ * Entropy collection routines.
+ *
+ * The following exported functions are used for pushing entropy into
+ * the above entropy accumulation routines:
+ *
+ *	void add_device_randomness(const void *buf, size_t size);
+ *	void add_input_randomness(unsigned int type, unsigned int code,
+ *	                          unsigned int value);
+ *	void add_disk_randomness(struct gendisk *disk);
+ *	void add_hwgenerator_randomness(const void *buffer, size_t count,
+ *					size_t entropy);
+ *	void add_bootloader_randomness(const void *buf, size_t size);
+ *	void add_interrupt_randomness(int irq);
+ *
+ * add_device_randomness() adds data to the input pool that
+ * is likely to differ between two devices (or possibly even per boot).
+ * This would be things like MAC addresses or serial numbers, or the
+ * read-out of the RTC. This does *not* credit any actual entropy to
+ * the pool, but it initializes the pool to different values for devices
+ * that might otherwise be identical and have very little entropy
+ * available to them (particularly common in the embedded world).
+ *
+ * add_input_randomness() uses the input layer interrupt timing, as well
+ * as the event type information from the hardware.
+ *
+ * add_disk_randomness() uses what amounts to the seek time of block
+ * layer request events, on a per-disk_devt basis, as input to the
+ * entropy pool. Note that high-speed solid state drives with very low
+ * seek times do not make for good sources of entropy, as their seek
+ * times are usually fairly consistent.
+ *
+ * The above two routines try to estimate how many bits of entropy
+ * to credit. They do this by keeping track of the first and second
+ * order deltas of the event timings.
+ *
+ * add_hwgenerator_randomness() is for true hardware RNGs, and will credit
+ * entropy as specified by the caller. If the entropy pool is full it will
+ * block until more entropy is needed.
+ *
+ * add_bootloader_randomness() is the same as add_hwgenerator_randomness() or
+ * add_device_randomness(), depending on whether or not the configuration
+ * option CONFIG_RANDOM_TRUST_BOOTLOADER is set.
+ *
+ * add_interrupt_randomness() uses the interrupt timing as random
+ * inputs to the entropy pool. Using the cycle counters and the irq source
+ * as inputs, it feeds the input pool roughly once a second or after 64
+ * interrupts, crediting 1 bit of entropy for whichever comes first.
+ *
+ **********************************************************************/
+
+static bool trust_cpu __ro_after_init = IS_ENABLED(CONFIG_RANDOM_TRUST_CPU);
+static int __init parse_trust_cpu(char *arg)
+{
+	return kstrtobool(arg, &trust_cpu);
+}
+early_param("random.trust_cpu", parse_trust_cpu);
 
 /*
- * This is a fast mixing routine used by the interrupt randomness
- * collector.  It's hardcoded for an 128 bit pool and assumes that any
- * locks that might be needed are taken by the caller.
+ * The first collection of entropy occurs at system boot while interrupts
+ * are still turned off. Here we push in RDSEED, a timestamp, and utsname().
+ * Depending on the above configuration knob, RDSEED may be considered
+ * sufficient for initialization. Note that much earlier setup may already
+ * have pushed entropy into the input pool by the time we get here.
  */
-static void fast_mix(u32 pool[4])
+int __init rand_initialize(void)
 {
-	u32 a = pool[0],	b = pool[1];
-	u32 c = pool[2],	d = pool[3];
-
-	a += b;			c += d;
-	b = rol32(b, 6);	d = rol32(d, 27);
-	d ^= a;			b ^= c;
+	size_t i;
+	ktime_t now = ktime_get_real();
+	bool arch_init = true;
+	unsigned long rv;
 
-	a += b;			c += d;
-	b = rol32(b, 16);	d = rol32(d, 14);
-	d ^= a;			b ^= c;
+	for (i = 0; i < BLAKE2S_BLOCK_SIZE; i += sizeof(rv)) {
+		if (!arch_get_random_seed_long_early(&rv) &&
+		    !arch_get_random_long_early(&rv)) {
+			rv = random_get_entropy();
+			arch_init = false;
+		}
+		mix_pool_bytes(&rv, sizeof(rv));
+	}
+	mix_pool_bytes(&now, sizeof(now));
+	mix_pool_bytes(utsname(), sizeof(*(utsname())));
 
-	a += b;			c += d;
-	b = rol32(b, 6);	d = rol32(d, 27);
-	d ^= a;			b ^= c;
+	extract_entropy(base_crng.key, sizeof(base_crng.key));
+	++base_crng.generation;
 
-	a += b;			c += d;
-	b = rol32(b, 16);	d = rol32(d, 14);
-	d ^= a;			b ^= c;
+	if (arch_init && trust_cpu && crng_init < 2) {
+		crng_init = 2;
+		pr_notice("crng init done (trusting CPU's manufacturer)\n");
+	}
 
-	pool[0] = a;  pool[1] = b;
-	pool[2] = c;  pool[3] = d;
+	if (ratelimit_disable) {
+		urandom_warning.interval = 0;
+		unseeded_warning.interval = 0;
+	}
+	return 0;
 }
 
-/*********************************************************************
- *
- * Entropy input management
- *
- *********************************************************************/
-
 /* There is one of these per entropy source */
 struct timer_rand_state {
 	cycles_t last_time;
 	long last_delta, last_delta2;
 };
 
-#define INIT_TIMER_RAND_STATE { INITIAL_JIFFIES, };
-
 /*
  * Add device- or boot-specific data to the input pool to help
  * initialize it.
@@ -1116,8 +1168,6 @@ void add_device_randomness(const void *buf, size_t size)
 }
 EXPORT_SYMBOL(add_device_randomness);
 
-static struct timer_rand_state input_timer_state = INIT_TIMER_RAND_STATE;
-
 /*
  * This function adds entropy to the entropy "pool" by using timing
  * delays.  It uses the timer_rand_state structure to make an estimate
@@ -1179,8 +1229,9 @@ void add_input_randomness(unsigned int type, unsigned int code,
 			  unsigned int value)
 {
 	static unsigned char last_value;
+	static struct timer_rand_state input_timer_state = { INITIAL_JIFFIES };
 
-	/* ignore autorepeat and the like */
+	/* Ignore autorepeat and the like. */
 	if (value == last_value)
 		return;
 
@@ -1190,6 +1241,119 @@ void add_input_randomness(unsigned int type, unsigned int code,
 }
 EXPORT_SYMBOL_GPL(add_input_randomness);
 
+#ifdef CONFIG_BLOCK
+void add_disk_randomness(struct gendisk *disk)
+{
+	if (!disk || !disk->random)
+		return;
+	/* First major is 1, so we get >= 0x200 here. */
+	add_timer_randomness(disk->random, 0x100 + disk_devt(disk));
+}
+EXPORT_SYMBOL_GPL(add_disk_randomness);
+
+void rand_initialize_disk(struct gendisk *disk)
+{
+	struct timer_rand_state *state;
+
+	/*
+	 * If kzalloc returns null, we just won't use that entropy
+	 * source.
+	 */
+	state = kzalloc(sizeof(struct timer_rand_state), GFP_KERNEL);
+	if (state) {
+		state->last_time = INITIAL_JIFFIES;
+		disk->random = state;
+	}
+}
+#endif
+
+/*
+ * Interface for in-kernel drivers of true hardware RNGs.
+ * Those devices may produce endless random bits and will be throttled
+ * when our pool is full.
+ */
+void add_hwgenerator_randomness(const void *buffer, size_t count,
+				size_t entropy)
+{
+	if (unlikely(crng_init == 0)) {
+		size_t ret = crng_fast_load(buffer, count);
+		mix_pool_bytes(buffer, ret);
+		count -= ret;
+		buffer += ret;
+		if (!count || crng_init == 0)
+			return;
+	}
+
+	/*
+	 * Throttle writing if we're above the trickle threshold.
+	 * We'll be woken up again once below POOL_MIN_BITS, when
+	 * the calling thread is about to terminate, or once
+	 * CRNG_RESEED_INTERVAL has elapsed.
+	 */
+	wait_event_interruptible_timeout(random_write_wait,
+			!system_wq || kthread_should_stop() ||
+			input_pool.entropy_count < POOL_MIN_BITS,
+			CRNG_RESEED_INTERVAL);
+	mix_pool_bytes(buffer, count);
+	credit_entropy_bits(entropy);
+}
+EXPORT_SYMBOL_GPL(add_hwgenerator_randomness);
+
+/*
+ * Handle random seed passed by bootloader.
+ * If the seed is trustworthy, it would be regarded as hardware RNGs. Otherwise
+ * it would be regarded as device data.
+ * The decision is controlled by CONFIG_RANDOM_TRUST_BOOTLOADER.
+ */
+void add_bootloader_randomness(const void *buf, size_t size)
+{
+	if (IS_ENABLED(CONFIG_RANDOM_TRUST_BOOTLOADER))
+		add_hwgenerator_randomness(buf, size, size * 8);
+	else
+		add_device_randomness(buf, size);
+}
+EXPORT_SYMBOL_GPL(add_bootloader_randomness);
+
+struct fast_pool {
+	union {
+		u32 pool32[4];
+		u64 pool64[2];
+	};
+	unsigned long last;
+	u16 reg_idx;
+	u8 count;
+};
+
+/*
+ * This is a fast mixing routine used by the interrupt randomness
+ * collector. It's hardcoded for an 128 bit pool and assumes that any
+ * locks that might be needed are taken by the caller.
+ */
+static void fast_mix(u32 pool[4])
+{
+	u32 a = pool[0],	b = pool[1];
+	u32 c = pool[2],	d = pool[3];
+
+	a += b;			c += d;
+	b = rol32(b, 6);	d = rol32(d, 27);
+	d ^= a;			b ^= c;
+
+	a += b;			c += d;
+	b = rol32(b, 16);	d = rol32(d, 14);
+	d ^= a;			b ^= c;
+
+	a += b;			c += d;
+	b = rol32(b, 6);	d = rol32(d, 27);
+	d ^= a;			b ^= c;
+
+	a += b;			c += d;
+	b = rol32(b, 16);	d = rol32(d, 14);
+	d ^= a;			b ^= c;
+
+	pool[0] = a;  pool[1] = b;
+	pool[2] = c;  pool[3] = d;
+}
+
 static DEFINE_PER_CPU(struct fast_pool, irq_randomness);
 
 static u32 get_reg(struct fast_pool *f, struct pt_regs *regs)
@@ -1259,22 +1423,11 @@ void add_interrupt_randomness(int irq)
 
 	fast_pool->count = 0;
 
-	/* award one bit for the contents of the fast pool */
+	/* Award one bit for the contents of the fast pool. */
 	credit_entropy_bits(1);
 }
 EXPORT_SYMBOL_GPL(add_interrupt_randomness);
 
-#ifdef CONFIG_BLOCK
-void add_disk_randomness(struct gendisk *disk)
-{
-	if (!disk || !disk->random)
-		return;
-	/* first major is 1, so we get >= 0x200 here */
-	add_timer_randomness(disk->random, 0x100 + disk_devt(disk));
-}
-EXPORT_SYMBOL_GPL(add_disk_randomness);
-#endif
-
 /*
  * Each time the timer fires, we expect that we got an unpredictable
  * jump in the cycle counter. Even if the timer is running on another
@@ -1324,73 +1477,6 @@ static void try_to_generate_entropy(void)
 	mix_pool_bytes(&stack.now, sizeof(stack.now));
 }
 
-static bool trust_cpu __ro_after_init = IS_ENABLED(CONFIG_RANDOM_TRUST_CPU);
-static int __init parse_trust_cpu(char *arg)
-{
-	return kstrtobool(arg, &trust_cpu);
-}
-early_param("random.trust_cpu", parse_trust_cpu);
-
-/*
- * Note that setup_arch() may call add_device_randomness()
- * long before we get here. This allows seeding of the pools
- * with some platform dependent data very early in the boot
- * process. But it limits our options here. We must use
- * statically allocated structures that already have all
- * initializations complete at compile time. We should also
- * take care not to overwrite the precious per platform data
- * we were given.
- */
-int __init rand_initialize(void)
-{
-	size_t i;
-	ktime_t now = ktime_get_real();
-	bool arch_init = true;
-	unsigned long rv;
-
-	for (i = 0; i < BLAKE2S_BLOCK_SIZE; i += sizeof(rv)) {
-		if (!arch_get_random_seed_long_early(&rv) &&
-		    !arch_get_random_long_early(&rv)) {
-			rv = random_get_entropy();
-			arch_init = false;
-		}
-		mix_pool_bytes(&rv, sizeof(rv));
-	}
-	mix_pool_bytes(&now, sizeof(now));
-	mix_pool_bytes(utsname(), sizeof(*(utsname())));
-
-	extract_entropy(base_crng.key, sizeof(base_crng.key));
-	++base_crng.generation;
-
-	if (arch_init && trust_cpu && crng_init < 2) {
-		crng_init = 2;
-		pr_notice("crng init done (trusting CPU's manufacturer)\n");
-	}
-
-	if (ratelimit_disable) {
-		urandom_warning.interval = 0;
-		unseeded_warning.interval = 0;
-	}
-	return 0;
-}
-
-#ifdef CONFIG_BLOCK
-void rand_initialize_disk(struct gendisk *disk)
-{
-	struct timer_rand_state *state;
-
-	/*
-	 * If kzalloc returns null, we just won't use that entropy
-	 * source.
-	 */
-	state = kzalloc(sizeof(struct timer_rand_state), GFP_KERNEL);
-	if (state) {
-		state->last_time = INITIAL_JIFFIES;
-		disk->random = state;
-	}
-}
-#endif
-
 static ssize_t urandom_read(struct file *file, char __user *buf, size_t nbytes,
 			    loff_t *ppos)
 {
@@ -1685,47 +1771,3 @@ static int __init random_sysctls_init(void)
 }
 device_initcall(random_sysctls_init);
 #endif	/* CONFIG_SYSCTL */
-
-/* Interface for in-kernel drivers of true hardware RNGs.
- * Those devices may produce endless random bits and will be throttled
- * when our pool is full.
- */
-void add_hwgenerator_randomness(const void *buffer, size_t count,
-				size_t entropy)
-{
-	if (unlikely(crng_init == 0)) {
-		size_t ret = crng_fast_load(buffer, count);
-		mix_pool_bytes(buffer, ret);
-		count -= ret;
-		buffer += ret;
-		if (!count || crng_init == 0)
-			return;
-	}
-
-	/* Throttle writing if we're above the trickle threshold.
-	 * We'll be woken up again once below POOL_MIN_BITS, when
-	 * the calling thread is about to terminate, or once
-	 * CRNG_RESEED_INTERVAL has elapsed.
-	 */
-	wait_event_interruptible_timeout(random_write_wait,
-			!system_wq || kthread_should_stop() ||
-			input_pool.entropy_count < POOL_MIN_BITS,
-			CRNG_RESEED_INTERVAL);
-	mix_pool_bytes(buffer, count);
-	credit_entropy_bits(entropy);
-}
-EXPORT_SYMBOL_GPL(add_hwgenerator_randomness);
-
-/* Handle random seed passed by bootloader.
- * If the seed is trustworthy, it would be regarded as hardware RNGs. Otherwise
- * it would be regarded as device data.
- * The decision is controlled by CONFIG_RANDOM_TRUST_BOOTLOADER.
- */
-void add_bootloader_randomness(const void *buf, size_t size)
-{
-	if (IS_ENABLED(CONFIG_RANDOM_TRUST_BOOTLOADER))
-		add_hwgenerator_randomness(buf, size, size * 8);
-	else
-		add_device_randomness(buf, size);
-}
-EXPORT_SYMBOL_GPL(add_bootloader_randomness);
-- 
GitLab


From a6adf8e7a605250b911e94793fd077933709ff9e Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Fri, 11 Feb 2022 12:53:34 +0100
Subject: [PATCH 0556/1586] random: group userspace read/write functions

This pulls all of the userspace read/write-focused functions into the
fifth labeled section.

No functional changes.

Cc: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/char/random.c | 125 ++++++++++++++++++++++++++----------------
 1 file changed, 77 insertions(+), 48 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 75e16e2f07e45..f43d12354c8d5 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1477,30 +1477,61 @@ static void try_to_generate_entropy(void)
 	mix_pool_bytes(&stack.now, sizeof(stack.now));
 }
 
-static ssize_t urandom_read(struct file *file, char __user *buf, size_t nbytes,
-			    loff_t *ppos)
+
+/**********************************************************************
+ *
+ * Userspace reader/writer interfaces.
+ *
+ * getrandom(2) is the primary modern interface into the RNG and should
+ * be used in preference to anything else.
+ *
+ * Reading from /dev/random has the same functionality as calling
+ * getrandom(2) with flags=0. In earlier versions, however, it had
+ * vastly different semantics and should therefore be avoided, to
+ * prevent backwards compatibility issues.
+ *
+ * Reading from /dev/urandom has the same functionality as calling
+ * getrandom(2) with flags=GRND_INSECURE. Because it does not block
+ * waiting for the RNG to be ready, it should not be used.
+ *
+ * Writing to either /dev/random or /dev/urandom adds entropy to
+ * the input pool but does not credit it.
+ *
+ * Polling on /dev/random indicates when the RNG is initialized, on
+ * the read side, and when it wants new entropy, on the write side.
+ *
+ * Both /dev/random and /dev/urandom have the same set of ioctls for
+ * adding entropy, getting the entropy count, zeroing the count, and
+ * reseeding the crng.
+ *
+ **********************************************************************/
+
+SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count, unsigned int,
+		flags)
 {
-	static int maxwarn = 10;
+	if (flags & ~(GRND_NONBLOCK | GRND_RANDOM | GRND_INSECURE))
+		return -EINVAL;
 
-	if (!crng_ready() && maxwarn > 0) {
-		maxwarn--;
-		if (__ratelimit(&urandom_warning))
-			pr_notice("%s: uninitialized urandom read (%zd bytes read)\n",
-				  current->comm, nbytes);
-	}
+	/*
+	 * Requesting insecure and blocking randomness at the same time makes
+	 * no sense.
+	 */
+	if ((flags & (GRND_INSECURE | GRND_RANDOM)) == (GRND_INSECURE | GRND_RANDOM))
+		return -EINVAL;
 
-	return get_random_bytes_user(buf, nbytes);
-}
+	if (count > INT_MAX)
+		count = INT_MAX;
 
-static ssize_t random_read(struct file *file, char __user *buf, size_t nbytes,
-			   loff_t *ppos)
-{
-	int ret;
+	if (!(flags & GRND_INSECURE) && !crng_ready()) {
+		int ret;
 
-	ret = wait_for_random_bytes();
-	if (ret != 0)
-		return ret;
-	return get_random_bytes_user(buf, nbytes);
+		if (flags & GRND_NONBLOCK)
+			return -EAGAIN;
+		ret = wait_for_random_bytes();
+		if (unlikely(ret))
+			return ret;
+	}
+	return get_random_bytes_user(buf, count);
 }
 
 static __poll_t random_poll(struct file *file, poll_table *wait)
@@ -1552,6 +1583,32 @@ static ssize_t random_write(struct file *file, const char __user *buffer,
 	return (ssize_t)count;
 }
 
+static ssize_t urandom_read(struct file *file, char __user *buf, size_t nbytes,
+			    loff_t *ppos)
+{
+	static int maxwarn = 10;
+
+	if (!crng_ready() && maxwarn > 0) {
+		maxwarn--;
+		if (__ratelimit(&urandom_warning))
+			pr_notice("%s: uninitialized urandom read (%zd bytes read)\n",
+				  current->comm, nbytes);
+	}
+
+	return get_random_bytes_user(buf, nbytes);
+}
+
+static ssize_t random_read(struct file *file, char __user *buf, size_t nbytes,
+			   loff_t *ppos)
+{
+	int ret;
+
+	ret = wait_for_random_bytes();
+	if (ret != 0)
+		return ret;
+	return get_random_bytes_user(buf, nbytes);
+}
+
 static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
 {
 	int size, ent_count;
@@ -1560,7 +1617,7 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
 
 	switch (cmd) {
 	case RNDGETENTCNT:
-		/* inherently racy, no point locking */
+		/* Inherently racy, no point locking. */
 		if (put_user(input_pool.entropy_count, p))
 			return -EFAULT;
 		return 0;
@@ -1636,34 +1693,6 @@ const struct file_operations urandom_fops = {
 	.llseek = noop_llseek,
 };
 
-SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count, unsigned int,
-		flags)
-{
-	if (flags & ~(GRND_NONBLOCK | GRND_RANDOM | GRND_INSECURE))
-		return -EINVAL;
-
-	/*
-	 * Requesting insecure and blocking randomness at the same time makes
-	 * no sense.
-	 */
-	if ((flags & (GRND_INSECURE | GRND_RANDOM)) == (GRND_INSECURE | GRND_RANDOM))
-		return -EINVAL;
-
-	if (count > INT_MAX)
-		count = INT_MAX;
-
-	if (!(flags & GRND_INSECURE) && !crng_ready()) {
-		int ret;
-
-		if (flags & GRND_NONBLOCK)
-			return -EAGAIN;
-		ret = wait_for_random_bytes();
-		if (unlikely(ret))
-			return ret;
-	}
-	return get_random_bytes_user(buf, count);
-}
-
 /********************************************************************
  *
  * Sysctl interface
-- 
GitLab


From 0deff3c43206c24e746b1410f11125707ad3040e Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Fri, 11 Feb 2022 12:53:34 +0100
Subject: [PATCH 0557/1586] random: group sysctl functions

This pulls all of the sysctl-focused functions into the sixth labeled
section.

No functional changes.

Cc: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/char/random.c | 37 +++++++++++++++++++++++++++++++------
 1 file changed, 31 insertions(+), 6 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index f43d12354c8d5..a4b442032db75 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1693,9 +1693,34 @@ const struct file_operations urandom_fops = {
 	.llseek = noop_llseek,
 };
 
+
 /********************************************************************
  *
- * Sysctl interface
+ * Sysctl interface.
+ *
+ * These are partly unused legacy knobs with dummy values to not break
+ * userspace and partly still useful things. They are usually accessible
+ * in /proc/sys/kernel/random/ and are as follows:
+ *
+ * - boot_id - a UUID representing the current boot.
+ *
+ * - uuid - a random UUID, different each time the file is read.
+ *
+ * - poolsize - the number of bits of entropy that the input pool can
+ *   hold, tied to the POOL_BITS constant.
+ *
+ * - entropy_avail - the number of bits of entropy currently in the
+ *   input pool. Always <= poolsize.
+ *
+ * - write_wakeup_threshold - the amount of entropy in the input pool
+ *   below which write polls to /dev/random will unblock, requesting
+ *   more entropy, tied to the POOL_MIN_BITS constant. It is writable
+ *   to avoid breaking old userspaces, but writing to it does not
+ *   change any behavior of the RNG.
+ *
+ * - urandom_min_reseed_secs - fixed to the meaningless value "60".
+ *   It is writable to avoid breaking old userspaces, but writing
+ *   to it does not change any behavior of the RNG.
  *
  ********************************************************************/
 
@@ -1703,8 +1728,8 @@ const struct file_operations urandom_fops = {
 
 #include <linux/sysctl.h>
 
-static int random_min_urandom_seed = 60;
-static int random_write_wakeup_bits = POOL_MIN_BITS;
+static int sysctl_random_min_urandom_seed = 60;
+static int sysctl_random_write_wakeup_bits = POOL_MIN_BITS;
 static int sysctl_poolsize = POOL_BITS;
 static char sysctl_bootid[16];
 
@@ -1761,14 +1786,14 @@ static struct ctl_table random_table[] = {
 	},
 	{
 		.procname	= "write_wakeup_threshold",
-		.data		= &random_write_wakeup_bits,
+		.data		= &sysctl_random_write_wakeup_bits,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
 	{
 		.procname	= "urandom_min_reseed_secs",
-		.data		= &random_min_urandom_seed,
+		.data		= &sysctl_random_min_urandom_seed,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
@@ -1799,4 +1824,4 @@ static int __init random_sysctls_init(void)
 	return 0;
 }
 device_initcall(random_sysctls_init);
-#endif	/* CONFIG_SYSCTL */
+#endif
-- 
GitLab


From 5f75d9f3babea8ae0a2d06724656874f41d317f5 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Fri, 11 Feb 2022 12:29:33 +0100
Subject: [PATCH 0558/1586] random: rewrite header introductory comment

Now that we've re-documented the various sections, we can remove the
outdated text here and replace it with a high-level overview.

Cc: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/char/random.c | 183 +++++-------------------------------------
 1 file changed, 21 insertions(+), 162 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index a4b442032db75..e0764e8407911 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -2,168 +2,27 @@
 /*
  * Copyright (C) 2017-2022 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
  * Copyright Matt Mackall <mpm@selenic.com>, 2003, 2004, 2005
- * Copyright Theodore Ts'o, 1994, 1995, 1996, 1997, 1998, 1999.  All
- * rights reserved.
- */
-
-/*
- * Exported interfaces ---- output
- * ===============================
- *
- * There are four exported interfaces; two for use within the kernel,
- * and two for use from userspace.
- *
- * Exported interfaces ---- userspace output
- * -----------------------------------------
- *
- * The userspace interfaces are two character devices /dev/random and
- * /dev/urandom.  /dev/random is suitable for use when very high
- * quality randomness is desired (for example, for key generation or
- * one-time pads), as it will only return a maximum of the number of
- * bits of randomness (as estimated by the random number generator)
- * contained in the entropy pool.
- *
- * The /dev/urandom device does not have this limit, and will return
- * as many bytes as are requested.  As more and more random bytes are
- * requested without giving time for the entropy pool to recharge,
- * this will result in random numbers that are merely cryptographically
- * strong.  For many applications, however, this is acceptable.
- *
- * Exported interfaces ---- kernel output
- * --------------------------------------
- *
- * The primary kernel interfaces are:
- *
- *	void get_random_bytes(void *buf, size_t nbytes);
- *	u32 get_random_u32()
- *	u64 get_random_u64()
- *	unsigned int get_random_int()
- *	unsigned long get_random_long()
- *
- * These interfaces will return the requested number of random bytes
- * into the given buffer or as a return value. This is equivalent to a
- * read from /dev/urandom. The get_random_{u32,u64,int,long}() family
- * of functions may be higher performance for one-off random integers,
- * because they do a bit of buffering.
- *
- * prandom_u32()
- * -------------
- *
- * For even weaker applications, see the pseudorandom generator
- * prandom_u32(), prandom_max(), and prandom_bytes().  If the random
- * numbers aren't security-critical at all, these are *far* cheaper.
- * Useful for self-tests, random error simulation, randomized backoffs,
- * and any other application where you trust that nobody is trying to
- * maliciously mess with you by guessing the "random" numbers.
- *
- * Exported interfaces ---- input
- * ==============================
- *
- * The current exported interfaces for gathering environmental noise
- * from the devices are:
- *
- *	void add_device_randomness(const void *buf, size_t size);
- *	void add_input_randomness(unsigned int type, unsigned int code,
- *                                unsigned int value);
- *	void add_interrupt_randomness(int irq);
- *	void add_disk_randomness(struct gendisk *disk);
- *	void add_hwgenerator_randomness(const void *buffer, size_t count,
- *					size_t entropy);
- *	void add_bootloader_randomness(const void *buf, size_t size);
- *
- * add_device_randomness() is for adding data to the random pool that
- * is likely to differ between two devices (or possibly even per boot).
- * This would be things like MAC addresses or serial numbers, or the
- * read-out of the RTC. This does *not* add any actual entropy to the
- * pool, but it initializes the pool to different values for devices
- * that might otherwise be identical and have very little entropy
- * available to them (particularly common in the embedded world).
- *
- * add_input_randomness() uses the input layer interrupt timing, as well as
- * the event type information from the hardware.
- *
- * add_interrupt_randomness() uses the interrupt timing as random
- * inputs to the entropy pool. Using the cycle counters and the irq source
- * as inputs, it feeds the randomness roughly once a second.
- *
- * add_disk_randomness() uses what amounts to the seek time of block
- * layer request events, on a per-disk_devt basis, as input to the
- * entropy pool. Note that high-speed solid state drives with very low
- * seek times do not make for good sources of entropy, as their seek
- * times are usually fairly consistent.
- *
- * All of these routines try to estimate how many bits of randomness a
- * particular randomness source.  They do this by keeping track of the
- * first and second order deltas of the event timings.
- *
- * add_hwgenerator_randomness() is for true hardware RNGs, and will credit
- * entropy as specified by the caller. If the entropy pool is full it will
- * block until more entropy is needed.
- *
- * add_bootloader_randomness() is the same as add_hwgenerator_randomness() or
- * add_device_randomness(), depending on whether or not the configuration
- * option CONFIG_RANDOM_TRUST_BOOTLOADER is set.
- *
- * Ensuring unpredictability at system startup
- * ============================================
- *
- * When any operating system starts up, it will go through a sequence
- * of actions that are fairly predictable by an adversary, especially
- * if the start-up does not involve interaction with a human operator.
- * This reduces the actual number of bits of unpredictability in the
- * entropy pool below the value in entropy_count.  In order to
- * counteract this effect, it helps to carry information in the
- * entropy pool across shut-downs and start-ups.  To do this, put the
- * following lines an appropriate script which is run during the boot
- * sequence:
- *
- *	echo "Initializing random number generator..."
- *	random_seed=/var/run/random-seed
- *	# Carry a random seed from start-up to start-up
- *	# Load and then save the whole entropy pool
- *	if [ -f $random_seed ]; then
- *		cat $random_seed >/dev/urandom
- *	else
- *		touch $random_seed
- *	fi
- *	chmod 600 $random_seed
- *	dd if=/dev/urandom of=$random_seed count=1 bs=512
- *
- * and the following lines in an appropriate script which is run as
- * the system is shutdown:
- *
- *	# Carry a random seed from shut-down to start-up
- *	# Save the whole entropy pool
- *	echo "Saving random seed..."
- *	random_seed=/var/run/random-seed
- *	touch $random_seed
- *	chmod 600 $random_seed
- *	dd if=/dev/urandom of=$random_seed count=1 bs=512
- *
- * For example, on most modern systems using the System V init
- * scripts, such code fragments would be found in
- * /etc/rc.d/init.d/random.  On older Linux systems, the correct script
- * location might be in /etc/rcb.d/rc.local or /etc/rc.d/rc.0.
- *
- * Effectively, these commands cause the contents of the entropy pool
- * to be saved at shut-down time and reloaded into the entropy pool at
- * start-up.  (The 'dd' in the addition to the bootup script is to
- * make sure that /etc/random-seed is different for every start-up,
- * even if the system crashes without executing rc.0.)  Even with
- * complete knowledge of the start-up activities, predicting the state
- * of the entropy pool requires knowledge of the previous history of
- * the system.
- *
- * Configuring the /dev/random driver under Linux
- * ==============================================
- *
- * The /dev/random driver under Linux uses minor numbers 8 and 9 of
- * the /dev/mem major number (#1).  So if your system does not have
- * /dev/random and /dev/urandom created already, they can be created
- * by using the commands:
- *
- *	mknod /dev/random c 1 8
- *	mknod /dev/urandom c 1 9
+ * Copyright Theodore Ts'o, 1994, 1995, 1996, 1997, 1998, 1999. All rights reserved.
+ *
+ * This driver produces cryptographically secure pseudorandom data. It is divided
+ * into roughly six sections, each with a section header:
+ *
+ *   - Initialization and readiness waiting.
+ *   - Fast key erasure RNG, the "crng".
+ *   - Entropy accumulation and extraction routines.
+ *   - Entropy collection routines.
+ *   - Userspace reader/writer interfaces.
+ *   - Sysctl interface.
+ *
+ * The high level overview is that there is one input pool, into which
+ * various pieces of data are hashed. Some of that data is then "credited" as
+ * having a certain number of bits of entropy. When enough bits of entropy are
+ * available, the hash is finalized and handed as a key to a stream cipher that
+ * expands it indefinitely for various consumers. This key is periodically
+ * refreshed as the various entropy collectors, described below, add data to the
+ * input pool and credit it. There is currently no Fortuna-like scheduler
+ * involved, which can lead to malicious entropy sources causing a premature
+ * reseed, and the entropy estimates are, at best, conservative guesses.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-- 
GitLab


From 58340f8e952b613e0ead0bed58b97b05bf4743c5 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Fri, 4 Feb 2022 16:15:46 +0100
Subject: [PATCH 0559/1586] random: defer fast pool mixing to worker
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On PREEMPT_RT, it's problematic to take spinlocks from hard irq
handlers. We can fix this by deferring to a workqueue the dumping of
the fast pool into the input pool.

We accomplish this with some careful rules on fast_pool->count:

  - When it's incremented to >= 64, we schedule the work.
  - If the top bit is set, we never schedule the work, even if >= 64.
  - The worker is responsible for setting it back to 0 when it's done.

There are two small issues around using workqueues for this purpose that
we work around.

The first issue is that mix_interrupt_randomness() might be migrated to
another CPU during CPU hotplug. This issue is rectified by checking that
it hasn't been migrated (after disabling irqs). If it has been migrated,
then we set the count to zero, so that when the CPU comes online again,
it can requeue the work. As part of this, we switch to using an
atomic_t, so that the increment in the irq handler doesn't wipe out the
zeroing if the CPU comes back online while this worker is running.

The second issue is that, though relatively minor in effect, we probably
want to make sure we get a consistent view of the pool onto the stack,
in case it's interrupted by an irq while reading. To do this, we don't
reenable irqs until after the copy. There are only 18 instructions
between the cli and sti, so this is a pretty tiny window.

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: Jonathan Neuschäfer <j.neuschaefer@gmx.net>
Acked-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Reviewed-by: Sultan Alsawaf <sultan@kerneltoast.com>
Reviewed-by: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/char/random.c | 63 +++++++++++++++++++++++++++++++++----------
 1 file changed, 49 insertions(+), 14 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index e0764e8407911..26cbfcaa3b783 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1178,9 +1178,10 @@ struct fast_pool {
 		u32 pool32[4];
 		u64 pool64[2];
 	};
+	struct work_struct mix;
 	unsigned long last;
+	atomic_t count;
 	u16 reg_idx;
-	u8 count;
 };
 
 /*
@@ -1230,12 +1231,49 @@ static u32 get_reg(struct fast_pool *f, struct pt_regs *regs)
 	return *ptr;
 }
 
+static void mix_interrupt_randomness(struct work_struct *work)
+{
+	struct fast_pool *fast_pool = container_of(work, struct fast_pool, mix);
+	u32 pool[4];
+
+	/* Check to see if we're running on the wrong CPU due to hotplug. */
+	local_irq_disable();
+	if (fast_pool != this_cpu_ptr(&irq_randomness)) {
+		local_irq_enable();
+		/*
+		 * If we are unlucky enough to have been moved to another CPU,
+		 * during CPU hotplug while the CPU was shutdown then we set
+		 * our count to zero atomically so that when the CPU comes
+		 * back online, it can enqueue work again. The _release here
+		 * pairs with the atomic_inc_return_acquire in
+		 * add_interrupt_randomness().
+		 */
+		atomic_set_release(&fast_pool->count, 0);
+		return;
+	}
+
+	/*
+	 * Copy the pool to the stack so that the mixer always has a
+	 * consistent view, before we reenable irqs again.
+	 */
+	memcpy(pool, fast_pool->pool32, sizeof(pool));
+	atomic_set(&fast_pool->count, 0);
+	fast_pool->last = jiffies;
+	local_irq_enable();
+
+	mix_pool_bytes(pool, sizeof(pool));
+	credit_entropy_bits(1);
+	memzero_explicit(pool, sizeof(pool));
+}
+
 void add_interrupt_randomness(int irq)
 {
+	enum { MIX_INFLIGHT = 1U << 31 };
 	struct fast_pool *fast_pool = this_cpu_ptr(&irq_randomness);
 	struct pt_regs *regs = get_irq_regs();
 	unsigned long now = jiffies;
 	cycles_t cycles = random_get_entropy();
+	unsigned int new_count;
 
 	if (cycles == 0)
 		cycles = get_reg(fast_pool, regs);
@@ -1255,12 +1293,13 @@ void add_interrupt_randomness(int irq)
 	}
 
 	fast_mix(fast_pool->pool32);
-	++fast_pool->count;
+	/* The _acquire here pairs with the atomic_set_release in mix_interrupt_randomness(). */
+	new_count = (unsigned int)atomic_inc_return_acquire(&fast_pool->count);
 
 	if (unlikely(crng_init == 0)) {
-		if (fast_pool->count >= 64 &&
+		if (new_count >= 64 &&
 		    crng_fast_load(fast_pool->pool32, sizeof(fast_pool->pool32)) > 0) {
-			fast_pool->count = 0;
+			atomic_set(&fast_pool->count, 0);
 			fast_pool->last = now;
 			if (spin_trylock(&input_pool.lock)) {
 				_mix_pool_bytes(&fast_pool->pool32, sizeof(fast_pool->pool32));
@@ -1270,20 +1309,16 @@ void add_interrupt_randomness(int irq)
 		return;
 	}
 
-	if ((fast_pool->count < 64) && !time_after(now, fast_pool->last + HZ))
+	if (new_count & MIX_INFLIGHT)
 		return;
 
-	if (!spin_trylock(&input_pool.lock))
+	if (new_count < 64 && !time_after(now, fast_pool->last + HZ))
 		return;
 
-	fast_pool->last = now;
-	_mix_pool_bytes(&fast_pool->pool32, sizeof(fast_pool->pool32));
-	spin_unlock(&input_pool.lock);
-
-	fast_pool->count = 0;
-
-	/* Award one bit for the contents of the fast pool. */
-	credit_entropy_bits(1);
+	if (unlikely(!fast_pool->mix.func))
+		INIT_WORK(&fast_pool->mix, mix_interrupt_randomness);
+	atomic_or(MIX_INFLIGHT, &fast_pool->count);
+	queue_work_on(raw_smp_processor_id(), system_highpri_wq, &fast_pool->mix);
 }
 EXPORT_SYMBOL_GPL(add_interrupt_randomness);
 
-- 
GitLab


From afba0b80b977b2a8f16234f2acd982f82710ba33 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Sat, 12 Feb 2022 01:26:17 +0100
Subject: [PATCH 0560/1586] random: do not take pool spinlock at boot

Since rand_initialize() is run while interrupts are still off and
nothing else is running, we don't need to repeatedly take and release
the pool spinlock, especially in the RDSEED loop.

Reviewed-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/char/random.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 26cbfcaa3b783..0541066e2e5c1 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -978,10 +978,10 @@ int __init rand_initialize(void)
 			rv = random_get_entropy();
 			arch_init = false;
 		}
-		mix_pool_bytes(&rv, sizeof(rv));
+		_mix_pool_bytes(&rv, sizeof(rv));
 	}
-	mix_pool_bytes(&now, sizeof(now));
-	mix_pool_bytes(utsname(), sizeof(*(utsname())));
+	_mix_pool_bytes(&now, sizeof(now));
+	_mix_pool_bytes(utsname(), sizeof(*(utsname())));
 
 	extract_entropy(base_crng.key, sizeof(base_crng.key));
 	++base_crng.generation;
-- 
GitLab


From da792c6d5f59a76c10a310c5d4c93428fd18f996 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Sat, 12 Feb 2022 23:54:09 +0100
Subject: [PATCH 0561/1586] random: unify early init crng load accounting

crng_fast_load() and crng_slow_load() have different semantics:

- crng_fast_load() xors and accounts with crng_init_cnt.
- crng_slow_load() hashes and doesn't account.

However add_hwgenerator_randomness() can afford to hash (it's called
from a kthread), and it should account. Additionally, ones that can
afford to hash don't need to take a trylock but can take a normal lock.
So, we combine these into one function, crng_pre_init_inject(), which
allows us to control these in a uniform way. This will make it simpler
later to simplify this all down when the time comes for that.

Cc: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Dominik Brodowski <linux@dominikbrodowski.net>
Reviewed-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/char/random.c | 112 ++++++++++++++++++++++--------------------
 1 file changed, 58 insertions(+), 54 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 0541066e2e5c1..93d01f918720f 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -386,7 +386,7 @@ static void crng_make_state(u32 chacha_state[CHACHA_STATE_WORDS],
 	 * For the fast path, we check whether we're ready, unlocked first, and
 	 * then re-check once locked later. In the case where we're really not
 	 * ready, we do fast key erasure with the base_crng directly, because
-	 * this is what crng_{fast,slow}_load mutate during early init.
+	 * this is what crng_pre_init_inject() mutates during early init.
 	 */
 	if (unlikely(!crng_ready())) {
 		bool ready;
@@ -437,72 +437,75 @@ static void crng_make_state(u32 chacha_state[CHACHA_STATE_WORDS],
 }
 
 /*
- * This function is for crng_init == 0 only.
- *
- * crng_fast_load() can be called by code in the interrupt service
- * path.  So we can't afford to dilly-dally. Returns the number of
- * bytes processed from cp.
+ * This function is for crng_init == 0 only. It loads entropy directly
+ * into the crng's key, without going through the input pool. It is,
+ * generally speaking, not very safe, but we use this only at early
+ * boot time when it's better to have something there rather than
+ * nothing.
+ *
+ * There are two paths, a slow one and a fast one. The slow one
+ * hashes the input along with the current key. The fast one simply
+ * xors it in, and should only be used from interrupt context.
+ *
+ * If account is set, then the crng_init_cnt counter is incremented.
+ * This shouldn't be set by functions like add_device_randomness(),
+ * where we can't trust the buffer passed to it is guaranteed to be
+ * unpredictable (so it might not have any entropy at all).
+ *
+ * Returns the number of bytes processed from input, which is bounded
+ * by CRNG_INIT_CNT_THRESH if account is true.
  */
-static size_t crng_fast_load(const void *cp, size_t len)
+static size_t crng_pre_init_inject(const void *input, size_t len,
+				   bool fast, bool account)
 {
 	static int crng_init_cnt = 0;
 	unsigned long flags;
-	const u8 *src = (const u8 *)cp;
-	size_t ret = 0;
 
-	if (!spin_trylock_irqsave(&base_crng.lock, flags))
-		return 0;
+	if (fast) {
+		if (!spin_trylock_irqsave(&base_crng.lock, flags))
+			return 0;
+	} else {
+		spin_lock_irqsave(&base_crng.lock, flags);
+	}
+
 	if (crng_init != 0) {
 		spin_unlock_irqrestore(&base_crng.lock, flags);
 		return 0;
 	}
-	while (len > 0 && crng_init_cnt < CRNG_INIT_CNT_THRESH) {
-		base_crng.key[crng_init_cnt % sizeof(base_crng.key)] ^= *src;
-		src++; crng_init_cnt++; len--; ret++;
-	}
-	if (crng_init_cnt >= CRNG_INIT_CNT_THRESH) {
-		++base_crng.generation;
-		crng_init = 1;
-	}
-	spin_unlock_irqrestore(&base_crng.lock, flags);
-	if (crng_init == 1)
-		pr_notice("fast init done\n");
-	return ret;
-}
 
-/*
- * This function is for crng_init == 0 only.
- *
- * crng_slow_load() is called by add_device_randomness, which has two
- * attributes.  (1) We can't trust the buffer passed to it is
- * guaranteed to be unpredictable (so it might not have any entropy at
- * all), and (2) it doesn't have the performance constraints of
- * crng_fast_load().
- *
- * So, we simply hash the contents in with the current key. Finally,
- * we do *not* advance crng_init_cnt since buffer we may get may be
- * something like a fixed DMI table (for example), which might very
- * well be unique to the machine, but is otherwise unvarying.
- */
-static void crng_slow_load(const void *cp, size_t len)
-{
-	unsigned long flags;
-	struct blake2s_state hash;
+	if (account)
+		len = min_t(size_t, len, CRNG_INIT_CNT_THRESH - crng_init_cnt);
 
-	blake2s_init(&hash, sizeof(base_crng.key));
+	if (fast) {
+		const u8 *src = input;
+		size_t i;
 
-	if (!spin_trylock_irqsave(&base_crng.lock, flags))
-		return;
-	if (crng_init != 0) {
-		spin_unlock_irqrestore(&base_crng.lock, flags);
-		return;
+		for (i = 0; i < len; ++i)
+			base_crng.key[(crng_init_cnt + i) %
+				      sizeof(base_crng.key)] ^= src[i];
+	} else {
+		struct blake2s_state hash;
+
+		blake2s_init(&hash, sizeof(base_crng.key));
+		blake2s_update(&hash, base_crng.key, sizeof(base_crng.key));
+		blake2s_update(&hash, input, len);
+		blake2s_final(&hash, base_crng.key);
 	}
 
-	blake2s_update(&hash, base_crng.key, sizeof(base_crng.key));
-	blake2s_update(&hash, cp, len);
-	blake2s_final(&hash, base_crng.key);
+	if (account) {
+		crng_init_cnt += len;
+		if (crng_init_cnt >= CRNG_INIT_CNT_THRESH) {
+			++base_crng.generation;
+			crng_init = 1;
+		}
+	}
 
 	spin_unlock_irqrestore(&base_crng.lock, flags);
+
+	if (crng_init == 1)
+		pr_notice("fast init done\n");
+
+	return len;
 }
 
 static void _get_random_bytes(void *buf, size_t nbytes)
@@ -1018,7 +1021,7 @@ void add_device_randomness(const void *buf, size_t size)
 	unsigned long flags;
 
 	if (!crng_ready() && size)
-		crng_slow_load(buf, size);
+		crng_pre_init_inject(buf, size, false, false);
 
 	spin_lock_irqsave(&input_pool.lock, flags);
 	_mix_pool_bytes(buf, size);
@@ -1135,7 +1138,7 @@ void add_hwgenerator_randomness(const void *buffer, size_t count,
 				size_t entropy)
 {
 	if (unlikely(crng_init == 0)) {
-		size_t ret = crng_fast_load(buffer, count);
+		size_t ret = crng_pre_init_inject(buffer, count, false, true);
 		mix_pool_bytes(buffer, ret);
 		count -= ret;
 		buffer += ret;
@@ -1298,7 +1301,8 @@ void add_interrupt_randomness(int irq)
 
 	if (unlikely(crng_init == 0)) {
 		if (new_count >= 64 &&
-		    crng_fast_load(fast_pool->pool32, sizeof(fast_pool->pool32)) > 0) {
+		    crng_pre_init_inject(fast_pool->pool32, sizeof(fast_pool->pool32),
+					 true, true) > 0) {
 			atomic_set(&fast_pool->count, 0);
 			fast_pool->last = now;
 			if (spin_trylock(&input_pool.lock)) {
-- 
GitLab


From 1daf2f387652bf3a7044aea042f5023b3f6b189b Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Sat, 12 Feb 2022 23:57:38 +0100
Subject: [PATCH 0562/1586] random: check for crng_init == 0 in
 add_device_randomness()

This has no real functional change, as crng_pre_init_inject() (and
before that, crng_slow_init()) always checks for == 0, not >= 2. So
correct the outer unlocked change to reflect that. Before this used
crng_ready(), which was not correct.

Cc: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Dominik Brodowski <linux@dominikbrodowski.net>
Reviewed-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/char/random.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 93d01f918720f..bca4467e540f7 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1020,7 +1020,7 @@ void add_device_randomness(const void *buf, size_t size)
 	unsigned long time = random_get_entropy() ^ jiffies;
 	unsigned long flags;
 
-	if (!crng_ready() && size)
+	if (crng_init == 0 && size)
 		crng_pre_init_inject(buf, size, false, false);
 
 	spin_lock_irqsave(&input_pool.lock, flags);
-- 
GitLab


From b777c38239fec5a528e59f55b379e31b1a187524 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Sun, 13 Feb 2022 16:17:01 +0100
Subject: [PATCH 0563/1586] random: pull add_hwgenerator_randomness()
 declaration into random.h

add_hwgenerator_randomness() is a function implemented and documented
inside of random.c. It is the way that hardware RNGs push data into it.
Therefore, it should be declared in random.h. Otherwise sparse complains
with:

random.c:1137:6: warning: symbol 'add_hwgenerator_randomness' was not declared. Should it be static?

The alternative would be to include hw_random.h into random.c, but that
wouldn't really be good for anything except slowing down compile time.

Cc: Matt Mackall <mpm@selenic.com>
Cc: Theodore Ts'o <tytso@mit.edu>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Reviewed-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/char/hw_random/core.c | 1 +
 include/linux/hw_random.h     | 2 --
 include/linux/random.h        | 2 ++
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index a3db27916256d..cfb085de876b7 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -15,6 +15,7 @@
 #include <linux/err.h>
 #include <linux/fs.h>
 #include <linux/hw_random.h>
+#include <linux/random.h>
 #include <linux/kernel.h>
 #include <linux/kthread.h>
 #include <linux/sched/signal.h>
diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h
index 1a9fc38f8938c..aa1d4da03538b 100644
--- a/include/linux/hw_random.h
+++ b/include/linux/hw_random.h
@@ -60,7 +60,5 @@ extern int devm_hwrng_register(struct device *dev, struct hwrng *rng);
 /** Unregister a Hardware Random Number Generator driver. */
 extern void hwrng_unregister(struct hwrng *rng);
 extern void devm_hwrng_unregister(struct device *dve, struct hwrng *rng);
-/** Feed random bits into the pool. */
-extern void add_hwgenerator_randomness(const void *buffer, size_t count, size_t entropy);
 
 #endif /* LINUX_HWRANDOM_H_ */
diff --git a/include/linux/random.h b/include/linux/random.h
index 37e1e8c43d7ee..d7354de9351e9 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -32,6 +32,8 @@ static inline void add_latent_entropy(void) {}
 extern void add_input_randomness(unsigned int type, unsigned int code,
 				 unsigned int value) __latent_entropy;
 extern void add_interrupt_randomness(int irq) __latent_entropy;
+extern void add_hwgenerator_randomness(const void *buffer, size_t count,
+				       size_t entropy);
 
 extern void get_random_bytes(void *buf, size_t nbytes);
 extern int wait_for_random_bytes(void);
-- 
GitLab


From 3191dd5a1179ef0fad5a050a1702ae98b6251e8f Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Sun, 13 Feb 2022 22:48:04 +0100
Subject: [PATCH 0564/1586] random: clear fast pool, crng, and batches in cpuhp
 bring up

For the irq randomness fast pool, rather than having to use expensive
atomics, which were visibly the most expensive thing in the entire irq
handler, simply take care of the extreme edge case of resetting count to
zero in the cpuhp online handler, just after workqueues have been
reenabled. This simplifies the code a bit and lets us use vanilla
variables rather than atomics, and performance should be improved.

As well, very early on when the CPU comes up, while interrupts are still
disabled, we clear out the per-cpu crng and its batches, so that it
always starts with fresh randomness.

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: Sultan Alsawaf <sultan@kerneltoast.com>
Cc: Dominik Brodowski <linux@dominikbrodowski.net>
Acked-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/char/random.c      | 62 +++++++++++++++++++++++++++++---------
 include/linux/cpuhotplug.h |  2 ++
 include/linux/random.h     |  5 +++
 kernel/cpu.c               | 11 +++++++
 4 files changed, 65 insertions(+), 15 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index bca4467e540f7..d73a75cbe82d6 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -698,6 +698,25 @@ u32 get_random_u32(void)
 }
 EXPORT_SYMBOL(get_random_u32);
 
+#ifdef CONFIG_SMP
+/*
+ * This function is called when the CPU is coming up, with entry
+ * CPUHP_RANDOM_PREPARE, which comes before CPUHP_WORKQUEUE_PREP.
+ */
+int random_prepare_cpu(unsigned int cpu)
+{
+	/*
+	 * When the cpu comes back online, immediately invalidate both
+	 * the per-cpu crng and all batches, so that we serve fresh
+	 * randomness.
+	 */
+	per_cpu_ptr(&crngs, cpu)->generation = ULONG_MAX;
+	per_cpu_ptr(&batched_entropy_u32, cpu)->position = UINT_MAX;
+	per_cpu_ptr(&batched_entropy_u64, cpu)->position = UINT_MAX;
+	return 0;
+}
+#endif
+
 /**
  * randomize_page - Generate a random, page aligned address
  * @start:	The smallest acceptable address the caller will take.
@@ -1183,7 +1202,7 @@ struct fast_pool {
 	};
 	struct work_struct mix;
 	unsigned long last;
-	atomic_t count;
+	unsigned int count;
 	u16 reg_idx;
 };
 
@@ -1219,6 +1238,29 @@ static void fast_mix(u32 pool[4])
 
 static DEFINE_PER_CPU(struct fast_pool, irq_randomness);
 
+#ifdef CONFIG_SMP
+/*
+ * This function is called when the CPU has just come online, with
+ * entry CPUHP_AP_RANDOM_ONLINE, just after CPUHP_AP_WORKQUEUE_ONLINE.
+ */
+int random_online_cpu(unsigned int cpu)
+{
+	/*
+	 * During CPU shutdown and before CPU onlining, add_interrupt_
+	 * randomness() may schedule mix_interrupt_randomness(), and
+	 * set the MIX_INFLIGHT flag. However, because the worker can
+	 * be scheduled on a different CPU during this period, that
+	 * flag will never be cleared. For that reason, we zero out
+	 * the flag here, which runs just after workqueues are onlined
+	 * for the CPU again. This also has the effect of setting the
+	 * irq randomness count to zero so that new accumulated irqs
+	 * are fresh.
+	 */
+	per_cpu_ptr(&irq_randomness, cpu)->count = 0;
+	return 0;
+}
+#endif
+
 static u32 get_reg(struct fast_pool *f, struct pt_regs *regs)
 {
 	u32 *ptr = (u32 *)regs;
@@ -1243,15 +1285,6 @@ static void mix_interrupt_randomness(struct work_struct *work)
 	local_irq_disable();
 	if (fast_pool != this_cpu_ptr(&irq_randomness)) {
 		local_irq_enable();
-		/*
-		 * If we are unlucky enough to have been moved to another CPU,
-		 * during CPU hotplug while the CPU was shutdown then we set
-		 * our count to zero atomically so that when the CPU comes
-		 * back online, it can enqueue work again. The _release here
-		 * pairs with the atomic_inc_return_acquire in
-		 * add_interrupt_randomness().
-		 */
-		atomic_set_release(&fast_pool->count, 0);
 		return;
 	}
 
@@ -1260,7 +1293,7 @@ static void mix_interrupt_randomness(struct work_struct *work)
 	 * consistent view, before we reenable irqs again.
 	 */
 	memcpy(pool, fast_pool->pool32, sizeof(pool));
-	atomic_set(&fast_pool->count, 0);
+	fast_pool->count = 0;
 	fast_pool->last = jiffies;
 	local_irq_enable();
 
@@ -1296,14 +1329,13 @@ void add_interrupt_randomness(int irq)
 	}
 
 	fast_mix(fast_pool->pool32);
-	/* The _acquire here pairs with the atomic_set_release in mix_interrupt_randomness(). */
-	new_count = (unsigned int)atomic_inc_return_acquire(&fast_pool->count);
+	new_count = ++fast_pool->count;
 
 	if (unlikely(crng_init == 0)) {
 		if (new_count >= 64 &&
 		    crng_pre_init_inject(fast_pool->pool32, sizeof(fast_pool->pool32),
 					 true, true) > 0) {
-			atomic_set(&fast_pool->count, 0);
+			fast_pool->count = 0;
 			fast_pool->last = now;
 			if (spin_trylock(&input_pool.lock)) {
 				_mix_pool_bytes(&fast_pool->pool32, sizeof(fast_pool->pool32));
@@ -1321,7 +1353,7 @@ void add_interrupt_randomness(int irq)
 
 	if (unlikely(!fast_pool->mix.func))
 		INIT_WORK(&fast_pool->mix, mix_interrupt_randomness);
-	atomic_or(MIX_INFLIGHT, &fast_pool->count);
+	fast_pool->count |= MIX_INFLIGHT;
 	queue_work_on(raw_smp_processor_id(), system_highpri_wq, &fast_pool->mix);
 }
 EXPORT_SYMBOL_GPL(add_interrupt_randomness);
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 411a428ace4d4..481e565cc5c42 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -100,6 +100,7 @@ enum cpuhp_state {
 	CPUHP_AP_ARM_CACHE_B15_RAC_DEAD,
 	CPUHP_PADATA_DEAD,
 	CPUHP_AP_DTPM_CPU_DEAD,
+	CPUHP_RANDOM_PREPARE,
 	CPUHP_WORKQUEUE_PREP,
 	CPUHP_POWER_NUMA_PREPARE,
 	CPUHP_HRTIMERS_PREPARE,
@@ -240,6 +241,7 @@ enum cpuhp_state {
 	CPUHP_AP_PERF_CSKY_ONLINE,
 	CPUHP_AP_WATCHDOG_ONLINE,
 	CPUHP_AP_WORKQUEUE_ONLINE,
+	CPUHP_AP_RANDOM_ONLINE,
 	CPUHP_AP_RCUTREE_ONLINE,
 	CPUHP_AP_BASE_CACHEINFO_ONLINE,
 	CPUHP_AP_ONLINE_DYN,
diff --git a/include/linux/random.h b/include/linux/random.h
index d7354de9351e9..6148b8d1ccf34 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -156,4 +156,9 @@ static inline bool __init arch_get_random_long_early(unsigned long *v)
 }
 #endif
 
+#ifdef CONFIG_SMP
+extern int random_prepare_cpu(unsigned int cpu);
+extern int random_online_cpu(unsigned int cpu);
+#endif
+
 #endif /* _LINUX_RANDOM_H */
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 407a2568f35eb..238cba15449f6 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -34,6 +34,7 @@
 #include <linux/scs.h>
 #include <linux/percpu-rwsem.h>
 #include <linux/cpuset.h>
+#include <linux/random.h>
 
 #include <trace/events/power.h>
 #define CREATE_TRACE_POINTS
@@ -1659,6 +1660,11 @@ static struct cpuhp_step cpuhp_hp_states[] = {
 		.startup.single		= perf_event_init_cpu,
 		.teardown.single	= perf_event_exit_cpu,
 	},
+	[CPUHP_RANDOM_PREPARE] = {
+		.name			= "random:prepare",
+		.startup.single		= random_prepare_cpu,
+		.teardown.single	= NULL,
+	},
 	[CPUHP_WORKQUEUE_PREP] = {
 		.name			= "workqueue:prepare",
 		.startup.single		= workqueue_prepare_cpu,
@@ -1782,6 +1788,11 @@ static struct cpuhp_step cpuhp_hp_states[] = {
 		.startup.single		= workqueue_online_cpu,
 		.teardown.single	= workqueue_offline_cpu,
 	},
+	[CPUHP_AP_RANDOM_ONLINE] = {
+		.name			= "random:online",
+		.startup.single		= random_online_cpu,
+		.teardown.single	= NULL,
+	},
 	[CPUHP_AP_RCUTREE_ONLINE] = {
 		.name			= "RCU/tree:online",
 		.startup.single		= rcutree_online_cpu,
-- 
GitLab


From cd3bc044af483422cc81a93f23c78c20c978b17c Mon Sep 17 00:00:00 2001
From: Yael Tzur <yaelt@google.com>
Date: Tue, 15 Feb 2022 09:19:53 -0500
Subject: [PATCH 0565/1586] KEYS: encrypted: Instantiate key with user-provided
 decrypted data

For availability and performance reasons master keys often need to be
released outside of a Key Management Service (KMS) to clients. It
would be beneficial to provide a mechanism where the
wrapping/unwrapping of data encryption keys (DEKs) is not dependent
on a remote call at runtime yet security is not (or only minimally)
compromised. Master keys could be securely stored in the Kernel and
be used to wrap/unwrap keys from Userspace.

The encrypted.c class supports instantiation of encrypted keys with
either an already-encrypted key material, or by generating new key
material based on random numbers. This patch defines a new datablob
format: [<format>] <master-key name> <decrypted data length>
<decrypted data> that allows to inject and encrypt user-provided
decrypted data. The decrypted data must be hex-ascii encoded.

Signed-off-by: Yael Tzur <yaelt@google.com>
Reviewed-by: Mimi Zohar <zohar@linux.ibm.com>
Reviewed-by: Sumit Garg <sumit.garg@linaro.org>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
---
 .../security/keys/trusted-encrypted.rst       | 25 +++++--
 security/keys/Kconfig                         | 19 +++--
 security/keys/encrypted-keys/encrypted.c      | 71 ++++++++++++++-----
 3 files changed, 86 insertions(+), 29 deletions(-)

diff --git a/Documentation/security/keys/trusted-encrypted.rst b/Documentation/security/keys/trusted-encrypted.rst
index 80d5a5af62a1d..f614dad7de12f 100644
--- a/Documentation/security/keys/trusted-encrypted.rst
+++ b/Documentation/security/keys/trusted-encrypted.rst
@@ -107,12 +107,13 @@ Encrypted Keys
 --------------
 
 Encrypted keys do not depend on a trust source, and are faster, as they use AES
-for encryption/decryption. New keys are created from kernel-generated random
-numbers, and are encrypted/decrypted using a specified ‘master’ key. The
-‘master’ key can either be a trusted-key or user-key type. The main disadvantage
-of encrypted keys is that if they are not rooted in a trusted key, they are only
-as secure as the user key encrypting them. The master user key should therefore
-be loaded in as secure a way as possible, preferably early in boot.
+for encryption/decryption. New keys are created either from kernel-generated
+random numbers or user-provided decrypted data, and are encrypted/decrypted
+using a specified ‘master’ key. The ‘master’ key can either be a trusted-key or
+user-key type. The main disadvantage of encrypted keys is that if they are not
+rooted in a trusted key, they are only as secure as the user key encrypting
+them. The master user key should therefore be loaded in as secure a way as
+possible, preferably early in boot.
 
 
 Usage
@@ -199,6 +200,8 @@ Usage::
 
     keyctl add encrypted name "new [format] key-type:master-key-name keylen"
         ring
+    keyctl add encrypted name "new [format] key-type:master-key-name keylen
+        decrypted-data" ring
     keyctl add encrypted name "load hex_blob" ring
     keyctl update keyid "update key-type:master-key-name"
 
@@ -303,6 +306,16 @@ Load an encrypted key "evm" from saved blob::
     82dbbc55be2a44616e4959430436dc4f2a7a9659aa60bb4652aeb2120f149ed197c564e0
     24717c64 5972dcb82ab2dde83376d82b2e3c09ffc
 
+Instantiate an encrypted key "evm" using user-provided decrypted data::
+
+    $ keyctl add encrypted evm "new default user:kmk 32 `cat evm_decrypted_data.blob`" @u
+    794890253
+
+    $ keyctl print 794890253
+    default user:kmk 32 2375725ad57798846a9bbd240de8906f006e66c03af53b1b382d
+    bbc55be2a44616e4959430436dc4f2a7a9659aa60bb4652aeb2120f149ed197c564e0247
+    17c64 5972dcb82ab2dde83376d82b2e3c09ffc
+
 Other uses for trusted and encrypted keys, such as for disk and file encryption
 are anticipated.  In particular the new format 'ecryptfs' has been defined
 in order to use encrypted keys to mount an eCryptfs filesystem.  More details
diff --git a/security/keys/Kconfig b/security/keys/Kconfig
index 969122c7b92f4..0e30b361e1c1e 100644
--- a/security/keys/Kconfig
+++ b/security/keys/Kconfig
@@ -98,10 +98,21 @@ config ENCRYPTED_KEYS
 	select CRYPTO_RNG
 	help
 	  This option provides support for create/encrypting/decrypting keys
-	  in the kernel.  Encrypted keys are kernel generated random numbers,
-	  which are encrypted/decrypted with a 'master' symmetric key. The
-	  'master' key can be either a trusted-key or user-key type.
-	  Userspace only ever sees/stores encrypted blobs.
+	  in the kernel.  Encrypted keys are instantiated using kernel
+	  generated random numbers or provided decrypted data, and are
+	  encrypted/decrypted with a 'master' symmetric key. The 'master'
+	  key can be either a trusted-key or user-key type. Only encrypted
+	  blobs are ever output to Userspace.
+
+	  If you are unsure as to whether this is required, answer N.
+
+config USER_DECRYPTED_DATA
+	bool "Allow encrypted keys with user decrypted data"
+	depends on ENCRYPTED_KEYS
+	help
+	  This option provides support for instantiating encrypted keys using
+	  user-provided decrypted data.  The decrypted data must be hex-ascii
+	  encoded.
 
 	  If you are unsure as to whether this is required, answer N.
 
diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c
index 87432b35d7713..e05cfc2e49aeb 100644
--- a/security/keys/encrypted-keys/encrypted.c
+++ b/security/keys/encrypted-keys/encrypted.c
@@ -78,6 +78,11 @@ static const match_table_t key_tokens = {
 	{Opt_err, NULL}
 };
 
+static bool user_decrypted_data = IS_ENABLED(CONFIG_USER_DECRYPTED_DATA);
+module_param(user_decrypted_data, bool, 0);
+MODULE_PARM_DESC(user_decrypted_data,
+	"Allow instantiation of encrypted keys using provided decrypted data");
+
 static int aes_get_sizes(void)
 {
 	struct crypto_skcipher *tfm;
@@ -158,7 +163,7 @@ static int valid_master_desc(const char *new_desc, const char *orig_desc)
  * datablob_parse - parse the keyctl data
  *
  * datablob format:
- * new [<format>] <master-key name> <decrypted data length>
+ * new [<format>] <master-key name> <decrypted data length> [<decrypted data>]
  * load [<format>] <master-key name> <decrypted data length>
  *     <encrypted iv + data>
  * update <new-master-key name>
@@ -170,7 +175,7 @@ static int valid_master_desc(const char *new_desc, const char *orig_desc)
  */
 static int datablob_parse(char *datablob, const char **format,
 			  char **master_desc, char **decrypted_datalen,
-			  char **hex_encoded_iv)
+			  char **hex_encoded_iv, char **decrypted_data)
 {
 	substring_t args[MAX_OPT_ARGS];
 	int ret = -EINVAL;
@@ -231,6 +236,7 @@ static int datablob_parse(char *datablob, const char **format,
 				"when called from .update method\n", keyword);
 			break;
 		}
+		*decrypted_data = strsep(&datablob, " \t");
 		ret = 0;
 		break;
 	case Opt_load:
@@ -595,7 +601,8 @@ out:
 static struct encrypted_key_payload *encrypted_key_alloc(struct key *key,
 							 const char *format,
 							 const char *master_desc,
-							 const char *datalen)
+							 const char *datalen,
+							 const char *decrypted_data)
 {
 	struct encrypted_key_payload *epayload = NULL;
 	unsigned short datablob_len;
@@ -604,6 +611,7 @@ static struct encrypted_key_payload *encrypted_key_alloc(struct key *key,
 	unsigned int encrypted_datalen;
 	unsigned int format_len;
 	long dlen;
+	int i;
 	int ret;
 
 	ret = kstrtol(datalen, 10, &dlen);
@@ -613,6 +621,24 @@ static struct encrypted_key_payload *encrypted_key_alloc(struct key *key,
 	format_len = (!format) ? strlen(key_format_default) : strlen(format);
 	decrypted_datalen = dlen;
 	payload_datalen = decrypted_datalen;
+
+	if (decrypted_data) {
+		if (!user_decrypted_data) {
+			pr_err("encrypted key: instantiation of keys using provided decrypted data is disabled since CONFIG_USER_DECRYPTED_DATA is set to false\n");
+			return ERR_PTR(-EINVAL);
+		}
+		if (strlen(decrypted_data) != decrypted_datalen) {
+			pr_err("encrypted key: decrypted data provided does not match decrypted data length provided\n");
+			return ERR_PTR(-EINVAL);
+		}
+		for (i = 0; i < strlen(decrypted_data); i++) {
+			if (!isxdigit(decrypted_data[i])) {
+				pr_err("encrypted key: decrypted data provided must contain only hexadecimal characters\n");
+				return ERR_PTR(-EINVAL);
+			}
+		}
+	}
+
 	if (format) {
 		if (!strcmp(format, key_format_ecryptfs)) {
 			if (dlen != ECRYPTFS_MAX_KEY_BYTES) {
@@ -740,13 +766,14 @@ static void __ekey_init(struct encrypted_key_payload *epayload,
 /*
  * encrypted_init - initialize an encrypted key
  *
- * For a new key, use a random number for both the iv and data
- * itself.  For an old key, decrypt the hex encoded data.
+ * For a new key, use either a random number or user-provided decrypted data in
+ * case it is provided. A random number is used for the iv in both cases. For
+ * an old key, decrypt the hex encoded data.
  */
 static int encrypted_init(struct encrypted_key_payload *epayload,
 			  const char *key_desc, const char *format,
 			  const char *master_desc, const char *datalen,
-			  const char *hex_encoded_iv)
+			  const char *hex_encoded_iv, const char *decrypted_data)
 {
 	int ret = 0;
 
@@ -760,21 +787,26 @@ static int encrypted_init(struct encrypted_key_payload *epayload,
 	}
 
 	__ekey_init(epayload, format, master_desc, datalen);
-	if (!hex_encoded_iv) {
-		get_random_bytes(epayload->iv, ivsize);
-
-		get_random_bytes(epayload->decrypted_data,
-				 epayload->decrypted_datalen);
-	} else
+	if (hex_encoded_iv) {
 		ret = encrypted_key_decrypt(epayload, format, hex_encoded_iv);
+	} else if (decrypted_data) {
+		get_random_bytes(epayload->iv, ivsize);
+		memcpy(epayload->decrypted_data, decrypted_data,
+				   epayload->decrypted_datalen);
+	} else {
+		get_random_bytes(epayload->iv, ivsize);
+		get_random_bytes(epayload->decrypted_data, epayload->decrypted_datalen);
+	}
 	return ret;
 }
 
 /*
  * encrypted_instantiate - instantiate an encrypted key
  *
- * Decrypt an existing encrypted datablob or create a new encrypted key
- * based on a kernel random number.
+ * Instantiates the key:
+ * - by decrypting an existing encrypted datablob, or
+ * - by creating a new encrypted key based on a kernel random number, or
+ * - using provided decrypted data.
  *
  * On success, return 0. Otherwise return errno.
  */
@@ -787,6 +819,7 @@ static int encrypted_instantiate(struct key *key,
 	char *master_desc = NULL;
 	char *decrypted_datalen = NULL;
 	char *hex_encoded_iv = NULL;
+	char *decrypted_data = NULL;
 	size_t datalen = prep->datalen;
 	int ret;
 
@@ -799,18 +832,18 @@ static int encrypted_instantiate(struct key *key,
 	datablob[datalen] = 0;
 	memcpy(datablob, prep->data, datalen);
 	ret = datablob_parse(datablob, &format, &master_desc,
-			     &decrypted_datalen, &hex_encoded_iv);
+			     &decrypted_datalen, &hex_encoded_iv, &decrypted_data);
 	if (ret < 0)
 		goto out;
 
 	epayload = encrypted_key_alloc(key, format, master_desc,
-				       decrypted_datalen);
+				       decrypted_datalen, decrypted_data);
 	if (IS_ERR(epayload)) {
 		ret = PTR_ERR(epayload);
 		goto out;
 	}
 	ret = encrypted_init(epayload, key->description, format, master_desc,
-			     decrypted_datalen, hex_encoded_iv);
+			     decrypted_datalen, hex_encoded_iv, decrypted_data);
 	if (ret < 0) {
 		kfree_sensitive(epayload);
 		goto out;
@@ -860,7 +893,7 @@ static int encrypted_update(struct key *key, struct key_preparsed_payload *prep)
 
 	buf[datalen] = 0;
 	memcpy(buf, prep->data, datalen);
-	ret = datablob_parse(buf, &format, &new_master_desc, NULL, NULL);
+	ret = datablob_parse(buf, &format, &new_master_desc, NULL, NULL, NULL);
 	if (ret < 0)
 		goto out;
 
@@ -869,7 +902,7 @@ static int encrypted_update(struct key *key, struct key_preparsed_payload *prep)
 		goto out;
 
 	new_epayload = encrypted_key_alloc(key, epayload->format,
-					   new_master_desc, epayload->datalen);
+					   new_master_desc, epayload->datalen, NULL);
 	if (IS_ERR(new_epayload)) {
 		ret = PTR_ERR(new_epayload);
 		goto out;
-- 
GitLab


From 5741150c808b2bbeb1017609f3029daf6651b7d5 Mon Sep 17 00:00:00 2001
From: Ahmad Fatoum <a.fatoum@pengutronix.de>
Date: Tue, 1 Feb 2022 12:51:41 +0100
Subject: [PATCH 0566/1586] spi: stm32: ignore Rx queue not empty in stm32f4 Tx
 only mode

STM32F4_SPI_SR_RXNE and STM32F4_SPI_SR_OVR are distinct bits in the same
status register.  ~STM32F4_SPI_SR_OVR | STM32F4_SPI_SR_RXNE is thus
equal to ~STM32F4_SPI_SR_OVR.

The original intention was likely for transmission-only transfers to
ignore interrupts both for when the Rx queue has bytes (RXNE) as well
as when these bytes haven't been read in time (OVR).

Fix the typo by adding the missing parenthesis.

Signed-off-by: Ahmad Fatoum <a.fatoum@pengutronix.de>
Link: https://lore.kernel.org/r/20220201115142.3999860-1-a.fatoum@pengutronix.de
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-stm32.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/spi/spi-stm32.c b/drivers/spi/spi-stm32.c
index 7fc24505a72cd..a6adc20f68626 100644
--- a/drivers/spi/spi-stm32.c
+++ b/drivers/spi/spi-stm32.c
@@ -763,7 +763,7 @@ static irqreturn_t stm32f4_spi_irq_event(int irq, void *dev_id)
 	if (!spi->cur_usedma && (spi->cur_comm == SPI_SIMPLEX_TX ||
 				 spi->cur_comm == SPI_3WIRE_TX)) {
 		/* OVR flag shouldn't be handled for TX only mode */
-		sr &= ~STM32F4_SPI_SR_OVR | STM32F4_SPI_SR_RXNE;
+		sr &= ~(STM32F4_SPI_SR_OVR | STM32F4_SPI_SR_RXNE);
 		mask |= STM32F4_SPI_SR_TXE;
 	}
 
-- 
GitLab


From 1847e3046c528bd85bd51e2860f4139bd9052d6c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andreas=20F=C3=A4rber?= <afaerber@suse.de>
Date: Sat, 19 Feb 2022 14:15:48 +0100
Subject: [PATCH 0567/1586] spi: gpio: Implement LSB First bitbang support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add support for slave DT property spi-lsb-first, i.e., SPI_LSB_FIRST mode.
Duplicate the inline helpers bitbang_txrx_be_cpha{0,1} as LE versions.
Conditionally call them from all the spi-gpio txrx_word callbacks.

Some alternatives to this implementation approach were discussed back
then [0], but eventually it was considered reasonable.

[0] https://lore.kernel.org/linux-arm-kernel/20191212033952.5967-8-afaerber@suse.de/

Signed-off-by: Andreas Färber <afaerber@suse.de>
Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Tested-by: Christian Hewitt <christianshewitt@gmail.com>
Link: https://lore.kernel.org/r/feac3377-4ad1-77d8-9a18-3588d80fb909@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-bitbang-txrx.h | 66 ++++++++++++++++++++++++++++++++++
 drivers/spi/spi-gpio.c         | 42 +++++++++++++++++-----
 2 files changed, 99 insertions(+), 9 deletions(-)

diff --git a/drivers/spi/spi-bitbang-txrx.h b/drivers/spi/spi-bitbang-txrx.h
index ae61d72c7d281..267342dfa7388 100644
--- a/drivers/spi/spi-bitbang-txrx.h
+++ b/drivers/spi/spi-bitbang-txrx.h
@@ -41,6 +41,8 @@
  * chips need ... there may be several reasons you'd need to tweak timings
  * in these routines, not just to make it faster or slower to match a
  * particular CPU clock rate.
+ *
+ * ToDo: Maybe the bitrev macros can be used to improve the code?
  */
 
 static inline u32
@@ -106,3 +108,67 @@ bitbang_txrx_be_cpha1(struct spi_device *spi,
 	}
 	return word;
 }
+
+static inline u32
+bitbang_txrx_le_cpha0(struct spi_device *spi,
+		unsigned int nsecs, unsigned int cpol, unsigned int flags,
+		u32 word, u8 bits)
+{
+	/* if (cpol == 0) this is SPI_MODE_0; else this is SPI_MODE_2 */
+
+	u32 oldbit = !(word & 1);
+	/* clock starts at inactive polarity */
+	for (; likely(bits); bits--) {
+
+		/* setup LSB (to slave) on trailing edge */
+		if ((flags & SPI_MASTER_NO_TX) == 0) {
+			if ((word & 1) != oldbit) {
+				setmosi(spi, word & 1);
+				oldbit = word & 1;
+			}
+		}
+		spidelay(nsecs);	/* T(setup) */
+
+		setsck(spi, !cpol);
+		spidelay(nsecs);
+
+		/* sample LSB (from slave) on leading edge */
+		word >>= 1;
+		if ((flags & SPI_MASTER_NO_RX) == 0)
+			word |= getmiso(spi) << (bits - 1);
+		setsck(spi, cpol);
+	}
+	return word;
+}
+
+static inline u32
+bitbang_txrx_le_cpha1(struct spi_device *spi,
+		unsigned int nsecs, unsigned int cpol, unsigned int flags,
+		u32 word, u8 bits)
+{
+	/* if (cpol == 0) this is SPI_MODE_1; else this is SPI_MODE_3 */
+
+	u32 oldbit = !(word & 1);
+	/* clock starts at inactive polarity */
+	for (; likely(bits); bits--) {
+
+		/* setup LSB (to slave) on leading edge */
+		setsck(spi, !cpol);
+		if ((flags & SPI_MASTER_NO_TX) == 0) {
+			if ((word & 1) != oldbit) {
+				setmosi(spi, word & 1);
+				oldbit = word & 1;
+			}
+		}
+		spidelay(nsecs); /* T(setup) */
+
+		setsck(spi, cpol);
+		spidelay(nsecs);
+
+		/* sample LSB (from slave) on trailing edge */
+		word >>= 1;
+		if ((flags & SPI_MASTER_NO_RX) == 0)
+			word |= getmiso(spi) << (bits - 1);
+	}
+	return word;
+}
diff --git a/drivers/spi/spi-gpio.c b/drivers/spi/spi-gpio.c
index 0584f4d2fde29..4b12c4964a664 100644
--- a/drivers/spi/spi-gpio.c
+++ b/drivers/spi/spi-gpio.c
@@ -135,25 +135,37 @@ static inline int getmiso(const struct spi_device *spi)
 static u32 spi_gpio_txrx_word_mode0(struct spi_device *spi,
 		unsigned nsecs, u32 word, u8 bits, unsigned flags)
 {
-	return bitbang_txrx_be_cpha0(spi, nsecs, 0, flags, word, bits);
+	if (unlikely(spi->mode & SPI_LSB_FIRST))
+		return bitbang_txrx_le_cpha0(spi, nsecs, 0, flags, word, bits);
+	else
+		return bitbang_txrx_be_cpha0(spi, nsecs, 0, flags, word, bits);
 }
 
 static u32 spi_gpio_txrx_word_mode1(struct spi_device *spi,
 		unsigned nsecs, u32 word, u8 bits, unsigned flags)
 {
-	return bitbang_txrx_be_cpha1(spi, nsecs, 0, flags, word, bits);
+	if (unlikely(spi->mode & SPI_LSB_FIRST))
+		return bitbang_txrx_le_cpha1(spi, nsecs, 0, flags, word, bits);
+	else
+		return bitbang_txrx_be_cpha1(spi, nsecs, 0, flags, word, bits);
 }
 
 static u32 spi_gpio_txrx_word_mode2(struct spi_device *spi,
 		unsigned nsecs, u32 word, u8 bits, unsigned flags)
 {
-	return bitbang_txrx_be_cpha0(spi, nsecs, 1, flags, word, bits);
+	if (unlikely(spi->mode & SPI_LSB_FIRST))
+		return bitbang_txrx_le_cpha0(spi, nsecs, 1, flags, word, bits);
+	else
+		return bitbang_txrx_be_cpha0(spi, nsecs, 1, flags, word, bits);
 }
 
 static u32 spi_gpio_txrx_word_mode3(struct spi_device *spi,
 		unsigned nsecs, u32 word, u8 bits, unsigned flags)
 {
-	return bitbang_txrx_be_cpha1(spi, nsecs, 1, flags, word, bits);
+	if (unlikely(spi->mode & SPI_LSB_FIRST))
+		return bitbang_txrx_le_cpha1(spi, nsecs, 1, flags, word, bits);
+	else
+		return bitbang_txrx_be_cpha1(spi, nsecs, 1, flags, word, bits);
 }
 
 /*
@@ -170,28 +182,40 @@ static u32 spi_gpio_spec_txrx_word_mode0(struct spi_device *spi,
 		unsigned nsecs, u32 word, u8 bits, unsigned flags)
 {
 	flags = spi->master->flags;
-	return bitbang_txrx_be_cpha0(spi, nsecs, 0, flags, word, bits);
+	if (unlikely(spi->mode & SPI_LSB_FIRST))
+		return bitbang_txrx_le_cpha0(spi, nsecs, 0, flags, word, bits);
+	else
+		return bitbang_txrx_be_cpha0(spi, nsecs, 0, flags, word, bits);
 }
 
 static u32 spi_gpio_spec_txrx_word_mode1(struct spi_device *spi,
 		unsigned nsecs, u32 word, u8 bits, unsigned flags)
 {
 	flags = spi->master->flags;
-	return bitbang_txrx_be_cpha1(spi, nsecs, 0, flags, word, bits);
+	if (unlikely(spi->mode & SPI_LSB_FIRST))
+		return bitbang_txrx_le_cpha1(spi, nsecs, 0, flags, word, bits);
+	else
+		return bitbang_txrx_be_cpha1(spi, nsecs, 0, flags, word, bits);
 }
 
 static u32 spi_gpio_spec_txrx_word_mode2(struct spi_device *spi,
 		unsigned nsecs, u32 word, u8 bits, unsigned flags)
 {
 	flags = spi->master->flags;
-	return bitbang_txrx_be_cpha0(spi, nsecs, 1, flags, word, bits);
+	if (unlikely(spi->mode & SPI_LSB_FIRST))
+		return bitbang_txrx_le_cpha0(spi, nsecs, 1, flags, word, bits);
+	else
+		return bitbang_txrx_be_cpha0(spi, nsecs, 1, flags, word, bits);
 }
 
 static u32 spi_gpio_spec_txrx_word_mode3(struct spi_device *spi,
 		unsigned nsecs, u32 word, u8 bits, unsigned flags)
 {
 	flags = spi->master->flags;
-	return bitbang_txrx_be_cpha1(spi, nsecs, 1, flags, word, bits);
+	if (unlikely(spi->mode & SPI_LSB_FIRST))
+		return bitbang_txrx_le_cpha1(spi, nsecs, 1, flags, word, bits);
+	else
+		return bitbang_txrx_be_cpha1(spi, nsecs, 1, flags, word, bits);
 }
 
 /*----------------------------------------------------------------------*/
@@ -378,7 +402,7 @@ static int spi_gpio_probe(struct platform_device *pdev)
 
 	master->bits_per_word_mask = SPI_BPW_RANGE_MASK(1, 32);
 	master->mode_bits = SPI_3WIRE | SPI_3WIRE_HIZ | SPI_CPHA | SPI_CPOL |
-			    SPI_CS_HIGH;
+			    SPI_CS_HIGH | SPI_LSB_FIRST;
 	if (!spi_gpio->mosi) {
 		/* HW configuration without MOSI pin
 		 *
-- 
GitLab


From 86d7331299fda7634b11c1b7c911432679d525a5 Mon Sep 17 00:00:00 2001
From: Zhang Wensheng <zhangwensheng5@huawei.com>
Date: Thu, 17 Feb 2022 14:42:47 +0800
Subject: [PATCH 0568/1586] block: update io_ticks when io hang

When the inflight IOs are slow and no new IOs are issued, we expect
iostat could manifest the IO hang problem. However after
commit 5b18b5a73760 ("block: delete part_round_stats and switch to less
precise counting"), io_tick and time_in_queue will not be updated until
the end of IO, and the avgqu-sz and %util columns of iostat will be zero.

Because it has using stat.nsecs accumulation to express time_in_queue
which is not suitable to change, and may %util will express the status
better when io hang occur. To fix io_ticks, we use update_io_ticks and
inflight to update io_ticks when diskstats_show and part_stat_show
been called.

Fixes: 5b18b5a73760 ("block: delete part_round_stats and switch to less precise counting")
Signed-off-by: Zhang Wensheng <zhangwensheng5@huawei.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220217064247.4041435-1-zhangwensheng5@huawei.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/genhd.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/block/genhd.c b/block/genhd.c
index e351fac41bf25..54f60ded2ee6f 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -919,12 +919,17 @@ ssize_t part_stat_show(struct device *dev,
 	struct disk_stats stat;
 	unsigned int inflight;
 
-	part_stat_read_all(bdev, &stat);
 	if (queue_is_mq(q))
 		inflight = blk_mq_in_flight(q, bdev);
 	else
 		inflight = part_in_flight(bdev);
 
+	if (inflight) {
+		part_stat_lock();
+		update_io_ticks(bdev, jiffies, true);
+		part_stat_unlock();
+	}
+	part_stat_read_all(bdev, &stat);
 	return sprintf(buf,
 		"%8lu %8lu %8llu %8u "
 		"%8lu %8lu %8llu %8u "
@@ -1184,12 +1189,17 @@ static int diskstats_show(struct seq_file *seqf, void *v)
 	xa_for_each(&gp->part_tbl, idx, hd) {
 		if (bdev_is_partition(hd) && !bdev_nr_sectors(hd))
 			continue;
-		part_stat_read_all(hd, &stat);
 		if (queue_is_mq(gp->queue))
 			inflight = blk_mq_in_flight(gp->queue, hd);
 		else
 			inflight = part_in_flight(hd);
 
+		if (inflight) {
+			part_stat_lock();
+			update_io_ticks(hd, jiffies, true);
+			part_stat_unlock();
+		}
+		part_stat_read_all(hd, &stat);
 		seq_printf(seqf, "%4d %7d %pg "
 			   "%lu %lu %lu %u "
 			   "%lu %lu %lu %u "
-- 
GitLab


From e0891269a8c25715bd9510dc355326b00ab42db2 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Wed, 16 Feb 2022 16:22:26 +0000
Subject: [PATCH 0569/1586] linkage: add SYM_FUNC_ALIAS{,_LOCAL,_WEAK}()

Currently aliasing an asm function requires adding START and END
annotations for each name, as per Documentation/asm-annotations.rst:

	SYM_FUNC_START_ALIAS(__memset)
	SYM_FUNC_START(memset)
	    ... asm insns ...
	SYM_FUNC_END(memset)
	SYM_FUNC_END_ALIAS(__memset)

This is more painful than necessary to maintain, especially where a
function has many aliases, some of which we may wish to define
conditionally. For example, arm64's memcpy/memmove implementation (which
uses some arch-specific SYM_*() helpers) has:

	SYM_FUNC_START_ALIAS(__memmove)
	SYM_FUNC_START_ALIAS_WEAK_PI(memmove)
	SYM_FUNC_START_ALIAS(__memcpy)
	SYM_FUNC_START_WEAK_PI(memcpy)
	    ... asm insns ...
	SYM_FUNC_END_PI(memcpy)
	EXPORT_SYMBOL(memcpy)
	SYM_FUNC_END_ALIAS(__memcpy)
	EXPORT_SYMBOL(__memcpy)
	SYM_FUNC_END_ALIAS_PI(memmove)
	EXPORT_SYMBOL(memmove)
	SYM_FUNC_END_ALIAS(__memmove)
	EXPORT_SYMBOL(__memmove)
	SYM_FUNC_START(name)

It would be much nicer if we could define the aliases *after* the
standard function definition. This would avoid the need to specify each
symbol name twice, and would make it easier to spot the canonical
function definition.

This patch adds new macros to allow us to do so, which allows the above
example to be rewritten more succinctly as:

	SYM_FUNC_START(__pi_memcpy)
	    ... asm insns ...
	SYM_FUNC_END(__pi_memcpy)

	SYM_FUNC_ALIAS(__memcpy, __pi_memcpy)
	EXPORT_SYMBOL(__memcpy)
	SYM_FUNC_ALIAS_WEAK(memcpy, __memcpy)
	EXPORT_SYMBOL(memcpy)

	SYM_FUNC_ALIAS(__pi_memmove, __pi_memcpy)
	SYM_FUNC_ALIAS(__memmove, __pi_memmove)
	EXPORT_SYMBOL(__memmove)
	SYM_FUNC_ALIAS_WEAK(memmove, __memmove)
	EXPORT_SYMBOL(memmove)

The reduction in duplication will also make it possible to replace some
uses of WEAK with more accurate Kconfig guards, e.g.

	#ifndef CONFIG_KASAN
	SYM_FUNC_ALIAS(memmove, __memmove)
	EXPORT_SYMBOL(memmove)
	#endif

... which should make it easier to ensure that symbols are neither used
nor overidden unexpectedly.

The existing SYM_FUNC_START_ALIAS() and SYM_FUNC_START_LOCAL_ALIAS() are
marked as deprecated, and will be removed once existing users are moved
over to the new scheme.

The tools/perf/ copy of linkage.h is updated to match. A subsequent
patch will depend upon this when updating the x86 asm annotations.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Ard Biesheuvel <ardb@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
Acked-by: Mark Brown <broonie@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Jiri Slaby <jslaby@suse.cz>
Cc: Peter Zijlstra <peterz@infradead.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220216162229.1076788-2-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 Documentation/asm-annotations.rst       | 16 +++++++++--
 include/linux/linkage.h                 | 37 ++++++++++++++++++++++++-
 tools/perf/util/include/linux/linkage.h | 35 +++++++++++++++++++++++
 3 files changed, 85 insertions(+), 3 deletions(-)

diff --git a/Documentation/asm-annotations.rst b/Documentation/asm-annotations.rst
index f4bf0f6395fb9..4868b58c60fb1 100644
--- a/Documentation/asm-annotations.rst
+++ b/Documentation/asm-annotations.rst
@@ -130,8 +130,20 @@ denoting a range of code via ``SYM_*_START/END`` annotations.
   In fact, this kind of annotation corresponds to the now deprecated ``ENTRY``
   and ``ENDPROC`` macros.
 
-* ``SYM_FUNC_START_ALIAS`` and ``SYM_FUNC_START_LOCAL_ALIAS`` serve for those
-  who decided to have two or more names for one function. The typical use is::
+* ``SYM_FUNC_ALIAS``, ``SYM_FUNC_ALIAS_LOCAL``, and ``SYM_FUNC_ALIAS_WEAK`` can
+  be used to define multiple names for a function. The typical use is::
+
+    SYM_FUNC_START(__memset)
+        ... asm insns ...
+    SYN_FUNC_END(__memset)
+    SYM_FUNC_ALIAS(memset, __memset)
+
+  In this example, one can call ``__memset`` or ``memset`` with the same
+  result, except the debug information for the instructions is generated to
+  the object file only once -- for the non-``ALIAS`` case.
+
+* ``SYM_FUNC_START_ALIAS`` and ``SYM_FUNC_START_LOCAL_ALIAS`` are deprecated
+    ways to define two or more names for one function. The typical use is::
 
     SYM_FUNC_START_ALIAS(__memset)
     SYM_FUNC_START(memset)
diff --git a/include/linux/linkage.h b/include/linux/linkage.h
index dbf8506decca0..e574a84d8b11a 100644
--- a/include/linux/linkage.h
+++ b/include/linux/linkage.h
@@ -165,7 +165,18 @@
 #ifndef SYM_END
 #define SYM_END(name, sym_type)				\
 	.type name sym_type ASM_NL			\
-	.size name, .-name
+	.set .L__sym_size_##name, .-name ASM_NL		\
+	.size name, .L__sym_size_##name
+#endif
+
+/* SYM_ALIAS -- use only if you have to */
+#ifndef SYM_ALIAS
+#define SYM_ALIAS(alias, name, sym_type, linkage)			\
+	linkage(alias) ASM_NL						\
+	.set alias, name ASM_NL						\
+	.type alias sym_type ASM_NL					\
+	.set .L__sym_size_##alias, .L__sym_size_##name ASM_NL		\
+	.size alias, .L__sym_size_##alias
 #endif
 
 /* === code annotations === */
@@ -275,6 +286,30 @@
 	SYM_END(name, SYM_T_FUNC)
 #endif
 
+/*
+ * SYM_FUNC_ALIAS -- define a global alias for an existing function
+ */
+#ifndef SYM_FUNC_ALIAS
+#define SYM_FUNC_ALIAS(alias, name)					\
+	SYM_ALIAS(alias, name, SYM_T_FUNC, SYM_L_GLOBAL)
+#endif
+
+/*
+ * SYM_FUNC_ALIAS_LOCAL -- define a local alias for an existing function
+ */
+#ifndef SYM_FUNC_ALIAS_LOCAL
+#define SYM_FUNC_ALIAS_LOCAL(alias, name)				\
+	SYM_ALIAS(alias, name, SYM_T_FUNC, SYM_L_LOCAL)
+#endif
+
+/*
+ * SYM_FUNC_ALIAS_WEAK -- define a weak global alias for an existing function
+ */
+#ifndef SYM_FUNC_ALIAS_WEAK
+#define SYM_FUNC_ALIAS_WEAK(alias, name)				\
+	SYM_ALIAS(alias, name, SYM_T_FUNC, SYM_L_WEAK)
+#endif
+
 /* SYM_CODE_START -- use for non-C (special) functions */
 #ifndef SYM_CODE_START
 #define SYM_CODE_START(name)				\
diff --git a/tools/perf/util/include/linux/linkage.h b/tools/perf/util/include/linux/linkage.h
index 5acf053fca7d4..7b4cd7947e3f2 100644
--- a/tools/perf/util/include/linux/linkage.h
+++ b/tools/perf/util/include/linux/linkage.h
@@ -50,9 +50,20 @@
 #ifndef SYM_END
 #define SYM_END(name, sym_type)				\
 	.type name sym_type ASM_NL			\
+	.set .L__sym_size_##name, .-name ASM_NL		\
 	.size name, .-name
 #endif
 
+/* SYM_ALIAS -- use only if you have to */
+#ifndef SYM_ALIAS
+#define SYM_ALIAS(alias, name, sym_type, linkage)			\
+	linkage(alias) ASM_NL						\
+	.set alias, name ASM_NL						\
+	.type alias sym_type ASM_NL					\
+	.set .L__sym_size_##alias, .L__sym_size_##name ASM_NL		\
+	.size alias, .L__sym_size_##alias
+#endif
+
 /*
  * SYM_FUNC_START_ALIAS -- use where there are two global names for one
  * function
@@ -101,4 +112,28 @@
 	SYM_END(name, SYM_T_FUNC)
 #endif
 
+/*
+ * SYM_FUNC_ALIAS -- define a global alias for an existing function
+ */
+#ifndef SYM_FUNC_ALIAS
+#define SYM_FUNC_ALIAS(alias, name)					\
+	SYM_ALIAS(alias, name, SYM_T_FUNC, SYM_L_GLOBAL)
+#endif
+
+/*
+ * SYM_FUNC_ALIAS_LOCAL -- define a local alias for an existing function
+ */
+#ifndef SYM_FUNC_ALIAS_LOCAL
+#define SYM_FUNC_ALIAS_LOCAL(alias, name)				\
+	SYM_ALIAS(alias, name, SYM_T_FUNC, SYM_L_LOCAL)
+#endif
+
+/*
+ * SYM_FUNC_ALIAS_WEAK -- define a weak global alias for an existing function
+ */
+#ifndef SYM_FUNC_ALIAS_WEAK
+#define SYM_FUNC_ALIAS_WEAK(alias, name)				\
+	SYM_ALIAS(alias, name, SYM_T_FUNC, SYM_L_WEAK)
+#endif
+
 #endif	/* PERF_LINUX_LINKAGE_H_ */
-- 
GitLab


From 0f61f6be1f7f44edfab0cb731c0a2340a838956f Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Wed, 16 Feb 2022 16:22:27 +0000
Subject: [PATCH 0570/1586] arm64: clean up symbol aliasing

Now that we have SYM_FUNC_ALIAS() and SYM_FUNC_ALIAS_WEAK(), use those
to simplify and more consistently define function aliases across
arch/arm64.

Aliases are now defined in terms of a canonical function name. For
position-independent functions I've made the __pi_<func> name the
canonical name, and defined other alises in terms of this.

The SYM_FUNC_{START,END}_PI(func) macros obscure the __pi_<func> name,
and make this hard to seatch for. The SYM_FUNC_START_WEAK_PI() macro
also obscures the fact that the __pi_<func> fymbol is global and the
<func> symbol is weak. For clarity, I have removed these macros and used
SYM_FUNC_{START,END}() directly with the __pi_<func> name.

For example:

	SYM_FUNC_START_WEAK_PI(func)
	... asm insns ...
	SYM_FUNC_END_PI(func)
	EXPORT_SYMBOL(func)

... becomes:

	SYM_FUNC_START(__pi_func)
	... asm insns ...
	SYM_FUNC_END(__pi_func)

	SYM_FUNC_ALIAS_WEAK(func, __pi_func)
	EXPORT_SYMBOL(func)

For clarity, where there are multiple annotations such as
EXPORT_SYMBOL(), I've tried to keep annotations grouped by symbol. For
example, where a function has a name and an alias which are both
exported, this is organised as:

	SYM_FUNC_START(func)
	... asm insns ...
	SYM_FUNC_END(func)
	EXPORT_SYMBOL(func)

	SYM_FUNC_ALIAS(alias, func)
	EXPORT_SYMBOL(alias)

For consistency with the other string functions, I've defined strrchr as
a position-independent function, as it can safely be used as such even
though we have no users today.

As we no longer use SYM_FUNC_{START,END}_ALIAS(), our local copies are
removed. The common versions will be removed by a subsequent patch.

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Ard Biesheuvel <ardb@kernel.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
Acked-by: Mark Brown <broonie@kernel.org>
Cc: Joey Gouly <joey.gouly@arm.com>
Cc: Will Deacon <will@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220216162229.1076788-3-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/linkage.h | 24 ----------------------
 arch/arm64/kvm/hyp/nvhe/cache.S  |  5 +++--
 arch/arm64/lib/clear_page.S      |  5 +++--
 arch/arm64/lib/copy_page.S       |  5 +++--
 arch/arm64/lib/memchr.S          |  5 +++--
 arch/arm64/lib/memcmp.S          |  6 +++---
 arch/arm64/lib/memcpy.S          | 21 ++++++++++---------
 arch/arm64/lib/memset.S          | 12 ++++++-----
 arch/arm64/lib/strchr.S          |  6 ++++--
 arch/arm64/lib/strcmp.S          |  6 +++---
 arch/arm64/lib/strlen.S          |  6 +++---
 arch/arm64/lib/strncmp.S         |  6 +++---
 arch/arm64/lib/strnlen.S         |  6 ++++--
 arch/arm64/lib/strrchr.S         |  5 +++--
 arch/arm64/mm/cache.S            | 35 +++++++++++++++++++-------------
 15 files changed, 74 insertions(+), 79 deletions(-)

diff --git a/arch/arm64/include/asm/linkage.h b/arch/arm64/include/asm/linkage.h
index b77e9b3f5371c..43f8c25b3fda6 100644
--- a/arch/arm64/include/asm/linkage.h
+++ b/arch/arm64/include/asm/linkage.h
@@ -39,28 +39,4 @@
 	SYM_START(name, SYM_L_WEAK, SYM_A_NONE)		\
 	bti c ;
 
-/*
- * Annotate a function as position independent, i.e., safe to be called before
- * the kernel virtual mapping is activated.
- */
-#define SYM_FUNC_START_PI(x)			\
-		SYM_FUNC_START_ALIAS(__pi_##x);	\
-		SYM_FUNC_START(x)
-
-#define SYM_FUNC_START_WEAK_PI(x)		\
-		SYM_FUNC_START_ALIAS(__pi_##x);	\
-		SYM_FUNC_START_WEAK(x)
-
-#define SYM_FUNC_START_WEAK_ALIAS_PI(x)		\
-		SYM_FUNC_START_ALIAS(__pi_##x);	\
-		SYM_START(x, SYM_L_WEAK, SYM_A_ALIGN)
-
-#define SYM_FUNC_END_PI(x)			\
-		SYM_FUNC_END(x);		\
-		SYM_FUNC_END_ALIAS(__pi_##x)
-
-#define SYM_FUNC_END_ALIAS_PI(x)		\
-		SYM_FUNC_END_ALIAS(x);		\
-		SYM_FUNC_END_ALIAS(__pi_##x)
-
 #endif
diff --git a/arch/arm64/kvm/hyp/nvhe/cache.S b/arch/arm64/kvm/hyp/nvhe/cache.S
index 958734f4d6b0e..0c367eb5f4e28 100644
--- a/arch/arm64/kvm/hyp/nvhe/cache.S
+++ b/arch/arm64/kvm/hyp/nvhe/cache.S
@@ -7,7 +7,8 @@
 #include <asm/assembler.h>
 #include <asm/alternative.h>
 
-SYM_FUNC_START_PI(dcache_clean_inval_poc)
+SYM_FUNC_START(__pi_dcache_clean_inval_poc)
 	dcache_by_line_op civac, sy, x0, x1, x2, x3
 	ret
-SYM_FUNC_END_PI(dcache_clean_inval_poc)
+SYM_FUNC_END(__pi_dcache_clean_inval_poc)
+SYM_FUNC_ALIAS(dcache_clean_inval_poc, __pi_dcache_clean_inval_poc)
diff --git a/arch/arm64/lib/clear_page.S b/arch/arm64/lib/clear_page.S
index 1fd5d790ab800..ebde40e7fa2b2 100644
--- a/arch/arm64/lib/clear_page.S
+++ b/arch/arm64/lib/clear_page.S
@@ -14,7 +14,7 @@
  * Parameters:
  *	x0 - dest
  */
-SYM_FUNC_START_PI(clear_page)
+SYM_FUNC_START(__pi_clear_page)
 	mrs	x1, dczid_el0
 	tbnz	x1, #4, 2f	/* Branch if DC ZVA is prohibited */
 	and	w1, w1, #0xf
@@ -35,5 +35,6 @@ SYM_FUNC_START_PI(clear_page)
 	tst	x0, #(PAGE_SIZE - 1)
 	b.ne	2b
 	ret
-SYM_FUNC_END_PI(clear_page)
+SYM_FUNC_END(__pi_clear_page)
+SYM_FUNC_ALIAS(clear_page, __pi_clear_page)
 EXPORT_SYMBOL(clear_page)
diff --git a/arch/arm64/lib/copy_page.S b/arch/arm64/lib/copy_page.S
index 29144f4cd4492..c336d2ffdec55 100644
--- a/arch/arm64/lib/copy_page.S
+++ b/arch/arm64/lib/copy_page.S
@@ -17,7 +17,7 @@
  *	x0 - dest
  *	x1 - src
  */
-SYM_FUNC_START_PI(copy_page)
+SYM_FUNC_START(__pi_copy_page)
 alternative_if ARM64_HAS_NO_HW_PREFETCH
 	// Prefetch three cache lines ahead.
 	prfm	pldl1strm, [x1, #128]
@@ -75,5 +75,6 @@ alternative_else_nop_endif
 	stnp	x16, x17, [x0, #112 - 256]
 
 	ret
-SYM_FUNC_END_PI(copy_page)
+SYM_FUNC_END(__pi_copy_page)
+SYM_FUNC_ALIAS(copy_page, __pi_copy_page)
 EXPORT_SYMBOL(copy_page)
diff --git a/arch/arm64/lib/memchr.S b/arch/arm64/lib/memchr.S
index 7c2276fdab543..37a9f2a4f7f4b 100644
--- a/arch/arm64/lib/memchr.S
+++ b/arch/arm64/lib/memchr.S
@@ -38,7 +38,7 @@
 
 	.p2align 4
 	nop
-SYM_FUNC_START_WEAK_PI(memchr)
+SYM_FUNC_START(__pi_memchr)
 	and	chrin, chrin, #0xff
 	lsr	wordcnt, cntin, #3
 	cbz	wordcnt, L(byte_loop)
@@ -71,5 +71,6 @@ CPU_LE(	rev	tmp, tmp)
 L(not_found):
 	mov	result, #0
 	ret
-SYM_FUNC_END_PI(memchr)
+SYM_FUNC_END(__pi_memchr)
+SYM_FUNC_ALIAS_WEAK(memchr, __pi_memchr)
 EXPORT_SYMBOL_NOKASAN(memchr)
diff --git a/arch/arm64/lib/memcmp.S b/arch/arm64/lib/memcmp.S
index 7d956384222ff..a5ccf2c55f911 100644
--- a/arch/arm64/lib/memcmp.S
+++ b/arch/arm64/lib/memcmp.S
@@ -32,7 +32,7 @@
 #define tmp1		x7
 #define tmp2		x8
 
-SYM_FUNC_START_WEAK_PI(memcmp)
+SYM_FUNC_START(__pi_memcmp)
 	subs	limit, limit, 8
 	b.lo	L(less8)
 
@@ -134,6 +134,6 @@ L(byte_loop):
 	b.eq	L(byte_loop)
 	sub	result, data1w, data2w
 	ret
-
-SYM_FUNC_END_PI(memcmp)
+SYM_FUNC_END(__pi_memcmp)
+SYM_FUNC_ALIAS_WEAK(memcmp, __pi_memcmp)
 EXPORT_SYMBOL_NOKASAN(memcmp)
diff --git a/arch/arm64/lib/memcpy.S b/arch/arm64/lib/memcpy.S
index b82fd64ee1e1c..4ab48d49c4515 100644
--- a/arch/arm64/lib/memcpy.S
+++ b/arch/arm64/lib/memcpy.S
@@ -57,10 +57,7 @@
    The loop tail is handled by always copying 64 bytes from the end.
 */
 
-SYM_FUNC_START_ALIAS(__memmove)
-SYM_FUNC_START_WEAK_ALIAS_PI(memmove)
-SYM_FUNC_START_ALIAS(__memcpy)
-SYM_FUNC_START_WEAK_PI(memcpy)
+SYM_FUNC_START(__pi_memcpy)
 	add	srcend, src, count
 	add	dstend, dstin, count
 	cmp	count, 128
@@ -241,12 +238,16 @@ L(copy64_from_start):
 	stp	B_l, B_h, [dstin, 16]
 	stp	C_l, C_h, [dstin]
 	ret
+SYM_FUNC_END(__pi_memcpy)
 
-SYM_FUNC_END_PI(memcpy)
-EXPORT_SYMBOL(memcpy)
-SYM_FUNC_END_ALIAS(__memcpy)
+SYM_FUNC_ALIAS(__memcpy, __pi_memcpy)
 EXPORT_SYMBOL(__memcpy)
-SYM_FUNC_END_ALIAS_PI(memmove)
-EXPORT_SYMBOL(memmove)
-SYM_FUNC_END_ALIAS(__memmove)
+SYM_FUNC_ALIAS_WEAK(memcpy, __memcpy)
+EXPORT_SYMBOL(memcpy)
+
+SYM_FUNC_ALIAS(__pi_memmove, __pi_memcpy)
+
+SYM_FUNC_ALIAS(__memmove, __pi_memmove)
 EXPORT_SYMBOL(__memmove)
+SYM_FUNC_ALIAS_WEAK(memmove, __memmove)
+EXPORT_SYMBOL(memmove)
diff --git a/arch/arm64/lib/memset.S b/arch/arm64/lib/memset.S
index a9c1c9a01ea90..a5aebe82ad73b 100644
--- a/arch/arm64/lib/memset.S
+++ b/arch/arm64/lib/memset.S
@@ -42,8 +42,7 @@ dst		.req	x8
 tmp3w		.req	w9
 tmp3		.req	x9
 
-SYM_FUNC_START_ALIAS(__memset)
-SYM_FUNC_START_WEAK_PI(memset)
+SYM_FUNC_START(__pi_memset)
 	mov	dst, dstin	/* Preserve return value.  */
 	and	A_lw, val, #255
 	orr	A_lw, A_lw, A_lw, lsl #8
@@ -202,7 +201,10 @@ SYM_FUNC_START_WEAK_PI(memset)
 	ands	count, count, zva_bits_x
 	b.ne	.Ltail_maybe_long
 	ret
-SYM_FUNC_END_PI(memset)
-EXPORT_SYMBOL(memset)
-SYM_FUNC_END_ALIAS(__memset)
+SYM_FUNC_END(__pi_memset)
+
+SYM_FUNC_ALIAS(__memset, __pi_memset)
 EXPORT_SYMBOL(__memset)
+
+SYM_FUNC_ALIAS_WEAK(memset, __pi_memset)
+EXPORT_SYMBOL(memset)
diff --git a/arch/arm64/lib/strchr.S b/arch/arm64/lib/strchr.S
index 1f47eae3b0d6d..94ee67a6b212c 100644
--- a/arch/arm64/lib/strchr.S
+++ b/arch/arm64/lib/strchr.S
@@ -18,7 +18,7 @@
  * Returns:
  *	x0 - address of first occurrence of 'c' or 0
  */
-SYM_FUNC_START_WEAK(strchr)
+SYM_FUNC_START(__pi_strchr)
 	and	w1, w1, #0xff
 1:	ldrb	w2, [x0], #1
 	cmp	w2, w1
@@ -28,5 +28,7 @@ SYM_FUNC_START_WEAK(strchr)
 	cmp	w2, w1
 	csel	x0, x0, xzr, eq
 	ret
-SYM_FUNC_END(strchr)
+SYM_FUNC_END(__pi_strchr)
+
+SYM_FUNC_ALIAS_WEAK(strchr, __pi_strchr)
 EXPORT_SYMBOL_NOKASAN(strchr)
diff --git a/arch/arm64/lib/strcmp.S b/arch/arm64/lib/strcmp.S
index 83bcad72ec972..cda7de747efcf 100644
--- a/arch/arm64/lib/strcmp.S
+++ b/arch/arm64/lib/strcmp.S
@@ -41,7 +41,7 @@
 
 	/* Start of performance-critical section  -- one 64B cache line.  */
 	.align 6
-SYM_FUNC_START_WEAK_PI(strcmp)
+SYM_FUNC_START(__pi_strcmp)
 	eor	tmp1, src1, src2
 	mov	zeroones, #REP8_01
 	tst	tmp1, #7
@@ -171,6 +171,6 @@ L(loop_misaligned):
 L(done):
 	sub	result, data1, data2
 	ret
-
-SYM_FUNC_END_PI(strcmp)
+SYM_FUNC_END(__pi_strcmp)
+SYM_FUNC_ALIAS_WEAK(strcmp, __pi_strcmp)
 EXPORT_SYMBOL_NOHWKASAN(strcmp)
diff --git a/arch/arm64/lib/strlen.S b/arch/arm64/lib/strlen.S
index 1648790e91b3c..4919fe81ae540 100644
--- a/arch/arm64/lib/strlen.S
+++ b/arch/arm64/lib/strlen.S
@@ -79,7 +79,7 @@
 	   whether the first fetch, which may be misaligned, crosses a page
 	   boundary.  */
 
-SYM_FUNC_START_WEAK_PI(strlen)
+SYM_FUNC_START(__pi_strlen)
 	and	tmp1, srcin, MIN_PAGE_SIZE - 1
 	mov	zeroones, REP8_01
 	cmp	tmp1, MIN_PAGE_SIZE - 16
@@ -208,6 +208,6 @@ L(page_cross):
 	csel	data1, data1, tmp4, eq
 	csel	data2, data2, tmp2, eq
 	b	L(page_cross_entry)
-
-SYM_FUNC_END_PI(strlen)
+SYM_FUNC_END(__pi_strlen)
+SYM_FUNC_ALIAS_WEAK(strlen, __pi_strlen)
 EXPORT_SYMBOL_NOKASAN(strlen)
diff --git a/arch/arm64/lib/strncmp.S b/arch/arm64/lib/strncmp.S
index e42bcfcd37e6f..a848abcec975e 100644
--- a/arch/arm64/lib/strncmp.S
+++ b/arch/arm64/lib/strncmp.S
@@ -44,7 +44,7 @@
 #define endloop		x15
 #define count		mask
 
-SYM_FUNC_START_WEAK_PI(strncmp)
+SYM_FUNC_START(__pi_strncmp)
 	cbz	limit, L(ret0)
 	eor	tmp1, src1, src2
 	mov	zeroones, #REP8_01
@@ -256,6 +256,6 @@ L(done_loop):
 L(ret0):
 	mov	result, #0
 	ret
-
-SYM_FUNC_END_PI(strncmp)
+SYM_FUNC_END(__pi_strncmp)
+SYM_FUNC_ALIAS_WEAK(strncmp, __pi_strncmp)
 EXPORT_SYMBOL_NOHWKASAN(strncmp)
diff --git a/arch/arm64/lib/strnlen.S b/arch/arm64/lib/strnlen.S
index b72913a990389..d5ac0e10a01db 100644
--- a/arch/arm64/lib/strnlen.S
+++ b/arch/arm64/lib/strnlen.S
@@ -47,7 +47,7 @@ limit_wd	.req	x14
 #define REP8_7f 0x7f7f7f7f7f7f7f7f
 #define REP8_80 0x8080808080808080
 
-SYM_FUNC_START_WEAK_PI(strnlen)
+SYM_FUNC_START(__pi_strnlen)
 	cbz	limit, .Lhit_limit
 	mov	zeroones, #REP8_01
 	bic	src, srcin, #15
@@ -156,5 +156,7 @@ CPU_LE( lsr	tmp2, tmp2, tmp4 )	/* Shift (tmp1 & 63).  */
 .Lhit_limit:
 	mov	len, limit
 	ret
-SYM_FUNC_END_PI(strnlen)
+SYM_FUNC_END(__pi_strnlen)
+
+SYM_FUNC_ALIAS_WEAK(strnlen, __pi_strnlen)
 EXPORT_SYMBOL_NOKASAN(strnlen)
diff --git a/arch/arm64/lib/strrchr.S b/arch/arm64/lib/strrchr.S
index 13132d1ed6d12..a5123cf0ce125 100644
--- a/arch/arm64/lib/strrchr.S
+++ b/arch/arm64/lib/strrchr.S
@@ -18,7 +18,7 @@
  * Returns:
  *	x0 - address of last occurrence of 'c' or 0
  */
-SYM_FUNC_START_WEAK_PI(strrchr)
+SYM_FUNC_START(__pi_strrchr)
 	mov	x3, #0
 	and	w1, w1, #0xff
 1:	ldrb	w2, [x0], #1
@@ -29,5 +29,6 @@ SYM_FUNC_START_WEAK_PI(strrchr)
 	b	1b
 2:	mov	x0, x3
 	ret
-SYM_FUNC_END_PI(strrchr)
+SYM_FUNC_END(__pi_strrchr)
+SYM_FUNC_ALIAS_WEAK(strrchr, __pi_strrchr)
 EXPORT_SYMBOL_NOKASAN(strrchr)
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index 7d0563db42014..0ea6cc25dc663 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -107,10 +107,11 @@ SYM_FUNC_END(icache_inval_pou)
  *	- start   - virtual start address of region
  *	- end     - virtual end address of region
  */
-SYM_FUNC_START_PI(dcache_clean_inval_poc)
+SYM_FUNC_START(__pi_dcache_clean_inval_poc)
 	dcache_by_line_op civac, sy, x0, x1, x2, x3
 	ret
-SYM_FUNC_END_PI(dcache_clean_inval_poc)
+SYM_FUNC_END(__pi_dcache_clean_inval_poc)
+SYM_FUNC_ALIAS(dcache_clean_inval_poc, __pi_dcache_clean_inval_poc)
 
 /*
  *	dcache_clean_pou(start, end)
@@ -140,7 +141,7 @@ SYM_FUNC_END(dcache_clean_pou)
  *	- start   - kernel start address of region
  *	- end     - kernel end address of region
  */
-SYM_FUNC_START_PI(dcache_inval_poc)
+SYM_FUNC_START(__pi_dcache_inval_poc)
 	dcache_line_size x2, x3
 	sub	x3, x2, #1
 	tst	x1, x3				// end cache line aligned?
@@ -158,7 +159,8 @@ SYM_FUNC_START_PI(dcache_inval_poc)
 	b.lo	2b
 	dsb	sy
 	ret
-SYM_FUNC_END_PI(dcache_inval_poc)
+SYM_FUNC_END(__pi_dcache_inval_poc)
+SYM_FUNC_ALIAS(dcache_inval_poc, __pi_dcache_inval_poc)
 
 /*
  *	dcache_clean_poc(start, end)
@@ -169,10 +171,11 @@ SYM_FUNC_END_PI(dcache_inval_poc)
  *	- start   - virtual start address of region
  *	- end     - virtual end address of region
  */
-SYM_FUNC_START_PI(dcache_clean_poc)
+SYM_FUNC_START(__pi_dcache_clean_poc)
 	dcache_by_line_op cvac, sy, x0, x1, x2, x3
 	ret
-SYM_FUNC_END_PI(dcache_clean_poc)
+SYM_FUNC_END(__pi_dcache_clean_poc)
+SYM_FUNC_ALIAS(dcache_clean_poc, __pi_dcache_clean_poc)
 
 /*
  *	dcache_clean_pop(start, end)
@@ -183,13 +186,14 @@ SYM_FUNC_END_PI(dcache_clean_poc)
  *	- start   - virtual start address of region
  *	- end     - virtual end address of region
  */
-SYM_FUNC_START_PI(dcache_clean_pop)
+SYM_FUNC_START(__pi_dcache_clean_pop)
 	alternative_if_not ARM64_HAS_DCPOP
 	b	dcache_clean_poc
 	alternative_else_nop_endif
 	dcache_by_line_op cvap, sy, x0, x1, x2, x3
 	ret
-SYM_FUNC_END_PI(dcache_clean_pop)
+SYM_FUNC_END(__pi_dcache_clean_pop)
+SYM_FUNC_ALIAS(dcache_clean_pop, __pi_dcache_clean_pop)
 
 /*
  *	__dma_flush_area(start, size)
@@ -199,11 +203,12 @@ SYM_FUNC_END_PI(dcache_clean_pop)
  *	- start   - virtual start address of region
  *	- size    - size in question
  */
-SYM_FUNC_START_PI(__dma_flush_area)
+SYM_FUNC_START(__pi___dma_flush_area)
 	add	x1, x0, x1
 	dcache_by_line_op civac, sy, x0, x1, x2, x3
 	ret
-SYM_FUNC_END_PI(__dma_flush_area)
+SYM_FUNC_END(__pi___dma_flush_area)
+SYM_FUNC_ALIAS(__dma_flush_area, __pi___dma_flush_area)
 
 /*
  *	__dma_map_area(start, size, dir)
@@ -211,12 +216,13 @@ SYM_FUNC_END_PI(__dma_flush_area)
  *	- size	- size of region
  *	- dir	- DMA direction
  */
-SYM_FUNC_START_PI(__dma_map_area)
+SYM_FUNC_START(__pi___dma_map_area)
 	add	x1, x0, x1
 	cmp	w2, #DMA_FROM_DEVICE
 	b.eq	__pi_dcache_inval_poc
 	b	__pi_dcache_clean_poc
-SYM_FUNC_END_PI(__dma_map_area)
+SYM_FUNC_END(__pi___dma_map_area)
+SYM_FUNC_ALIAS(__dma_map_area, __pi___dma_map_area)
 
 /*
  *	__dma_unmap_area(start, size, dir)
@@ -224,9 +230,10 @@ SYM_FUNC_END_PI(__dma_map_area)
  *	- size	- size of region
  *	- dir	- DMA direction
  */
-SYM_FUNC_START_PI(__dma_unmap_area)
+SYM_FUNC_START(__pi___dma_unmap_area)
 	add	x1, x0, x1
 	cmp	w2, #DMA_TO_DEVICE
 	b.ne	__pi_dcache_inval_poc
 	ret
-SYM_FUNC_END_PI(__dma_unmap_area)
+SYM_FUNC_END(__pi___dma_unmap_area)
+SYM_FUNC_ALIAS(__dma_unmap_area, __pi___dma_unmap_area)
-- 
GitLab


From 7be2e319640c8926bbba4e004a1bee9cf6ed67b0 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Wed, 16 Feb 2022 16:22:28 +0000
Subject: [PATCH 0571/1586] x86: clean up symbol aliasing

Now that we have SYM_FUNC_ALIAS() and SYM_FUNC_ALIAS_WEAK(), use those
to simplify the definition of function aliases across arch/x86.

For clarity, where there are multiple annotations such as
EXPORT_SYMBOL(), I've tried to keep annotations grouped by symbol. For
example, where a function has a name and an alias which are both
exported, this is organised as:

	SYM_FUNC_START(func)
	    ... asm insns ...
	SYM_FUNC_END(func)
	EXPORT_SYMBOL(func)

	SYM_FUNC_ALIAS(alias, func)
	EXPORT_SYMBOL(alias)

Where there are only aliases and no exports or other annotations, I have
not bothered with line spacing, e.g.

	SYM_FUNC_START(func)
	    ... asm insns ...
	SYM_FUNC_END(func)
	SYM_FUNC_ALIAS(alias, func)

The tools/perf/ copies of memset_64.S and memset_64.S are updated
likewise to avoid the build system complaining these are mismatched:

| Warning: Kernel ABI header at 'tools/arch/x86/lib/memcpy_64.S' differs from latest version at 'arch/x86/lib/memcpy_64.S'
| diff -u tools/arch/x86/lib/memcpy_64.S arch/x86/lib/memcpy_64.S
| Warning: Kernel ABI header at 'tools/arch/x86/lib/memset_64.S' differs from latest version at 'arch/x86/lib/memset_64.S'
| diff -u tools/arch/x86/lib/memset_64.S arch/x86/lib/memset_64.S

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Ard Biesheuvel <ardb@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
Acked-by: Mark Brown <broonie@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Slaby <jslaby@suse.cz>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220216162229.1076788-4-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/x86/boot/compressed/head_32.S |  3 +--
 arch/x86/boot/compressed/head_64.S |  3 +--
 arch/x86/crypto/aesni-intel_asm.S  |  4 +---
 arch/x86/lib/memcpy_64.S           | 10 +++++-----
 arch/x86/lib/memmove_64.S          |  4 ++--
 arch/x86/lib/memset_64.S           |  6 +++---
 tools/arch/x86/lib/memcpy_64.S     | 10 +++++-----
 tools/arch/x86/lib/memset_64.S     |  6 +++---
 8 files changed, 21 insertions(+), 25 deletions(-)

diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index 659fad53ca823..3b354eb9516df 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -152,14 +152,13 @@ SYM_FUNC_END(startup_32)
 
 #ifdef CONFIG_EFI_STUB
 SYM_FUNC_START(efi32_stub_entry)
-SYM_FUNC_START_ALIAS(efi_stub_entry)
 	add	$0x4, %esp
 	movl	8(%esp), %esi	/* save boot_params pointer */
 	call	efi_main
 	/* efi_main returns the possibly relocated address of startup_32 */
 	jmp	*%eax
 SYM_FUNC_END(efi32_stub_entry)
-SYM_FUNC_END_ALIAS(efi_stub_entry)
+SYM_FUNC_ALIAS(efi_stub_entry, efi32_stub_entry)
 #endif
 
 	.text
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index fd9441f404570..dea95301196b8 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -535,7 +535,6 @@ SYM_CODE_END(startup_64)
 #ifdef CONFIG_EFI_STUB
 	.org 0x390
 SYM_FUNC_START(efi64_stub_entry)
-SYM_FUNC_START_ALIAS(efi_stub_entry)
 	and	$~0xf, %rsp			/* realign the stack */
 	movq	%rdx, %rbx			/* save boot_params pointer */
 	call	efi_main
@@ -543,7 +542,7 @@ SYM_FUNC_START_ALIAS(efi_stub_entry)
 	leaq	rva(startup_64)(%rax), %rax
 	jmp	*%rax
 SYM_FUNC_END(efi64_stub_entry)
-SYM_FUNC_END_ALIAS(efi_stub_entry)
+SYM_FUNC_ALIAS(efi_stub_entry, efi64_stub_entry)
 #endif
 
 	.text
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index 363699dd72206..837c1e0aa0217 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -1751,8 +1751,6 @@ SYM_FUNC_END(aesni_gcm_finalize)
 
 #endif
 
-
-SYM_FUNC_START_LOCAL_ALIAS(_key_expansion_128)
 SYM_FUNC_START_LOCAL(_key_expansion_256a)
 	pshufd $0b11111111, %xmm1, %xmm1
 	shufps $0b00010000, %xmm0, %xmm4
@@ -1764,7 +1762,7 @@ SYM_FUNC_START_LOCAL(_key_expansion_256a)
 	add $0x10, TKEYP
 	RET
 SYM_FUNC_END(_key_expansion_256a)
-SYM_FUNC_END_ALIAS(_key_expansion_128)
+SYM_FUNC_ALIAS_LOCAL(_key_expansion_128, _key_expansion_256a)
 
 SYM_FUNC_START_LOCAL(_key_expansion_192a)
 	pshufd $0b01010101, %xmm1, %xmm1
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index 59cf2343f3d90..d0d7b9bc6cad3 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -27,8 +27,7 @@
  * Output:
  * rax original destination
  */
-SYM_FUNC_START_ALIAS(__memcpy)
-SYM_FUNC_START_WEAK(memcpy)
+SYM_FUNC_START(__memcpy)
 	ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \
 		      "jmp memcpy_erms", X86_FEATURE_ERMS
 
@@ -40,11 +39,12 @@ SYM_FUNC_START_WEAK(memcpy)
 	movl %edx, %ecx
 	rep movsb
 	RET
-SYM_FUNC_END(memcpy)
-SYM_FUNC_END_ALIAS(__memcpy)
-EXPORT_SYMBOL(memcpy)
+SYM_FUNC_END(__memcpy)
 EXPORT_SYMBOL(__memcpy)
 
+SYM_FUNC_ALIAS_WEAK(memcpy, __memcpy)
+EXPORT_SYMBOL(memcpy)
+
 /*
  * memcpy_erms() - enhanced fast string memcpy. This is faster and
  * simpler than memcpy. Use memcpy_erms when possible.
diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S
index 50ea390df7128..d83cba364e31d 100644
--- a/arch/x86/lib/memmove_64.S
+++ b/arch/x86/lib/memmove_64.S
@@ -24,7 +24,6 @@
  * Output:
  * rax: dest
  */
-SYM_FUNC_START_WEAK(memmove)
 SYM_FUNC_START(__memmove)
 
 	mov %rdi, %rax
@@ -207,6 +206,7 @@ SYM_FUNC_START(__memmove)
 13:
 	RET
 SYM_FUNC_END(__memmove)
-SYM_FUNC_END_ALIAS(memmove)
 EXPORT_SYMBOL(__memmove)
+
+SYM_FUNC_ALIAS_WEAK(memmove, __memmove)
 EXPORT_SYMBOL(memmove)
diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S
index d624f2bc42f16..fc9ffd3ff3b21 100644
--- a/arch/x86/lib/memset_64.S
+++ b/arch/x86/lib/memset_64.S
@@ -17,7 +17,6 @@
  *
  * rax   original destination
  */
-SYM_FUNC_START_WEAK(memset)
 SYM_FUNC_START(__memset)
 	/*
 	 * Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended
@@ -42,10 +41,11 @@ SYM_FUNC_START(__memset)
 	movq %r9,%rax
 	RET
 SYM_FUNC_END(__memset)
-SYM_FUNC_END_ALIAS(memset)
-EXPORT_SYMBOL(memset)
 EXPORT_SYMBOL(__memset)
 
+SYM_FUNC_ALIAS_WEAK(memset, __memset)
+EXPORT_SYMBOL(memset)
+
 /*
  * ISO C memset - set a memory block to a byte value. This function uses
  * enhanced rep stosb to override the fast string function.
diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S
index 59cf2343f3d90..d0d7b9bc6cad3 100644
--- a/tools/arch/x86/lib/memcpy_64.S
+++ b/tools/arch/x86/lib/memcpy_64.S
@@ -27,8 +27,7 @@
  * Output:
  * rax original destination
  */
-SYM_FUNC_START_ALIAS(__memcpy)
-SYM_FUNC_START_WEAK(memcpy)
+SYM_FUNC_START(__memcpy)
 	ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \
 		      "jmp memcpy_erms", X86_FEATURE_ERMS
 
@@ -40,11 +39,12 @@ SYM_FUNC_START_WEAK(memcpy)
 	movl %edx, %ecx
 	rep movsb
 	RET
-SYM_FUNC_END(memcpy)
-SYM_FUNC_END_ALIAS(__memcpy)
-EXPORT_SYMBOL(memcpy)
+SYM_FUNC_END(__memcpy)
 EXPORT_SYMBOL(__memcpy)
 
+SYM_FUNC_ALIAS_WEAK(memcpy, __memcpy)
+EXPORT_SYMBOL(memcpy)
+
 /*
  * memcpy_erms() - enhanced fast string memcpy. This is faster and
  * simpler than memcpy. Use memcpy_erms when possible.
diff --git a/tools/arch/x86/lib/memset_64.S b/tools/arch/x86/lib/memset_64.S
index d624f2bc42f16..fc9ffd3ff3b21 100644
--- a/tools/arch/x86/lib/memset_64.S
+++ b/tools/arch/x86/lib/memset_64.S
@@ -17,7 +17,6 @@
  *
  * rax   original destination
  */
-SYM_FUNC_START_WEAK(memset)
 SYM_FUNC_START(__memset)
 	/*
 	 * Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended
@@ -42,10 +41,11 @@ SYM_FUNC_START(__memset)
 	movq %r9,%rax
 	RET
 SYM_FUNC_END(__memset)
-SYM_FUNC_END_ALIAS(memset)
-EXPORT_SYMBOL(memset)
 EXPORT_SYMBOL(__memset)
 
+SYM_FUNC_ALIAS_WEAK(memset, __memset)
+EXPORT_SYMBOL(memset)
+
 /*
  * ISO C memset - set a memory block to a byte value. This function uses
  * enhanced rep stosb to override the fast string function.
-- 
GitLab


From be9aea74400433e03c2a8b0260fc9ffe2495f698 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Wed, 16 Feb 2022 16:22:29 +0000
Subject: [PATCH 0572/1586] linkage: remove SYM_FUNC_{START,END}_ALIAS()

Now that all aliases are defined using SYM_FUNC_ALIAS(), remove the old
SYM_FUNC_{START,END}_ALIAS() macros.

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Ard Biesheuvel <ardb@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
Acked-by: Mark Brown <broonie@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Jiri Slaby <jslaby@suse.cz>
Cc: Peter Zijlstra <peterz@infradead.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220216162229.1076788-5-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 Documentation/asm-annotations.rst       | 13 -----------
 include/linux/linkage.h                 | 30 -------------------------
 tools/perf/util/include/linux/linkage.h | 21 -----------------
 3 files changed, 64 deletions(-)

diff --git a/Documentation/asm-annotations.rst b/Documentation/asm-annotations.rst
index 4868b58c60fb1..a64f2ca469d45 100644
--- a/Documentation/asm-annotations.rst
+++ b/Documentation/asm-annotations.rst
@@ -142,19 +142,6 @@ denoting a range of code via ``SYM_*_START/END`` annotations.
   result, except the debug information for the instructions is generated to
   the object file only once -- for the non-``ALIAS`` case.
 
-* ``SYM_FUNC_START_ALIAS`` and ``SYM_FUNC_START_LOCAL_ALIAS`` are deprecated
-    ways to define two or more names for one function. The typical use is::
-
-    SYM_FUNC_START_ALIAS(__memset)
-    SYM_FUNC_START(memset)
-        ... asm insns ...
-    SYM_FUNC_END(memset)
-    SYM_FUNC_END_ALIAS(__memset)
-
-  In this example, one can call ``__memset`` or ``memset`` with the same
-  result, except the debug information for the instructions is generated to
-  the object file only once -- for the non-``ALIAS`` case.
-
 * ``SYM_CODE_START`` and ``SYM_CODE_START_LOCAL`` should be used only in
   special cases -- if you know what you are doing. This is used exclusively
   for interrupt handlers and similar where the calling convention is not the C
diff --git a/include/linux/linkage.h b/include/linux/linkage.h
index e574a84d8b11a..acb1ad2356f1b 100644
--- a/include/linux/linkage.h
+++ b/include/linux/linkage.h
@@ -211,30 +211,8 @@
 	SYM_ENTRY(name, linkage, SYM_A_NONE)
 #endif
 
-/*
- * SYM_FUNC_START_LOCAL_ALIAS -- use where there are two local names for one
- * function
- */
-#ifndef SYM_FUNC_START_LOCAL_ALIAS
-#define SYM_FUNC_START_LOCAL_ALIAS(name)		\
-	SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN)
-#endif
-
-/*
- * SYM_FUNC_START_ALIAS -- use where there are two global names for one
- * function
- */
-#ifndef SYM_FUNC_START_ALIAS
-#define SYM_FUNC_START_ALIAS(name)			\
-	SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN)
-#endif
-
 /* SYM_FUNC_START -- use for global functions */
 #ifndef SYM_FUNC_START
-/*
- * The same as SYM_FUNC_START_ALIAS, but we will need to distinguish these two
- * later.
- */
 #define SYM_FUNC_START(name)				\
 	SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN)
 #endif
@@ -247,7 +225,6 @@
 
 /* SYM_FUNC_START_LOCAL -- use for local functions */
 #ifndef SYM_FUNC_START_LOCAL
-/* the same as SYM_FUNC_START_LOCAL_ALIAS, see comment near SYM_FUNC_START */
 #define SYM_FUNC_START_LOCAL(name)			\
 	SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN)
 #endif
@@ -270,18 +247,11 @@
 	SYM_START(name, SYM_L_WEAK, SYM_A_NONE)
 #endif
 
-/* SYM_FUNC_END_ALIAS -- the end of LOCAL_ALIASed or ALIASed function */
-#ifndef SYM_FUNC_END_ALIAS
-#define SYM_FUNC_END_ALIAS(name)			\
-	SYM_END(name, SYM_T_FUNC)
-#endif
-
 /*
  * SYM_FUNC_END -- the end of SYM_FUNC_START_LOCAL, SYM_FUNC_START,
  * SYM_FUNC_START_WEAK, ...
  */
 #ifndef SYM_FUNC_END
-/* the same as SYM_FUNC_END_ALIAS, see comment near SYM_FUNC_START */
 #define SYM_FUNC_END(name)				\
 	SYM_END(name, SYM_T_FUNC)
 #endif
diff --git a/tools/perf/util/include/linux/linkage.h b/tools/perf/util/include/linux/linkage.h
index 7b4cd7947e3f2..aa0c5179836d1 100644
--- a/tools/perf/util/include/linux/linkage.h
+++ b/tools/perf/util/include/linux/linkage.h
@@ -64,38 +64,18 @@
 	.size alias, .L__sym_size_##alias
 #endif
 
-/*
- * SYM_FUNC_START_ALIAS -- use where there are two global names for one
- * function
- */
-#ifndef SYM_FUNC_START_ALIAS
-#define SYM_FUNC_START_ALIAS(name)			\
-	SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN)
-#endif
-
 /* SYM_FUNC_START -- use for global functions */
 #ifndef SYM_FUNC_START
-/*
- * The same as SYM_FUNC_START_ALIAS, but we will need to distinguish these two
- * later.
- */
 #define SYM_FUNC_START(name)				\
 	SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN)
 #endif
 
 /* SYM_FUNC_START_LOCAL -- use for local functions */
 #ifndef SYM_FUNC_START_LOCAL
-/* the same as SYM_FUNC_START_LOCAL_ALIAS, see comment near SYM_FUNC_START */
 #define SYM_FUNC_START_LOCAL(name)			\
 	SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN)
 #endif
 
-/* SYM_FUNC_END_ALIAS -- the end of LOCAL_ALIASed or ALIASed function */
-#ifndef SYM_FUNC_END_ALIAS
-#define SYM_FUNC_END_ALIAS(name)			\
-	SYM_END(name, SYM_T_FUNC)
-#endif
-
 /* SYM_FUNC_START_WEAK -- use for weak functions */
 #ifndef SYM_FUNC_START_WEAK
 #define SYM_FUNC_START_WEAK(name)			\
@@ -107,7 +87,6 @@
  * SYM_FUNC_START_WEAK, ...
  */
 #ifndef SYM_FUNC_END
-/* the same as SYM_FUNC_END_ALIAS, see comment near SYM_FUNC_START */
 #define SYM_FUNC_END(name)				\
 	SYM_END(name, SYM_T_FUNC)
 #endif
-- 
GitLab


From 272ceeaea355214b301530e262a0df8600bfca95 Mon Sep 17 00:00:00 2001
From: Richard Guy Briggs <rgb@redhat.com>
Date: Tue, 22 Feb 2022 11:44:51 -0500
Subject: [PATCH 0573/1586] audit: log AUDIT_TIME_* records only from rules

AUDIT_TIME_* events are generated when there are syscall rules present
that are not related to time keeping.  This will produce noisy log
entries that could flood the logs and hide events we really care about.

Rather than immediately produce the AUDIT_TIME_* records, store the data
in the context and log it at syscall exit time respecting the filter
rules.

Note: This eats the audit_buffer, unlike any others in show_special().

Please see https://bugzilla.redhat.com/show_bug.cgi?id=1991919

Fixes: 7e8eda734d30 ("ntp: Audit NTP parameters adjustment")
Fixes: 2d87a0674bd6 ("timekeeping: Audit clock adjustments")
Signed-off-by: Richard Guy Briggs <rgb@redhat.com>
[PM: fixed style/whitespace issues]
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 kernel/audit.h   |  4 +++
 kernel/auditsc.c | 87 +++++++++++++++++++++++++++++++++++++-----------
 2 files changed, 71 insertions(+), 20 deletions(-)

diff --git a/kernel/audit.h b/kernel/audit.h
index c4498090a5bd6..58b66543b4d57 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -201,6 +201,10 @@ struct audit_context {
 		struct {
 			char			*name;
 		} module;
+		struct {
+			struct audit_ntp_data	ntp_data;
+			struct timespec64	tk_injoffset;
+		} time;
 	};
 	int fds[2];
 	struct audit_proctitle proctitle;
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index fce5d43a933f0..0efd75e4730f2 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1340,6 +1340,53 @@ static void audit_log_fcaps(struct audit_buffer *ab, struct audit_names *name)
 			 from_kuid(&init_user_ns, name->fcap.rootid));
 }
 
+static void audit_log_time(struct audit_context *context, struct audit_buffer **ab)
+{
+	const struct audit_ntp_data *ntp = &context->time.ntp_data;
+	const struct timespec64 *tk = &context->time.tk_injoffset;
+	static const char * const ntp_name[] = {
+		"offset",
+		"freq",
+		"status",
+		"tai",
+		"tick",
+		"adjust",
+	};
+	int type;
+
+	if (context->type == AUDIT_TIME_ADJNTPVAL) {
+		for (type = 0; type < AUDIT_NTP_NVALS; type++) {
+			if (ntp->vals[type].newval != ntp->vals[type].oldval) {
+				if (!*ab) {
+					*ab = audit_log_start(context,
+							GFP_KERNEL,
+							AUDIT_TIME_ADJNTPVAL);
+					if (!*ab)
+						return;
+				}
+				audit_log_format(*ab, "op=%s old=%lli new=%lli",
+						 ntp_name[type],
+						 ntp->vals[type].oldval,
+						 ntp->vals[type].newval);
+				audit_log_end(*ab);
+				*ab = NULL;
+			}
+		}
+	}
+	if (tk->tv_sec != 0 || tk->tv_nsec != 0) {
+		if (!*ab) {
+			*ab = audit_log_start(context, GFP_KERNEL,
+					      AUDIT_TIME_INJOFFSET);
+			if (!*ab)
+				return;
+		}
+		audit_log_format(*ab, "sec=%lli nsec=%li",
+				 (long long)tk->tv_sec, tk->tv_nsec);
+		audit_log_end(*ab);
+		*ab = NULL;
+	}
+}
+
 static void show_special(struct audit_context *context, int *call_panic)
 {
 	struct audit_buffer *ab;
@@ -1454,6 +1501,11 @@ static void show_special(struct audit_context *context, int *call_panic)
 			audit_log_format(ab, "(null)");
 
 		break;
+	case AUDIT_TIME_ADJNTPVAL:
+	case AUDIT_TIME_INJOFFSET:
+		/* this call deviates from the rest, eating the buffer */
+		audit_log_time(context, &ab);
+		break;
 	}
 	audit_log_end(ab);
 }
@@ -2849,31 +2901,26 @@ void __audit_fanotify(unsigned int response)
 
 void __audit_tk_injoffset(struct timespec64 offset)
 {
-	audit_log(audit_context(), GFP_KERNEL, AUDIT_TIME_INJOFFSET,
-		  "sec=%lli nsec=%li",
-		  (long long)offset.tv_sec, offset.tv_nsec);
-}
-
-static void audit_log_ntp_val(const struct audit_ntp_data *ad,
-			      const char *op, enum audit_ntp_type type)
-{
-	const struct audit_ntp_val *val = &ad->vals[type];
-
-	if (val->newval == val->oldval)
-		return;
+	struct audit_context *context = audit_context();
 
-	audit_log(audit_context(), GFP_KERNEL, AUDIT_TIME_ADJNTPVAL,
-		  "op=%s old=%lli new=%lli", op, val->oldval, val->newval);
+	/* only set type if not already set by NTP */
+	if (!context->type)
+		context->type = AUDIT_TIME_INJOFFSET;
+	memcpy(&context->time.tk_injoffset, &offset, sizeof(offset));
 }
 
 void __audit_ntp_log(const struct audit_ntp_data *ad)
 {
-	audit_log_ntp_val(ad, "offset",	AUDIT_NTP_OFFSET);
-	audit_log_ntp_val(ad, "freq",	AUDIT_NTP_FREQ);
-	audit_log_ntp_val(ad, "status",	AUDIT_NTP_STATUS);
-	audit_log_ntp_val(ad, "tai",	AUDIT_NTP_TAI);
-	audit_log_ntp_val(ad, "tick",	AUDIT_NTP_TICK);
-	audit_log_ntp_val(ad, "adjust",	AUDIT_NTP_ADJUST);
+	struct audit_context *context = audit_context();
+	int type;
+
+	for (type = 0; type < AUDIT_NTP_NVALS; type++)
+		if (ad->vals[type].newval != ad->vals[type].oldval) {
+			/* unconditionally set type, overwriting TK */
+			context->type = AUDIT_TIME_ADJNTPVAL;
+			memcpy(&context->time.ntp_data, ad, sizeof(*ad));
+			break;
+		}
 }
 
 void __audit_log_nfcfg(const char *name, u8 af, unsigned int nentries,
-- 
GitLab


From be9a2277cafd318976d59c41a7f45a934ec43b26 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 17 Feb 2022 11:23:59 +0100
Subject: [PATCH 0574/1586] fork: Redo ifdefs around task stack handling

The use of ifdef CONFIG_VMAP_STACK is confusing in terms what is
actually happenning and what can happen.

For instance from reading free_thread_stack() it appears that in the
CONFIG_VMAP_STACK case it may receive a non-NULL vm pointer but it may also
be NULL in which case __free_pages() is used to free the stack.  This is
however not the case because in the VMAP case a non-NULL pointer is always
returned here.  Since it looks like this might happen, the compiler creates
the correct dead code with the invocation to __free_pages() and everything
around it. Twice.

Add spaces between the ifdef and the identifer to recognize the ifdef
level which is currently in scope.

Add the current identifer as a comment behind #else and #endif.
Move the code within free_thread_stack() and alloc_thread_stack_node()
into the relevant ifdef blocks.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Andy Lutomirski <luto@kernel.org>
Link: https://lore.kernel.org/r/20220217102406.3697941-2-bigeasy@linutronix.de
---
 kernel/fork.c | 74 +++++++++++++++++++++++++++------------------------
 1 file changed, 39 insertions(+), 35 deletions(-)

diff --git a/kernel/fork.c b/kernel/fork.c
index a024bf6254df8..f5cc10164334a 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -185,7 +185,7 @@ static inline void free_task_struct(struct task_struct *tsk)
  */
 # if THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK)
 
-#ifdef CONFIG_VMAP_STACK
+#  ifdef CONFIG_VMAP_STACK
 /*
  * vmalloc() is a bit slow, and calling vfree() enough times will force a TLB
  * flush.  Try to minimize the number of calls by caching stacks.
@@ -210,11 +210,9 @@ static int free_vm_stack_cache(unsigned int cpu)
 
 	return 0;
 }
-#endif
 
 static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
 {
-#ifdef CONFIG_VMAP_STACK
 	void *stack;
 	int i;
 
@@ -258,45 +256,53 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
 		tsk->stack = stack;
 	}
 	return stack;
-#else
-	struct page *page = alloc_pages_node(node, THREADINFO_GFP,
-					     THREAD_SIZE_ORDER);
-
-	if (likely(page)) {
-		tsk->stack = kasan_reset_tag(page_address(page));
-		return tsk->stack;
-	}
-	return NULL;
-#endif
 }
 
-static inline void free_thread_stack(struct task_struct *tsk)
+static void free_thread_stack(struct task_struct *tsk)
 {
-#ifdef CONFIG_VMAP_STACK
 	struct vm_struct *vm = task_stack_vm_area(tsk);
+	int i;
 
-	if (vm) {
-		int i;
+	for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++)
+		memcg_kmem_uncharge_page(vm->pages[i], 0);
 
-		for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++)
-			memcg_kmem_uncharge_page(vm->pages[i], 0);
+	for (i = 0; i < NR_CACHED_STACKS; i++) {
+		if (this_cpu_cmpxchg(cached_stacks[i], NULL,
+				     tsk->stack_vm_area) != NULL)
+			continue;
 
-		for (i = 0; i < NR_CACHED_STACKS; i++) {
-			if (this_cpu_cmpxchg(cached_stacks[i],
-					NULL, tsk->stack_vm_area) != NULL)
-				continue;
+		tsk->stack = NULL;
+		tsk->stack_vm_area = NULL;
+		return;
+	}
+	vfree_atomic(tsk->stack);
+	tsk->stack = NULL;
+	tsk->stack_vm_area = NULL;
+}
 
-			return;
-		}
+#  else /* !CONFIG_VMAP_STACK */
 
-		vfree_atomic(tsk->stack);
-		return;
+static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
+{
+	struct page *page = alloc_pages_node(node, THREADINFO_GFP,
+					     THREAD_SIZE_ORDER);
+
+	if (likely(page)) {
+		tsk->stack = kasan_reset_tag(page_address(page));
+		return tsk->stack;
 	}
-#endif
+	return NULL;
+}
 
+static void free_thread_stack(struct task_struct *tsk)
+{
 	__free_pages(virt_to_page(tsk->stack), THREAD_SIZE_ORDER);
+	tsk->stack = NULL;
 }
-# else
+
+#  endif /* CONFIG_VMAP_STACK */
+# else /* !(THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK)) */
+
 static struct kmem_cache *thread_stack_cache;
 
 static unsigned long *alloc_thread_stack_node(struct task_struct *tsk,
@@ -312,6 +318,7 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk,
 static void free_thread_stack(struct task_struct *tsk)
 {
 	kmem_cache_free(thread_stack_cache, tsk->stack);
+	tsk->stack = NULL;
 }
 
 void thread_stack_cache_init(void)
@@ -321,8 +328,9 @@ void thread_stack_cache_init(void)
 					THREAD_SIZE, NULL);
 	BUG_ON(thread_stack_cache == NULL);
 }
-# endif
-#endif
+
+# endif /* THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK) */
+#endif /* !CONFIG_ARCH_THREAD_STACK_ALLOCATOR */
 
 /* SLAB cache for signal_struct structures (tsk->signal) */
 static struct kmem_cache *signal_cachep;
@@ -432,10 +440,6 @@ static void release_task_stack(struct task_struct *tsk)
 
 	account_kernel_stack(tsk, -1);
 	free_thread_stack(tsk);
-	tsk->stack = NULL;
-#ifdef CONFIG_VMAP_STACK
-	tsk->stack_vm_area = NULL;
-#endif
 }
 
 #ifdef CONFIG_THREAD_INFO_IN_TASK
-- 
GitLab


From 546c42b2c5c161619736dd730d3df709181999d0 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 17 Feb 2022 11:24:00 +0100
Subject: [PATCH 0575/1586] fork: Duplicate task_struct before stack allocation

alloc_thread_stack_node() already populates the task_struct::stack
member except on IA64. The stack pointer is saved and populated again
because IA64 needs it and arch_dup_task_struct() overwrites it.

Allocate thread's stack after task_struct has been duplicated as a
preparation for further changes.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Andy Lutomirski <luto@kernel.org>
Link: https://lore.kernel.org/r/20220217102406.3697941-3-bigeasy@linutronix.de
---
 kernel/fork.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/kernel/fork.c b/kernel/fork.c
index f5cc10164334a..30c01ce2ae57d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -888,6 +888,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 	if (!tsk)
 		return NULL;
 
+	err = arch_dup_task_struct(tsk, orig);
+	if (err)
+		goto free_tsk;
+
 	stack = alloc_thread_stack_node(tsk, node);
 	if (!stack)
 		goto free_tsk;
@@ -897,8 +901,6 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 
 	stack_vm_area = task_stack_vm_area(tsk);
 
-	err = arch_dup_task_struct(tsk, orig);
-
 	/*
 	 * arch_dup_task_struct() clobbers the stack-related fields.  Make
 	 * sure they're properly initialized before using any stack-related
@@ -912,9 +914,6 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 	refcount_set(&tsk->stack_refcount, 1);
 #endif
 
-	if (err)
-		goto free_stack;
-
 	err = scs_prepare(tsk, node);
 	if (err)
 		goto free_stack;
-- 
GitLab


From 2bb0529c0bc0698f3baf3e88ffd61a18eef252a7 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 17 Feb 2022 11:24:01 +0100
Subject: [PATCH 0576/1586] fork, IA64: Provide alloc_thread_stack_node() for
 IA64

Provide a generic alloc_thread_stack_node() for IA64 and
CONFIG_ARCH_THREAD_STACK_ALLOCATOR which returns stack pointer and sets
task_struct::stack so it behaves exactly like the other implementations.

Rename IA64's alloc_thread_stack_node() and add the generic version to the
fork code so it is in one place _and_ to drastically lower the chances of
fat fingering the IA64 code.  Do the same for free_thread_stack().

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Andy Lutomirski <luto@kernel.org>
Link: https://lore.kernel.org/r/20220217102406.3697941-4-bigeasy@linutronix.de
---
 arch/ia64/include/asm/thread_info.h |  6 +++---
 kernel/fork.c                       | 17 +++++++++++++++++
 2 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h
index 51d20cb377062..1684716f08201 100644
--- a/arch/ia64/include/asm/thread_info.h
+++ b/arch/ia64/include/asm/thread_info.h
@@ -55,15 +55,15 @@ struct thread_info {
 #ifndef ASM_OFFSETS_C
 /* how to get the thread information struct from C */
 #define current_thread_info()	((struct thread_info *) ((char *) current + IA64_TASK_SIZE))
-#define alloc_thread_stack_node(tsk, node)	\
+#define arch_alloc_thread_stack_node(tsk, node)	\
 		((unsigned long *) ((char *) (tsk) + IA64_TASK_SIZE))
 #define task_thread_info(tsk)	((struct thread_info *) ((char *) (tsk) + IA64_TASK_SIZE))
 #else
 #define current_thread_info()	((struct thread_info *) 0)
-#define alloc_thread_stack_node(tsk, node)	((unsigned long *) 0)
+#define arch_alloc_thread_stack_node(tsk, node)	((unsigned long *) 0)
 #define task_thread_info(tsk)	((struct thread_info *) 0)
 #endif
-#define free_thread_stack(tsk)	/* nothing */
+#define arch_free_thread_stack(tsk)	/* nothing */
 #define task_stack_page(tsk)	((void *)(tsk))
 
 #define __HAVE_THREAD_FUNCTIONS
diff --git a/kernel/fork.c b/kernel/fork.c
index 30c01ce2ae57d..7b70c47410720 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -330,6 +330,23 @@ void thread_stack_cache_init(void)
 }
 
 # endif /* THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK) */
+#else /* CONFIG_ARCH_THREAD_STACK_ALLOCATOR */
+
+static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
+{
+	unsigned long *stack;
+
+	stack = arch_alloc_thread_stack_node(tsk, node);
+	tsk->stack = stack;
+	return stack;
+}
+
+static void free_thread_stack(struct task_struct *tsk)
+{
+	arch_free_thread_stack(tsk);
+	tsk->stack = NULL;
+}
+
 #endif /* !CONFIG_ARCH_THREAD_STACK_ALLOCATOR */
 
 /* SLAB cache for signal_struct structures (tsk->signal) */
-- 
GitLab


From 7865aba3ade4cf30f0ac08e015550084a50d9afb Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 17 Feb 2022 11:24:02 +0100
Subject: [PATCH 0577/1586] fork: Don't assign the stack pointer in
 dup_task_struct()

All four versions of alloc_thread_stack_node() assign now
task_struct::stack in case the allocation was successful.

Let alloc_thread_stack_node() return an error code instead of the stack
pointer and remove the stack assignment in dup_task_struct().

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Andy Lutomirski <luto@kernel.org>
Link: https://lore.kernel.org/r/20220217102406.3697941-5-bigeasy@linutronix.de
---
 kernel/fork.c | 47 ++++++++++++++++-------------------------------
 1 file changed, 16 insertions(+), 31 deletions(-)

diff --git a/kernel/fork.c b/kernel/fork.c
index 7b70c47410720..875bd43f02ca8 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -211,7 +211,7 @@ static int free_vm_stack_cache(unsigned int cpu)
 	return 0;
 }
 
-static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
+static int alloc_thread_stack_node(struct task_struct *tsk, int node)
 {
 	void *stack;
 	int i;
@@ -232,7 +232,7 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
 
 		tsk->stack_vm_area = s;
 		tsk->stack = s->addr;
-		return s->addr;
+		return 0;
 	}
 
 	/*
@@ -245,17 +245,16 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
 				     THREADINFO_GFP & ~__GFP_ACCOUNT,
 				     PAGE_KERNEL,
 				     0, node, __builtin_return_address(0));
-
+	if (!stack)
+		return -ENOMEM;
 	/*
 	 * We can't call find_vm_area() in interrupt context, and
 	 * free_thread_stack() can be called in interrupt context,
 	 * so cache the vm_struct.
 	 */
-	if (stack) {
-		tsk->stack_vm_area = find_vm_area(stack);
-		tsk->stack = stack;
-	}
-	return stack;
+	tsk->stack_vm_area = find_vm_area(stack);
+	tsk->stack = stack;
+	return 0;
 }
 
 static void free_thread_stack(struct task_struct *tsk)
@@ -282,16 +281,16 @@ static void free_thread_stack(struct task_struct *tsk)
 
 #  else /* !CONFIG_VMAP_STACK */
 
-static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
+static int alloc_thread_stack_node(struct task_struct *tsk, int node)
 {
 	struct page *page = alloc_pages_node(node, THREADINFO_GFP,
 					     THREAD_SIZE_ORDER);
 
 	if (likely(page)) {
 		tsk->stack = kasan_reset_tag(page_address(page));
-		return tsk->stack;
+		return 0;
 	}
-	return NULL;
+	return -ENOMEM;
 }
 
 static void free_thread_stack(struct task_struct *tsk)
@@ -305,14 +304,13 @@ static void free_thread_stack(struct task_struct *tsk)
 
 static struct kmem_cache *thread_stack_cache;
 
-static unsigned long *alloc_thread_stack_node(struct task_struct *tsk,
-						  int node)
+static int alloc_thread_stack_node(struct task_struct *tsk, int node)
 {
 	unsigned long *stack;
 	stack = kmem_cache_alloc_node(thread_stack_cache, THREADINFO_GFP, node);
 	stack = kasan_reset_tag(stack);
 	tsk->stack = stack;
-	return stack;
+	return stack ? 0 : -ENOMEM;
 }
 
 static void free_thread_stack(struct task_struct *tsk)
@@ -332,13 +330,13 @@ void thread_stack_cache_init(void)
 # endif /* THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK) */
 #else /* CONFIG_ARCH_THREAD_STACK_ALLOCATOR */
 
-static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
+static int alloc_thread_stack_node(struct task_struct *tsk, int node)
 {
 	unsigned long *stack;
 
 	stack = arch_alloc_thread_stack_node(tsk, node);
 	tsk->stack = stack;
-	return stack;
+	return stack ? 0 : -ENOMEM;
 }
 
 static void free_thread_stack(struct task_struct *tsk)
@@ -895,8 +893,6 @@ void set_task_stack_end_magic(struct task_struct *tsk)
 static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 {
 	struct task_struct *tsk;
-	unsigned long *stack;
-	struct vm_struct *stack_vm_area __maybe_unused;
 	int err;
 
 	if (node == NUMA_NO_NODE)
@@ -909,24 +905,13 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 	if (err)
 		goto free_tsk;
 
-	stack = alloc_thread_stack_node(tsk, node);
-	if (!stack)
+	err = alloc_thread_stack_node(tsk, node);
+	if (err)
 		goto free_tsk;
 
 	if (memcg_charge_kernel_stack(tsk))
 		goto free_stack;
 
-	stack_vm_area = task_stack_vm_area(tsk);
-
-	/*
-	 * arch_dup_task_struct() clobbers the stack-related fields.  Make
-	 * sure they're properly initialized before using any stack-related
-	 * functions again.
-	 */
-	tsk->stack = stack;
-#ifdef CONFIG_VMAP_STACK
-	tsk->stack_vm_area = stack_vm_area;
-#endif
 #ifdef CONFIG_THREAD_INFO_IN_TASK
 	refcount_set(&tsk->stack_refcount, 1);
 #endif
-- 
GitLab


From f1c1a9ee00e4c53c9ccc03ec1aff4792948a25eb Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 17 Feb 2022 11:24:03 +0100
Subject: [PATCH 0578/1586] fork: Move memcg_charge_kernel_stack() into
 CONFIG_VMAP_STACK

memcg_charge_kernel_stack() is only used in the CONFIG_VMAP_STACK case.

Move memcg_charge_kernel_stack() into the CONFIG_VMAP_STACK block and
invoke it from within alloc_thread_stack_node().

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Andy Lutomirski <luto@kernel.org>
Link: https://lore.kernel.org/r/20220217102406.3697941-6-bigeasy@linutronix.de
---
 kernel/fork.c | 69 +++++++++++++++++++++++++++------------------------
 1 file changed, 36 insertions(+), 33 deletions(-)

diff --git a/kernel/fork.c b/kernel/fork.c
index 875bd43f02ca8..ac63e7fa88165 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -211,6 +211,32 @@ static int free_vm_stack_cache(unsigned int cpu)
 	return 0;
 }
 
+static int memcg_charge_kernel_stack(struct task_struct *tsk)
+{
+	struct vm_struct *vm = task_stack_vm_area(tsk);
+	int i;
+	int ret;
+
+	BUILD_BUG_ON(IS_ENABLED(CONFIG_VMAP_STACK) && PAGE_SIZE % 1024 != 0);
+	BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE);
+
+	for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
+		ret = memcg_kmem_charge_page(vm->pages[i], GFP_KERNEL, 0);
+		if (ret)
+			goto err;
+	}
+	return 0;
+err:
+	/*
+	 * If memcg_kmem_charge_page() fails, page's memory cgroup pointer is
+	 * NULL, and memcg_kmem_uncharge_page() in free_thread_stack() will
+	 * ignore this page.
+	 */
+	for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++)
+		memcg_kmem_uncharge_page(vm->pages[i], 0);
+	return ret;
+}
+
 static int alloc_thread_stack_node(struct task_struct *tsk, int node)
 {
 	void *stack;
@@ -230,6 +256,11 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node)
 		/* Clear stale pointers from reused stack. */
 		memset(s->addr, 0, THREAD_SIZE);
 
+		if (memcg_charge_kernel_stack(tsk)) {
+			vfree(s->addr);
+			return -ENOMEM;
+		}
+
 		tsk->stack_vm_area = s;
 		tsk->stack = s->addr;
 		return 0;
@@ -247,6 +278,11 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node)
 				     0, node, __builtin_return_address(0));
 	if (!stack)
 		return -ENOMEM;
+
+	if (memcg_charge_kernel_stack(tsk)) {
+		vfree(stack);
+		return -ENOMEM;
+	}
 	/*
 	 * We can't call find_vm_area() in interrupt context, and
 	 * free_thread_stack() can be called in interrupt context,
@@ -418,36 +454,6 @@ static void account_kernel_stack(struct task_struct *tsk, int account)
 	}
 }
 
-static int memcg_charge_kernel_stack(struct task_struct *tsk)
-{
-#ifdef CONFIG_VMAP_STACK
-	struct vm_struct *vm = task_stack_vm_area(tsk);
-	int ret;
-
-	BUILD_BUG_ON(IS_ENABLED(CONFIG_VMAP_STACK) && PAGE_SIZE % 1024 != 0);
-
-	if (vm) {
-		int i;
-
-		BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE);
-
-		for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
-			/*
-			 * If memcg_kmem_charge_page() fails, page's
-			 * memory cgroup pointer is NULL, and
-			 * memcg_kmem_uncharge_page() in free_thread_stack()
-			 * will ignore this page.
-			 */
-			ret = memcg_kmem_charge_page(vm->pages[i], GFP_KERNEL,
-						     0);
-			if (ret)
-				return ret;
-		}
-	}
-#endif
-	return 0;
-}
-
 static void release_task_stack(struct task_struct *tsk)
 {
 	if (WARN_ON(READ_ONCE(tsk->__state) != TASK_DEAD))
@@ -909,9 +915,6 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 	if (err)
 		goto free_tsk;
 
-	if (memcg_charge_kernel_stack(tsk))
-		goto free_stack;
-
 #ifdef CONFIG_THREAD_INFO_IN_TASK
 	refcount_set(&tsk->stack_refcount, 1);
 #endif
-- 
GitLab


From 1a03d3f13ffe5dd24142d6db629e72c11b704d99 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 17 Feb 2022 11:24:04 +0100
Subject: [PATCH 0579/1586] fork: Move task stack accounting to do_exit()

There is no need to perform the stack accounting of the outgoing task in
its final schedule() invocation which happens with preemption disabled.
The task is leaving, the resources will be freed and the accounting can
happen in do_exit() before the actual schedule invocation which
frees the stack memory.

Move the accounting of the stack memory from release_task_stack() to
exit_task_stack_account() which then can be invoked from do_exit().

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Andy Lutomirski <luto@kernel.org>
Link: https://lore.kernel.org/r/20220217102406.3697941-7-bigeasy@linutronix.de
---
 include/linux/sched/task_stack.h |  2 ++
 kernel/exit.c                    |  1 +
 kernel/fork.c                    | 35 +++++++++++++++++++++-----------
 3 files changed, 26 insertions(+), 12 deletions(-)

diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h
index d10150587d819..892562ebbd3aa 100644
--- a/include/linux/sched/task_stack.h
+++ b/include/linux/sched/task_stack.h
@@ -79,6 +79,8 @@ static inline void *try_get_task_stack(struct task_struct *tsk)
 static inline void put_task_stack(struct task_struct *tsk) {}
 #endif
 
+void exit_task_stack_account(struct task_struct *tsk);
+
 #define task_stack_end_corrupted(task) \
 		(*(end_of_stack(task)) != STACK_END_MAGIC)
 
diff --git a/kernel/exit.c b/kernel/exit.c
index b00a25bb4ab93..c303cffe7fdb4 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -845,6 +845,7 @@ void __noreturn do_exit(long code)
 		put_page(tsk->task_frag.page);
 
 	validate_creds_for_do_exit(tsk);
+	exit_task_stack_account(tsk);
 
 	check_stack_usage();
 	preempt_disable();
diff --git a/kernel/fork.c b/kernel/fork.c
index ac63e7fa88165..25828127db8dc 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -211,9 +211,8 @@ static int free_vm_stack_cache(unsigned int cpu)
 	return 0;
 }
 
-static int memcg_charge_kernel_stack(struct task_struct *tsk)
+static int memcg_charge_kernel_stack(struct vm_struct *vm)
 {
-	struct vm_struct *vm = task_stack_vm_area(tsk);
 	int i;
 	int ret;
 
@@ -239,6 +238,7 @@ err:
 
 static int alloc_thread_stack_node(struct task_struct *tsk, int node)
 {
+	struct vm_struct *vm;
 	void *stack;
 	int i;
 
@@ -256,7 +256,7 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node)
 		/* Clear stale pointers from reused stack. */
 		memset(s->addr, 0, THREAD_SIZE);
 
-		if (memcg_charge_kernel_stack(tsk)) {
+		if (memcg_charge_kernel_stack(s)) {
 			vfree(s->addr);
 			return -ENOMEM;
 		}
@@ -279,7 +279,8 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node)
 	if (!stack)
 		return -ENOMEM;
 
-	if (memcg_charge_kernel_stack(tsk)) {
+	vm = find_vm_area(stack);
+	if (memcg_charge_kernel_stack(vm)) {
 		vfree(stack);
 		return -ENOMEM;
 	}
@@ -288,19 +289,15 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node)
 	 * free_thread_stack() can be called in interrupt context,
 	 * so cache the vm_struct.
 	 */
-	tsk->stack_vm_area = find_vm_area(stack);
+	tsk->stack_vm_area = vm;
 	tsk->stack = stack;
 	return 0;
 }
 
 static void free_thread_stack(struct task_struct *tsk)
 {
-	struct vm_struct *vm = task_stack_vm_area(tsk);
 	int i;
 
-	for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++)
-		memcg_kmem_uncharge_page(vm->pages[i], 0);
-
 	for (i = 0; i < NR_CACHED_STACKS; i++) {
 		if (this_cpu_cmpxchg(cached_stacks[i], NULL,
 				     tsk->stack_vm_area) != NULL)
@@ -454,12 +451,25 @@ static void account_kernel_stack(struct task_struct *tsk, int account)
 	}
 }
 
+void exit_task_stack_account(struct task_struct *tsk)
+{
+	account_kernel_stack(tsk, -1);
+
+	if (IS_ENABLED(CONFIG_VMAP_STACK)) {
+		struct vm_struct *vm;
+		int i;
+
+		vm = task_stack_vm_area(tsk);
+		for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++)
+			memcg_kmem_uncharge_page(vm->pages[i], 0);
+	}
+}
+
 static void release_task_stack(struct task_struct *tsk)
 {
 	if (WARN_ON(READ_ONCE(tsk->__state) != TASK_DEAD))
 		return;  /* Better to leak the stack than to free prematurely */
 
-	account_kernel_stack(tsk, -1);
 	free_thread_stack(tsk);
 }
 
@@ -918,6 +928,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 #ifdef CONFIG_THREAD_INFO_IN_TASK
 	refcount_set(&tsk->stack_refcount, 1);
 #endif
+	account_kernel_stack(tsk, 1);
 
 	err = scs_prepare(tsk, node);
 	if (err)
@@ -961,8 +972,6 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 	tsk->wake_q.next = NULL;
 	tsk->worker_private = NULL;
 
-	account_kernel_stack(tsk, 1);
-
 	kcov_task_init(tsk);
 	kmap_local_fork(tsk);
 
@@ -981,6 +990,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 	return tsk;
 
 free_stack:
+	exit_task_stack_account(tsk);
 	free_thread_stack(tsk);
 free_tsk:
 	free_task_struct(tsk);
@@ -2459,6 +2469,7 @@ bad_fork_cleanup_count:
 	exit_creds(p);
 bad_fork_free:
 	WRITE_ONCE(p->__state, TASK_DEAD);
+	exit_task_stack_account(p);
 	put_task_stack(p);
 	delayed_free_task(p);
 fork_out:
-- 
GitLab


From e540bf3162e822d7a1f07e69e3bb1b4f925ca368 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 17 Feb 2022 11:24:05 +0100
Subject: [PATCH 0580/1586] fork: Only cache the VMAP stack in
 finish_task_switch()

The task stack could be deallocated later, but for fork()/exec() kind of
workloads (say a shell script executing several commands) it is important
that the stack is released in finish_task_switch() so that in VMAP_STACK
case it can be cached and reused in the new task.

For PREEMPT_RT it would be good if the wake-up in vfree_atomic() could
be avoided in the scheduling path. Far worse are the other
free_thread_stack() implementations which invoke __free_pages()/
kmem_cache_free() with disabled preemption.

Cache the stack in free_thread_stack() in the VMAP_STACK case and
RCU-delay the free path otherwise. Free the stack in the RCU callback.
In the VMAP_STACK case this is another opportunity to fill the cache.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Andy Lutomirski <luto@kernel.org>
Link: https://lore.kernel.org/r/20220217102406.3697941-8-bigeasy@linutronix.de
---
 kernel/fork.c | 76 ++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 63 insertions(+), 13 deletions(-)

diff --git a/kernel/fork.c b/kernel/fork.c
index 25828127db8dc..177bc64078cd5 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -193,6 +193,41 @@ static inline void free_task_struct(struct task_struct *tsk)
 #define NR_CACHED_STACKS 2
 static DEFINE_PER_CPU(struct vm_struct *, cached_stacks[NR_CACHED_STACKS]);
 
+struct vm_stack {
+	struct rcu_head rcu;
+	struct vm_struct *stack_vm_area;
+};
+
+static bool try_release_thread_stack_to_cache(struct vm_struct *vm)
+{
+	unsigned int i;
+
+	for (i = 0; i < NR_CACHED_STACKS; i++) {
+		if (this_cpu_cmpxchg(cached_stacks[i], NULL, vm) != NULL)
+			continue;
+		return true;
+	}
+	return false;
+}
+
+static void thread_stack_free_rcu(struct rcu_head *rh)
+{
+	struct vm_stack *vm_stack = container_of(rh, struct vm_stack, rcu);
+
+	if (try_release_thread_stack_to_cache(vm_stack->stack_vm_area))
+		return;
+
+	vfree(vm_stack);
+}
+
+static void thread_stack_delayed_free(struct task_struct *tsk)
+{
+	struct vm_stack *vm_stack = tsk->stack;
+
+	vm_stack->stack_vm_area = tsk->stack_vm_area;
+	call_rcu(&vm_stack->rcu, thread_stack_free_rcu);
+}
+
 static int free_vm_stack_cache(unsigned int cpu)
 {
 	struct vm_struct **cached_vm_stacks = per_cpu_ptr(cached_stacks, cpu);
@@ -296,24 +331,27 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node)
 
 static void free_thread_stack(struct task_struct *tsk)
 {
-	int i;
+	if (!try_release_thread_stack_to_cache(tsk->stack_vm_area))
+		thread_stack_delayed_free(tsk);
 
-	for (i = 0; i < NR_CACHED_STACKS; i++) {
-		if (this_cpu_cmpxchg(cached_stacks[i], NULL,
-				     tsk->stack_vm_area) != NULL)
-			continue;
-
-		tsk->stack = NULL;
-		tsk->stack_vm_area = NULL;
-		return;
-	}
-	vfree_atomic(tsk->stack);
 	tsk->stack = NULL;
 	tsk->stack_vm_area = NULL;
 }
 
 #  else /* !CONFIG_VMAP_STACK */
 
+static void thread_stack_free_rcu(struct rcu_head *rh)
+{
+	__free_pages(virt_to_page(rh), THREAD_SIZE_ORDER);
+}
+
+static void thread_stack_delayed_free(struct task_struct *tsk)
+{
+	struct rcu_head *rh = tsk->stack;
+
+	call_rcu(rh, thread_stack_free_rcu);
+}
+
 static int alloc_thread_stack_node(struct task_struct *tsk, int node)
 {
 	struct page *page = alloc_pages_node(node, THREADINFO_GFP,
@@ -328,7 +366,7 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node)
 
 static void free_thread_stack(struct task_struct *tsk)
 {
-	__free_pages(virt_to_page(tsk->stack), THREAD_SIZE_ORDER);
+	thread_stack_delayed_free(tsk);
 	tsk->stack = NULL;
 }
 
@@ -337,6 +375,18 @@ static void free_thread_stack(struct task_struct *tsk)
 
 static struct kmem_cache *thread_stack_cache;
 
+static void thread_stack_free_rcu(struct rcu_head *rh)
+{
+	kmem_cache_free(thread_stack_cache, rh);
+}
+
+static void thread_stack_delayed_free(struct task_struct *tsk)
+{
+	struct rcu_head *rh = tsk->stack;
+
+	call_rcu(rh, thread_stack_free_rcu);
+}
+
 static int alloc_thread_stack_node(struct task_struct *tsk, int node)
 {
 	unsigned long *stack;
@@ -348,7 +398,7 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node)
 
 static void free_thread_stack(struct task_struct *tsk)
 {
-	kmem_cache_free(thread_stack_cache, tsk->stack);
+	thread_stack_delayed_free(tsk);
 	tsk->stack = NULL;
 }
 
-- 
GitLab


From 0ce055f85335e48bc571114d61a70ae217039362 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 17 Feb 2022 11:24:06 +0100
Subject: [PATCH 0581/1586] fork: Use IS_ENABLED() in account_kernel_stack()

Not strickly needed but checking CONFIG_VMAP_STACK instead of
task_stack_vm_area()' result allows the compiler the remove the else
path in the CONFIG_VMAP_STACK case where the pointer can't be NULL.

Check for CONFIG_VMAP_STACK in order to use the proper path.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Andy Lutomirski <luto@kernel.org>
Link: https://lore.kernel.org/r/20220217102406.3697941-9-bigeasy@linutronix.de
---
 kernel/fork.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/kernel/fork.c b/kernel/fork.c
index 177bc64078cd5..1279b57c4ad9e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -485,16 +485,16 @@ void vm_area_free(struct vm_area_struct *vma)
 
 static void account_kernel_stack(struct task_struct *tsk, int account)
 {
-	void *stack = task_stack_page(tsk);
-	struct vm_struct *vm = task_stack_vm_area(tsk);
-
-	if (vm) {
+	if (IS_ENABLED(CONFIG_VMAP_STACK)) {
+		struct vm_struct *vm = task_stack_vm_area(tsk);
 		int i;
 
 		for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++)
 			mod_lruvec_page_state(vm->pages[i], NR_KERNEL_STACK_KB,
 					      account * (PAGE_SIZE / 1024));
 	} else {
+		void *stack = task_stack_page(tsk);
+
 		/* All stack pages are in the same node. */
 		mod_lruvec_kmem_state(stack, NR_KERNEL_STACK_KB,
 				      account * (THREAD_SIZE / 1024));
-- 
GitLab


From 97e58e395e9c074fd096dad13c54e9f4112cf71d Mon Sep 17 00:00:00 2001
From: Hou Tao <houtao1@huawei.com>
Date: Thu, 17 Feb 2022 15:22:29 +0800
Subject: [PATCH 0582/1586] arm64: move AARCH64_BREAK_FAULT into insn-def.h

If CONFIG_ARM64_LSE_ATOMICS is off, encoders for LSE-related instructions
can return AARCH64_BREAK_FAULT directly in insn.h. In order to access
AARCH64_BREAK_FAULT in insn.h, we can not include debug-monitors.h in
insn.h, because debug-monitors.h has already depends on insn.h, so just
move AARCH64_BREAK_FAULT into insn-def.h.

It will be used by the following patch to eliminate unnecessary LSE-related
encoders when CONFIG_ARM64_LSE_ATOMICS is off.

Signed-off-by: Hou Tao <houtao1@huawei.com>
Link: https://lore.kernel.org/r/20220217072232.1186625-2-houtao1@huawei.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/debug-monitors.h | 12 ------------
 arch/arm64/include/asm/insn-def.h       | 14 ++++++++++++++
 2 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h
index 657c921fd784a..00c291067e57d 100644
--- a/arch/arm64/include/asm/debug-monitors.h
+++ b/arch/arm64/include/asm/debug-monitors.h
@@ -34,18 +34,6 @@
  */
 #define BREAK_INSTR_SIZE		AARCH64_INSN_SIZE
 
-/*
- * BRK instruction encoding
- * The #imm16 value should be placed at bits[20:5] within BRK ins
- */
-#define AARCH64_BREAK_MON	0xd4200000
-
-/*
- * BRK instruction for provoking a fault on purpose
- * Unlike kgdb, #imm16 value with unallocated handler is used for faulting.
- */
-#define AARCH64_BREAK_FAULT	(AARCH64_BREAK_MON | (FAULT_BRK_IMM << 5))
-
 #define AARCH64_BREAK_KGDB_DYN_DBG	\
 	(AARCH64_BREAK_MON | (KGDB_DYN_DBG_BRK_IMM << 5))
 
diff --git a/arch/arm64/include/asm/insn-def.h b/arch/arm64/include/asm/insn-def.h
index 2c075f615c6ac..1a7d0d483698e 100644
--- a/arch/arm64/include/asm/insn-def.h
+++ b/arch/arm64/include/asm/insn-def.h
@@ -3,7 +3,21 @@
 #ifndef __ASM_INSN_DEF_H
 #define __ASM_INSN_DEF_H
 
+#include <asm/brk-imm.h>
+
 /* A64 instructions are always 32 bits. */
 #define	AARCH64_INSN_SIZE		4
 
+/*
+ * BRK instruction encoding
+ * The #imm16 value should be placed at bits[20:5] within BRK ins
+ */
+#define AARCH64_BREAK_MON	0xd4200000
+
+/*
+ * BRK instruction for provoking a fault on purpose
+ * Unlike kgdb, #imm16 value with unallocated handler is used for faulting.
+ */
+#define AARCH64_BREAK_FAULT	(AARCH64_BREAK_MON | (FAULT_BRK_IMM << 5))
+
 #endif /* __ASM_INSN_DEF_H */
-- 
GitLab


From fa1114d9eba5087ba5e81aab4c56f546995e6cd3 Mon Sep 17 00:00:00 2001
From: Hou Tao <houtao1@huawei.com>
Date: Thu, 17 Feb 2022 15:22:30 +0800
Subject: [PATCH 0583/1586] arm64: insn: add encoders for atomic operations

It is a preparation patch for eBPF atomic supports under arm64. eBPF
needs support atomic[64]_fetch_add, atomic[64]_[fetch_]{and,or,xor} and
atomic[64]_{xchg|cmpxchg}. The ordering semantics of eBPF atomics are
the same with the implementations in linux kernel.

Add three helpers to support LDCLR/LDEOR/LDSET/SWP, CAS and DMB
instructions. STADD/STCLR/STEOR/STSET are simply encoded as aliases for
LDADD/LDCLR/LDEOR/LDSET with XZR as the destination register, so no extra
helper is added. atomic_fetch_add() and other atomic ops needs support for
STLXR instruction, so extend enum aarch64_insn_ldst_type to do that.

LDADD/LDEOR/LDSET/SWP and CAS instructions are only available when LSE
atomics is enabled, so just return AARCH64_BREAK_FAULT directly in
these newly-added helpers if CONFIG_ARM64_LSE_ATOMICS is disabled.

Signed-off-by: Hou Tao <houtao1@huawei.com>
Link: https://lore.kernel.org/r/20220217072232.1186625-3-houtao1@huawei.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/insn.h |  80 +++++++++++++--
 arch/arm64/lib/insn.c         | 185 +++++++++++++++++++++++++++++++---
 arch/arm64/net/bpf_jit.h      |  11 +-
 3 files changed, 253 insertions(+), 23 deletions(-)

diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index 6b776c8667b20..0b6b31307e684 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -205,7 +205,9 @@ enum aarch64_insn_ldst_type {
 	AARCH64_INSN_LDST_LOAD_PAIR_POST_INDEX,
 	AARCH64_INSN_LDST_STORE_PAIR_POST_INDEX,
 	AARCH64_INSN_LDST_LOAD_EX,
+	AARCH64_INSN_LDST_LOAD_ACQ_EX,
 	AARCH64_INSN_LDST_STORE_EX,
+	AARCH64_INSN_LDST_STORE_REL_EX,
 };
 
 enum aarch64_insn_adsb_type {
@@ -280,6 +282,36 @@ enum aarch64_insn_adr_type {
 	AARCH64_INSN_ADR_TYPE_ADR,
 };
 
+enum aarch64_insn_mem_atomic_op {
+	AARCH64_INSN_MEM_ATOMIC_ADD,
+	AARCH64_INSN_MEM_ATOMIC_CLR,
+	AARCH64_INSN_MEM_ATOMIC_EOR,
+	AARCH64_INSN_MEM_ATOMIC_SET,
+	AARCH64_INSN_MEM_ATOMIC_SWP,
+};
+
+enum aarch64_insn_mem_order_type {
+	AARCH64_INSN_MEM_ORDER_NONE,
+	AARCH64_INSN_MEM_ORDER_ACQ,
+	AARCH64_INSN_MEM_ORDER_REL,
+	AARCH64_INSN_MEM_ORDER_ACQREL,
+};
+
+enum aarch64_insn_mb_type {
+	AARCH64_INSN_MB_SY,
+	AARCH64_INSN_MB_ST,
+	AARCH64_INSN_MB_LD,
+	AARCH64_INSN_MB_ISH,
+	AARCH64_INSN_MB_ISHST,
+	AARCH64_INSN_MB_ISHLD,
+	AARCH64_INSN_MB_NSH,
+	AARCH64_INSN_MB_NSHST,
+	AARCH64_INSN_MB_NSHLD,
+	AARCH64_INSN_MB_OSH,
+	AARCH64_INSN_MB_OSHST,
+	AARCH64_INSN_MB_OSHLD,
+};
+
 #define	__AARCH64_INSN_FUNCS(abbr, mask, val)				\
 static __always_inline bool aarch64_insn_is_##abbr(u32 code)		\
 {									\
@@ -303,6 +335,11 @@ __AARCH64_INSN_FUNCS(store_post,	0x3FE00C00, 0x38000400)
 __AARCH64_INSN_FUNCS(load_post,	0x3FE00C00, 0x38400400)
 __AARCH64_INSN_FUNCS(str_reg,	0x3FE0EC00, 0x38206800)
 __AARCH64_INSN_FUNCS(ldadd,	0x3F20FC00, 0x38200000)
+__AARCH64_INSN_FUNCS(ldclr,	0x3F20FC00, 0x38201000)
+__AARCH64_INSN_FUNCS(ldeor,	0x3F20FC00, 0x38202000)
+__AARCH64_INSN_FUNCS(ldset,	0x3F20FC00, 0x38203000)
+__AARCH64_INSN_FUNCS(swp,	0x3F20FC00, 0x38208000)
+__AARCH64_INSN_FUNCS(cas,	0x3FA07C00, 0x08A07C00)
 __AARCH64_INSN_FUNCS(ldr_reg,	0x3FE0EC00, 0x38606800)
 __AARCH64_INSN_FUNCS(ldr_lit,	0xBF000000, 0x18000000)
 __AARCH64_INSN_FUNCS(ldrsw_lit,	0xFF000000, 0x98000000)
@@ -474,13 +511,6 @@ u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg,
 				   enum aarch64_insn_register state,
 				   enum aarch64_insn_size_type size,
 				   enum aarch64_insn_ldst_type type);
-u32 aarch64_insn_gen_ldadd(enum aarch64_insn_register result,
-			   enum aarch64_insn_register address,
-			   enum aarch64_insn_register value,
-			   enum aarch64_insn_size_type size);
-u32 aarch64_insn_gen_stadd(enum aarch64_insn_register address,
-			   enum aarch64_insn_register value,
-			   enum aarch64_insn_size_type size);
 u32 aarch64_insn_gen_add_sub_imm(enum aarch64_insn_register dst,
 				 enum aarch64_insn_register src,
 				 int imm, enum aarch64_insn_variant variant,
@@ -541,6 +571,42 @@ u32 aarch64_insn_gen_prefetch(enum aarch64_insn_register base,
 			      enum aarch64_insn_prfm_type type,
 			      enum aarch64_insn_prfm_target target,
 			      enum aarch64_insn_prfm_policy policy);
+#ifdef CONFIG_ARM64_LSE_ATOMICS
+u32 aarch64_insn_gen_atomic_ld_op(enum aarch64_insn_register result,
+				  enum aarch64_insn_register address,
+				  enum aarch64_insn_register value,
+				  enum aarch64_insn_size_type size,
+				  enum aarch64_insn_mem_atomic_op op,
+				  enum aarch64_insn_mem_order_type order);
+u32 aarch64_insn_gen_cas(enum aarch64_insn_register result,
+			 enum aarch64_insn_register address,
+			 enum aarch64_insn_register value,
+			 enum aarch64_insn_size_type size,
+			 enum aarch64_insn_mem_order_type order);
+#else
+static inline
+u32 aarch64_insn_gen_atomic_ld_op(enum aarch64_insn_register result,
+				  enum aarch64_insn_register address,
+				  enum aarch64_insn_register value,
+				  enum aarch64_insn_size_type size,
+				  enum aarch64_insn_mem_atomic_op op,
+				  enum aarch64_insn_mem_order_type order)
+{
+	return AARCH64_BREAK_FAULT;
+}
+
+static inline
+u32 aarch64_insn_gen_cas(enum aarch64_insn_register result,
+			 enum aarch64_insn_register address,
+			 enum aarch64_insn_register value,
+			 enum aarch64_insn_size_type size,
+			 enum aarch64_insn_mem_order_type order)
+{
+	return AARCH64_BREAK_FAULT;
+}
+#endif
+u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type);
+
 s32 aarch64_get_branch_offset(u32 insn);
 u32 aarch64_set_branch_offset(u32 insn, s32 offset);
 
diff --git a/arch/arm64/lib/insn.c b/arch/arm64/lib/insn.c
index e485cd735261c..5e90887deec4a 100644
--- a/arch/arm64/lib/insn.c
+++ b/arch/arm64/lib/insn.c
@@ -578,10 +578,16 @@ u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg,
 
 	switch (type) {
 	case AARCH64_INSN_LDST_LOAD_EX:
+	case AARCH64_INSN_LDST_LOAD_ACQ_EX:
 		insn = aarch64_insn_get_load_ex_value();
+		if (type == AARCH64_INSN_LDST_LOAD_ACQ_EX)
+			insn |= BIT(15);
 		break;
 	case AARCH64_INSN_LDST_STORE_EX:
+	case AARCH64_INSN_LDST_STORE_REL_EX:
 		insn = aarch64_insn_get_store_ex_value();
+		if (type == AARCH64_INSN_LDST_STORE_REL_EX)
+			insn |= BIT(15);
 		break;
 	default:
 		pr_err("%s: unknown load/store exclusive encoding %d\n", __func__, type);
@@ -603,12 +609,65 @@ u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg,
 					    state);
 }
 
-u32 aarch64_insn_gen_ldadd(enum aarch64_insn_register result,
-			   enum aarch64_insn_register address,
-			   enum aarch64_insn_register value,
-			   enum aarch64_insn_size_type size)
+#ifdef CONFIG_ARM64_LSE_ATOMICS
+static u32 aarch64_insn_encode_ldst_order(enum aarch64_insn_mem_order_type type,
+					  u32 insn)
 {
-	u32 insn = aarch64_insn_get_ldadd_value();
+	u32 order;
+
+	switch (type) {
+	case AARCH64_INSN_MEM_ORDER_NONE:
+		order = 0;
+		break;
+	case AARCH64_INSN_MEM_ORDER_ACQ:
+		order = 2;
+		break;
+	case AARCH64_INSN_MEM_ORDER_REL:
+		order = 1;
+		break;
+	case AARCH64_INSN_MEM_ORDER_ACQREL:
+		order = 3;
+		break;
+	default:
+		pr_err("%s: unknown mem order %d\n", __func__, type);
+		return AARCH64_BREAK_FAULT;
+	}
+
+	insn &= ~GENMASK(23, 22);
+	insn |= order << 22;
+
+	return insn;
+}
+
+u32 aarch64_insn_gen_atomic_ld_op(enum aarch64_insn_register result,
+				  enum aarch64_insn_register address,
+				  enum aarch64_insn_register value,
+				  enum aarch64_insn_size_type size,
+				  enum aarch64_insn_mem_atomic_op op,
+				  enum aarch64_insn_mem_order_type order)
+{
+	u32 insn;
+
+	switch (op) {
+	case AARCH64_INSN_MEM_ATOMIC_ADD:
+		insn = aarch64_insn_get_ldadd_value();
+		break;
+	case AARCH64_INSN_MEM_ATOMIC_CLR:
+		insn = aarch64_insn_get_ldclr_value();
+		break;
+	case AARCH64_INSN_MEM_ATOMIC_EOR:
+		insn = aarch64_insn_get_ldeor_value();
+		break;
+	case AARCH64_INSN_MEM_ATOMIC_SET:
+		insn = aarch64_insn_get_ldset_value();
+		break;
+	case AARCH64_INSN_MEM_ATOMIC_SWP:
+		insn = aarch64_insn_get_swp_value();
+		break;
+	default:
+		pr_err("%s: unimplemented mem atomic op %d\n", __func__, op);
+		return AARCH64_BREAK_FAULT;
+	}
 
 	switch (size) {
 	case AARCH64_INSN_SIZE_32:
@@ -621,6 +680,8 @@ u32 aarch64_insn_gen_ldadd(enum aarch64_insn_register result,
 
 	insn = aarch64_insn_encode_ldst_size(size, insn);
 
+	insn = aarch64_insn_encode_ldst_order(order, insn);
+
 	insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, insn,
 					    result);
 
@@ -631,17 +692,68 @@ u32 aarch64_insn_gen_ldadd(enum aarch64_insn_register result,
 					    value);
 }
 
-u32 aarch64_insn_gen_stadd(enum aarch64_insn_register address,
-			   enum aarch64_insn_register value,
-			   enum aarch64_insn_size_type size)
+static u32 aarch64_insn_encode_cas_order(enum aarch64_insn_mem_order_type type,
+					 u32 insn)
 {
-	/*
-	 * STADD is simply encoded as an alias for LDADD with XZR as
-	 * the destination register.
-	 */
-	return aarch64_insn_gen_ldadd(AARCH64_INSN_REG_ZR, address,
-				      value, size);
+	u32 order;
+
+	switch (type) {
+	case AARCH64_INSN_MEM_ORDER_NONE:
+		order = 0;
+		break;
+	case AARCH64_INSN_MEM_ORDER_ACQ:
+		order = BIT(22);
+		break;
+	case AARCH64_INSN_MEM_ORDER_REL:
+		order = BIT(15);
+		break;
+	case AARCH64_INSN_MEM_ORDER_ACQREL:
+		order = BIT(15) | BIT(22);
+		break;
+	default:
+		pr_err("%s: unknown mem order %d\n", __func__, type);
+		return AARCH64_BREAK_FAULT;
+	}
+
+	insn &= ~(BIT(15) | BIT(22));
+	insn |= order;
+
+	return insn;
+}
+
+u32 aarch64_insn_gen_cas(enum aarch64_insn_register result,
+			 enum aarch64_insn_register address,
+			 enum aarch64_insn_register value,
+			 enum aarch64_insn_size_type size,
+			 enum aarch64_insn_mem_order_type order)
+{
+	u32 insn;
+
+	switch (size) {
+	case AARCH64_INSN_SIZE_32:
+	case AARCH64_INSN_SIZE_64:
+		break;
+	default:
+		pr_err("%s: unimplemented size encoding %d\n", __func__, size);
+		return AARCH64_BREAK_FAULT;
+	}
+
+	insn = aarch64_insn_get_cas_value();
+
+	insn = aarch64_insn_encode_ldst_size(size, insn);
+
+	insn = aarch64_insn_encode_cas_order(order, insn);
+
+	insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, insn,
+					    result);
+
+	insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn,
+					    address);
+
+	return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RS, insn,
+					    value);
 }
+#endif
 
 static u32 aarch64_insn_encode_prfm_imm(enum aarch64_insn_prfm_type type,
 					enum aarch64_insn_prfm_target target,
@@ -1456,3 +1568,48 @@ u32 aarch64_insn_gen_extr(enum aarch64_insn_variant variant,
 	insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, Rn);
 	return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RM, insn, Rm);
 }
+
+u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type)
+{
+	u32 opt;
+	u32 insn;
+
+	switch (type) {
+	case AARCH64_INSN_MB_SY:
+		opt = 0xf;
+		break;
+	case AARCH64_INSN_MB_ST:
+		opt = 0xe;
+		break;
+	case AARCH64_INSN_MB_LD:
+		opt = 0xd;
+		break;
+	case AARCH64_INSN_MB_ISH:
+		opt = 0xb;
+		break;
+	case AARCH64_INSN_MB_ISHST:
+		opt = 0xa;
+		break;
+	case AARCH64_INSN_MB_ISHLD:
+		opt = 0x9;
+		break;
+	case AARCH64_INSN_MB_NSH:
+		opt = 0x7;
+		break;
+	case AARCH64_INSN_MB_NSHST:
+		opt = 0x6;
+		break;
+	case AARCH64_INSN_MB_NSHLD:
+		opt = 0x5;
+		break;
+	default:
+		pr_err("%s: unknown dmb type %d\n", __func__, type);
+		return AARCH64_BREAK_FAULT;
+	}
+
+	insn = aarch64_insn_get_dmb_value();
+	insn &= ~GENMASK(11, 8);
+	insn |= (opt << 8);
+
+	return insn;
+}
diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h
index cc0cf0f5c7c3b..9d9250c7cc729 100644
--- a/arch/arm64/net/bpf_jit.h
+++ b/arch/arm64/net/bpf_jit.h
@@ -89,9 +89,16 @@
 #define A64_STXR(sf, Rt, Rn, Rs) \
 	A64_LSX(sf, Rt, Rn, Rs, STORE_EX)
 
-/* LSE atomics */
+/*
+ * LSE atomics
+ *
+ * STADD is simply encoded as an alias for LDADD with XZR as
+ * the destination register.
+ */
 #define A64_STADD(sf, Rn, Rs) \
-	aarch64_insn_gen_stadd(Rn, Rs, A64_SIZE(sf))
+	aarch64_insn_gen_atomic_ld_op(A64_ZR, Rn, Rs, \
+		A64_SIZE(sf), AARCH64_INSN_MEM_ATOMIC_ADD, \
+		AARCH64_INSN_MEM_ORDER_NONE)
 
 /* Add/subtract (immediate) */
 #define A64_ADDSUB_IMM(sf, Rd, Rn, imm12, type) \
-- 
GitLab


From f9b5e46f4097eb298f68e5b02f70697a90a44739 Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <pcc@google.com>
Date: Fri, 18 Feb 2022 17:29:44 -0800
Subject: [PATCH 0584/1586] kasan: split kasan_*enabled() functions into a
 separate header

In an upcoming commit we are going to need to call
kasan_hw_tags_enabled() from arch/arm64/include/asm/mte.h. This
would create a circular dependency between headers if KASAN_GENERIC
or KASAN_SW_TAGS is enabled: linux/kasan.h -> linux/pgtable.h ->
asm/pgtable.h -> asm/mte.h -> linux/kasan.h. Break the cycle
by introducing a new header linux/kasan-enabled.h with the
kasan_*enabled() functions that can be included from asm/mte.h.

Link: https://linux-review.googlesource.com/id/I5b0d96c6ed0026fc790899e14d42b2fac6ab568e
Signed-off-by: Peter Collingbourne <pcc@google.com>
Reviewed-by: Andrey Konovalov <andreyknvl@gmail.com>
Link: https://lore.kernel.org/r/20220219012945.894950-1-pcc@google.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 include/linux/kasan-enabled.h | 33 +++++++++++++++++++++++++++++++++
 include/linux/kasan.h         | 23 +----------------------
 2 files changed, 34 insertions(+), 22 deletions(-)
 create mode 100644 include/linux/kasan-enabled.h

diff --git a/include/linux/kasan-enabled.h b/include/linux/kasan-enabled.h
new file mode 100644
index 0000000000000..4b6615375022f
--- /dev/null
+++ b/include/linux/kasan-enabled.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_KASAN_ENABLED_H
+#define _LINUX_KASAN_ENABLED_H
+
+#ifdef CONFIG_KASAN_HW_TAGS
+
+DECLARE_STATIC_KEY_FALSE(kasan_flag_enabled);
+
+static __always_inline bool kasan_enabled(void)
+{
+	return static_branch_likely(&kasan_flag_enabled);
+}
+
+static inline bool kasan_hw_tags_enabled(void)
+{
+	return kasan_enabled();
+}
+
+#else /* CONFIG_KASAN_HW_TAGS */
+
+static inline bool kasan_enabled(void)
+{
+	return IS_ENABLED(CONFIG_KASAN);
+}
+
+static inline bool kasan_hw_tags_enabled(void)
+{
+	return false;
+}
+
+#endif /* CONFIG_KASAN_HW_TAGS */
+
+#endif /* LINUX_KASAN_ENABLED_H */
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index 4a45562d88937..b6a93261c92a0 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -3,6 +3,7 @@
 #define _LINUX_KASAN_H
 
 #include <linux/bug.h>
+#include <linux/kasan-enabled.h>
 #include <linux/kernel.h>
 #include <linux/static_key.h>
 #include <linux/types.h>
@@ -83,33 +84,11 @@ static inline void kasan_disable_current(void) {}
 
 #ifdef CONFIG_KASAN_HW_TAGS
 
-DECLARE_STATIC_KEY_FALSE(kasan_flag_enabled);
-
-static __always_inline bool kasan_enabled(void)
-{
-	return static_branch_likely(&kasan_flag_enabled);
-}
-
-static inline bool kasan_hw_tags_enabled(void)
-{
-	return kasan_enabled();
-}
-
 void kasan_alloc_pages(struct page *page, unsigned int order, gfp_t flags);
 void kasan_free_pages(struct page *page, unsigned int order);
 
 #else /* CONFIG_KASAN_HW_TAGS */
 
-static inline bool kasan_enabled(void)
-{
-	return IS_ENABLED(CONFIG_KASAN);
-}
-
-static inline bool kasan_hw_tags_enabled(void)
-{
-	return false;
-}
-
 static __always_inline void kasan_alloc_pages(struct page *page,
 					      unsigned int order, gfp_t flags)
 {
-- 
GitLab


From 38ddf7dafaeaf3fcdea65b3b4dfb06b4bcd9cc15 Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <pcc@google.com>
Date: Fri, 18 Feb 2022 17:29:45 -0800
Subject: [PATCH 0585/1586] arm64: mte: avoid clearing PSTATE.TCO on entry
 unless necessary

On some microarchitectures, clearing PSTATE.TCO is expensive. Clearing
TCO is only necessary if in-kernel MTE is enabled, or if MTE is
enabled in the userspace process in synchronous (or, soon, asymmetric)
mode, because we do not report uaccess faults to userspace in none
or asynchronous modes. Therefore, adjust the kernel entry code to
clear TCO only if necessary.

Because it is now possible to switch to a task in which TCO needs to
be clear from a task in which TCO is set, we also need to do the same
thing on task switch.

Signed-off-by: Peter Collingbourne <pcc@google.com>
Link: https://linux-review.googlesource.com/id/I52d82a580bd0500d420be501af2c35fa8c90729e
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20220219012945.894950-2-pcc@google.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/mte.h     | 22 ++++++++++++++++++++++
 arch/arm64/kernel/entry-common.c |  3 +++
 arch/arm64/kernel/entry.S        |  7 -------
 arch/arm64/kernel/mte.c          |  3 +++
 4 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
index 075539f5f1c88..adcb937342f14 100644
--- a/arch/arm64/include/asm/mte.h
+++ b/arch/arm64/include/asm/mte.h
@@ -11,7 +11,9 @@
 #ifndef __ASSEMBLY__
 
 #include <linux/bitfield.h>
+#include <linux/kasan-enabled.h>
 #include <linux/page-flags.h>
+#include <linux/sched.h>
 #include <linux/types.h>
 
 #include <asm/pgtable-types.h>
@@ -86,6 +88,26 @@ static inline int mte_ptrace_copy_tags(struct task_struct *child,
 
 #endif /* CONFIG_ARM64_MTE */
 
+static inline void mte_disable_tco_entry(struct task_struct *task)
+{
+	if (!system_supports_mte())
+		return;
+
+	/*
+	 * Re-enable tag checking (TCO set on exception entry). This is only
+	 * necessary if MTE is enabled in either the kernel or the userspace
+	 * task in synchronous or asymmetric mode (SCTLR_EL1.TCF0 bit 0 is set
+	 * for both). With MTE disabled in the kernel and disabled or
+	 * asynchronous in userspace, tag check faults (including in uaccesses)
+	 * are not reported, therefore there is no need to re-enable checking.
+	 * This is beneficial on microarchitectures where re-enabling TCO is
+	 * expensive.
+	 */
+	if (kasan_hw_tags_enabled() ||
+	    (task->thread.sctlr_user & (1UL << SCTLR_EL1_TCF0_SHIFT)))
+		asm volatile(SET_PSTATE_TCO(0));
+}
+
 #ifdef CONFIG_KASAN_HW_TAGS
 /* Whether the MTE asynchronous mode is enabled. */
 DECLARE_STATIC_KEY_FALSE(mte_async_or_asymm_mode);
diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index ef7fcefb96bd1..7093b578e3250 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -6,6 +6,7 @@
  */
 
 #include <linux/context_tracking.h>
+#include <linux/kasan.h>
 #include <linux/linkage.h>
 #include <linux/lockdep.h>
 #include <linux/ptrace.h>
@@ -56,6 +57,7 @@ static void noinstr enter_from_kernel_mode(struct pt_regs *regs)
 {
 	__enter_from_kernel_mode(regs);
 	mte_check_tfsr_entry();
+	mte_disable_tco_entry(current);
 }
 
 /*
@@ -103,6 +105,7 @@ static __always_inline void __enter_from_user_mode(void)
 	CT_WARN_ON(ct_state() != CONTEXT_USER);
 	user_exit_irqoff();
 	trace_hardirqs_off_finish();
+	mte_disable_tco_entry(current);
 }
 
 static __always_inline void enter_from_user_mode(struct pt_regs *regs)
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 772ec2ecf4888..e1013a83d4f01 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -308,13 +308,6 @@ alternative_if ARM64_HAS_IRQ_PRIO_MASKING
 	msr_s	SYS_ICC_PMR_EL1, x20
 alternative_else_nop_endif
 
-	/* Re-enable tag checking (TCO set on exception entry) */
-#ifdef CONFIG_ARM64_MTE
-alternative_if ARM64_MTE
-	SET_PSTATE_TCO(0)
-alternative_else_nop_endif
-#endif
-
 	/*
 	 * Registers that may be useful after this macro is invoked:
 	 *
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index f418ebc65f950..f983795b5eda2 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -253,6 +253,9 @@ void mte_thread_switch(struct task_struct *next)
 	mte_update_sctlr_user(next);
 	mte_update_gcr_excl(next);
 
+	/* TCO may not have been disabled on exception entry for the current task. */
+	mte_disable_tco_entry(next);
+
 	/*
 	 * Check if an async tag exception occurred at EL1.
 	 *
-- 
GitLab


From a8a733b20109fc85a5b2e0318cef036b2c818ac3 Mon Sep 17 00:00:00 2001
From: Anshuman Khandual <anshuman.khandual@arm.com>
Date: Thu, 17 Feb 2022 10:22:37 +0530
Subject: [PATCH 0586/1586] arm64/hugetlb: Define __hugetlb_valid_size()

arch_hugetlb_valid_size() can be just factored out to create another helper
to be used in arch_hugetlb_migration_supported() as well. This just defines
__hugetlb_valid_size() for that purpose.

Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
Link: https://lore.kernel.org/r/1645073557-6150-1-git-send-email-anshuman.khandual@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/mm/hugetlbpage.c | 38 ++++++++++++++++++-------------------
 1 file changed, 18 insertions(+), 20 deletions(-)

diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index ffb9c229610ab..a33aba91ad891 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -56,25 +56,34 @@ void __init arm64_hugetlb_cma_reserve(void)
 }
 #endif /* CONFIG_CMA */
 
-#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION
-bool arch_hugetlb_migration_supported(struct hstate *h)
+static bool __hugetlb_valid_size(unsigned long size)
 {
-	size_t pagesize = huge_page_size(h);
-
-	switch (pagesize) {
+	switch (size) {
 #ifndef __PAGETABLE_PMD_FOLDED
 	case PUD_SIZE:
 		return pud_sect_supported();
 #endif
-	case PMD_SIZE:
 	case CONT_PMD_SIZE:
+	case PMD_SIZE:
 	case CONT_PTE_SIZE:
 		return true;
 	}
-	pr_warn("%s: unrecognized huge page size 0x%lx\n",
-			__func__, pagesize);
+
 	return false;
 }
+
+#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION
+bool arch_hugetlb_migration_supported(struct hstate *h)
+{
+	size_t pagesize = huge_page_size(h);
+
+	if (!__hugetlb_valid_size(pagesize)) {
+		pr_warn("%s: unrecognized huge page size 0x%lx\n",
+			__func__, pagesize);
+		return false;
+	}
+	return true;
+}
 #endif
 
 int pmd_huge(pmd_t pmd)
@@ -506,16 +515,5 @@ arch_initcall(hugetlbpage_init);
 
 bool __init arch_hugetlb_valid_size(unsigned long size)
 {
-	switch (size) {
-#ifndef __PAGETABLE_PMD_FOLDED
-	case PUD_SIZE:
-		return pud_sect_supported();
-#endif
-	case CONT_PMD_SIZE:
-	case PMD_SIZE:
-	case CONT_PTE_SIZE:
-		return true;
-	}
-
-	return false;
+	return __hugetlb_valid_size(size);
 }
-- 
GitLab


From f2544f5e6c691679d56bb38637d2f347075b36fa Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Tue, 22 Feb 2022 13:45:18 -0800
Subject: [PATCH 0587/1586] EVM: fix the evm= __setup handler return value

__setup() handlers should return 1 if the parameter is handled.
Returning 0 causes the entire string to be added to init's
environment strings (limited to 32 strings), unnecessarily polluting it.

Using the documented string "evm=fix" causes an Unknown parameter message:
  Unknown kernel command line parameters
  "BOOT_IMAGE=/boot/bzImage-517rc5 evm=fix", will be passed to user space.

and that string is added to init's environment string space:
  Run /sbin/init as init process
    with arguments:
     /sbin/init
    with environment:
     HOME=/
     TERM=linux
     BOOT_IMAGE=/boot/bzImage-517rc5
     evm=fix

With this change, using "evm=fix" acts as expected and an invalid
option ("evm=evm") causes a warning to be printed:
  evm: invalid "evm" mode
but init's environment is not polluted with this string, as expected.

Fixes: 7102ebcd65c1 ("evm: permit only valid security.evm xattrs to be updated")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Reported-by: Igor Zhbanov <i.zhbanov@omprussia.ru>
Link: lore.kernel.org/r/64644a2f-4a20-bab3-1e15-3b2cdd0defe3@omprussia.ru
Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
---
 security/integrity/evm/evm_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c
index 08f907382c618..7d87772f0ce68 100644
--- a/security/integrity/evm/evm_main.c
+++ b/security/integrity/evm/evm_main.c
@@ -86,7 +86,7 @@ static int __init evm_set_fixmode(char *str)
 	else
 		pr_err("invalid \"%s\" mode", str);
 
-	return 0;
+	return 1;
 }
 __setup("evm=", evm_set_fixmode);
 
-- 
GitLab


From c8be60c12041145e663249af261286d402b4c5e3 Mon Sep 17 00:00:00 2001
From: Huang Rui <ray.huang@amd.com>
Date: Tue, 22 Feb 2022 23:34:17 +0800
Subject: [PATCH 0588/1586] cpupower: Add AMD P-State capability flag

Add AMD P-State capability flag in cpupower to indicate AMD new P-State
kernel module support on Ryzen processors.

Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Signed-off-by: Huang Rui <ray.huang@amd.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/power/cpupower/utils/helpers/helpers.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h
index 33ffacee7fcb9..b4813efdfb009 100644
--- a/tools/power/cpupower/utils/helpers/helpers.h
+++ b/tools/power/cpupower/utils/helpers/helpers.h
@@ -73,6 +73,7 @@ enum cpupower_cpu_vendor {X86_VENDOR_UNKNOWN = 0, X86_VENDOR_INTEL,
 #define CPUPOWER_CAP_AMD_HW_PSTATE	0x00000100
 #define CPUPOWER_CAP_AMD_PSTATEDEF	0x00000200
 #define CPUPOWER_CAP_AMD_CPB_MSR	0x00000400
+#define CPUPOWER_CAP_AMD_PSTATE		0x00000800
 
 #define CPUPOWER_AMD_CPBDIS		0x02000000
 
-- 
GitLab


From 46c273a0958274f1e1e69f3540ae827a92e0660f Mon Sep 17 00:00:00 2001
From: Huang Rui <ray.huang@amd.com>
Date: Tue, 22 Feb 2022 23:34:18 +0800
Subject: [PATCH 0589/1586] cpupower: Add the function to check AMD P-State
 enabled

The processor with AMD P-State function also supports legacy ACPI
hardware P-States feature as well. Once driver sets AMD P-State eanbled,
the processor will respond the finer grain AMD P-State feature instead of
legacy ACPI P-States. So it introduces the cpupower_amd_pstate_enabled()
to check whether the current kernel enables AMD P-State or AMD CPUFreq
module.

Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Signed-off-by: Huang Rui <ray.huang@amd.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/power/cpupower/utils/helpers/helpers.h | 10 ++++++++++
 tools/power/cpupower/utils/helpers/misc.c    | 18 ++++++++++++++++++
 2 files changed, 28 insertions(+)

diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h
index b4813efdfb009..62771a0868712 100644
--- a/tools/power/cpupower/utils/helpers/helpers.h
+++ b/tools/power/cpupower/utils/helpers/helpers.h
@@ -11,6 +11,7 @@
 
 #include <libintl.h>
 #include <locale.h>
+#include <stdbool.h>
 
 #include "helpers/bitmask.h"
 #include <cpupower.h>
@@ -136,6 +137,12 @@ extern int decode_pstates(unsigned int cpu, int boost_states,
 
 extern int cpufreq_has_boost_support(unsigned int cpu, int *support,
 				     int *active, int * states);
+
+/* AMD P-State stuff **************************/
+bool cpupower_amd_pstate_enabled(void);
+
+/* AMD P-State stuff **************************/
+
 /*
  * CPUID functions returning a single datum
  */
@@ -168,6 +175,9 @@ static inline int cpufreq_has_boost_support(unsigned int cpu, int *support,
 					    int *active, int * states)
 { return -1; }
 
+static inline bool cpupower_amd_pstate_enabled(void)
+{ return false; }
+
 /* cpuid and cpuinfo helpers  **************************/
 
 static inline unsigned int cpuid_eax(unsigned int op) { return 0; };
diff --git a/tools/power/cpupower/utils/helpers/misc.c b/tools/power/cpupower/utils/helpers/misc.c
index fc6e345117216..0c483cdefcc26 100644
--- a/tools/power/cpupower/utils/helpers/misc.c
+++ b/tools/power/cpupower/utils/helpers/misc.c
@@ -3,9 +3,11 @@
 #include <stdio.h>
 #include <errno.h>
 #include <stdlib.h>
+#include <string.h>
 
 #include "helpers/helpers.h"
 #include "helpers/sysfs.h"
+#include "cpufreq.h"
 
 #if defined(__i386__) || defined(__x86_64__)
 
@@ -83,6 +85,22 @@ int cpupower_intel_set_perf_bias(unsigned int cpu, unsigned int val)
 	return 0;
 }
 
+bool cpupower_amd_pstate_enabled(void)
+{
+	char *driver = cpufreq_get_driver(0);
+	bool ret = false;
+
+	if (!driver)
+		return ret;
+
+	if (!strcmp(driver, "amd-pstate"))
+		ret = true;
+
+	cpufreq_put_driver(driver);
+
+	return ret;
+}
+
 #endif /* #if defined(__i386__) || defined(__x86_64__) */
 
 /* get_cpustate
-- 
GitLab


From 083792f368b8ceea7ae035b6641e9cef3aceb366 Mon Sep 17 00:00:00 2001
From: Huang Rui <ray.huang@amd.com>
Date: Tue, 22 Feb 2022 23:34:19 +0800
Subject: [PATCH 0590/1586] cpupower: Initial AMD P-State capability

If kernel starts the AMD P-State module, the cpupower will initial the
capability flag as CPUPOWER_CAP_AMD_PSTATE. And once AMD P-State
capability is set, it won't need to set legacy ACPI relative
capabilities anymore.

Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Signed-off-by: Huang Rui <ray.huang@amd.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/power/cpupower/utils/helpers/cpuid.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/tools/power/cpupower/utils/helpers/cpuid.c b/tools/power/cpupower/utils/helpers/cpuid.c
index 72eb435931803..eae91f11d1870 100644
--- a/tools/power/cpupower/utils/helpers/cpuid.c
+++ b/tools/power/cpupower/utils/helpers/cpuid.c
@@ -149,6 +149,19 @@ out:
 		if (ext_cpuid_level >= 0x80000008 &&
 		    cpuid_ebx(0x80000008) & (1 << 4))
 			cpu_info->caps |= CPUPOWER_CAP_AMD_RDPRU;
+
+		if (cpupower_amd_pstate_enabled()) {
+			cpu_info->caps |= CPUPOWER_CAP_AMD_PSTATE;
+
+			/*
+			 * If AMD P-State is enabled, the firmware will treat
+			 * AMD P-State function as high priority.
+			 */
+			cpu_info->caps &= ~CPUPOWER_CAP_AMD_CPB;
+			cpu_info->caps &= ~CPUPOWER_CAP_AMD_CPB_MSR;
+			cpu_info->caps &= ~CPUPOWER_CAP_AMD_HW_PSTATE;
+			cpu_info->caps &= ~CPUPOWER_CAP_AMD_PSTATEDEF;
+		}
 	}
 
 	if (cpu_info->vendor == X86_VENDOR_INTEL) {
-- 
GitLab


From e3ede97657d8cfc4fd75aecad50269534bb55aed Mon Sep 17 00:00:00 2001
From: Huang Rui <ray.huang@amd.com>
Date: Tue, 22 Feb 2022 23:34:20 +0800
Subject: [PATCH 0591/1586] cpupower: Add the function to get the sysfs value
 from specific table

Expose the helper into cpufreq header, then cpufreq driver can use this
function to get the sysfs value if it has any specific sysfs interfaces.

Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Signed-off-by: Huang Rui <ray.huang@amd.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/power/cpupower/lib/cpufreq.c | 23 ++++++++++++++++-------
 tools/power/cpupower/lib/cpufreq.h | 12 ++++++++++++
 2 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/tools/power/cpupower/lib/cpufreq.c b/tools/power/cpupower/lib/cpufreq.c
index c3b56db8b9214..1516d23c17c98 100644
--- a/tools/power/cpupower/lib/cpufreq.c
+++ b/tools/power/cpupower/lib/cpufreq.c
@@ -83,20 +83,21 @@ static const char *cpufreq_value_files[MAX_CPUFREQ_VALUE_READ_FILES] = {
 	[STATS_NUM_TRANSITIONS] = "stats/total_trans"
 };
 
-
-static unsigned long sysfs_cpufreq_get_one_value(unsigned int cpu,
-						 enum cpufreq_value which)
+unsigned long cpufreq_get_sysfs_value_from_table(unsigned int cpu,
+						 const char **table,
+						 unsigned int index,
+						 unsigned int size)
 {
 	unsigned long value;
 	unsigned int len;
 	char linebuf[MAX_LINE_LEN];
 	char *endp;
 
-	if (which >= MAX_CPUFREQ_VALUE_READ_FILES)
+	if (!table || index >= size || !table[index])
 		return 0;
 
-	len = sysfs_cpufreq_read_file(cpu, cpufreq_value_files[which],
-				linebuf, sizeof(linebuf));
+	len = sysfs_cpufreq_read_file(cpu, table[index], linebuf,
+				      sizeof(linebuf));
 
 	if (len == 0)
 		return 0;
@@ -109,6 +110,14 @@ static unsigned long sysfs_cpufreq_get_one_value(unsigned int cpu,
 	return value;
 }
 
+static unsigned long sysfs_cpufreq_get_one_value(unsigned int cpu,
+						 enum cpufreq_value which)
+{
+	return cpufreq_get_sysfs_value_from_table(cpu, cpufreq_value_files,
+						  which,
+						  MAX_CPUFREQ_VALUE_READ_FILES);
+}
+
 /* read access to files which contain one string */
 
 enum cpufreq_string {
@@ -124,7 +133,7 @@ static const char *cpufreq_string_files[MAX_CPUFREQ_STRING_FILES] = {
 
 
 static char *sysfs_cpufreq_get_one_string(unsigned int cpu,
-					   enum cpufreq_string which)
+					  enum cpufreq_string which)
 {
 	char linebuf[MAX_LINE_LEN];
 	char *result;
diff --git a/tools/power/cpupower/lib/cpufreq.h b/tools/power/cpupower/lib/cpufreq.h
index 95f4fd9e2656c..2f3c840358063 100644
--- a/tools/power/cpupower/lib/cpufreq.h
+++ b/tools/power/cpupower/lib/cpufreq.h
@@ -203,6 +203,18 @@ int cpufreq_modify_policy_governor(unsigned int cpu, char *governor);
 int cpufreq_set_frequency(unsigned int cpu,
 				unsigned long target_frequency);
 
+/*
+ * get the sysfs value from specific table
+ *
+ * Read the value with the sysfs file name from specific table. Does
+ * only work if the cpufreq driver has the specific sysfs interfaces.
+ */
+
+unsigned long cpufreq_get_sysfs_value_from_table(unsigned int cpu,
+						 const char **table,
+						 unsigned int index,
+						 unsigned int size);
+
 #ifdef __cplusplus
 }
 #endif
-- 
GitLab


From 4a06806e5d4a781d2c81f6064985018562b2604b Mon Sep 17 00:00:00 2001
From: Huang Rui <ray.huang@amd.com>
Date: Tue, 22 Feb 2022 23:34:21 +0800
Subject: [PATCH 0592/1586] cpupower: Introduce ACPI CPPC library

Kernel ACPI subsytem introduced the sysfs attributes for acpi cppc
library in below path:

/sys/devices/system/cpu/cpuX/acpi_cppc/

And these attributes will be used for AMD P-State driver to provide some
performance and frequency values.

Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Signed-off-by: Huang Rui <ray.huang@amd.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/power/cpupower/Makefile        |  6 +--
 tools/power/cpupower/lib/acpi_cppc.c | 59 ++++++++++++++++++++++++++++
 tools/power/cpupower/lib/acpi_cppc.h | 21 ++++++++++
 3 files changed, 83 insertions(+), 3 deletions(-)
 create mode 100644 tools/power/cpupower/lib/acpi_cppc.c
 create mode 100644 tools/power/cpupower/lib/acpi_cppc.h

diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile
index 3b1594447f294..e9b6de314654f 100644
--- a/tools/power/cpupower/Makefile
+++ b/tools/power/cpupower/Makefile
@@ -143,9 +143,9 @@ UTIL_HEADERS = utils/helpers/helpers.h utils/idle_monitor/cpupower-monitor.h \
 	utils/helpers/bitmask.h \
 	utils/idle_monitor/idle_monitors.h utils/idle_monitor/idle_monitors.def
 
-LIB_HEADERS = 	lib/cpufreq.h lib/cpupower.h lib/cpuidle.h
-LIB_SRC = 	lib/cpufreq.c lib/cpupower.c lib/cpuidle.c
-LIB_OBJS = 	lib/cpufreq.o lib/cpupower.o lib/cpuidle.o
+LIB_HEADERS = 	lib/cpufreq.h lib/cpupower.h lib/cpuidle.h lib/acpi_cppc.h
+LIB_SRC = 	lib/cpufreq.c lib/cpupower.c lib/cpuidle.c lib/acpi_cppc.c
+LIB_OBJS = 	lib/cpufreq.o lib/cpupower.o lib/cpuidle.o lib/acpi_cppc.o
 LIB_OBJS :=	$(addprefix $(OUTPUT),$(LIB_OBJS))
 
 override CFLAGS +=	-pipe
diff --git a/tools/power/cpupower/lib/acpi_cppc.c b/tools/power/cpupower/lib/acpi_cppc.c
new file mode 100644
index 0000000000000..c401ac331e9fa
--- /dev/null
+++ b/tools/power/cpupower/lib/acpi_cppc.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <stdio.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "cpupower_intern.h"
+#include "acpi_cppc.h"
+
+/* ACPI CPPC sysfs access ***********************************************/
+
+static int acpi_cppc_read_file(unsigned int cpu, const char *fname,
+			       char *buf, size_t buflen)
+{
+	char path[SYSFS_PATH_MAX];
+
+	snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/acpi_cppc/%s",
+		 cpu, fname);
+	return cpupower_read_sysfs(path, buf, buflen);
+}
+
+static const char * const acpi_cppc_value_files[] = {
+	[HIGHEST_PERF] = "highest_perf",
+	[LOWEST_PERF] = "lowest_perf",
+	[NOMINAL_PERF] = "nominal_perf",
+	[LOWEST_NONLINEAR_PERF] = "lowest_nonlinear_perf",
+	[LOWEST_FREQ] = "lowest_freq",
+	[NOMINAL_FREQ] = "nominal_freq",
+	[REFERENCE_PERF] = "reference_perf",
+	[WRAPAROUND_TIME] = "wraparound_time"
+};
+
+unsigned long acpi_cppc_get_data(unsigned int cpu, enum acpi_cppc_value which)
+{
+	unsigned long long value;
+	unsigned int len;
+	char linebuf[MAX_LINE_LEN];
+	char *endp;
+
+	if (which >= MAX_CPPC_VALUE_FILES)
+		return 0;
+
+	len = acpi_cppc_read_file(cpu, acpi_cppc_value_files[which],
+				  linebuf, sizeof(linebuf));
+	if (len == 0)
+		return 0;
+
+	value = strtoull(linebuf, &endp, 0);
+
+	if (endp == linebuf || errno == ERANGE)
+		return 0;
+
+	return value;
+}
diff --git a/tools/power/cpupower/lib/acpi_cppc.h b/tools/power/cpupower/lib/acpi_cppc.h
new file mode 100644
index 0000000000000..85ca83080316a
--- /dev/null
+++ b/tools/power/cpupower/lib/acpi_cppc.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef __ACPI_CPPC_H__
+#define __ACPI_CPPC_H__
+
+enum acpi_cppc_value {
+	HIGHEST_PERF,
+	LOWEST_PERF,
+	NOMINAL_PERF,
+	LOWEST_NONLINEAR_PERF,
+	LOWEST_FREQ,
+	NOMINAL_FREQ,
+	REFERENCE_PERF,
+	WRAPAROUND_TIME,
+	MAX_CPPC_VALUE_FILES
+};
+
+unsigned long acpi_cppc_get_data(unsigned int cpu,
+				 enum acpi_cppc_value which);
+
+#endif /* _ACPI_CPPC_H */
-- 
GitLab


From 33e43f3636dffe84753847eee79ea0e3527105e6 Mon Sep 17 00:00:00 2001
From: Huang Rui <ray.huang@amd.com>
Date: Tue, 22 Feb 2022 23:34:22 +0800
Subject: [PATCH 0593/1586] cpupower: Add AMD P-State sysfs definition and
 access helper

Introduce the marco definitions and access helper function for
AMD P-State sysfs interfaces such as each performance goals and frequency
levels in amd helper file. They will be used to read the sysfs attribute
from AMD P-State cpufreq driver for cpupower utilities.

Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Signed-off-by: Huang Rui <ray.huang@amd.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/power/cpupower/utils/helpers/amd.c | 30 ++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/tools/power/cpupower/utils/helpers/amd.c b/tools/power/cpupower/utils/helpers/amd.c
index 97f2c857048e1..4d45d1b44164f 100644
--- a/tools/power/cpupower/utils/helpers/amd.c
+++ b/tools/power/cpupower/utils/helpers/amd.c
@@ -8,7 +8,10 @@
 #include <pci/pci.h>
 
 #include "helpers/helpers.h"
+#include "cpufreq.h"
+#include "acpi_cppc.h"
 
+/* ACPI P-States Helper Functions for AMD Processors ***************/
 #define MSR_AMD_PSTATE_STATUS	0xc0010063
 #define MSR_AMD_PSTATE		0xc0010064
 #define MSR_AMD_PSTATE_LIMIT	0xc0010061
@@ -146,4 +149,31 @@ int amd_pci_get_num_boost_states(int *active, int *states)
 	pci_cleanup(pci_acc);
 	return 0;
 }
+
+/* ACPI P-States Helper Functions for AMD Processors ***************/
+
+/* AMD P-State Helper Functions ************************************/
+enum amd_pstate_value {
+	AMD_PSTATE_HIGHEST_PERF,
+	AMD_PSTATE_MAX_FREQ,
+	AMD_PSTATE_LOWEST_NONLINEAR_FREQ,
+	MAX_AMD_PSTATE_VALUE_READ_FILES,
+};
+
+static const char *amd_pstate_value_files[MAX_AMD_PSTATE_VALUE_READ_FILES] = {
+	[AMD_PSTATE_HIGHEST_PERF] = "amd_pstate_highest_perf",
+	[AMD_PSTATE_MAX_FREQ] = "amd_pstate_max_freq",
+	[AMD_PSTATE_LOWEST_NONLINEAR_FREQ] = "amd_pstate_lowest_nonlinear_freq",
+};
+
+static unsigned long amd_pstate_get_data(unsigned int cpu,
+					 enum amd_pstate_value value)
+{
+	return cpufreq_get_sysfs_value_from_table(cpu,
+						  amd_pstate_value_files,
+						  value,
+						  MAX_AMD_PSTATE_VALUE_READ_FILES);
+}
+
+/* AMD P-State Helper Functions ************************************/
 #endif /* defined(__i386__) || defined(__x86_64__) */
-- 
GitLab


From bf9801baa81802dac7e2a5318944ca2f4bfa74ef Mon Sep 17 00:00:00 2001
From: Huang Rui <ray.huang@amd.com>
Date: Tue, 22 Feb 2022 23:34:23 +0800
Subject: [PATCH 0594/1586] cpupower: Enable boost state support for AMD
 P-State module

The legacy ACPI hardware P-States function has 3 P-States on ACPI table,
the CPU frequency only can be switched between the 3 P-States. While the
processor supports the boost state, it will have another boost state
that the frequency can be higher than P0 state, and the state can be
decoded by the function of decode_pstates() and read by
amd_pci_get_num_boost_states().

However, the new AMD P-State function is different than legacy ACPI
hardware P-State on AMD processors. That has a finer grain frequency
range between the highest and lowest frequency. And boost frequency is
actually the frequency which is mapped on highest performance ratio. The
similar previous P0 frequency is mapped on nominal performance ratio.
If the highest performance on the processor is higher than nominal
performance, then we think the current processor supports the boost
state. And it uses amd_pstate_boost_init() to initialize boost for AMD
P-State function.

Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Signed-off-by: Huang Rui <ray.huang@amd.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/power/cpupower/utils/helpers/amd.c     | 18 ++++++++++++++++++
 tools/power/cpupower/utils/helpers/helpers.h |  5 +++++
 tools/power/cpupower/utils/helpers/misc.c    |  2 ++
 3 files changed, 25 insertions(+)

diff --git a/tools/power/cpupower/utils/helpers/amd.c b/tools/power/cpupower/utils/helpers/amd.c
index 4d45d1b44164f..f5ba528dc7db8 100644
--- a/tools/power/cpupower/utils/helpers/amd.c
+++ b/tools/power/cpupower/utils/helpers/amd.c
@@ -175,5 +175,23 @@ static unsigned long amd_pstate_get_data(unsigned int cpu,
 						  MAX_AMD_PSTATE_VALUE_READ_FILES);
 }
 
+void amd_pstate_boost_init(unsigned int cpu, int *support, int *active)
+{
+	unsigned long highest_perf, nominal_perf, cpuinfo_min,
+		      cpuinfo_max, amd_pstate_max;
+
+	highest_perf = amd_pstate_get_data(cpu, AMD_PSTATE_HIGHEST_PERF);
+	nominal_perf = acpi_cppc_get_data(cpu, NOMINAL_PERF);
+
+	*support = highest_perf > nominal_perf ? 1 : 0;
+	if (!(*support))
+		return;
+
+	cpufreq_get_hardware_limits(cpu, &cpuinfo_min, &cpuinfo_max);
+	amd_pstate_max = amd_pstate_get_data(cpu, AMD_PSTATE_MAX_FREQ);
+
+	*active = cpuinfo_max == amd_pstate_max ? 1 : 0;
+}
+
 /* AMD P-State Helper Functions ************************************/
 #endif /* defined(__i386__) || defined(__x86_64__) */
diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h
index 62771a0868712..326491e11c6ec 100644
--- a/tools/power/cpupower/utils/helpers/helpers.h
+++ b/tools/power/cpupower/utils/helpers/helpers.h
@@ -140,6 +140,8 @@ extern int cpufreq_has_boost_support(unsigned int cpu, int *support,
 
 /* AMD P-State stuff **************************/
 bool cpupower_amd_pstate_enabled(void);
+void amd_pstate_boost_init(unsigned int cpu,
+			   int *support, int *active);
 
 /* AMD P-State stuff **************************/
 
@@ -177,6 +179,9 @@ static inline int cpufreq_has_boost_support(unsigned int cpu, int *support,
 
 static inline bool cpupower_amd_pstate_enabled(void)
 { return false; }
+static inline void amd_pstate_boost_init(unsigned int cpu, int *support,
+					 int *active)
+{}
 
 /* cpuid and cpuinfo helpers  **************************/
 
diff --git a/tools/power/cpupower/utils/helpers/misc.c b/tools/power/cpupower/utils/helpers/misc.c
index 0c483cdefcc26..e0d3145434d31 100644
--- a/tools/power/cpupower/utils/helpers/misc.c
+++ b/tools/power/cpupower/utils/helpers/misc.c
@@ -41,6 +41,8 @@ int cpufreq_has_boost_support(unsigned int cpu, int *support, int *active,
 			if (ret)
 				return ret;
 		}
+	} else if (cpupower_cpu_info.caps & CPUPOWER_CAP_AMD_PSTATE) {
+		amd_pstate_boost_init(cpu, support, active);
 	} else if (cpupower_cpu_info.caps & CPUPOWER_CAP_INTEL_IDA)
 		*support = *active = 1;
 	return 0;
-- 
GitLab


From 35fdf42d90d09d2d00ef65999fe338027a6b4d8e Mon Sep 17 00:00:00 2001
From: Huang Rui <ray.huang@amd.com>
Date: Tue, 22 Feb 2022 23:34:24 +0800
Subject: [PATCH 0595/1586] cpupower: Move print_speed function into misc
 helper

The print_speed can be as a common function, and expose it into misc
helper header. Then it can be used on other helper files as well.

Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Signed-off-by: Huang Rui <ray.huang@amd.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/power/cpupower/utils/cpufreq-info.c    | 59 ++++----------------
 tools/power/cpupower/utils/helpers/helpers.h |  1 +
 tools/power/cpupower/utils/helpers/misc.c    | 40 +++++++++++++
 3 files changed, 52 insertions(+), 48 deletions(-)

diff --git a/tools/power/cpupower/utils/cpufreq-info.c b/tools/power/cpupower/utils/cpufreq-info.c
index f9895e31ff5ae..b429454bf3aef 100644
--- a/tools/power/cpupower/utils/cpufreq-info.c
+++ b/tools/power/cpupower/utils/cpufreq-info.c
@@ -84,43 +84,6 @@ static void proc_cpufreq_output(void)
 }
 
 static int no_rounding;
-static void print_speed(unsigned long speed)
-{
-	unsigned long tmp;
-
-	if (no_rounding) {
-		if (speed > 1000000)
-			printf("%u.%06u GHz", ((unsigned int) speed/1000000),
-				((unsigned int) speed%1000000));
-		else if (speed > 1000)
-			printf("%u.%03u MHz", ((unsigned int) speed/1000),
-				(unsigned int) (speed%1000));
-		else
-			printf("%lu kHz", speed);
-	} else {
-		if (speed > 1000000) {
-			tmp = speed%10000;
-			if (tmp >= 5000)
-				speed += 10000;
-			printf("%u.%02u GHz", ((unsigned int) speed/1000000),
-				((unsigned int) (speed%1000000)/10000));
-		} else if (speed > 100000) {
-			tmp = speed%1000;
-			if (tmp >= 500)
-				speed += 1000;
-			printf("%u MHz", ((unsigned int) speed/1000));
-		} else if (speed > 1000) {
-			tmp = speed%100;
-			if (tmp >= 50)
-				speed += 100;
-			printf("%u.%01u MHz", ((unsigned int) speed/1000),
-				((unsigned int) (speed%1000)/100));
-		}
-	}
-
-	return;
-}
-
 static void print_duration(unsigned long duration)
 {
 	unsigned long tmp;
@@ -254,11 +217,11 @@ static int get_boost_mode(unsigned int cpu)
 	if (freqs) {
 		printf(_("  boost frequency steps: "));
 		while (freqs->next) {
-			print_speed(freqs->frequency);
+			print_speed(freqs->frequency, no_rounding);
 			printf(", ");
 			freqs = freqs->next;
 		}
-		print_speed(freqs->frequency);
+		print_speed(freqs->frequency, no_rounding);
 		printf("\n");
 		cpufreq_put_available_frequencies(freqs);
 	}
@@ -277,7 +240,7 @@ static int get_freq_kernel(unsigned int cpu, unsigned int human)
 		return -EINVAL;
 	}
 	if (human) {
-		print_speed(freq);
+		print_speed(freq, no_rounding);
 	} else
 		printf("%lu", freq);
 	printf(_(" (asserted by call to kernel)\n"));
@@ -296,7 +259,7 @@ static int get_freq_hardware(unsigned int cpu, unsigned int human)
 		return -EINVAL;
 	}
 	if (human) {
-		print_speed(freq);
+		print_speed(freq, no_rounding);
 	} else
 		printf("%lu", freq);
 	printf(_(" (asserted by call to hardware)\n"));
@@ -316,9 +279,9 @@ static int get_hardware_limits(unsigned int cpu, unsigned int human)
 
 	if (human) {
 		printf(_("  hardware limits: "));
-		print_speed(min);
+		print_speed(min, no_rounding);
 		printf(" - ");
-		print_speed(max);
+		print_speed(max, no_rounding);
 		printf("\n");
 	} else {
 		printf("%lu %lu\n", min, max);
@@ -350,9 +313,9 @@ static int get_policy(unsigned int cpu)
 		return -EINVAL;
 	}
 	printf(_("  current policy: frequency should be within "));
-	print_speed(policy->min);
+	print_speed(policy->min, no_rounding);
 	printf(_(" and "));
-	print_speed(policy->max);
+	print_speed(policy->max, no_rounding);
 
 	printf(".\n                  ");
 	printf(_("The governor \"%s\" may decide which speed to use\n"
@@ -436,7 +399,7 @@ static int get_freq_stats(unsigned int cpu, unsigned int human)
 	struct cpufreq_stats *stats = cpufreq_get_stats(cpu, &total_time);
 	while (stats) {
 		if (human) {
-			print_speed(stats->frequency);
+			print_speed(stats->frequency, no_rounding);
 			printf(":%.2f%%",
 				(100.0 * stats->time_in_state) / total_time);
 		} else
@@ -486,11 +449,11 @@ static void debug_output_one(unsigned int cpu)
 	if (freqs) {
 		printf(_("  available frequency steps:  "));
 		while (freqs->next) {
-			print_speed(freqs->frequency);
+			print_speed(freqs->frequency, no_rounding);
 			printf(", ");
 			freqs = freqs->next;
 		}
-		print_speed(freqs->frequency);
+		print_speed(freqs->frequency, no_rounding);
 		printf("\n");
 		cpufreq_put_available_frequencies(freqs);
 	}
diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h
index 326491e11c6ec..fa2a8c1b1d269 100644
--- a/tools/power/cpupower/utils/helpers/helpers.h
+++ b/tools/power/cpupower/utils/helpers/helpers.h
@@ -200,5 +200,6 @@ extern struct bitmask *offline_cpus;
 void get_cpustate(void);
 void print_online_cpus(void);
 void print_offline_cpus(void);
+void print_speed(unsigned long speed, int no_rounding);
 
 #endif /* __CPUPOWERUTILS_HELPERS__ */
diff --git a/tools/power/cpupower/utils/helpers/misc.c b/tools/power/cpupower/utils/helpers/misc.c
index e0d3145434d31..9547b29254a7f 100644
--- a/tools/power/cpupower/utils/helpers/misc.c
+++ b/tools/power/cpupower/utils/helpers/misc.c
@@ -164,3 +164,43 @@ void print_offline_cpus(void)
 		printf(_("cpupower set operation was not performed on them\n"));
 	}
 }
+
+/*
+ * print_speed
+ *
+ * Print the exact CPU frequency with appropriate unit
+ */
+void print_speed(unsigned long speed, int no_rounding)
+{
+	unsigned long tmp;
+
+	if (no_rounding) {
+		if (speed > 1000000)
+			printf("%u.%06u GHz", ((unsigned int)speed / 1000000),
+			       ((unsigned int)speed % 1000000));
+		else if (speed > 1000)
+			printf("%u.%03u MHz", ((unsigned int)speed / 1000),
+			       (unsigned int)(speed % 1000));
+		else
+			printf("%lu kHz", speed);
+	} else {
+		if (speed > 1000000) {
+			tmp = speed % 10000;
+			if (tmp >= 5000)
+				speed += 10000;
+			printf("%u.%02u GHz", ((unsigned int)speed / 1000000),
+			       ((unsigned int)(speed % 1000000) / 10000));
+		} else if (speed > 100000) {
+			tmp = speed % 1000;
+			if (tmp >= 500)
+				speed += 1000;
+			printf("%u MHz", ((unsigned int)speed / 1000));
+		} else if (speed > 1000) {
+			tmp = speed % 100;
+			if (tmp >= 50)
+				speed += 100;
+			printf("%u.%01u MHz", ((unsigned int)speed / 1000),
+			       ((unsigned int)(speed % 1000) / 100));
+		}
+	}
+}
-- 
GitLab


From d8363e29178249bb505ae388ce1658484396fcde Mon Sep 17 00:00:00 2001
From: Huang Rui <ray.huang@amd.com>
Date: Tue, 22 Feb 2022 23:34:25 +0800
Subject: [PATCH 0596/1586] cpupower: Add function to print AMD P-State
 performance capabilities

AMD P-State kernel module is using the fine grain frequency instead of
acpi hardware pstate. So add a function to print performance and
frequency values.

Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Signed-off-by: Huang Rui <ray.huang@amd.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/power/cpupower/utils/cpufreq-info.c    |  9 ++++--
 tools/power/cpupower/utils/helpers/amd.c     | 29 ++++++++++++++++++++
 tools/power/cpupower/utils/helpers/helpers.h |  5 ++++
 3 files changed, 40 insertions(+), 3 deletions(-)

diff --git a/tools/power/cpupower/utils/cpufreq-info.c b/tools/power/cpupower/utils/cpufreq-info.c
index b429454bf3aef..235243ec5ce0d 100644
--- a/tools/power/cpupower/utils/cpufreq-info.c
+++ b/tools/power/cpupower/utils/cpufreq-info.c
@@ -146,9 +146,12 @@ static int get_boost_mode_x86(unsigned int cpu)
 	printf(_("    Supported: %s\n"), support ? _("yes") : _("no"));
 	printf(_("    Active: %s\n"), active ? _("yes") : _("no"));
 
-	if ((cpupower_cpu_info.vendor == X86_VENDOR_AMD &&
-	     cpupower_cpu_info.family >= 0x10) ||
-	     cpupower_cpu_info.vendor == X86_VENDOR_HYGON) {
+	if (cpupower_cpu_info.vendor == X86_VENDOR_AMD &&
+	    cpupower_cpu_info.caps & CPUPOWER_CAP_AMD_PSTATE) {
+		return 0;
+	} else if ((cpupower_cpu_info.vendor == X86_VENDOR_AMD &&
+		    cpupower_cpu_info.family >= 0x10) ||
+		   cpupower_cpu_info.vendor == X86_VENDOR_HYGON) {
 		ret = decode_pstates(cpu, b_states, pstates, &pstate_no);
 		if (ret)
 			return ret;
diff --git a/tools/power/cpupower/utils/helpers/amd.c b/tools/power/cpupower/utils/helpers/amd.c
index f5ba528dc7db8..c519cc89c97f4 100644
--- a/tools/power/cpupower/utils/helpers/amd.c
+++ b/tools/power/cpupower/utils/helpers/amd.c
@@ -193,5 +193,34 @@ void amd_pstate_boost_init(unsigned int cpu, int *support, int *active)
 	*active = cpuinfo_max == amd_pstate_max ? 1 : 0;
 }
 
+void amd_pstate_show_perf_and_freq(unsigned int cpu, int no_rounding)
+{
+	printf(_("    AMD PSTATE Highest Performance: %lu. Maximum Frequency: "),
+	       amd_pstate_get_data(cpu, AMD_PSTATE_HIGHEST_PERF));
+	/*
+	 * If boost isn't active, the cpuinfo_max doesn't indicate real max
+	 * frequency. So we read it back from amd-pstate sysfs entry.
+	 */
+	print_speed(amd_pstate_get_data(cpu, AMD_PSTATE_MAX_FREQ), no_rounding);
+	printf(".\n");
+
+	printf(_("    AMD PSTATE Nominal Performance: %lu. Nominal Frequency: "),
+	       acpi_cppc_get_data(cpu, NOMINAL_PERF));
+	print_speed(acpi_cppc_get_data(cpu, NOMINAL_FREQ) * 1000,
+		    no_rounding);
+	printf(".\n");
+
+	printf(_("    AMD PSTATE Lowest Non-linear Performance: %lu. Lowest Non-linear Frequency: "),
+	       acpi_cppc_get_data(cpu, LOWEST_NONLINEAR_PERF));
+	print_speed(amd_pstate_get_data(cpu, AMD_PSTATE_LOWEST_NONLINEAR_FREQ),
+		    no_rounding);
+	printf(".\n");
+
+	printf(_("    AMD PSTATE Lowest Performance: %lu. Lowest Frequency: "),
+	       acpi_cppc_get_data(cpu, LOWEST_PERF));
+	print_speed(acpi_cppc_get_data(cpu, LOWEST_FREQ) * 1000, no_rounding);
+	printf(".\n");
+}
+
 /* AMD P-State Helper Functions ************************************/
 #endif /* defined(__i386__) || defined(__x86_64__) */
diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h
index fa2a8c1b1d269..96e4bede078b0 100644
--- a/tools/power/cpupower/utils/helpers/helpers.h
+++ b/tools/power/cpupower/utils/helpers/helpers.h
@@ -142,6 +142,8 @@ extern int cpufreq_has_boost_support(unsigned int cpu, int *support,
 bool cpupower_amd_pstate_enabled(void);
 void amd_pstate_boost_init(unsigned int cpu,
 			   int *support, int *active);
+void amd_pstate_show_perf_and_freq(unsigned int cpu,
+				   int no_rounding);
 
 /* AMD P-State stuff **************************/
 
@@ -182,6 +184,9 @@ static inline bool cpupower_amd_pstate_enabled(void)
 static inline void amd_pstate_boost_init(unsigned int cpu, int *support,
 					 int *active)
 {}
+static inline void amd_pstate_show_perf_and_freq(unsigned int cpu,
+						 int no_rounding)
+{}
 
 /* cpuid and cpuinfo helpers  **************************/
 
-- 
GitLab


From 4d986ffa036a773456476f70bd0fde2fb1330b7d Mon Sep 17 00:00:00 2001
From: Wang Qing <wangqing@vivo.com>
Date: Mon, 14 Feb 2022 18:00:07 -0800
Subject: [PATCH 0597/1586] spi: add missing pci_dev_put() before return

pci_get_slot() increases its reference count, the caller must
decrement the reference count by calling pci_dev_put()

Signed-off-by: Wang Qing <wangqing@vivo.com>
Link: https://lore.kernel.org/r/1644890407-65167-1-git-send-email-wangqing@vivo.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-topcliff-pch.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/spi/spi-topcliff-pch.c b/drivers/spi/spi-topcliff-pch.c
index 8c4615b763398..8e1cc345810a6 100644
--- a/drivers/spi/spi-topcliff-pch.c
+++ b/drivers/spi/spi-topcliff-pch.c
@@ -877,7 +877,7 @@ static void pch_spi_request_dma(struct pch_spi_data *data, int bpw)
 		dev_err(&data->master->dev,
 			"ERROR: dma_request_channel FAILS(Tx)\n");
 		data->use_dma = 0;
-		return;
+		goto out;
 	}
 	dma->chan_tx = chan;
 
@@ -894,9 +894,12 @@ static void pch_spi_request_dma(struct pch_spi_data *data, int bpw)
 		dma_release_channel(dma->chan_tx);
 		dma->chan_tx = NULL;
 		data->use_dma = 0;
-		return;
+		goto out;
 	}
 	dma->chan_rx = chan;
+
+out:
+	pci_dev_put(dma_dev);
 }
 
 static void pch_spi_release_dma(struct pch_spi_data *data)
-- 
GitLab


From 4363f3d3ce8f5440dfbcd66b6a6800b42a58ba6a Mon Sep 17 00:00:00 2001
From: Harman Kalra <hkalra@marvell.com>
Date: Fri, 4 Feb 2022 18:16:01 +0530
Subject: [PATCH 0598/1586] crypto: octeontx2 - add synchronization between
 mailbox accesses

Since there are two workqueues implemented in CPTPF driver - one
for handling mailbox requests from VFs and another for handling FLR.
In both cases PF driver will forward the request to AF driver by
writing to mailbox memory. A race condition may arise if two
simultaneous requests are written to mailbox memory. Introducing
locking mechanism to maintain synchronization between multiple
mailbox accesses.

Signed-off-by: Harman Kalra <hkalra@marvell.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 .../marvell/octeontx2/otx2_cpt_common.h       |  1 +
 .../marvell/octeontx2/otx2_cpt_mbox_common.c  | 14 +++++++++++
 drivers/crypto/marvell/octeontx2/otx2_cptpf.h |  1 +
 .../marvell/octeontx2/otx2_cptpf_main.c       | 21 ++++++++++-------
 .../marvell/octeontx2/otx2_cptpf_mbox.c       | 23 ++++++++++++++-----
 5 files changed, 46 insertions(+), 14 deletions(-)

diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h b/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h
index fb56824cb0a6f..5012b7e669f07 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h
+++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h
@@ -157,5 +157,6 @@ struct otx2_cptlfs_info;
 int otx2_cpt_attach_rscrs_msg(struct otx2_cptlfs_info *lfs);
 int otx2_cpt_detach_rsrcs_msg(struct otx2_cptlfs_info *lfs);
 int otx2_cpt_msix_offset_msg(struct otx2_cptlfs_info *lfs);
+int otx2_cpt_sync_mbox_msg(struct otx2_mbox *mbox);
 
 #endif /* __OTX2_CPT_COMMON_H */
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_mbox_common.c b/drivers/crypto/marvell/octeontx2/otx2_cpt_mbox_common.c
index 9074876d38e5d..a317319696eff 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cpt_mbox_common.c
+++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_mbox_common.c
@@ -202,3 +202,17 @@ int otx2_cpt_msix_offset_msg(struct otx2_cptlfs_info *lfs)
 	}
 	return ret;
 }
+
+int otx2_cpt_sync_mbox_msg(struct otx2_mbox *mbox)
+{
+	int err;
+
+	if (!otx2_mbox_nonempty(mbox, 0))
+		return 0;
+	otx2_mbox_msg_send(mbox, 0);
+	err = otx2_mbox_wait_for_rsp(mbox, 0);
+	if (err)
+		return err;
+
+	return otx2_mbox_check_rsp_msgs(mbox, 0);
+}
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf.h b/drivers/crypto/marvell/octeontx2/otx2_cptpf.h
index 05b2d9c650e10..936174b012e8e 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptpf.h
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf.h
@@ -46,6 +46,7 @@ struct otx2_cptpf_dev {
 
 	struct workqueue_struct	*flr_wq;
 	struct cptpf_flr_work   *flr_work;
+	struct mutex            lock;   /* serialize mailbox access */
 
 	unsigned long cap_flag;
 	u8 pf_id;               /* RVU PF number */
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c
index 1720a5bb70161..17a9dd20c8c35 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c
@@ -140,6 +140,7 @@ static void cptpf_flr_wq_handler(struct work_struct *work)
 
 	vf = flr_work - pf->flr_work;
 
+	mutex_lock(&pf->lock);
 	req = otx2_mbox_alloc_msg_rsp(mbox, 0, sizeof(*req),
 				      sizeof(struct msg_rsp));
 	if (!req)
@@ -151,16 +152,19 @@ static void cptpf_flr_wq_handler(struct work_struct *work)
 	req->pcifunc |= (vf + 1) & RVU_PFVF_FUNC_MASK;
 
 	otx2_cpt_send_mbox_msg(mbox, pf->pdev);
+	if (!otx2_cpt_sync_mbox_msg(&pf->afpf_mbox)) {
 
-	if (vf >= 64) {
-		reg = 1;
-		vf = vf - 64;
+		if (vf >= 64) {
+			reg = 1;
+			vf = vf - 64;
+		}
+		/* Clear transaction pending register */
+		otx2_cpt_write64(pf->reg_base, BLKADDR_RVUM, 0,
+				 RVU_PF_VFTRPENDX(reg), BIT_ULL(vf));
+		otx2_cpt_write64(pf->reg_base, BLKADDR_RVUM, 0,
+				 RVU_PF_VFFLR_INT_ENA_W1SX(reg), BIT_ULL(vf));
 	}
-	/* Clear transaction pending register */
-	otx2_cpt_write64(pf->reg_base, BLKADDR_RVUM, 0,
-			 RVU_PF_VFTRPENDX(reg), BIT_ULL(vf));
-	otx2_cpt_write64(pf->reg_base, BLKADDR_RVUM, 0,
-			 RVU_PF_VFFLR_INT_ENA_W1SX(reg), BIT_ULL(vf));
+	mutex_unlock(&pf->lock);
 }
 
 static irqreturn_t cptpf_vf_flr_intr(int __always_unused irq, void *arg)
@@ -468,6 +472,7 @@ static int cptpf_afpf_mbox_init(struct otx2_cptpf_dev *cptpf)
 		goto error;
 
 	INIT_WORK(&cptpf->afpf_mbox_work, otx2_cptpf_afpf_mbox_handler);
+	mutex_init(&cptpf->lock);
 	return 0;
 
 error:
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_mbox.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_mbox.c
index 186f1c1190c1a..fee758b86d29f 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_mbox.c
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_mbox.c
@@ -18,6 +18,7 @@ static int forward_to_af(struct otx2_cptpf_dev *cptpf,
 	struct mbox_msghdr *msg;
 	int ret;
 
+	mutex_lock(&cptpf->lock);
 	msg = otx2_mbox_alloc_msg(&cptpf->afpf_mbox, 0, size);
 	if (msg == NULL)
 		return -ENOMEM;
@@ -29,15 +30,19 @@ static int forward_to_af(struct otx2_cptpf_dev *cptpf,
 	msg->sig = req->sig;
 	msg->ver = req->ver;
 
-	otx2_mbox_msg_send(&cptpf->afpf_mbox, 0);
-	ret = otx2_mbox_wait_for_rsp(&cptpf->afpf_mbox, 0);
+	ret = otx2_cpt_sync_mbox_msg(&cptpf->afpf_mbox);
+	/* Error code -EIO indicate there is a communication failure
+	 * to the AF. Rest of the error codes indicate that AF processed
+	 * VF messages and set the error codes in response messages
+	 * (if any) so simply forward responses to VF.
+	 */
 	if (ret == -EIO) {
-		dev_err(&cptpf->pdev->dev, "RVU MBOX timeout.\n");
+		dev_warn(&cptpf->pdev->dev,
+			 "AF not responding to VF%d messages\n", vf->vf_id);
+		mutex_unlock(&cptpf->lock);
 		return ret;
-	} else if (ret) {
-		dev_err(&cptpf->pdev->dev, "RVU MBOX error: %d.\n", ret);
-		return -EFAULT;
 	}
+	mutex_unlock(&cptpf->lock);
 	return 0;
 }
 
@@ -204,6 +209,10 @@ void otx2_cptpf_vfpf_mbox_handler(struct work_struct *work)
 		if (err == -ENOMEM || err == -EIO)
 			break;
 		offset = msg->next_msgoff;
+		/* Write barrier required for VF responses which are handled by
+		 * PF driver and not forwarded to AF.
+		 */
+		smp_wmb();
 	}
 	/* Send mbox responses to VF */
 	if (mdev->num_msgs)
@@ -350,6 +359,8 @@ void otx2_cptpf_afpf_mbox_handler(struct work_struct *work)
 			process_afpf_mbox_msg(cptpf, msg);
 
 		offset = msg->next_msgoff;
+		/* Sync VF response ready to be sent */
+		smp_wmb();
 		mdev->msgs_acked++;
 	}
 	otx2_mbox_reset(afpf_mbox, 0);
-- 
GitLab


From f17f3f82420f0b6914638f298064816c1efcbec3 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Wed, 16 Feb 2022 19:44:06 +0800
Subject: [PATCH 0599/1586] crypto: x86/blowfish - Remove unused inline
 functions

This is unused after commit c0a64926c53e ("crypto: x86/blowfish - drop CTR mode implementation")

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/blowfish_glue.c | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/arch/x86/crypto/blowfish_glue.c b/arch/x86/crypto/blowfish_glue.c
index a880e0b1c2555..fda6066437aa3 100644
--- a/arch/x86/crypto/blowfish_glue.c
+++ b/arch/x86/crypto/blowfish_glue.c
@@ -32,24 +32,12 @@ static inline void blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src)
 	__blowfish_enc_blk(ctx, dst, src, false);
 }
 
-static inline void blowfish_enc_blk_xor(struct bf_ctx *ctx, u8 *dst,
-					const u8 *src)
-{
-	__blowfish_enc_blk(ctx, dst, src, true);
-}
-
 static inline void blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst,
 					 const u8 *src)
 {
 	__blowfish_enc_blk_4way(ctx, dst, src, false);
 }
 
-static inline void blowfish_enc_blk_xor_4way(struct bf_ctx *ctx, u8 *dst,
-				      const u8 *src)
-{
-	__blowfish_enc_blk_4way(ctx, dst, src, true);
-}
-
 static void blowfish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
 	blowfish_enc_blk(crypto_tfm_ctx(tfm), dst, src);
-- 
GitLab


From c143a603c9abf3297b87ce1d2827883c67efc385 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Wed, 16 Feb 2022 19:45:21 +0800
Subject: [PATCH 0600/1586] crypto: x86/des3 - Remove unused inline function
 des3_ede_enc_blk_3way()

This is unused after commit 768db5fee3bb ("crypto: x86/des - drop CTR mode implementation")

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/des3_ede_glue.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/arch/x86/crypto/des3_ede_glue.c b/arch/x86/crypto/des3_ede_glue.c
index 787c234d2469c..abb8b1fe123b4 100644
--- a/arch/x86/crypto/des3_ede_glue.c
+++ b/arch/x86/crypto/des3_ede_glue.c
@@ -45,14 +45,6 @@ static inline void des3_ede_dec_blk(struct des3_ede_x86_ctx *ctx, u8 *dst,
 	des3_ede_x86_64_crypt_blk(dec_ctx, dst, src);
 }
 
-static inline void des3_ede_enc_blk_3way(struct des3_ede_x86_ctx *ctx, u8 *dst,
-					 const u8 *src)
-{
-	u32 *enc_ctx = ctx->enc.expkey;
-
-	des3_ede_x86_64_crypt_blk_3way(enc_ctx, dst, src);
-}
-
 static inline void des3_ede_dec_blk_3way(struct des3_ede_x86_ctx *ctx, u8 *dst,
 					 const u8 *src)
 {
-- 
GitLab


From 1fb37b5692c915edcc2448a6b37255738c7c77e0 Mon Sep 17 00:00:00 2001
From: Gilad Ben-Yossef <gilad@benyossef.com>
Date: Thu, 17 Feb 2022 21:27:26 +0200
Subject: [PATCH 0601/1586] crypto: ccree - don't attempt 0 len DMA mappings

Refuse to try mapping zero bytes as this may cause a fault
on some configurations / platforms and it seems the prev.
attempt is not enough and we need to be more explicit.

Signed-off-by: Gilad Ben-Yossef <gilad@benyossef.com>
Reported-by: Corentin Labbe <clabbe.montjoie@gmail.com>
Fixes: ce0fc6db38de ("crypto: ccree - protect against empty or NULL
scatterlists")
Tested-by: Corentin Labbe <clabbe.montjoie@gmail.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/ccree/cc_buffer_mgr.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/crypto/ccree/cc_buffer_mgr.c b/drivers/crypto/ccree/cc_buffer_mgr.c
index a5e041d9d2cf1..11e0278c8631d 100644
--- a/drivers/crypto/ccree/cc_buffer_mgr.c
+++ b/drivers/crypto/ccree/cc_buffer_mgr.c
@@ -258,6 +258,13 @@ static int cc_map_sg(struct device *dev, struct scatterlist *sg,
 {
 	int ret = 0;
 
+	if (!nbytes) {
+		*mapped_nents = 0;
+		*lbytes = 0;
+		*nents = 0;
+		return 0;
+	}
+
 	*nents = cc_get_sgl_nents(dev, sg, nbytes, lbytes);
 	if (*nents > max_sg_nents) {
 		*nents = 0;
-- 
GitLab


From 0a2a464f863187f97e96ebc6384c052cafd4a54c Mon Sep 17 00:00:00 2001
From: Kai Ye <yekai13@huawei.com>
Date: Sat, 19 Feb 2022 16:08:08 +0800
Subject: [PATCH 0602/1586] crypto: hisilicon/sec - fix the aead software
 fallback for engine

Due to the subreq pointer misuse the private context memory. The aead
soft crypto occasionally casues the OS panic as setting the 64K page.
Here is fix it.

Fixes: 6c46a3297bea ("crypto: hisilicon/sec - add fallback tfm...")
Signed-off-by: Kai Ye <yekai13@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/sec2/sec_crypto.c | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c
index 8caba9fd1f19c..a91635c348b5e 100644
--- a/drivers/crypto/hisilicon/sec2/sec_crypto.c
+++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c
@@ -2295,9 +2295,10 @@ static int sec_aead_soft_crypto(struct sec_ctx *ctx,
 				struct aead_request *aead_req,
 				bool encrypt)
 {
-	struct aead_request *subreq = aead_request_ctx(aead_req);
 	struct sec_auth_ctx *a_ctx = &ctx->a_ctx;
 	struct device *dev = ctx->dev;
+	struct aead_request *subreq;
+	int ret;
 
 	/* Kunpeng920 aead mode not support input 0 size */
 	if (!a_ctx->fallback_aead_tfm) {
@@ -2305,6 +2306,10 @@ static int sec_aead_soft_crypto(struct sec_ctx *ctx,
 		return -EINVAL;
 	}
 
+	subreq = aead_request_alloc(a_ctx->fallback_aead_tfm, GFP_KERNEL);
+	if (!subreq)
+		return -ENOMEM;
+
 	aead_request_set_tfm(subreq, a_ctx->fallback_aead_tfm);
 	aead_request_set_callback(subreq, aead_req->base.flags,
 				  aead_req->base.complete, aead_req->base.data);
@@ -2312,8 +2317,13 @@ static int sec_aead_soft_crypto(struct sec_ctx *ctx,
 			       aead_req->cryptlen, aead_req->iv);
 	aead_request_set_ad(subreq, aead_req->assoclen);
 
-	return encrypt ? crypto_aead_encrypt(subreq) :
-		   crypto_aead_decrypt(subreq);
+	if (encrypt)
+		ret = crypto_aead_encrypt(subreq);
+	else
+		ret = crypto_aead_decrypt(subreq);
+	aead_request_free(subreq);
+
+	return ret;
 }
 
 static int sec_aead_crypto(struct aead_request *a_req, bool encrypt)
-- 
GitLab


From 4509d950a6764d18a99776614513d280cca422d8 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 23 Feb 2022 08:28:52 +0100
Subject: [PATCH 0603/1586] x86/pat: Remove the unused set_pages_array_wt()
 function

Commit

  623dffb2a2e0 ("x86/mm/pat: Add set_memory_wt() for Write-Through type")

added it but there were no users.

  [ bp: Add a commit message. ]

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lore.kernel.org/r/20220223072852.616143-1-hch@lst.de
---
 arch/x86/include/asm/set_memory.h | 1 -
 arch/x86/mm/pat/set_memory.c      | 6 ------
 2 files changed, 7 deletions(-)

diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h
index ff0f2d90338a1..60bdede41466d 100644
--- a/arch/x86/include/asm/set_memory.h
+++ b/arch/x86/include/asm/set_memory.h
@@ -53,7 +53,6 @@ int set_memory_global(unsigned long addr, int numpages);
 
 int set_pages_array_uc(struct page **pages, int addrinarray);
 int set_pages_array_wc(struct page **pages, int addrinarray);
-int set_pages_array_wt(struct page **pages, int addrinarray);
 int set_pages_array_wb(struct page **pages, int addrinarray);
 
 /*
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index b4072115c8ef6..9bdaf828ee68f 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -2121,12 +2121,6 @@ int set_pages_array_wc(struct page **pages, int numpages)
 }
 EXPORT_SYMBOL(set_pages_array_wc);
 
-int set_pages_array_wt(struct page **pages, int numpages)
-{
-	return _set_pages_array(pages, numpages, _PAGE_CACHE_MODE_WT);
-}
-EXPORT_SYMBOL_GPL(set_pages_array_wt);
-
 int set_pages_wb(struct page *page, int numpages)
 {
 	unsigned long addr = (unsigned long)page_address(page);
-- 
GitLab


From 6198311093dabcafbe345d580c56b5d5a9ab5f3c Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Tue, 22 Feb 2022 21:57:38 +0300
Subject: [PATCH 0604/1586] x86/cc: Move arch/x86/{kernel/cc_platform.c =>
 coco/core.c}

Move cc_platform.c to arch/x86/coco/. The directory is going to be the
home space for code related to confidential computing.

Intel TDX code will land here. AMD SEV code will also eventually be
moved there.

No functional changes.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Tom Lendacky <thomas.lendacky@amd.com>
Link: https://lore.kernel.org/r/20220222185740.26228-3-kirill.shutemov@linux.intel.com
---
 arch/x86/Kbuild                                | 2 ++
 arch/x86/coco/Makefile                         | 6 ++++++
 arch/x86/{kernel/cc_platform.c => coco/core.c} | 0
 arch/x86/kernel/Makefile                       | 5 -----
 4 files changed, 8 insertions(+), 5 deletions(-)
 create mode 100644 arch/x86/coco/Makefile
 rename arch/x86/{kernel/cc_platform.c => coco/core.c} (100%)

diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild
index f384cb1a4f7a8..5a83da703e876 100644
--- a/arch/x86/Kbuild
+++ b/arch/x86/Kbuild
@@ -1,4 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_ARCH_HAS_CC_PLATFORM) += coco/
+
 obj-y += entry/
 
 obj-$(CONFIG_PERF_EVENTS) += events/
diff --git a/arch/x86/coco/Makefile b/arch/x86/coco/Makefile
new file mode 100644
index 0000000000000..c1ead00017a7f
--- /dev/null
+++ b/arch/x86/coco/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS_REMOVE_core.o	= -pg
+KASAN_SANITIZE_core.o	:= n
+CFLAGS_core.o		+= -fno-stack-protector
+
+obj-y += core.o
diff --git a/arch/x86/kernel/cc_platform.c b/arch/x86/coco/core.c
similarity index 100%
rename from arch/x86/kernel/cc_platform.c
rename to arch/x86/coco/core.c
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 6aef9ee28a394..6462e3dd98f49 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -21,7 +21,6 @@ CFLAGS_REMOVE_ftrace.o = -pg
 CFLAGS_REMOVE_early_printk.o = -pg
 CFLAGS_REMOVE_head64.o = -pg
 CFLAGS_REMOVE_sev.o = -pg
-CFLAGS_REMOVE_cc_platform.o = -pg
 endif
 
 KASAN_SANITIZE_head$(BITS).o				:= n
@@ -30,7 +29,6 @@ KASAN_SANITIZE_dumpstack_$(BITS).o			:= n
 KASAN_SANITIZE_stacktrace.o				:= n
 KASAN_SANITIZE_paravirt.o				:= n
 KASAN_SANITIZE_sev.o					:= n
-KASAN_SANITIZE_cc_platform.o				:= n
 
 # With some compiler versions the generated code results in boot hangs, caused
 # by several compilation units. To be safe, disable all instrumentation.
@@ -49,7 +47,6 @@ endif
 KCOV_INSTRUMENT		:= n
 
 CFLAGS_head$(BITS).o	+= -fno-stack-protector
-CFLAGS_cc_platform.o	+= -fno-stack-protector
 
 CFLAGS_irq.o := -I $(srctree)/$(src)/../include/asm/trace
 
@@ -151,8 +148,6 @@ obj-$(CONFIG_UNWINDER_GUESS)		+= unwind_guess.o
 
 obj-$(CONFIG_AMD_MEM_ENCRYPT)		+= sev.o
 
-obj-$(CONFIG_ARCH_HAS_CC_PLATFORM)	+= cc_platform.o
-
 ###
 # 64 bit specific files
 ifeq ($(CONFIG_X86_64),y)
-- 
GitLab


From 655a0fa34b4f7ac6e2b1406fab15e52a7b6accb1 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Tue, 22 Feb 2022 21:57:39 +0300
Subject: [PATCH 0605/1586] x86/coco: Explicitly declare type of confidential
 computing platform

The kernel derives the confidential computing platform
type it is running as from sme_me_mask on AMD or by using
hv_is_isolation_supported() on HyperV isolation VMs. This detection
process will be more complicated as more platforms get added.

Declare a confidential computing vendor variable explicitly and set it
via cc_set_vendor() on the respective platform.

  [ bp: Massage commit message, fixup HyperV check. ]

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Tom Lendacky <thomas.lendacky@amd.com>
Link: https://lore.kernel.org/r/20220222185740.26228-4-kirill.shutemov@linux.intel.com
---
 arch/x86/coco/core.c               | 29 +++++++++++++++++------------
 arch/x86/include/asm/coco.h        | 14 ++++++++++++++
 arch/x86/kernel/cpu/mshyperv.c     |  6 ++++++
 arch/x86/mm/mem_encrypt_identity.c | 11 +++++++----
 4 files changed, 44 insertions(+), 16 deletions(-)
 create mode 100644 arch/x86/include/asm/coco.h

diff --git a/arch/x86/coco/core.c b/arch/x86/coco/core.c
index 6a6ffcd978f6e..476dcd198af5f 100644
--- a/arch/x86/coco/core.c
+++ b/arch/x86/coco/core.c
@@ -9,18 +9,15 @@
 
 #include <linux/export.h>
 #include <linux/cc_platform.h>
-#include <linux/mem_encrypt.h>
 
-#include <asm/mshyperv.h>
+#include <asm/coco.h>
 #include <asm/processor.h>
 
-static bool __maybe_unused intel_cc_platform_has(enum cc_attr attr)
+static enum cc_vendor vendor __ro_after_init;
+
+static bool intel_cc_platform_has(enum cc_attr attr)
 {
-#ifdef CONFIG_INTEL_TDX_GUEST
-	return false;
-#else
 	return false;
-#endif
 }
 
 /*
@@ -74,12 +71,20 @@ static bool hyperv_cc_platform_has(enum cc_attr attr)
 
 bool cc_platform_has(enum cc_attr attr)
 {
-	if (sme_me_mask)
+	switch (vendor) {
+	case CC_VENDOR_AMD:
 		return amd_cc_platform_has(attr);
-
-	if (hv_is_isolation_supported())
+	case CC_VENDOR_INTEL:
+		return intel_cc_platform_has(attr);
+	case CC_VENDOR_HYPERV:
 		return hyperv_cc_platform_has(attr);
-
-	return false;
+	default:
+		return false;
+	}
 }
 EXPORT_SYMBOL_GPL(cc_platform_has);
+
+__init void cc_set_vendor(enum cc_vendor v)
+{
+	vendor = v;
+}
diff --git a/arch/x86/include/asm/coco.h b/arch/x86/include/asm/coco.h
new file mode 100644
index 0000000000000..e49f9ddb6ae62
--- /dev/null
+++ b/arch/x86/include/asm/coco.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_COCO_H
+#define _ASM_X86_COCO_H
+
+enum cc_vendor {
+	CC_VENDOR_NONE,
+	CC_VENDOR_AMD,
+	CC_VENDOR_HYPERV,
+	CC_VENDOR_INTEL,
+};
+
+void cc_set_vendor(enum cc_vendor v);
+
+#endif /* _ASM_X86_COCO_H */
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 5a99f993e6392..e0a5724720523 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -33,6 +33,7 @@
 #include <asm/nmi.h>
 #include <clocksource/hyperv_timer.h>
 #include <asm/numa.h>
+#include <asm/coco.h>
 
 /* Is Linux running as the root partition? */
 bool hv_root_partition;
@@ -344,6 +345,11 @@ static void __init ms_hyperv_init_platform(void)
 		 */
 		swiotlb_force = SWIOTLB_FORCE;
 #endif
+		/* Isolation VMs are unenlightened SEV-based VMs, thus this check: */
+		if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT)) {
+			if (hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE)
+				cc_set_vendor(CC_VENDOR_HYPERV);
+		}
 	}
 
 	if (hv_max_functions_eax >= HYPERV_CPUID_NESTED_FEATURES) {
diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c
index 3f0abb4033403..06314ae3998e5 100644
--- a/arch/x86/mm/mem_encrypt_identity.c
+++ b/arch/x86/mm/mem_encrypt_identity.c
@@ -44,6 +44,7 @@
 #include <asm/setup.h>
 #include <asm/sections.h>
 #include <asm/cmdline.h>
+#include <asm/coco.h>
 
 #include "mm_internal.h"
 
@@ -565,8 +566,7 @@ void __init sme_enable(struct boot_params *bp)
 	} else {
 		/* SEV state cannot be controlled by a command line option */
 		sme_me_mask = me_mask;
-		physical_mask &= ~sme_me_mask;
-		return;
+		goto out;
 	}
 
 	/*
@@ -600,6 +600,9 @@ void __init sme_enable(struct boot_params *bp)
 		sme_me_mask = 0;
 	else
 		sme_me_mask = active_by_default ? me_mask : 0;
-
-	physical_mask &= ~sme_me_mask;
+out:
+	if (sme_me_mask) {
+		physical_mask &= ~sme_me_mask;
+		cc_set_vendor(CC_VENDOR_AMD);
+	}
 }
-- 
GitLab


From b577f542f93cbba57f8d6185ef1fb13a41ddf162 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Tue, 22 Feb 2022 21:57:40 +0300
Subject: [PATCH 0606/1586] x86/coco: Add API to handle encryption mask

AMD SME/SEV uses a bit in the page table entries to indicate that the
page is encrypted and not accessible to the VMM.

TDX uses a similar approach, but the polarity of the mask is opposite to
AMD: if the bit is set the page is accessible to VMM.

Provide vendor-neutral API to deal with the mask: cc_mkenc() and
cc_mkdec() modify given address to make it encrypted/decrypted. It can
be applied to phys_addr_t, pgprotval_t or page table entry value.

pgprot_encrypted() and pgprot_decrypted() reimplemented using new
helpers.

The implementation will be extended to cover TDX.

pgprot_decrypted() is used by drivers (i915, virtio_gpu, vfio).
cc_mkdec() called by pgprot_decrypted(). Export cc_mkdec().

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Tom Lendacky <thomas.lendacky@amd.com>
Link: https://lore.kernel.org/r/20220222185740.26228-5-kirill.shutemov@linux.intel.com
---
 arch/x86/coco/core.c               | 27 +++++++++++++++++++++++++++
 arch/x86/include/asm/coco.h        | 18 ++++++++++++++++++
 arch/x86/include/asm/pgtable.h     | 13 +++++++------
 arch/x86/mm/mem_encrypt_identity.c |  1 +
 arch/x86/mm/pat/set_memory.c       |  5 +++--
 5 files changed, 56 insertions(+), 8 deletions(-)

diff --git a/arch/x86/coco/core.c b/arch/x86/coco/core.c
index 476dcd198af5f..fc1365dd927e8 100644
--- a/arch/x86/coco/core.c
+++ b/arch/x86/coco/core.c
@@ -14,6 +14,7 @@
 #include <asm/processor.h>
 
 static enum cc_vendor vendor __ro_after_init;
+static u64 cc_mask __ro_after_init;
 
 static bool intel_cc_platform_has(enum cc_attr attr)
 {
@@ -84,7 +85,33 @@ bool cc_platform_has(enum cc_attr attr)
 }
 EXPORT_SYMBOL_GPL(cc_platform_has);
 
+u64 cc_mkenc(u64 val)
+{
+	switch (vendor) {
+	case CC_VENDOR_AMD:
+		return val | cc_mask;
+	default:
+		return val;
+	}
+}
+
+u64 cc_mkdec(u64 val)
+{
+	switch (vendor) {
+	case CC_VENDOR_AMD:
+		return val & ~cc_mask;
+	default:
+		return val;
+	}
+}
+EXPORT_SYMBOL_GPL(cc_mkdec);
+
 __init void cc_set_vendor(enum cc_vendor v)
 {
 	vendor = v;
 }
+
+__init void cc_set_mask(u64 mask)
+{
+	cc_mask = mask;
+}
diff --git a/arch/x86/include/asm/coco.h b/arch/x86/include/asm/coco.h
index e49f9ddb6ae62..3d98c3a60d34f 100644
--- a/arch/x86/include/asm/coco.h
+++ b/arch/x86/include/asm/coco.h
@@ -2,6 +2,8 @@
 #ifndef _ASM_X86_COCO_H
 #define _ASM_X86_COCO_H
 
+#include <asm/types.h>
+
 enum cc_vendor {
 	CC_VENDOR_NONE,
 	CC_VENDOR_AMD,
@@ -10,5 +12,21 @@ enum cc_vendor {
 };
 
 void cc_set_vendor(enum cc_vendor v);
+void cc_set_mask(u64 mask);
+
+#ifdef CONFIG_ARCH_HAS_CC_PLATFORM
+u64 cc_mkenc(u64 val);
+u64 cc_mkdec(u64 val);
+#else
+static inline u64 cc_mkenc(u64 val)
+{
+	return val;
+}
+
+static inline u64 cc_mkdec(u64 val)
+{
+	return val;
+}
+#endif
 
 #endif /* _ASM_X86_COCO_H */
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 8a9432fb3802b..62ab07e24aef9 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -15,17 +15,12 @@
 		     cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS)))	\
 	 : (prot))
 
-/*
- * Macros to add or remove encryption attribute
- */
-#define pgprot_encrypted(prot)	__pgprot(__sme_set(pgprot_val(prot)))
-#define pgprot_decrypted(prot)	__pgprot(__sme_clr(pgprot_val(prot)))
-
 #ifndef __ASSEMBLY__
 #include <linux/spinlock.h>
 #include <asm/x86_init.h>
 #include <asm/pkru.h>
 #include <asm/fpu/api.h>
+#include <asm/coco.h>
 #include <asm-generic/pgtable_uffd.h>
 #include <linux/page_table_check.h>
 
@@ -38,6 +33,12 @@ void ptdump_walk_pgd_level_debugfs(struct seq_file *m, struct mm_struct *mm,
 void ptdump_walk_pgd_level_checkwx(void);
 void ptdump_walk_user_pgd_level_checkwx(void);
 
+/*
+ * Macros to add or remove encryption attribute
+ */
+#define pgprot_encrypted(prot)	__pgprot(cc_mkenc(pgprot_val(prot)))
+#define pgprot_decrypted(prot)	__pgprot(cc_mkdec(pgprot_val(prot)))
+
 #ifdef CONFIG_DEBUG_WX
 #define debug_checkwx()		ptdump_walk_pgd_level_checkwx()
 #define debug_checkwx_user()	ptdump_walk_user_pgd_level_checkwx()
diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c
index 06314ae3998e5..b43bc24d2bb64 100644
--- a/arch/x86/mm/mem_encrypt_identity.c
+++ b/arch/x86/mm/mem_encrypt_identity.c
@@ -604,5 +604,6 @@ out:
 	if (sme_me_mask) {
 		physical_mask &= ~sme_me_mask;
 		cc_set_vendor(CC_VENDOR_AMD);
+		cc_set_mask(sme_me_mask);
 	}
 }
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index b4072115c8ef6..1441db69cea5d 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -1989,6 +1989,7 @@ int set_memory_global(unsigned long addr, int numpages)
  */
 static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc)
 {
+	pgprot_t empty = __pgprot(0);
 	struct cpa_data cpa;
 	int ret;
 
@@ -1999,8 +2000,8 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc)
 	memset(&cpa, 0, sizeof(cpa));
 	cpa.vaddr = &addr;
 	cpa.numpages = numpages;
-	cpa.mask_set = enc ? __pgprot(_PAGE_ENC) : __pgprot(0);
-	cpa.mask_clr = enc ? __pgprot(0) : __pgprot(_PAGE_ENC);
+	cpa.mask_set = enc ? pgprot_encrypted(empty) : pgprot_decrypted(empty);
+	cpa.mask_clr = enc ? pgprot_decrypted(empty) : pgprot_encrypted(empty);
 	cpa.pgd = init_mm.pgd;
 
 	/* Must avoid aliasing mappings in the highmem code */
-- 
GitLab


From 1e8c5971c249893ac33ca983c32bafcf5d50c727 Mon Sep 17 00:00:00 2001
From: Brijesh Singh <brijesh.singh@amd.com>
Date: Tue, 22 Feb 2022 22:35:28 -0600
Subject: [PATCH 0607/1586] x86/mm/cpa: Generalize __set_memory_enc_pgtable()

The kernel provides infrastructure to set or clear the encryption mask
from the pages for AMD SEV, but TDX requires few tweaks.

- TDX and SEV have different requirements to the cache and TLB
  flushing.

- TDX has own routine to notify VMM about page encryption status change.

Modify __set_memory_enc_pgtable() and make it flexible enough to cover
both AMD SEV and Intel TDX. The AMD-specific behavior is isolated in the
callbacks under x86_platform.guest. TDX will provide own version of said
callbacks.

  [ bp: Beat into submission. ]

Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Link: https://lore.kernel.org/r/20220223043528.2093214-1-brijesh.singh@amd.com
---
 arch/x86/include/asm/set_memory.h |  1 -
 arch/x86/include/asm/x86_init.h   | 16 +++++++
 arch/x86/kernel/x86_init.c        | 16 ++++++-
 arch/x86/mm/mem_encrypt_amd.c     | 72 +++++++++++++++++++++----------
 arch/x86/mm/pat/set_memory.c      | 20 +++++----
 5 files changed, 91 insertions(+), 34 deletions(-)

diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h
index ff0f2d90338a1..ce8dd215f5b34 100644
--- a/arch/x86/include/asm/set_memory.h
+++ b/arch/x86/include/asm/set_memory.h
@@ -84,7 +84,6 @@ int set_pages_rw(struct page *page, int numpages);
 int set_direct_map_invalid_noflush(struct page *page);
 int set_direct_map_default_noflush(struct page *page);
 bool kernel_page_present(struct page *page);
-void notify_range_enc_status_changed(unsigned long vaddr, int npages, bool enc);
 
 extern int kernel_set_to_readonly;
 
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 22b7412c08f63..e9170457697e4 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -141,6 +141,21 @@ struct x86_init_acpi {
 	void (*reduced_hw_early_init)(void);
 };
 
+/**
+ * struct x86_guest - Functions used by misc guest incarnations like SEV, TDX, etc.
+ *
+ * @enc_status_change_prepare	Notify HV before the encryption status of a range is changed
+ * @enc_status_change_finish	Notify HV after the encryption status of a range is changed
+ * @enc_tlb_flush_required	Returns true if a TLB flush is needed before changing page encryption status
+ * @enc_cache_flush_required	Returns true if a cache flush is needed before changing page encryption status
+ */
+struct x86_guest {
+	void (*enc_status_change_prepare)(unsigned long vaddr, int npages, bool enc);
+	bool (*enc_status_change_finish)(unsigned long vaddr, int npages, bool enc);
+	bool (*enc_tlb_flush_required)(bool enc);
+	bool (*enc_cache_flush_required)(void);
+};
+
 /**
  * struct x86_init_ops - functions for platform specific setup
  *
@@ -287,6 +302,7 @@ struct x86_platform_ops {
 	struct x86_legacy_features legacy;
 	void (*set_legacy_features)(void);
 	struct x86_hyper_runtime hyper;
+	struct x86_guest guest;
 };
 
 struct x86_apic_ops {
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 7d20c1d34a3cd..e84ee5cdbd8c6 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -129,6 +129,11 @@ struct x86_cpuinit_ops x86_cpuinit = {
 
 static void default_nmi_init(void) { };
 
+static void enc_status_change_prepare_noop(unsigned long vaddr, int npages, bool enc) { }
+static bool enc_status_change_finish_noop(unsigned long vaddr, int npages, bool enc) { return false; }
+static bool enc_tlb_flush_required_noop(bool enc) { return false; }
+static bool enc_cache_flush_required_noop(void) { return false; }
+
 struct x86_platform_ops x86_platform __ro_after_init = {
 	.calibrate_cpu			= native_calibrate_cpu_early,
 	.calibrate_tsc			= native_calibrate_tsc,
@@ -138,9 +143,16 @@ struct x86_platform_ops x86_platform __ro_after_init = {
 	.is_untracked_pat_range		= is_ISA_range,
 	.nmi_init			= default_nmi_init,
 	.get_nmi_reason			= default_get_nmi_reason,
-	.save_sched_clock_state 	= tsc_save_sched_clock_state,
-	.restore_sched_clock_state 	= tsc_restore_sched_clock_state,
+	.save_sched_clock_state		= tsc_save_sched_clock_state,
+	.restore_sched_clock_state	= tsc_restore_sched_clock_state,
 	.hyper.pin_vcpu			= x86_op_int_noop,
+
+	.guest = {
+		.enc_status_change_prepare = enc_status_change_prepare_noop,
+		.enc_status_change_finish  = enc_status_change_finish_noop,
+		.enc_tlb_flush_required	   = enc_tlb_flush_required_noop,
+		.enc_cache_flush_required  = enc_cache_flush_required_noop,
+	},
 };
 
 EXPORT_SYMBOL_GPL(x86_platform);
diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c
index 2b2d018ea3450..6169053c28541 100644
--- a/arch/x86/mm/mem_encrypt_amd.c
+++ b/arch/x86/mm/mem_encrypt_amd.c
@@ -177,25 +177,6 @@ void __init sme_map_bootdata(char *real_mode_data)
 	__sme_early_map_unmap_mem(__va(cmdline_paddr), COMMAND_LINE_SIZE, true);
 }
 
-void __init sme_early_init(void)
-{
-	unsigned int i;
-
-	if (!sme_me_mask)
-		return;
-
-	early_pmd_flags = __sme_set(early_pmd_flags);
-
-	__supported_pte_mask = __sme_set(__supported_pte_mask);
-
-	/* Update the protection map with memory encryption mask */
-	for (i = 0; i < ARRAY_SIZE(protection_map); i++)
-		protection_map[i] = pgprot_encrypted(protection_map[i]);
-
-	if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
-		swiotlb_force = SWIOTLB_FORCE;
-}
-
 void __init sev_setup_arch(void)
 {
 	phys_addr_t total_mem = memblock_phys_mem_size();
@@ -256,7 +237,17 @@ static unsigned long pg_level_to_pfn(int level, pte_t *kpte, pgprot_t *ret_prot)
 	return pfn;
 }
 
-void notify_range_enc_status_changed(unsigned long vaddr, int npages, bool enc)
+static bool amd_enc_tlb_flush_required(bool enc)
+{
+	return true;
+}
+
+static bool amd_enc_cache_flush_required(void)
+{
+	return !cpu_feature_enabled(X86_FEATURE_SME_COHERENT);
+}
+
+static void enc_dec_hypercall(unsigned long vaddr, int npages, bool enc)
 {
 #ifdef CONFIG_PARAVIRT
 	unsigned long sz = npages << PAGE_SHIFT;
@@ -287,6 +278,19 @@ void notify_range_enc_status_changed(unsigned long vaddr, int npages, bool enc)
 #endif
 }
 
+static void amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool enc)
+{
+}
+
+/* Return true unconditionally: return value doesn't matter for the SEV side */
+static bool amd_enc_status_change_finish(unsigned long vaddr, int npages, bool enc)
+{
+	if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT))
+		enc_dec_hypercall(vaddr, npages, enc);
+
+	return true;
+}
+
 static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
 {
 	pgprot_t old_prot, new_prot;
@@ -392,7 +396,7 @@ static int __init early_set_memory_enc_dec(unsigned long vaddr,
 
 	ret = 0;
 
-	notify_range_enc_status_changed(start, PAGE_ALIGN(size) >> PAGE_SHIFT, enc);
+	early_set_mem_enc_dec_hypercall(start, PAGE_ALIGN(size) >> PAGE_SHIFT, enc);
 out:
 	__flush_tlb_all();
 	return ret;
@@ -410,7 +414,31 @@ int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size)
 
 void __init early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages, bool enc)
 {
-	notify_range_enc_status_changed(vaddr, npages, enc);
+	enc_dec_hypercall(vaddr, npages, enc);
+}
+
+void __init sme_early_init(void)
+{
+	unsigned int i;
+
+	if (!sme_me_mask)
+		return;
+
+	early_pmd_flags = __sme_set(early_pmd_flags);
+
+	__supported_pte_mask = __sme_set(__supported_pte_mask);
+
+	/* Update the protection map with memory encryption mask */
+	for (i = 0; i < ARRAY_SIZE(protection_map); i++)
+		protection_map[i] = pgprot_encrypted(protection_map[i]);
+
+	if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
+		swiotlb_force = SWIOTLB_FORCE;
+
+	x86_platform.guest.enc_status_change_prepare = amd_enc_status_change_prepare;
+	x86_platform.guest.enc_status_change_finish  = amd_enc_status_change_finish;
+	x86_platform.guest.enc_tlb_flush_required    = amd_enc_tlb_flush_required;
+	x86_platform.guest.enc_cache_flush_required  = amd_enc_cache_flush_required;
 }
 
 void __init mem_encrypt_free_decrypted_mem(void)
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 1441db69cea5d..3b75262cfb272 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -2008,10 +2008,12 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc)
 	kmap_flush_unused();
 	vm_unmap_aliases();
 
-	/*
-	 * Before changing the encryption attribute, we need to flush caches.
-	 */
-	cpa_flush(&cpa, !this_cpu_has(X86_FEATURE_SME_COHERENT));
+	/* Flush the caches as needed before changing the encryption attribute. */
+	if (x86_platform.guest.enc_tlb_flush_required(enc))
+		cpa_flush(&cpa, x86_platform.guest.enc_cache_flush_required());
+
+	/* Notify hypervisor that we are about to set/clr encryption attribute. */
+	x86_platform.guest.enc_status_change_prepare(addr, numpages, enc);
 
 	ret = __change_page_attr_set_clr(&cpa, 1);
 
@@ -2024,11 +2026,11 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc)
 	 */
 	cpa_flush(&cpa, 0);
 
-	/*
-	 * Notify hypervisor that a given memory range is mapped encrypted
-	 * or decrypted.
-	 */
-	notify_range_enc_status_changed(addr, numpages, enc);
+	/* Notify hypervisor that we have successfully set/clr encryption attribute. */
+	if (!ret) {
+		if (!x86_platform.guest.enc_status_change_finish(addr, numpages, enc))
+			ret = -EIO;
+	}
 
 	return ret;
 }
-- 
GitLab


From 7b75bbdf5bedebed387aac6ad8411ed1cf3db5d0 Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <daniel.lezcano@linaro.org>
Date: Sun, 30 Jan 2022 22:02:03 +0100
Subject: [PATCH 0608/1586] powercap/dtpm: Change locking scheme

The different functions are all called through the
dtpm_create_hierarchy() which handle the mutex. The different
functions are used in this context, consequently with the lock always
held.

Remove all locks taken in the function and add the lock in the
hierarchy creation function.

Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Link: https://lore.kernel.org/r/20220130210210.549877-1-daniel.lezcano@linaro.org
---
 drivers/powercap/dtpm.c | 95 ++++++++++++-----------------------------
 1 file changed, 27 insertions(+), 68 deletions(-)

diff --git a/drivers/powercap/dtpm.c b/drivers/powercap/dtpm.c
index 414826a1509b6..0b0121c37a1b8 100644
--- a/drivers/powercap/dtpm.c
+++ b/drivers/powercap/dtpm.c
@@ -51,9 +51,7 @@ static int get_max_power_range_uw(struct powercap_zone *pcz, u64 *max_power_uw)
 {
 	struct dtpm *dtpm = to_dtpm(pcz);
 
-	mutex_lock(&dtpm_lock);
 	*max_power_uw = dtpm->power_max - dtpm->power_min;
-	mutex_unlock(&dtpm_lock);
 
 	return 0;
 }
@@ -83,14 +81,7 @@ static int __get_power_uw(struct dtpm *dtpm, u64 *power_uw)
 
 static int get_power_uw(struct powercap_zone *pcz, u64 *power_uw)
 {
-	struct dtpm *dtpm = to_dtpm(pcz);
-	int ret;
-
-	mutex_lock(&dtpm_lock);
-	ret = __get_power_uw(dtpm, power_uw);
-	mutex_unlock(&dtpm_lock);
-
-	return ret;
+	return __get_power_uw(to_dtpm(pcz), power_uw);
 }
 
 static void __dtpm_rebalance_weight(struct dtpm *dtpm)
@@ -133,7 +124,16 @@ static void __dtpm_add_power(struct dtpm *dtpm)
 	}
 }
 
-static int __dtpm_update_power(struct dtpm *dtpm)
+/**
+ * dtpm_update_power - Update the power on the dtpm
+ * @dtpm: a pointer to a dtpm structure to update
+ *
+ * Function to update the power values of the dtpm node specified in
+ * parameter. These new values will be propagated to the tree.
+ *
+ * Return: zero on success, -EINVAL if the values are inconsistent
+ */
+int dtpm_update_power(struct dtpm *dtpm)
 {
 	int ret;
 
@@ -155,26 +155,6 @@ static int __dtpm_update_power(struct dtpm *dtpm)
 	return ret;
 }
 
-/**
- * dtpm_update_power - Update the power on the dtpm
- * @dtpm: a pointer to a dtpm structure to update
- *
- * Function to update the power values of the dtpm node specified in
- * parameter. These new values will be propagated to the tree.
- *
- * Return: zero on success, -EINVAL if the values are inconsistent
- */
-int dtpm_update_power(struct dtpm *dtpm)
-{
-	int ret;
-
-	mutex_lock(&dtpm_lock);
-	ret = __dtpm_update_power(dtpm);
-	mutex_unlock(&dtpm_lock);
-
-	return ret;
-}
-
 /**
  * dtpm_release_zone - Cleanup when the node is released
  * @pcz: a pointer to a powercap_zone structure
@@ -191,20 +171,14 @@ int dtpm_release_zone(struct powercap_zone *pcz)
 	struct dtpm *dtpm = to_dtpm(pcz);
 	struct dtpm *parent = dtpm->parent;
 
-	mutex_lock(&dtpm_lock);
-
-	if (!list_empty(&dtpm->children)) {
-		mutex_unlock(&dtpm_lock);
+	if (!list_empty(&dtpm->children))
 		return -EBUSY;
-	}
 
 	if (parent)
 		list_del(&dtpm->sibling);
 
 	__dtpm_sub_power(dtpm);
 
-	mutex_unlock(&dtpm_lock);
-
 	if (dtpm->ops)
 		dtpm->ops->release(dtpm);
 
@@ -216,23 +190,12 @@ int dtpm_release_zone(struct powercap_zone *pcz)
 	return 0;
 }
 
-static int __get_power_limit_uw(struct dtpm *dtpm, int cid, u64 *power_limit)
-{
-	*power_limit = dtpm->power_limit;
-	return 0;
-}
-
 static int get_power_limit_uw(struct powercap_zone *pcz,
 			      int cid, u64 *power_limit)
 {
-	struct dtpm *dtpm = to_dtpm(pcz);
-	int ret;
-
-	mutex_lock(&dtpm_lock);
-	ret = __get_power_limit_uw(dtpm, cid, power_limit);
-	mutex_unlock(&dtpm_lock);
-
-	return ret;
+	*power_limit = to_dtpm(pcz)->power_limit;
+	
+	return 0;
 }
 
 /*
@@ -292,7 +255,7 @@ static int __set_power_limit_uw(struct dtpm *dtpm, int cid, u64 power_limit)
 
 			ret = __set_power_limit_uw(child, cid, power);
 			if (!ret)
-				ret = __get_power_limit_uw(child, cid, &power);
+				ret = get_power_limit_uw(&child->zone, cid, &power);
 
 			if (ret)
 				break;
@@ -310,8 +273,6 @@ static int set_power_limit_uw(struct powercap_zone *pcz,
 	struct dtpm *dtpm = to_dtpm(pcz);
 	int ret;
 
-	mutex_lock(&dtpm_lock);
-
 	/*
 	 * Don't allow values outside of the power range previously
 	 * set when initializing the power numbers.
@@ -323,8 +284,6 @@ static int set_power_limit_uw(struct powercap_zone *pcz,
 	pr_debug("%s: power limit: %llu uW, power max: %llu uW\n",
 		 dtpm->zone.name, dtpm->power_limit, dtpm->power_max);
 
-	mutex_unlock(&dtpm_lock);
-
 	return ret;
 }
 
@@ -335,11 +294,7 @@ static const char *get_constraint_name(struct powercap_zone *pcz, int cid)
 
 static int get_max_power_uw(struct powercap_zone *pcz, int id, u64 *max_power)
 {
-	struct dtpm *dtpm = to_dtpm(pcz);
-
-	mutex_lock(&dtpm_lock);
-	*max_power = dtpm->power_max;
-	mutex_unlock(&dtpm_lock);
+	*max_power = to_dtpm(pcz)->power_max;
 
 	return 0;
 }
@@ -442,8 +397,6 @@ int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent)
 	if (IS_ERR(pcz))
 		return PTR_ERR(pcz);
 
-	mutex_lock(&dtpm_lock);
-
 	if (parent) {
 		list_add_tail(&dtpm->sibling, &parent->children);
 		dtpm->parent = parent;
@@ -459,8 +412,6 @@ int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent)
 	pr_debug("Registered dtpm node '%s' / %llu-%llu uW, \n",
 		 dtpm->zone.name, dtpm->power_min, dtpm->power_max);
 
-	mutex_unlock(&dtpm_lock);
-
 	return 0;
 }
 
@@ -605,8 +556,12 @@ int dtpm_create_hierarchy(struct of_device_id *dtpm_match_table)
 	struct device_node *np;
 	int i, ret;
 
-	if (pct)
-		return -EBUSY;
+	mutex_lock(&dtpm_lock);
+
+	if (pct) {
+		ret = -EBUSY;
+		goto out_unlock;
+	}
 
 	pct = powercap_register_control_type(NULL, "dtpm", NULL);
 	if (IS_ERR(pct)) {
@@ -648,12 +603,16 @@ int dtpm_create_hierarchy(struct of_device_id *dtpm_match_table)
 				dtpm_subsys[i]->name, ret);
 	}
 
+	mutex_unlock(&dtpm_lock);
+
 	return 0;
 
 out_err:
 	powercap_unregister_control_type(pct);
 out_pct:
 	pct = NULL;
+out_unlock:
+	mutex_unlock(&dtpm_lock);
 	
 	return ret;
 }
-- 
GitLab


From 0aea2e4ec2a2bfa2d7e8820e37ba5b5ce04f20a5 Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <daniel.lezcano@linaro.org>
Date: Sun, 30 Jan 2022 22:02:04 +0100
Subject: [PATCH 0609/1586] powercap/dtpm_cpu: Reset per_cpu variable in the
 release function

The release function does not reset the per cpu variable when it is
called. That will prevent creation again as the variable will be
already from the previous creation.

Fix it by resetting them.

Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Link: https://lore.kernel.org/r/20220130210210.549877-2-daniel.lezcano@linaro.org
---
 drivers/powercap/dtpm_cpu.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/powercap/dtpm_cpu.c b/drivers/powercap/dtpm_cpu.c
index eed5ad688d467..71f45d2f5a606 100644
--- a/drivers/powercap/dtpm_cpu.c
+++ b/drivers/powercap/dtpm_cpu.c
@@ -151,10 +151,17 @@ static int update_pd_power_uw(struct dtpm *dtpm)
 static void pd_release(struct dtpm *dtpm)
 {
 	struct dtpm_cpu *dtpm_cpu = to_dtpm_cpu(dtpm);
+	struct cpufreq_policy *policy;
 
 	if (freq_qos_request_active(&dtpm_cpu->qos_req))
 		freq_qos_remove_request(&dtpm_cpu->qos_req);
 
+	policy = cpufreq_cpu_get(dtpm_cpu->cpu);
+	if (policy) {
+		for_each_cpu(dtpm_cpu->cpu, policy->related_cpus)
+			per_cpu(dtpm_per_cpu, dtpm_cpu->cpu) = NULL;
+	}
+	
 	kfree(dtpm_cpu);
 }
 
-- 
GitLab


From 690de0b4013f6f35bc9fced12746b9f396c471ae Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <daniel.lezcano@linaro.org>
Date: Sun, 30 Jan 2022 22:02:05 +0100
Subject: [PATCH 0610/1586] powercap/dtpm: Fixup kfree for virtual node

When the node is virtual there is no release function associated which
can free the memory.

Free the memory when no 'ops' exists.

Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Link: https://lore.kernel.org/r/20220130210210.549877-3-daniel.lezcano@linaro.org
---
 drivers/powercap/dtpm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/powercap/dtpm.c b/drivers/powercap/dtpm.c
index 0b0121c37a1b8..7bddd25a67670 100644
--- a/drivers/powercap/dtpm.c
+++ b/drivers/powercap/dtpm.c
@@ -181,12 +181,12 @@ int dtpm_release_zone(struct powercap_zone *pcz)
 
 	if (dtpm->ops)
 		dtpm->ops->release(dtpm);
+	else
+		kfree(dtpm);
 
 	if (root == dtpm)
 		root = NULL;
 
-	kfree(dtpm);
-
 	return 0;
 }
 
-- 
GitLab


From c404c64d64bc31bebe8a2015103671f7cd282731 Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <daniel.lezcano@linaro.org>
Date: Sun, 30 Jan 2022 22:02:06 +0100
Subject: [PATCH 0611/1586] powercap/dtpm: Destroy hierarchy function

The hierarchy creation function exits but without a destroy hierarchy
function. Due to that, the modules creating the hierarchy can not be
unloaded properly because they don't have an exit callback.

Provide the dtpm_destroy_hierarchy() function to remove the previously
created hierarchy.

The function relies on all the release mechanisms implemented by the
underlying powercap framework.

Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Link: https://lore.kernel.org/r/20220130210210.549877-4-daniel.lezcano@linaro.org
---
 drivers/powercap/dtpm.c | 43 +++++++++++++++++++++++++++++++++++++++++
 include/linux/dtpm.h    |  3 +++
 2 files changed, 46 insertions(+)

diff --git a/drivers/powercap/dtpm.c b/drivers/powercap/dtpm.c
index 7bddd25a67670..d9d74f9811186 100644
--- a/drivers/powercap/dtpm.c
+++ b/drivers/powercap/dtpm.c
@@ -617,3 +617,46 @@ out_unlock:
 	return ret;
 }
 EXPORT_SYMBOL_GPL(dtpm_create_hierarchy);
+
+static void __dtpm_destroy_hierarchy(struct dtpm *dtpm)
+{
+	struct dtpm *child, *aux;
+
+	list_for_each_entry_safe(child, aux, &dtpm->children, sibling)
+		__dtpm_destroy_hierarchy(child);
+
+	/*
+	 * At this point, we know all children were removed from the
+	 * recursive call before
+	 */
+	dtpm_unregister(dtpm);
+}
+
+void dtpm_destroy_hierarchy(void)
+{
+	int i;
+
+	mutex_lock(&dtpm_lock);
+
+	if (!pct)
+		goto out_unlock;
+
+	__dtpm_destroy_hierarchy(root);
+	
+
+	for (i = 0; i < ARRAY_SIZE(dtpm_subsys); i++) {
+
+		if (!dtpm_subsys[i]->exit)
+			continue;
+
+		dtpm_subsys[i]->exit();
+	}
+
+	powercap_unregister_control_type(pct);
+
+	pct = NULL;
+
+out_unlock:
+	mutex_unlock(&dtpm_lock);
+}
+EXPORT_SYMBOL_GPL(dtpm_destroy_hierarchy);
diff --git a/include/linux/dtpm.h b/include/linux/dtpm.h
index f7a25c70dd4c0..a4a13514b7306 100644
--- a/include/linux/dtpm.h
+++ b/include/linux/dtpm.h
@@ -37,6 +37,7 @@ struct device_node;
 struct dtpm_subsys_ops {
 	const char *name;
 	int (*init)(void);
+	void (*exit)(void);
 	int (*setup)(struct dtpm *, struct device_node *);
 };
 
@@ -67,4 +68,6 @@ void dtpm_unregister(struct dtpm *dtpm);
 int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent);
 
 int dtpm_create_hierarchy(struct of_device_id *dtpm_match_table);
+
+void dtpm_destroy_hierarchy(void);
 #endif
-- 
GitLab


From 4712a236db409d5ee5dccb8c7e57fe54d7d3ec66 Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <daniel.lezcano@linaro.org>
Date: Sun, 30 Jan 2022 22:02:07 +0100
Subject: [PATCH 0612/1586] powercap/dtpm: Move the 'root' reset place

The 'root' node is checked everytime a dtpm node is destroyed.

When we reach the end of the hierarchy destruction function, we can
unconditionnaly set the 'root' node to NULL again.

Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Link: https://lore.kernel.org/r/20220130210210.549877-5-daniel.lezcano@linaro.org
---
 drivers/powercap/dtpm.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/powercap/dtpm.c b/drivers/powercap/dtpm.c
index d9d74f9811186..ec931a06d90ae 100644
--- a/drivers/powercap/dtpm.c
+++ b/drivers/powercap/dtpm.c
@@ -184,9 +184,6 @@ int dtpm_release_zone(struct powercap_zone *pcz)
 	else
 		kfree(dtpm);
 
-	if (root == dtpm)
-		root = NULL;
-
 	return 0;
 }
 
@@ -656,6 +653,8 @@ void dtpm_destroy_hierarchy(void)
 
 	pct = NULL;
 
+	root = NULL;
+
 out_unlock:
 	mutex_unlock(&dtpm_lock);
 }
-- 
GitLab


From bfded2ca8f36935ff13b3b30f8e66d6135e178ac Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <daniel.lezcano@linaro.org>
Date: Sun, 30 Jan 2022 22:02:08 +0100
Subject: [PATCH 0613/1586] powercap/dtpm_cpu: Add exit function

Now that we can destroy the hierarchy, the code must remove what it
had put in place at the creation. In our case, the cpu hotplug
callbacks.

Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Link: https://lore.kernel.org/r/20220130210210.549877-6-daniel.lezcano@linaro.org
---
 drivers/powercap/dtpm_cpu.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/powercap/dtpm_cpu.c b/drivers/powercap/dtpm_cpu.c
index 71f45d2f5a606..bca2f912d3496 100644
--- a/drivers/powercap/dtpm_cpu.c
+++ b/drivers/powercap/dtpm_cpu.c
@@ -299,8 +299,15 @@ static int dtpm_cpu_init(void)
 	return 0;
 }
 
+static void dtpm_cpu_exit(void)
+{
+	cpuhp_remove_state_nocalls(CPUHP_AP_ONLINE_DYN);
+	cpuhp_remove_state_nocalls(CPUHP_AP_DTPM_CPU_DEAD);
+}
+
 struct dtpm_subsys_ops dtpm_cpu_ops = {
 	.name = KBUILD_MODNAME,
 	.init = dtpm_cpu_init,
+	.exit = dtpm_cpu_exit,
 	.setup = dtpm_cpu_setup,
 };
-- 
GitLab


From f1ebef9e55f3c49063b575e97d2019832b8f8ef9 Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <daniel.lezcano@linaro.org>
Date: Sun, 30 Jan 2022 22:02:09 +0100
Subject: [PATCH 0614/1586] dtpm/soc/rk3399: Add the ability to unload the
 module

The dtpm hierarchy can now be removed with the
dtpm_destroy_hierarchy() function. Add the module_exit() callback so
the module can be unloaded by removing the previously created
hierarchy.

Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Link: https://lore.kernel.org/r/20220130210210.549877-7-daniel.lezcano@linaro.org
---
 drivers/soc/rockchip/dtpm.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/soc/rockchip/dtpm.c b/drivers/soc/rockchip/dtpm.c
index ebebb748488b7..5a23784b52215 100644
--- a/drivers/soc/rockchip/dtpm.c
+++ b/drivers/soc/rockchip/dtpm.c
@@ -52,6 +52,12 @@ static int __init rockchip_dtpm_init(void)
 }
 module_init(rockchip_dtpm_init);
 
+static void __exit rockchip_dtpm_exit(void)
+{
+	return dtpm_destroy_hierarchy();
+}
+module_exit(rockchip_dtpm_exit);
+
 MODULE_SOFTDEP("pre: panfrost cpufreq-dt");
 MODULE_DESCRIPTION("Rockchip DTPM driver");
 MODULE_LICENSE("GPL");
-- 
GitLab


From 75aeaaf23def967853c8d1cfb513a6842dbc232e Mon Sep 17 00:00:00 2001
From: Yazen Ghannam <yazen.ghannam@amd.com>
Date: Wed, 2 Feb 2022 14:43:06 +0000
Subject: [PATCH 0615/1586] EDAC/amd64: Set memory type per DIMM

Current AMD systems allow mixing of DIMM types within a system. However,
DIMMs within a channel, i.e. managed by a single Unified Memory
Controller (UMC), must be of the same type.

Handle this possible configuration by checking and setting the memory
type for each individual "UMC" structure.

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: William Roche <william.roche@oracle.com>
Link: https://lore.kernel.org/r/20220202144307.2678405-2-yazen.ghannam@amd.com
---
 drivers/edac/amd64_edac.c | 43 ++++++++++++++++++++++++++++-----------
 drivers/edac/amd64_edac.h | 10 ++++++++-
 2 files changed, 40 insertions(+), 13 deletions(-)

diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index fba609ada0e67..388b072daa94b 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -1429,7 +1429,7 @@ static void __dump_misc_regs_df(struct amd64_pvt *pvt)
 		edac_dbg(1, "UMC%d x16 DIMMs present: %s\n",
 				i, (umc->dimm_cfg & BIT(7)) ? "yes" : "no");
 
-		if (pvt->dram_type == MEM_LRDDR4) {
+		if (umc->dram_type == MEM_LRDDR4) {
 			amd_smn_read(pvt->mc_node_id, umc_base + UMCCH_ADDR_CFG, &tmp);
 			edac_dbg(1, "UMC%d LRDIMM %dx rank multiply\n",
 					i, 1 << ((tmp >> 4) & 0x3));
@@ -1616,19 +1616,36 @@ static void read_dct_base_mask(struct amd64_pvt *pvt)
 	}
 }
 
-static void determine_memory_type(struct amd64_pvt *pvt)
+static void determine_memory_type_df(struct amd64_pvt *pvt)
 {
-	u32 dram_ctrl, dcsm;
+	struct amd64_umc *umc;
+	u32 i;
 
-	if (pvt->umc) {
-		if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(5))
-			pvt->dram_type = MEM_LRDDR4;
-		else if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(4))
-			pvt->dram_type = MEM_RDDR4;
+	for_each_umc(i) {
+		umc = &pvt->umc[i];
+
+		if (!(umc->sdp_ctrl & UMC_SDP_INIT)) {
+			umc->dram_type = MEM_EMPTY;
+			continue;
+		}
+
+		if (umc->dimm_cfg & BIT(5))
+			umc->dram_type = MEM_LRDDR4;
+		else if (umc->dimm_cfg & BIT(4))
+			umc->dram_type = MEM_RDDR4;
 		else
-			pvt->dram_type = MEM_DDR4;
-		return;
+			umc->dram_type = MEM_DDR4;
+
+		edac_dbg(1, "  UMC%d DIMM type: %s\n", i, edac_mem_types[umc->dram_type]);
 	}
+}
+
+static void determine_memory_type(struct amd64_pvt *pvt)
+{
+	u32 dram_ctrl, dcsm;
+
+	if (pvt->umc)
+		return determine_memory_type_df(pvt);
 
 	switch (pvt->fam) {
 	case 0xf:
@@ -3452,7 +3469,9 @@ skip:
 	read_dct_base_mask(pvt);
 
 	determine_memory_type(pvt);
-	edac_dbg(1, "  DIMM type: %s\n", edac_mem_types[pvt->dram_type]);
+
+	if (!pvt->umc)
+		edac_dbg(1, "  DIMM type: %s\n", edac_mem_types[pvt->dram_type]);
 
 	determine_ecc_sym_sz(pvt);
 }
@@ -3548,7 +3567,7 @@ static int init_csrows_df(struct mem_ctl_info *mci)
 					pvt->mc_node_id, cs);
 
 			dimm->nr_pages = get_csrow_nr_pages(pvt, umc, cs);
-			dimm->mtype = pvt->dram_type;
+			dimm->mtype = pvt->umc[umc].dram_type;
 			dimm->edac_mode = edac_mode;
 			dimm->dtype = dev_type;
 			dimm->grain = 64;
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
index 352bda9803f6c..6b8742369f9d8 100644
--- a/drivers/edac/amd64_edac.h
+++ b/drivers/edac/amd64_edac.h
@@ -344,6 +344,9 @@ struct amd64_umc {
 	u32 sdp_ctrl;		/* SDP Control reg */
 	u32 ecc_ctrl;		/* DRAM ECC Control reg */
 	u32 umc_cap_hi;		/* Capabilities High reg */
+
+	/* cache the dram_type */
+	enum mem_type dram_type;
 };
 
 struct amd64_pvt {
@@ -391,7 +394,12 @@ struct amd64_pvt {
 	/* place to store error injection parameters prior to issue */
 	struct error_injection injection;
 
-	/* cache the dram_type */
+	/*
+	 * cache the dram_type
+	 *
+	 * NOTE: Don't use this for Family 17h and later.
+	 *	 Use dram_type in struct amd64_umc instead.
+	 */
 	enum mem_type dram_type;
 
 	struct amd64_umc *umc;	/* UMC registers */
-- 
GitLab


From 2151c84ece920dc55942495004a823cbecb921e5 Mon Sep 17 00:00:00 2001
From: Yazen Ghannam <yazen.ghannam@amd.com>
Date: Wed, 2 Feb 2022 14:43:07 +0000
Subject: [PATCH 0616/1586] EDAC/amd64: Add new register offset support and
 related changes

Introduce a "family flags" bitmask that can be used to indicate any
special behavior needed on a per-family basis.

Add a flag to indicate a system uses the new register offsets introduced
with Family 19h Model 10h.

Use this flag to account for register offset changes, a new bitfield
indicating DDR5 use on a memory controller, and to set the proper number
of chip select masks.

Rework f17_addr_mask_to_cs_size() to properly handle the change in chip
select masks. And update code comments to reflect the updated Chip
Select, DIMM, and Mask relationships.

[uninitialized variable warning]
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: William Roche <william.roche@oracle.com>
Link: https://lore.kernel.org/r/20220202144307.2678405-3-yazen.ghannam@amd.com
---
 drivers/edac/amd64_edac.c | 80 +++++++++++++++++++++++++++++++--------
 drivers/edac/amd64_edac.h | 14 +++++++
 2 files changed, 78 insertions(+), 16 deletions(-)

diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 388b072daa94b..812baa48b2906 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -15,6 +15,21 @@ static struct msr __percpu *msrs;
 
 static struct amd64_family_type *fam_type;
 
+static inline u32 get_umc_reg(u32 reg)
+{
+	if (!fam_type->flags.zn_regs_v2)
+		return reg;
+
+	switch (reg) {
+	case UMCCH_ADDR_CFG:		return UMCCH_ADDR_CFG_DDR5;
+	case UMCCH_ADDR_MASK_SEC:	return UMCCH_ADDR_MASK_SEC_DDR5;
+	case UMCCH_DIMM_CFG:		return UMCCH_DIMM_CFG_DDR5;
+	}
+
+	WARN_ONCE(1, "%s: unknown register 0x%x", __func__, reg);
+	return 0;
+}
+
 /* Per-node stuff */
 static struct ecc_settings **ecc_stngs;
 
@@ -1429,8 +1444,10 @@ static void __dump_misc_regs_df(struct amd64_pvt *pvt)
 		edac_dbg(1, "UMC%d x16 DIMMs present: %s\n",
 				i, (umc->dimm_cfg & BIT(7)) ? "yes" : "no");
 
-		if (umc->dram_type == MEM_LRDDR4) {
-			amd_smn_read(pvt->mc_node_id, umc_base + UMCCH_ADDR_CFG, &tmp);
+		if (umc->dram_type == MEM_LRDDR4 || umc->dram_type == MEM_LRDDR5) {
+			amd_smn_read(pvt->mc_node_id,
+				     umc_base + get_umc_reg(UMCCH_ADDR_CFG),
+				     &tmp);
 			edac_dbg(1, "UMC%d LRDIMM %dx rank multiply\n",
 					i, 1 << ((tmp >> 4) & 0x3));
 		}
@@ -1505,7 +1522,7 @@ static void prep_chip_selects(struct amd64_pvt *pvt)
 
 		for_each_umc(umc) {
 			pvt->csels[umc].b_cnt = 4;
-			pvt->csels[umc].m_cnt = 2;
+			pvt->csels[umc].m_cnt = fam_type->flags.zn_regs_v2 ? 4 : 2;
 		}
 
 	} else {
@@ -1545,7 +1562,7 @@ static void read_umc_base_mask(struct amd64_pvt *pvt)
 		}
 
 		umc_mask_reg = get_umc_base(umc) + UMCCH_ADDR_MASK;
-		umc_mask_reg_sec = get_umc_base(umc) + UMCCH_ADDR_MASK_SEC;
+		umc_mask_reg_sec = get_umc_base(umc) + get_umc_reg(UMCCH_ADDR_MASK_SEC);
 
 		for_each_chip_select_mask(cs, umc, pvt) {
 			mask = &pvt->csels[umc].csmasks[cs];
@@ -1629,12 +1646,25 @@ static void determine_memory_type_df(struct amd64_pvt *pvt)
 			continue;
 		}
 
-		if (umc->dimm_cfg & BIT(5))
-			umc->dram_type = MEM_LRDDR4;
-		else if (umc->dimm_cfg & BIT(4))
-			umc->dram_type = MEM_RDDR4;
-		else
-			umc->dram_type = MEM_DDR4;
+		/*
+		 * Check if the system supports the "DDR Type" field in UMC Config
+		 * and has DDR5 DIMMs in use.
+		 */
+		if (fam_type->flags.zn_regs_v2 && ((umc->umc_cfg & GENMASK(2, 0)) == 0x1)) {
+			if (umc->dimm_cfg & BIT(5))
+				umc->dram_type = MEM_LRDDR5;
+			else if (umc->dimm_cfg & BIT(4))
+				umc->dram_type = MEM_RDDR5;
+			else
+				umc->dram_type = MEM_DDR5;
+		} else {
+			if (umc->dimm_cfg & BIT(5))
+				umc->dram_type = MEM_LRDDR4;
+			else if (umc->dimm_cfg & BIT(4))
+				umc->dram_type = MEM_RDDR4;
+			else
+				umc->dram_type = MEM_DDR4;
+		}
 
 		edac_dbg(1, "  UMC%d DIMM type: %s\n", i, edac_mem_types[umc->dram_type]);
 	}
@@ -2166,6 +2196,7 @@ static int f17_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc,
 {
 	u32 addr_mask_orig, addr_mask_deinterleaved;
 	u32 msb, weight, num_zero_bits;
+	int cs_mask_nr = csrow_nr;
 	int dimm, size = 0;
 
 	/* No Chip Selects are enabled. */
@@ -2181,17 +2212,33 @@ static int f17_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc,
 		return size;
 
 	/*
-	 * There is one mask per DIMM, and two Chip Selects per DIMM.
-	 *	CS0 and CS1 -> DIMM0
-	 *	CS2 and CS3 -> DIMM1
+	 * Family 17h introduced systems with one mask per DIMM,
+	 * and two Chip Selects per DIMM.
+	 *
+	 *	CS0 and CS1 -> MASK0 / DIMM0
+	 *	CS2 and CS3 -> MASK1 / DIMM1
+	 *
+	 * Family 19h Model 10h introduced systems with one mask per Chip Select,
+	 * and two Chip Selects per DIMM.
+	 *
+	 *	CS0 -> MASK0 -> DIMM0
+	 *	CS1 -> MASK1 -> DIMM0
+	 *	CS2 -> MASK2 -> DIMM1
+	 *	CS3 -> MASK3 -> DIMM1
+	 *
+	 * Keep the mask number equal to the Chip Select number for newer systems,
+	 * and shift the mask number for older systems.
 	 */
 	dimm = csrow_nr >> 1;
 
+	if (!fam_type->flags.zn_regs_v2)
+		cs_mask_nr >>= 1;
+
 	/* Asymmetric dual-rank DIMM support. */
 	if ((csrow_nr & 1) && (cs_mode & CS_ODD_SECONDARY))
-		addr_mask_orig = pvt->csels[umc].csmasks_sec[dimm];
+		addr_mask_orig = pvt->csels[umc].csmasks_sec[cs_mask_nr];
 	else
-		addr_mask_orig = pvt->csels[umc].csmasks[dimm];
+		addr_mask_orig = pvt->csels[umc].csmasks[cs_mask_nr];
 
 	/*
 	 * The number of zero bits in the mask is equal to the number of bits
@@ -2947,6 +2994,7 @@ static struct amd64_family_type family_types[] = {
 		.f0_id = PCI_DEVICE_ID_AMD_19H_M10H_DF_F0,
 		.f6_id = PCI_DEVICE_ID_AMD_19H_M10H_DF_F6,
 		.max_mcs = 12,
+		.flags.zn_regs_v2 = 1,
 		.ops = {
 			.early_channel_count	= f17_early_channel_count,
 			.dbam_to_cs		= f17_addr_mask_to_cs_size,
@@ -3385,7 +3433,7 @@ static void __read_mc_regs_df(struct amd64_pvt *pvt)
 		umc_base = get_umc_base(i);
 		umc = &pvt->umc[i];
 
-		amd_smn_read(nid, umc_base + UMCCH_DIMM_CFG, &umc->dimm_cfg);
+		amd_smn_read(nid, umc_base + get_umc_reg(UMCCH_DIMM_CFG), &umc->dimm_cfg);
 		amd_smn_read(nid, umc_base + UMCCH_UMC_CFG, &umc->umc_cfg);
 		amd_smn_read(nid, umc_base + UMCCH_SDP_CTRL, &umc->sdp_ctrl);
 		amd_smn_read(nid, umc_base + UMCCH_ECC_CTRL, &umc->ecc_ctrl);
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
index 6b8742369f9d8..38e5ad95d0109 100644
--- a/drivers/edac/amd64_edac.h
+++ b/drivers/edac/amd64_edac.h
@@ -273,8 +273,11 @@
 #define UMCCH_BASE_ADDR_SEC		0x10
 #define UMCCH_ADDR_MASK			0x20
 #define UMCCH_ADDR_MASK_SEC		0x28
+#define UMCCH_ADDR_MASK_SEC_DDR5	0x30
 #define UMCCH_ADDR_CFG			0x30
+#define UMCCH_ADDR_CFG_DDR5		0x40
 #define UMCCH_DIMM_CFG			0x80
+#define UMCCH_DIMM_CFG_DDR5		0x90
 #define UMCCH_UMC_CFG			0x100
 #define UMCCH_SDP_CTRL			0x104
 #define UMCCH_ECC_CTRL			0x14C
@@ -488,11 +491,22 @@ struct low_ops {
 					 unsigned cs_mode, int cs_mask_nr);
 };
 
+struct amd64_family_flags {
+	/*
+	 * Indicates that the system supports the new register offsets, etc.
+	 * first introduced with Family 19h Model 10h.
+	 */
+	__u64 zn_regs_v2	: 1,
+
+	      __reserved	: 63;
+};
+
 struct amd64_family_type {
 	const char *ctl_name;
 	u16 f0_id, f1_id, f2_id, f6_id;
 	/* Maximum number of memory controllers per die/node. */
 	u8 max_mcs;
+	struct amd64_family_flags flags;
 	struct low_ops ops;
 };
 
-- 
GitLab


From 8382dce5e4835c045f33b8958a5f559d212cdd11 Mon Sep 17 00:00:00 2001
From: Huang Rui <ray.huang@amd.com>
Date: Tue, 22 Feb 2022 23:34:26 +0800
Subject: [PATCH 0617/1586] cpupower: Add "perf" option to print AMD P-State
 information

Add "-c --perf" option in cpupower-frequency-info to get the performance
and frequency values for AMD P-State.

Commit message amended:
Shuah Khan <skhan@linuxfoundation.org>

Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Signed-off-by: Huang Rui <ray.huang@amd.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 .../cpupower/man/cpupower-frequency-info.1    |  3 +++
 tools/power/cpupower/utils/cpufreq-info.c     | 19 ++++++++++++++++++-
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/tools/power/cpupower/man/cpupower-frequency-info.1 b/tools/power/cpupower/man/cpupower-frequency-info.1
index 6aa8d239dff9d..dd545b499480b 100644
--- a/tools/power/cpupower/man/cpupower-frequency-info.1
+++ b/tools/power/cpupower/man/cpupower-frequency-info.1
@@ -53,6 +53,9 @@ human\-readable output for the \-f, \-w, \-s and \-y parameters.
 \fB\-n\fR \fB\-\-no-rounding\fR
 Output frequencies and latencies without rounding off values.
 .TP  
+\fB\-c\fR \fB\-\-perf\fR
+Get performances and frequencies capabilities of CPPC, by reading it from hardware (only available on the hardware with CPPC).
+.TP
 .SH "REMARKS"
 .LP 
 By default only values of core zero are displayed. How to display settings of
diff --git a/tools/power/cpupower/utils/cpufreq-info.c b/tools/power/cpupower/utils/cpufreq-info.c
index 235243ec5ce0d..0646f615fe2d4 100644
--- a/tools/power/cpupower/utils/cpufreq-info.c
+++ b/tools/power/cpupower/utils/cpufreq-info.c
@@ -438,6 +438,17 @@ static int get_latency(unsigned int cpu, unsigned int human)
 	return 0;
 }
 
+/* --performance / -c */
+
+static int get_perf_cap(unsigned int cpu)
+{
+	if (cpupower_cpu_info.vendor == X86_VENDOR_AMD &&
+	    cpupower_cpu_info.caps & CPUPOWER_CAP_AMD_PSTATE)
+		amd_pstate_show_perf_and_freq(cpu, no_rounding);
+
+	return 0;
+}
+
 static void debug_output_one(unsigned int cpu)
 {
 	struct cpufreq_available_frequencies *freqs;
@@ -466,6 +477,7 @@ static void debug_output_one(unsigned int cpu)
 	if (get_freq_hardware(cpu, 1) < 0)
 		get_freq_kernel(cpu, 1);
 	get_boost_mode(cpu);
+	get_perf_cap(cpu);
 }
 
 static struct option info_opts[] = {
@@ -484,6 +496,7 @@ static struct option info_opts[] = {
 	{"proc",	 no_argument,		 NULL,	 'o'},
 	{"human",	 no_argument,		 NULL,	 'm'},
 	{"no-rounding", no_argument,	 NULL,	 'n'},
+	{"performance", no_argument,	 NULL,	 'c'},
 	{ },
 };
 
@@ -497,7 +510,7 @@ int cmd_freq_info(int argc, char **argv)
 	int output_param = 0;
 
 	do {
-		ret = getopt_long(argc, argv, "oefwldpgrasmybn", info_opts,
+		ret = getopt_long(argc, argv, "oefwldpgrasmybnc", info_opts,
 				  NULL);
 		switch (ret) {
 		case '?':
@@ -520,6 +533,7 @@ int cmd_freq_info(int argc, char **argv)
 		case 'e':
 		case 's':
 		case 'y':
+		case 'c':
 			if (output_param) {
 				output_param = -1;
 				cont = 0;
@@ -626,6 +640,9 @@ int cmd_freq_info(int argc, char **argv)
 		case 'y':
 			ret = get_latency(cpu, human);
 			break;
+		case 'c':
+			ret = get_perf_cap(cpu);
+			break;
 		}
 		if (ret)
 			return ret;
-- 
GitLab


From a586f944f3a30cfffdbda081aa094bc6845f5ba9 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 23 Feb 2022 21:19:48 +0200
Subject: [PATCH 0618/1586] spi: pxa2xx-pci: Do not dereference fwnode in
 struct device

In order to make the underneath API easier to change in the future,
prevent users from dereferencing fwnode from struct device.
Instead, use the specific dev_fwnode() API for that.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20220223191948.31325-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-pxa2xx-pci.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/spi/spi-pxa2xx-pci.c b/drivers/spi/spi-pxa2xx-pci.c
index 2e134eb4bd2c9..47c8cb56a4d07 100644
--- a/drivers/spi/spi-pxa2xx-pci.c
+++ b/drivers/spi/spi-pxa2xx-pci.c
@@ -261,7 +261,7 @@ static int pxa2xx_spi_pci_probe(struct pci_dev *dev,
 		return PTR_ERR(ssp->clk);
 
 	memset(&pi, 0, sizeof(pi));
-	pi.fwnode = dev->dev.fwnode;
+	pi.fwnode = dev_fwnode(&dev->dev);
 	pi.parent = &dev->dev;
 	pi.name = "pxa2xx-spi";
 	pi.id = ssp->port_id;
-- 
GitLab


From 609d7ffdc42199a0ec949db057e3b4be6745d6c5 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 23 Feb 2022 21:16:37 +0200
Subject: [PATCH 0619/1586] spi: pxa2xx-pci: Balance reference count for PCI
 DMA device

The pci_get_slot() increases its reference count, the caller
must decrement the reference count by calling pci_dev_put().

Fixes: 743485ea3bee ("spi: pxa2xx-pci: Do a specific setup in a separate function")
Fixes: 25014521603f ("spi: pxa2xx-pci: Enable DMA for Intel Merrifield")
Reported-by: Wang Qing <wangqing@vivo.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20220223191637.31147-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-pxa2xx-pci.c | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/drivers/spi/spi-pxa2xx-pci.c b/drivers/spi/spi-pxa2xx-pci.c
index 47c8cb56a4d07..6d60972e4e207 100644
--- a/drivers/spi/spi-pxa2xx-pci.c
+++ b/drivers/spi/spi-pxa2xx-pci.c
@@ -76,14 +76,23 @@ static bool lpss_dma_filter(struct dma_chan *chan, void *param)
 	return true;
 }
 
+static void lpss_dma_put_device(void *dma_dev)
+{
+	pci_dev_put(dma_dev);
+}
+
 static int lpss_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c)
 {
 	struct pci_dev *dma_dev;
+	int ret;
 
 	c->num_chipselect = 1;
 	c->max_clk_rate = 50000000;
 
 	dma_dev = pci_get_slot(dev->bus, PCI_DEVFN(PCI_SLOT(dev->devfn), 0));
+	ret = devm_add_action_or_reset(&dev->dev, lpss_dma_put_device, dma_dev);
+	if (ret)
+		return ret;
 
 	if (c->tx_param) {
 		struct dw_dma_slave *slave = c->tx_param;
@@ -107,8 +116,9 @@ static int lpss_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c)
 
 static int mrfld_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c)
 {
-	struct pci_dev *dma_dev = pci_get_slot(dev->bus, PCI_DEVFN(21, 0));
 	struct dw_dma_slave *tx, *rx;
+	struct pci_dev *dma_dev;
+	int ret;
 
 	switch (PCI_FUNC(dev->devfn)) {
 	case 0:
@@ -133,6 +143,11 @@ static int mrfld_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c)
 		return -ENODEV;
 	}
 
+	dma_dev = pci_get_slot(dev->bus, PCI_DEVFN(21, 0));
+	ret = devm_add_action_or_reset(&dev->dev, lpss_dma_put_device, dma_dev);
+	if (ret)
+		return ret;
+
 	tx = c->tx_param;
 	tx->dma_dev = &dma_dev->dev;
 
-- 
GitLab


From da3951ebdcd1cb1d5c750e08cd05aee7b0c04d9a Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Tue, 22 Feb 2022 13:46:10 +0100
Subject: [PATCH 0620/1586] random: round-robin registers as ulong, not u32

When the interrupt handler does not have a valid cycle counter, it calls
get_reg() to read a register from the irq stack, in round-robin.
Currently it does this assuming that registers are 32-bit. This is
_probably_ the case, and probably all platforms without cycle counters
are in fact 32-bit platforms. But maybe not, and either way, it's not
quite correct. This commit fixes that to deal with `unsigned long`
rather than `u32`.

Cc: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/char/random.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index d73a75cbe82d6..a4dedeea35e97 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1261,15 +1261,15 @@ int random_online_cpu(unsigned int cpu)
 }
 #endif
 
-static u32 get_reg(struct fast_pool *f, struct pt_regs *regs)
+static unsigned long get_reg(struct fast_pool *f, struct pt_regs *regs)
 {
-	u32 *ptr = (u32 *)regs;
+	unsigned long *ptr = (unsigned long *)regs;
 	unsigned int idx;
 
 	if (regs == NULL)
 		return 0;
 	idx = READ_ONCE(f->reg_idx);
-	if (idx >= sizeof(struct pt_regs) / sizeof(u32))
+	if (idx >= sizeof(struct pt_regs) / sizeof(unsigned long))
 		idx = 0;
 	ptr += idx++;
 	WRITE_ONCE(f->reg_idx, idx);
-- 
GitLab


From a3f9e8910e1584d7725ef7d5ac870920d42d0bb4 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Tue, 22 Feb 2022 14:01:57 +0100
Subject: [PATCH 0621/1586] random: only wake up writers after zap if threshold
 was passed

The only time that we need to wake up /dev/random writers on
RNDCLEARPOOL/RNDZAPPOOL is when we're changing from a value that is
greater than or equal to POOL_MIN_BITS to zero, because if we're
changing from below POOL_MIN_BITS to zero, the writers are already
unblocked.

Cc: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/char/random.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index a4dedeea35e97..536237a0f073b 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1582,7 +1582,7 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
 		 */
 		if (!capable(CAP_SYS_ADMIN))
 			return -EPERM;
-		if (xchg(&input_pool.entropy_count, 0)) {
+		if (xchg(&input_pool.entropy_count, 0) >= POOL_MIN_BITS) {
 			wake_up_interruptible(&random_write_wait);
 			kill_fasync(&fasync, SIGIO, POLL_OUT);
 		}
-- 
GitLab


From 62a40dad573406cdcb489dd45cfd201717c2e765 Mon Sep 17 00:00:00 2001
From: Oscar Shiang <oscar0225@livemail.tw>
Date: Sun, 20 Feb 2022 20:12:00 +0800
Subject: [PATCH 0622/1586] docs: Remove duplicated words in
 trace/osnoise-tracer

There are 2 duplicated words found in osnoise tracer documentation.
This patch removes them.

Signed-off-by: Oscar Shiang <oscar0225@livemail.tw>
Acked-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Acked-by: Daniel Bristot de Oliveira <bristot@kernel.org>
Link: https://lore.kernel.org/r/TYCP286MB1913117487F390E3BCE38B15A1399@TYCP286MB1913.JPNP286.PROD.OUTLOOK.COM
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/trace/osnoise-tracer.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/trace/osnoise-tracer.rst b/Documentation/trace/osnoise-tracer.rst
index b648cb9bf1f02..963def9f97c6e 100644
--- a/Documentation/trace/osnoise-tracer.rst
+++ b/Documentation/trace/osnoise-tracer.rst
@@ -51,7 +51,7 @@ For example::
         [root@f32 ~]# cd /sys/kernel/tracing/
         [root@f32 tracing]# echo osnoise > current_tracer
 
-It is possible to follow the trace by reading the trace trace file::
+It is possible to follow the trace by reading the trace file::
 
         [root@f32 tracing]# cat trace
         # tracer: osnoise
@@ -108,7 +108,7 @@ The tracer has a set of options inside the osnoise directory, they are:
    option.
  - tracing_threshold: the minimum delta between two time() reads to be
    considered as noise, in us. When set to 0, the default value will
-   will be used, which is currently 5 us.
+   be used, which is currently 5 us.
 
 Additional Tracing
 ------------------
-- 
GitLab


From a5cdaea525c32e7def563ba07d9fef9bc6edffab Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomasz=20Warnie=C5=82=C5=82o?= <tomasz.warniello@gmail.com>
Date: Fri, 18 Feb 2022 19:16:18 +0100
Subject: [PATCH 0623/1586] scripts: kernel-doc: Add the basic POD sections
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The NAME section provides the doc title, while SYNOPSIS contains
the basic syntax and usage description, which will be printed
in the help document and in the error output produced on wrong script
usage.

The rationale is to give users simple and succinct enlightment,
at the same time structuring the script internally for the maintainers.

In the synopsis, Rst-only options are grouped around rst, and the rest is
arranged as in the OPTIONS subsections (yet to be translated into POD,
check at the end of the series).

The third of the basic sections, DESCRIPTION, is added separately.

Signed-off-by: Tomasz Warniełło <tomasz.warniello@gmail.com>
Tested-by: Randy Dunlap <rdunlap@infradead.org>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Disliked-by: Akira Yokosawa <akiyks@gmail.com>
Link: https://lore.kernel.org/r/20220218181628.1411551-2-tomasz.warniello@gmail.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 scripts/kernel-doc | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/scripts/kernel-doc b/scripts/kernel-doc
index 3106b7536b893..c8fbf1d3d5aa9 100755
--- a/scripts/kernel-doc
+++ b/scripts/kernel-doc
@@ -16,6 +16,31 @@ use strict;
 ## This software falls under the GNU General Public License.     ##
 ## Please read the COPYING file for more information             ##
 
+=head1 NAME
+
+kernel-doc - Print formatted kernel documentation to stdout
+
+=head1 SYNOPSIS
+
+ kernel-doc [-h] [-v] [-Werror]
+   [ -man |
+     -rst [-sphinx-version VERSION] [-enable-lineno] |
+     -none
+   ]
+   [
+     -export |
+     -internal |
+     [-function NAME] ... |
+     [-nosymbol NAME] ...
+   ]
+   [-no-doc-sections]
+   [-export-file FILE] ...
+   FILE ...
+
+Run `kernel-doc -h` for details.
+
+=cut
+
 # 18/01/2001 - 	Cleanups
 # 		Functions prototyped as foo(void) same as foo()
 # 		Stop eval'ing where we don't need to.
-- 
GitLab


From 43caf1a6823dc7c156cf38a6c71881c1e90cd3c2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomasz=20Warnie=C5=82=C5=82o?= <tomasz.warniello@gmail.com>
Date: Fri, 18 Feb 2022 19:16:19 +0100
Subject: [PATCH 0624/1586] scripts: kernel-doc: Relink argument parsing error
 handling to pod2usage
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The former usage function is substituted, although not as the -h and -help
parameter handler yet.

Purpose: Use Pod::Usage to handle documentation printing in an integrated
way.

Signed-off-by: Tomasz Warniełło <tomasz.warniello@gmail.com>
Tested-by: Randy Dunlap <rdunlap@infradead.org>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Disliked-by: Akira Yokosawa <akiyks@gmail.com>
Link: https://lore.kernel.org/r/20220218181628.1411551-3-tomasz.warniello@gmail.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 scripts/kernel-doc | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/scripts/kernel-doc b/scripts/kernel-doc
index c8fbf1d3d5aa9..e7f7251771bb6 100755
--- a/scripts/kernel-doc
+++ b/scripts/kernel-doc
@@ -16,6 +16,8 @@ use strict;
 ## This software falls under the GNU General Public License.     ##
 ## Please read the COPYING file for more information             ##
 
+use Pod::Usage qw/pod2usage/;
+
 =head1 NAME
 
 kernel-doc - Print formatted kernel documentation to stdout
@@ -298,7 +300,13 @@ my $blankline_rst = "\n";
 
 # read arguments
 if ($#ARGV == -1) {
-    usage();
+	pod2usage(
+		-message => "No arguments!\n",
+		-exitval => 1,
+		-verbose => 99,
+		-sections => 'SYNOPSIS',
+		-output => \*STDERR,
+	);
 }
 
 my $kernelversion;
@@ -518,8 +526,14 @@ while ($ARGV[0] =~ m/^--?(.*)/) {
 	    die "Sphinx version should either major.minor or major.minor.patch format\n";
 	}
     } else {
-	# Unknown argument
-        usage();
+		# Unknown argument
+		pod2usage(
+			-message => "Argument unknown!\n",
+			-exitval => 1,
+			-verbose => 99,
+			-sections => 'SYNOPSIS',
+			-output => \*STDERR,
+		);
     }
 }
 
-- 
GitLab


From f1583922bf9383ce0079dfdded959dfc5585dc5b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomasz=20Warnie=C5=82=C5=82o?= <tomasz.warniello@gmail.com>
Date: Fri, 18 Feb 2022 19:16:20 +0100
Subject: [PATCH 0625/1586] scripts: kernel-doc: Translate the DESCRIPTION
 section
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Transition the description section into POD. This is one of the standard
documentation sections. This adjustment makes the section available for
POD and makes it look better.

Notes:
- an article addition
- paragraphing correction

Signed-off-by: Tomasz Warniełło <tomasz.warniello@gmail.com>
Tested-by: Randy Dunlap <rdunlap@infradead.org>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Disliked-by: Akira Yokosawa <akiyks@gmail.com>
Link: https://lore.kernel.org/r/20220218181628.1411551-4-tomasz.warniello@gmail.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 scripts/kernel-doc | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/scripts/kernel-doc b/scripts/kernel-doc
index e7f7251771bb6..e4203f13fa93f 100755
--- a/scripts/kernel-doc
+++ b/scripts/kernel-doc
@@ -41,6 +41,15 @@ kernel-doc - Print formatted kernel documentation to stdout
 
 Run `kernel-doc -h` for details.
 
+=head1 DESCRIPTION
+
+Read C language source or header FILEs, extract embedded documentation comments,
+and print formatted documentation to standard output.
+
+The documentation comments are identified by the "/**" opening comment mark.
+
+See Documentation/doc-guide/kernel-doc.rst for the documentation comment syntax.
+
 =cut
 
 # 18/01/2001 - 	Cleanups
@@ -72,12 +81,6 @@ sub usage {
     my $message = <<"EOF";
 Usage: $0 [OPTION ...] FILE ...
 
-Read C language source or header FILEs, extract embedded documentation comments,
-and print formatted documentation to standard output.
-
-The documentation comments are identified by "/**" opening comment mark. See
-Documentation/doc-guide/kernel-doc.rst for the documentation comment syntax.
-
 Output format selection (mutually exclusive):
   -man			Output troff manual page format. This is the default.
   -rst			Output reStructuredText format.
-- 
GitLab


From 2875f78708219feadf0956dcf9e936ec25fb7a8b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomasz=20Warnie=C5=82=C5=82o?= <tomasz.warniello@gmail.com>
Date: Fri, 18 Feb 2022 19:16:21 +0100
Subject: [PATCH 0626/1586] scripts: kernel-doc: Translate the "Output format
 selection" subsection of OPTIONS
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Another step in the direction of a uniform POD documentation, which will
make users happier.

Options land at the end of the script, not to clutter the file top.

The default output format is corrected to rst. That's what it is now.

A POD delimiting comment is added to the script head, which improves
the script logical structure.

Signed-off-by: Tomasz Warniełło <tomasz.warniello@gmail.com>
Tested-by: Randy Dunlap <rdunlap@infradead.org>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Disliked-by: Akira Yokosawa <akiyks@gmail.com>
Link: https://lore.kernel.org/r/20220218181628.1411551-5-tomasz.warniello@gmail.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 scripts/kernel-doc | 31 ++++++++++++++++++++++++++-----
 1 file changed, 26 insertions(+), 5 deletions(-)

diff --git a/scripts/kernel-doc b/scripts/kernel-doc
index e4203f13fa93f..18eca172c4b5e 100755
--- a/scripts/kernel-doc
+++ b/scripts/kernel-doc
@@ -52,6 +52,8 @@ See Documentation/doc-guide/kernel-doc.rst for the documentation comment syntax.
 
 =cut
 
+# more perldoc at the end of the file
+
 # 18/01/2001 - 	Cleanups
 # 		Functions prototyped as foo(void) same as foo()
 # 		Stop eval'ing where we don't need to.
@@ -81,11 +83,6 @@ sub usage {
     my $message = <<"EOF";
 Usage: $0 [OPTION ...] FILE ...
 
-Output format selection (mutually exclusive):
-  -man			Output troff manual page format. This is the default.
-  -rst			Output reStructuredText format.
-  -none			Do not output documentation, only warnings.
-
 Output format selection modifier (affects only ReST output):
 
   -sphinx-version	Use the ReST C domain dialect compatible with an
@@ -2563,3 +2560,27 @@ if ($Werror && $warnings) {
 } else {
     exit($output_mode eq "none" ? 0 : $errors)
 }
+
+__END__
+
+=head1 OPTIONS
+
+=head2 Output format selection (mutually exclusive):
+
+=over 8
+
+=item -man
+
+Output troff manual page format.
+
+=item -rst
+
+Output reStructuredText format. This is the default.
+
+=item -none
+
+Do not output documentation, only warnings.
+
+=back
+
+=cut
-- 
GitLab


From dd803b04b0a0af16e43c2af1a3e67d7ce8e1f899 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomasz=20Warnie=C5=82=C5=82o?= <tomasz.warniello@gmail.com>
Date: Fri, 18 Feb 2022 19:16:22 +0100
Subject: [PATCH 0627/1586] scripts: kernel-doc: Translate the "Output format
 selection modifier" subsection of OPTIONS
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Aim: unified POD, user more happy

This section is renamed to "Output format modifiers" to make it simple.

To make it even more simple, a subsection is added:
"reStructuredText only".

Other notes:
- paragraphing correction
- article correction

Signed-off-by: Tomasz Warniełło <tomasz.warniello@gmail.com>
Tested-by: Randy Dunlap <rdunlap@infradead.org>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Disliked-by: Akira Yokosawa <akiyks@gmail.com>
Link: https://lore.kernel.org/r/20220218181628.1411551-6-tomasz.warniello@gmail.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 scripts/kernel-doc | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/scripts/kernel-doc b/scripts/kernel-doc
index 18eca172c4b5e..b926faa16b008 100755
--- a/scripts/kernel-doc
+++ b/scripts/kernel-doc
@@ -83,13 +83,6 @@ sub usage {
     my $message = <<"EOF";
 Usage: $0 [OPTION ...] FILE ...
 
-Output format selection modifier (affects only ReST output):
-
-  -sphinx-version	Use the ReST C domain dialect compatible with an
-			specific Sphinx Version.
-			If not specified, kernel-doc will auto-detect using
-			the sphinx-build version found on PATH.
-
 Output selection (mutually exclusive):
   -export		Only output documentation for symbols that have been
 			exported using EXPORT_SYMBOL() or EXPORT_SYMBOL_GPL()
@@ -2583,4 +2576,19 @@ Do not output documentation, only warnings.
 
 =back
 
+=head2 Output format modifiers
+
+=head3 reStructuredText only
+
+=over 8
+
+=item -sphinx-version VERSION
+
+Use the ReST C domain dialect compatible with a specific Sphinx Version.
+
+If not specified, kernel-doc will auto-detect using the sphinx-build version
+found on PATH.
+
+=back
+
 =cut
-- 
GitLab


From 9c77f108f43ae08e560f54c817d4aeb4857dc783 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomasz=20Warnie=C5=82=C5=82o?= <tomasz.warniello@gmail.com>
Date: Fri, 18 Feb 2022 19:16:23 +0100
Subject: [PATCH 0628/1586] scripts: kernel-doc: Translate the "Output
 selection" subsection of OPTIONS
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Aim: unified POD, user more satisfied, script better structured

The plurals in -function and -nosymbol are corrected to singulars.
That's how the script works now. I think this describes the syntax better.
The plurar suggests multiple FILE arguments might be possible. So this
seems more coherent.

Other notes:
- paragraphing correction
- article correction

Signed-off-by: Tomasz Warniełło <tomasz.warniello@gmail.com>
Tested-by: Randy Dunlap <rdunlap@infradead.org>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Disliked-by: Akira Yokosawa <akiyks@gmail.com>
Link: https://lore.kernel.org/r/20220218181628.1411551-7-tomasz.warniello@gmail.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 scripts/kernel-doc | 42 +++++++++++++++++++++++++++++-------------
 1 file changed, 29 insertions(+), 13 deletions(-)

diff --git a/scripts/kernel-doc b/scripts/kernel-doc
index b926faa16b008..e49cdb307a35d 100755
--- a/scripts/kernel-doc
+++ b/scripts/kernel-doc
@@ -83,19 +83,6 @@ sub usage {
     my $message = <<"EOF";
 Usage: $0 [OPTION ...] FILE ...
 
-Output selection (mutually exclusive):
-  -export		Only output documentation for symbols that have been
-			exported using EXPORT_SYMBOL() or EXPORT_SYMBOL_GPL()
-                        in any input FILE or -export-file FILE.
-  -internal		Only output documentation for symbols that have NOT been
-			exported using EXPORT_SYMBOL() or EXPORT_SYMBOL_GPL()
-                        in any input FILE or -export-file FILE.
-  -function NAME	Only output documentation for the given function(s)
-			or DOC: section title(s). All other functions and DOC:
-			sections are ignored. May be specified multiple times.
-  -nosymbol NAME	Exclude the specified symbols from the output
-		        documentation. May be specified multiple times.
-
 Output selection modifiers:
   -no-doc-sections	Do not output DOC: sections.
   -enable-lineno        Enable output of #define LINENO lines. Only works with
@@ -2591,4 +2578,33 @@ found on PATH.
 
 =back
 
+=head2 Output selection (mutually exclusive):
+
+=over 8
+
+=item -export
+
+Only output documentation for the symbols that have been exported using
+EXPORT_SYMBOL() or EXPORT_SYMBOL_GPL() in any input FILE or -export-file FILE.
+
+=item -internal
+
+Only output documentation for the symbols that have NOT been exported using
+EXPORT_SYMBOL() or EXPORT_SYMBOL_GPL() in any input FILE or -export-file FILE.
+
+=item -function NAME
+
+Only output documentation for the given function or DOC: section title.
+All other functions and DOC: sections are ignored.
+
+May be specified multiple times.
+
+=item -nosymbol NAME
+
+Exclude the specified symbol from the output documentation.
+
+May be specified multiple times.
+
+=back
+
 =cut
-- 
GitLab


From c15de5a19a2881205f6f893869584c99cbe4fae4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomasz=20Warnie=C5=82=C5=82o?= <tomasz.warniello@gmail.com>
Date: Fri, 18 Feb 2022 19:16:24 +0100
Subject: [PATCH 0629/1586] scripts: kernel-doc: Translate the "Output
 selection modifiers" subsection of OPTIONS
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Aim: unified POD, user more satisfied, script better structured

A subsection "reStructuredText only" is added for -enable-lineno.

Other notes:
- paragraphing correction

Signed-off-by: Tomasz Warniełło <tomasz.warniello@gmail.com>
Tested-by: Randy Dunlap <rdunlap@infradead.org>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Disliked-by: Akira Yokosawa <akiyks@gmail.com>
Link: https://lore.kernel.org/r/20220218181628.1411551-8-tomasz.warniello@gmail.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 scripts/kernel-doc | 37 +++++++++++++++++++++++++++++--------
 1 file changed, 29 insertions(+), 8 deletions(-)

diff --git a/scripts/kernel-doc b/scripts/kernel-doc
index e49cdb307a35d..210e7e3b501b6 100755
--- a/scripts/kernel-doc
+++ b/scripts/kernel-doc
@@ -83,14 +83,6 @@ sub usage {
     my $message = <<"EOF";
 Usage: $0 [OPTION ...] FILE ...
 
-Output selection modifiers:
-  -no-doc-sections	Do not output DOC: sections.
-  -enable-lineno        Enable output of #define LINENO lines. Only works with
-                        reStructuredText format.
-  -export-file FILE     Specify an additional FILE in which to look for
-                        EXPORT_SYMBOL() and EXPORT_SYMBOL_GPL(). To be used with
-                        -export or -internal. May be specified multiple times.
-
 Other parameters:
   -v			Verbose output, more warnings and other information.
   -h			Print this help.
@@ -2607,4 +2599,33 @@ May be specified multiple times.
 
 =back
 
+=head2 Output selection modifiers:
+
+=over 8
+
+=item -no-doc-sections
+
+Do not output DOC: sections.
+
+=item -export-file FILE
+
+Specify an additional FILE in which to look for EXPORT_SYMBOL() and
+EXPORT_SYMBOL_GPL().
+
+To be used with -export or -internal.
+
+May be specified multiple times.
+
+=back
+
+=head3 reStructuredText only
+
+=over 8
+
+=item -enable-lineno
+
+Enable output of #define LINENO lines.
+
+=back
+
 =cut
-- 
GitLab


From 834cf6b9039e6f6ebd73cc4da51cc8bc802ca777 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomasz=20Warnie=C5=82=C5=82o?= <tomasz.warniello@gmail.com>
Date: Fri, 18 Feb 2022 19:16:25 +0100
Subject: [PATCH 0630/1586] scripts: kernel-doc: Translate the "Other
 parameters" subsection of OPTIONS
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Aim: unified POD, user more satisfied, script better structured

Notes:
- The -help token is added.
- The entries are sorted alphbetically.

Signed-off-by: Tomasz Warniełło <tomasz.warniello@gmail.com>
Tested-by: Randy Dunlap <rdunlap@infradead.org>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Disliked-by: Akira Yokosawa <akiyks@gmail.com>
Link: https://lore.kernel.org/r/20220218181628.1411551-9-tomasz.warniello@gmail.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 scripts/kernel-doc | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/scripts/kernel-doc b/scripts/kernel-doc
index 210e7e3b501b6..4a26a74318e68 100755
--- a/scripts/kernel-doc
+++ b/scripts/kernel-doc
@@ -83,11 +83,6 @@ sub usage {
     my $message = <<"EOF";
 Usage: $0 [OPTION ...] FILE ...
 
-Other parameters:
-  -v			Verbose output, more warnings and other information.
-  -h			Print this help.
-  -Werror		Treat warnings as errors.
-
 EOF
     print $message;
     exit 1;
@@ -2628,4 +2623,22 @@ Enable output of #define LINENO lines.
 
 =back
 
+=head2 Other parameters:
+
+=over 8
+
+=item -h, -help
+
+Print this help.
+
+=item -v
+
+Verbose output, more warnings and other information.
+
+=item -Werror
+
+Treat warnings as errors.
+
+=back
+
 =cut
-- 
GitLab


From 252b47da9fd9eeebbdaed448aea71010261d7dc4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomasz=20Warnie=C5=82=C5=82o?= <tomasz.warniello@gmail.com>
Date: Fri, 18 Feb 2022 19:16:26 +0100
Subject: [PATCH 0631/1586] scripts: kernel-doc: Replace the usage function
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Aim: unified POD, user more satisfied, script better structured

You can see the results with:

$ scripts/kernel-doc -help

Signed-off-by: Tomasz Warniełło <tomasz.warniello@gmail.com>
Tested-by: Randy Dunlap <rdunlap@infradead.org>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Disliked-by: Akira Yokosawa <akiyks@gmail.com>
Link: https://lore.kernel.org/r/20220218181628.1411551-10-tomasz.warniello@gmail.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 scripts/kernel-doc | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/scripts/kernel-doc b/scripts/kernel-doc
index 4a26a74318e68..d7ca4877eedad 100755
--- a/scripts/kernel-doc
+++ b/scripts/kernel-doc
@@ -79,15 +79,6 @@ See Documentation/doc-guide/kernel-doc.rst for the documentation comment syntax.
 # 25/07/2012 - Added support for HTML5
 # -- Dan Luedtke <mail@danrl.de>
 
-sub usage {
-    my $message = <<"EOF";
-Usage: $0 [OPTION ...] FILE ...
-
-EOF
-    print $message;
-    exit 1;
-}
-
 #
 # format of comments.
 # In the following table, (...)? signifies optional structure.
@@ -468,7 +459,7 @@ while ($ARGV[0] =~ m/^--?(.*)/) {
     } elsif ($cmd eq "Werror") {
 	$Werror = 1;
     } elsif (($cmd eq "h") || ($cmd eq "help")) {
-	usage();
+		pod2usage(-exitval => 0, -verbose => 2);
     } elsif ($cmd eq 'no-doc-sections') {
 	    $no_doc_sections = 1;
     } elsif ($cmd eq 'enable-lineno') {
-- 
GitLab


From 258092a89085ed9536da00f27d8ddbe083c9ea0a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomasz=20Warnie=C5=82=C5=82o?= <tomasz.warniello@gmail.com>
Date: Fri, 18 Feb 2022 19:16:27 +0100
Subject: [PATCH 0632/1586] scripts: kernel-doc: Drop obsolete comments
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

What for? To improve the script maintainability.

1. License

As stated by Jonathan Corbet in the reply to my version 1, the SPDX line
is enough.

2. The to-do list comment

As suggested by Jonathan Corbet in reply to my version 3, this section
doesn't need to be transitioned. And so it is removed for clarity.

3. The historical changelog comments

As suggested by Jonathan Corbet in a reply to v3, this section can go.
I wanted to keep it, but since it doesn't contain copyright notices,
let's just have it clean and simple.

4. The "format of comments" comment block

As suggested by Jani Nikula in a reply to my first version of this
transformation, Documentation/doc-guide/kernel-doc.rst can serve as the
information hub for comment formatting. The section DESCRIPTION already
points there, so the original comment block can just be removed.

Suggested-by: Jonathan Corbet <corbet@lwn.net>
Suggested-by: Jani Nikula <jani.nikula@linux.intel.com>
Signed-off-by: Tomasz Warniełło <tomasz.warniello@gmail.com>
Tested-by: Randy Dunlap <rdunlap@infradead.org>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Disliked-by: Akira Yokosawa <akiyks@gmail.com>
Link: https://lore.kernel.org/r/20220218181628.1411551-11-tomasz.warniello@gmail.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 scripts/kernel-doc | 143 ---------------------------------------------
 1 file changed, 143 deletions(-)

diff --git a/scripts/kernel-doc b/scripts/kernel-doc
index d7ca4877eedad..a5a397e22ea7b 100755
--- a/scripts/kernel-doc
+++ b/scripts/kernel-doc
@@ -12,9 +12,6 @@ use strict;
 ## 								 ##
 ## #define enhancements by Armin Kuster <akuster@mvista.com>	 ##
 ## Copyright (c) 2000 MontaVista Software, Inc.			 ##
-## 								 ##
-## This software falls under the GNU General Public License.     ##
-## Please read the COPYING file for more information             ##
 
 use Pod::Usage qw/pod2usage/;
 
@@ -54,146 +51,6 @@ See Documentation/doc-guide/kernel-doc.rst for the documentation comment syntax.
 
 # more perldoc at the end of the file
 
-# 18/01/2001 - 	Cleanups
-# 		Functions prototyped as foo(void) same as foo()
-# 		Stop eval'ing where we don't need to.
-# -- huggie@earth.li
-
-# 27/06/2001 -  Allowed whitespace after initial "/**" and
-#               allowed comments before function declarations.
-# -- Christian Kreibich <ck@whoop.org>
-
-# Still to do:
-# 	- add perldoc documentation
-# 	- Look more closely at some of the scarier bits :)
-
-# 26/05/2001 - 	Support for separate source and object trees.
-#		Return error code.
-# 		Keith Owens <kaos@ocs.com.au>
-
-# 23/09/2001 - Added support for typedefs, structs, enums and unions
-#              Support for Context section; can be terminated using empty line
-#              Small fixes (like spaces vs. \s in regex)
-# -- Tim Jansen <tim@tjansen.de>
-
-# 25/07/2012 - Added support for HTML5
-# -- Dan Luedtke <mail@danrl.de>
-
-#
-# format of comments.
-# In the following table, (...)? signifies optional structure.
-#                         (...)* signifies 0 or more structure elements
-# /**
-#  * function_name(:)? (- short description)?
-# (* @parameterx: (description of parameter x)?)*
-# (* a blank line)?
-#  * (Description:)? (Description of function)?
-#  * (section header: (section description)? )*
-#  (*)?*/
-#
-# So .. the trivial example would be:
-#
-# /**
-#  * my_function
-#  */
-#
-# If the Description: header tag is omitted, then there must be a blank line
-# after the last parameter specification.
-# e.g.
-# /**
-#  * my_function - does my stuff
-#  * @my_arg: its mine damnit
-#  *
-#  * Does my stuff explained.
-#  */
-#
-#  or, could also use:
-# /**
-#  * my_function - does my stuff
-#  * @my_arg: its mine damnit
-#  * Description: Does my stuff explained.
-#  */
-# etc.
-#
-# Besides functions you can also write documentation for structs, unions,
-# enums and typedefs. Instead of the function name you must write the name
-# of the declaration;  the struct/union/enum/typedef must always precede
-# the name. Nesting of declarations is not supported.
-# Use the argument mechanism to document members or constants.
-# e.g.
-# /**
-#  * struct my_struct - short description
-#  * @a: first member
-#  * @b: second member
-#  *
-#  * Longer description
-#  */
-# struct my_struct {
-#     int a;
-#     int b;
-# /* private: */
-#     int c;
-# };
-#
-# All descriptions can be multiline, except the short function description.
-#
-# For really longs structs, you can also describe arguments inside the
-# body of the struct.
-# eg.
-# /**
-#  * struct my_struct - short description
-#  * @a: first member
-#  * @b: second member
-#  *
-#  * Longer description
-#  */
-# struct my_struct {
-#     int a;
-#     int b;
-#     /**
-#      * @c: This is longer description of C
-#      *
-#      * You can use paragraphs to describe arguments
-#      * using this method.
-#      */
-#     int c;
-# };
-#
-# This should be use only for struct/enum members.
-#
-# You can also add additional sections. When documenting kernel functions you
-# should document the "Context:" of the function, e.g. whether the functions
-# can be called form interrupts. Unlike other sections you can end it with an
-# empty line.
-# A non-void function should have a "Return:" section describing the return
-# value(s).
-# Example-sections should contain the string EXAMPLE so that they are marked
-# appropriately in DocBook.
-#
-# Example:
-# /**
-#  * user_function - function that can only be called in user context
-#  * @a: some argument
-#  * Context: !in_interrupt()
-#  *
-#  * Some description
-#  * Example:
-#  *    user_function(22);
-#  */
-# ...
-#
-#
-# All descriptive text is further processed, scanning for the following special
-# patterns, which are highlighted appropriately.
-#
-# 'funcname()' - function
-# '$ENVVAR' - environmental variable
-# '&struct_name' - name of a structure (up to two words including 'struct')
-# '&struct_name.member' - name of a structure member
-# '@parameter' - name of a parameter
-# '%CONST' - name of a constant.
-# '``LITERAL``' - literal string without any spaces on it.
-
 ## init lots of data
 
 my $errors = 0;
-- 
GitLab


From 2b306ecaf57b2b5004dcb671a46ef24a1c369db2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomasz=20Warnie=C5=82=C5=82o?= <tomasz.warniello@gmail.com>
Date: Fri, 18 Feb 2022 19:16:28 +0100
Subject: [PATCH 0633/1586] scripts: kernel-doc: Refresh the copyright lines
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

I wanted to clean up these lines, but in the end decided not to touch
the old ones and just add my own about POD. I'll leave the cleanup
for lawyers.

Signed-off-by: Tomasz Warniełło <tomasz.warniello@gmail.com>
Tested-by: Randy Dunlap <rdunlap@infradead.org>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Disliked-by: Akira Yokosawa <akiyks@gmail.com>
Link: https://lore.kernel.org/r/20220218181628.1411551-12-tomasz.warniello@gmail.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 scripts/kernel-doc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/scripts/kernel-doc b/scripts/kernel-doc
index a5a397e22ea7b..f06f68f3c3d9b 100755
--- a/scripts/kernel-doc
+++ b/scripts/kernel-doc
@@ -12,6 +12,8 @@ use strict;
 ## 								 ##
 ## #define enhancements by Armin Kuster <akuster@mvista.com>	 ##
 ## Copyright (c) 2000 MontaVista Software, Inc.			 ##
+#
+# Copyright (C) 2022 Tomasz Warniełło (POD)
 
 use Pod::Usage qw/pod2usage/;
 
-- 
GitLab


From e334f873eb4e1638dd0b45200d2d8838a13b0cac Mon Sep 17 00:00:00 2001
From: Akira Yokosawa <akiyks@gmail.com>
Date: Thu, 24 Feb 2022 22:02:46 +0900
Subject: [PATCH 0634/1586] docs: scripts/kernel-doc: Detect absence of FILE
 arg

Currently, when there is no FILE argument following a switch such
as -man, -rst, or -none, kernel-doc exits with a warning from perl
(long msg folded):

    Use of uninitialized value $ARGV[0] in pattern match (m//)
    at ./scripts/kernel-doc line 438.

, which is unhelpful.

Improve the behavior by adding a check at the bottom of parsing
loop.
If the argument is absent, display help text and exit with
the code of 1 (via usage()).

Signed-off-by: Akira Yokosawa <akiyks@gmail.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Link: https://lore.kernel.org/r/7b136049-a3ba-0eb5-8717-364d773ff914@gmail.com
[jc: reworked to fix conflict with pod patches]
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 scripts/kernel-doc | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/scripts/kernel-doc b/scripts/kernel-doc
index f06f68f3c3d9b..9c084a2ba3b05 100755
--- a/scripts/kernel-doc
+++ b/scripts/kernel-doc
@@ -343,14 +343,23 @@ while ($ARGV[0] =~ m/^--?(.*)/) {
 	    die "Sphinx version should either major.minor or major.minor.patch format\n";
 	}
     } else {
-		# Unknown argument
-		pod2usage(
-			-message => "Argument unknown!\n",
-			-exitval => 1,
-			-verbose => 99,
-			-sections => 'SYNOPSIS',
-			-output => \*STDERR,
-		);
+	# Unknown argument
+	pod2usage(
+	    -message => "Argument unknown!\n",
+	    -exitval => 1,
+	    -verbose => 99,
+	    -sections => 'SYNOPSIS',
+	    -output => \*STDERR,
+	    );
+    }
+    if ($#ARGV < 0){
+	pod2usage(
+	    -message => "FILE argument missing\n",
+	    -exitval => 1,
+	    -verbose => 99,
+	    -sections => 'SYNOPSIS',
+	    -output => \*STDERR,
+	    );
     }
 }
 
-- 
GitLab


From 024314d6d540a24cfe029421ad7e97e1d6e886b2 Mon Sep 17 00:00:00 2001
From: Yixuan Cao <caoyixuan2019@email.szu.edu.cn>
Date: Wed, 23 Feb 2022 21:41:04 +0800
Subject: [PATCH 0635/1586] Documentation/vm/page_owner.rst: fix language

There are some words that need to be fixed.

Thanks for Shuah Khan's constructive suggestions.
The text has been fixed as follows.

a. So, if you'd like to use it, you need
to add "page_owner=on" into your boot cmdline.

Here, "into" has been replaced with "to".

b. ...page owner is disabled in runtime due to no
enabling, boot option, runtime overhead is marginal.

Here, "no" has been replaced with "not".

Signed-off-by: Yixuan Cao <caoyixuan2019@email.szu.edu.cn>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Link: https://lore.kernel.org/r/20220223134104.2663-1-caoyixuan2019@email.szu.edu.cn
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/vm/page_owner.rst | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Documentation/vm/page_owner.rst b/Documentation/vm/page_owner.rst
index 9837fc8147dd6..bc28edaf3de1f 100644
--- a/Documentation/vm/page_owner.rst
+++ b/Documentation/vm/page_owner.rst
@@ -26,9 +26,9 @@ fragmentation statistics can be obtained through gfp flag information of
 each page. It is already implemented and activated if page owner is
 enabled. Other usages are more than welcome.
 
-page owner is disabled in default. So, if you'd like to use it, you need
-to add "page_owner=on" into your boot cmdline. If the kernel is built
-with page owner and page owner is disabled in runtime due to no enabling
+page owner is disabled by default. So, if you'd like to use it, you need
+to add "page_owner=on" to your boot cmdline. If the kernel is built
+with page owner and page owner is disabled in runtime due to not enabling
 boot option, runtime overhead is marginal. If disabled in runtime, it
 doesn't require memory to store owner information, so there is no runtime
 memory overhead. And, page owner inserts just two unlikely branches into
-- 
GitLab


From b089f167c384c7ca0adc50c78dd7a98f747d5e03 Mon Sep 17 00:00:00 2001
From: Naohiro Aota <naohiro.aota@wdc.com>
Date: Tue, 22 Feb 2022 10:27:51 +0900
Subject: [PATCH 0636/1586] Documentation: block/diskstats: update function
 names

__make_request() and end_that_request_last() do no longer exist. Replace
them with the current call-site.

Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviwed-by: Jens Axboe <axboe@kernel.dk>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220222012751.1933194-1-naohiro.aota@wdc.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/admin-guide/iostats.rst | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Documentation/admin-guide/iostats.rst b/Documentation/admin-guide/iostats.rst
index 9b14b0c2c9c45..609a3201fd4e1 100644
--- a/Documentation/admin-guide/iostats.rst
+++ b/Documentation/admin-guide/iostats.rst
@@ -76,7 +76,7 @@ Field  3 -- # of sectors read (unsigned long)
 
 Field  4 -- # of milliseconds spent reading (unsigned int)
     This is the total number of milliseconds spent by all reads (as
-    measured from __make_request() to end_that_request_last()).
+    measured from blk_mq_alloc_request() to __blk_mq_end_request()).
 
 Field  5 -- # of writes completed (unsigned long)
     This is the total number of writes completed successfully.
@@ -89,7 +89,7 @@ Field  7 -- # of sectors written (unsigned long)
 
 Field  8 -- # of milliseconds spent writing (unsigned int)
     This is the total number of milliseconds spent by all writes (as
-    measured from __make_request() to end_that_request_last()).
+    measured from blk_mq_alloc_request() to __blk_mq_end_request()).
 
 Field  9 -- # of I/Os currently in progress (unsigned int)
     The only field that should go to zero. Incremented as requests are
@@ -120,7 +120,7 @@ Field 14 -- # of sectors discarded (unsigned long)
 
 Field 15 -- # of milliseconds spent discarding (unsigned int)
     This is the total number of milliseconds spent by all discards (as
-    measured from __make_request() to end_that_request_last()).
+    measured from blk_mq_alloc_request() to __blk_mq_end_request()).
 
 Field 16 -- # of flush requests completed
     This is the total number of flush requests completed successfully.
-- 
GitLab


From 398f7abdcb7e2307facebcbdae5639f7d35916cd Mon Sep 17 00:00:00 2001
From: Akira Yokosawa <akiyks@gmail.com>
Date: Fri, 18 Feb 2022 23:11:17 +0900
Subject: [PATCH 0637/1586] docs: pdfdocs: Pull LaTeX preamble part out of
 conf.py

Quote from Jon's remark [1]:

    I do notice that Documentation/conf.py is getting large and
    unapproachable.  At some future point, it might be nice to pull
    all of the latex stuff out into a separate file where it won't
    scare people who stumble into it by accident.

Pull LaTeX preamble settings added since commit 3b4c963243b1 ("docs:
conf.py: adjust the LaTeX document output") out into
sphinx/kerneldoc-preamble.sty.

It will be copied to the build directory by the added
"latex_additional_files" setting in conf.py.

As a bonus, LaTeX/TeX code can be maintained without escaping backslashes.

To compensate the loss of change history in sphinx/kerneldoc-preamble.sty,
here is a list of changes made in conf.py:

  - f7ebe6b76940 ("docs: Activate exCJK only in CJK chapters")
  - 0afd4df0d16a ("docs: pdfdocs: Prevent column squeezing by tabulary")
  - 659653c9e546 ("docs: pdfdocs: Refactor config for CJK document")
  - e291ff6f5a03 ("docs: pdfdocs: Add CJK-language-specific font settings")
  - 7eb368cc319b ("docs: pdfdocs: Choose Serif font as CJK mainfont if possible")
  - 35382965bdd2 ("docs: pdfdocs: Preserve inter-phrase space in Korean translations")
  - 77abc2c230b1 ("docs: pdfdocs: One-half spacing for CJK translations")
  - 788d28a25799 ("docs: pdfdocs: Permit AutoFakeSlant for CJK fonts")
  - 29ac9822358f ("docs: pdfdocs: Teach xeCJK about character classes of quotation marks")
  - 7c5c18bdb656 ("docs: pdfdocs: Fix typo in CJK-language specific font settings")
  - aa872e0647dc ("docs: pdfdocs: Adjust \headheight for fancyhdr")
  - 8716ef413aa5 ("docs: pdfdocs: Tweak width params of TOC")
  - 66939df53948 ("docs: pdfdocs: Switch default CJK font to KR variants")
  - 7b686a2ea1e4 ("docs: pdfdocs: Enable CJKspace in TOC for Korean titles")
  - 5d9158e3c762 ("docs/translations: Skip CJK contents if suitable fonts not found")
  - b774cc46313b ("docs: pdfdocs: Move CJK monospace font setting to main conf.py")

[1]: https://lore.kernel.org/all/87zgmr66cn.fsf@meer.lwn.net/

Suggested-by: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Akira Yokosawa <akiyks@gmail.com>
Link: https://lore.kernel.org/r/aaa9dca1-27c0-c414-77f3-c5587db0cc5b@gmail.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/conf.py                       | 184 ++------------------
 Documentation/sphinx/kerneldoc-preamble.sty | 184 ++++++++++++++++++++
 2 files changed, 197 insertions(+), 171 deletions(-)
 create mode 100644 Documentation/sphinx/kerneldoc-preamble.sty

diff --git a/Documentation/conf.py b/Documentation/conf.py
index fb8f69fc4d38c..072ee31a301dc 100644
--- a/Documentation/conf.py
+++ b/Documentation/conf.py
@@ -409,188 +409,25 @@ latex_elements = {
 
     # Additional stuff for the LaTeX preamble.
     'preamble': '''
-	% Custom width parameters for TOC --- Redefine low-level commands
-	% defined in report.cls
-	\\makeatletter
-	%% Redefine \\@pnumwidth (page number width)
-	\\renewcommand*\\@pnumwidth{2.7em}
-	%% Redefine \\l@chapter (chapter list entry)
-	\\renewcommand*\\l@chapter[2]{%
-	  \\ifnum \\c@tocdepth >\\m@ne
-	    \\addpenalty{-\\@highpenalty}%
-	    \\vskip 1.0em \\@plus\\p@
-	    \\setlength\\@tempdima{1.8em}%
-	    \\begingroup
-	      \\parindent \\z@ \\rightskip \\@pnumwidth
-	      \\parfillskip -\\@pnumwidth
-	      \\leavevmode \\bfseries
-	      \\advance\\leftskip\\@tempdima
-	      \\hskip -\\leftskip
-	      #1\\nobreak\\hfil
-	      \\nobreak\\hb@xt@\\@pnumwidth{\\hss #2%
-	                                 \\kern-\\p@\\kern\\p@}\\par
-	      \\penalty\\@highpenalty
-	    \\endgroup
-	  \\fi}
-	%% Redefine \\l@section and \\l@subsection
-	\\renewcommand*\\l@section{\\@dottedtocline{1}{1.8em}{3.2em}}
-	\\renewcommand*\\l@subsection{\\@dottedtocline{2}{5em}{4.3em}}
-	\\makeatother
-	%% Sphinx < 1.8 doesn't have \\sphinxtableofcontentshook
-	\\providecommand{\\sphinxtableofcontentshook}{}
-	%% Undefine it for compatibility with Sphinx 1.7.9
-	\\renewcommand{\\sphinxtableofcontentshook}{} % Empty the hook
-	% Prevent column squeezing of tabulary.
-	\\setlength{\\tymin}{20em}
         % Use some font with UTF-8 support with XeLaTeX
         \\usepackage{fontspec}
         \\setsansfont{DejaVu Sans}
         \\setromanfont{DejaVu Serif}
         \\setmonofont{DejaVu Sans Mono}
-        % Adjust \\headheight for fancyhdr
-        \\addtolength{\\headheight}{1.6pt}
-        \\addtolength{\\topmargin}{-1.6pt}
-     ''',
+    ''',
 }
 
-# Translations have Asian (CJK) characters which are only displayed if
-# xeCJK is used
-
-latex_elements['preamble']  += '''
-    \\IfFontExistsTF{Noto Sans CJK SC}{
-	% This is needed for translations
-	\\usepackage{xeCJK}
-	\\IfFontExistsTF{Noto Serif CJK KR}{
-	    \\setCJKmainfont{Noto Serif CJK KR}[AutoFakeSlant]
-	}{
-	    \\setCJKmainfont{Noto Sans CJK KR}[AutoFakeSlant]
-	}
-	\\setCJKsansfont{Noto Sans CJK KR}[AutoFakeSlant]
-	\\setCJKmonofont{Noto Sans Mono CJK KR}[AutoFakeSlant]
-	\\xeCJKDeclareCharClass{HalfLeft}{`“,`‘}
-	\\xeCJKDeclareCharClass{HalfRight}{`”,`’}
-	% CJK Language-specific font choices
-	\\IfFontExistsTF{Noto Serif CJK SC}{
-	    \\newCJKfontfamily[SCmain]\\scmain{Noto Serif CJK SC}[AutoFakeSlant]
-	    \\newCJKfontfamily[SCserif]\\scserif{Noto Serif CJK SC}[AutoFakeSlant]
-	}{
-	    \\newCJKfontfamily[SCmain]\\scmain{Noto Sans CJK SC}[AutoFakeSlant]
-	    \\newCJKfontfamily[SCserif]\\scserif{Noto Sans CJK SC}[AutoFakeSlant]
-	}
-	\\newCJKfontfamily[SCsans]\\scsans{Noto Sans CJK SC}[AutoFakeSlant]
-	\\newCJKfontfamily[SCmono]\\scmono{Noto Sans Mono CJK SC}[AutoFakeSlant]
-	\\IfFontExistsTF{Noto Serif CJK TC}{
-	    \\newCJKfontfamily[TCmain]\\tcmain{Noto Serif CJK TC}[AutoFakeSlant]
-	    \\newCJKfontfamily[TCserif]\\tcserif{Noto Serif CJK TC}[AutoFakeSlant]
-	}{
-	    \\newCJKfontfamily[TCmain]\\tcmain{Noto Sans CJK TC}[AutoFakeSlant]
-	    \\newCJKfontfamily[TCserif]\\tcserif{Noto Sans CJK TC}[AutoFakeSlant]
-	}
-	\\newCJKfontfamily[TCsans]\\tcsans{Noto Sans CJK TC}[AutoFakeSlant]
-	\\newCJKfontfamily[TCmono]\\tcmono{Noto Sans Mono CJK TC}[AutoFakeSlant]
-	\\IfFontExistsTF{Noto Serif CJK KR}{
-	    \\newCJKfontfamily[KRmain]\\krmain{Noto Serif CJK KR}[AutoFakeSlant]
-	    \\newCJKfontfamily[KRserif]\\krserif{Noto Serif CJK KR}[AutoFakeSlant]
-	}{
-	    \\newCJKfontfamily[KRmain]\\krmain{Noto Sans CJK KR}[AutoFakeSlant]
-	    \\newCJKfontfamily[KRserif]\\krserif{Noto Sans CJK KR}[AutoFakeSlant]
-	}
-	\\newCJKfontfamily[KRsans]\\krsans{Noto Sans CJK KR}[AutoFakeSlant]
-	\\newCJKfontfamily[KRmono]\\krmono{Noto Sans Mono CJK KR}[AutoFakeSlant]
-	\\IfFontExistsTF{Noto Serif CJK JP}{
-	    \\newCJKfontfamily[JPmain]\\jpmain{Noto Serif CJK JP}[AutoFakeSlant]
-	    \\newCJKfontfamily[JPserif]\\jpserif{Noto Serif CJK JP}[AutoFakeSlant]
-	}{
-	    \\newCJKfontfamily[JPmain]\\jpmain{Noto Sans CJK JP}[AutoFakeSlant]
-	    \\newCJKfontfamily[JPserif]\\jpserif{Noto Sans CJK JP}[AutoFakeSlant]
-	}
-	\\newCJKfontfamily[JPsans]\\jpsans{Noto Sans CJK JP}[AutoFakeSlant]
-	\\newCJKfontfamily[JPmono]\\jpmono{Noto Sans Mono CJK JP}[AutoFakeSlant]
-	% Dummy commands for Sphinx < 2.3 (no 'extrapackages' support)
-	\\providecommand{\\onehalfspacing}{}
-	\\providecommand{\\singlespacing}{}
-	% Define custom macros to on/off CJK
-	\\newcommand{\\kerneldocCJKon}{\\makexeCJKactive\\onehalfspacing}
-	\\newcommand{\\kerneldocCJKoff}{\\makexeCJKinactive\\singlespacing}
-	\\newcommand{\\kerneldocBeginSC}{%
-	    \\begingroup%
-	    \\scmain%
-	    \\xeCJKDeclareCharClass{FullLeft}{`“,`‘}%
-	    \\xeCJKDeclareCharClass{FullRight}{`”,`’}%
-	    \\renewcommand{\\CJKrmdefault}{SCserif}%
-	    \\renewcommand{\\CJKsfdefault}{SCsans}%
-	    \\renewcommand{\\CJKttdefault}{SCmono}%
-	    \\xeCJKsetup{CJKspace = false}%
-	    % For CJK ascii-art alignment
-	    \\setmonofont{Noto Sans Mono CJK SC}[AutoFakeSlant]%
-	}
-	\\newcommand{\\kerneldocEndSC}{\\endgroup}
-	\\newcommand{\\kerneldocBeginTC}{%
-	    \\begingroup%
-	    \\tcmain%
-	    \\xeCJKDeclareCharClass{FullLeft}{`“,`‘}%
-	    \\xeCJKDeclareCharClass{FullRight}{`”,`’}%
-	    \\renewcommand{\\CJKrmdefault}{TCserif}%
-	    \\renewcommand{\\CJKsfdefault}{TCsans}%
-	    \\renewcommand{\\CJKttdefault}{TCmono}%
-	    \\xeCJKsetup{CJKspace = false}%
-	    % For CJK ascii-art alignment
-	    \\setmonofont{Noto Sans Mono CJK TC}[AutoFakeSlant]%
-	}
-	\\newcommand{\\kerneldocEndTC}{\\endgroup}
-	\\newcommand{\\kerneldocBeginKR}{%
-	    \\begingroup%
-	    \\krmain%
-	    \\renewcommand{\\CJKrmdefault}{KRserif}%
-	    \\renewcommand{\\CJKsfdefault}{KRsans}%
-	    \\renewcommand{\\CJKttdefault}{KRmono}%
-	    % \\xeCJKsetup{CJKspace = true} % true by default
-	    % For CJK ascii-art alignment (still misaligned for Hangul)
-	    \\setmonofont{Noto Sans Mono CJK KR}[AutoFakeSlant]%
-	}
-	\\newcommand{\\kerneldocEndKR}{\\endgroup}
-	\\newcommand{\\kerneldocBeginJP}{%
-	    \\begingroup%
-	    \\jpmain%
-	    \\renewcommand{\\CJKrmdefault}{JPserif}%
-	    \\renewcommand{\\CJKsfdefault}{JPsans}%
-	    \\renewcommand{\\CJKttdefault}{JPmono}%
-	    \\xeCJKsetup{CJKspace = false}%
-	    % For CJK ascii-art alignment
-	    \\setmonofont{Noto Sans Mono CJK JP}[AutoFakeSlant]%
-	}
-	\\newcommand{\\kerneldocEndJP}{\\endgroup}
-	% Single spacing in literal blocks
-	\\fvset{baselinestretch=1}
-	% To customize \\sphinxtableofcontents
-	\\usepackage{etoolbox}
-	% Inactivate CJK after tableofcontents
-	\\apptocmd{\\sphinxtableofcontents}{\\kerneldocCJKoff}{}{}
-	\\xeCJKsetup{CJKspace = true} % For inter-phrase space of Korean TOC
-    }{ % No CJK font found
-	% Custom macros to on/off CJK (Dummy)
-	\\newcommand{\\kerneldocCJKon}{}
-	\\newcommand{\\kerneldocCJKoff}{}
-	\\newcommand{\\kerneldocBeginSC}[1]{%
-	    \\begin{sphinxadmonition}{note}{Note:}
-		``Noto Sans CJK'' fonts are not found while building this PDF\\@.
-		Translations of zh\\_CN, zh\\_TW, ko\\_KR, and ja\\_JP are
-		skipped.
-	    \\end{sphinxadmonition}}
-	\\newcommand{\\kerneldocEndSC}{}
-	\\newcommand{\\kerneldocBeginTC}[1]{}
-	\\newcommand{\\kerneldocEndTC}{}
-	\\newcommand{\\kerneldocBeginKR}[1]{}
-	\\newcommand{\\kerneldocEndKR}{}
-	\\newcommand{\\kerneldocBeginJP}[1]{}
-	\\newcommand{\\kerneldocEndJP}{}
-    }
-'''
-
 # Fix reference escape troubles with Sphinx 1.4.x
 if major == 1:
     latex_elements['preamble']  += '\\renewcommand*{\\DUrole}[2]{ #2 }\n'
 
+
+# Load kerneldoc specific LaTeX settings
+latex_elements['preamble'] += '''
+        % Load kerneldoc specific LaTeX settings
+	\\input{kerneldoc-preamble.sty}
+'''
+
 # With Sphinx 1.6, it is possible to change the Bg color directly
 # by using:
 #	\definecolor{sphinxnoteBgColor}{RGB}{204,255,255}
@@ -652,6 +489,11 @@ for fn in os.listdir('.'):
 # If false, no module index is generated.
 #latex_domain_indices = True
 
+# Additional LaTeX stuff to be copied to build directory
+latex_additional_files = [
+    'sphinx/kerneldoc-preamble.sty',
+]
+
 
 # -- Options for manual page output ---------------------------------------
 
diff --git a/Documentation/sphinx/kerneldoc-preamble.sty b/Documentation/sphinx/kerneldoc-preamble.sty
new file mode 100644
index 0000000000000..4e56ccea1dbd7
--- /dev/null
+++ b/Documentation/sphinx/kerneldoc-preamble.sty
@@ -0,0 +1,184 @@
+% -*- coding: utf-8 -*-
+% SPDX-License-Identifier: GPL-2.0
+%
+% LaTeX preamble for "make latexdocs" or "make pdfdocs" including:
+%   - TOC width settings
+%   - Setting of tabulary (\tymin)
+%   - Headheight setting for fancyhdr
+%   - Fontfamily settings for CJK (Chinese, Japanese, and Korean) translations
+%
+% Note on the suffix of .sty:
+%   This is not implemented as a LaTeX style file, but as a file containing
+%   plain LaTeX code to be included into preamble.
+%   ".sty" is chosen because ".tex" would cause the build scripts to confuse
+%   this file with a LaTeX main file.
+%
+% Copyright (C) 2022  Akira Yokosawa
+
+% Custom width parameters for TOC --- Redefine low-level commands
+% defined in report.cls
+\makeatletter
+%% Redefine \@pnumwidth (page number width)
+\renewcommand*\@pnumwidth{2.7em}
+%% Redefine \l@chapter (chapter list entry)
+\renewcommand*\l@chapter[2]{%
+  \ifnum \c@tocdepth >\m@ne
+    \addpenalty{-\@highpenalty}%
+    \vskip 1.0em \@plus\p@
+    \setlength\@tempdima{1.8em}%
+    \begingroup
+      \parindent \z@ \rightskip \@pnumwidth
+      \parfillskip -\@pnumwidth
+      \leavevmode \bfseries
+      \advance\leftskip\@tempdima
+      \hskip -\leftskip
+      #1\nobreak\hfil
+      \nobreak\hb@xt@\@pnumwidth{\hss #2%
+                                 \kern-\p@\kern\p@}\par
+      \penalty\@highpenalty
+    \endgroup
+  \fi}
+%% Redefine \l@section and \l@subsection
+\renewcommand*\l@section{\@dottedtocline{1}{1.8em}{3.2em}}
+\renewcommand*\l@subsection{\@dottedtocline{2}{5em}{4.3em}}
+\makeatother
+%% Sphinx < 1.8 doesn't have \sphinxtableofcontentshook
+\providecommand{\sphinxtableofcontentshook}{}
+%% Undefine it for compatibility with Sphinx 1.7.9
+\renewcommand{\sphinxtableofcontentshook}{} % Empty the hook
+% Prevent column squeezing of tabulary.
+\setlength{\tymin}{20em}
+
+% Adjust \headheight for fancyhdr
+\addtolength{\headheight}{1.6pt}
+\addtolength{\topmargin}{-1.6pt}
+
+% Translations have Asian (CJK) characters which are only displayed if
+% xeCJK is used
+\IfFontExistsTF{Noto Sans CJK SC}{
+    % This is needed for translations
+    \usepackage{xeCJK}
+    \IfFontExistsTF{Noto Serif CJK KR}{
+	\setCJKmainfont{Noto Serif CJK KR}[AutoFakeSlant]
+    }{
+	\setCJKmainfont{Noto Sans CJK KR}[AutoFakeSlant]
+    }
+    \setCJKsansfont{Noto Sans CJK KR}[AutoFakeSlant]
+    \setCJKmonofont{Noto Sans Mono CJK KR}[AutoFakeSlant]
+    \xeCJKDeclareCharClass{HalfLeft}{`“,`‘}
+    \xeCJKDeclareCharClass{HalfRight}{`”,`’}
+    % CJK Language-specific font choices
+    \IfFontExistsTF{Noto Serif CJK SC}{
+	\newCJKfontfamily[SCmain]\scmain{Noto Serif CJK SC}[AutoFakeSlant]
+	\newCJKfontfamily[SCserif]\scserif{Noto Serif CJK SC}[AutoFakeSlant]
+    }{
+	\newCJKfontfamily[SCmain]\scmain{Noto Sans CJK SC}[AutoFakeSlant]
+	\newCJKfontfamily[SCserif]\scserif{Noto Sans CJK SC}[AutoFakeSlant]
+    }
+    \newCJKfontfamily[SCsans]\scsans{Noto Sans CJK SC}[AutoFakeSlant]
+    \newCJKfontfamily[SCmono]\scmono{Noto Sans Mono CJK SC}[AutoFakeSlant]
+    \IfFontExistsTF{Noto Serif CJK TC}{
+	\newCJKfontfamily[TCmain]\tcmain{Noto Serif CJK TC}[AutoFakeSlant]
+	\newCJKfontfamily[TCserif]\tcserif{Noto Serif CJK TC}[AutoFakeSlant]
+    }{
+	\newCJKfontfamily[TCmain]\tcmain{Noto Sans CJK TC}[AutoFakeSlant]
+	\newCJKfontfamily[TCserif]\tcserif{Noto Sans CJK TC}[AutoFakeSlant]
+    }
+    \newCJKfontfamily[TCsans]\tcsans{Noto Sans CJK TC}[AutoFakeSlant]
+    \newCJKfontfamily[TCmono]\tcmono{Noto Sans Mono CJK TC}[AutoFakeSlant]
+    \IfFontExistsTF{Noto Serif CJK KR}{
+	\newCJKfontfamily[KRmain]\krmain{Noto Serif CJK KR}[AutoFakeSlant]
+	\newCJKfontfamily[KRserif]\krserif{Noto Serif CJK KR}[AutoFakeSlant]
+    }{
+	\newCJKfontfamily[KRmain]\krmain{Noto Sans CJK KR}[AutoFakeSlant]
+	\newCJKfontfamily[KRserif]\krserif{Noto Sans CJK KR}[AutoFakeSlant]
+    }
+    \newCJKfontfamily[KRsans]\krsans{Noto Sans CJK KR}[AutoFakeSlant]
+    \newCJKfontfamily[KRmono]\krmono{Noto Sans Mono CJK KR}[AutoFakeSlant]
+    \IfFontExistsTF{Noto Serif CJK JP}{
+	\newCJKfontfamily[JPmain]\jpmain{Noto Serif CJK JP}[AutoFakeSlant]
+	\newCJKfontfamily[JPserif]\jpserif{Noto Serif CJK JP}[AutoFakeSlant]
+    }{
+	\newCJKfontfamily[JPmain]\jpmain{Noto Sans CJK JP}[AutoFakeSlant]
+	\newCJKfontfamily[JPserif]\jpserif{Noto Sans CJK JP}[AutoFakeSlant]
+    }
+    \newCJKfontfamily[JPsans]\jpsans{Noto Sans CJK JP}[AutoFakeSlant]
+    \newCJKfontfamily[JPmono]\jpmono{Noto Sans Mono CJK JP}[AutoFakeSlant]
+    % Dummy commands for Sphinx < 2.3 (no 'extrapackages' support)
+    \providecommand{\onehalfspacing}{}
+    \providecommand{\singlespacing}{}
+    % Define custom macros to on/off CJK
+    \newcommand{\kerneldocCJKon}{\makexeCJKactive\onehalfspacing}
+    \newcommand{\kerneldocCJKoff}{\makexeCJKinactive\singlespacing}
+    \newcommand{\kerneldocBeginSC}{%
+	\begingroup%
+	\scmain%
+	\xeCJKDeclareCharClass{FullLeft}{`“,`‘}%
+	\xeCJKDeclareCharClass{FullRight}{`”,`’}%
+	\renewcommand{\CJKrmdefault}{SCserif}%
+	\renewcommand{\CJKsfdefault}{SCsans}%
+	\renewcommand{\CJKttdefault}{SCmono}%
+	\xeCJKsetup{CJKspace = false}%
+	% For CJK ascii-art alignment
+	\setmonofont{Noto Sans Mono CJK SC}[AutoFakeSlant]%
+    }
+    \newcommand{\kerneldocEndSC}{\endgroup}
+    \newcommand{\kerneldocBeginTC}{%
+	\begingroup%
+	\tcmain%
+	\xeCJKDeclareCharClass{FullLeft}{`“,`‘}%
+	\xeCJKDeclareCharClass{FullRight}{`”,`’}%
+	\renewcommand{\CJKrmdefault}{TCserif}%
+	\renewcommand{\CJKsfdefault}{TCsans}%
+	\renewcommand{\CJKttdefault}{TCmono}%
+	\xeCJKsetup{CJKspace = false}%
+	% For CJK ascii-art alignment
+	\setmonofont{Noto Sans Mono CJK TC}[AutoFakeSlant]%
+    }
+    \newcommand{\kerneldocEndTC}{\endgroup}
+    \newcommand{\kerneldocBeginKR}{%
+	\begingroup%
+	\krmain%
+	\renewcommand{\CJKrmdefault}{KRserif}%
+	\renewcommand{\CJKsfdefault}{KRsans}%
+	\renewcommand{\CJKttdefault}{KRmono}%
+	% \xeCJKsetup{CJKspace = true} % true by default
+	% For CJK ascii-art alignment (still misaligned for Hangul)
+	\setmonofont{Noto Sans Mono CJK KR}[AutoFakeSlant]%
+    }
+    \newcommand{\kerneldocEndKR}{\endgroup}
+    \newcommand{\kerneldocBeginJP}{%
+	\begingroup%
+	\jpmain%
+	\renewcommand{\CJKrmdefault}{JPserif}%
+	\renewcommand{\CJKsfdefault}{JPsans}%
+	\renewcommand{\CJKttdefault}{JPmono}%
+	\xeCJKsetup{CJKspace = false}%
+	% For CJK ascii-art alignment
+	\setmonofont{Noto Sans Mono CJK JP}[AutoFakeSlant]%
+    }
+    \newcommand{\kerneldocEndJP}{\endgroup}
+    % Single spacing in literal blocks
+    \fvset{baselinestretch=1}
+    % To customize \sphinxtableofcontents
+    \usepackage{etoolbox}
+    % Inactivate CJK after tableofcontents
+    \apptocmd{\sphinxtableofcontents}{\kerneldocCJKoff}{}{}
+    \xeCJKsetup{CJKspace = true} % For inter-phrase space of Korean TOC
+}{ % No CJK font found
+    % Custom macros to on/off CJK (Dummy)
+    \newcommand{\kerneldocCJKon}{}
+    \newcommand{\kerneldocCJKoff}{}
+    \newcommand{\kerneldocBeginSC}[1]{%
+	\begin{sphinxadmonition}{note}{Note:}
+	    ``Noto Sans CJK'' fonts are not found while building this PDF\@.
+	    Translations of zh\_CN, zh\_TW, ko\_KR, and ja\_JP are skipped.
+	\end{sphinxadmonition}}
+    \newcommand{\kerneldocEndSC}{}
+    \newcommand{\kerneldocBeginTC}[1]{}
+    \newcommand{\kerneldocEndTC}{}
+    \newcommand{\kerneldocBeginKR}[1]{}
+    \newcommand{\kerneldocEndKR}{}
+    \newcommand{\kerneldocBeginJP}[1]{}
+    \newcommand{\kerneldocEndJP}{}
+}
-- 
GitLab


From 7cee33ce237cef25cbd782783a7e0241ef6d0776 Mon Sep 17 00:00:00 2001
From: Akira Yokosawa <akiyks@gmail.com>
Date: Fri, 18 Feb 2022 23:13:41 +0900
Subject: [PATCH 0638/1586] docs: kerneldoc-preamble.sty: Expand comments in
 LaTeX code

Expand comments in LaTeX code and mention some of important points
told in changelogs of conf.py changes.
Hopefully they can help future contributors in this area.

No code change involved.

Signed-off-by: Akira Yokosawa <akiyks@gmail.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Link: https://lore.kernel.org/r/bce9261b-1950-3146-07b2-07bd2ec79158@gmail.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/sphinx/kerneldoc-preamble.sty | 61 ++++++++++++++++-----
 1 file changed, 48 insertions(+), 13 deletions(-)

diff --git a/Documentation/sphinx/kerneldoc-preamble.sty b/Documentation/sphinx/kerneldoc-preamble.sty
index 4e56ccea1dbd7..5e59900e994d9 100644
--- a/Documentation/sphinx/kerneldoc-preamble.sty
+++ b/Documentation/sphinx/kerneldoc-preamble.sty
@@ -15,8 +15,20 @@
 %
 % Copyright (C) 2022  Akira Yokosawa
 
-% Custom width parameters for TOC --- Redefine low-level commands
-% defined in report.cls
+% Custom width parameters for TOC
+%  - Redefine low-level commands defined in report.cls.
+%  - Indent of 2 chars is preserved for ease of comparison.
+% Summary of changes from default params:
+%   Width of page number (\@pnumwidth): 1.55em -> 2.7em
+%   Width of chapter number:            1.5em  -> 1.8em
+%   Indent of section number:           1.5em  -> 1.8em
+%   Width of section number:            2.6em  -> 3.2em
+%   Indent of sebsection number:        4.1em  -> 5em
+%   Width of subsection number:         3.5em  -> 4.3em
+%
+% These params can have 4 digit page counts, 2 digit chapter counts,
+% section counts of 4 digits + 1 period (e.g., 18.10), and subsection counts
+% of 5 digits + 2 periods (e.g., 18.7.13).
 \makeatletter
 %% Redefine \@pnumwidth (page number width)
 \renewcommand*\@pnumwidth{2.7em}
@@ -46,7 +58,10 @@
 \providecommand{\sphinxtableofcontentshook}{}
 %% Undefine it for compatibility with Sphinx 1.7.9
 \renewcommand{\sphinxtableofcontentshook}{} % Empty the hook
-% Prevent column squeezing of tabulary.
+
+% Prevent column squeezing of tabulary.  \tymin is set by Sphinx as:
+%   \setlength{\tymin}{3\fontcharwd\font`0 }
+% , which is too short.
 \setlength{\tymin}{20em}
 
 % Adjust \headheight for fancyhdr
@@ -56,8 +71,12 @@
 % Translations have Asian (CJK) characters which are only displayed if
 % xeCJK is used
 \IfFontExistsTF{Noto Sans CJK SC}{
-    % This is needed for translations
+    % Load xeCJK when CJK font is available
     \usepackage{xeCJK}
+    % Noto CJK fonts don't provide slant shape. [AutoFakeSlant] permits
+    % its emulation.
+    % Select KR variant at the beginning of each document so that quotation
+    % and apostorph symbols of half-width is used in TOC of Latin documents.
     \IfFontExistsTF{Noto Serif CJK KR}{
 	\setCJKmainfont{Noto Serif CJK KR}[AutoFakeSlant]
     }{
@@ -65,9 +84,11 @@
     }
     \setCJKsansfont{Noto Sans CJK KR}[AutoFakeSlant]
     \setCJKmonofont{Noto Sans Mono CJK KR}[AutoFakeSlant]
+    % Teach xeCJK of half-width symbols
     \xeCJKDeclareCharClass{HalfLeft}{`“,`‘}
     \xeCJKDeclareCharClass{HalfRight}{`”,`’}
     % CJK Language-specific font choices
+    %% for Simplified Chinese
     \IfFontExistsTF{Noto Serif CJK SC}{
 	\newCJKfontfamily[SCmain]\scmain{Noto Serif CJK SC}[AutoFakeSlant]
 	\newCJKfontfamily[SCserif]\scserif{Noto Serif CJK SC}[AutoFakeSlant]
@@ -77,6 +98,7 @@
     }
     \newCJKfontfamily[SCsans]\scsans{Noto Sans CJK SC}[AutoFakeSlant]
     \newCJKfontfamily[SCmono]\scmono{Noto Sans Mono CJK SC}[AutoFakeSlant]
+    %% for Traditional Chinese
     \IfFontExistsTF{Noto Serif CJK TC}{
 	\newCJKfontfamily[TCmain]\tcmain{Noto Serif CJK TC}[AutoFakeSlant]
 	\newCJKfontfamily[TCserif]\tcserif{Noto Serif CJK TC}[AutoFakeSlant]
@@ -86,6 +108,7 @@
     }
     \newCJKfontfamily[TCsans]\tcsans{Noto Sans CJK TC}[AutoFakeSlant]
     \newCJKfontfamily[TCmono]\tcmono{Noto Sans Mono CJK TC}[AutoFakeSlant]
+    %% for Korean
     \IfFontExistsTF{Noto Serif CJK KR}{
 	\newCJKfontfamily[KRmain]\krmain{Noto Serif CJK KR}[AutoFakeSlant]
 	\newCJKfontfamily[KRserif]\krserif{Noto Serif CJK KR}[AutoFakeSlant]
@@ -95,6 +118,7 @@
     }
     \newCJKfontfamily[KRsans]\krsans{Noto Sans CJK KR}[AutoFakeSlant]
     \newCJKfontfamily[KRmono]\krmono{Noto Sans Mono CJK KR}[AutoFakeSlant]
+    %% for Japanese
     \IfFontExistsTF{Noto Serif CJK JP}{
 	\newCJKfontfamily[JPmain]\jpmain{Noto Serif CJK JP}[AutoFakeSlant]
 	\newCJKfontfamily[JPserif]\jpserif{Noto Serif CJK JP}[AutoFakeSlant]
@@ -108,34 +132,39 @@
     \providecommand{\onehalfspacing}{}
     \providecommand{\singlespacing}{}
     % Define custom macros to on/off CJK
+    %% One and half spacing for CJK contents
     \newcommand{\kerneldocCJKon}{\makexeCJKactive\onehalfspacing}
     \newcommand{\kerneldocCJKoff}{\makexeCJKinactive\singlespacing}
+    % Define custom macros for switching CJK font setting
+    %% for Simplified Chinese
     \newcommand{\kerneldocBeginSC}{%
 	\begingroup%
 	\scmain%
-	\xeCJKDeclareCharClass{FullLeft}{`“,`‘}%
-	\xeCJKDeclareCharClass{FullRight}{`”,`’}%
+	\xeCJKDeclareCharClass{FullLeft}{`“,`‘}% Full-width in SC
+	\xeCJKDeclareCharClass{FullRight}{`”,`’}% Full-width in SC
 	\renewcommand{\CJKrmdefault}{SCserif}%
 	\renewcommand{\CJKsfdefault}{SCsans}%
 	\renewcommand{\CJKttdefault}{SCmono}%
-	\xeCJKsetup{CJKspace = false}%
+	\xeCJKsetup{CJKspace = false}% gobble white spaces by ' '
 	% For CJK ascii-art alignment
 	\setmonofont{Noto Sans Mono CJK SC}[AutoFakeSlant]%
     }
     \newcommand{\kerneldocEndSC}{\endgroup}
+    %% for Traditional Chinese
     \newcommand{\kerneldocBeginTC}{%
 	\begingroup%
 	\tcmain%
-	\xeCJKDeclareCharClass{FullLeft}{`“,`‘}%
-	\xeCJKDeclareCharClass{FullRight}{`”,`’}%
+	\xeCJKDeclareCharClass{FullLeft}{`“,`‘}% Full-width in TC
+	\xeCJKDeclareCharClass{FullRight}{`”,`’}% Full-width in TC
 	\renewcommand{\CJKrmdefault}{TCserif}%
 	\renewcommand{\CJKsfdefault}{TCsans}%
 	\renewcommand{\CJKttdefault}{TCmono}%
-	\xeCJKsetup{CJKspace = false}%
+	\xeCJKsetup{CJKspace = false}% gobble white spaces by ' '
 	% For CJK ascii-art alignment
 	\setmonofont{Noto Sans Mono CJK TC}[AutoFakeSlant]%
     }
     \newcommand{\kerneldocEndTC}{\endgroup}
+    %% for Korean
     \newcommand{\kerneldocBeginKR}{%
 	\begingroup%
 	\krmain%
@@ -147,29 +176,35 @@
 	\setmonofont{Noto Sans Mono CJK KR}[AutoFakeSlant]%
     }
     \newcommand{\kerneldocEndKR}{\endgroup}
+    %% for Japanese
     \newcommand{\kerneldocBeginJP}{%
 	\begingroup%
 	\jpmain%
 	\renewcommand{\CJKrmdefault}{JPserif}%
 	\renewcommand{\CJKsfdefault}{JPsans}%
 	\renewcommand{\CJKttdefault}{JPmono}%
-	\xeCJKsetup{CJKspace = false}%
+	\xeCJKsetup{CJKspace = false}% gobble white space by ' '
 	% For CJK ascii-art alignment
 	\setmonofont{Noto Sans Mono CJK JP}[AutoFakeSlant]%
     }
     \newcommand{\kerneldocEndJP}{\endgroup}
+
     % Single spacing in literal blocks
     \fvset{baselinestretch=1}
     % To customize \sphinxtableofcontents
     \usepackage{etoolbox}
     % Inactivate CJK after tableofcontents
     \apptocmd{\sphinxtableofcontents}{\kerneldocCJKoff}{}{}
-    \xeCJKsetup{CJKspace = true} % For inter-phrase space of Korean TOC
+    \xeCJKsetup{CJKspace = true}% For inter-phrase space of Korean TOC
 }{ % No CJK font found
-    % Custom macros to on/off CJK (Dummy)
+    % Custom macros to on/off CJK and switch CJK fonts (Dummy)
     \newcommand{\kerneldocCJKon}{}
     \newcommand{\kerneldocCJKoff}{}
+    %% By defining \kerneldocBegin(SC|TC|KR|JP) as commands with an argument
+    %% and ignore the argument (#1) in their definitions, whole contents of
+    %% CJK chapters can be ignored.
     \newcommand{\kerneldocBeginSC}[1]{%
+	%% Put a note on missing CJK fonts in place of zh_CN translation.
 	\begin{sphinxadmonition}{note}{Note:}
 	    ``Noto Sans CJK'' fonts are not found while building this PDF\@.
 	    Translations of zh\_CN, zh\_TW, ko\_KR, and ja\_JP are skipped.
-- 
GitLab


From 96c7f3b53cc34d8d11ce0b8c6baabba1c5ee16e0 Mon Sep 17 00:00:00 2001
From: Akira Yokosawa <akiyks@gmail.com>
Date: Fri, 18 Feb 2022 23:26:34 +0900
Subject: [PATCH 0639/1586] Reword note on missing CJK fonts

Use past tense as the fonts can be installed after the fact.
Add suggestion to install "Noto Sans CJK" and "Noto Serif CJK"
font families. ("Noto Serif CJK" is optional.)

Signed-off-by: Akira Yokosawa <akiyks@gmail.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Link: https://lore.kernel.org/r/dfefa601-c58d-c86c-953f-5e4454db9409@gmail.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/sphinx/kerneldoc-preamble.sty | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/Documentation/sphinx/kerneldoc-preamble.sty b/Documentation/sphinx/kerneldoc-preamble.sty
index 5e59900e994d9..9d0204dc38be2 100644
--- a/Documentation/sphinx/kerneldoc-preamble.sty
+++ b/Documentation/sphinx/kerneldoc-preamble.sty
@@ -205,9 +205,16 @@
     %% CJK chapters can be ignored.
     \newcommand{\kerneldocBeginSC}[1]{%
 	%% Put a note on missing CJK fonts in place of zh_CN translation.
-	\begin{sphinxadmonition}{note}{Note:}
-	    ``Noto Sans CJK'' fonts are not found while building this PDF\@.
-	    Translations of zh\_CN, zh\_TW, ko\_KR, and ja\_JP are skipped.
+	\begin{sphinxadmonition}{note}{Note on missing fonts:}
+	    Translations of Simplified Chinese (zh\_CN), Traditional Chinese
+	    (zh\_TW), Korean (ko\_KR), and Japanese (ja\_JP) were skipped
+	    due to the lack of suitable font families.
+
+	    If you want them, please install ``Noto Sans CJK'' font families
+	    by following instructions from
+	    \sphinxcode{./scripts/sphinx-pre-install}.
+	    Having optional ``Noto Serif CJK'' font families will improve
+	    the looks of those translations.
 	\end{sphinxadmonition}}
     \newcommand{\kerneldocEndSC}{}
     \newcommand{\kerneldocBeginTC}[1]{}
-- 
GitLab


From 18ab307823bb643fc985d316448f2d70eb1cb7c3 Mon Sep 17 00:00:00 2001
From: Jonathan Corbet <corbet@lwn.net>
Date: Thu, 24 Feb 2022 12:35:07 -0700
Subject: [PATCH 0640/1586] docs: fix RST error in vm/page_owner.rst

Commit f7df2b1cf03a ("tools/vm/page_owner_sort.c: count and sort by mem")
added a literal text block without the necessary markup, leading to these
warnings in the docs build:

  Documentation/vm/page_owner.rst:92: WARNING: Unexpected indentation.
  Documentation/vm/page_owner.rst:96: WARNING: Unexpected indentation.
  Documentation/vm/page_owner.rst:107: WARNING: Unexpected indentation.

Add the necessary colons and make the build quieter.

Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/vm/page_owner.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/vm/page_owner.rst b/Documentation/vm/page_owner.rst
index bc28edaf3de1f..905555e3e4836 100644
--- a/Documentation/vm/page_owner.rst
+++ b/Documentation/vm/page_owner.rst
@@ -85,7 +85,7 @@ Usage
 	cat /sys/kernel/debug/page_owner > page_owner_full.txt
 	./page_owner_sort page_owner_full.txt sorted_page_owner.txt
 
-   The general output of ``page_owner_full.txt`` is as follows:
+   The general output of ``page_owner_full.txt`` is as follows::
 
 	Page allocated via order XXX, ...
 	PFN XXX ...
@@ -100,7 +100,7 @@ Usage
    and pages of buf, and finally sorts them according to the times.
 
    See the result about who allocated each page
-   in the ``sorted_page_owner.txt``. General output:
+   in the ``sorted_page_owner.txt``. General output::
 
 	XXX times, XXX pages:
 	Page allocated via order XXX, ...
-- 
GitLab


From 1ecf393fc5a5962ebbe8d011dede6cab880f349b Mon Sep 17 00:00:00 2001
From: Thorsten Leemhuis <linux@leemhuis.info>
Date: Wed, 16 Feb 2022 07:51:33 +0100
Subject: [PATCH 0641/1586] docs: add two documents about regression handling

Create two documents explaining various aspects around regression
handling and tracking; one is aimed at users, the other targets
developers.

The texts among others describes the first rule of Linux kernel
development and what it means in practice. They also explain what a
regression actually is and how to report one properly.

Both texts additionally provide a brief introduction to the bot the
kernel's regression tracker uses to facilitate the work, but mention the
use is optional.

To sum things up, provide a few quotes from Linus in the document for
developers to show how serious we take regressions.

Signed-off-by: Thorsten Leemhuis <linux@leemhuis.info>
Link: https://lore.kernel.org/r/34e56d3588f22d7e0b4d635ef9c9c3b33ca4ac04.1644994117.git.linux@leemhuis.info
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/admin-guide/index.rst           |   1 +
 .../admin-guide/reporting-regressions.rst     | 439 ++++++++++++
 .../process/handling-regressions.rst          | 659 ++++++++++++++++++
 Documentation/process/index.rst               |   1 +
 MAINTAINERS                                   |   2 +
 5 files changed, 1102 insertions(+)
 create mode 100644 Documentation/admin-guide/reporting-regressions.rst
 create mode 100644 Documentation/process/handling-regressions.rst

diff --git a/Documentation/admin-guide/index.rst b/Documentation/admin-guide/index.rst
index 1bedab498104a..5bfafcbb9562c 100644
--- a/Documentation/admin-guide/index.rst
+++ b/Documentation/admin-guide/index.rst
@@ -35,6 +35,7 @@ problems and bugs in particular.
    :maxdepth: 1
 
    reporting-issues
+   reporting-regressions
    security-bugs
    bug-hunting
    bug-bisect
diff --git a/Documentation/admin-guide/reporting-regressions.rst b/Documentation/admin-guide/reporting-regressions.rst
new file mode 100644
index 0000000000000..6fbd24ceb3bfe
--- /dev/null
+++ b/Documentation/admin-guide/reporting-regressions.rst
@@ -0,0 +1,439 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR CC-BY-4.0)
+.. [see the bottom of this file for redistribution information]
+
+Reporting regressions
++++++++++++++++++++++
+
+"*We don't cause regressions*" is the first rule of Linux kernel development;
+Linux founder and lead developer Linus Torvalds established it himself and
+ensures it's obeyed.
+
+This document describes what the rule means for users and how the Linux kernel's
+development model ensures to address all reported regressions; aspects relevant
+for kernel developers are left to Documentation/process/handling-regressions.rst.
+
+
+The important bits (aka "TL;DR")
+================================
+
+#. It's a regression if something running fine with one Linux kernel works worse
+   or not at all with a newer version. Note, the newer kernel has to be compiled
+   using a similar configuration; the detailed explanations below describes this
+   and other fine print in more detail.
+
+#. Report your issue as outlined in Documentation/admin-guide/reporting-issues.rst,
+   it already covers all aspects important for regressions and repeated
+   below for convenience. Two of them are important: start your report's subject
+   with "[REGRESSION]" and CC or forward it to `the regression mailing list
+   <https://lore.kernel.org/regressions/>`_ (regressions@lists.linux.dev).
+
+#. Optional, but recommended: when sending or forwarding your report, make the
+   Linux kernel regression tracking bot "regzbot" track the issue by specifying
+   when the regression started like this::
+
+       #regzbot introduced v5.13..v5.14-rc1
+
+
+All the details on Linux kernel regressions relevant for users
+==============================================================
+
+
+The important basics
+--------------------
+
+
+What is a "regression" and what is the "no regressions rule"?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+It's a regression if some application or practical use case running fine with
+one Linux kernel works worse or not at all with a newer version compiled using a
+similar configuration. The "no regressions rule" forbids this to take place; if
+it happens by accident, developers that caused it are expected to quickly fix
+the issue.
+
+It thus is a regression when a WiFi driver from Linux 5.13 works fine, but with
+5.14 doesn't work at all, works significantly slower, or misbehaves somehow.
+It's also a regression if a perfectly working application suddenly shows erratic
+behavior with a newer kernel version; such issues can be caused by changes in
+procfs, sysfs, or one of the many other interfaces Linux provides to userland
+software. But keep in mind, as mentioned earlier: 5.14 in this example needs to
+be built from a configuration similar to the one from 5.13. This can be achieved
+using ``make olddefconfig``, as explained in more detail below.
+
+Note the "practical use case" in the first sentence of this section: developers
+despite the "no regressions" rule are free to change any aspect of the kernel
+and even APIs or ABIs to userland, as long as no existing application or use
+case breaks.
+
+Also be aware the "no regressions" rule covers only interfaces the kernel
+provides to the userland. It thus does not apply to kernel-internal interfaces
+like the module API, which some externally developed drivers use to hook into
+the kernel.
+
+How do I report a regression?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Just report the issue as outlined in
+Documentation/admin-guide/reporting-issues.rst, it already describes the
+important points. The following aspects outlined there are especially relevant
+for regressions:
+
+ * When checking for existing reports to join, also search the `archives of the
+   Linux regressions mailing list <https://lore.kernel.org/regressions/>`_ and
+   `regzbot's web-interface <https://linux-regtracking.leemhuis.info/regzbot/>`_.
+
+ * Start your report's subject with "[REGRESSION]".
+
+ * In your report, clearly mention the last kernel version that worked fine and
+   the first broken one. Ideally try to find the exact change causing the
+   regression using a bisection, as explained below in more detail.
+
+ * Remember to let the Linux regressions mailing list
+   (regressions@lists.linux.dev) know about your report:
+
+   * If you report the regression by mail, CC the regressions list.
+
+   * If you report your regression to some bug tracker, forward the submitted
+     report by mail to the regressions list while CCing the maintainer and the
+     mailing list for the subsystem in question.
+
+   If it's a regression within a stable or longterm series (e.g.
+   v5.15.3..v5.15.5), remember to CC the `Linux stable mailing list
+   <https://lore.kernel.org/stable/>`_ (stable@vger.kernel.org).
+
+  In case you performed a successful bisection, add everyone to the CC the
+  culprit's commit message mentions in lines starting with "Signed-off-by:".
+
+When CCing for forwarding your report to the list, consider directly telling the
+aforementioned Linux kernel regression tracking bot about your report. To do
+that, include a paragraph like this in your mail::
+
+       #regzbot introduced: v5.13..v5.14-rc1
+
+Regzbot will then consider your mail a report for a regression introduced in the
+specified version range. In above case Linux v5.13 still worked fine and Linux
+v5.14-rc1 was the first version where you encountered the issue. If you
+performed a bisection to find the commit that caused the regression, specify the
+culprit's commit-id instead::
+
+       #regzbot introduced: 1f2e3d4c5d
+
+Placing such a "regzbot command" is in your interest, as it will ensure the
+report won't fall through the cracks unnoticed. If you omit this, the Linux
+kernel's regressions tracker will take care of telling regzbot about your
+regression, as long as you send a copy to the regressions mailing lists. But the
+regression tracker is just one human which sometimes has to rest or occasionally
+might even enjoy some time away from computers (as crazy as that might sound).
+Relying on this person thus will result in an unnecessary delay before the
+regressions becomes mentioned `on the list of tracked and unresolved Linux
+kernel regressions <https://linux-regtracking.leemhuis.info/regzbot/>`_ and the
+weekly regression reports sent by regzbot. Such delays can result in Linus
+Torvalds being unaware of important regressions when deciding between "continue
+development or call this finished and release the final?".
+
+Are really all regressions fixed?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Nearly all of them are, as long as the change causing the regression (the
+"culprit commit") is reliably identified. Some regressions can be fixed without
+this, but often it's required.
+
+Who needs to find the root cause of a regression?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Developers of the affected code area should try to locate the culprit on their
+own. But for them that's often impossible to do with reasonable effort, as quite
+a lot of issues only occur in a particular environment outside the developer's
+reach -- for example, a specific hardware platform, firmware, Linux distro,
+system's configuration, or application. That's why in the end it's often up to
+the reporter to locate the culprit commit; sometimes users might even need to
+run additional tests afterwards to pinpoint the exact root cause. Developers
+should offer advice and reasonably help where they can, to make this process
+relatively easy and achievable for typical users.
+
+How can I find the culprit?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Perform a bisection, as roughly outlined in
+Documentation/admin-guide/reporting-issues.rst and described in more detail by
+Documentation/admin-guide/bug-bisect.rst. It might sound like a lot of work, but
+in many cases finds the culprit relatively quickly. If it's hard or
+time-consuming to reliably reproduce the issue, consider teaming up with other
+affected users to narrow down the search range together.
+
+Who can I ask for advice when it comes to regressions?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Send a mail to the regressions mailing list (regressions@lists.linux.dev) while
+CCing the Linux kernel's regression tracker (regressions@leemhuis.info); if the
+issue might better be dealt with in private, feel free to omit the list.
+
+
+Additional details about regressions
+------------------------------------
+
+
+What is the goal of the "no regressions rule"?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Users should feel safe when updating kernel versions and not have to worry
+something might break. This is in the interest of the kernel developers to make
+updating attractive: they don't want users to stay on stable or longterm Linux
+series that are either abandoned or more than one and a half years old. That's
+in everybody's interest, as `those series might have known bugs, security
+issues, or other problematic aspects already fixed in later versions
+<http://www.kroah.com/log/blog/2018/08/24/what-stable-kernel-should-i-use/>`_.
+Additionally, the kernel developers want to make it simple and appealing for
+users to test the latest pre-release or regular release. That's also in
+everybody's interest, as it's a lot easier to track down and fix problems, if
+they are reported shortly after being introduced.
+
+Is the "no regressions" rule really adhered in practice?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+It's taken really seriously, as can be seen by many mailing list posts from
+Linux creator and lead developer Linus Torvalds, some of which are quoted in
+Documentation/process/handling-regressions.rst.
+
+Exceptions to this rule are extremely rare; in the past developers almost always
+turned out to be wrong when they assumed a particular situation was warranting
+an exception.
+
+Who ensures the "no regressions" is actually followed?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The subsystem maintainers should take care of that, which are watched and
+supported by the tree maintainers -- e.g. Linus Torvalds for mainline and
+Greg Kroah-Hartman et al. for various stable/longterm series.
+
+All of them are helped by people trying to ensure no regression report falls
+through the cracks. One of them is Thorsten Leemhuis, who's currently acting as
+the Linux kernel's "regressions tracker"; to facilitate this work he relies on
+regzbot, the Linux kernel regression tracking bot. That's why you want to bring
+your report on the radar of these people by CCing or forwarding each report to
+the regressions mailing list, ideally with a "regzbot command" in your mail to
+get it tracked immediately.
+
+Is it a regression, if the issue can be avoided by updating some software?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Almost always: yes. If a developer tells you otherwise, ask the regression
+tracker for advice as outlined above.
+
+Is it a regression, if a newer kernel works slower or consumes more energy?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Yes, but the difference has to be significant. A five percent slow-down in a
+micro-benchmark thus is unlikely to qualify as regression, unless it also
+influences the results of a broad benchmark by more than one percent. If in
+doubt, ask for advice.
+
+Is it a regression, if an external kernel module breaks when updating Linux?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+No, as the "no regression" rule is about interfaces and services the Linux
+kernel provides to the userland. It thus does not cover building or running
+externally developed kernel modules, as they run in kernel-space and hook into
+the kernel using internal interfaces occasionally changed.
+
+How are regressions handled that are caused by security fixes?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+In extremely rare situations security issues can't be fixed without causing
+regressions; those fixes are given way, as they are the lesser evil in the end.
+Luckily this middling almost always can be avoided, as key developers for the
+affected area and often Linus Torvalds himself try very hard to fix security
+issues without causing regressions.
+
+If you nevertheless face such a case, check the mailing list archives if people
+tried their best to avoid the regression. If not, report it; if in doubt, ask
+for advice as outlined above.
+
+What happens if fixing a regression is impossible without causing another?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Sadly these things happen, but luckily not very often; if they occur, expert
+developers of the affected code area should look into the issue to find a fix
+that avoids regressions or at least their impact. If you run into such a
+situation, do what was outlined already for regressions caused by security
+fixes: check earlier discussions if people already tried their best and ask for
+advice if in doubt.
+
+A quick note while at it: these situations could be avoided, if people would
+regularly give mainline pre-releases (say v5.15-rc1 or -rc3) from each
+development cycle a test run. This is best explained by imagining a change
+integrated between Linux v5.14 and v5.15-rc1 which causes a regression, but at
+the same time is a hard requirement for some other improvement applied for
+5.15-rc1. All these changes often can simply be reverted and the regression thus
+solved, if someone finds and reports it before 5.15 is released. A few days or
+weeks later this solution can become impossible, as some software might have
+started to rely on aspects introduced by one of the follow-up changes: reverting
+all changes would then cause a regression for users of said software and thus is
+out of the question.
+
+Is it a regression, if some feature I relied on was removed months ago?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+It is, but often it's hard to fix such regressions due to the aspects outlined
+in the previous section. It hence needs to be dealt with on a case-by-case
+basis. This is another reason why it's in everybody's interest to regularly test
+mainline pre-releases.
+
+Does the "no regression" rule apply if I seem to be the only affected person?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+It does, but only for practical usage: the Linux developers want to be free to
+remove support for hardware only to be found in attics and museums anymore.
+
+Note, sometimes regressions can't be avoided to make progress -- and the latter
+is needed to prevent Linux from stagnation. Hence, if only very few users seem
+to be affected by a regression, it for the greater good might be in their and
+everyone else's interest to lettings things pass. Especially if there is an
+easy way to circumvent the regression somehow, for example by updating some
+software or using a kernel parameter created just for this purpose.
+
+Does the regression rule apply for code in the staging tree as well?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Not according to the `help text for the configuration option covering all
+staging code <https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/staging/Kconfig>`_,
+which since its early days states::
+
+       Please note that these drivers are under heavy development, may or
+       may not work, and may contain userspace interfaces that most likely
+       will be changed in the near future.
+
+The staging developers nevertheless often adhere to the "no regressions" rule,
+but sometimes bend it to make progress. That's for example why some users had to
+deal with (often negligible) regressions when a WiFi driver from the staging
+tree was replaced by a totally different one written from scratch.
+
+Why do later versions have to be "compiled with a similar configuration"?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Because the Linux kernel developers sometimes integrate changes known to cause
+regressions, but make them optional and disable them in the kernel's default
+configuration. This trick allows progress, as the "no regressions" rule
+otherwise would lead to stagnation.
+
+Consider for example a new security feature blocking access to some kernel
+interfaces often abused by malware, which at the same time are required to run a
+few rarely used applications. The outlined approach makes both camps happy:
+people using these applications can leave the new security feature off, while
+everyone else can enable it without running into trouble.
+
+How to create a configuration similar to the one of an older kernel?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Start your machine with a known-good kernel and configure the newer Linux
+version with ``make olddefconfig``. This makes the kernel's build scripts pick
+up the configuration file (the ".config" file) from the running kernel as base
+for the new one you are about to compile; afterwards they set all new
+configuration options to their default value, which should disable new features
+that might cause regressions.
+
+Can I report a regression I found with pre-compiled vanilla kernels?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+You need to ensure the newer kernel was compiled with a similar configuration
+file as the older one (see above), as those that built them might have enabled
+some known-to-be incompatible feature for the newer kernel. If in doubt, report
+the matter to the kernel's provider and ask for advice.
+
+
+More about regression tracking with "regzbot"
+---------------------------------------------
+
+What is regression tracking and why should I care about it?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Rules like "no regressions" need someone to ensure they are followed, otherwise
+they are broken either accidentally or on purpose. History has shown this to be
+true for Linux kernel development as well. That's why Thorsten Leemhuis, the
+Linux Kernel's regression tracker, and some people try to ensure all regression
+are fixed by keeping an eye on them until they are resolved. Neither of them are
+paid for this, that's why the work is done on a best effort basis.
+
+Why and how are Linux kernel regressions tracked using a bot?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Tracking regressions completely manually has proven to be quite hard due to the
+distributed and loosely structured nature of Linux kernel development process.
+That's why the Linux kernel's regression tracker developed regzbot to facilitate
+the work, with the long term goal to automate regression tracking as much as
+possible for everyone involved.
+
+Regzbot works by watching for replies to reports of tracked regressions.
+Additionally, it's looking out for posted or committed patches referencing such
+reports with "Link:" tags; replies to such patch postings are tracked as well.
+Combined this data provides good insights into the current state of the fixing
+process.
+
+How to see which regressions regzbot tracks currently?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Check out `regzbot's web-interface <https://linux-regtracking.leemhuis.info/regzbot/>`_.
+
+What kind of issues are supposed to be tracked by regzbot?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The bot is meant to track regressions, hence please don't involve regzbot for
+regular issues. But it's okay for the Linux kernel's regression tracker if you
+involve regzbot to track severe issues, like reports about hangs, corrupted
+data, or internal errors (Panic, Oops, BUG(), warning, ...).
+
+How to change aspects of a tracked regression?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+By using a 'regzbot command' in a direct or indirect reply to the mail with the
+report. The easiest way to do that: find the report in your "Sent" folder or the
+mailing list archive and reply to it using your mailer's "Reply-all" function.
+In that mail, use one of the following commands in a stand-alone paragraph (IOW:
+use blank lines to separate one or multiple of these commands from the rest of
+the mail's text).
+
+ * Update when the regression started to happen, for example after performing a
+   bisection::
+
+       #regzbot introduced: 1f2e3d4c5d
+
+ * Set or update the title::
+
+       #regzbot title: foo
+
+ * Monitor a discussion or bugzilla.kernel.org ticket where additions aspects of
+   the issue or a fix are discussed:::
+
+       #regzbot monitor: https://lore.kernel.org/r/30th.anniversary.repost@klaava.Helsinki.FI/
+       #regzbot monitor: https://bugzilla.kernel.org/show_bug.cgi?id=123456789
+
+ * Point to a place with further details of interest, like a mailing list post
+   or a ticket in a bug tracker that are slightly related, but about a different
+   topic::
+
+       #regzbot link: https://bugzilla.kernel.org/show_bug.cgi?id=123456789
+
+ * Mark a regression as invalid::
+
+       #regzbot invalid: wasn't a regression, problem has always existed
+
+Regzbot supports a few other commands primarily used by developers or people
+tracking regressions. They and more details about the aforementioned regzbot
+commands can be found in the `getting started guide
+<https://gitlab.com/knurd42/regzbot/-/blob/main/docs/getting_started.md>`_ and
+the `reference documentation <https://gitlab.com/knurd42/regzbot/-/blob/main/docs/reference.md>`_
+for regzbot.
+
+..
+   end-of-content
+..
+   This text is available under GPL-2.0+ or CC-BY-4.0, as stated at the top
+   of the file. If you want to distribute this text under CC-BY-4.0 only,
+   please use "The Linux kernel developers" for author attribution and link
+   this as source:
+   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/Documentation/admin-guide/reporting-regressions.rst
+..
+   Note: Only the content of this RST file as found in the Linux kernel sources
+   is available under CC-BY-4.0, as versions of this text that were processed
+   (for example by the kernel's build system) might contain content taken from
+   files which use a more restrictive license.
diff --git a/Documentation/process/handling-regressions.rst b/Documentation/process/handling-regressions.rst
new file mode 100644
index 0000000000000..e1102a3207e3f
--- /dev/null
+++ b/Documentation/process/handling-regressions.rst
@@ -0,0 +1,659 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR CC-BY-4.0)
+.. See the bottom of this file for additional redistribution information.
+
+Handling regressions
+++++++++++++++++++++
+
+*We don't cause regressions* -- this document describes what this "first rule of
+Linux kernel development" means in practice for developers. It complements
+Documentation/admin-guide/reporting-regressions.rst, which covers the topic from a
+user's point of view; if you never read that text, go and at least skim over it
+before continuing here.
+
+The important bits (aka "The TL;DR")
+====================================
+
+#. Ensure subscribers of the `regression mailing list <https://lore.kernel.org/regressions/>`_
+   (regressions@lists.linux.dev) quickly become aware of any new regression
+   report:
+
+    * When receiving a mailed report that did not CC the list, bring it into the
+      loop by immediately sending at least a brief "Reply-all" with the list
+      CCed.
+
+    * Forward or bounce any reports submitted in bug trackers to the list.
+
+#. Make the Linux kernel regression tracking bot "regzbot" track the issue (this
+   is optional, but recommended):
+
+    * For mailed reports, check if the reporter included a line like ``#regzbot
+      introduced v5.13..v5.14-rc1``. If not, send a reply (with the regressions
+      list in CC) containing a paragraph like the following, which tells regzbot
+      when the issue started to happen::
+
+       #regzbot ^introduced 1f2e3d4c5b6a
+
+    * When forwarding reports from a bug tracker to the regressions list (see
+      above), include a paragraph like the following::
+
+       #regzbot introduced: v5.13..v5.14-rc1
+       #regzbot from: Some N. Ice Human <some.human@example.com>
+       #regzbot monitor: http://some.bugtracker.example.com/ticket?id=123456789
+
+#. When submitting fixes for regressions, add "Link:" tags to the patch
+   description pointing to all places where the issue was reported, as
+   mandated by Documentation/process/submitting-patches.rst and
+   :ref:`Documentation/process/5.Posting.rst <development_posting>`.
+
+
+All the details on Linux kernel regressions relevant for developers
+===================================================================
+
+
+The important basics in more detail
+-----------------------------------
+
+
+What to do when receiving regression reports
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Ensure the Linux kernel's regression tracker and others subscribers of the
+`regression mailing list <https://lore.kernel.org/regressions/>`_
+(regressions@lists.linux.dev) become aware of any newly reported regression:
+
+ * When you receive a report by mail that did not CC the list, immediately bring
+   it into the loop by sending at least a brief "Reply-all" with the list CCed;
+   try to ensure it gets CCed again in case you reply to a reply that omitted
+   the list.
+
+ * If a report submitted in a bug tracker hits your Inbox, forward or bounce it
+   to the list. Consider checking the list archives beforehand, if the reporter
+   already forwarded the report as instructed by
+   Documentation/admin-guide/reporting-issues.rst.
+
+When doing either, consider making the Linux kernel regression tracking bot
+"regzbot" immediately start tracking the issue:
+
+ * For mailed reports, check if the reporter included a "regzbot command" like
+   ``#regzbot introduced 1f2e3d4c5b6a``. If not, send a reply (with the
+   regressions list in CC) with a paragraph like the following:::
+
+       #regzbot ^introduced: v5.13..v5.14-rc1
+
+   This tells regzbot the version range in which the issue started to happen;
+   you can specify a range using commit-ids as well or state a single commit-id
+   in case the reporter bisected the culprit.
+
+   Note the caret (^) before the "introduced": it tells regzbot to treat the
+   parent mail (the one you reply to) as the initial report for the regression
+   you want to see tracked; that's important, as regzbot will later look out
+   for patches with "Link:" tags pointing to the report in the archives on
+   lore.kernel.org.
+
+ * When forwarding a regressions reported to a bug tracker, include a paragraph
+   with these regzbot commands::
+
+       #regzbot introduced: 1f2e3d4c5b6a
+       #regzbot from: Some N. Ice Human <some.human@example.com>
+       #regzbot monitor: http://some.bugtracker.example.com/ticket?id=123456789
+
+   Regzbot will then automatically associate patches with the report that
+   contain "Link:" tags pointing to your mail or the mentioned ticket.
+
+What's important when fixing regressions
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+You don't need to do anything special when submitting fixes for regression, just
+remember to do what Documentation/process/submitting-patches.rst,
+:ref:`Documentation/process/5.Posting.rst <development_posting>`, and
+Documentation/process/stable-kernel-rules.rst already explain in more detail:
+
+ * Point to all places where the issue was reported using "Link:" tags::
+
+       Link: https://lore.kernel.org/r/30th.anniversary.repost@klaava.Helsinki.FI/
+       Link: https://bugzilla.kernel.org/show_bug.cgi?id=1234567890
+
+ * Add a "Fixes:" tag to specify the commit causing the regression.
+
+ * If the culprit was merged in an earlier development cycle, explicitly mark
+   the fix for backporting using the ``Cc: stable@vger.kernel.org`` tag.
+
+All this is expected from you and important when it comes to regression, as
+these tags are of great value for everyone (you included) that might be looking
+into the issue weeks, months, or years later. These tags are also crucial for
+tools and scripts used by other kernel developers or Linux distributions; one of
+these tools is regzbot, which heavily relies on the "Link:" tags to associate
+reports for regression with changes resolving them.
+
+
+More aspects regarding regressions developers should be aware of
+----------------------------------------------------------------
+
+
+How to deal with changes where a risk of regression is known
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Evaluate how big the risk of regressions is, for example by performing a code
+search in Linux distributions and Git forges. Also consider asking other
+developers or projects likely to be affected to evaluate or even test the
+proposed change; if problems surface, maybe some solution acceptable for all
+can be found.
+
+If the risk of regressions in the end seems to be relatively small, go ahead
+with the change, but let all involved parties know about the risk. Hence, make
+sure your patch description makes this aspect obvious. Once the change is
+merged, tell the Linux kernel's regression tracker and the regressions mailing
+list about the risk, so everyone has the change on the radar in case reports
+trickle in. Depending on the risk, you also might want to ask the subsystem
+maintainer to mention the issue in his mainline pull request.
+
+What else is there to known about regressions?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Check out Documentation/admin-guide/reporting-regressions.rst, it covers a lot
+of other aspects you want might want to be aware of:
+
+ * the purpose of the "no regressions rule"
+
+ * what issues actually qualify as regression
+
+ * who's in charge for finding the root cause of a regression
+
+ * how to handle tricky situations, e.g. when a regression is caused by a
+   security fix or when fixing a regression might cause another one
+
+Whom to ask for advice when it comes to regressions
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Send a mail to the regressions mailing list (regressions@lists.linux.dev) while
+CCing the Linux kernel's regression tracker (regressions@leemhuis.info); if the
+issue might better be dealt with in private, feel free to omit the list.
+
+
+More about regression tracking and regzbot
+------------------------------------------
+
+
+Why the Linux kernel has a regression tracker, and why is regzbot used?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Rules like "no regressions" need someone to ensure they are followed, otherwise
+they are broken either accidentally or on purpose. History has shown this to be
+true for the Linux kernel as well. That's why Thorsten Leemhuis volunteered to
+keep an eye on things as the Linux kernel's regression tracker, who's
+occasionally helped by other people. Neither of them are paid to do this,
+that's why regression tracking is done on a best effort basis.
+
+Earlier attempts to manually track regressions have shown it's an exhausting and
+frustrating work, which is why they were abandoned after a while. To prevent
+this from happening again, Thorsten developed regzbot to facilitate the work,
+with the long term goal to automate regression tracking as much as possible for
+everyone involved.
+
+How does regression tracking work with regzbot?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The bot watches for replies to reports of tracked regressions. Additionally,
+it's looking out for posted or committed patches referencing such reports
+with "Link:" tags; replies to such patch postings are tracked as well.
+Combined this data provides good insights into the current state of the fixing
+process.
+
+Regzbot tries to do its job with as little overhead as possible for both
+reporters and developers. In fact, only reporters are burdened with an extra
+duty: they need to tell regzbot about the regression report using the ``#regzbot
+introduced`` command outlined above; if they don't do that, someone else can
+take care of that using ``#regzbot ^introduced``.
+
+For developers there normally is no extra work involved, they just need to make
+sure to do something that was expected long before regzbot came to light: add
+"Link:" tags to the patch description pointing to all reports about the issue
+fixed.
+
+Do I have to use regzbot?
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+It's in the interest of everyone if you do, as kernel maintainers like Linus
+Torvalds partly rely on regzbot's tracking in their work -- for example when
+deciding to release a new version or extend the development phase. For this they
+need to be aware of all unfixed regression; to do that, Linus is known to look
+into the weekly reports sent by regzbot.
+
+Do I have to tell regzbot about every regression I stumble upon?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Ideally yes: we are all humans and easily forget problems when something more
+important unexpectedly comes up -- for example a bigger problem in the Linux
+kernel or something in real life that's keeping us away from keyboards for a
+while. Hence, it's best to tell regzbot about every regression, except when you
+immediately write a fix and commit it to a tree regularly merged to the affected
+kernel series.
+
+How to see which regressions regzbot tracks currently?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Check `regzbot's web-interface <https://linux-regtracking.leemhuis.info/regzbot/>`_
+for the latest info; alternatively, `search for the latest regression report
+<https://lore.kernel.org/lkml/?q=%22Linux+regressions+report%22+f%3Aregzbot>`_,
+which regzbot normally sends out once a week on Sunday evening (UTC), which is a
+few hours before Linus usually publishes new (pre-)releases.
+
+What places is regzbot monitoring?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Regzbot is watching the most important Linux mailing lists as well as the git
+repositories of linux-next, mainline, and stable/longterm.
+
+What kind of issues are supposed to be tracked by regzbot?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The bot is meant to track regressions, hence please don't involve regzbot for
+regular issues. But it's okay for the Linux kernel's regression tracker if you
+use regzbot to track severe issues, like reports about hangs, corrupted data,
+or internal errors (Panic, Oops, BUG(), warning, ...).
+
+Can I add regressions found by CI systems to regzbot's tracking?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Feel free to do so, if the particular regression likely has impact on practical
+use cases and thus might be noticed by users; hence, please don't involve
+regzbot for theoretical regressions unlikely to show themselves in real world
+usage.
+
+How to interact with regzbot?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+By using a 'regzbot command' in a direct or indirect reply to the mail with the
+regression report. These commands need to be in their own paragraph (IOW: they
+need to be separated from the rest of the mail using blank lines).
+
+One such command is ``#regzbot introduced <version or commit>``, which makes
+regzbot consider your mail as a regressions report added to the tracking, as
+already described above; ``#regzbot ^introduced <version or commit>`` is another
+such command, which makes regzbot consider the parent mail as a report for a
+regression which it starts to track.
+
+Once one of those two commands has been utilized, other regzbot commands can be
+used in direct or indirect replies to the report. You can write them below one
+of the `introduced` commands or in replies to the mail that used one of them
+or itself is a reply to that mail:
+
+ * Set or update the title::
+
+       #regzbot title: foo
+
+ * Monitor a discussion or bugzilla.kernel.org ticket where additions aspects of
+   the issue or a fix are discussed -- for example the posting of a patch fixing
+   the regression::
+
+       #regzbot monitor: https://lore.kernel.org/all/30th.anniversary.repost@klaava.Helsinki.FI/
+
+   Monitoring only works for lore.kernel.org and bugzilla.kernel.org; regzbot
+   will consider all messages in that thread or ticket as related to the fixing
+   process.
+
+ * Point to a place with further details of interest, like a mailing list post
+   or a ticket in a bug tracker that are slightly related, but about a different
+   topic::
+
+       #regzbot link: https://bugzilla.kernel.org/show_bug.cgi?id=123456789
+
+ * Mark a regression as fixed by a commit that is heading upstream or already
+   landed::
+
+       #regzbot fixed-by: 1f2e3d4c5d
+
+ * Mark a regression as a duplicate of another one already tracked by regzbot::
+
+       #regzbot dup-of: https://lore.kernel.org/all/30th.anniversary.repost@klaava.Helsinki.FI/
+
+ * Mark a regression as invalid::
+
+       #regzbot invalid: wasn't a regression, problem has always existed
+
+Is there more to tell about regzbot and its commands?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+More detailed and up-to-date information about the Linux
+kernel's regression tracking bot can be found on its
+`project page <https://gitlab.com/knurd42/regzbot>`_, which among others
+contains a `getting started guide <https://gitlab.com/knurd42/regzbot/-/blob/main/docs/getting_started.md>`_
+and `reference documentation <https://gitlab.com/knurd42/regzbot/-/blob/main/docs/reference.md>`_
+which both cover more details than the above section.
+
+Quotes from Linus about regression
+----------------------------------
+
+Find below a few real life examples of how Linus Torvalds expects regressions to
+be handled:
+
+ * From `2017-10-26 (1/2)
+   <https://lore.kernel.org/lkml/CA+55aFwiiQYJ+YoLKCXjN_beDVfu38mg=Ggg5LFOcqHE8Qi7Zw@mail.gmail.com/>`_::
+
+       If you break existing user space setups THAT IS A REGRESSION.
+
+       It's not ok to say "but we'll fix the user space setup".
+
+       Really. NOT OK.
+
+       [...]
+
+       The first rule is:
+
+        - we don't cause regressions
+
+       and the corollary is that when regressions *do* occur, we admit to
+       them and fix them, instead of blaming user space.
+
+       The fact that you have apparently been denying the regression now for
+       three weeks means that I will revert, and I will stop pulling apparmor
+       requests until the people involved understand how kernel development
+       is done.
+
+ * From `2017-10-26 (2/2)
+   <https://lore.kernel.org/lkml/CA+55aFxW7NMAMvYhkvz1UPbUTUJewRt6Yb51QAx5RtrWOwjebg@mail.gmail.com/>`_::
+
+       People should basically always feel like they can update their kernel
+       and simply not have to worry about it.
+
+       I refuse to introduce "you can only update the kernel if you also
+       update that other program" kind of limitations. If the kernel used to
+       work for you, the rule is that it continues to work for you.
+
+       There have been exceptions, but they are few and far between, and they
+       generally have some major and fundamental reasons for having happened,
+       that were basically entirely unavoidable, and people _tried_hard_ to
+       avoid them. Maybe we can't practically support the hardware any more
+       after it is decades old and nobody uses it with modern kernels any
+       more. Maybe there's a serious security issue with how we did things,
+       and people actually depended on that fundamentally broken model. Maybe
+       there was some fundamental other breakage that just _had_ to have a
+       flag day for very core and fundamental reasons.
+
+       And notice that this is very much about *breaking* peoples environments.
+
+       Behavioral changes happen, and maybe we don't even support some
+       feature any more. There's a number of fields in /proc/<pid>/stat that
+       are printed out as zeroes, simply because they don't even *exist* in
+       the kernel any more, or because showing them was a mistake (typically
+       an information leak). But the numbers got replaced by zeroes, so that
+       the code that used to parse the fields still works. The user might not
+       see everything they used to see, and so behavior is clearly different,
+       but things still _work_, even if they might no longer show sensitive
+       (or no longer relevant) information.
+
+       But if something actually breaks, then the change must get fixed or
+       reverted. And it gets fixed in the *kernel*. Not by saying "well, fix
+       your user space then". It was a kernel change that exposed the
+       problem, it needs to be the kernel that corrects for it, because we
+       have a "upgrade in place" model. We don't have a "upgrade with new
+       user space".
+
+       And I seriously will refuse to take code from people who do not
+       understand and honor this very simple rule.
+
+       This rule is also not going to change.
+
+       And yes, I realize that the kernel is "special" in this respect. I'm
+       proud of it.
+
+       I have seen, and can point to, lots of projects that go "We need to
+       break that use case in order to make progress" or "you relied on
+       undocumented behavior, it sucks to be you" or "there's a better way to
+       do what you want to do, and you have to change to that new better
+       way", and I simply don't think that's acceptable outside of very early
+       alpha releases that have experimental users that know what they signed
+       up for. The kernel hasn't been in that situation for the last two
+       decades.
+
+       We do API breakage _inside_ the kernel all the time. We will fix
+       internal problems by saying "you now need to do XYZ", but then it's
+       about internal kernel API's, and the people who do that then also
+       obviously have to fix up all the in-kernel users of that API. Nobody
+       can say "I now broke the API you used, and now _you_ need to fix it
+       up". Whoever broke something gets to fix it too.
+
+       And we simply do not break user space.
+
+ * From `2020-05-21
+   <https://lore.kernel.org/all/CAHk-=wiVi7mSrsMP=fLXQrXK_UimybW=ziLOwSzFTtoXUacWVQ@mail.gmail.com/>`_::
+
+       The rules about regressions have never been about any kind of
+       documented behavior, or where the code lives.
+
+       The rules about regressions are always about "breaks user workflow".
+
+       Users are literally the _only_ thing that matters.
+
+       No amount of "you shouldn't have used this" or "that behavior was
+       undefined, it's your own fault your app broke" or "that used to work
+       simply because of a kernel bug" is at all relevant.
+
+       Now, reality is never entirely black-and-white. So we've had things
+       like "serious security issue" etc that just forces us to make changes
+       that may break user space. But even then the rule is that we don't
+       really have other options that would allow things to continue.
+
+       And obviously, if users take years to even notice that something
+       broke, or if we have sane ways to work around the breakage that
+       doesn't make for too much trouble for users (ie "ok, there are a
+       handful of users, and they can use a kernel command line to work
+       around it" kind of things) we've also been a bit less strict.
+
+       But no, "that was documented to be broken" (whether it's because the
+       code was in staging or because the man-page said something else) is
+       irrelevant. If staging code is so useful that people end up using it,
+       that means that it's basically regular kernel code with a flag saying
+       "please clean this up".
+
+       The other side of the coin is that people who talk about "API
+       stability" are entirely wrong. API's don't matter either. You can make
+       any changes to an API you like - as long as nobody notices.
+
+       Again, the regression rule is not about documentation, not about
+       API's, and not about the phase of the moon.
+
+       It's entirely about "we caused problems for user space that used to work".
+
+ * From `2017-11-05
+   <https://lore.kernel.org/all/CA+55aFzUvbGjD8nQ-+3oiMBx14c_6zOj2n7KLN3UsJ-qsd4Dcw@mail.gmail.com/>`_::
+
+       And our regression rule has never been "behavior doesn't change".
+       That would mean that we could never make any changes at all.
+
+       For example, we do things like add new error handling etc all the
+       time, which we then sometimes even add tests for in our kselftest
+       directory.
+
+       So clearly behavior changes all the time and we don't consider that a
+       regression per se.
+
+       The rule for a regression for the kernel is that some real user
+       workflow breaks. Not some test. Not a "look, I used to be able to do
+       X, now I can't".
+
+ * From `2018-08-03
+   <https://lore.kernel.org/all/CA+55aFwWZX=CXmWDTkDGb36kf12XmTehmQjbiMPCqCRG2hi9kw@mail.gmail.com/>`_::
+
+       YOU ARE MISSING THE #1 KERNEL RULE.
+
+       We do not regress, and we do not regress exactly because your are 100% wrong.
+
+       And the reason you state for your opinion is in fact exactly *WHY* you
+       are wrong.
+
+       Your "good reasons" are pure and utter garbage.
+
+       The whole point of "we do not regress" is so that people can upgrade
+       the kernel and never have to worry about it.
+
+       > Kernel had a bug which has been fixed
+
+       That is *ENTIRELY* immaterial.
+
+       Guys, whether something was buggy or not DOES NOT MATTER.
+
+       Why?
+
+       Bugs happen. That's a fact of life. Arguing that "we had to break
+       something because we were fixing a bug" is completely insane. We fix
+       tens of bugs every single day, thinking that "fixing a bug" means that
+       we can break something is simply NOT TRUE.
+
+       So bugs simply aren't even relevant to the discussion. They happen,
+       they get found, they get fixed, and it has nothing to do with "we
+       break users".
+
+       Because the only thing that matters IS THE USER.
+
+       How hard is that to understand?
+
+       Anybody who uses "but it was buggy" as an argument is entirely missing
+       the point. As far as the USER was concerned, it wasn't buggy - it
+       worked for him/her.
+
+       Maybe it worked *because* the user had taken the bug into account,
+       maybe it worked because the user didn't notice - again, it doesn't
+       matter. It worked for the user.
+
+       Breaking a user workflow for a "bug" is absolutely the WORST reason
+       for breakage you can imagine.
+
+       It's basically saying "I took something that worked, and I broke it,
+       but now it's better". Do you not see how f*cking insane that statement
+       is?
+
+       And without users, your program is not a program, it's a pointless
+       piece of code that you might as well throw away.
+
+       Seriously. This is *why* the #1 rule for kernel development is "we
+       don't break users". Because "I fixed a bug" is absolutely NOT AN
+       ARGUMENT if that bug fix broke a user setup. You actually introduced a
+       MUCH BIGGER bug by "fixing" something that the user clearly didn't
+       even care about.
+
+       And dammit, we upgrade the kernel ALL THE TIME without upgrading any
+       other programs at all. It is absolutely required, because flag-days
+       and dependencies are horribly bad.
+
+       And it is also required simply because I as a kernel developer do not
+       upgrade random other tools that I don't even care about as I develop
+       the kernel, and I want any of my users to feel safe doing the same
+       time.
+
+       So no. Your rule is COMPLETELY wrong. If you cannot upgrade a kernel
+       without upgrading some other random binary, then we have a problem.
+
+ * From `2021-06-05
+   <https://lore.kernel.org/all/CAHk-=wiUVqHN76YUwhkjZzwTdjMMJf_zN4+u7vEJjmEGh3recw@mail.gmail.com/>`_::
+
+       THERE ARE NO VALID ARGUMENTS FOR REGRESSIONS.
+
+       Honestly, security people need to understand that "not working" is not
+       a success case of security. It's a failure case.
+
+       Yes, "not working" may be secure. But security in that case is *pointless*.
+
+ * From `2011-05-06 (1/3)
+   <https://lore.kernel.org/all/BANLkTim9YvResB+PwRp7QTK-a5VNg2PvmQ@mail.gmail.com/>`_::
+
+       Binary compatibility is more important.
+
+       And if binaries don't use the interface to parse the format (or just
+       parse it wrongly - see the fairly recent example of adding uuid's to
+       /proc/self/mountinfo), then it's a regression.
+
+       And regressions get reverted, unless there are security issues or
+       similar that makes us go "Oh Gods, we really have to break things".
+
+       I don't understand why this simple logic is so hard for some kernel
+       developers to understand. Reality matters. Your personal wishes matter
+       NOT AT ALL.
+
+       If you made an interface that can be used without parsing the
+       interface description, then we're stuck with the interface. Theory
+       simply doesn't matter.
+
+       You could help fix the tools, and try to avoid the compatibility
+       issues that way. There aren't that many of them.
+
+   From `2011-05-06 (2/3)
+   <https://lore.kernel.org/all/BANLkTi=KVXjKR82sqsz4gwjr+E0vtqCmvA@mail.gmail.com/>`_::
+
+       it's clearly NOT an internal tracepoint. By definition. It's being
+       used by powertop.
+
+   From `2011-05-06 (3/3)
+   <https://lore.kernel.org/all/BANLkTinazaXRdGovYL7rRVp+j6HbJ7pzhg@mail.gmail.com/>`_::
+
+       We have programs that use that ABI and thus it's a regression if they break.
+
+ * From `2012-07-06 <https://lore.kernel.org/all/CA+55aFwnLJ+0sjx92EGREGTWOx84wwKaraSzpTNJwPVV8edw8g@mail.gmail.com/>`_::
+
+       > Now this got me wondering if Debian _unstable_ actually qualifies as a
+       > standard distro userspace.
+
+       Oh, if the kernel breaks some standard user space, that counts. Tons
+       of people run Debian unstable
+
+ * From `2019-09-15
+   <https://lore.kernel.org/lkml/CAHk-=wiP4K8DRJWsCo=20hn_6054xBamGKF2kPgUzpB5aMaofA@mail.gmail.com/>`_::
+
+       One _particularly_ last-minute revert is the top-most commit (ignoring
+       the version change itself) done just before the release, and while
+       it's very annoying, it's perhaps also instructive.
+
+       What's instructive about it is that I reverted a commit that wasn't
+       actually buggy. In fact, it was doing exactly what it set out to do,
+       and did it very well. In fact it did it _so_ well that the much
+       improved IO patterns it caused then ended up revealing a user-visible
+       regression due to a real bug in a completely unrelated area.
+
+       The actual details of that regression are not the reason I point that
+       revert out as instructive, though. It's more that it's an instructive
+       example of what counts as a regression, and what the whole "no
+       regressions" kernel rule means. The reverted commit didn't change any
+       API's, and it didn't introduce any new bugs. But it ended up exposing
+       another problem, and as such caused a kernel upgrade to fail for a
+       user. So it got reverted.
+
+       The point here being that we revert based on user-reported _behavior_,
+       not based on some "it changes the ABI" or "it caused a bug" concept.
+       The problem was really pre-existing, and it just didn't happen to
+       trigger before. The better IO patterns introduced by the change just
+       happened to expose an old bug, and people had grown to depend on the
+       previously benign behavior of that old issue.
+
+       And never fear, we'll re-introduce the fix that improved on the IO
+       patterns once we've decided just how to handle the fact that we had a
+       bad interaction with an interface that people had then just happened
+       to rely on incidental behavior for before. It's just that we'll have
+       to hash through how to do that (there are no less than three different
+       patches by three different developers being discussed, and there might
+       be more coming...). In the meantime, I reverted the thing that exposed
+       the problem to users for this release, even if I hope it will be
+       re-introduced (perhaps even backported as a stable patch) once we have
+       consensus about the issue it exposed.
+
+       Take-away from the whole thing: it's not about whether you change the
+       kernel-userspace ABI, or fix a bug, or about whether the old code
+       "should never have worked in the first place". It's about whether
+       something breaks existing users' workflow.
+
+       Anyway, that was my little aside on the whole regression thing.  Since
+       it's that "first rule of kernel programming", I felt it is perhaps
+       worth just bringing it up every once in a while
+
+..
+   end-of-content
+..
+   This text is available under GPL-2.0+ or CC-BY-4.0, as stated at the top
+   of the file. If you want to distribute this text under CC-BY-4.0 only,
+   please use "The Linux kernel developers" for author attribution and link
+   this as source:
+   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/Documentation/process/handling-regressions.rst
+..
+   Note: Only the content of this RST file as found in the Linux kernel sources
+   is available under CC-BY-4.0, as versions of this text that were processed
+   (for example by the kernel's build system) might contain content taken from
+   files which use a more restrictive license.
diff --git a/Documentation/process/index.rst b/Documentation/process/index.rst
index 9f1b88492bb33..428e39074f616 100644
--- a/Documentation/process/index.rst
+++ b/Documentation/process/index.rst
@@ -25,6 +25,7 @@ Below are the essential guides that every developer should read.
    code-of-conduct-interpretation
    development-process
    submitting-patches
+   handling-regressions
    programming-language
    coding-style
    maintainer-handbooks
diff --git a/MAINTAINERS b/MAINTAINERS
index ea3e6c9143848..6c62f7e0dc9d9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10438,6 +10438,8 @@ KERNEL REGRESSIONS
 M:	Thorsten Leemhuis <linux@leemhuis.info>
 L:	regressions@lists.linux.dev
 S:	Supported
+F:	Documentation/admin-guide/reporting-regressions.rst
+F:	Documentation/process/handling-regressions.rst
 
 KERNEL SELFTEST FRAMEWORK
 M:	Shuah Khan <shuah@kernel.org>
-- 
GitLab


From d2b40ba2cce207ecea8a740f71e113f03cc75fd5 Mon Sep 17 00:00:00 2001
From: Thorsten Leemhuis <linux@leemhuis.info>
Date: Wed, 16 Feb 2022 07:51:34 +0100
Subject: [PATCH 0642/1586] docs: *-regressions.rst: explain how quickly issues
 should be handled

Add a section with a few rules of thumb about how
quickly developers should address regressions to
Documentation/process/handling-regressions.rst; additionally,
add a short paragraph about this to the companion document
Documentation/admin-guide/reporting-regressions.rst as well.

The rules of thumb were written after studying the quotes from Linus
found in handling-regressions.rst and especially influenced by
statements like "Users are literally the _only_ thing that matters" and
"without users, your program is not a program, it's a pointless piece of
code that you might as well throw away". The author interpreted those in
perspective to how the various Linux kernel series are maintained
currently and what those practices might mean for users running into a
regression on a small or big kernel update.

That for example lead to the paragraph starting with "Aim to get fixes
for regressions mainlined within one week after identifying the culprit,
if the regression was introduced in a stable/longterm release or the
devel cycle for the latest mainline release". Some might see this as
pretty high bar, but on the other hand something like that is needed to
not leave users out in the cold for too long -- which can quickly happen
when updating to the latest stable series, as the previous one is
normally stamped "End of Life" about three or four weeks after a new
mainline release. This makes a lot of users switch during this
timeframe. Any of them thus risk running into regressions not promptly
fixed; even worse, once the previous stable series is EOLed for real,
users that face a regression might be left with only three options:

 (1) continue running an outdated and thus potentially insecure kernel
     version from an abandoned stable series

 (2) run the kernel with the regression

 (3) downgrade to an earlier longterm series still supported

This is better avoided, as (1) puts users and their data in danger, (2)
will only be possible if it's a minor regression that doesn't interfere
with booting or serious usage, and (3) might be regression itself or
impossible on the particular machine, as the users might require drivers
or features only introduced after the latest longterm series branched
of.

In the end this lead to the aforementioned "Aim to fix regression within
one week" part. It's also the reason for the "Try to resolve any
regressions introduced in the current development cycle before its
end.".

Signed-off-by: Thorsten Leemhuis <linux@leemhuis.info>
CC: Linus Torvalds <torvalds@linux-foundation.org>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Link: https://lore.kernel.org/r/a7b717b52c0d54cdec9b6daf56ed6669feddee2c.1644994117.git.linux@leemhuis.info
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../admin-guide/reporting-regressions.rst     | 12 +++
 .../process/handling-regressions.rst          | 87 +++++++++++++++++++
 2 files changed, 99 insertions(+)

diff --git a/Documentation/admin-guide/reporting-regressions.rst b/Documentation/admin-guide/reporting-regressions.rst
index 6fbd24ceb3bfe..d8adccdae23f5 100644
--- a/Documentation/admin-guide/reporting-regressions.rst
+++ b/Documentation/admin-guide/reporting-regressions.rst
@@ -214,6 +214,18 @@ your report on the radar of these people by CCing or forwarding each report to
 the regressions mailing list, ideally with a "regzbot command" in your mail to
 get it tracked immediately.
 
+How quickly are regressions normally fixed?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Developers should fix any reported regression as quickly as possible, to provide
+affected users with a solution in a timely manner and prevent more users from
+running into the issue; nevertheless developers need to take enough time and
+care to ensure regression fixes do not cause additional damage.
+
+The answer thus depends on various factors like the impact of a regression, its
+age, or the Linux series in which it occurs. In the end though, most regressions
+should be fixed within two weeks.
+
 Is it a regression, if the issue can be avoided by updating some software?
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/Documentation/process/handling-regressions.rst b/Documentation/process/handling-regressions.rst
index e1102a3207e3f..abb741b1aeee7 100644
--- a/Documentation/process/handling-regressions.rst
+++ b/Documentation/process/handling-regressions.rst
@@ -45,6 +45,10 @@ The important bits (aka "The TL;DR")
    mandated by Documentation/process/submitting-patches.rst and
    :ref:`Documentation/process/5.Posting.rst <development_posting>`.
 
+#. Try to fix regressions quickly once the culprit has been identified; fixes
+   for most regressions should be merged within two weeks, but some need to be
+   resolved within two or three days.
+
 
 All the details on Linux kernel regressions relevant for developers
 ===================================================================
@@ -125,6 +129,89 @@ tools and scripts used by other kernel developers or Linux distributions; one of
 these tools is regzbot, which heavily relies on the "Link:" tags to associate
 reports for regression with changes resolving them.
 
+Prioritize work on fixing regressions
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+You should fix any reported regression as quickly as possible, to provide
+affected users with a solution in a timely manner and prevent more users from
+running into the issue; nevertheless developers need to take enough time and
+care to ensure regression fixes do not cause additional damage.
+
+In the end though, developers should give their best to prevent users from
+running into situations where a regression leaves them only three options: "run
+a kernel with a regression that seriously impacts usage", "continue running an
+outdated and thus potentially insecure kernel version for more than two weeks
+after a regression's culprit was identified", and "downgrade to a still
+supported kernel series that lack required features".
+
+How to realize this depends a lot on the situation. Here are a few rules of
+thumb for you, in order or importance:
+
+ * Prioritize work on handling regression reports and fixing regression over all
+   other Linux kernel work, unless the latter concerns acute security issues or
+   bugs causing data loss or damage.
+
+ * Always consider reverting the culprit commits and reapplying them later
+   together with necessary fixes, as this might be the least dangerous and
+   quickest way to fix a regression.
+
+ * Developers should handle regressions in all supported kernel series, but are
+   free to delegate the work to the stable team, if the issue probably at no
+   point in time occurred with mainline.
+
+ * Try to resolve any regressions introduced in the current development before
+   its end. If you fear a fix might be too risky to apply only days before a new
+   mainline release, let Linus decide: submit the fix separately to him as soon
+   as possible with the explanation of the situation. He then can make a call
+   and postpone the release if necessary, for example if multiple such changes
+   show up in his inbox.
+
+ * Address regressions in stable, longterm, or proper mainline releases with
+   more urgency than regressions in mainline pre-releases. That changes after
+   the release of the fifth pre-release, aka "-rc5": mainline then becomes as
+   important, to ensure all the improvements and fixes are ideally tested
+   together for at least one week before Linus releases a new mainline version.
+
+ * Fix regressions within two or three days, if they are critical for some
+   reason -- for example, if the issue is likely to affect many users of the
+   kernel series in question on all or certain architectures. Note, this
+   includes mainline, as issues like compile errors otherwise might prevent many
+   testers or continuous integration systems from testing the series.
+
+ * Aim to fix regressions within one week after the culprit was identified, if
+   the issue was introduced in either:
+
+    * a recent stable/longterm release
+
+    * the development cycle of the latest proper mainline release
+
+   In the latter case (say Linux v5.14), try to address regressions even
+   quicker, if the stable series for the predecessor (v5.13) will be abandoned
+   soon or already was stamped "End-of-Life" (EOL) -- this usually happens about
+   three to four weeks after a new mainline release.
+
+ * Try to fix all other regressions within two weeks after the culprit was
+   found. Two or three additional weeks are acceptable for performance
+   regressions and other issues which are annoying, but don't prevent anyone
+   from running Linux (unless it's an issue in the current development cycle,
+   as those should ideally be addressed before the release). A few weeks in
+   total are acceptable if a regression can only be fixed with a risky change
+   and at the same time is affecting only a few users; as much time is
+   also okay if the regression is already present in the second newest longterm
+   kernel series.
+
+Note: The aforementioned time frames for resolving regressions are meant to
+include getting the fix tested, reviewed, and merged into mainline, ideally with
+the fix being in linux-next at least briefly. This leads to delays you need to
+account for.
+
+Subsystem maintainers are expected to assist in reaching those periods by doing
+timely reviews and quick handling of accepted patches. They thus might have to
+send git-pull requests earlier or more often than usual; depending on the fix,
+it might even be acceptable to skip testing in linux-next. Especially fixes for
+regressions in stable and longterm kernels need to be handled quickly, as fixes
+need to be merged in mainline before they can be backported to older series.
+
 
 More aspects regarding regressions developers should be aware of
 ----------------------------------------------------------------
-- 
GitLab


From 247097e2bbff4201b85eee8de4f31b4065877f67 Mon Sep 17 00:00:00 2001
From: Thorsten Leemhuis <linux@leemhuis.info>
Date: Wed, 16 Feb 2022 07:51:35 +0100
Subject: [PATCH 0643/1586] docs: reporting-issues.rst: link new document about
 regressions

Make Documentation/admin-guide/reporting-issues.rst point to the newly
created document about regressions
(Documentation/admin-guide/regressions-regressions.rst). This allows to
shorten a few explanations the new document describes better and in more
detail.

While at it move the copyright hint to the end of the file and remove
quotes around links to other places in the documentation. Both issues
came up during the review of the new documents about regressions.

Signed-off-by: Thorsten Leemhuis <linux@leemhuis.info>
Link: https://lore.kernel.org/r/f20114eaddc356a8c79dd62812a6c7f4ca5d87b9.1644994117.git.linux@leemhuis.info
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../admin-guide/reporting-issues.rst          | 73 +++++++++----------
 1 file changed, 36 insertions(+), 37 deletions(-)

diff --git a/Documentation/admin-guide/reporting-issues.rst b/Documentation/admin-guide/reporting-issues.rst
index d7ac13f789cce..ec62151fe6720 100644
--- a/Documentation/admin-guide/reporting-issues.rst
+++ b/Documentation/admin-guide/reporting-issues.rst
@@ -1,14 +1,5 @@
 .. SPDX-License-Identifier: (GPL-2.0+ OR CC-BY-4.0)
-..
-   If you want to distribute this text under CC-BY-4.0 only, please use 'The
-   Linux kernel developers' for author attribution and link this as source:
-   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/Documentation/admin-guide/reporting-issues.rst
-..
-   Note: Only the content of this RST file as found in the Linux kernel sources
-   is available under CC-BY-4.0, as versions of this text that were processed
-   (for example by the kernel's build system) might contain content taken from
-   files which use a more restrictive license.
-
+.. See the bottom of this file for additional redistribution information.
 
 Reporting issues
 ++++++++++++++++
@@ -395,22 +386,16 @@ fixed as soon as possible, hence there are 'issues of high priority' that get
 handled slightly differently in the reporting process. Three type of cases
 qualify: regressions, security issues, and really severe problems.
 
-You deal with a 'regression' if something that worked with an older version of
-the Linux kernel does not work with a newer one or somehow works worse with it.
-It thus is a regression when a WiFi driver that did a fine job with Linux 5.7
-somehow misbehaves with 5.8 or doesn't work at all. It's also a regression if
-an application shows erratic behavior with a newer kernel, which might happen
-due to incompatible changes in the interface between the kernel and the
-userland (like procfs and sysfs). Significantly reduced performance or
-increased power consumption also qualify as regression. But keep in mind: the
-new kernel needs to be built with a configuration that is similar to the one
-from the old kernel (see below how to achieve that). That's because the kernel
-developers sometimes can not avoid incompatibilities when implementing new
-features; but to avoid regressions such features have to be enabled explicitly
-during build time configuration.
+You deal with a regression if some application or practical use case running
+fine with one Linux kernel works worse or not at all with a newer version
+compiled using a similar configuration. The document
+Documentation/admin-guide/reporting-regressions.rst explains this in more
+detail. It also provides a good deal of other information about regressions you
+might want to be aware of; it for example explains how to add your issue to the
+list of tracked regressions, to ensure it won't fall through the cracks.
 
 What qualifies as security issue is left to your judgment. Consider reading
-'Documentation/admin-guide/security-bugs.rst' before proceeding, as it
+Documentation/admin-guide/security-bugs.rst before proceeding, as it
 provides additional details how to best handle security issues.
 
 An issue is a 'really severe problem' when something totally unacceptably bad
@@ -517,7 +502,7 @@ line starting with 'CPU:'. It should end with 'Not tainted' if the kernel was
 not tainted when it noticed the problem; it was tainted if you see 'Tainted:'
 followed by a few spaces and some letters.
 
-If your kernel is tainted, study 'Documentation/admin-guide/tainted-kernels.rst'
+If your kernel is tainted, study Documentation/admin-guide/tainted-kernels.rst
 to find out why. Try to eliminate the reason. Often it's caused by one these
 three things:
 
@@ -1043,7 +1028,7 @@ down the culprit, as maintainers often won't have the time or setup at hand to
 reproduce it themselves.
 
 To find the change there is a process called 'bisection' which the document
-'Documentation/admin-guide/bug-bisect.rst' describes in detail. That process
+Documentation/admin-guide/bug-bisect.rst describes in detail. That process
 will often require you to build about ten to twenty kernel images, trying to
 reproduce the issue with each of them before building the next. Yes, that takes
 some time, but don't worry, it works a lot quicker than most people assume.
@@ -1073,10 +1058,11 @@ When dealing with regressions make sure the issue you face is really caused by
 the kernel and not by something else, as outlined above already.
 
 In the whole process keep in mind: an issue only qualifies as regression if the
-older and the newer kernel got built with a similar configuration. The best way
-to archive this: copy the configuration file (``.config``) from the old working
-kernel freshly to each newer kernel version you try. Afterwards run ``make
-olddefconfig`` to adjust it for the needs of the new version.
+older and the newer kernel got built with a similar configuration. This can be
+achieved by using ``make olddefconfig``, as explained in more detail by
+Documentation/admin-guide/reporting-regressions.rst; that document also
+provides a good deal of other information about regressions you might want to be
+aware of.
 
 
 Write and send the report
@@ -1283,7 +1269,7 @@ them when sending the report by mail. If you filed it in a bug tracker, forward
 the report's text to these addresses; but on top of it put a small note where
 you mention that you filed it with a link to the ticket.
 
-See 'Documentation/admin-guide/security-bugs.rst' for more information.
+See Documentation/admin-guide/security-bugs.rst for more information.
 
 
 Duties after the report went out
@@ -1571,7 +1557,7 @@ Once your report is out your might get asked to do a proper one, as it allows to
 pinpoint the exact change that causes the issue (which then can easily get
 reverted to fix the issue quickly). Hence consider to do a proper bisection
 right away if time permits. See the section 'Special care for regressions' and
-the document 'Documentation/admin-guide/bug-bisect.rst' for details how to
+the document Documentation/admin-guide/bug-bisect.rst for details how to
 perform one. In case of a successful bisection add the author of the culprit to
 the recipients; also CC everyone in the signed-off-by chain, which you find at
 the end of its commit message.
@@ -1594,7 +1580,7 @@ Some fixes are too complex
 Even small and seemingly obvious code-changes sometimes introduce new and
 totally unexpected problems. The maintainers of the stable and longterm kernels
 are very aware of that and thus only apply changes to these kernels that are
-within rules outlined in 'Documentation/process/stable-kernel-rules.rst'.
+within rules outlined in Documentation/process/stable-kernel-rules.rst.
 
 Complex or risky changes for example do not qualify and thus only get applied
 to mainline. Other fixes are easy to get backported to the newest stable and
@@ -1756,10 +1742,23 @@ art will lay some groundwork to improve the situation over time.
 
 
 ..
-   This text is maintained by Thorsten Leemhuis <linux@leemhuis.info>. If you
-   spot a typo or small mistake, feel free to let him know directly and he'll
-   fix it. You are free to do the same in a mostly informal way if you want
-   to contribute changes to the text, but for copyright reasons please CC
+   end-of-content
+..
+   This document is maintained by Thorsten Leemhuis <linux@leemhuis.info>. If
+   you spot a typo or small mistake, feel free to let him know directly and
+   he'll fix it. You are free to do the same in a mostly informal way if you
+   want to contribute changes to the text, but for copyright reasons please CC
    linux-doc@vger.kernel.org and "sign-off" your contribution as
    Documentation/process/submitting-patches.rst outlines in the section "Sign
    your work - the Developer's Certificate of Origin".
+..
+   This text is available under GPL-2.0+ or CC-BY-4.0, as stated at the top
+   of the file. If you want to distribute this text under CC-BY-4.0 only,
+   please use "The Linux kernel developers" for author attribution and link
+   this as source:
+   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/Documentation/admin-guide/reporting-issues.rst
+..
+   Note: Only the content of this RST file as found in the Linux kernel sources
+   is available under CC-BY-4.0, as versions of this text that were processed
+   (for example by the kernel's build system) might contain content taken from
+   files which use a more restrictive license.
-- 
GitLab


From ac982578e7d340dc4f4fd243f4a4b24787d28c3f Mon Sep 17 00:00:00 2001
From: Krishna Yarlagadda <kyarlagadda@nvidia.com>
Date: Tue, 22 Feb 2022 23:26:07 +0530
Subject: [PATCH 0644/1586] spi: tegra210-quad: use device_reset method

Use device_reset api to replace duplicate code in driver to call
reset_control_get api with reset handle.

Signed-off-by: Krishna Yarlagadda <kyarlagadda@nvidia.com>
Link: https://lore.kernel.org/r/20220222175611.58051-2-kyarlagadda@nvidia.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-tegra210-quad.c | 18 ++++--------------
 1 file changed, 4 insertions(+), 14 deletions(-)

diff --git a/drivers/spi/spi-tegra210-quad.c b/drivers/spi/spi-tegra210-quad.c
index cb00ac2fc7d8e..a353f2a9abd44 100644
--- a/drivers/spi/spi-tegra210-quad.c
+++ b/drivers/spi/spi-tegra210-quad.c
@@ -137,7 +137,6 @@ struct tegra_qspi {
 	spinlock_t				lock;
 
 	struct clk				*clk;
-	struct reset_control			*rst;
 	void __iomem				*base;
 	phys_addr_t				phys;
 	unsigned int				irq;
@@ -948,9 +947,8 @@ static void tegra_qspi_handle_error(struct tegra_qspi *tqspi)
 	dev_err(tqspi->dev, "error in transfer, fifo status 0x%08x\n", tqspi->status_reg);
 	tegra_qspi_dump_regs(tqspi);
 	tegra_qspi_flush_fifos(tqspi, true);
-	reset_control_assert(tqspi->rst);
-	udelay(2);
-	reset_control_deassert(tqspi->rst);
+	if (device_reset(tqspi->dev) < 0)
+		dev_warn_once(tqspi->dev, "device reset failed\n");
 }
 
 static void tegra_qspi_transfer_end(struct spi_device *spi)
@@ -1251,13 +1249,6 @@ static int tegra_qspi_probe(struct platform_device *pdev)
 		return ret;
 	}
 
-	tqspi->rst = devm_reset_control_get_exclusive(&pdev->dev, NULL);
-	if (IS_ERR(tqspi->rst)) {
-		ret = PTR_ERR(tqspi->rst);
-		dev_err(&pdev->dev, "failed to get reset control: %d\n", ret);
-		return ret;
-	}
-
 	tqspi->max_buf_size = QSPI_FIFO_DEPTH << 2;
 	tqspi->dma_buf_size = DEFAULT_QSPI_DMA_BUF_LEN;
 
@@ -1279,9 +1270,8 @@ static int tegra_qspi_probe(struct platform_device *pdev)
 		goto exit_pm_disable;
 	}
 
-	reset_control_assert(tqspi->rst);
-	udelay(2);
-	reset_control_deassert(tqspi->rst);
+	if (device_reset(tqspi->dev) < 0)
+		dev_warn_once(tqspi->dev, "device reset failed\n");
 
 	tqspi->def_command1_reg = QSPI_M_S | QSPI_CS_SW_HW |  QSPI_CS_SW_VAL;
 	tegra_qspi_writel(tqspi, tqspi->def_command1_reg, QSPI_COMMAND1);
-- 
GitLab


From de2f678b11bdcbabb6d804c543f9a3325c0e83bf Mon Sep 17 00:00:00 2001
From: Krishna Yarlagadda <kyarlagadda@nvidia.com>
Date: Tue, 22 Feb 2022 23:26:08 +0530
Subject: [PATCH 0645/1586] spi: Add Tegra234 QUAD SPI compatible

Add compatible string for Tegra234 for Tegra QUAD SPI

Signed-off-by: Krishna Yarlagadda <kyarlagadda@nvidia.com>
Link: https://lore.kernel.org/r/20220222175611.58051-3-kyarlagadda@nvidia.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 Documentation/devicetree/bindings/spi/nvidia,tegra210-quad.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/spi/nvidia,tegra210-quad.yaml b/Documentation/devicetree/bindings/spi/nvidia,tegra210-quad.yaml
index 35a8045b2c70d..6efea8970e623 100644
--- a/Documentation/devicetree/bindings/spi/nvidia,tegra210-quad.yaml
+++ b/Documentation/devicetree/bindings/spi/nvidia,tegra210-quad.yaml
@@ -19,6 +19,7 @@ properties:
       - nvidia,tegra210-qspi
       - nvidia,tegra186-qspi
       - nvidia,tegra194-qspi
+      - nvidia,tegra234-qspi
 
   reg:
     maxItems: 1
-- 
GitLab


From ea23f0e148b82e5bcbc6c814926f53133552f0f3 Mon Sep 17 00:00:00 2001
From: Krishna Yarlagadda <kyarlagadda@nvidia.com>
Date: Tue, 22 Feb 2022 23:26:09 +0530
Subject: [PATCH 0646/1586] spi: tegra210-quad: add new chips to compatible

Add support for Tegra234 and soc data to select capabilities.

Signed-off-by: Krishna Yarlagadda <kyarlagadda@nvidia.com>
Link: https://lore.kernel.org/r/20220222175611.58051-4-kyarlagadda@nvidia.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-tegra210-quad.c | 33 ++++++++++++++++++++++++++++++---
 1 file changed, 30 insertions(+), 3 deletions(-)

diff --git a/drivers/spi/spi-tegra210-quad.c b/drivers/spi/spi-tegra210-quad.c
index a353f2a9abd44..3725ee5331ae0 100644
--- a/drivers/spi/spi-tegra210-quad.c
+++ b/drivers/spi/spi-tegra210-quad.c
@@ -125,6 +125,10 @@
 #define QSPI_DMA_TIMEOUT			(msecs_to_jiffies(1000))
 #define DEFAULT_QSPI_DMA_BUF_LEN		(64 * 1024)
 
+struct tegra_qspi_soc_data {
+	bool has_dma;
+};
+
 struct tegra_qspi_client_data {
 	int tx_clk_tap_delay;
 	int rx_clk_tap_delay;
@@ -184,6 +188,7 @@ struct tegra_qspi {
 	u32					*tx_dma_buf;
 	dma_addr_t				tx_dma_phys;
 	struct dma_async_tx_descriptor		*tx_dma_desc;
+	const struct tegra_qspi_soc_data	*soc_data;
 };
 
 static inline u32 tegra_qspi_readl(struct tegra_qspi *tqspi, unsigned long offset)
@@ -1191,10 +1196,32 @@ static irqreturn_t tegra_qspi_isr_thread(int irq, void *context_data)
 	return handle_dma_based_xfer(tqspi);
 }
 
+static struct tegra_qspi_soc_data tegra210_qspi_soc_data = {
+	.has_dma = true,
+};
+
+static struct tegra_qspi_soc_data tegra186_qspi_soc_data = {
+	.has_dma = true,
+};
+
+static struct tegra_qspi_soc_data tegra234_qspi_soc_data = {
+	.has_dma = false,
+};
+
 static const struct of_device_id tegra_qspi_of_match[] = {
-	{ .compatible = "nvidia,tegra210-qspi", },
-	{ .compatible = "nvidia,tegra186-qspi", },
-	{ .compatible = "nvidia,tegra194-qspi", },
+	{
+		.compatible = "nvidia,tegra210-qspi",
+		.data	    = &tegra210_qspi_soc_data,
+	}, {
+		.compatible = "nvidia,tegra186-qspi",
+		.data	    = &tegra186_qspi_soc_data,
+	}, {
+		.compatible = "nvidia,tegra194-qspi",
+		.data	    = &tegra186_qspi_soc_data,
+	}, {
+		.compatible = "nvidia,tegra234-qspi",
+		.data	    = &tegra234_qspi_soc_data,
+	},
 	{}
 };
 
-- 
GitLab


From 3a4f7ef4bed5bdc77a1ac8132f9f0650bbcb3eae Mon Sep 17 00:00:00 2001
From: Liam Howlett <liam.howlett@oracle.com>
Date: Fri, 18 Feb 2022 02:37:04 +0000
Subject: [PATCH 0647/1586] arm64: Change elfcore for_each_mte_vma() to use VMA
 iterator

Rework for_each_mte_vma() to use a VMA iterator instead of an explicit
linked-list. This will allow easy integration with the maple tree work
which removes the VMA list altogether.

Signed-off-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20220218023650.672072-1-Liam.Howlett@oracle.com
[will: Folded in fix from Catalin]
Link: https://lore.kernel.org/r/YhUcywqIhmHvX6dG@arm.com
Signed-off--by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/elfcore.c | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/kernel/elfcore.c b/arch/arm64/kernel/elfcore.c
index 3455ee4acc04c..3ed39c61a510c 100644
--- a/arch/arm64/kernel/elfcore.c
+++ b/arch/arm64/kernel/elfcore.c
@@ -8,9 +8,16 @@
 #include <asm/cpufeature.h>
 #include <asm/mte.h>
 
-#define for_each_mte_vma(tsk, vma)					\
+#ifndef VMA_ITERATOR
+#define VMA_ITERATOR(name, mm, addr)	\
+	struct mm_struct *name = mm
+#define for_each_vma(vmi, vma)		\
+	for (vma = vmi->mmap; vma; vma = vma->vm_next)
+#endif
+
+#define for_each_mte_vma(vmi, vma)					\
 	if (system_supports_mte())					\
-		for (vma = tsk->mm->mmap; vma; vma = vma->vm_next)	\
+		for_each_vma(vmi, vma)					\
 			if (vma->vm_flags & VM_MTE)
 
 static unsigned long mte_vma_tag_dump_size(struct vm_area_struct *vma)
@@ -65,8 +72,9 @@ Elf_Half elf_core_extra_phdrs(void)
 {
 	struct vm_area_struct *vma;
 	int vma_count = 0;
+	VMA_ITERATOR(vmi, current->mm, 0);
 
-	for_each_mte_vma(current, vma)
+	for_each_mte_vma(vmi, vma)
 		vma_count++;
 
 	return vma_count;
@@ -75,8 +83,9 @@ Elf_Half elf_core_extra_phdrs(void)
 int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset)
 {
 	struct vm_area_struct *vma;
+	VMA_ITERATOR(vmi, current->mm, 0);
 
-	for_each_mte_vma(current, vma) {
+	for_each_mte_vma(vmi, vma) {
 		struct elf_phdr phdr;
 
 		phdr.p_type = PT_ARM_MEMTAG_MTE;
@@ -100,8 +109,9 @@ size_t elf_core_extra_data_size(void)
 {
 	struct vm_area_struct *vma;
 	size_t data_size = 0;
+	VMA_ITERATOR(vmi, current->mm, 0);
 
-	for_each_mte_vma(current, vma)
+	for_each_mte_vma(vmi, vma)
 		data_size += mte_vma_tag_dump_size(vma);
 
 	return data_size;
@@ -110,8 +120,9 @@ size_t elf_core_extra_data_size(void)
 int elf_core_write_extra_data(struct coredump_params *cprm)
 {
 	struct vm_area_struct *vma;
+	VMA_ITERATOR(vmi, current->mm, 0);
 
-	for_each_mte_vma(current, vma) {
+	for_each_mte_vma(vmi, vma) {
 		if (vma->vm_flags & VM_DONTDUMP)
 			continue;
 
-- 
GitLab


From da844beb6d9f97cb6fe4b443f9610a9fcc534f9d Mon Sep 17 00:00:00 2001
From: Vladimir Murzin <vladimir.murzin@arm.com>
Date: Thu, 24 Feb 2022 12:49:50 +0000
Subject: [PATCH 0648/1586] arm64: cpufeature: Account min_field_value when
 cheking secondaries for PAuth

In case, both boot_val and sec_val have value below min_field_value we
would wrongly report that address authentication is supported. It is
not a big issue because we enable address authentication based on boot
cpu (and check there is correct).

Signed-off-by: Vladimir Murzin <vladimir.murzin@arm.com>
Acked-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220224124952.119612-2-vladimir.murzin@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/cpufeature.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index e5f23dab1c8df..3271770b60d66 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1829,7 +1829,7 @@ static bool has_address_auth_cpucap(const struct arm64_cpu_capabilities *entry,
 	/* Now check for the secondary CPUs with SCOPE_LOCAL_CPU scope */
 	sec_val = cpuid_feature_extract_field(__read_sysreg_by_encoding(entry->sys_reg),
 					      entry->field_pos, entry->sign);
-	return sec_val == boot_val;
+	return (sec_val >= entry->min_field_value) && (sec_val == boot_val);
 }
 
 static bool has_address_auth_metacap(const struct arm64_cpu_capabilities *entry,
-- 
GitLab


From be3256a086afb4048baf18e6a35a3a81482aa2fa Mon Sep 17 00:00:00 2001
From: Vladimir Murzin <vladimir.murzin@arm.com>
Date: Thu, 24 Feb 2022 12:49:51 +0000
Subject: [PATCH 0649/1586] arm64: cpufeature: Mark existing PAuth architected
 algorithm as QARMA5

In preparation of supporting PAuth QARMA3 architected algorithm mark
existing one as QARMA5, so we can distingwish between two.

Signed-off-by: Vladimir Murzin <vladimir.murzin@arm.com>
Acked-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220224124952.119612-3-vladimir.murzin@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/cpufeature.c | 20 ++++++++++++--------
 arch/arm64/tools/cpucaps       |  4 ++--
 2 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 3271770b60d66..1b955bea28569 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1835,15 +1835,19 @@ static bool has_address_auth_cpucap(const struct arm64_cpu_capabilities *entry,
 static bool has_address_auth_metacap(const struct arm64_cpu_capabilities *entry,
 				     int scope)
 {
-	return has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_ARCH], scope) ||
-	       has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_IMP_DEF], scope);
+	bool api = has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_IMP_DEF], scope);
+	bool apa = has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_ARCH_QARMA5], scope);
+
+	return apa || api;
 }
 
 static bool has_generic_auth(const struct arm64_cpu_capabilities *entry,
 			     int __unused)
 {
-	return __system_matches_cap(ARM64_HAS_GENERIC_AUTH_ARCH) ||
-	       __system_matches_cap(ARM64_HAS_GENERIC_AUTH_IMP_DEF);
+	bool gpi = __system_matches_cap(ARM64_HAS_GENERIC_AUTH_IMP_DEF);
+	bool gpa = __system_matches_cap(ARM64_HAS_GENERIC_AUTH_ARCH_QARMA5);
+
+	return gpa || gpi;
 }
 #endif /* CONFIG_ARM64_PTR_AUTH */
 
@@ -2230,8 +2234,8 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 	},
 #ifdef CONFIG_ARM64_PTR_AUTH
 	{
-		.desc = "Address authentication (architected algorithm)",
-		.capability = ARM64_HAS_ADDRESS_AUTH_ARCH,
+		.desc = "Address authentication (architected QARMA5 algorithm)",
+		.capability = ARM64_HAS_ADDRESS_AUTH_ARCH_QARMA5,
 		.type = ARM64_CPUCAP_BOOT_CPU_FEATURE,
 		.sys_reg = SYS_ID_AA64ISAR1_EL1,
 		.sign = FTR_UNSIGNED,
@@ -2255,8 +2259,8 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.matches = has_address_auth_metacap,
 	},
 	{
-		.desc = "Generic authentication (architected algorithm)",
-		.capability = ARM64_HAS_GENERIC_AUTH_ARCH,
+		.desc = "Generic authentication (architected QARMA5 algorithm)",
+		.capability = ARM64_HAS_GENERIC_AUTH_ARCH_QARMA5,
 		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
 		.sys_reg = SYS_ID_AA64ISAR1_EL1,
 		.sign = FTR_UNSIGNED,
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index 9c65b1e25a965..4c39247581f64 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -7,7 +7,7 @@ BTI
 HAS_32BIT_EL0_DO_NOT_USE
 HAS_32BIT_EL1
 HAS_ADDRESS_AUTH
-HAS_ADDRESS_AUTH_ARCH
+HAS_ADDRESS_AUTH_ARCH_QARMA5
 HAS_ADDRESS_AUTH_IMP_DEF
 HAS_AMU_EXTN
 HAS_ARMv8_4_TTL
@@ -21,7 +21,7 @@ HAS_E0PD
 HAS_ECV
 HAS_EPAN
 HAS_GENERIC_AUTH
-HAS_GENERIC_AUTH_ARCH
+HAS_GENERIC_AUTH_ARCH_QARMA5
 HAS_GENERIC_AUTH_IMP_DEF
 HAS_IRQ_PRIO_MASKING
 HAS_LDAPR
-- 
GitLab


From def8c222f054d18aac1fd065a50b9db5feaefa9d Mon Sep 17 00:00:00 2001
From: Vladimir Murzin <vladimir.murzin@arm.com>
Date: Thu, 24 Feb 2022 12:49:52 +0000
Subject: [PATCH 0650/1586] arm64: Add support of PAuth QARMA3 architected
 algorithm

QARMA3 is relaxed version of the QARMA5 algorithm which expected to
reduce the latency of calculation while still delivering a suitable
level of security.

Support for QARMA3 can be discovered via ID_AA64ISAR2_EL1

    APA3, bits [15:12] Indicates whether the QARMA3 algorithm is
                       implemented in the PE for address
                       authentication in AArch64 state.

    GPA3, bits [11:8]  Indicates whether the QARMA3 algorithm is
                       implemented in the PE for generic code
                       authentication in AArch64 state.

Signed-off-by: Vladimir Murzin <vladimir.murzin@arm.com>
Acked-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220224124952.119612-4-vladimir.murzin@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/asm_pointer_auth.h     |  3 ++
 arch/arm64/include/asm/cpufeature.h           |  1 +
 arch/arm64/include/asm/kvm_hyp.h              |  1 +
 arch/arm64/include/asm/sysreg.h               | 12 ++++++
 arch/arm64/kernel/cpufeature.c                | 41 ++++++++++++++++++-
 arch/arm64/kernel/idreg-override.c            | 16 +++++++-
 arch/arm64/kvm/arm.c                          |  1 +
 .../arm64/kvm/hyp/include/nvhe/fixed_config.h |  5 +++
 arch/arm64/kvm/hyp/nvhe/sys_regs.c            | 14 +++++++
 arch/arm64/kvm/sys_regs.c                     |  5 +++
 arch/arm64/tools/cpucaps                      |  2 +
 11 files changed, 97 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/include/asm/asm_pointer_auth.h b/arch/arm64/include/asm/asm_pointer_auth.h
index f1bba5fc61c49..ead62f7dd2694 100644
--- a/arch/arm64/include/asm/asm_pointer_auth.h
+++ b/arch/arm64/include/asm/asm_pointer_auth.h
@@ -60,6 +60,9 @@ alternative_else_nop_endif
 	.macro __ptrauth_keys_init_cpu tsk, tmp1, tmp2, tmp3
 	mrs	\tmp1, id_aa64isar1_el1
 	ubfx	\tmp1, \tmp1, #ID_AA64ISAR1_APA_SHIFT, #8
+	mrs_s	\tmp2, SYS_ID_AA64ISAR2_EL1
+	ubfx	\tmp2, \tmp2, #ID_AA64ISAR2_APA3_SHIFT, #4
+	orr	\tmp1, \tmp1, \tmp2
 	cbz	\tmp1, .Lno_addr_auth\@
 	mov_q	\tmp1, (SCTLR_ELx_ENIA | SCTLR_ELx_ENIB | \
 			SCTLR_ELx_ENDA | SCTLR_ELx_ENDB)
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index ef6be92b1921a..fe7137ff61903 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -854,6 +854,7 @@ static inline unsigned int get_vmid_bits(u64 mmfr1)
 extern struct arm64_ftr_override id_aa64mmfr1_override;
 extern struct arm64_ftr_override id_aa64pfr1_override;
 extern struct arm64_ftr_override id_aa64isar1_override;
+extern struct arm64_ftr_override id_aa64isar2_override;
 
 u32 get_kvm_ipa_limit(void);
 void dump_cpu_features(void);
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index 462882f356c77..aa7fa2a08f060 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -118,6 +118,7 @@ extern u64 kvm_nvhe_sym(id_aa64pfr0_el1_sys_val);
 extern u64 kvm_nvhe_sym(id_aa64pfr1_el1_sys_val);
 extern u64 kvm_nvhe_sym(id_aa64isar0_el1_sys_val);
 extern u64 kvm_nvhe_sym(id_aa64isar1_el1_sys_val);
+extern u64 kvm_nvhe_sym(id_aa64isar2_el1_sys_val);
 extern u64 kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val);
 extern u64 kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val);
 extern u64 kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val);
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 898bee0004aee..cbe416462b886 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -773,6 +773,8 @@
 #define ID_AA64ISAR1_GPI_IMP_DEF		0x1
 
 /* id_aa64isar2 */
+#define ID_AA64ISAR2_APA3_SHIFT		12
+#define ID_AA64ISAR2_GPA3_SHIFT		8
 #define ID_AA64ISAR2_RPRES_SHIFT	4
 #define ID_AA64ISAR2_WFXT_SHIFT		0
 
@@ -786,6 +788,16 @@
 #define ID_AA64ISAR2_WFXT_NI		0x0
 #define ID_AA64ISAR2_WFXT_SUPPORTED	0x2
 
+#define ID_AA64ISAR2_APA3_NI			0x0
+#define ID_AA64ISAR2_APA3_ARCHITECTED		0x1
+#define ID_AA64ISAR2_APA3_ARCH_EPAC		0x2
+#define ID_AA64ISAR2_APA3_ARCH_EPAC2		0x3
+#define ID_AA64ISAR2_APA3_ARCH_EPAC2_FPAC	0x4
+#define ID_AA64ISAR2_APA3_ARCH_EPAC2_FPAC_CMB	0x5
+
+#define ID_AA64ISAR2_GPA3_NI			0x0
+#define ID_AA64ISAR2_GPA3_ARCHITECTED		0x1
+
 /* id_aa64pfr0 */
 #define ID_AA64PFR0_CSV3_SHIFT		60
 #define ID_AA64PFR0_CSV2_SHIFT		56
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 1b955bea28569..f6ecad8fc1c1c 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -226,6 +226,10 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = {
 };
 
 static const struct arm64_ftr_bits ftr_id_aa64isar2[] = {
+	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH),
+		       FTR_STRICT, FTR_EXACT, ID_AA64ISAR2_APA3_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH),
+		       FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_GPA3_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_RPRES_SHIFT, 4, 0),
 	ARM64_FTR_END,
 };
@@ -596,6 +600,7 @@ static const struct arm64_ftr_bits ftr_raz[] = {
 struct arm64_ftr_override __ro_after_init id_aa64mmfr1_override;
 struct arm64_ftr_override __ro_after_init id_aa64pfr1_override;
 struct arm64_ftr_override __ro_after_init id_aa64isar1_override;
+struct arm64_ftr_override __ro_after_init id_aa64isar2_override;
 
 static const struct __ftr_reg_entry {
 	u32			sys_id;
@@ -644,6 +649,8 @@ static const struct __ftr_reg_entry {
 	ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1,
 			       &id_aa64isar1_override),
 	ARM64_FTR_REG(SYS_ID_AA64ISAR2_EL1, ftr_id_aa64isar2),
+	ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64ISAR2_EL1, ftr_id_aa64isar2,
+			       &id_aa64isar2_override),
 
 	/* Op1 = 0, CRn = 0, CRm = 7 */
 	ARM64_FTR_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0),
@@ -1837,8 +1844,9 @@ static bool has_address_auth_metacap(const struct arm64_cpu_capabilities *entry,
 {
 	bool api = has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_IMP_DEF], scope);
 	bool apa = has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_ARCH_QARMA5], scope);
+	bool apa3 = has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_ARCH_QARMA3], scope);
 
-	return apa || api;
+	return apa || apa3 || api;
 }
 
 static bool has_generic_auth(const struct arm64_cpu_capabilities *entry,
@@ -1846,8 +1854,9 @@ static bool has_generic_auth(const struct arm64_cpu_capabilities *entry,
 {
 	bool gpi = __system_matches_cap(ARM64_HAS_GENERIC_AUTH_IMP_DEF);
 	bool gpa = __system_matches_cap(ARM64_HAS_GENERIC_AUTH_ARCH_QARMA5);
+	bool gpa3 = __system_matches_cap(ARM64_HAS_GENERIC_AUTH_ARCH_QARMA3);
 
-	return gpa || gpi;
+	return gpa || gpa3 || gpi;
 }
 #endif /* CONFIG_ARM64_PTR_AUTH */
 
@@ -2243,6 +2252,16 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.min_field_value = ID_AA64ISAR1_APA_ARCHITECTED,
 		.matches = has_address_auth_cpucap,
 	},
+	{
+		.desc = "Address authentication (architected QARMA3 algorithm)",
+		.capability = ARM64_HAS_ADDRESS_AUTH_ARCH_QARMA3,
+		.type = ARM64_CPUCAP_BOOT_CPU_FEATURE,
+		.sys_reg = SYS_ID_AA64ISAR2_EL1,
+		.sign = FTR_UNSIGNED,
+		.field_pos = ID_AA64ISAR2_APA3_SHIFT,
+		.min_field_value = ID_AA64ISAR2_APA3_ARCHITECTED,
+		.matches = has_address_auth_cpucap,
+	},
 	{
 		.desc = "Address authentication (IMP DEF algorithm)",
 		.capability = ARM64_HAS_ADDRESS_AUTH_IMP_DEF,
@@ -2268,6 +2287,16 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.min_field_value = ID_AA64ISAR1_GPA_ARCHITECTED,
 		.matches = has_cpuid_feature,
 	},
+	{
+		.desc = "Generic authentication (architected QARMA3 algorithm)",
+		.capability = ARM64_HAS_GENERIC_AUTH_ARCH_QARMA3,
+		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
+		.sys_reg = SYS_ID_AA64ISAR2_EL1,
+		.sign = FTR_UNSIGNED,
+		.field_pos = ID_AA64ISAR2_GPA3_SHIFT,
+		.min_field_value = ID_AA64ISAR2_GPA3_ARCHITECTED,
+		.matches = has_cpuid_feature,
+	},
 	{
 		.desc = "Generic authentication (IMP DEF algorithm)",
 		.capability = ARM64_HAS_GENERIC_AUTH_IMP_DEF,
@@ -2415,6 +2444,10 @@ static const struct arm64_cpu_capabilities ptr_auth_hwcap_addr_matches[] = {
 		HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_APA_SHIFT,
 				  FTR_UNSIGNED, ID_AA64ISAR1_APA_ARCHITECTED)
 	},
+	{
+		HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_APA3_SHIFT,
+				  FTR_UNSIGNED, ID_AA64ISAR2_APA3_ARCHITECTED)
+	},
 	{
 		HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_API_SHIFT,
 				  FTR_UNSIGNED, ID_AA64ISAR1_API_IMP_DEF)
@@ -2427,6 +2460,10 @@ static const struct arm64_cpu_capabilities ptr_auth_hwcap_gen_matches[] = {
 		HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_GPA_SHIFT,
 				  FTR_UNSIGNED, ID_AA64ISAR1_GPA_ARCHITECTED)
 	},
+	{
+		HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_GPA3_SHIFT,
+				  FTR_UNSIGNED, ID_AA64ISAR2_GPA3_ARCHITECTED)
+	},
 	{
 		HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_GPI_SHIFT,
 				  FTR_UNSIGNED, ID_AA64ISAR1_GPI_IMP_DEF)
diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c
index d8e606fe3c21b..8a2ceb5916863 100644
--- a/arch/arm64/kernel/idreg-override.c
+++ b/arch/arm64/kernel/idreg-override.c
@@ -17,7 +17,7 @@
 #define FTR_DESC_NAME_LEN	20
 #define FTR_DESC_FIELD_LEN	10
 #define FTR_ALIAS_NAME_LEN	30
-#define FTR_ALIAS_OPTION_LEN	80
+#define FTR_ALIAS_OPTION_LEN	116
 
 struct ftr_set_desc {
 	char 				name[FTR_DESC_NAME_LEN];
@@ -71,6 +71,16 @@ static const struct ftr_set_desc isar1 __initconst = {
 	},
 };
 
+static const struct ftr_set_desc isar2 __initconst = {
+	.name		= "id_aa64isar2",
+	.override	= &id_aa64isar2_override,
+	.fields		= {
+	        { "gpa3", ID_AA64ISAR2_GPA3_SHIFT },
+	        { "apa3", ID_AA64ISAR2_APA3_SHIFT },
+		{}
+	},
+};
+
 extern struct arm64_ftr_override kaslr_feature_override;
 
 static const struct ftr_set_desc kaslr __initconst = {
@@ -88,6 +98,7 @@ static const struct ftr_set_desc * const regs[] __initconst = {
 	&mmfr1,
 	&pfr1,
 	&isar1,
+	&isar2,
 	&kaslr,
 };
 
@@ -100,7 +111,8 @@ static const struct {
 	{ "arm64.nobti",		"id_aa64pfr1.bt=0" },
 	{ "arm64.nopauth",
 	  "id_aa64isar1.gpi=0 id_aa64isar1.gpa=0 "
-	  "id_aa64isar1.api=0 id_aa64isar1.apa=0"	   },
+	  "id_aa64isar1.api=0 id_aa64isar1.apa=0 "
+	  "id_aa64isar2.gpa3=0 id_aa64isar2.apa3=0"	   },
 	{ "arm64.nomte",		"id_aa64pfr1.mte=0" },
 	{ "nokaslr",			"kaslr.disabled=1" },
 };
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index ecc5958e27fe2..f3bfc0ddeb0b9 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -1870,6 +1870,7 @@ static int kvm_hyp_init_protection(u32 hyp_va_bits)
 	kvm_nvhe_sym(id_aa64pfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1);
 	kvm_nvhe_sym(id_aa64isar0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64ISAR0_EL1);
 	kvm_nvhe_sym(id_aa64isar1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64ISAR1_EL1);
+	kvm_nvhe_sym(id_aa64isar2_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64ISAR2_EL1);
 	kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
 	kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
 	kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR2_EL1);
diff --git a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h
index eea1f6a537230..5ad626527d411 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h
@@ -192,6 +192,11 @@
 	ARM64_FEATURE_MASK(ID_AA64ISAR1_I8MM) \
 	)
 
+#define PVM_ID_AA64ISAR2_ALLOW (\
+	ARM64_FEATURE_MASK(ID_AA64ISAR2_GPA3) | \
+	ARM64_FEATURE_MASK(ID_AA64ISAR2_APA3) \
+	)
+
 u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id);
 bool kvm_handle_pvm_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code);
 bool kvm_handle_pvm_restricted(struct kvm_vcpu *vcpu, u64 *exit_code);
diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
index 792cf6e6ac920..33f5181af330d 100644
--- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c
+++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
@@ -22,6 +22,7 @@ u64 id_aa64pfr0_el1_sys_val;
 u64 id_aa64pfr1_el1_sys_val;
 u64 id_aa64isar0_el1_sys_val;
 u64 id_aa64isar1_el1_sys_val;
+u64 id_aa64isar2_el1_sys_val;
 u64 id_aa64mmfr0_el1_sys_val;
 u64 id_aa64mmfr1_el1_sys_val;
 u64 id_aa64mmfr2_el1_sys_val;
@@ -183,6 +184,17 @@ static u64 get_pvm_id_aa64isar1(const struct kvm_vcpu *vcpu)
 	return id_aa64isar1_el1_sys_val & allow_mask;
 }
 
+static u64 get_pvm_id_aa64isar2(const struct kvm_vcpu *vcpu)
+{
+	u64 allow_mask = PVM_ID_AA64ISAR2_ALLOW;
+
+	if (!vcpu_has_ptrauth(vcpu))
+		allow_mask &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR2_APA3) |
+				ARM64_FEATURE_MASK(ID_AA64ISAR2_GPA3));
+
+	return id_aa64isar2_el1_sys_val & allow_mask;
+}
+
 static u64 get_pvm_id_aa64mmfr0(const struct kvm_vcpu *vcpu)
 {
 	u64 set_mask;
@@ -225,6 +237,8 @@ u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id)
 		return get_pvm_id_aa64isar0(vcpu);
 	case SYS_ID_AA64ISAR1_EL1:
 		return get_pvm_id_aa64isar1(vcpu);
+	case SYS_ID_AA64ISAR2_EL1:
+		return get_pvm_id_aa64isar2(vcpu);
 	case SYS_ID_AA64MMFR0_EL1:
 		return get_pvm_id_aa64mmfr0(vcpu);
 	case SYS_ID_AA64MMFR1_EL1:
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 4dc2fba316fff..baa65292bbc2a 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1097,6 +1097,11 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu,
 				 ARM64_FEATURE_MASK(ID_AA64ISAR1_GPA) |
 				 ARM64_FEATURE_MASK(ID_AA64ISAR1_GPI));
 		break;
+	case SYS_ID_AA64ISAR2_EL1:
+		if (!vcpu_has_ptrauth(vcpu))
+			val &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR2_APA3) |
+				 ARM64_FEATURE_MASK(ID_AA64ISAR2_GPA3));
+		break;
 	case SYS_ID_AA64DFR0_EL1:
 		/* Limit debug to ARMv8.0 */
 		val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_DEBUGVER);
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index 4c39247581f64..162bc2443217b 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -7,6 +7,7 @@ BTI
 HAS_32BIT_EL0_DO_NOT_USE
 HAS_32BIT_EL1
 HAS_ADDRESS_AUTH
+HAS_ADDRESS_AUTH_ARCH_QARMA3
 HAS_ADDRESS_AUTH_ARCH_QARMA5
 HAS_ADDRESS_AUTH_IMP_DEF
 HAS_AMU_EXTN
@@ -21,6 +22,7 @@ HAS_E0PD
 HAS_ECV
 HAS_EPAN
 HAS_GENERIC_AUTH
+HAS_GENERIC_AUTH_ARCH_QARMA3
 HAS_GENERIC_AUTH_ARCH_QARMA5
 HAS_GENERIC_AUTH_IMP_DEF
 HAS_IRQ_PRIO_MASKING
-- 
GitLab


From 83854c231262d2ad43c4fb32414ba25304f925d8 Mon Sep 17 00:00:00 2001
From: Li-hao Kuo <lhjeff911@gmail.com>
Date: Fri, 25 Feb 2022 14:31:53 +0800
Subject: [PATCH 0651/1586] spi: dt-bindings: remove unused required property

fix issue
/builds/robherring/linux-dt/Documentation/devicetree/bindings/spi/spi-sunplus-sp7021.example.dt.yaml:
spi@9C002D80: 'clocks-names' is a required property
From schema: /builds/robherring/linux-dt/Documentation/devicetree/bindings/spi/spi-sunplus-sp7021.yaml
delete unused required(clock-name)

Fixes: 3b8ab4da34 ("spi: Fix test error for sp7021")

Reported-by: Rob Herring <robh+dt@kernel.org>
Signed-off-by: Li-hao Kuo <lhjeff911@gmail.com>
Link: https://lore.kernel.org/r/097bbc8b703b17e8fb3e3f6f6d2f97fe668bd5c5.1645770648.git.lhjeff911@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 Documentation/devicetree/bindings/spi/spi-sunplus-sp7021.yaml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/spi/spi-sunplus-sp7021.yaml b/Documentation/devicetree/bindings/spi/spi-sunplus-sp7021.yaml
index 298eac28c40ff..3a58cf0f1ec86 100644
--- a/Documentation/devicetree/bindings/spi/spi-sunplus-sp7021.yaml
+++ b/Documentation/devicetree/bindings/spi/spi-sunplus-sp7021.yaml
@@ -50,7 +50,6 @@ required:
   - interrupts
   - interrupt-names
   - clocks
-  - clocks-names
   - resets
   - pinctrl-names
   - pinctrl-0
-- 
GitLab


From 032e6c33790dc51836a40ef702de4c9e0941145f Mon Sep 17 00:00:00 2001
From: Vladimir Murzin <vladimir.murzin@arm.com>
Date: Thu, 24 Feb 2022 16:47:39 +0000
Subject: [PATCH 0652/1586] arm64: cpufeature: Remove cpu_has_fwb() check

cpu_has_fwb() is supposed to warn user is following architectural
requirement is not valid:

LoUU, bits [29:27] - Level of Unification Uniprocessor for the cache
                     hierarchy.

  Note

    When FEAT_S2FWB is implemented, the architecture requires that
    this field is zero so that no levels of data cache need to be
    cleaned in order to manage coherency with instruction fetches.

LoUIS, bits [23:21] - Level of Unification Inner Shareable for the
                      cache hierarchy.

  Note

    When FEAT_S2FWB is implemented, the architecture requires that
    this field is zero so that no levels of data cache need to be
    cleaned in order to manage coherency with instruction fetches.

It is not really clear what user have to do if assertion fires. Having
assertions about the CPU design like this inspire even more assertions
to be added and the kernel definitely is not the right place for that,
so let's remove cpu_has_fwb() altogether.

Signed-off-by: Vladimir Murzin <vladimir.murzin@arm.com>
Link: https://lore.kernel.org/r/20220224164739.119168-1-vladimir.murzin@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/cpufeature.c | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index e5f23dab1c8df..6d1da359f8041 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1775,14 +1775,6 @@ static void cpu_copy_el2regs(const struct arm64_cpu_capabilities *__unused)
 		write_sysreg(read_sysreg(tpidr_el1), tpidr_el2);
 }
 
-static void cpu_has_fwb(const struct arm64_cpu_capabilities *__unused)
-{
-	u64 val = read_sysreg_s(SYS_CLIDR_EL1);
-
-	/* Check that CLIDR_EL1.LOU{U,IS} are both 0 */
-	WARN_ON(CLIDR_LOUU(val) || CLIDR_LOUIS(val));
-}
-
 #ifdef CONFIG_ARM64_PAN
 static void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused)
 {
@@ -2144,7 +2136,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.field_pos = ID_AA64MMFR2_FWB_SHIFT,
 		.min_field_value = 1,
 		.matches = has_cpuid_feature,
-		.cpu_enable = cpu_has_fwb,
 	},
 	{
 		.desc = "ARMv8.4 Translation Table Level",
-- 
GitLab


From 4013e26670c590944abdab56c4fa797527b74325 Mon Sep 17 00:00:00 2001
From: Fangrui Song <maskray@google.com>
Date: Fri, 18 Feb 2022 00:12:09 -0800
Subject: [PATCH 0653/1586] arm64: module: remove (NOLOAD) from linker script

On ELF, (NOLOAD) sets the section type to SHT_NOBITS[1]. It is conceptually
inappropriate for .plt and .text.* sections which are always
SHT_PROGBITS.

In GNU ld, if PLT entries are needed, .plt will be SHT_PROGBITS anyway
and (NOLOAD) will be essentially ignored. In ld.lld, since
https://reviews.llvm.org/D118840 ("[ELF] Support (TYPE=<value>) to
customize the output section type"), ld.lld will report a `section type
mismatch` error. Just remove (NOLOAD) to fix the error.

[1] https://lld.llvm.org/ELF/linker_script.html As of today, "The
section should be marked as not loadable" on
https://sourceware.org/binutils/docs/ld/Output-Section-Type.html is
outdated for ELF.

Tested-by: Nathan Chancellor <nathan@kernel.org>
Reported-by: Nathan Chancellor <nathan@kernel.org>
Signed-off-by: Fangrui Song <maskray@google.com>
Acked-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20220218081209.354383-1-maskray@google.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/module.lds.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/include/asm/module.lds.h b/arch/arm64/include/asm/module.lds.h
index a11ccadd47d29..094701ec5500b 100644
--- a/arch/arm64/include/asm/module.lds.h
+++ b/arch/arm64/include/asm/module.lds.h
@@ -1,8 +1,8 @@
 SECTIONS {
 #ifdef CONFIG_ARM64_MODULE_PLTS
-	.plt 0 (NOLOAD) : { BYTE(0) }
-	.init.plt 0 (NOLOAD) : { BYTE(0) }
-	.text.ftrace_trampoline 0 (NOLOAD) : { BYTE(0) }
+	.plt 0 : { BYTE(0) }
+	.init.plt 0 : { BYTE(0) }
+	.text.ftrace_trampoline 0 : { BYTE(0) }
 #endif
 
 #ifdef CONFIG_KASAN_SW_TAGS
-- 
GitLab


From 879358fc670dbc8dc3b0e3e4975ff39e38847707 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Mon, 7 Feb 2022 15:20:30 +0000
Subject: [PATCH 0654/1586] arm64: Define CPACR_EL1_FPEN similarly to other
 floating point controls

The base floating point, SVE and SME all have enable controls for EL0 and
EL1 in CPACR_EL1 which have a similar layout and function. Currently the
basic floating point enable FPEN is defined differently to the SVE control,
specified as a single define in kvm_arm.h rather than in sysreg.h. Move the
define to sysreg.h and provide separate EL0 and EL1 control bits so code
managing the different floating point enables can look consistent.

Signed-off-by: Mark Brown <broonie@kernel.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20220207152109.197566-2-broonie@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/kvm_arm.h | 1 -
 arch/arm64/include/asm/sysreg.h  | 4 ++++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 01d47c5886dc4..eec790842fe23 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -355,7 +355,6 @@
 	ECN(SOFTSTP_CUR), ECN(WATCHPT_LOW), ECN(WATCHPT_CUR), \
 	ECN(BKPT32), ECN(VECTOR32), ECN(BRK64)
 
-#define CPACR_EL1_FPEN		(3 << 20)
 #define CPACR_EL1_TTA		(1 << 28)
 #define CPACR_EL1_DEFAULT	(CPACR_EL1_FPEN | CPACR_EL1_ZEN_EL1EN)
 
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 898bee0004aee..1da4c43d597d0 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -1097,6 +1097,10 @@
 #define ZCR_ELx_LEN_SIZE	9
 #define ZCR_ELx_LEN_MASK	0x1ff
 
+#define CPACR_EL1_FPEN_EL1EN	(BIT(20)) /* enable EL1 access */
+#define CPACR_EL1_FPEN_EL0EN	(BIT(21)) /* enable EL0 access, if EL1EN set */
+#define CPACR_EL1_FPEN		(CPACR_EL1_FPEN_EL1EN | CPACR_EL1_FPEN_EL0EN)
+
 #define CPACR_EL1_ZEN_EL1EN	(BIT(16)) /* enable EL1 access */
 #define CPACR_EL1_ZEN_EL0EN	(BIT(17)) /* enable EL0 access, if EL1EN set */
 #define CPACR_EL1_ZEN		(CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN)
-- 
GitLab


From 3bb72d86d80eb9296d43f9e807b6f9ff58049552 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Mon, 7 Feb 2022 15:20:31 +0000
Subject: [PATCH 0655/1586] arm64: Always use individual bits in CPACR floating
 point enables

CPACR_EL1 has several bitfields for controlling traps for floating point
features to EL1, each of which has a separate bits for EL0 and EL1. Marc
Zyngier noted that we are not consistent in our use of defines to
manipulate these, sometimes using a define covering the whole field and
sometimes using defines for the individual bits. Make this consistent by
expanding the whole field defines where they are used (currently only in
the KVM code) and deleting them so that no further uses can be
introduced.

Suggested-by: Marc Zyngier <maz@kernel.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20220207152109.197566-3-broonie@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/kvm_arm.h        | 3 ++-
 arch/arm64/include/asm/sysreg.h         | 2 --
 arch/arm64/kvm/hyp/include/hyp/switch.h | 4 ++--
 arch/arm64/kvm/hyp/vhe/switch.c         | 6 +++---
 4 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index eec790842fe23..1767ded838880 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -356,6 +356,7 @@
 	ECN(BKPT32), ECN(VECTOR32), ECN(BRK64)
 
 #define CPACR_EL1_TTA		(1 << 28)
-#define CPACR_EL1_DEFAULT	(CPACR_EL1_FPEN | CPACR_EL1_ZEN_EL1EN)
+#define CPACR_EL1_DEFAULT	(CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN |\
+				 CPACR_EL1_ZEN_EL1EN)
 
 #endif /* __ARM64_KVM_ARM_H__ */
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 1da4c43d597d0..e66dd9ebc3374 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -1099,11 +1099,9 @@
 
 #define CPACR_EL1_FPEN_EL1EN	(BIT(20)) /* enable EL1 access */
 #define CPACR_EL1_FPEN_EL0EN	(BIT(21)) /* enable EL0 access, if EL1EN set */
-#define CPACR_EL1_FPEN		(CPACR_EL1_FPEN_EL1EN | CPACR_EL1_FPEN_EL0EN)
 
 #define CPACR_EL1_ZEN_EL1EN	(BIT(16)) /* enable EL1 access */
 #define CPACR_EL1_ZEN_EL0EN	(BIT(17)) /* enable EL0 access, if EL1EN set */
-#define CPACR_EL1_ZEN		(CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN)
 
 /* TCR EL1 Bit Definitions */
 #define SYS_TCR_EL1_TCMA1	(BIT(58))
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 701cfb964905d..6379a1e3e6e51 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -174,9 +174,9 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
 
 	/* Valid trap.  Switch the context: */
 	if (has_vhe()) {
-		reg = CPACR_EL1_FPEN;
+		reg = CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN;
 		if (sve_guest)
-			reg |= CPACR_EL1_ZEN;
+			reg |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN;
 
 		sysreg_clear_set(cpacr_el1, 0, reg);
 	} else {
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index 11d053fdd604b..619353b06e38f 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -38,7 +38,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
 
 	val = read_sysreg(cpacr_el1);
 	val |= CPACR_EL1_TTA;
-	val &= ~CPACR_EL1_ZEN;
+	val &= ~(CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN);
 
 	/*
 	 * With VHE (HCR.E2H == 1), accesses to CPACR_EL1 are routed to
@@ -53,9 +53,9 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
 
 	if (update_fp_enabled(vcpu)) {
 		if (vcpu_has_sve(vcpu))
-			val |= CPACR_EL1_ZEN;
+			val |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN;
 	} else {
-		val &= ~CPACR_EL1_FPEN;
+		val &= ~(CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN);
 		__activate_traps_fpsimd32(vcpu);
 	}
 
-- 
GitLab


From 0a2eec83c2c23cf609e781732b338a9a4f18e00c Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Mon, 7 Feb 2022 15:20:32 +0000
Subject: [PATCH 0656/1586] arm64: cpufeature: Always specify and use a field
 width for capabilities

Since all the fields in the main ID registers are 4 bits wide we have up
until now not bothered specifying the width in the code. Since we now
wish to use this mechanism to enumerate features from the floating point
feature registers which do not follow this pattern add a width to the
table.  This means updating all the existing table entries but makes it
less likely that we run into issues in future due to implicitly assuming
a 4 bit width.

Signed-off-by: Mark Brown <broonie@kernel.org>
Cc: Suzuki K Poulose <suzuki.poulose@arm.com>
Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20220207152109.197566-4-broonie@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/cpufeature.h |   1 +
 arch/arm64/kernel/cpufeature.c      | 167 +++++++++++++++++-----------
 2 files changed, 102 insertions(+), 66 deletions(-)

diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index ef6be92b1921a..2728abd9cae4b 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -356,6 +356,7 @@ struct arm64_cpu_capabilities {
 		struct {	/* Feature register checking */
 			u32 sys_reg;
 			u8 field_pos;
+			u8 field_width;
 			u8 min_field_value;
 			u8 hwcap_type;
 			bool sign;
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index e5f23dab1c8df..64a748c2b351f 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1307,7 +1307,9 @@ u64 __read_sysreg_by_encoding(u32 sys_id)
 static bool
 feature_matches(u64 reg, const struct arm64_cpu_capabilities *entry)
 {
-	int val = cpuid_feature_extract_field(reg, entry->field_pos, entry->sign);
+	int val = cpuid_feature_extract_field_width(reg, entry->field_pos,
+						    entry->field_width,
+						    entry->sign);
 
 	return val >= entry->min_field_value;
 }
@@ -1955,6 +1957,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.matches = has_cpuid_feature,
 		.sys_reg = SYS_ID_AA64MMFR0_EL1,
 		.field_pos = ID_AA64MMFR0_ECV_SHIFT,
+		.field_width = 4,
 		.sign = FTR_UNSIGNED,
 		.min_field_value = 1,
 	},
@@ -1966,6 +1969,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.matches = has_cpuid_feature,
 		.sys_reg = SYS_ID_AA64MMFR1_EL1,
 		.field_pos = ID_AA64MMFR1_PAN_SHIFT,
+		.field_width = 4,
 		.sign = FTR_UNSIGNED,
 		.min_field_value = 1,
 		.cpu_enable = cpu_enable_pan,
@@ -1979,6 +1983,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.matches = has_cpuid_feature,
 		.sys_reg = SYS_ID_AA64MMFR1_EL1,
 		.field_pos = ID_AA64MMFR1_PAN_SHIFT,
+		.field_width = 4,
 		.sign = FTR_UNSIGNED,
 		.min_field_value = 3,
 	},
@@ -1991,6 +1996,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.matches = has_cpuid_feature,
 		.sys_reg = SYS_ID_AA64ISAR0_EL1,
 		.field_pos = ID_AA64ISAR0_ATOMICS_SHIFT,
+		.field_width = 4,
 		.sign = FTR_UNSIGNED,
 		.min_field_value = 2,
 	},
@@ -2015,6 +2021,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.sys_reg = SYS_ID_AA64PFR0_EL1,
 		.sign = FTR_UNSIGNED,
 		.field_pos = ID_AA64PFR0_EL0_SHIFT,
+		.field_width = 4,
 		.min_field_value = ID_AA64PFR0_ELx_32BIT_64BIT,
 	},
 #ifdef CONFIG_KVM
@@ -2026,6 +2033,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.sys_reg = SYS_ID_AA64PFR0_EL1,
 		.sign = FTR_UNSIGNED,
 		.field_pos = ID_AA64PFR0_EL1_SHIFT,
+		.field_width = 4,
 		.min_field_value = ID_AA64PFR0_ELx_32BIT_64BIT,
 	},
 	{
@@ -2046,6 +2054,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		 */
 		.sys_reg = SYS_ID_AA64PFR0_EL1,
 		.field_pos = ID_AA64PFR0_CSV3_SHIFT,
+		.field_width = 4,
 		.min_field_value = 1,
 		.matches = unmap_kernel_at_el0,
 		.cpu_enable = kpti_install_ng_mappings,
@@ -2065,6 +2074,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.matches = has_cpuid_feature,
 		.sys_reg = SYS_ID_AA64ISAR1_EL1,
 		.field_pos = ID_AA64ISAR1_DPB_SHIFT,
+		.field_width = 4,
 		.min_field_value = 1,
 	},
 	{
@@ -2075,6 +2085,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.sys_reg = SYS_ID_AA64ISAR1_EL1,
 		.sign = FTR_UNSIGNED,
 		.field_pos = ID_AA64ISAR1_DPB_SHIFT,
+		.field_width = 4,
 		.min_field_value = 2,
 	},
 #endif
@@ -2086,6 +2097,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.sys_reg = SYS_ID_AA64PFR0_EL1,
 		.sign = FTR_UNSIGNED,
 		.field_pos = ID_AA64PFR0_SVE_SHIFT,
+		.field_width = 4,
 		.min_field_value = ID_AA64PFR0_SVE,
 		.matches = has_cpuid_feature,
 		.cpu_enable = sve_kernel_enable,
@@ -2100,6 +2112,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.sys_reg = SYS_ID_AA64PFR0_EL1,
 		.sign = FTR_UNSIGNED,
 		.field_pos = ID_AA64PFR0_RAS_SHIFT,
+		.field_width = 4,
 		.min_field_value = ID_AA64PFR0_RAS_V1,
 		.cpu_enable = cpu_clear_disr,
 	},
@@ -2118,6 +2131,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.sys_reg = SYS_ID_AA64PFR0_EL1,
 		.sign = FTR_UNSIGNED,
 		.field_pos = ID_AA64PFR0_AMU_SHIFT,
+		.field_width = 4,
 		.min_field_value = ID_AA64PFR0_AMU,
 		.cpu_enable = cpu_amu_enable,
 	},
@@ -2142,6 +2156,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.sys_reg = SYS_ID_AA64MMFR2_EL1,
 		.sign = FTR_UNSIGNED,
 		.field_pos = ID_AA64MMFR2_FWB_SHIFT,
+		.field_width = 4,
 		.min_field_value = 1,
 		.matches = has_cpuid_feature,
 		.cpu_enable = cpu_has_fwb,
@@ -2153,6 +2168,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.sys_reg = SYS_ID_AA64MMFR2_EL1,
 		.sign = FTR_UNSIGNED,
 		.field_pos = ID_AA64MMFR2_TTL_SHIFT,
+		.field_width = 4,
 		.min_field_value = 1,
 		.matches = has_cpuid_feature,
 	},
@@ -2163,6 +2179,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.matches = has_cpuid_feature,
 		.sys_reg = SYS_ID_AA64ISAR0_EL1,
 		.field_pos = ID_AA64ISAR0_TLB_SHIFT,
+		.field_width = 4,
 		.sign = FTR_UNSIGNED,
 		.min_field_value = ID_AA64ISAR0_TLB_RANGE,
 	},
@@ -2181,6 +2198,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.sys_reg = SYS_ID_AA64MMFR1_EL1,
 		.sign = FTR_UNSIGNED,
 		.field_pos = ID_AA64MMFR1_HADBS_SHIFT,
+		.field_width = 4,
 		.min_field_value = 2,
 		.matches = has_hw_dbm,
 		.cpu_enable = cpu_enable_hw_dbm,
@@ -2193,6 +2211,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.matches = has_cpuid_feature,
 		.sys_reg = SYS_ID_AA64ISAR0_EL1,
 		.field_pos = ID_AA64ISAR0_CRC32_SHIFT,
+		.field_width = 4,
 		.min_field_value = 1,
 	},
 	{
@@ -2202,6 +2221,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.matches = has_cpuid_feature,
 		.sys_reg = SYS_ID_AA64PFR1_EL1,
 		.field_pos = ID_AA64PFR1_SSBS_SHIFT,
+		.field_width = 4,
 		.sign = FTR_UNSIGNED,
 		.min_field_value = ID_AA64PFR1_SSBS_PSTATE_ONLY,
 	},
@@ -2214,6 +2234,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.sys_reg = SYS_ID_AA64MMFR2_EL1,
 		.sign = FTR_UNSIGNED,
 		.field_pos = ID_AA64MMFR2_CNP_SHIFT,
+		.field_width = 4,
 		.min_field_value = 1,
 		.cpu_enable = cpu_enable_cnp,
 	},
@@ -2225,6 +2246,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.matches = has_cpuid_feature,
 		.sys_reg = SYS_ID_AA64ISAR1_EL1,
 		.field_pos = ID_AA64ISAR1_SB_SHIFT,
+		.field_width = 4,
 		.sign = FTR_UNSIGNED,
 		.min_field_value = 1,
 	},
@@ -2236,6 +2258,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.sys_reg = SYS_ID_AA64ISAR1_EL1,
 		.sign = FTR_UNSIGNED,
 		.field_pos = ID_AA64ISAR1_APA_SHIFT,
+		.field_width = 4,
 		.min_field_value = ID_AA64ISAR1_APA_ARCHITECTED,
 		.matches = has_address_auth_cpucap,
 	},
@@ -2246,6 +2269,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.sys_reg = SYS_ID_AA64ISAR1_EL1,
 		.sign = FTR_UNSIGNED,
 		.field_pos = ID_AA64ISAR1_API_SHIFT,
+		.field_width = 4,
 		.min_field_value = ID_AA64ISAR1_API_IMP_DEF,
 		.matches = has_address_auth_cpucap,
 	},
@@ -2261,6 +2285,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.sys_reg = SYS_ID_AA64ISAR1_EL1,
 		.sign = FTR_UNSIGNED,
 		.field_pos = ID_AA64ISAR1_GPA_SHIFT,
+		.field_width = 4,
 		.min_field_value = ID_AA64ISAR1_GPA_ARCHITECTED,
 		.matches = has_cpuid_feature,
 	},
@@ -2271,6 +2296,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.sys_reg = SYS_ID_AA64ISAR1_EL1,
 		.sign = FTR_UNSIGNED,
 		.field_pos = ID_AA64ISAR1_GPI_SHIFT,
+		.field_width = 4,
 		.min_field_value = ID_AA64ISAR1_GPI_IMP_DEF,
 		.matches = has_cpuid_feature,
 	},
@@ -2291,6 +2317,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.matches = can_use_gic_priorities,
 		.sys_reg = SYS_ID_AA64PFR0_EL1,
 		.field_pos = ID_AA64PFR0_GIC_SHIFT,
+		.field_width = 4,
 		.sign = FTR_UNSIGNED,
 		.min_field_value = 1,
 	},
@@ -2302,6 +2329,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
 		.sys_reg = SYS_ID_AA64MMFR2_EL1,
 		.sign = FTR_UNSIGNED,
+		.field_width = 4,
 		.field_pos = ID_AA64MMFR2_E0PD_SHIFT,
 		.matches = has_cpuid_feature,
 		.min_field_value = 1,
@@ -2316,6 +2344,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.matches = has_cpuid_feature,
 		.sys_reg = SYS_ID_AA64ISAR0_EL1,
 		.field_pos = ID_AA64ISAR0_RNDR_SHIFT,
+		.field_width = 4,
 		.sign = FTR_UNSIGNED,
 		.min_field_value = 1,
 	},
@@ -2333,6 +2362,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.cpu_enable = bti_enable,
 		.sys_reg = SYS_ID_AA64PFR1_EL1,
 		.field_pos = ID_AA64PFR1_BT_SHIFT,
+		.field_width = 4,
 		.min_field_value = ID_AA64PFR1_BT_BTI,
 		.sign = FTR_UNSIGNED,
 	},
@@ -2345,6 +2375,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.matches = has_cpuid_feature,
 		.sys_reg = SYS_ID_AA64PFR1_EL1,
 		.field_pos = ID_AA64PFR1_MTE_SHIFT,
+		.field_width = 4,
 		.min_field_value = ID_AA64PFR1_MTE,
 		.sign = FTR_UNSIGNED,
 		.cpu_enable = cpu_enable_mte,
@@ -2356,6 +2387,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.matches = has_cpuid_feature,
 		.sys_reg = SYS_ID_AA64PFR1_EL1,
 		.field_pos = ID_AA64PFR1_MTE_SHIFT,
+		.field_width = 4,
 		.min_field_value = ID_AA64PFR1_MTE_ASYMM,
 		.sign = FTR_UNSIGNED,
 	},
@@ -2367,16 +2399,18 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.sys_reg = SYS_ID_AA64ISAR1_EL1,
 		.sign = FTR_UNSIGNED,
 		.field_pos = ID_AA64ISAR1_LRCPC_SHIFT,
+		.field_width = 4,
 		.matches = has_cpuid_feature,
 		.min_field_value = 1,
 	},
 	{},
 };
 
-#define HWCAP_CPUID_MATCH(reg, field, s, min_value)				\
+#define HWCAP_CPUID_MATCH(reg, field, width, s, min_value)			\
 		.matches = has_cpuid_feature,					\
 		.sys_reg = reg,							\
 		.field_pos = field,						\
+		.field_width = width,						\
 		.sign = s,							\
 		.min_field_value = min_value,
 
@@ -2386,10 +2420,10 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.hwcap_type = cap_type,						\
 		.hwcap = cap,							\
 
-#define HWCAP_CAP(reg, field, s, min_value, cap_type, cap)			\
+#define HWCAP_CAP(reg, field, width, s, min_value, cap_type, cap)		\
 	{									\
 		__HWCAP_CAP(#cap, cap_type, cap)				\
-		HWCAP_CPUID_MATCH(reg, field, s, min_value)			\
+		HWCAP_CPUID_MATCH(reg, field, width, s, min_value) 		\
 	}
 
 #define HWCAP_MULTI_CAP(list, cap_type, cap)					\
@@ -2409,11 +2443,12 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 static const struct arm64_cpu_capabilities ptr_auth_hwcap_addr_matches[] = {
 	{
 		HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_APA_SHIFT,
-				  FTR_UNSIGNED, ID_AA64ISAR1_APA_ARCHITECTED)
+				  4, FTR_UNSIGNED,
+				  ID_AA64ISAR1_APA_ARCHITECTED)
 	},
 	{
 		HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_API_SHIFT,
-				  FTR_UNSIGNED, ID_AA64ISAR1_API_IMP_DEF)
+				  4, FTR_UNSIGNED, ID_AA64ISAR1_API_IMP_DEF)
 	},
 	{},
 };
@@ -2421,77 +2456,77 @@ static const struct arm64_cpu_capabilities ptr_auth_hwcap_addr_matches[] = {
 static const struct arm64_cpu_capabilities ptr_auth_hwcap_gen_matches[] = {
 	{
 		HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_GPA_SHIFT,
-				  FTR_UNSIGNED, ID_AA64ISAR1_GPA_ARCHITECTED)
+				  4, FTR_UNSIGNED, ID_AA64ISAR1_GPA_ARCHITECTED)
 	},
 	{
 		HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_GPI_SHIFT,
-				  FTR_UNSIGNED, ID_AA64ISAR1_GPI_IMP_DEF)
+				  4, FTR_UNSIGNED, ID_AA64ISAR1_GPI_IMP_DEF)
 	},
 	{},
 };
 #endif
 
 static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
-	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_PMULL),
-	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AES),
-	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA1),
-	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA2),
-	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_SHA512),
-	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_CRC32),
-	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ATOMICS),
-	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_RDM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDRDM),
-	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA3_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA3),
-	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM3_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SM3),
-	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM4_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SM4),
-	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_DP_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDDP),
-	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_FHM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDFHM),
-	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FLAGM),
-	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_FLAGM2),
-	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_RNDR_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RNG),
-	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_FP),
-	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FPHP),
-	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_ASIMD),
-	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDHP),
-	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_DIT_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DIT),
-	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DCPOP),
-	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_DCPODP),
-	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_JSCVT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_JSCVT),
-	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FCMA_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FCMA),
-	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_LRCPC),
-	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ILRCPC),
-	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FRINTTS_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FRINT),
-	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_SB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SB),
-	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_BF16_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_BF16),
-	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DGH_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DGH),
-	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_I8MM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_I8MM),
-	HWCAP_CAP(SYS_ID_AA64MMFR2_EL1, ID_AA64MMFR2_AT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_USCAT),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_PMULL),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AES),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA1_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA1),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA2),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_SHA512),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_CRC32),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ATOMICS),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_RDM_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDRDM),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA3_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA3),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM3_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SM3),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM4_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SM4),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_DP_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDDP),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_FHM_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDFHM),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FLAGM),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_FLAGM2),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_RNDR_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RNG),
+	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, 4, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_FP),
+	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, 4, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FPHP),
+	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, 4, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_ASIMD),
+	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, 4, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDHP),
+	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_DIT_SHIFT, 4, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DIT),
+	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DCPOP),
+	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_DCPODP),
+	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_JSCVT_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_JSCVT),
+	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FCMA_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FCMA),
+	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_LRCPC),
+	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ILRCPC),
+	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FRINTTS_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FRINT),
+	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_SB_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SB),
+	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_BF16_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_BF16),
+	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DGH_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DGH),
+	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_I8MM_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_I8MM),
+	HWCAP_CAP(SYS_ID_AA64MMFR2_EL1, ID_AA64MMFR2_AT_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_USCAT),
 #ifdef CONFIG_ARM64_SVE
-	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_SVE_SHIFT, FTR_UNSIGNED, ID_AA64PFR0_SVE, CAP_HWCAP, KERNEL_HWCAP_SVE),
-	HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SVEVER_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_SVEVER_SVE2, CAP_HWCAP, KERNEL_HWCAP_SVE2),
-	HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_AES_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_AES, CAP_HWCAP, KERNEL_HWCAP_SVEAES),
-	HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_AES_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_AES_PMULL, CAP_HWCAP, KERNEL_HWCAP_SVEPMULL),
-	HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_BITPERM_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_BITPERM, CAP_HWCAP, KERNEL_HWCAP_SVEBITPERM),
-	HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_BF16_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_BF16, CAP_HWCAP, KERNEL_HWCAP_SVEBF16),
-	HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SHA3_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_SHA3, CAP_HWCAP, KERNEL_HWCAP_SVESHA3),
-	HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SM4_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_SM4, CAP_HWCAP, KERNEL_HWCAP_SVESM4),
-	HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_I8MM_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_I8MM, CAP_HWCAP, KERNEL_HWCAP_SVEI8MM),
-	HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_F32MM_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_F32MM, CAP_HWCAP, KERNEL_HWCAP_SVEF32MM),
-	HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_F64MM_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_F64MM, CAP_HWCAP, KERNEL_HWCAP_SVEF64MM),
+	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_SVE_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR0_SVE, CAP_HWCAP, KERNEL_HWCAP_SVE),
+	HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SVEVER_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_SVEVER_SVE2, CAP_HWCAP, KERNEL_HWCAP_SVE2),
+	HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_AES_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_AES, CAP_HWCAP, KERNEL_HWCAP_SVEAES),
+	HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_AES_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_AES_PMULL, CAP_HWCAP, KERNEL_HWCAP_SVEPMULL),
+	HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_BITPERM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_BITPERM, CAP_HWCAP, KERNEL_HWCAP_SVEBITPERM),
+	HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_BF16_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_BF16, CAP_HWCAP, KERNEL_HWCAP_SVEBF16),
+	HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SHA3_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_SHA3, CAP_HWCAP, KERNEL_HWCAP_SVESHA3),
+	HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SM4_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_SM4, CAP_HWCAP, KERNEL_HWCAP_SVESM4),
+	HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_I8MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_I8MM, CAP_HWCAP, KERNEL_HWCAP_SVEI8MM),
+	HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_F32MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_F32MM, CAP_HWCAP, KERNEL_HWCAP_SVEF32MM),
+	HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_F64MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_F64MM, CAP_HWCAP, KERNEL_HWCAP_SVEF64MM),
 #endif
-	HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SSBS_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_SSBS_PSTATE_INSNS, CAP_HWCAP, KERNEL_HWCAP_SSBS),
+	HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SSBS_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_SSBS_PSTATE_INSNS, CAP_HWCAP, KERNEL_HWCAP_SSBS),
 #ifdef CONFIG_ARM64_BTI
-	HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_BT_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_BT_BTI, CAP_HWCAP, KERNEL_HWCAP_BTI),
+	HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_BT_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_BT_BTI, CAP_HWCAP, KERNEL_HWCAP_BTI),
 #endif
 #ifdef CONFIG_ARM64_PTR_AUTH
 	HWCAP_MULTI_CAP(ptr_auth_hwcap_addr_matches, CAP_HWCAP, KERNEL_HWCAP_PACA),
 	HWCAP_MULTI_CAP(ptr_auth_hwcap_gen_matches, CAP_HWCAP, KERNEL_HWCAP_PACG),
 #endif
 #ifdef CONFIG_ARM64_MTE
-	HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_MTE_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_MTE, CAP_HWCAP, KERNEL_HWCAP_MTE),
+	HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_MTE_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_MTE, CAP_HWCAP, KERNEL_HWCAP_MTE),
 #endif /* CONFIG_ARM64_MTE */
-	HWCAP_CAP(SYS_ID_AA64MMFR0_EL1, ID_AA64MMFR0_ECV_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ECV),
-	HWCAP_CAP(SYS_ID_AA64MMFR1_EL1, ID_AA64MMFR1_AFP_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AFP),
-	HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_RPRES_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RPRES),
+	HWCAP_CAP(SYS_ID_AA64MMFR0_EL1, ID_AA64MMFR0_ECV_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ECV),
+	HWCAP_CAP(SYS_ID_AA64MMFR1_EL1, ID_AA64MMFR1_AFP_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AFP),
+	HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_RPRES_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RPRES),
 	{},
 };
 
@@ -2520,15 +2555,15 @@ static bool compat_has_neon(const struct arm64_cpu_capabilities *cap, int scope)
 static const struct arm64_cpu_capabilities compat_elf_hwcaps[] = {
 #ifdef CONFIG_COMPAT
 	HWCAP_CAP_MATCH(compat_has_neon, CAP_COMPAT_HWCAP, COMPAT_HWCAP_NEON),
-	HWCAP_CAP(SYS_MVFR1_EL1, MVFR1_SIMDFMAC_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv4),
+	HWCAP_CAP(SYS_MVFR1_EL1, MVFR1_SIMDFMAC_SHIFT, 4, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv4),
 	/* Arm v8 mandates MVFR0.FPDP == {0, 2}. So, piggy back on this for the presence of VFP support */
-	HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_FPDP_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFP),
-	HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_FPDP_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv3),
-	HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_PMULL),
-	HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_AES),
-	HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA1),
-	HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA2_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA2),
-	HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_CRC32_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_CRC32),
+	HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_FPDP_SHIFT, 4, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFP),
+	HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_FPDP_SHIFT, 4, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv3),
+	HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, 4, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_PMULL),
+	HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, 4, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_AES),
+	HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA1_SHIFT, 4, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA1),
+	HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA2_SHIFT, 4, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA2),
+	HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_CRC32_SHIFT, 4, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_CRC32),
 #endif
 	{},
 };
-- 
GitLab


From 32de73e89099c3f243032a733d3a64d417327a70 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Mon, 7 Feb 2022 15:20:34 +0000
Subject: [PATCH 0657/1586] kselftest/arm64: signal: Allow tests to be
 incompatible with features

Some features may invalidate some tests, for example by supporting an
operation which would trap otherwise. Allow tests to list features that
they are incompatible with so we can cover the case where a signal will
be generated without disruption on systems where that won't happen.

Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20220207152109.197566-6-broonie@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 .../selftests/arm64/signal/test_signals.h     |  1 +
 .../arm64/signal/test_signals_utils.c         | 34 ++++++++++++++-----
 .../arm64/signal/test_signals_utils.h         |  2 ++
 3 files changed, 28 insertions(+), 9 deletions(-)

diff --git a/tools/testing/selftests/arm64/signal/test_signals.h b/tools/testing/selftests/arm64/signal/test_signals.h
index ebe8694dbef0f..f909b70d9e980 100644
--- a/tools/testing/selftests/arm64/signal/test_signals.h
+++ b/tools/testing/selftests/arm64/signal/test_signals.h
@@ -53,6 +53,7 @@ struct tdescr {
 	char			*name;
 	char			*descr;
 	unsigned long		feats_required;
+	unsigned long		feats_incompatible;
 	/* bitmask of effectively supported feats: populated at run-time */
 	unsigned long		feats_supported;
 	bool			initialized;
diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.c b/tools/testing/selftests/arm64/signal/test_signals_utils.c
index 2f8c23af3b5e0..5743897984b0e 100644
--- a/tools/testing/selftests/arm64/signal/test_signals_utils.c
+++ b/tools/testing/selftests/arm64/signal/test_signals_utils.c
@@ -36,6 +36,8 @@ static inline char *feats_to_string(unsigned long feats)
 {
 	size_t flen = MAX_FEATS_SZ - 1;
 
+	feats_string[0] = '\0';
+
 	for (int i = 0; i < FMAX_END; i++) {
 		if (feats & (1UL << i)) {
 			size_t tlen = strlen(feats_names[i]);
@@ -256,7 +258,7 @@ int test_init(struct tdescr *td)
 		td->minsigstksz = MINSIGSTKSZ;
 	fprintf(stderr, "Detected MINSTKSIGSZ:%d\n", td->minsigstksz);
 
-	if (td->feats_required) {
+	if (td->feats_required || td->feats_incompatible) {
 		td->feats_supported = 0;
 		/*
 		 * Checking for CPU required features using both the
@@ -267,15 +269,29 @@ int test_init(struct tdescr *td)
 		if (getauxval(AT_HWCAP) & HWCAP_SVE)
 			td->feats_supported |= FEAT_SVE;
 		if (feats_ok(td)) {
-			fprintf(stderr,
-				"Required Features: [%s] supported\n",
-				feats_to_string(td->feats_required &
-						td->feats_supported));
+			if (td->feats_required & td->feats_supported)
+				fprintf(stderr,
+					"Required Features: [%s] supported\n",
+					feats_to_string(td->feats_required &
+							td->feats_supported));
+			if (!(td->feats_incompatible & td->feats_supported))
+				fprintf(stderr,
+					"Incompatible Features: [%s] absent\n",
+					feats_to_string(td->feats_incompatible));
 		} else {
-			fprintf(stderr,
-				"Required Features: [%s] NOT supported\n",
-				feats_to_string(td->feats_required &
-						~td->feats_supported));
+			if ((td->feats_required & td->feats_supported) !=
+			    td->feats_supported)
+				fprintf(stderr,
+					"Required Features: [%s] NOT supported\n",
+					feats_to_string(td->feats_required &
+							~td->feats_supported));
+			if (td->feats_incompatible & td->feats_supported)
+				fprintf(stderr,
+					"Incompatible Features: [%s] supported\n",
+					feats_to_string(td->feats_incompatible &
+							~td->feats_supported));
+
+
 			td->result = KSFT_SKIP;
 			return 0;
 		}
diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.h b/tools/testing/selftests/arm64/signal/test_signals_utils.h
index 6772b5c8d274d..f3aa99ba67bb7 100644
--- a/tools/testing/selftests/arm64/signal/test_signals_utils.h
+++ b/tools/testing/selftests/arm64/signal/test_signals_utils.h
@@ -18,6 +18,8 @@ void test_result(struct tdescr *td);
 
 static inline bool feats_ok(struct tdescr *td)
 {
+	if (td->feats_incompatible & td->feats_supported)
+		return false;
 	return (td->feats_required & td->feats_supported) == td->feats_required;
 }
 
-- 
GitLab


From 3f9ab2a6986fe2930782408cc2a6c25e3e5cb98f Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Wed, 16 Feb 2022 17:32:21 +0000
Subject: [PATCH 0658/1586] arm64/mte: Document ABI for asymmetric mode

MTE3 adds a new mode which is synchronous for reads but asynchronous for
writes. Document the userspace ABI for this feature, we call the new
mode ASYMM and add a new prctl flag and mte_tcf_preferred value for it.

Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20220216173224.2342152-2-broonie@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 .../arm64/memory-tagging-extension.rst        | 21 +++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/Documentation/arm64/memory-tagging-extension.rst b/Documentation/arm64/memory-tagging-extension.rst
index 7b99c8f428eb6..42677d48a114b 100644
--- a/Documentation/arm64/memory-tagging-extension.rst
+++ b/Documentation/arm64/memory-tagging-extension.rst
@@ -76,6 +76,9 @@ configurable behaviours:
   with ``.si_code = SEGV_MTEAERR`` and ``.si_addr = 0`` (the faulting
   address is unknown).
 
+- *Asymmetric* - Reads are handled as for synchronous mode while writes
+  are handled as for asynchronous mode.
+
 The user can select the above modes, per thread, using the
 ``prctl(PR_SET_TAGGED_ADDR_CTRL, flags, 0, 0, 0)`` system call where ``flags``
 contains any number of the following values in the ``PR_MTE_TCF_MASK``
@@ -85,6 +88,7 @@ bit-field:
                          (ignored if combined with other options)
 - ``PR_MTE_TCF_SYNC``  - *Synchronous* tag check fault mode
 - ``PR_MTE_TCF_ASYNC`` - *Asynchronous* tag check fault mode
+- ``PR_MTE_TCF_ASYMM`` - *Asymmetric* tag check fault mode
 
 If no modes are specified, tag check faults are ignored. If a single
 mode is specified, the program will run in that mode. If multiple
@@ -139,18 +143,23 @@ tag checking mode as the CPU's preferred tag checking mode.
 
 The preferred tag checking mode for each CPU is controlled by
 ``/sys/devices/system/cpu/cpu<N>/mte_tcf_preferred``, to which a
-privileged user may write the value ``async`` or ``sync``.  The default
-preferred mode for each CPU is ``async``.
+privileged user may write the value ``async``, ``sync`` or ``asymm``.  The
+default preferred mode for each CPU is ``async``.
 
 To allow a program to potentially run in the CPU's preferred tag
 checking mode, the user program may set multiple tag check fault mode
 bits in the ``flags`` argument to the ``prctl(PR_SET_TAGGED_ADDR_CTRL,
 flags, 0, 0, 0)`` system call. If the CPU's preferred tag checking
-mode is in the task's set of provided tag checking modes (this will
-always be the case at present because the kernel only supports two
-tag checking modes, but future kernels may support more modes), that
+mode is in the task's set of provided tag checking modes, that
 mode will be selected. Otherwise, one of the modes in the task's mode
-set will be selected in a currently unspecified manner.
+selected by the kernel using the preference order:
+
+	1. Asynchronous
+	2. Asymmetric
+	3. Synchronous
+
+If asymmetric mode is specified by the program but not supported by
+either the system or the kernel then an error will be returned.
 
 Initial process state
 ---------------------
-- 
GitLab


From cb627397e02bc65e44912daebfe0bbe6b0ecd384 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Wed, 16 Feb 2022 17:32:22 +0000
Subject: [PATCH 0659/1586] arm64/mte: Add a little bit of documentation for
 mte_update_sctlr_user()

The code isn't that obscure but it probably won't hurt to have a little
bit more documentation for anyone trying to find out where everything
actually takes effect.

Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Vincenzo Frascino <Vincenzo.Frascino@arm.com>
Tested-by: Branislav Rankov <branislav.rankov@arm.com>
Link: https://lore.kernel.org/r/20220216173224.2342152-3-broonie@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/mte.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index f983795b5eda2..b9a2d13e85f6d 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -186,6 +186,11 @@ void mte_check_tfsr_el1(void)
 }
 #endif
 
+/*
+ * This is where we actually resolve the system and process MTE mode
+ * configuration into an actual value in SCTLR_EL1 that affects
+ * userspace.
+ */
 static void mte_update_sctlr_user(struct task_struct *task)
 {
 	/*
@@ -199,8 +204,17 @@ static void mte_update_sctlr_user(struct task_struct *task)
 	unsigned long pref, resolved_mte_tcf;
 
 	pref = __this_cpu_read(mte_tcf_preferred);
+	/*
+	 * If there is no overlap between the system preferred and
+	 * program requested values go with what was requested.
+	 */
 	resolved_mte_tcf = (mte_ctrl & pref) ? pref : mte_ctrl;
 	sctlr &= ~SCTLR_EL1_TCF0_MASK;
+	/*
+	 * Pick an actual setting. The order in which we check for
+	 * set bits and map into register values determines our
+	 * default order.
+	 */
 	if (resolved_mte_tcf & MTE_CTRL_TCF_ASYNC)
 		sctlr |= SCTLR_EL1_TCF0_ASYNC;
 	else if (resolved_mte_tcf & MTE_CTRL_TCF_SYNC)
-- 
GitLab


From d082a0255fcb8fcb4bd8257df111f2caa67086bc Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Wed, 16 Feb 2022 17:32:23 +0000
Subject: [PATCH 0660/1586] arm64/mte: Add hwcap for asymmetric mode

Allow userspace to detect support for asymmetric mode by providing a hwcap
for it, using the official feature name FEAT_MTE3.

Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Vincenzo Frascino <Vincenzo.Frascino@arm.com>
Tested-by: Branislav Rankov <branislav.rankov@arm.com>
Link: https://lore.kernel.org/r/20220216173224.2342152-4-broonie@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 Documentation/arm64/elf_hwcaps.rst  | 5 +++++
 arch/arm64/include/asm/hwcap.h      | 1 +
 arch/arm64/include/uapi/asm/hwcap.h | 1 +
 arch/arm64/kernel/cpufeature.c      | 1 +
 arch/arm64/kernel/cpuinfo.c         | 1 +
 5 files changed, 9 insertions(+)

diff --git a/Documentation/arm64/elf_hwcaps.rst b/Documentation/arm64/elf_hwcaps.rst
index b72ff17d600ae..a8f30963e550d 100644
--- a/Documentation/arm64/elf_hwcaps.rst
+++ b/Documentation/arm64/elf_hwcaps.rst
@@ -259,6 +259,11 @@ HWCAP2_RPRES
 
     Functionality implied by ID_AA64ISAR2_EL1.RPRES == 0b0001.
 
+HWCAP2_MTE3
+
+    Functionality implied by ID_AA64PFR1_EL1.MTE == 0b0011, as described
+    by Documentation/arm64/memory-tagging-extension.rst.
+
 4. Unused AT_HWCAP bits
 -----------------------
 
diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h
index f68fbb2074730..8db5ec0089dbd 100644
--- a/arch/arm64/include/asm/hwcap.h
+++ b/arch/arm64/include/asm/hwcap.h
@@ -108,6 +108,7 @@
 #define KERNEL_HWCAP_ECV		__khwcap2_feature(ECV)
 #define KERNEL_HWCAP_AFP		__khwcap2_feature(AFP)
 #define KERNEL_HWCAP_RPRES		__khwcap2_feature(RPRES)
+#define KERNEL_HWCAP_MTE3		__khwcap2_feature(MTE3)
 
 /*
  * This yields a mask that user programs can use to figure out what
diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h
index f03731847d9df..99cb5d383048d 100644
--- a/arch/arm64/include/uapi/asm/hwcap.h
+++ b/arch/arm64/include/uapi/asm/hwcap.h
@@ -78,5 +78,6 @@
 #define HWCAP2_ECV		(1 << 19)
 #define HWCAP2_AFP		(1 << 20)
 #define HWCAP2_RPRES		(1 << 21)
+#define HWCAP2_MTE3		(1 << 22)
 
 #endif /* _UAPI__ASM_HWCAP_H */
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index e5f23dab1c8df..5809d5d59258f 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -2488,6 +2488,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
 #endif
 #ifdef CONFIG_ARM64_MTE
 	HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_MTE_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_MTE, CAP_HWCAP, KERNEL_HWCAP_MTE),
+	HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_MTE_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_MTE_ASYMM, CAP_HWCAP, KERNEL_HWCAP_MTE3),
 #endif /* CONFIG_ARM64_MTE */
 	HWCAP_CAP(SYS_ID_AA64MMFR0_EL1, ID_AA64MMFR0_ECV_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ECV),
 	HWCAP_CAP(SYS_ID_AA64MMFR1_EL1, ID_AA64MMFR1_AFP_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AFP),
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 591c18a889a56..330b92ea863aa 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -97,6 +97,7 @@ static const char *const hwcap_str[] = {
 	[KERNEL_HWCAP_ECV]		= "ecv",
 	[KERNEL_HWCAP_AFP]		= "afp",
 	[KERNEL_HWCAP_RPRES]		= "rpres",
+	[KERNEL_HWCAP_MTE3]		= "mte3",
 };
 
 #ifdef CONFIG_COMPAT
-- 
GitLab


From 766121ba5de38a6f67980ec24a6af76c55def100 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Wed, 16 Feb 2022 17:32:24 +0000
Subject: [PATCH 0661/1586] arm64/mte: Add userspace interface for enabling
 asymmetric mode

The architecture provides an asymmetric mode for MTE where tag mismatches
are checked asynchronously for stores but synchronously for loads. Allow
userspace processes to select this and make it available as a default mode
via the existing per-CPU sysfs interface.

Since there PR_MTE_TCF_ values are a bitmask (allowing the kernel to choose
between the multiple modes) and there are no free bits adjacent to the
existing PR_MTE_TCF_ bits the set of bits used to specify the mode becomes
disjoint. Programs using the new interface should be aware of this and
programs that do not use it will not see any change in behaviour.

When userspace requests two possible modes but the system default for the
CPU is the third mode (eg, default is synchronous but userspace requests
either asynchronous or asymmetric) the preference order is:

   ASYMM > ASYNC > SYNC

This situation is not currently possible since there are only two modes and
it is mandatory to have a system default so there could be no ambiguity and
there is no ABI change. The chosen order is basically arbitrary as we do not
have a clear metric for what is better here.

If userspace requests specifically asymmetric mode via the prctl() and the
system does not support it then we will return an error, this mirrors
how we handle the case where userspace enables MTE on a system that does
not support MTE at all and the behaviour that will be seen if running on
an older kernel that does not support userspace use of asymmetric mode.

Attempts to set asymmetric mode as the default mode will result in an error
if the system does not support it.

Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Vincenzo Frascino <Vincenzo.Frascino@arm.com>
Tested-by: Branislav Rankov <branislav.rankov@arm.com>
Link: https://lore.kernel.org/r/20220216173224.2342152-5-broonie@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/processor.h |  1 +
 arch/arm64/kernel/mte.c            | 12 +++++++++++-
 arch/arm64/kernel/process.c        |  5 ++++-
 include/uapi/linux/prctl.h         |  4 +++-
 4 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 6f41b65f99628..73e38d9a540ce 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -21,6 +21,7 @@
 
 #define MTE_CTRL_TCF_SYNC		(1UL << 16)
 #define MTE_CTRL_TCF_ASYNC		(1UL << 17)
+#define MTE_CTRL_TCF_ASYMM		(1UL << 18)
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index b9a2d13e85f6d..cbbd8d93fc502 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -215,7 +215,9 @@ static void mte_update_sctlr_user(struct task_struct *task)
 	 * set bits and map into register values determines our
 	 * default order.
 	 */
-	if (resolved_mte_tcf & MTE_CTRL_TCF_ASYNC)
+	if (resolved_mte_tcf & MTE_CTRL_TCF_ASYMM)
+		sctlr |= SCTLR_EL1_TCF0_ASYMM;
+	else if (resolved_mte_tcf & MTE_CTRL_TCF_ASYNC)
 		sctlr |= SCTLR_EL1_TCF0_ASYNC;
 	else if (resolved_mte_tcf & MTE_CTRL_TCF_SYNC)
 		sctlr |= SCTLR_EL1_TCF0_SYNC;
@@ -309,6 +311,8 @@ long set_mte_ctrl(struct task_struct *task, unsigned long arg)
 		mte_ctrl |= MTE_CTRL_TCF_ASYNC;
 	if (arg & PR_MTE_TCF_SYNC)
 		mte_ctrl |= MTE_CTRL_TCF_SYNC;
+	if (arg & PR_MTE_TCF_ASYMM)
+		mte_ctrl |= MTE_CTRL_TCF_ASYMM;
 
 	task->thread.mte_ctrl = mte_ctrl;
 	if (task == current) {
@@ -337,6 +341,8 @@ long get_mte_ctrl(struct task_struct *task)
 		ret |= PR_MTE_TCF_ASYNC;
 	if (mte_ctrl & MTE_CTRL_TCF_SYNC)
 		ret |= PR_MTE_TCF_SYNC;
+	if (mte_ctrl & MTE_CTRL_TCF_ASYMM)
+		ret |= PR_MTE_TCF_ASYMM;
 
 	return ret;
 }
@@ -484,6 +490,8 @@ static ssize_t mte_tcf_preferred_show(struct device *dev,
 		return sysfs_emit(buf, "async\n");
 	case MTE_CTRL_TCF_SYNC:
 		return sysfs_emit(buf, "sync\n");
+	case MTE_CTRL_TCF_ASYMM:
+		return sysfs_emit(buf, "asymm\n");
 	default:
 		return sysfs_emit(buf, "???\n");
 	}
@@ -499,6 +507,8 @@ static ssize_t mte_tcf_preferred_store(struct device *dev,
 		tcf = MTE_CTRL_TCF_ASYNC;
 	else if (sysfs_streq(buf, "sync"))
 		tcf = MTE_CTRL_TCF_SYNC;
+	else if (cpus_have_cap(ARM64_MTE_ASYMM) && sysfs_streq(buf, "asymm"))
+		tcf = MTE_CTRL_TCF_ASYMM;
 	else
 		return -EINVAL;
 
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 5369e649fa79f..941cfa7117b9b 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -635,7 +635,10 @@ long set_tagged_addr_ctrl(struct task_struct *task, unsigned long arg)
 		return -EINVAL;
 
 	if (system_supports_mte())
-		valid_mask |= PR_MTE_TCF_MASK | PR_MTE_TAG_MASK;
+		valid_mask |= PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC \
+			| PR_MTE_TAG_MASK;
+	if (cpus_have_cap(ARM64_MTE_ASYMM))
+		valid_mask |= PR_MTE_TCF_ASYMM;
 
 	if (arg & ~valid_mask)
 		return -EINVAL;
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index e998764f02625..4ae2b21e4066a 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -238,7 +238,9 @@ struct prctl_mm_map {
 # define PR_MTE_TCF_NONE		0UL
 # define PR_MTE_TCF_SYNC		(1UL << 1)
 # define PR_MTE_TCF_ASYNC		(1UL << 2)
-# define PR_MTE_TCF_MASK		(PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC)
+# define PR_MTE_TCF_ASYMM		(1UL << 19)
+# define PR_MTE_TCF_MASK		(PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC | \
+					 PR_MTE_TCF_ASYMM)
 /* MTE tag inclusion mask */
 # define PR_MTE_TAG_SHIFT		3
 # define PR_MTE_TAG_MASK		(0xffffUL << PR_MTE_TAG_SHIFT)
-- 
GitLab


From 2fef99b8372c1ae3d8445ab570e888b5a358dbe9 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Fri, 11 Feb 2022 18:56:46 -0800
Subject: [PATCH 0662/1586] f2fs: fix missing free nid in
 f2fs_handle_failed_inode

This patch fixes xfstests/generic/475 failure.

[  293.680694] F2FS-fs (dm-1): May loss orphan inode, run fsck to fix.
[  293.685358] Buffer I/O error on dev dm-1, logical block 8388592, async page read
[  293.691527] Buffer I/O error on dev dm-1, logical block 8388592, async page read
[  293.691764] sh (7615): drop_caches: 3
[  293.691819] sh (7616): drop_caches: 3
[  293.694017] Buffer I/O error on dev dm-1, logical block 1, async page read
[  293.695659] sh (7618): drop_caches: 3
[  293.696979] sh (7617): drop_caches: 3
[  293.700290] sh (7623): drop_caches: 3
[  293.708621] sh (7626): drop_caches: 3
[  293.711386] sh (7628): drop_caches: 3
[  293.711825] sh (7627): drop_caches: 3
[  293.716738] sh (7630): drop_caches: 3
[  293.719613] sh (7632): drop_caches: 3
[  293.720971] sh (7633): drop_caches: 3
[  293.727741] sh (7634): drop_caches: 3
[  293.730783] sh (7636): drop_caches: 3
[  293.732681] sh (7635): drop_caches: 3
[  293.732988] sh (7637): drop_caches: 3
[  293.738836] sh (7639): drop_caches: 3
[  293.740568] sh (7641): drop_caches: 3
[  293.743053] sh (7640): drop_caches: 3
[  293.821889] ------------[ cut here ]------------
[  293.824654] kernel BUG at fs/f2fs/node.c:3334!
[  293.826226] invalid opcode: 0000 [#1] PREEMPT SMP PTI
[  293.828713] CPU: 0 PID: 7653 Comm: umount Tainted: G           OE     5.17.0-rc1-custom #1
[  293.830946] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014
[  293.832526] RIP: 0010:f2fs_destroy_node_manager+0x33f/0x350 [f2fs]
[  293.833905] Code: e8 d6 3d f9 f9 48 8b 45 d0 65 48 2b 04 25 28 00 00 00 75 1a 48 81 c4 28 03 00 00 5b 41 5c 41 5d 41 5e 41 5f 5d c3 0f 0b
[  293.837783] RSP: 0018:ffffb04ec31e7a20 EFLAGS: 00010202
[  293.839062] RAX: 0000000000000001 RBX: ffff9df947db2eb8 RCX: 0000000080aa0072
[  293.840666] RDX: 0000000000000000 RSI: ffffe86c0432a140 RDI: ffffffffc0b72a21
[  293.842261] RBP: ffffb04ec31e7d70 R08: ffff9df94ca85780 R09: 0000000080aa0072
[  293.843909] R10: ffff9df94ca85700 R11: ffff9df94e1ccf58 R12: ffff9df947db2e00
[  293.845594] R13: ffff9df947db2ed0 R14: ffff9df947db2eb8 R15: ffff9df947db2eb8
[  293.847855] FS:  00007f5a97379800(0000) GS:ffff9dfa77c00000(0000) knlGS:0000000000000000
[  293.850647] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  293.852940] CR2: 00007f5a97528730 CR3: 000000010bc76005 CR4: 0000000000370ef0
[  293.854680] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[  293.856423] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[  293.858380] Call Trace:
[  293.859302]  <TASK>
[  293.860311]  ? ttwu_do_wakeup+0x1c/0x170
[  293.861800]  ? ttwu_do_activate+0x6d/0xb0
[  293.863057]  ? _raw_spin_unlock_irqrestore+0x29/0x40
[  293.864411]  ? try_to_wake_up+0x9d/0x5e0
[  293.865618]  ? debug_smp_processor_id+0x17/0x20
[  293.866934]  ? debug_smp_processor_id+0x17/0x20
[  293.868223]  ? free_unref_page+0xbf/0x120
[  293.869470]  ? __free_slab+0xcb/0x1c0
[  293.870614]  ? preempt_count_add+0x7a/0xc0
[  293.871811]  ? __slab_free+0xa0/0x2d0
[  293.872918]  ? __wake_up_common_lock+0x8a/0xc0
[  293.874186]  ? __slab_free+0xa0/0x2d0
[  293.875305]  ? free_inode_nonrcu+0x20/0x20
[  293.876466]  ? free_inode_nonrcu+0x20/0x20
[  293.877650]  ? debug_smp_processor_id+0x17/0x20
[  293.878949]  ? call_rcu+0x11a/0x240
[  293.880060]  ? f2fs_destroy_stats+0x59/0x60 [f2fs]
[  293.881437]  ? kfree+0x1fe/0x230
[  293.882674]  f2fs_put_super+0x160/0x390 [f2fs]
[  293.883978]  generic_shutdown_super+0x7a/0x120
[  293.885274]  kill_block_super+0x27/0x50
[  293.886496]  kill_f2fs_super+0x7f/0x100 [f2fs]
[  293.887806]  deactivate_locked_super+0x35/0xa0
[  293.889271]  deactivate_super+0x40/0x50
[  293.890513]  cleanup_mnt+0x139/0x190
[  293.891689]  __cleanup_mnt+0x12/0x20
[  293.892850]  task_work_run+0x64/0xa0
[  293.894035]  exit_to_user_mode_prepare+0x1b7/0x1c0
[  293.895409]  syscall_exit_to_user_mode+0x27/0x50
[  293.896872]  do_syscall_64+0x48/0xc0
[  293.898090]  entry_SYSCALL_64_after_hwframe+0x44/0xae
[  293.899517] RIP: 0033:0x7f5a975cd25b

Fixes: 7735730d39d7 ("f2fs: fix to propagate error from __get_meta_page()")
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/inode.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 0ec8e32a00b47..ab8e0c06c78c4 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -885,6 +885,7 @@ void f2fs_handle_failed_inode(struct inode *inode)
 	err = f2fs_get_node_info(sbi, inode->i_ino, &ni, false);
 	if (err) {
 		set_sbi_flag(sbi, SBI_NEED_FSCK);
+		set_inode_flag(inode, FI_FREE_NID);
 		f2fs_warn(sbi, "May loss orphan inode, run fsck to fix.");
 		goto out;
 	}
-- 
GitLab


From c7f91bd4102902384137dd5c50d04bfed27050dd Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Tue, 22 Feb 2022 10:43:13 -0800
Subject: [PATCH 0663/1586] f2fs: Restore rwsem lockdep support

Lockdep uses lock class keys in its analysis. init_rwsem() instantiates
one lock class key with each init_rwsem() user as follows:

 #define init_rwsem(sem)                                        \
 do {                                                           \
         static struct lock_class_key __key;                    \
                                                                \
         __init_rwsem((sem), #sem, &__key);                     \
 } while (0)

Commit e4544b63a7ee ("f2fs: move f2fs to use reader-unfair rwsems") reduced
the number of lock class keys from one per init_rwsem() user to one per
file in which init_f2fs_rwsem() is used. This causes the same lock class key
to be associated with multiple f2fs rwsems and also triggers a number of
false positive lockdep deadlock reports. Fix this by again instantiating one
lock class key with each init_f2fs_rwsem() caller.

Cc: Tim Murray <timmurray@google.com>
Reported-by: syzbot+0b9cadf5fc45a98a5083@syzkaller.appspotmail.com
Fixes: e4544b63a7ee ("f2fs: move f2fs to use reader-unfair rwsems")
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/f2fs.h | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index c9515c3c54fd5..47bf9e30913f9 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -2115,9 +2115,17 @@ static inline void clear_ckpt_flags(struct f2fs_sb_info *sbi, unsigned int f)
 	spin_unlock_irqrestore(&sbi->cp_lock, flags);
 }
 
-static inline void init_f2fs_rwsem(struct f2fs_rwsem *sem)
+#define init_f2fs_rwsem(sem)					\
+do {								\
+	static struct lock_class_key __key;			\
+								\
+	__init_f2fs_rwsem((sem), #sem, &__key);			\
+} while (0)
+
+static inline void __init_f2fs_rwsem(struct f2fs_rwsem *sem,
+		const char *sem_name, struct lock_class_key *key)
 {
-	init_rwsem(&sem->internal_rwsem);
+	__init_rwsem(&sem->internal_rwsem, sem_name, key);
 	init_waitqueue_head(&sem->read_waiters);
 }
 
-- 
GitLab


From 680af5b824a52faa819167628665804a14f0e0df Mon Sep 17 00:00:00 2001
From: Juhyung Park <qkrwngud825@gmail.com>
Date: Tue, 15 Feb 2022 17:27:21 +0900
Subject: [PATCH 0664/1586] f2fs: quota: fix loop condition at
 f2fs_quota_sync()

cnt should be passed to sb_has_quota_active() instead of type to check
active quota properly.

Moreover, when the type is -1, the compiler with enough inline knowledge
can discard sb_has_quota_active() check altogether, causing a NULL pointer
dereference at the following inode_lock(dqopt->files[cnt]):

[    2.796010] Unable to handle kernel NULL pointer dereference at virtual address 00000000000000a0
[    2.796024] Mem abort info:
[    2.796025]   ESR = 0x96000005
[    2.796028]   EC = 0x25: DABT (current EL), IL = 32 bits
[    2.796029]   SET = 0, FnV = 0
[    2.796031]   EA = 0, S1PTW = 0
[    2.796032] Data abort info:
[    2.796034]   ISV = 0, ISS = 0x00000005
[    2.796035]   CM = 0, WnR = 0
[    2.796046] user pgtable: 4k pages, 39-bit VAs, pgdp=00000003370d1000
[    2.796048] [00000000000000a0] pgd=0000000000000000, pud=0000000000000000
[    2.796051] Internal error: Oops: 96000005 [#1] PREEMPT SMP
[    2.796056] CPU: 7 PID: 640 Comm: f2fs_ckpt-259:7 Tainted: G S                5.4.179-arter97-r8-64666-g2f16e087f9d8 #1
[    2.796057] Hardware name: Qualcomm Technologies, Inc. Lahaina MTP lemonadep (DT)
[    2.796059] pstate: 80c00005 (Nzcv daif +PAN +UAO)
[    2.796065] pc : down_write+0x28/0x70
[    2.796070] lr : f2fs_quota_sync+0x100/0x294
[    2.796071] sp : ffffffa3f48ffc30
[    2.796073] x29: ffffffa3f48ffc30 x28: 0000000000000000
[    2.796075] x27: ffffffa3f6d718b8 x26: ffffffa415fe9d80
[    2.796077] x25: ffffffa3f7290048 x24: 0000000000000001
[    2.796078] x23: 0000000000000000 x22: ffffffa3f7290000
[    2.796080] x21: ffffffa3f72904a0 x20: ffffffa3f7290110
[    2.796081] x19: ffffffa3f77a9800 x18: ffffffc020aae038
[    2.796083] x17: ffffffa40e38e040 x16: ffffffa40e38e6d0
[    2.796085] x15: ffffffa40e38e6cc x14: ffffffa40e38e6d0
[    2.796086] x13: 00000000000004f6 x12: 00162c44ff493000
[    2.796088] x11: 0000000000000400 x10: ffffffa40e38c948
[    2.796090] x9 : 0000000000000000 x8 : 00000000000000a0
[    2.796091] x7 : 0000000000000000 x6 : 0000d1060f00002a
[    2.796093] x5 : ffffffa3f48ff718 x4 : 000000000000000d
[    2.796094] x3 : 00000000060c0000 x2 : 0000000000000001
[    2.796096] x1 : 0000000000000000 x0 : 00000000000000a0
[    2.796098] Call trace:
[    2.796100]  down_write+0x28/0x70
[    2.796102]  f2fs_quota_sync+0x100/0x294
[    2.796104]  block_operations+0x120/0x204
[    2.796106]  f2fs_write_checkpoint+0x11c/0x520
[    2.796107]  __checkpoint_and_complete_reqs+0x7c/0xd34
[    2.796109]  issue_checkpoint_thread+0x6c/0xb8
[    2.796112]  kthread+0x138/0x414
[    2.796114]  ret_from_fork+0x10/0x18
[    2.796117] Code: aa0803e0 aa1f03e1 52800022 aa0103e9 (c8e97d02)
[    2.796120] ---[ end trace 96e942e8eb6a0b53 ]---
[    2.800116] Kernel panic - not syncing: Fatal exception
[    2.800120] SMP: stopping secondary CPUs

Fixes: 9de71ede81e6 ("f2fs: quota: fix potential deadlock")
Cc: <stable@vger.kernel.org> # v5.15+
Signed-off-by: Juhyung Park <qkrwngud825@gmail.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/super.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 22fb4d3b11702..8e3840973077a 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -2689,7 +2689,7 @@ int f2fs_quota_sync(struct super_block *sb, int type)
 	struct f2fs_sb_info *sbi = F2FS_SB(sb);
 	struct quota_info *dqopt = sb_dqopt(sb);
 	int cnt;
-	int ret;
+	int ret = 0;
 
 	/*
 	 * Now when everything is written we can discard the pagecache so
@@ -2700,8 +2700,8 @@ int f2fs_quota_sync(struct super_block *sb, int type)
 		if (type != -1 && cnt != type)
 			continue;
 
-		if (!sb_has_quota_active(sb, type))
-			return 0;
+		if (!sb_has_quota_active(sb, cnt))
+			continue;
 
 		inode_lock(dqopt->files[cnt]);
 
-- 
GitLab


From 9ddb00a2a136cc6ebbf6ee32fcf527d0d66044a2 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Fri, 11 Feb 2022 08:09:27 -0800
Subject: [PATCH 0665/1586] ACPI: fan: Fix error reporting to user space

When user get/set cur_state fails, it should be some negative error
value instead of whatever returned by acpi_evaluate_object() or from
acpi_execute_simple_method(). The return value from these apis is
some positive values greater than 0. For example if AE_NOT_FOUND
is returned it will be "5".

In other ACPI drivers, -ENODEV is returned when ACPI_FAILURE(status)
is true. Do the same thing here for thermal sysfs callbacks for
get and set for failures.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/fan.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/acpi/fan.c b/drivers/acpi/fan.c
index 5cd0ceb50bc8a..098d64568d6d4 100644
--- a/drivers/acpi/fan.c
+++ b/drivers/acpi/fan.c
@@ -107,7 +107,7 @@ static int fan_get_state_acpi4(struct acpi_device *device, unsigned long *state)
 	status = acpi_evaluate_object(device->handle, "_FST", NULL, &buffer);
 	if (ACPI_FAILURE(status)) {
 		dev_err(&device->dev, "Get fan state failed\n");
-		return status;
+		return -ENODEV;
 	}
 
 	obj = buffer.pointer;
@@ -195,7 +195,7 @@ static int fan_set_state_acpi4(struct acpi_device *device, unsigned long state)
 					    fan->fps[state].control);
 	if (ACPI_FAILURE(status)) {
 		dev_dbg(&device->dev, "Failed to set state by _FSL\n");
-		return status;
+		return -ENODEV;
 	}
 
 	return 0;
-- 
GitLab


From 00ae053a0533155d830da27070a931e6fa747327 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Fri, 11 Feb 2022 08:09:28 -0800
Subject: [PATCH 0666/1586] ACPI: fan: Separate file for attributes creation

Move the functionality of creation of sysfs attributes under acpi device
to a new file fan_attr.c. This cleans up the core fan code, which just
use thermal sysfs interface. The original fan.c is renamed to
fan_core.c.

No functional changes are expected.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/Makefile              |  3 +
 drivers/acpi/fan.h                 | 35 +++++++++++
 drivers/acpi/fan_attr.c            | 86 ++++++++++++++++++++++++++
 drivers/acpi/{fan.c => fan_core.c} | 98 +++---------------------------
 4 files changed, 133 insertions(+), 89 deletions(-)
 create mode 100644 drivers/acpi/fan_attr.c
 rename drivers/acpi/{fan.c => fan_core.c} (80%)

diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
index bb757148e7ba0..b5a8d3e00a52b 100644
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -81,6 +81,9 @@ obj-$(CONFIG_ACPI_AC) 		+= ac.o
 obj-$(CONFIG_ACPI_BUTTON)	+= button.o
 obj-$(CONFIG_ACPI_TINY_POWER_BUTTON)	+= tiny-power-button.o
 obj-$(CONFIG_ACPI_FAN)		+= fan.o
+fan-objs			:= fan_core.o
+fan-objs			+= fan_attr.o
+
 obj-$(CONFIG_ACPI_VIDEO)	+= video.o
 obj-$(CONFIG_ACPI_TAD)		+= acpi_tad.o
 obj-$(CONFIG_ACPI_PCI_SLOT)	+= pci_slot.o
diff --git a/drivers/acpi/fan.h b/drivers/acpi/fan.h
index dd9bb8ca22442..36c5e1a570944 100644
--- a/drivers/acpi/fan.h
+++ b/drivers/acpi/fan.h
@@ -6,9 +6,44 @@
  *
  * Add new device IDs before the generic ACPI fan one.
  */
+
+#ifndef _ACPI_FAN_H_
+#define _ACPI_FAN_H_
+
 #define ACPI_FAN_DEVICE_IDS	\
 	{"INT3404", }, /* Fan */ \
 	{"INTC1044", }, /* Fan for Tiger Lake generation */ \
 	{"INTC1048", }, /* Fan for Alder Lake generation */ \
 	{"INTC10A2", }, /* Fan for Raptor Lake generation */ \
 	{"PNP0C0B", } /* Generic ACPI fan */
+
+#define ACPI_FPS_NAME_LEN	20
+
+struct acpi_fan_fps {
+	u64 control;
+	u64 trip_point;
+	u64 speed;
+	u64 noise_level;
+	u64 power;
+	char name[ACPI_FPS_NAME_LEN];
+	struct device_attribute dev_attr;
+};
+
+struct acpi_fan_fif {
+	u64 revision;
+	u64 fine_grain_ctrl;
+	u64 step_size;
+	u64 low_speed_notification;
+};
+
+struct acpi_fan {
+	bool acpi4;
+	struct acpi_fan_fif fif;
+	struct acpi_fan_fps *fps;
+	int fps_count;
+	struct thermal_cooling_device *cdev;
+};
+
+int acpi_fan_create_attributes(struct acpi_device *device);
+void acpi_fan_delete_attributes(struct acpi_device *device);
+#endif
diff --git a/drivers/acpi/fan_attr.c b/drivers/acpi/fan_attr.c
new file mode 100644
index 0000000000000..7b109022108b4
--- /dev/null
+++ b/drivers/acpi/fan_attr.c
@@ -0,0 +1,86 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *  fan_attr.c - Create extra attributes for ACPI Fan driver
+ *
+ *  Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
+ *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
+ *  Copyright (C) 2022 Intel Corporation. All rights reserved.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/acpi.h>
+
+#include "fan.h"
+
+MODULE_LICENSE("GPL");
+
+static ssize_t show_state(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct acpi_fan_fps *fps = container_of(attr, struct acpi_fan_fps, dev_attr);
+	int count;
+
+	if (fps->control == 0xFFFFFFFF || fps->control > 100)
+		count = scnprintf(buf, PAGE_SIZE, "not-defined:");
+	else
+		count = scnprintf(buf, PAGE_SIZE, "%lld:", fps->control);
+
+	if (fps->trip_point == 0xFFFFFFFF || fps->trip_point > 9)
+		count += scnprintf(&buf[count], PAGE_SIZE - count, "not-defined:");
+	else
+		count += scnprintf(&buf[count], PAGE_SIZE - count, "%lld:", fps->trip_point);
+
+	if (fps->speed == 0xFFFFFFFF)
+		count += scnprintf(&buf[count], PAGE_SIZE - count, "not-defined:");
+	else
+		count += scnprintf(&buf[count], PAGE_SIZE - count, "%lld:", fps->speed);
+
+	if (fps->noise_level == 0xFFFFFFFF)
+		count += scnprintf(&buf[count], PAGE_SIZE - count, "not-defined:");
+	else
+		count += scnprintf(&buf[count], PAGE_SIZE - count, "%lld:", fps->noise_level * 100);
+
+	if (fps->power == 0xFFFFFFFF)
+		count += scnprintf(&buf[count], PAGE_SIZE - count, "not-defined\n");
+	else
+		count += scnprintf(&buf[count], PAGE_SIZE - count, "%lld\n", fps->power);
+
+	return count;
+}
+
+int acpi_fan_create_attributes(struct acpi_device *device)
+{
+	struct acpi_fan *fan = acpi_driver_data(device);
+	int i, status = 0;
+
+	for (i = 0; i < fan->fps_count; ++i) {
+		struct acpi_fan_fps *fps = &fan->fps[i];
+
+		snprintf(fps->name, ACPI_FPS_NAME_LEN, "state%d", i);
+		sysfs_attr_init(&fps->dev_attr.attr);
+		fps->dev_attr.show = show_state;
+		fps->dev_attr.store = NULL;
+		fps->dev_attr.attr.name = fps->name;
+		fps->dev_attr.attr.mode = 0444;
+		status = sysfs_create_file(&device->dev.kobj, &fps->dev_attr.attr);
+		if (status) {
+			int j;
+
+			for (j = 0; j < i; ++j)
+				sysfs_remove_file(&device->dev.kobj, &fan->fps[j].dev_attr.attr);
+			break;
+		}
+	}
+
+	return status;
+}
+
+void acpi_fan_delete_attributes(struct acpi_device *device)
+{
+	struct acpi_fan *fan = acpi_driver_data(device);
+	int i;
+
+	for (i = 0; i < fan->fps_count; ++i)
+		sysfs_remove_file(&device->dev.kobj, &fan->fps[i].dev_attr.attr);
+}
diff --git a/drivers/acpi/fan.c b/drivers/acpi/fan_core.c
similarity index 80%
rename from drivers/acpi/fan.c
rename to drivers/acpi/fan_core.c
index 098d64568d6d4..9f8e68403fad6 100644
--- a/drivers/acpi/fan.c
+++ b/drivers/acpi/fan_core.c
@@ -1,9 +1,10 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 /*
- *  acpi_fan.c - ACPI Fan Driver ($Revision: 29 $)
+ *  fan_core.c - ACPI Fan core Driver
  *
  *  Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
  *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
+ *  Copyright (C) 2022 Intel Corporation. All rights reserved.
  */
 
 #include <linux/kernel.h>
@@ -45,33 +46,6 @@ static const struct dev_pm_ops acpi_fan_pm = {
 #define FAN_PM_OPS_PTR NULL
 #endif
 
-#define ACPI_FPS_NAME_LEN	20
-
-struct acpi_fan_fps {
-	u64 control;
-	u64 trip_point;
-	u64 speed;
-	u64 noise_level;
-	u64 power;
-	char name[ACPI_FPS_NAME_LEN];
-	struct device_attribute dev_attr;
-};
-
-struct acpi_fan_fif {
-	u64 revision;
-	u64 fine_grain_ctrl;
-	u64 step_size;
-	u64 low_speed_notification;
-};
-
-struct acpi_fan {
-	bool acpi4;
-	struct acpi_fan_fif fif;
-	struct acpi_fan_fps *fps;
-	int fps_count;
-	struct thermal_cooling_device *cdev;
-};
-
 static struct platform_driver acpi_fan_driver = {
 	.probe = acpi_fan_probe,
 	.remove = acpi_fan_remove,
@@ -270,39 +244,6 @@ static int acpi_fan_speed_cmp(const void *a, const void *b)
 	return fps1->speed - fps2->speed;
 }
 
-static ssize_t show_state(struct device *dev, struct device_attribute *attr, char *buf)
-{
-	struct acpi_fan_fps *fps = container_of(attr, struct acpi_fan_fps, dev_attr);
-	int count;
-
-	if (fps->control == 0xFFFFFFFF || fps->control > 100)
-		count = scnprintf(buf, PAGE_SIZE, "not-defined:");
-	else
-		count = scnprintf(buf, PAGE_SIZE, "%lld:", fps->control);
-
-	if (fps->trip_point == 0xFFFFFFFF || fps->trip_point > 9)
-		count += scnprintf(&buf[count], PAGE_SIZE - count, "not-defined:");
-	else
-		count += scnprintf(&buf[count], PAGE_SIZE - count, "%lld:", fps->trip_point);
-
-	if (fps->speed == 0xFFFFFFFF)
-		count += scnprintf(&buf[count], PAGE_SIZE - count, "not-defined:");
-	else
-		count += scnprintf(&buf[count], PAGE_SIZE - count, "%lld:", fps->speed);
-
-	if (fps->noise_level == 0xFFFFFFFF)
-		count += scnprintf(&buf[count], PAGE_SIZE - count, "not-defined:");
-	else
-		count += scnprintf(&buf[count], PAGE_SIZE - count, "%lld:", fps->noise_level * 100);
-
-	if (fps->power == 0xFFFFFFFF)
-		count += scnprintf(&buf[count], PAGE_SIZE - count, "not-defined\n");
-	else
-		count += scnprintf(&buf[count], PAGE_SIZE - count, "%lld\n", fps->power);
-
-	return count;
-}
-
 static int acpi_fan_get_fps(struct acpi_device *device)
 {
 	struct acpi_fan *fan = acpi_driver_data(device);
@@ -347,25 +288,6 @@ static int acpi_fan_get_fps(struct acpi_device *device)
 	sort(fan->fps, fan->fps_count, sizeof(*fan->fps),
 	     acpi_fan_speed_cmp, NULL);
 
-	for (i = 0; i < fan->fps_count; ++i) {
-		struct acpi_fan_fps *fps = &fan->fps[i];
-
-		snprintf(fps->name, ACPI_FPS_NAME_LEN, "state%d", i);
-		sysfs_attr_init(&fps->dev_attr.attr);
-		fps->dev_attr.show = show_state;
-		fps->dev_attr.store = NULL;
-		fps->dev_attr.attr.name = fps->name;
-		fps->dev_attr.attr.mode = 0444;
-		status = sysfs_create_file(&device->dev.kobj, &fps->dev_attr.attr);
-		if (status) {
-			int j;
-
-			for (j = 0; j < i; ++j)
-				sysfs_remove_file(&device->dev.kobj, &fan->fps[j].dev_attr.attr);
-			break;
-		}
-	}
-
 err:
 	kfree(obj);
 	return status;
@@ -396,6 +318,10 @@ static int acpi_fan_probe(struct platform_device *pdev)
 		if (result)
 			return result;
 
+		result = acpi_fan_create_attributes(device);
+		if (result)
+			return result;
+
 		fan->acpi4 = true;
 	} else {
 		result = acpi_device_update_power(device, NULL);
@@ -437,12 +363,8 @@ static int acpi_fan_probe(struct platform_device *pdev)
 	return 0;
 
 err_end:
-	if (fan->acpi4) {
-		int i;
-
-		for (i = 0; i < fan->fps_count; ++i)
-			sysfs_remove_file(&device->dev.kobj, &fan->fps[i].dev_attr.attr);
-	}
+	if (fan->acpi4)
+		acpi_fan_delete_attributes(device);
 
 	return result;
 }
@@ -453,10 +375,8 @@ static int acpi_fan_remove(struct platform_device *pdev)
 
 	if (fan->acpi4) {
 		struct acpi_device *device = ACPI_COMPANION(&pdev->dev);
-		int i;
 
-		for (i = 0; i < fan->fps_count; ++i)
-			sysfs_remove_file(&device->dev.kobj, &fan->fps[i].dev_attr.attr);
+		acpi_fan_delete_attributes(device);
 	}
 	sysfs_remove_link(&pdev->dev.kobj, "thermal_cooling");
 	sysfs_remove_link(&fan->cdev->device.kobj, "device");
-- 
GitLab


From d445571fa369cf08148dcd9bce563d5fae14fcd7 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Fri, 11 Feb 2022 08:09:29 -0800
Subject: [PATCH 0667/1586] ACPI: fan: Optimize struct acpi_fan_fif

We don't need u64 to store the information about _FIF. There are two
booleans (fine_grain_ctrl and low_speed_notification) and one field
step_size which can take value from 1-9. There are no internal users
of revision field. So convert all fields to u8, by not directly
extracting the _FIF info the struct. Use an intermediate buffer to
extract and assign.

This will help to do u32 math using these fields. No functional
changes are expected.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/fan.h      | 8 ++++----
 drivers/acpi/fan_core.c | 8 +++++++-
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/drivers/acpi/fan.h b/drivers/acpi/fan.h
index 36c5e1a570944..6cbb4b028da03 100644
--- a/drivers/acpi/fan.h
+++ b/drivers/acpi/fan.h
@@ -30,10 +30,10 @@ struct acpi_fan_fps {
 };
 
 struct acpi_fan_fif {
-	u64 revision;
-	u64 fine_grain_ctrl;
-	u64 step_size;
-	u64 low_speed_notification;
+	u8 revision;
+	u8 fine_grain_ctrl;
+	u8 step_size;
+	u8 low_speed_notification;
 };
 
 struct acpi_fan {
diff --git a/drivers/acpi/fan_core.c b/drivers/acpi/fan_core.c
index 9f8e68403fad6..484cee0fb13e2 100644
--- a/drivers/acpi/fan_core.c
+++ b/drivers/acpi/fan_core.c
@@ -211,7 +211,8 @@ static int acpi_fan_get_fif(struct acpi_device *device)
 	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
 	struct acpi_fan *fan = acpi_driver_data(device);
 	struct acpi_buffer format = { sizeof("NNNN"), "NNNN" };
-	struct acpi_buffer fif = { sizeof(fan->fif), &fan->fif };
+	u64 fields[4];
+	struct acpi_buffer fif = { sizeof(fields), fields };
 	union acpi_object *obj;
 	acpi_status status;
 
@@ -232,6 +233,11 @@ static int acpi_fan_get_fif(struct acpi_device *device)
 		status = -EINVAL;
 	}
 
+	fan->fif.revision = fields[0];
+	fan->fif.fine_grain_ctrl = fields[1];
+	fan->fif.step_size = fields[2];
+	fan->fif.low_speed_notification = fields[3];
+
 err:
 	kfree(obj);
 	return status;
-- 
GitLab


From bea2d9868ef553e376480de3cd84a7a06fb03e41 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Fri, 11 Feb 2022 15:27:42 -0800
Subject: [PATCH 0668/1586] ACPI: fan: Properly handle fine grain control

When _FIF object specifies support for fine grain control, then fan speed
can be set from 0 to 100% with the recommended minimum "step size" via
_FSL object. Here the control value doesn't need to match any value from
_FPS object.

Currently we have a simple solution implemented which just pick maximum
control value from _FPS to display the actual state, but this is not
optimal when there is a big window between two control values in
_FPS. Also there is no way to set to any speed which doesn't match
control values in _FPS. The system firmware can start the fan at speed
which doesn't match any control value.

To support fine grain control (when supported) via thermal sysfs:
- cooling device max state is not _FPS state count but it will be
100 / _FIF.step_size
Step size can be from 1 to 9.
- cooling device current state is _FST.control / _FIF.step_size
- cooling device set state will set the control value
cdev.curr_state * _FIF.step_size plus any adjustment for 100%.
By the spec, when control value do not sum to 100% because of
_FIF.step_size, OSPM may select an appropriate ending Level increment
to reach 100%.

There is no rounding during calculation. For example if step size
is 6:
thermal sysfs cooling device max_state = 100/6 = 16
So user can set any value from 0-16.

If the system boots with a _FST.control which is not multiples
of step_size, the thermal sysfs cur_state will be based on the
range. For example for step size = 6:
_FST.control	thermal sysfs cur_state
------------------------------------------------
0-5		0
6-11		1
..
..
90-95		15
96-100		16

While setting the _FST.control, the compensation will be at
the last step for cur_state = 16, which will set the _FST.control
to 100.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/fan.h      |  6 +++
 drivers/acpi/fan_core.c | 94 +++++++++++++++++++++++++++++------------
 2 files changed, 74 insertions(+), 26 deletions(-)

diff --git a/drivers/acpi/fan.h b/drivers/acpi/fan.h
index 6cbb4b028da03..4c01be2e3b779 100644
--- a/drivers/acpi/fan.h
+++ b/drivers/acpi/fan.h
@@ -36,6 +36,12 @@ struct acpi_fan_fif {
 	u8 low_speed_notification;
 };
 
+struct acpi_fan_fst {
+	u64 revision;
+	u64 control;
+	u64 speed;
+};
+
 struct acpi_fan {
 	bool acpi4;
 	struct acpi_fan_fif fif;
diff --git a/drivers/acpi/fan_core.c b/drivers/acpi/fan_core.c
index 484cee0fb13e2..01616ec2e9ac0 100644
--- a/drivers/acpi/fan_core.c
+++ b/drivers/acpi/fan_core.c
@@ -63,20 +63,24 @@ static int fan_get_max_state(struct thermal_cooling_device *cdev, unsigned long
 	struct acpi_device *device = cdev->devdata;
 	struct acpi_fan *fan = acpi_driver_data(device);
 
-	if (fan->acpi4)
-		*state = fan->fps_count - 1;
-	else
+	if (fan->acpi4) {
+		if (fan->fif.fine_grain_ctrl)
+			*state = 100 / fan->fif.step_size;
+		else
+			*state = fan->fps_count - 1;
+	} else {
 		*state = 1;
+	}
+
 	return 0;
 }
 
-static int fan_get_state_acpi4(struct acpi_device *device, unsigned long *state)
+static int acpi_fan_get_fst(struct acpi_device *device, struct acpi_fan_fst *fst)
 {
 	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
-	struct acpi_fan *fan = acpi_driver_data(device);
 	union acpi_object *obj;
 	acpi_status status;
-	int control, i;
+	int ret = 0;
 
 	status = acpi_evaluate_object(device->handle, "_FST", NULL, &buffer);
 	if (ACPI_FAILURE(status)) {
@@ -89,35 +93,52 @@ static int fan_get_state_acpi4(struct acpi_device *device, unsigned long *state)
 	    obj->package.count != 3 ||
 	    obj->package.elements[1].type != ACPI_TYPE_INTEGER) {
 		dev_err(&device->dev, "Invalid _FST data\n");
-		status = -EINVAL;
+		ret = -EINVAL;
 		goto err;
 	}
 
-	control = obj->package.elements[1].integer.value;
+	fst->revision = obj->package.elements[0].integer.value;
+	fst->control = obj->package.elements[1].integer.value;
+	fst->speed = obj->package.elements[2].integer.value;
+
+err:
+	kfree(obj);
+	return ret;
+}
+
+static int fan_get_state_acpi4(struct acpi_device *device, unsigned long *state)
+{
+	struct acpi_fan *fan = acpi_driver_data(device);
+	struct acpi_fan_fst fst;
+	int status, i;
+
+	status = acpi_fan_get_fst(device, &fst);
+	if (status)
+		return status;
+
+	if (fan->fif.fine_grain_ctrl) {
+		/* This control should be same what we set using _FSL by spec */
+		if (fst.control > 100) {
+			dev_dbg(&device->dev, "Invalid control value returned\n");
+			goto match_fps;
+		}
+
+		*state = (int) fst.control / fan->fif.step_size;
+		return 0;
+	}
+
+match_fps:
 	for (i = 0; i < fan->fps_count; i++) {
-		/*
-		 * When Fine Grain Control is set, return the state
-		 * corresponding to maximum fan->fps[i].control
-		 * value compared to the current speed. Here the
-		 * fan->fps[] is sorted array with increasing speed.
-		 */
-		if (fan->fif.fine_grain_ctrl && control < fan->fps[i].control) {
-			i = (i > 0) ? i - 1 : 0;
-			break;
-		} else if (control == fan->fps[i].control) {
+		if (fst.control == fan->fps[i].control)
 			break;
-		}
 	}
 	if (i == fan->fps_count) {
 		dev_dbg(&device->dev, "Invalid control value returned\n");
-		status = -EINVAL;
-		goto err;
+		return -EINVAL;
 	}
 
 	*state = i;
 
-err:
-	kfree(obj);
 	return status;
 }
 
@@ -161,12 +182,27 @@ static int fan_set_state_acpi4(struct acpi_device *device, unsigned long state)
 {
 	struct acpi_fan *fan = acpi_driver_data(device);
 	acpi_status status;
+	u64 value = state;
+	int max_state;
 
-	if (state >= fan->fps_count)
+	if (fan->fif.fine_grain_ctrl)
+		max_state = 100 / fan->fif.step_size;
+	else
+		max_state = fan->fps_count - 1;
+
+	if (state > max_state)
 		return -EINVAL;
 
-	status = acpi_execute_simple_method(device->handle, "_FSL",
-					    fan->fps[state].control);
+	if (fan->fif.fine_grain_ctrl) {
+		value *= fan->fif.step_size;
+		/* Spec allows compensate the last step only */
+		if (value + fan->fif.step_size > 100)
+			value = 100;
+	} else {
+		value = fan->fps[state].control;
+	}
+
+	status = acpi_execute_simple_method(device->handle, "_FSL", value);
 	if (ACPI_FAILURE(status)) {
 		dev_dbg(&device->dev, "Failed to set state by _FSL\n");
 		return -ENODEV;
@@ -238,6 +274,12 @@ static int acpi_fan_get_fif(struct acpi_device *device)
 	fan->fif.step_size = fields[2];
 	fan->fif.low_speed_notification = fields[3];
 
+	/* If there is a bug in step size and set as 0, change to 1 */
+	if (!fan->fif.step_size)
+		fan->fif.step_size = 1;
+	/* If step size > 9, change to 9 (by spec valid values 1-9) */
+	else if (fan->fif.step_size > 9)
+		fan->fif.step_size = 9;
 err:
 	kfree(obj);
 	return status;
-- 
GitLab


From f1197343f07749035d74c08cf8b546c4f95614ab Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Fri, 11 Feb 2022 08:09:31 -0800
Subject: [PATCH 0669/1586] ACPI: fan: Add additional attributes for fine grain
 control

Add additional attributes, which helps in implementing algorithm in
the user space to optimize fan control. These attributes are presented
in the same directory as the existing performance state attributes.

Additional attributes:

1. Support of fine grain control
Publish support of presence of fine grain control so that fan speed
can be tuned correctly. This attribute is called "fine_grain_control".

2. fan speed
Publish the actual fan rpm in sysfs. Knowing fan rpm is helpful to
reduce noise level and use passive control instead. Also fan performance
may not be same over time, so the same control value may not be enough
to run the fan at a speed. So a feedback value of speed is helpful. This
sysfs attribute is called "fan_speed_rpm".

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/fan.h      |  3 +++
 drivers/acpi/fan_attr.c | 55 +++++++++++++++++++++++++++++++++++++++--
 drivers/acpi/fan_core.c |  2 +-
 3 files changed, 57 insertions(+), 3 deletions(-)

diff --git a/drivers/acpi/fan.h b/drivers/acpi/fan.h
index 4c01be2e3b779..44728529a5b6b 100644
--- a/drivers/acpi/fan.h
+++ b/drivers/acpi/fan.h
@@ -48,8 +48,11 @@ struct acpi_fan {
 	struct acpi_fan_fps *fps;
 	int fps_count;
 	struct thermal_cooling_device *cdev;
+	struct device_attribute fst_speed;
+	struct device_attribute fine_grain_control;
 };
 
+int acpi_fan_get_fst(struct acpi_device *device, struct acpi_fan_fst *fst);
 int acpi_fan_create_attributes(struct acpi_device *device);
 void acpi_fan_delete_attributes(struct acpi_device *device);
 #endif
diff --git a/drivers/acpi/fan_attr.c b/drivers/acpi/fan_attr.c
index 7b109022108b4..f15157d40713e 100644
--- a/drivers/acpi/fan_attr.c
+++ b/drivers/acpi/fan_attr.c
@@ -49,10 +49,50 @@ static ssize_t show_state(struct device *dev, struct device_attribute *attr, cha
 	return count;
 }
 
+static ssize_t show_fan_speed(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct acpi_device *acpi_dev = container_of(dev, struct acpi_device, dev);
+	struct acpi_fan_fst fst;
+	int status;
+
+	status = acpi_fan_get_fst(acpi_dev, &fst);
+	if (status)
+		return status;
+
+	return sprintf(buf, "%lld\n", fst.speed);
+}
+
+static ssize_t show_fine_grain_control(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct acpi_device *acpi_dev = container_of(dev, struct acpi_device, dev);
+	struct acpi_fan *fan = acpi_driver_data(acpi_dev);
+
+	return sprintf(buf, "%d\n", fan->fif.fine_grain_ctrl);
+}
+
 int acpi_fan_create_attributes(struct acpi_device *device)
 {
 	struct acpi_fan *fan = acpi_driver_data(device);
-	int i, status = 0;
+	int i, status;
+
+	sysfs_attr_init(&fan->fine_grain_control.attr);
+	fan->fine_grain_control.show = show_fine_grain_control;
+	fan->fine_grain_control.store = NULL;
+	fan->fine_grain_control.attr.name = "fine_grain_control";
+	fan->fine_grain_control.attr.mode = 0444;
+	status = sysfs_create_file(&device->dev.kobj, &fan->fine_grain_control.attr);
+	if (status)
+		return status;
+
+	/* _FST is present if we are here */
+	sysfs_attr_init(&fan->fst_speed.attr);
+	fan->fst_speed.show = show_fan_speed;
+	fan->fst_speed.store = NULL;
+	fan->fst_speed.attr.name = "fan_speed_rpm";
+	fan->fst_speed.attr.mode = 0444;
+	status = sysfs_create_file(&device->dev.kobj, &fan->fst_speed.attr);
+	if (status)
+		goto rem_fine_grain_attr;
 
 	for (i = 0; i < fan->fps_count; ++i) {
 		struct acpi_fan_fps *fps = &fan->fps[i];
@@ -69,10 +109,18 @@ int acpi_fan_create_attributes(struct acpi_device *device)
 
 			for (j = 0; j < i; ++j)
 				sysfs_remove_file(&device->dev.kobj, &fan->fps[j].dev_attr.attr);
-			break;
+			goto rem_fst_attr;
 		}
 	}
 
+	return 0;
+
+rem_fst_attr:
+	sysfs_remove_file(&device->dev.kobj, &fan->fst_speed.attr);
+
+rem_fine_grain_attr:
+	sysfs_remove_file(&device->dev.kobj, &fan->fine_grain_control.attr);
+
 	return status;
 }
 
@@ -83,4 +131,7 @@ void acpi_fan_delete_attributes(struct acpi_device *device)
 
 	for (i = 0; i < fan->fps_count; ++i)
 		sysfs_remove_file(&device->dev.kobj, &fan->fps[i].dev_attr.attr);
+
+	sysfs_remove_file(&device->dev.kobj, &fan->fst_speed.attr);
+	sysfs_remove_file(&device->dev.kobj, &fan->fine_grain_control.attr);
 }
diff --git a/drivers/acpi/fan_core.c b/drivers/acpi/fan_core.c
index 01616ec2e9ac0..b9a9a59ddcc10 100644
--- a/drivers/acpi/fan_core.c
+++ b/drivers/acpi/fan_core.c
@@ -75,7 +75,7 @@ static int fan_get_max_state(struct thermal_cooling_device *cdev, unsigned long
 	return 0;
 }
 
-static int acpi_fan_get_fst(struct acpi_device *device, struct acpi_fan_fst *fst)
+int acpi_fan_get_fst(struct acpi_device *device, struct acpi_fan_fst *fst)
 {
 	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
 	union acpi_object *obj;
-- 
GitLab


From 0750b8fcf313845b21c71344b4bea8ad7d3cee84 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Fri, 11 Feb 2022 08:09:32 -0800
Subject: [PATCH 0670/1586] Documentation/admin-guide/acpi: Add documentation
 for fine grain control

Add documentation for the newly added attributes:
fine_grain_control
fan_speed_rpm

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 .../acpi/fan_performance_states.rst           | 28 +++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/Documentation/admin-guide/acpi/fan_performance_states.rst b/Documentation/admin-guide/acpi/fan_performance_states.rst
index 98fe5c3331214..b9e4b4d146c1e 100644
--- a/Documentation/admin-guide/acpi/fan_performance_states.rst
+++ b/Documentation/admin-guide/acpi/fan_performance_states.rst
@@ -60,3 +60,31 @@ For example::
 
 When a given field is not populated or its value provided by the platform
 firmware is invalid, the "not-defined" string is shown instead of the value.
+
+ACPI Fan Fine Grain Control
+=============================
+
+When _FIF object specifies support for fine grain control, then fan speed
+can be set from 0 to 100% with the recommended minimum "step size" via
+_FSL object. User can adjust fan speed using thermal sysfs cooling device.
+
+Here use can look at fan performance states for a reference speed (speed_rpm)
+and set it by changing cooling device cur_state. If the fine grain control
+is supported then user can also adjust to some other speeds which are
+not defined in the performance states.
+
+The support of fine grain control is presented via sysfs attribute
+"fine_grain_control". If fine grain control is present, this attribute
+will show "1" otherwise "0".
+
+This sysfs attribute is presented in the same directory as performance states.
+
+ACPI Fan Performance Feedback
+=============================
+
+The optional _FST object provides status information for the fan device.
+This includes field to provide current fan speed in revolutions per minute
+at which the fan is rotating.
+
+This speed is presented in the sysfs using the attribute "fan_speed_rpm",
+in the same directory as performance states.
-- 
GitLab


From 7dacee0b9efc8bd061f097b1a8d4daa6591af0c6 Mon Sep 17 00:00:00 2001
From: Maximilian Luz <luzmaximilian@gmail.com>
Date: Sun, 13 Feb 2022 16:49:20 +0100
Subject: [PATCH 0671/1586] ACPI: battery: Add device HID and quirk for
 Microsoft Surface Go 3

For some reason, the Microsoft Surface Go 3 uses the standard ACPI
interface for battery information, but does not use the standard PNP0C0A
HID. Instead it uses MSHW0146 as identifier. Add that ID to the driver
as this seems to work well.

Additionally, the power state is not updated immediately after the AC
has been (un-)plugged, so add the respective quirk for that.

Signed-off-by: Maximilian Luz <luzmaximilian@gmail.com>
Cc: All applicable <stable@vger.kernel.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/battery.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c
index ea31ae01458b4..dc208f5f5a1f7 100644
--- a/drivers/acpi/battery.c
+++ b/drivers/acpi/battery.c
@@ -59,6 +59,10 @@ MODULE_PARM_DESC(cache_time, "cache time in milliseconds");
 
 static const struct acpi_device_id battery_device_ids[] = {
 	{"PNP0C0A", 0},
+
+	/* Microsoft Surface Go 3 */
+	{"MSHW0146", 0},
+
 	{"", 0},
 };
 
@@ -1148,6 +1152,14 @@ static const struct dmi_system_id bat_dmi_table[] __initconst = {
 			DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad"),
 		},
 	},
+	{
+		/* Microsoft Surface Go 3 */
+		.callback = battery_notification_delay_quirk,
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Surface Go 3"),
+		},
+	},
 	{},
 };
 
-- 
GitLab


From 65881e1db4e948614d9eb195b8e1197339822949 Mon Sep 17 00:00:00 2001
From: Richard Haines <richard_c_haines@btinternet.com>
Date: Fri, 25 Feb 2022 17:54:38 +0000
Subject: [PATCH 0672/1586] selinux: allow FIOCLEX and FIONCLEX with policy
 capability

These ioctls are equivalent to fcntl(fd, F_SETFD, flags), which SELinux
always allows too.  Furthermore, a failed FIOCLEX could result in a file
descriptor being leaked to a process that should not have access to it.

As this patch removes access controls, a policy capability needs to be
enabled in policy to always allow these ioctls.

Based-on-patch-by: Demi Marie Obenour <demiobenour@gmail.com>
Signed-off-by: Richard Haines <richard_c_haines@btinternet.com>
[PM: subject line tweak]
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 security/selinux/hooks.c                   | 6 ++++++
 security/selinux/include/policycap.h       | 1 +
 security/selinux/include/policycap_names.h | 3 ++-
 security/selinux/include/security.h        | 7 +++++++
 4 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 1e69f88eb326a..b12e14b2797b9 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -3682,6 +3682,12 @@ static int selinux_file_ioctl(struct file *file, unsigned int cmd,
 					    CAP_OPT_NONE, true);
 		break;
 
+	case FIOCLEX:
+	case FIONCLEX:
+		if (!selinux_policycap_ioctl_skip_cloexec())
+			error = ioctl_has_perm(cred, file, FILE__IOCTL, (u16) cmd);
+		break;
+
 	/* default case assumes that the command will go
 	 * to the file's ioctl() function.
 	 */
diff --git a/security/selinux/include/policycap.h b/security/selinux/include/policycap.h
index 2ec038efbb03c..a9e572ca4fd96 100644
--- a/security/selinux/include/policycap.h
+++ b/security/selinux/include/policycap.h
@@ -11,6 +11,7 @@ enum {
 	POLICYDB_CAPABILITY_CGROUPSECLABEL,
 	POLICYDB_CAPABILITY_NNP_NOSUID_TRANSITION,
 	POLICYDB_CAPABILITY_GENFS_SECLABEL_SYMLINKS,
+	POLICYDB_CAPABILITY_IOCTL_SKIP_CLOEXEC,
 	__POLICYDB_CAPABILITY_MAX
 };
 #define POLICYDB_CAPABILITY_MAX (__POLICYDB_CAPABILITY_MAX - 1)
diff --git a/security/selinux/include/policycap_names.h b/security/selinux/include/policycap_names.h
index b89289f092c93..ebd64afe1defd 100644
--- a/security/selinux/include/policycap_names.h
+++ b/security/selinux/include/policycap_names.h
@@ -12,7 +12,8 @@ const char *selinux_policycap_names[__POLICYDB_CAPABILITY_MAX] = {
 	"always_check_network",
 	"cgroup_seclabel",
 	"nnp_nosuid_transition",
-	"genfs_seclabel_symlinks"
+	"genfs_seclabel_symlinks",
+	"ioctl_skip_cloexec"
 };
 
 #endif /* _SELINUX_POLICYCAP_NAMES_H_ */
diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h
index d91a5672de991..84f42fa8012f1 100644
--- a/security/selinux/include/security.h
+++ b/security/selinux/include/security.h
@@ -219,6 +219,13 @@ static inline bool selinux_policycap_genfs_seclabel_symlinks(void)
 	return READ_ONCE(state->policycap[POLICYDB_CAPABILITY_GENFS_SECLABEL_SYMLINKS]);
 }
 
+static inline bool selinux_policycap_ioctl_skip_cloexec(void)
+{
+	struct selinux_state *state = &selinux_state;
+
+	return READ_ONCE(state->policycap[POLICYDB_CAPABILITY_IOCTL_SKIP_CLOEXEC]);
+}
+
 struct selinux_policy_convert_data;
 
 struct selinux_load_state {
-- 
GitLab


From 2792d84e6da5e0fd7d3b22fd70bc69b7ee263609 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 16 Feb 2022 12:05:28 -0800
Subject: [PATCH 0673/1586] usercopy: Check valid lifetime via stack depth

One of the things that CONFIG_HARDENED_USERCOPY sanity-checks is whether
an object that is about to be copied to/from userspace is overlapping
the stack at all. If it is, it performs a number of inexpensive
bounds checks. One of the finer-grained checks is whether an object
crosses stack frames within the stack region. Doing this on x86 with
CONFIG_FRAME_POINTER was cheap/easy. Doing it with ORC was deemed too
heavy, and was left out (a while ago), leaving the courser whole-stack
check.

The LKDTM tests USERCOPY_STACK_FRAME_TO and USERCOPY_STACK_FRAME_FROM
try to exercise these cross-frame cases to validate the defense is
working. They have been failing ever since ORC was added (which was
expected). While Muhammad was investigating various LKDTM failures[1],
he asked me for additional details on them, and I realized that when
exact stack frame boundary checking is not available (i.e. everything
except x86 with FRAME_POINTER), it could check if a stack object is at
least "current depth valid", in the sense that any object within the
stack region but not between start-of-stack and current_stack_pointer
should be considered unavailable (i.e. its lifetime is from a call no
longer present on the stack).

Introduce ARCH_HAS_CURRENT_STACK_POINTER to track which architectures
have actually implemented the common global register alias.

Additionally report usercopy bounds checking failures with an offset
from current_stack_pointer, which may assist with diagnosing failures.

The LKDTM USERCOPY_STACK_FRAME_TO and USERCOPY_STACK_FRAME_FROM tests
(once slightly adjusted in a separate patch) pass again with this fixed.

[1] https://github.com/kernelci/kernelci-project/issues/84

Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: linux-mm@kvack.org
Reported-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
v1: https://lore.kernel.org/lkml/20220216201449.2087956-1-keescook@chromium.org
v2: https://lore.kernel.org/lkml/20220224060342.1855457-1-keescook@chromium.org
v3: https://lore.kernel.org/lkml/20220225173345.3358109-1-keescook@chromium.org
v4: - improve commit log (akpm)
---
 arch/arm/Kconfig     |  1 +
 arch/arm64/Kconfig   |  1 +
 arch/powerpc/Kconfig |  1 +
 arch/s390/Kconfig    |  1 +
 arch/sh/Kconfig      |  1 +
 arch/x86/Kconfig     |  1 +
 mm/Kconfig           |  9 +++++++++
 mm/usercopy.c        | 23 +++++++++++++++++++++--
 8 files changed, 36 insertions(+), 2 deletions(-)

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 4c97cb40eebb6..a7a09eef18521 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -5,6 +5,7 @@ config ARM
 	select ARCH_32BIT_OFF_T
 	select ARCH_CORRECT_STACKTRACE_ON_KRETPROBE if HAVE_KRETPROBES && FRAME_POINTER && !ARM_UNWIND
 	select ARCH_HAS_BINFMT_FLAT
+	select ARCH_HAS_CURRENT_STACK_POINTER
 	select ARCH_HAS_DEBUG_VIRTUAL if MMU
 	select ARCH_HAS_DMA_WRITE_COMBINE if !ARM_DMA_MEM_BUFFERABLE
 	select ARCH_HAS_ELF_RANDOMIZE
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index f2b5a4abef215..b8ab790555c81 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -18,6 +18,7 @@ config ARM64
 	select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2
 	select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
 	select ARCH_HAS_CACHE_LINE_SIZE
+	select ARCH_HAS_CURRENT_STACK_POINTER
 	select ARCH_HAS_DEBUG_VIRTUAL
 	select ARCH_HAS_DEBUG_VM_PGTABLE
 	select ARCH_HAS_DMA_PREP_COHERENT
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index b779603978e10..7e7387bd7d538 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -108,6 +108,7 @@ config PPC
 	select ARCH_ENABLE_MEMORY_HOTPLUG
 	select ARCH_ENABLE_MEMORY_HOTREMOVE
 	select ARCH_HAS_COPY_MC			if PPC64
+	select ARCH_HAS_CURRENT_STACK_POINTER
 	select ARCH_HAS_DEBUG_VIRTUAL
 	select ARCH_HAS_DEBUG_VM_PGTABLE
 	select ARCH_HAS_DEBUG_WX		if STRICT_KERNEL_RWX
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index be9f39fd06df6..4845ab549dd1d 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -60,6 +60,7 @@ config S390
 	select ARCH_ENABLE_MEMORY_HOTPLUG if SPARSEMEM
 	select ARCH_ENABLE_MEMORY_HOTREMOVE
 	select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2
+	select ARCH_HAS_CURRENT_STACK_POINTER
 	select ARCH_HAS_DEBUG_VM_PGTABLE
 	select ARCH_HAS_DEBUG_WX
 	select ARCH_HAS_DEVMEM_IS_ALLOWED
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 2474a04ceac43..1c2b53bf30930 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -7,6 +7,7 @@ config SUPERH
 	select ARCH_HAVE_CUSTOM_GPIO_H
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG if (GUSA_RB || CPU_SH4A)
 	select ARCH_HAS_BINFMT_FLAT if !MMU
+	select ARCH_HAS_CURRENT_STACK_POINTER
 	select ARCH_HAS_GIGANTIC_PAGE
 	select ARCH_HAS_GCOV_PROFILE_ALL
 	select ARCH_HAS_PTE_SPECIAL
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 9f5bd41bf660c..90494fba3620f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -69,6 +69,7 @@ config X86
 	select ARCH_ENABLE_THP_MIGRATION if X86_64 && TRANSPARENT_HUGEPAGE
 	select ARCH_HAS_ACPI_TABLE_UPGRADE	if ACPI
 	select ARCH_HAS_CACHE_LINE_SIZE
+	select ARCH_HAS_CURRENT_STACK_POINTER
 	select ARCH_HAS_DEBUG_VIRTUAL
 	select ARCH_HAS_DEBUG_VM_PGTABLE	if !X86_PAE
 	select ARCH_HAS_DEVMEM_IS_ALLOWED
diff --git a/mm/Kconfig b/mm/Kconfig
index 3326ee3903f33..c349599601f82 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -744,6 +744,15 @@ config IDLE_PAGE_TRACKING
 config ARCH_HAS_CACHE_LINE_SIZE
 	bool
 
+config ARCH_HAS_CURRENT_STACK_POINTER
+	bool
+	help
+	  In support of HARDENED_USERCOPY performing stack variable lifetime
+	  checking, an architecture-agnostic way to find the stack pointer
+	  is needed. Once an architecture defines an unsigned long global
+	  register alias named "current_stack_pointer", this config can be
+	  selected.
+
 config ARCH_HAS_PTE_DEVMAP
 	bool
 
diff --git a/mm/usercopy.c b/mm/usercopy.c
index d0d268135d96d..5d34c40c16c22 100644
--- a/mm/usercopy.c
+++ b/mm/usercopy.c
@@ -29,7 +29,7 @@
  * Returns:
  *	NOT_STACK: not at all on the stack
  *	GOOD_FRAME: fully within a valid stack frame
- *	GOOD_STACK: fully on the stack (when can't do frame-checking)
+ *	GOOD_STACK: within the current stack (when can't frame-check exactly)
  *	BAD_STACK: error condition (invalid stack position or bad stack frame)
  */
 static noinline int check_stack_object(const void *obj, unsigned long len)
@@ -55,6 +55,17 @@ static noinline int check_stack_object(const void *obj, unsigned long len)
 	if (ret)
 		return ret;
 
+	/* Finally, check stack depth if possible. */
+#ifdef CONFIG_ARCH_HAS_CURRENT_STACK_POINTER
+	if (IS_ENABLED(CONFIG_STACK_GROWSUP)) {
+		if ((void *)current_stack_pointer < obj + len)
+			return BAD_STACK;
+	} else {
+		if (obj < (void *)current_stack_pointer)
+			return BAD_STACK;
+	}
+#endif
+
 	return GOOD_STACK;
 }
 
@@ -280,7 +291,15 @@ void __check_object_size(const void *ptr, unsigned long n, bool to_user)
 		 */
 		return;
 	default:
-		usercopy_abort("process stack", NULL, to_user, 0, n);
+		usercopy_abort("process stack", NULL, to_user,
+#ifdef CONFIG_ARCH_HAS_CURRENT_STACK_POINTER
+			IS_ENABLED(CONFIG_STACK_GROWSUP) ?
+				ptr - (void *)current_stack_pointer :
+				(void *)current_stack_pointer - ptr,
+#else
+			0,
+#endif
+			n);
 	}
 
 	/* Check for bad heap object. */
-- 
GitLab


From 92652cf986441b18282db6e5bd82afc74e8ed5e9 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Sat, 19 Feb 2022 17:26:00 -0800
Subject: [PATCH 0674/1586] xtensa: Implement "current_stack_pointer"

To follow the existing per-arch conventions replace open-coded uses
of asm "sp" as "current_stack_pointer". This will let it be used in
non-arch places (like HARDENED_USERCOPY).

Cc: Chris Zankel <chris@zankel.net>
Cc: Marc Zyngier <maz@kernel.org>
Cc: linux-xtensa@linux-xtensa.org
Signed-off-by: Kees Cook <keescook@chromium.org>
Acked-by: Max Filippov <jcmvbkbc@gmail.com>
Link: https://lore.kernel.org/lkml/CAMo8BfJFJE-n3=AF+pb9_6oF3gzxX7a+7aBrASHjjNX5byqDqw@mail.gmail.com
---
 arch/xtensa/Kconfig                  | 1 +
 arch/xtensa/include/asm/current.h    | 2 ++
 arch/xtensa/include/asm/stacktrace.h | 8 ++++----
 arch/xtensa/kernel/irq.c             | 3 +--
 4 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index 8ac599aa6d994..8874323276131 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -3,6 +3,7 @@ config XTENSA
 	def_bool y
 	select ARCH_32BIT_OFF_T
 	select ARCH_HAS_BINFMT_FLAT if !MMU
+	select ARCH_HAS_CURRENT_STACK_POINTER
 	select ARCH_HAS_DMA_PREP_COHERENT if MMU
 	select ARCH_HAS_SYNC_DMA_FOR_CPU if MMU
 	select ARCH_HAS_SYNC_DMA_FOR_DEVICE if MMU
diff --git a/arch/xtensa/include/asm/current.h b/arch/xtensa/include/asm/current.h
index 5d98a7ad4251b..08010dbf5e09a 100644
--- a/arch/xtensa/include/asm/current.h
+++ b/arch/xtensa/include/asm/current.h
@@ -26,6 +26,8 @@ static inline struct task_struct *get_current(void)
 
 #define current get_current()
 
+register unsigned long current_stack_pointer __asm__("a1");
+
 #else
 
 #define GET_CURRENT(reg,sp)		\
diff --git a/arch/xtensa/include/asm/stacktrace.h b/arch/xtensa/include/asm/stacktrace.h
index fe06e8ed162b7..a85e785a62888 100644
--- a/arch/xtensa/include/asm/stacktrace.h
+++ b/arch/xtensa/include/asm/stacktrace.h
@@ -19,14 +19,14 @@ struct stackframe {
 
 static __always_inline unsigned long *stack_pointer(struct task_struct *task)
 {
-	unsigned long *sp;
+	unsigned long sp;
 
 	if (!task || task == current)
-		__asm__ __volatile__ ("mov %0, a1\n" : "=a"(sp));
+		sp = current_stack_pointer;
 	else
-		sp = (unsigned long *)task->thread.sp;
+		sp = task->thread.sp;
 
-	return sp;
+	return (unsigned long *)sp;
 }
 
 void walk_stackframe(unsigned long *sp,
diff --git a/arch/xtensa/kernel/irq.c b/arch/xtensa/kernel/irq.c
index 15051a8a15399..529fe92458219 100644
--- a/arch/xtensa/kernel/irq.c
+++ b/arch/xtensa/kernel/irq.c
@@ -36,9 +36,8 @@ asmlinkage void do_IRQ(int hwirq, struct pt_regs *regs)
 #ifdef CONFIG_DEBUG_STACKOVERFLOW
 	/* Debugging check for stack overflow: is there less than 1KB free? */
 	{
-		unsigned long sp;
+		unsigned long sp = current_stack_pointer;
 
-		__asm__ __volatile__ ("mov %0, a1\n" : "=a" (sp));
 		sp &= THREAD_SIZE - 1;
 
 		if (unlikely(sp < (sizeof(thread_info) + 1024)))
-- 
GitLab


From e3952fcce1aad934f1322843b564ff86256444b2 Mon Sep 17 00:00:00 2001
From: Lukas Czerner <lczerner@redhat.com>
Date: Tue, 1 Feb 2022 14:13:45 +0100
Subject: [PATCH 0675/1586] ext4: fix remount with 'abort' option

After commit 6e47a3cc68fc ("ext4: get rid of super block and sbi from
handle_mount_ops()") the 'abort' options stopped working. This is
because we're using ctx_set_mount_flags() helper that's expecting an
argument with the appropriate bit set, but instead got
EXT4_MF_FS_ABORTED which is a bit position. ext4_set_mount_flag() is
using set_bit() while ctx_set_mount_flags() was using bitwise OR.

Create a separate helper ctx_set_mount_flag() to handle setting the
mount_flags correctly.

While we're at it clean up the EXT4_SET_CTX macros so that we're only
creating helpers that we actually use to avoid warnings.

Fixes: 6e47a3cc68fc ("ext4: get rid of super block and sbi from handle_mount_ops()")
Signed-off-by: Lukas Czerner <lczerner@redhat.com>
Cc: Ye Bin <yebin10@huawei.com>
Reviewed-by: Eric Sandeen <sandeen@redhat.com>
Tested-by: Gabriel Krisman Bertazi <krisman@collabora.com>
Link: https://lore.kernel.org/r/20220201131345.77591-1-lczerner@redhat.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/super.c | 29 +++++++++++++++++++++--------
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index c5021ca0a28ad..1fe91a26b88c7 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2045,8 +2045,8 @@ struct ext4_fs_context {
 	unsigned int	mask_s_mount_opt;
 	unsigned int	vals_s_mount_opt2;
 	unsigned int	mask_s_mount_opt2;
-	unsigned int	vals_s_mount_flags;
-	unsigned int	mask_s_mount_flags;
+	unsigned long	vals_s_mount_flags;
+	unsigned long	mask_s_mount_flags;
 	unsigned int	opt_flags;	/* MOPT flags */
 	unsigned int	spec;
 	u32		s_max_batch_time;
@@ -2149,23 +2149,36 @@ static inline void ctx_set_##name(struct ext4_fs_context *ctx,		\
 {									\
 	ctx->mask_s_##name |= flag;					\
 	ctx->vals_s_##name |= flag;					\
-}									\
+}
+
+#define EXT4_CLEAR_CTX(name)						\
 static inline void ctx_clear_##name(struct ext4_fs_context *ctx,	\
 				    unsigned long flag)			\
 {									\
 	ctx->mask_s_##name |= flag;					\
 	ctx->vals_s_##name &= ~flag;					\
-}									\
+}
+
+#define EXT4_TEST_CTX(name)						\
 static inline unsigned long						\
 ctx_test_##name(struct ext4_fs_context *ctx, unsigned long flag)	\
 {									\
 	return (ctx->vals_s_##name & flag);				\
-}									\
+}
 
-EXT4_SET_CTX(flags);
+EXT4_SET_CTX(flags); /* set only */
 EXT4_SET_CTX(mount_opt);
+EXT4_CLEAR_CTX(mount_opt);
+EXT4_TEST_CTX(mount_opt);
 EXT4_SET_CTX(mount_opt2);
-EXT4_SET_CTX(mount_flags);
+EXT4_CLEAR_CTX(mount_opt2);
+EXT4_TEST_CTX(mount_opt2);
+
+static inline void ctx_set_mount_flag(struct ext4_fs_context *ctx, int bit)
+{
+	set_bit(bit, &ctx->mask_s_mount_flags);
+	set_bit(bit, &ctx->vals_s_mount_flags);
+}
 
 static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param)
 {
@@ -2235,7 +2248,7 @@ static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param)
 			 param->key);
 		return 0;
 	case Opt_abort:
-		ctx_set_mount_flags(ctx, EXT4_MF_FS_ABORTED);
+		ctx_set_mount_flag(ctx, EXT4_MF_FS_ABORTED);
 		return 0;
 	case Opt_i_version:
 		ext4_msg(NULL, KERN_WARNING, deprecated_msg, param->key, "5.20");
-- 
GitLab


From cc16eecae687912238ee6efbff71ad31e2bc414e Mon Sep 17 00:00:00 2001
From: Ritesh Harjani <riteshh@linux.ibm.com>
Date: Thu, 10 Feb 2022 21:07:11 +0530
Subject: [PATCH 0676/1586] jbd2: fix use-after-free of transaction_t race

jbd2_journal_wait_updates() is called with j_state_lock held. But if
there is a commit in progress, then this transaction might get committed
and freed via jbd2_journal_commit_transaction() ->
jbd2_journal_free_transaction(), when we release j_state_lock.
So check for journal->j_running_transaction everytime we release and
acquire j_state_lock to avoid use-after-free issue.

Link: https://lore.kernel.org/r/948c2fed518ae739db6a8f7f83f1d58b504f87d0.1644497105.git.ritesh.list@gmail.com
Fixes: 4f98186848707f53 ("jbd2: refactor wait logic for transaction updates into a common function")
Cc: stable@kernel.org
Reported-and-tested-by: syzbot+afa2ca5171d93e44b348@syzkaller.appspotmail.com
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Ritesh Harjani <riteshh@linux.ibm.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/jbd2/transaction.c | 41 +++++++++++++++++++++++++----------------
 1 file changed, 25 insertions(+), 16 deletions(-)

diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 8e2f8275a2535..259e00046a8bd 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -842,27 +842,38 @@ EXPORT_SYMBOL(jbd2_journal_restart);
  */
 void jbd2_journal_wait_updates(journal_t *journal)
 {
-	transaction_t *commit_transaction = journal->j_running_transaction;
+	DEFINE_WAIT(wait);
 
-	if (!commit_transaction)
-		return;
+	while (1) {
+		/*
+		 * Note that the running transaction can get freed under us if
+		 * this transaction is getting committed in
+		 * jbd2_journal_commit_transaction() ->
+		 * jbd2_journal_free_transaction(). This can only happen when we
+		 * release j_state_lock -> schedule() -> acquire j_state_lock.
+		 * Hence we should everytime retrieve new j_running_transaction
+		 * value (after j_state_lock release acquire cycle), else it may
+		 * lead to use-after-free of old freed transaction.
+		 */
+		transaction_t *transaction = journal->j_running_transaction;
 
-	spin_lock(&commit_transaction->t_handle_lock);
-	while (atomic_read(&commit_transaction->t_updates)) {
-		DEFINE_WAIT(wait);
+		if (!transaction)
+			break;
 
+		spin_lock(&transaction->t_handle_lock);
 		prepare_to_wait(&journal->j_wait_updates, &wait,
-					TASK_UNINTERRUPTIBLE);
-		if (atomic_read(&commit_transaction->t_updates)) {
-			spin_unlock(&commit_transaction->t_handle_lock);
-			write_unlock(&journal->j_state_lock);
-			schedule();
-			write_lock(&journal->j_state_lock);
-			spin_lock(&commit_transaction->t_handle_lock);
+				TASK_UNINTERRUPTIBLE);
+		if (!atomic_read(&transaction->t_updates)) {
+			spin_unlock(&transaction->t_handle_lock);
+			finish_wait(&journal->j_wait_updates, &wait);
+			break;
 		}
+		spin_unlock(&transaction->t_handle_lock);
+		write_unlock(&journal->j_state_lock);
+		schedule();
 		finish_wait(&journal->j_wait_updates, &wait);
+		write_lock(&journal->j_state_lock);
 	}
-	spin_unlock(&commit_transaction->t_handle_lock);
 }
 
 /**
@@ -877,8 +888,6 @@ void jbd2_journal_wait_updates(journal_t *journal)
  */
 void jbd2_journal_lock_updates(journal_t *journal)
 {
-	DEFINE_WAIT(wait);
-
 	jbd2_might_wait_for_commit(journal);
 
 	write_lock(&journal->j_state_lock);
-- 
GitLab


From f7f497cb702462e8505ff3d8d4e7722ad95626a1 Mon Sep 17 00:00:00 2001
From: Ritesh Harjani <riteshh@linux.ibm.com>
Date: Wed, 16 Feb 2022 12:30:35 +0530
Subject: [PATCH 0677/1586] jbd2: kill t_handle_lock transaction spinlock

This patch kills t_handle_lock transaction spinlock completely from
jbd2.

To explain the reasoning, currently there were three sites at which
this spinlock was used.

1. jbd2_journal_wait_updates()
   a. Based on careful code review it can be seen that, we don't need this
      lock here. This is since we wait for any currently ongoing updates
      based on a atomic variable t_updates. And we anyway don't take any
      t_handle_lock while in stop_this_handle().
      i.e.

	write_lock(&journal->j_state_lock()
	jbd2_journal_wait_updates() 			stop_this_handle()
		while (atomic_read(txn->t_updates) { 		|
		DEFINE_WAIT(wait); 				|
		prepare_to_wait(); 				|
		if (atomic_read(txn->t_updates) 		if (atomic_dec_and_test(txn->t_updates))
			write_unlock(&journal->j_state_lock);
			schedule();					wake_up()
			write_lock(&journal->j_state_lock);
		finish_wait();
	   }
	txn->t_state = T_COMMIT
	write_unlock(&journal->j_state_lock);

   b.  Also note that between atomic_inc(&txn->t_updates) in
       start_this_handle() and jbd2_journal_wait_updates(), the
       synchronization happens via read_lock(journal->j_state_lock) in
       start_this_handle();

2. jbd2_journal_extend()
   a. jbd2_journal_extend() is called with the handle of each process from
      task_struct. So no lock required in updating member fields of handle_t

   b. For member fields of h_transaction, all updates happens only via
      atomic APIs (which is also within read_lock()).
      So, no need of this transaction spinlock.

3. update_t_max_wait()
   Based on Jan suggestion, this can be carefully removed using atomic
   cmpxchg API.
   Note that there can be several processes which are waiting for a new
   transaction to be allocated and started. For doing this only one
   process will succeed in taking write_lock() and allocating a new txn.
   After that all of the process will be updating the t_max_wait (max
   transaction wait time). This can be done via below method w/o taking
   any locks using atomic cmpxchg.
   For more details refer [1]

	   new = get_new_val();
	   old = READ_ONCE(ptr->max_val);
	   while (old < new)
		old = cmpxchg(&ptr->max_val, old, new);

[1]: https://lwn.net/Articles/849237/

Suggested-by: Jan Kara <jack@suse.cz>
Signed-off-by: Ritesh Harjani <riteshh@linux.ibm.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://lore.kernel.org/r/d89e599658b4a1f3893a48c6feded200073037fc.1644992076.git.riteshh@linux.ibm.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/jbd2/transaction.c | 28 +++++++++-------------------
 include/linux/jbd2.h  |  3 ---
 2 files changed, 9 insertions(+), 22 deletions(-)

diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 259e00046a8bd..83801a8be078e 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -107,7 +107,6 @@ static void jbd2_get_transaction(journal_t *journal,
 	transaction->t_start_time = ktime_get();
 	transaction->t_tid = journal->j_transaction_sequence++;
 	transaction->t_expires = jiffies + journal->j_commit_interval;
-	spin_lock_init(&transaction->t_handle_lock);
 	atomic_set(&transaction->t_updates, 0);
 	atomic_set(&transaction->t_outstanding_credits,
 		   jbd2_descriptor_blocks_per_trans(journal) +
@@ -139,24 +138,21 @@ static void jbd2_get_transaction(journal_t *journal,
 /*
  * Update transaction's maximum wait time, if debugging is enabled.
  *
- * In order for t_max_wait to be reliable, it must be protected by a
- * lock.  But doing so will mean that start_this_handle() can not be
- * run in parallel on SMP systems, which limits our scalability.  So
- * unless debugging is enabled, we no longer update t_max_wait, which
- * means that maximum wait time reported by the jbd2_run_stats
- * tracepoint will always be zero.
+ * t_max_wait is carefully updated here with use of atomic compare exchange.
+ * Note that there could be multiplre threads trying to do this simultaneously
+ * hence using cmpxchg to avoid any use of locks in this case.
  */
 static inline void update_t_max_wait(transaction_t *transaction,
 				     unsigned long ts)
 {
 #ifdef CONFIG_JBD2_DEBUG
+	unsigned long oldts, newts;
 	if (jbd2_journal_enable_debug &&
 	    time_after(transaction->t_start, ts)) {
-		ts = jbd2_time_diff(ts, transaction->t_start);
-		spin_lock(&transaction->t_handle_lock);
-		if (ts > transaction->t_max_wait)
-			transaction->t_max_wait = ts;
-		spin_unlock(&transaction->t_handle_lock);
+		newts = jbd2_time_diff(ts, transaction->t_start);
+		oldts = READ_ONCE(transaction->t_max_wait);
+		while (oldts < newts)
+			oldts = cmpxchg(&transaction->t_max_wait, oldts, newts);
 	}
 #endif
 }
@@ -690,7 +686,6 @@ int jbd2_journal_extend(handle_t *handle, int nblocks, int revoke_records)
 		DIV_ROUND_UP(
 			handle->h_revoke_credits_requested,
 			journal->j_revoke_records_per_block);
-	spin_lock(&transaction->t_handle_lock);
 	wanted = atomic_add_return(nblocks,
 				   &transaction->t_outstanding_credits);
 
@@ -698,7 +693,7 @@ int jbd2_journal_extend(handle_t *handle, int nblocks, int revoke_records)
 		jbd_debug(3, "denied handle %p %d blocks: "
 			  "transaction too large\n", handle, nblocks);
 		atomic_sub(nblocks, &transaction->t_outstanding_credits);
-		goto unlock;
+		goto error_out;
 	}
 
 	trace_jbd2_handle_extend(journal->j_fs_dev->bd_dev,
@@ -714,8 +709,6 @@ int jbd2_journal_extend(handle_t *handle, int nblocks, int revoke_records)
 	result = 0;
 
 	jbd_debug(3, "extended handle %p by %d\n", handle, nblocks);
-unlock:
-	spin_unlock(&transaction->t_handle_lock);
 error_out:
 	read_unlock(&journal->j_state_lock);
 	return result;
@@ -860,15 +853,12 @@ void jbd2_journal_wait_updates(journal_t *journal)
 		if (!transaction)
 			break;
 
-		spin_lock(&transaction->t_handle_lock);
 		prepare_to_wait(&journal->j_wait_updates, &wait,
 				TASK_UNINTERRUPTIBLE);
 		if (!atomic_read(&transaction->t_updates)) {
-			spin_unlock(&transaction->t_handle_lock);
 			finish_wait(&journal->j_wait_updates, &wait);
 			break;
 		}
-		spin_unlock(&transaction->t_handle_lock);
 		write_unlock(&journal->j_state_lock);
 		schedule();
 		finish_wait(&journal->j_wait_updates, &wait);
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 9c3ada74ffb18..a787872e1e86d 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -554,9 +554,6 @@ struct transaction_chp_stats_s {
  *    ->j_list_lock
  *
  *    j_state_lock
- *    ->t_handle_lock
- *
- *    j_state_lock
  *    ->j_list_lock			(journal_unmap_buffer)
  *
  */
-- 
GitLab


From 2d4429205882817100e5e88870ce0663d30c77af Mon Sep 17 00:00:00 2001
From: Ritesh Harjani <riteshh@linux.ibm.com>
Date: Wed, 16 Feb 2022 12:30:36 +0530
Subject: [PATCH 0678/1586] jbd2: remove CONFIG_JBD2_DEBUG to update t_max_wait

CONFIG_JBD2_DEBUG and jbd2_journal_enable_debug knobs were added in
update_t_max_wait(), since earlier it used to take a spinlock for
updating t_max_wait, which could cause a bottleneck while starting a
txn (start_this_handle()).

Since in previous patch, we have killed t_handle_lock completely, we
could get rid of this debug config and knob to let t_max_wait be
updated by default again.

Signed-off-by: Ritesh Harjani <riteshh@linux.ibm.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://lore.kernel.org/r/ad7319a601fd501079310747ce87d908e0944763.1644992076.git.riteshh@linux.ibm.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/jbd2/transaction.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 83801a8be078e..73ed02f061e18 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -141,20 +141,19 @@ static void jbd2_get_transaction(journal_t *journal,
  * t_max_wait is carefully updated here with use of atomic compare exchange.
  * Note that there could be multiplre threads trying to do this simultaneously
  * hence using cmpxchg to avoid any use of locks in this case.
+ * With this t_max_wait can be updated w/o enabling jbd2_journal_enable_debug.
  */
 static inline void update_t_max_wait(transaction_t *transaction,
 				     unsigned long ts)
 {
-#ifdef CONFIG_JBD2_DEBUG
 	unsigned long oldts, newts;
-	if (jbd2_journal_enable_debug &&
-	    time_after(transaction->t_start, ts)) {
+
+	if (time_after(transaction->t_start, ts)) {
 		newts = jbd2_time_diff(ts, transaction->t_start);
 		oldts = READ_ONCE(transaction->t_max_wait);
 		while (oldts < newts)
 			oldts = cmpxchg(&transaction->t_max_wait, oldts, newts);
 	}
-#endif
 }
 
 /*
-- 
GitLab


From a5c0e2fdf7cea535ba03259894dc184e5a4c2800 Mon Sep 17 00:00:00 2001
From: Ritesh Harjani <riteshh@linux.ibm.com>
Date: Wed, 16 Feb 2022 12:32:43 +0530
Subject: [PATCH 0679/1586] ext4: correct cluster len and clusters changed
 accounting in ext4_mb_mark_bb

ext4_mb_mark_bb() currently wrongly calculates cluster len (clen) and
flex_group->free_clusters. This patch fixes that.

Identified based on code review of ext4_mb_mark_bb() function.

Signed-off-by: Ritesh Harjani <riteshh@linux.ibm.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://lore.kernel.org/r/a0b035d536bafa88110b74456853774b64c8ac40.1644992609.git.riteshh@linux.ibm.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/mballoc.c | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 67ac95c4cd9b8..b8ffbc0ebe14e 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3899,10 +3899,11 @@ void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block,
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	ext4_group_t group;
 	ext4_grpblk_t blkoff;
-	int i, clen, err;
+	int i, err;
 	int already;
+	unsigned int clen, clen_changed;
 
-	clen = EXT4_B2C(sbi, len);
+	clen = EXT4_NUM_B2C(sbi, len);
 
 	ext4_get_group_no_and_offset(sb, block, &group, &blkoff);
 	bitmap_bh = ext4_read_block_bitmap(sb, group);
@@ -3923,6 +3924,7 @@ void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block,
 		if (!mb_test_bit(blkoff + i, bitmap_bh->b_data) == !state)
 			already++;
 
+	clen_changed = clen - already;
 	if (state)
 		ext4_set_bits(bitmap_bh->b_data, blkoff, clen);
 	else
@@ -3935,9 +3937,9 @@ void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block,
 						group, gdp));
 	}
 	if (state)
-		clen = ext4_free_group_clusters(sb, gdp) - clen + already;
+		clen = ext4_free_group_clusters(sb, gdp) - clen_changed;
 	else
-		clen = ext4_free_group_clusters(sb, gdp) + clen - already;
+		clen = ext4_free_group_clusters(sb, gdp) + clen_changed;
 
 	ext4_free_group_clusters_set(sb, gdp, clen);
 	ext4_block_bitmap_csum_set(sb, group, gdp, bitmap_bh);
@@ -3947,10 +3949,13 @@ void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block,
 
 	if (sbi->s_log_groups_per_flex) {
 		ext4_group_t flex_group = ext4_flex_group(sbi, group);
+		struct flex_groups *fg = sbi_array_rcu_deref(sbi,
+					   s_flex_groups, flex_group);
 
-		atomic64_sub(len,
-			     &sbi_array_rcu_deref(sbi, s_flex_groups,
-						  flex_group)->free_clusters);
+		if (state)
+			atomic64_sub(clen_changed, &fg->free_clusters);
+		else
+			atomic64_add(clen_changed, &fg->free_clusters);
 	}
 
 	err = ext4_handle_dirty_metadata(NULL, NULL, bitmap_bh);
-- 
GitLab


From bfdc502a4a4c058bf4cbb1df0c297761d528f54d Mon Sep 17 00:00:00 2001
From: Ritesh Harjani <riteshh@linux.ibm.com>
Date: Wed, 16 Feb 2022 12:32:44 +0530
Subject: [PATCH 0680/1586] ext4: fix ext4_mb_mark_bb() with flex_bg with
 fast_commit

In case of flex_bg feature (which is by default enabled), extents for
any given inode might span across blocks from two different block group.
ext4_mb_mark_bb() only reads the buffer_head of block bitmap once for the
starting block group, but it fails to read it again when the extent length
boundary overflows to another block group. Then in this below loop it
accesses memory beyond the block group bitmap buffer_head and results
into a data abort.

	for (i = 0; i < clen; i++)
		if (!mb_test_bit(blkoff + i, bitmap_bh->b_data) == !state)
			already++;

This patch adds this functionality for checking block group boundary in
ext4_mb_mark_bb() and update the buffer_head(bitmap_bh) for every different
block group.

w/o this patch, I was easily able to hit a data access abort using Power platform.

<...>
[   74.327662] EXT4-fs error (device loop3): ext4_mb_generate_buddy:1141: group 11, block bitmap and bg descriptor inconsistent: 21248 vs 23294 free clusters
[   74.533214] EXT4-fs (loop3): shut down requested (2)
[   74.536705] Aborting journal on device loop3-8.
[   74.702705] BUG: Unable to handle kernel data access on read at 0xc00000005e980000
[   74.703727] Faulting instruction address: 0xc0000000007bffb8
cpu 0xd: Vector: 300 (Data Access) at [c000000015db7060]
    pc: c0000000007bffb8: ext4_mb_mark_bb+0x198/0x5a0
    lr: c0000000007bfeec: ext4_mb_mark_bb+0xcc/0x5a0
    sp: c000000015db7300
   msr: 800000000280b033
   dar: c00000005e980000
 dsisr: 40000000
  current = 0xc000000027af6880
  paca    = 0xc00000003ffd5200   irqmask: 0x03   irq_happened: 0x01
    pid   = 5167, comm = mount
<...>
enter ? for help
[c000000015db7380] c000000000782708 ext4_ext_clear_bb+0x378/0x410
[c000000015db7400] c000000000813f14 ext4_fc_replay+0x1794/0x2000
[c000000015db7580] c000000000833f7c do_one_pass+0xe9c/0x12a0
[c000000015db7710] c000000000834504 jbd2_journal_recover+0x184/0x2d0
[c000000015db77c0] c000000000841398 jbd2_journal_load+0x188/0x4a0
[c000000015db7880] c000000000804de8 ext4_fill_super+0x2638/0x3e10
[c000000015db7a40] c0000000005f8404 get_tree_bdev+0x2b4/0x350
[c000000015db7ae0] c0000000007ef058 ext4_get_tree+0x28/0x40
[c000000015db7b00] c0000000005f6344 vfs_get_tree+0x44/0x100
[c000000015db7b70] c00000000063c408 path_mount+0xdd8/0xe70
[c000000015db7c40] c00000000063c8f0 sys_mount+0x450/0x550
[c000000015db7d50] c000000000035770 system_call_exception+0x4a0/0x4e0
[c000000015db7e10] c00000000000c74c system_call_common+0xec/0x250

Signed-off-by: Ritesh Harjani <riteshh@linux.ibm.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://lore.kernel.org/r/2609bc8f66fc15870616ee416a18a3d392a209c4.1644992609.git.riteshh@linux.ibm.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/mballoc.c | 131 +++++++++++++++++++++++++++-------------------
 1 file changed, 76 insertions(+), 55 deletions(-)

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index b8ffbc0ebe14e..816322eddd2bd 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3901,72 +3901,93 @@ void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block,
 	ext4_grpblk_t blkoff;
 	int i, err;
 	int already;
-	unsigned int clen, clen_changed;
+	unsigned int clen, clen_changed, thisgrp_len;
 
-	clen = EXT4_NUM_B2C(sbi, len);
-
-	ext4_get_group_no_and_offset(sb, block, &group, &blkoff);
-	bitmap_bh = ext4_read_block_bitmap(sb, group);
-	if (IS_ERR(bitmap_bh)) {
-		err = PTR_ERR(bitmap_bh);
-		bitmap_bh = NULL;
-		goto out_err;
-	}
-
-	err = -EIO;
-	gdp = ext4_get_group_desc(sb, group, &gdp_bh);
-	if (!gdp)
-		goto out_err;
+	while (len > 0) {
+		ext4_get_group_no_and_offset(sb, block, &group, &blkoff);
 
-	ext4_lock_group(sb, group);
-	already = 0;
-	for (i = 0; i < clen; i++)
-		if (!mb_test_bit(blkoff + i, bitmap_bh->b_data) == !state)
-			already++;
-
-	clen_changed = clen - already;
-	if (state)
-		ext4_set_bits(bitmap_bh->b_data, blkoff, clen);
-	else
-		mb_test_and_clear_bits(bitmap_bh->b_data, blkoff, clen);
-	if (ext4_has_group_desc_csum(sb) &&
-	    (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
-		gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
-		ext4_free_group_clusters_set(sb, gdp,
-					     ext4_free_clusters_after_init(sb,
-						group, gdp));
-	}
-	if (state)
-		clen = ext4_free_group_clusters(sb, gdp) - clen_changed;
-	else
-		clen = ext4_free_group_clusters(sb, gdp) + clen_changed;
+		/*
+		 * Check to see if we are freeing blocks across a group
+		 * boundary.
+		 * In case of flex_bg, this can happen that (block, len) may
+		 * span across more than one group. In that case we need to
+		 * get the corresponding group metadata to work with.
+		 * For this we have goto again loop.
+		 */
+		thisgrp_len = min_t(unsigned int, (unsigned int)len,
+			EXT4_BLOCKS_PER_GROUP(sb) - EXT4_C2B(sbi, blkoff));
+		clen = EXT4_NUM_B2C(sbi, thisgrp_len);
 
-	ext4_free_group_clusters_set(sb, gdp, clen);
-	ext4_block_bitmap_csum_set(sb, group, gdp, bitmap_bh);
-	ext4_group_desc_csum_set(sb, group, gdp);
+		bitmap_bh = ext4_read_block_bitmap(sb, group);
+		if (IS_ERR(bitmap_bh)) {
+			err = PTR_ERR(bitmap_bh);
+			bitmap_bh = NULL;
+			break;
+		}
 
-	ext4_unlock_group(sb, group);
+		err = -EIO;
+		gdp = ext4_get_group_desc(sb, group, &gdp_bh);
+		if (!gdp)
+			break;
 
-	if (sbi->s_log_groups_per_flex) {
-		ext4_group_t flex_group = ext4_flex_group(sbi, group);
-		struct flex_groups *fg = sbi_array_rcu_deref(sbi,
-					   s_flex_groups, flex_group);
+		ext4_lock_group(sb, group);
+		already = 0;
+		for (i = 0; i < clen; i++)
+			if (!mb_test_bit(blkoff + i, bitmap_bh->b_data) ==
+					 !state)
+				already++;
 
+		clen_changed = clen - already;
 		if (state)
-			atomic64_sub(clen_changed, &fg->free_clusters);
+			ext4_set_bits(bitmap_bh->b_data, blkoff, clen);
 		else
-			atomic64_add(clen_changed, &fg->free_clusters);
+			mb_test_and_clear_bits(bitmap_bh->b_data, blkoff, clen);
+		if (ext4_has_group_desc_csum(sb) &&
+		    (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
+			gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
+			ext4_free_group_clusters_set(sb, gdp,
+			     ext4_free_clusters_after_init(sb, group, gdp));
+		}
+		if (state)
+			clen = ext4_free_group_clusters(sb, gdp) - clen_changed;
+		else
+			clen = ext4_free_group_clusters(sb, gdp) + clen_changed;
+
+		ext4_free_group_clusters_set(sb, gdp, clen);
+		ext4_block_bitmap_csum_set(sb, group, gdp, bitmap_bh);
+		ext4_group_desc_csum_set(sb, group, gdp);
+
+		ext4_unlock_group(sb, group);
+
+		if (sbi->s_log_groups_per_flex) {
+			ext4_group_t flex_group = ext4_flex_group(sbi, group);
+			struct flex_groups *fg = sbi_array_rcu_deref(sbi,
+						   s_flex_groups, flex_group);
+
+			if (state)
+				atomic64_sub(clen_changed, &fg->free_clusters);
+			else
+				atomic64_add(clen_changed, &fg->free_clusters);
+
+		}
+
+		err = ext4_handle_dirty_metadata(NULL, NULL, bitmap_bh);
+		if (err)
+			break;
+		sync_dirty_buffer(bitmap_bh);
+		err = ext4_handle_dirty_metadata(NULL, NULL, gdp_bh);
+		sync_dirty_buffer(gdp_bh);
+		if (err)
+			break;
+
+		block += thisgrp_len;
+		len -= thisgrp_len;
+		brelse(bitmap_bh);
+		BUG_ON(len < 0);
 	}
 
-	err = ext4_handle_dirty_metadata(NULL, NULL, bitmap_bh);
 	if (err)
-		goto out_err;
-	sync_dirty_buffer(bitmap_bh);
-	err = ext4_handle_dirty_metadata(NULL, NULL, gdp_bh);
-	sync_dirty_buffer(gdp_bh);
-
-out_err:
-	brelse(bitmap_bh);
+		brelse(bitmap_bh);
 }
 
 /*
-- 
GitLab


From 8ac3939db99f99667b8eb670cf4baf292896e72d Mon Sep 17 00:00:00 2001
From: Ritesh Harjani <riteshh@linux.ibm.com>
Date: Wed, 16 Feb 2022 12:32:45 +0530
Subject: [PATCH 0681/1586] ext4: refactor ext4_free_blocks() to pull out
 ext4_mb_clear_bb()

ext4_free_blocks() function became too long and confusing, this patch
just pulls out the ext4_mb_clear_bb() function logic from it
which clears the block bitmap and frees it.

No functionality change in this patch

Signed-off-by: Ritesh Harjani <riteshh@linux.ibm.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://lore.kernel.org/r/22c30fbb26ba409cf8aa5f0c7912970272c459e8.1644992610.git.riteshh@linux.ibm.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/mballoc.c | 180 ++++++++++++++++++++++++++--------------------
 1 file changed, 102 insertions(+), 78 deletions(-)

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 816322eddd2bd..7b80c5dd9f40f 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -5872,7 +5872,8 @@ static void ext4_free_blocks_simple(struct inode *inode, ext4_fsblk_t block,
 }
 
 /**
- * ext4_free_blocks() -- Free given blocks and update quota
+ * ext4_mb_clear_bb() -- helper function for freeing blocks.
+ *			Used by ext4_free_blocks()
  * @handle:		handle for this transaction
  * @inode:		inode
  * @bh:			optional buffer of the block to be freed
@@ -5880,9 +5881,9 @@ static void ext4_free_blocks_simple(struct inode *inode, ext4_fsblk_t block,
  * @count:		number of blocks to be freed
  * @flags:		flags used by ext4_free_blocks
  */
-void ext4_free_blocks(handle_t *handle, struct inode *inode,
-		      struct buffer_head *bh, ext4_fsblk_t block,
-		      unsigned long count, int flags)
+static void ext4_mb_clear_bb(handle_t *handle, struct inode *inode,
+			       ext4_fsblk_t block, unsigned long count,
+			       int flags)
 {
 	struct buffer_head *bitmap_bh = NULL;
 	struct super_block *sb = inode->i_sb;
@@ -5899,80 +5900,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
 
 	sbi = EXT4_SB(sb);
 
-	if (sbi->s_mount_state & EXT4_FC_REPLAY) {
-		ext4_free_blocks_simple(inode, block, count);
-		return;
-	}
-
-	might_sleep();
-	if (bh) {
-		if (block)
-			BUG_ON(block != bh->b_blocknr);
-		else
-			block = bh->b_blocknr;
-	}
-
-	if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) &&
-	    !ext4_inode_block_valid(inode, block, count)) {
-		ext4_error(sb, "Freeing blocks not in datazone - "
-			   "block = %llu, count = %lu", block, count);
-		goto error_return;
-	}
-
-	ext4_debug("freeing block %llu\n", block);
-	trace_ext4_free_blocks(inode, block, count, flags);
-
-	if (bh && (flags & EXT4_FREE_BLOCKS_FORGET)) {
-		BUG_ON(count > 1);
-
-		ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA,
-			    inode, bh, block);
-	}
-
-	/*
-	 * If the extent to be freed does not begin on a cluster
-	 * boundary, we need to deal with partial clusters at the
-	 * beginning and end of the extent.  Normally we will free
-	 * blocks at the beginning or the end unless we are explicitly
-	 * requested to avoid doing so.
-	 */
-	overflow = EXT4_PBLK_COFF(sbi, block);
-	if (overflow) {
-		if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) {
-			overflow = sbi->s_cluster_ratio - overflow;
-			block += overflow;
-			if (count > overflow)
-				count -= overflow;
-			else
-				return;
-		} else {
-			block -= overflow;
-			count += overflow;
-		}
-	}
-	overflow = EXT4_LBLK_COFF(sbi, count);
-	if (overflow) {
-		if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) {
-			if (count > overflow)
-				count -= overflow;
-			else
-				return;
-		} else
-			count += sbi->s_cluster_ratio - overflow;
-	}
-
-	if (!bh && (flags & EXT4_FREE_BLOCKS_FORGET)) {
-		int i;
-		int is_metadata = flags & EXT4_FREE_BLOCKS_METADATA;
-
-		for (i = 0; i < count; i++) {
-			cond_resched();
-			if (is_metadata)
-				bh = sb_find_get_block(inode->i_sb, block + i);
-			ext4_forget(handle, is_metadata, inode, bh, block + i);
-		}
-	}
-
 do_more:
 	overflow = 0;
 	ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
@@ -6140,6 +6067,103 @@ error_return:
 	return;
 }
 
+/**
+ * ext4_free_blocks() -- Free given blocks and update quota
+ * @handle:		handle for this transaction
+ * @inode:		inode
+ * @bh:			optional buffer of the block to be freed
+ * @block:		starting physical block to be freed
+ * @count:		number of blocks to be freed
+ * @flags:		flags used by ext4_free_blocks
+ */
+void ext4_free_blocks(handle_t *handle, struct inode *inode,
+		      struct buffer_head *bh, ext4_fsblk_t block,
+		      unsigned long count, int flags)
+{
+	struct super_block *sb = inode->i_sb;
+	unsigned int overflow;
+	struct ext4_sb_info *sbi;
+
+	sbi = EXT4_SB(sb);
+
+	if (sbi->s_mount_state & EXT4_FC_REPLAY) {
+		ext4_free_blocks_simple(inode, block, count);
+		return;
+	}
+
+	might_sleep();
+	if (bh) {
+		if (block)
+			BUG_ON(block != bh->b_blocknr);
+		else
+			block = bh->b_blocknr;
+	}
+
+	if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) &&
+	    !ext4_inode_block_valid(inode, block, count)) {
+		ext4_error(sb, "Freeing blocks not in datazone - "
+			   "block = %llu, count = %lu", block, count);
+		return;
+	}
+
+	ext4_debug("freeing block %llu\n", block);
+	trace_ext4_free_blocks(inode, block, count, flags);
+
+	if (bh && (flags & EXT4_FREE_BLOCKS_FORGET)) {
+		BUG_ON(count > 1);
+
+		ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA,
+			    inode, bh, block);
+	}
+
+	/*
+	 * If the extent to be freed does not begin on a cluster
+	 * boundary, we need to deal with partial clusters at the
+	 * beginning and end of the extent.  Normally we will free
+	 * blocks at the beginning or the end unless we are explicitly
+	 * requested to avoid doing so.
+	 */
+	overflow = EXT4_PBLK_COFF(sbi, block);
+	if (overflow) {
+		if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) {
+			overflow = sbi->s_cluster_ratio - overflow;
+			block += overflow;
+			if (count > overflow)
+				count -= overflow;
+			else
+				return;
+		} else {
+			block -= overflow;
+			count += overflow;
+		}
+	}
+	overflow = EXT4_LBLK_COFF(sbi, count);
+	if (overflow) {
+		if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) {
+			if (count > overflow)
+				count -= overflow;
+			else
+				return;
+		} else
+			count += sbi->s_cluster_ratio - overflow;
+	}
+
+	if (!bh && (flags & EXT4_FREE_BLOCKS_FORGET)) {
+		int i;
+		int is_metadata = flags & EXT4_FREE_BLOCKS_METADATA;
+
+		for (i = 0; i < count; i++) {
+			cond_resched();
+			if (is_metadata)
+				bh = sb_find_get_block(inode->i_sb, block + i);
+			ext4_forget(handle, is_metadata, inode, bh, block + i);
+		}
+	}
+
+	ext4_mb_clear_bb(handle, inode, block, count, flags);
+	return;
+}
+
 /**
  * ext4_group_add_blocks() -- Add given blocks to an existing group
  * @handle:			handle to this transaction
-- 
GitLab


From dbaafbadc5c3dad4010099d0ff135204a8dbff49 Mon Sep 17 00:00:00 2001
From: Ritesh Harjani <riteshh@linux.ibm.com>
Date: Wed, 16 Feb 2022 12:32:46 +0530
Subject: [PATCH 0682/1586] ext4: use in_range() for range checking in
 ext4_fc_replay_check_excluded

Instead of open coding it, use in_range() function instead.

Signed-off-by: Ritesh Harjani <riteshh@linux.ibm.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://lore.kernel.org/r/8e5526ef14150778871ac7c937c8993c6a09cd3e.1644992610.git.riteshh@linux.ibm.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/fast_commit.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
index 7964ee34e322a..3c5baca38767b 100644
--- a/fs/ext4/fast_commit.c
+++ b/fs/ext4/fast_commit.c
@@ -1875,8 +1875,8 @@ bool ext4_fc_replay_check_excluded(struct super_block *sb, ext4_fsblk_t blk)
 		if (state->fc_regions[i].ino == 0 ||
 			state->fc_regions[i].len == 0)
 			continue;
-		if (blk >= state->fc_regions[i].pblk &&
-		    blk < state->fc_regions[i].pblk + state->fc_regions[i].len)
+		if (in_range(blk, state->fc_regions[i].pblk,
+					state->fc_regions[i].len))
 			return true;
 	}
 	return false;
-- 
GitLab


From 123e3016ee9b3674a819537bc4c3174e25cd48fc Mon Sep 17 00:00:00 2001
From: Ritesh Harjani <riteshh@linux.ibm.com>
Date: Wed, 16 Feb 2022 12:32:47 +0530
Subject: [PATCH 0683/1586] ext4: rename ext4_set_bits to mb_set_bits

ext4_set_bits() should actually be mb_set_bits() for uniform API naming
convention.
This is via below cmd -

grep -nr "ext4_set_bits" fs/ext4/ | cut -d ":" -f 1 | xargs sed -i 's/ext4_set_bits/mb_set_bits/g'

Signed-off-by: Ritesh Harjani <riteshh@linux.ibm.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://lore.kernel.org/r/f1f6ece1405b76a7a987e9145d1adfaf71e30695.1644992610.git.riteshh@linux.ibm.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/ext4.h    |  2 +-
 fs/ext4/mballoc.c | 14 +++++++-------
 fs/ext4/resize.c  |  4 ++--
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index bcd3b9bf8069b..97c85ae185a92 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1279,7 +1279,7 @@ struct ext4_inode_info {
 #define ext4_find_next_zero_bit		find_next_zero_bit_le
 #define ext4_find_next_bit		find_next_bit_le
 
-extern void ext4_set_bits(void *bm, int cur, int len);
+extern void mb_set_bits(void *bm, int cur, int len);
 
 /*
  * Maximal mount counts between two filesystem checks
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 7b80c5dd9f40f..c2a6f924b4569 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -1689,7 +1689,7 @@ static int mb_test_and_clear_bits(void *bm, int cur, int len)
 	return zero_bit;
 }
 
-void ext4_set_bits(void *bm, int cur, int len)
+void mb_set_bits(void *bm, int cur, int len)
 {
 	__u32 *addr;
 
@@ -1996,7 +1996,7 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
 	mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info);
 
 	mb_update_avg_fragment_size(e4b->bd_sb, e4b->bd_info);
-	ext4_set_bits(e4b->bd_bitmap, ex->fe_start, len0);
+	mb_set_bits(e4b->bd_bitmap, ex->fe_start, len0);
 	mb_check_buddy(e4b);
 
 	return ret;
@@ -3825,7 +3825,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
 		 * We leak some of the blocks here.
 		 */
 		ext4_lock_group(sb, ac->ac_b_ex.fe_group);
-		ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
+		mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
 			      ac->ac_b_ex.fe_len);
 		ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
 		err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
@@ -3844,7 +3844,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
 		}
 	}
 #endif
-	ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
+	mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
 		      ac->ac_b_ex.fe_len);
 	if (ext4_has_group_desc_csum(sb) &&
 	    (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
@@ -3939,7 +3939,7 @@ void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block,
 
 		clen_changed = clen - already;
 		if (state)
-			ext4_set_bits(bitmap_bh->b_data, blkoff, clen);
+			mb_set_bits(bitmap_bh->b_data, blkoff, clen);
 		else
 			mb_test_and_clear_bits(bitmap_bh->b_data, blkoff, clen);
 		if (ext4_has_group_desc_csum(sb) &&
@@ -4459,7 +4459,7 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
 
 	while (n) {
 		entry = rb_entry(n, struct ext4_free_data, efd_node);
-		ext4_set_bits(bitmap, entry->efd_start_cluster, entry->efd_count);
+		mb_set_bits(bitmap, entry->efd_start_cluster, entry->efd_count);
 		n = rb_next(n);
 	}
 	return;
@@ -4500,7 +4500,7 @@ void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
 		if (unlikely(len == 0))
 			continue;
 		BUG_ON(groupnr != group);
-		ext4_set_bits(bitmap, start, len);
+		mb_set_bits(bitmap, start, len);
 		preallocated += len;
 	}
 	mb_debug(sb, "preallocated %d for group %u\n", preallocated, group);
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index ee8f02f406cb6..f507f34be602f 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -483,7 +483,7 @@ static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle,
 		}
 		ext4_debug("mark block bitmap %#04llx (+%llu/%u)\n",
 			   first_cluster, first_cluster - start, count2);
-		ext4_set_bits(bh->b_data, first_cluster - start, count2);
+		mb_set_bits(bh->b_data, first_cluster - start, count2);
 
 		err = ext4_handle_dirty_metadata(handle, NULL, bh);
 		brelse(bh);
@@ -632,7 +632,7 @@ handle_bb:
 		if (overhead != 0) {
 			ext4_debug("mark backup superblock %#04llx (+0)\n",
 				   start);
-			ext4_set_bits(bh->b_data, 0,
+			mb_set_bits(bh->b_data, 0,
 				      EXT4_NUM_B2C(sbi, overhead));
 		}
 		ext4_mark_bitmap_end(EXT4_B2C(sbi, group_data[i].blocks_count),
-- 
GitLab


From bd8247eee1a2b22e2270b3933ab8dca9316b3718 Mon Sep 17 00:00:00 2001
From: Ritesh Harjani <riteshh@linux.ibm.com>
Date: Wed, 16 Feb 2022 12:32:48 +0530
Subject: [PATCH 0684/1586] ext4: no need to test for block bitmap bits in
 ext4_mb_mark_bb()

We don't need the return value of mb_test_and_clear_bits() in ext4_mb_mark_bb()
So simply use mb_clear_bits() instead.

Signed-off-by: Ritesh Harjani <riteshh@linux.ibm.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://lore.kernel.org/r/a971935306dafb124da0193c7dad1aa485210b62.1644992610.git.riteshh@linux.ibm.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/mballoc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index c2a6f924b4569..bb3cfcd545cee 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3941,7 +3941,7 @@ void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block,
 		if (state)
 			mb_set_bits(bitmap_bh->b_data, blkoff, clen);
 		else
-			mb_test_and_clear_bits(bitmap_bh->b_data, blkoff, clen);
+			mb_clear_bits(bitmap_bh->b_data, blkoff, clen);
 		if (ext4_has_group_desc_csum(sb) &&
 		    (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
 			gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
-- 
GitLab


From 6bc6c2bdf1baca6522b8d9ba976257d722423085 Mon Sep 17 00:00:00 2001
From: Ritesh Harjani <riteshh@linux.ibm.com>
Date: Wed, 16 Feb 2022 12:32:49 +0530
Subject: [PATCH 0685/1586] ext4: add ext4_sb_block_valid() refactored out of
 ext4_inode_block_valid()

This API will be needed at places where we don't have an inode
for e.g. while freeing blocks in ext4_group_add_blocks()

Suggested-by: Jan Kara <jack@suse.cz>
Signed-off-by: Ritesh Harjani <riteshh@linux.ibm.com>
Link: https://lore.kernel.org/r/dd34a236543ad5ae7123eeebe0cb69e6bdd44f34.1644992610.git.riteshh@linux.ibm.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/block_validity.c | 26 +++++++++++++++++---------
 fs/ext4/ext4.h           |  3 +++
 2 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
index 4666b55b736ec..5504f72bbbbe7 100644
--- a/fs/ext4/block_validity.c
+++ b/fs/ext4/block_validity.c
@@ -292,15 +292,10 @@ void ext4_release_system_zone(struct super_block *sb)
 		call_rcu(&system_blks->rcu, ext4_destroy_system_zone);
 }
 
-/*
- * Returns 1 if the passed-in block region (start_blk,
- * start_blk+count) is valid; 0 if some part of the block region
- * overlaps with some other filesystem metadata blocks.
- */
-int ext4_inode_block_valid(struct inode *inode, ext4_fsblk_t start_blk,
-			  unsigned int count)
+int ext4_sb_block_valid(struct super_block *sb, struct inode *inode,
+				ext4_fsblk_t start_blk, unsigned int count)
 {
-	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	struct ext4_system_blocks *system_blks;
 	struct ext4_system_zone *entry;
 	struct rb_node *n;
@@ -329,7 +324,9 @@ int ext4_inode_block_valid(struct inode *inode, ext4_fsblk_t start_blk,
 		else if (start_blk >= (entry->start_blk + entry->count))
 			n = n->rb_right;
 		else {
-			ret = (entry->ino == inode->i_ino);
+			ret = 0;
+			if (inode)
+				ret = (entry->ino == inode->i_ino);
 			break;
 		}
 	}
@@ -338,6 +335,17 @@ out_rcu:
 	return ret;
 }
 
+/*
+ * Returns 1 if the passed-in block region (start_blk,
+ * start_blk+count) is valid; 0 if some part of the block region
+ * overlaps with some other filesystem metadata blocks.
+ */
+int ext4_inode_block_valid(struct inode *inode, ext4_fsblk_t start_blk,
+			  unsigned int count)
+{
+	return ext4_sb_block_valid(inode->i_sb, inode, start_blk, count);
+}
+
 int ext4_check_blockref(const char *function, unsigned int line,
 			struct inode *inode, __le32 *p, unsigned int max)
 {
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 97c85ae185a92..0d4f284c0514e 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -3707,6 +3707,9 @@ extern int ext4_inode_block_valid(struct inode *inode,
 				  unsigned int count);
 extern int ext4_check_blockref(const char *, unsigned int,
 			       struct inode *, __le32 *, unsigned int);
+extern int ext4_sb_block_valid(struct super_block *sb, struct inode *inode,
+				ext4_fsblk_t start_blk, unsigned int count);
+
 
 /* extents.c */
 struct ext4_ext_path;
-- 
GitLab


From a00b482b82fb098956a5bed22bd7873e56f152f1 Mon Sep 17 00:00:00 2001
From: Ritesh Harjani <riteshh@linux.ibm.com>
Date: Wed, 16 Feb 2022 12:32:50 +0530
Subject: [PATCH 0686/1586] ext4: add strict range checks while freeing blocks

Currently ext4_mb_clear_bb() & ext4_group_add_blocks() only checks
whether the given block ranges (which is to be freed) belongs to any FS
metadata blocks or not, of the block's respective block group.
But to detect any FS error early, it is better to add more strict
checkings in those functions which checks whether the given blocks
belongs to any critical FS metadata or not within system-zone.

Suggested-by: Jan Kara <jack@suse.cz>
Signed-off-by: Ritesh Harjani <riteshh@linux.ibm.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://lore.kernel.org/r/ddd9143d064774e32d6364a99667817c6e8bfdc0.1644992610.git.riteshh@linux.ibm.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/mballoc.c | 16 +++-------------
 1 file changed, 3 insertions(+), 13 deletions(-)

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index bb3cfcd545cee..0a95bdb1e07ba 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -5930,13 +5930,7 @@ do_more:
 		goto error_return;
 	}
 
-	if (in_range(ext4_block_bitmap(sb, gdp), block, count) ||
-	    in_range(ext4_inode_bitmap(sb, gdp), block, count) ||
-	    in_range(block, ext4_inode_table(sb, gdp),
-		     sbi->s_itb_per_group) ||
-	    in_range(block + count - 1, ext4_inode_table(sb, gdp),
-		     sbi->s_itb_per_group)) {
-
+	if (!ext4_inode_block_valid(inode, block, count)) {
 		ext4_error(sb, "Freeing blocks in system zone - "
 			   "Block = %llu, count = %lu", block, count);
 		/* err = 0. ext4_std_error should be a no op */
@@ -6007,7 +6001,7 @@ do_more:
 						 NULL);
 			if (err && err != -EOPNOTSUPP)
 				ext4_msg(sb, KERN_WARNING, "discard request in"
-					 " group:%d block:%d count:%lu failed"
+					 " group:%u block:%d count:%lu failed"
 					 " with %d", block_group, bit, count,
 					 err);
 		} else
@@ -6220,11 +6214,7 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
 		goto error_return;
 	}
 
-	if (in_range(ext4_block_bitmap(sb, desc), block, count) ||
-	    in_range(ext4_inode_bitmap(sb, desc), block, count) ||
-	    in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) ||
-	    in_range(block + count - 1, ext4_inode_table(sb, desc),
-		     sbi->s_itb_per_group)) {
+	if (!ext4_sb_block_valid(sb, NULL, block, count)) {
 		ext4_error(sb, "Adding blocks in system zones - "
 			   "Block = %llu, count = %lu",
 			   block, count);
-- 
GitLab


From 8c91c57907d3ad8f88a12097213bb0920eb453b8 Mon Sep 17 00:00:00 2001
From: Ritesh Harjani <riteshh@linux.ibm.com>
Date: Wed, 16 Feb 2022 12:32:51 +0530
Subject: [PATCH 0687/1586] ext4: add extra check in ext4_mb_mark_bb() to
 prevent against possible corruption

This patch adds an extra checks in ext4_mb_mark_bb() function
to make sure we mark & report error if we were to mark/clear any
of the critical FS metadata specific bitmaps (&bail out) to prevent
from any accidental corruption.

Suggested-by: Jan Kara <jack@suse.cz>
Signed-off-by: Ritesh Harjani <riteshh@linux.ibm.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://lore.kernel.org/r/53cbb6f2573db162a57f935365050d8b1df202ee.1644992610.git.riteshh@linux.ibm.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/mballoc.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 0a95bdb1e07ba..5f0bc6d0aabe0 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3918,6 +3918,14 @@ void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block,
 			EXT4_BLOCKS_PER_GROUP(sb) - EXT4_C2B(sbi, blkoff));
 		clen = EXT4_NUM_B2C(sbi, thisgrp_len);
 
+		if (!ext4_sb_block_valid(sb, NULL, block, thisgrp_len)) {
+			ext4_error(sb, "Marking blocks in system zone - "
+				   "Block = %llu, len = %u",
+				   block, thisgrp_len);
+			bitmap_bh = NULL;
+			break;
+		}
+
 		bitmap_bh = ext4_read_block_bitmap(sb, group);
 		if (IS_ERR(bitmap_bh)) {
 			err = PTR_ERR(bitmap_bh);
-- 
GitLab


From 575d6b77fa2697afd2b3a443f7f879faa65ae0ca Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Sat, 19 Feb 2022 17:20:02 -0800
Subject: [PATCH 0688/1586] m68k: Implement "current_stack_pointer"

To follow the existing per-arch conventions, add asm "sp" as
"current_stack_pointer". This will let it be used in non-arch places
(like HARDENED_USERCOPY).

Cc: linux-m68k@lists.linux-m68k.org
Signed-off-by: Kees Cook <keescook@chromium.org>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Link: https://lore.kernel.org/lkml/CAMuHMdU6msvi0j=mS28GFYbm+uMRk7PkYe+zOM4sDmOVxeibLQ@mail.gmail.com
---
 arch/m68k/Kconfig               | 1 +
 arch/m68k/include/asm/current.h | 4 +++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 936e1803c7c7d..f0eac0e2f1237 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -4,6 +4,7 @@ config M68K
 	default y
 	select ARCH_32BIT_OFF_T
 	select ARCH_HAS_BINFMT_FLAT
+	select ARCH_HAS_CURRENT_STACK_POINTER
 	select ARCH_HAS_DMA_PREP_COHERENT if HAS_DMA && MMU && !COLDFIRE
 	select ARCH_HAS_SYNC_DMA_FOR_DEVICE if HAS_DMA
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG if RMW_INSNS
diff --git a/arch/m68k/include/asm/current.h b/arch/m68k/include/asm/current.h
index 6390ef2f7f864..c117907e12765 100644
--- a/arch/m68k/include/asm/current.h
+++ b/arch/m68k/include/asm/current.h
@@ -24,6 +24,8 @@ static inline struct task_struct *get_current(void)
 
 #define	current	get_current()
 
-#endif /* CONFNIG_MMU */
+#endif /* CONFIG_MMU */
+
+register unsigned long current_stack_pointer __asm__("sp");
 
 #endif /* !(_M68K_CURRENT_H) */
-- 
GitLab


From 617f55e20743fc50c989b498f9dee289eb644cfd Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 16 Feb 2022 14:17:49 -0800
Subject: [PATCH 0689/1586] lib: overflow: Convert to Kunit

Convert overflow unit tests to KUnit, for better integration into the
kernel self test framework. Includes a rename of test_overflow.c to
overflow_kunit.c, and CONFIG_TEST_OVERFLOW to CONFIG_OVERFLOW_KUNIT_TEST.

$ ./tools/testing/kunit/kunit.py run overflow
...
[14:33:51] Starting KUnit Kernel (1/1)...
[14:33:51] ============================================================
[14:33:51] ================== overflow (11 subtests) ==================
[14:33:51] [PASSED] u8_overflow_test
[14:33:51] [PASSED] s8_overflow_test
[14:33:51] [PASSED] u16_overflow_test
[14:33:51] [PASSED] s16_overflow_test
[14:33:51] [PASSED] u32_overflow_test
[14:33:51] [PASSED] s32_overflow_test
[14:33:51] [PASSED] u64_overflow_test
[14:33:51] [PASSED] s64_overflow_test
[14:33:51] [PASSED] overflow_shift_test
[14:33:51] [PASSED] overflow_allocation_test
[14:33:51] [PASSED] overflow_size_helpers_test
[14:33:51] ==================== [PASSED] overflow =====================
[14:33:51] ============================================================
[14:33:51] Testing complete. Passed: 11, Failed: 0, Crashed: 0, Skipped: 0, Errors: 0
[14:33:51] Elapsed time: 12.525s total, 0.001s configuring, 12.402s building, 0.101s running

Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Co-developed-by: Vitor Massaru Iha <vitor@massaru.org>
Signed-off-by: Vitor Massaru Iha <vitor@massaru.org>
Link: https://lore.kernel.org/lkml/20200720224418.200495-1-vitor@massaru.org/
Co-developed-by: Daniel Latypov <dlatypov@google.com>
Signed-off-by: Daniel Latypov <dlatypov@google.com>
Link: https://lore.kernel.org/linux-kselftest/20210503211536.1384578-1-dlatypov@google.com/
Acked-by: Nick Desaulniers <ndesaulniers@google.com>
Link: https://lore.kernel.org/lkml/CAKwvOdm62iA1dNiC6Q11UJ-MnTqtc4kXkm-ubPaFMK824_k0nw@mail.gmail.com
Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: David Gow <davidgow@google.com>
Link: https://lore.kernel.org/lkml/CABVgOS=TWVh649_Vjo3wnMu9gZnq66gkV-LtGgsksAWMqc+MSA@mail.gmail.com
---
 lib/Kconfig.debug                         |  16 +-
 lib/Makefile                              |   2 +-
 lib/{test_overflow.c => overflow_kunit.c} | 554 ++++++++++------------
 3 files changed, 263 insertions(+), 309 deletions(-)
 rename lib/{test_overflow.c => overflow_kunit.c} (54%)

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 14b89aa37c5c9..14d90d03bc8d9 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -2214,9 +2214,6 @@ config TEST_UUID
 config TEST_XARRAY
 	tristate "Test the XArray code at runtime"
 
-config TEST_OVERFLOW
-	tristate "Test check_*_overflow() functions at runtime"
-
 config TEST_RHASHTABLE
 	tristate "Perform selftest on resizable hash table"
 	help
@@ -2501,6 +2498,19 @@ config MEMCPY_KUNIT_TEST
 
 	  If unsure, say N.
 
+config OVERFLOW_KUNIT_TEST
+	tristate "Test check_*_overflow() functions at runtime" if !KUNIT_ALL_TESTS
+	depends on KUNIT
+	default KUNIT_ALL_TESTS
+	help
+	  Builds unit tests for the check_*_overflow(), size_*(), allocation, and
+	  related functions.
+
+	  For more information on KUnit and unit tests in general please refer
+	  to the KUnit documentation in Documentation/dev-tools/kunit/.
+
+	  If unsure, say N.
+
 config TEST_UDELAY
 	tristate "udelay test driver"
 	help
diff --git a/lib/Makefile b/lib/Makefile
index 300f569c626b0..fdfcbfaff32f1 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -77,7 +77,6 @@ obj-$(CONFIG_TEST_LIST_SORT) += test_list_sort.o
 obj-$(CONFIG_TEST_MIN_HEAP) += test_min_heap.o
 obj-$(CONFIG_TEST_LKM) += test_module.o
 obj-$(CONFIG_TEST_VMALLOC) += test_vmalloc.o
-obj-$(CONFIG_TEST_OVERFLOW) += test_overflow.o
 obj-$(CONFIG_TEST_RHASHTABLE) += test_rhashtable.o
 obj-$(CONFIG_TEST_SORT) += test_sort.o
 obj-$(CONFIG_TEST_USER_COPY) += test_user_copy.o
@@ -363,6 +362,7 @@ obj-$(CONFIG_BITS_TEST) += test_bits.o
 obj-$(CONFIG_CMDLINE_KUNIT_TEST) += cmdline_kunit.o
 obj-$(CONFIG_SLUB_KUNIT_TEST) += slub_kunit.o
 obj-$(CONFIG_MEMCPY_KUNIT_TEST) += memcpy_kunit.o
+obj-$(CONFIG_OVERFLOW_KUNIT_TEST) += overflow_kunit.o
 
 obj-$(CONFIG_GENERIC_LIB_DEVMEM_IS_ALLOWED) += devmem_is_allowed.o
 
diff --git a/lib/test_overflow.c b/lib/overflow_kunit.c
similarity index 54%
rename from lib/test_overflow.c
rename to lib/overflow_kunit.c
index f6530fce799db..475f0c064bf65 100644
--- a/lib/test_overflow.c
+++ b/lib/overflow_kunit.c
@@ -1,11 +1,13 @@
 // SPDX-License-Identifier: GPL-2.0 OR MIT
 /*
- * Test cases for arithmetic overflow checks.
+ * Test cases for arithmetic overflow checks. See:
+ * https://www.kernel.org/doc/html/latest/dev-tools/kunit/kunit-tool.html#configuring-building-and-running-tests
+ *	./tools/testing/kunit/kunit.py run overflow [--raw_output]
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+#include <kunit/test.h>
 #include <linux/device.h>
-#include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/module.h>
@@ -19,7 +21,7 @@
 		t a, b;				\
 		t sum, diff, prod;		\
 		bool s_of, d_of, p_of;		\
-	} t ## _tests[] __initconst
+	} t ## _tests[]
 
 DEFINE_TEST_ARRAY(u8) = {
 	{0, 0, 0, 0, 0, false, false, false},
@@ -220,43 +222,31 @@ DEFINE_TEST_ARRAY(s64) = {
 	bool _of;						\
 								\
 	_of = check_ ## op ## _overflow(a, b, &_r);		\
-	if (_of != of) {					\
-		pr_warn("expected "fmt" "sym" "fmt		\
-			" to%s overflow (type %s)\n",		\
-			a, b, of ? "" : " not", #t);		\
-		err = 1;					\
-	}							\
-	if (_r != r) {						\
-		pr_warn("expected "fmt" "sym" "fmt" == "	\
-			fmt", got "fmt" (type %s)\n",		\
-			a, b, r, _r, #t);			\
-		err = 1;					\
-	}							\
+	KUNIT_EXPECT_EQ_MSG(test, _of, of,			\
+		"expected "fmt" "sym" "fmt" to%s overflow (type %s)\n",	\
+		a, b, of ? "" : " not", #t);			\
+	KUNIT_EXPECT_EQ_MSG(test, _r, r,			\
+		"expected "fmt" "sym" "fmt" == "fmt", got "fmt" (type %s)\n", \
+		a, b, r, _r, #t);				\
 } while (0)
 
 #define DEFINE_TEST_FUNC(t, fmt)					\
-static int __init do_test_ ## t(const struct test_ ## t *p)		\
+static void do_test_ ## t(struct kunit *test, const struct test_ ## t *p) \
 {							   		\
-	int err = 0;							\
-									\
 	check_one_op(t, fmt, add, "+", p->a, p->b, p->sum, p->s_of);	\
 	check_one_op(t, fmt, add, "+", p->b, p->a, p->sum, p->s_of);	\
 	check_one_op(t, fmt, sub, "-", p->a, p->b, p->diff, p->d_of);	\
 	check_one_op(t, fmt, mul, "*", p->a, p->b, p->prod, p->p_of);	\
 	check_one_op(t, fmt, mul, "*", p->b, p->a, p->prod, p->p_of);	\
-									\
-	return err;							\
 }									\
 									\
-static int __init test_ ## t ## _overflow(void) {			\
-	int err = 0;							\
+static void t ## _overflow_test(struct kunit *test) {			\
 	unsigned i;							\
 									\
 	for (i = 0; i < ARRAY_SIZE(t ## _tests); ++i)			\
-		err |= do_test_ ## t(&t ## _tests[i]);			\
-	pr_info("%zu %s arithmetic tests finished\n",			\
+		do_test_ ## t(test, &t ## _tests[i]);			\
+	kunit_info(test, "%zu %s arithmetic tests finished\n",		\
 		ARRAY_SIZE(t ## _tests), #t);				\
-	return err;							\
 }
 
 DEFINE_TEST_FUNC(u8, "%d");
@@ -270,198 +260,176 @@ DEFINE_TEST_FUNC(u64, "%llu");
 DEFINE_TEST_FUNC(s64, "%lld");
 #endif
 
-static int __init test_overflow_calculation(void)
+static void overflow_shift_test(struct kunit *test)
 {
-	int err = 0;
-
-	err |= test_u8_overflow();
-	err |= test_s8_overflow();
-	err |= test_u16_overflow();
-	err |= test_s16_overflow();
-	err |= test_u32_overflow();
-	err |= test_s32_overflow();
-#if BITS_PER_LONG == 64
-	err |= test_u64_overflow();
-	err |= test_s64_overflow();
-#endif
-
-	return err;
-}
-
-static int __init test_overflow_shift(void)
-{
-	int err = 0;
 	int count = 0;
 
 /* Args are: value, shift, type, expected result, overflow expected */
-#define TEST_ONE_SHIFT(a, s, t, expect, of) ({				\
-	int __failed = 0;						\
+#define TEST_ONE_SHIFT(a, s, t, expect, of)	do {			\
 	typeof(a) __a = (a);						\
 	typeof(s) __s = (s);						\
 	t __e = (expect);						\
 	t __d;								\
 	bool __of = check_shl_overflow(__a, __s, &__d);			\
 	if (__of != of) {						\
-		pr_warn("expected (%s)(%s << %s) to%s overflow\n",	\
+		KUNIT_EXPECT_EQ_MSG(test, __of, of,			\
+			"expected (%s)(%s << %s) to%s overflow\n",	\
 			#t, #a, #s, of ? "" : " not");			\
-		__failed = 1;						\
 	} else if (!__of && __d != __e) {				\
-		pr_warn("expected (%s)(%s << %s) == %s\n",		\
+		KUNIT_EXPECT_EQ_MSG(test, __d, __e,			\
+			"expected (%s)(%s << %s) == %s\n",		\
 			#t, #a, #s, #expect);				\
 		if ((t)-1 < 0)						\
-			pr_warn("got %lld\n", (s64)__d);		\
+			kunit_info(test, "got %lld\n", (s64)__d);	\
 		else							\
-			pr_warn("got %llu\n", (u64)__d);		\
-		__failed = 1;						\
+			kunit_info(test, "got %llu\n", (u64)__d);	\
 	}								\
 	count++;							\
-	__failed;							\
-})
+} while (0)
 
 	/* Sane shifts. */
-	err |= TEST_ONE_SHIFT(1, 0, u8, 1 << 0, false);
-	err |= TEST_ONE_SHIFT(1, 4, u8, 1 << 4, false);
-	err |= TEST_ONE_SHIFT(1, 7, u8, 1 << 7, false);
-	err |= TEST_ONE_SHIFT(0xF, 4, u8, 0xF << 4, false);
-	err |= TEST_ONE_SHIFT(1, 0, u16, 1 << 0, false);
-	err |= TEST_ONE_SHIFT(1, 10, u16, 1 << 10, false);
-	err |= TEST_ONE_SHIFT(1, 15, u16, 1 << 15, false);
-	err |= TEST_ONE_SHIFT(0xFF, 8, u16, 0xFF << 8, false);
-	err |= TEST_ONE_SHIFT(1, 0, int, 1 << 0, false);
-	err |= TEST_ONE_SHIFT(1, 16, int, 1 << 16, false);
-	err |= TEST_ONE_SHIFT(1, 30, int, 1 << 30, false);
-	err |= TEST_ONE_SHIFT(1, 0, s32, 1 << 0, false);
-	err |= TEST_ONE_SHIFT(1, 16, s32, 1 << 16, false);
-	err |= TEST_ONE_SHIFT(1, 30, s32, 1 << 30, false);
-	err |= TEST_ONE_SHIFT(1, 0, unsigned int, 1U << 0, false);
-	err |= TEST_ONE_SHIFT(1, 20, unsigned int, 1U << 20, false);
-	err |= TEST_ONE_SHIFT(1, 31, unsigned int, 1U << 31, false);
-	err |= TEST_ONE_SHIFT(0xFFFFU, 16, unsigned int, 0xFFFFU << 16, false);
-	err |= TEST_ONE_SHIFT(1, 0, u32, 1U << 0, false);
-	err |= TEST_ONE_SHIFT(1, 20, u32, 1U << 20, false);
-	err |= TEST_ONE_SHIFT(1, 31, u32, 1U << 31, false);
-	err |= TEST_ONE_SHIFT(0xFFFFU, 16, u32, 0xFFFFU << 16, false);
-	err |= TEST_ONE_SHIFT(1, 0, u64, 1ULL << 0, false);
-	err |= TEST_ONE_SHIFT(1, 40, u64, 1ULL << 40, false);
-	err |= TEST_ONE_SHIFT(1, 63, u64, 1ULL << 63, false);
-	err |= TEST_ONE_SHIFT(0xFFFFFFFFULL, 32, u64,
-			      0xFFFFFFFFULL << 32, false);
+	TEST_ONE_SHIFT(1, 0, u8, 1 << 0, false);
+	TEST_ONE_SHIFT(1, 4, u8, 1 << 4, false);
+	TEST_ONE_SHIFT(1, 7, u8, 1 << 7, false);
+	TEST_ONE_SHIFT(0xF, 4, u8, 0xF << 4, false);
+	TEST_ONE_SHIFT(1, 0, u16, 1 << 0, false);
+	TEST_ONE_SHIFT(1, 10, u16, 1 << 10, false);
+	TEST_ONE_SHIFT(1, 15, u16, 1 << 15, false);
+	TEST_ONE_SHIFT(0xFF, 8, u16, 0xFF << 8, false);
+	TEST_ONE_SHIFT(1, 0, int, 1 << 0, false);
+	TEST_ONE_SHIFT(1, 16, int, 1 << 16, false);
+	TEST_ONE_SHIFT(1, 30, int, 1 << 30, false);
+	TEST_ONE_SHIFT(1, 0, s32, 1 << 0, false);
+	TEST_ONE_SHIFT(1, 16, s32, 1 << 16, false);
+	TEST_ONE_SHIFT(1, 30, s32, 1 << 30, false);
+	TEST_ONE_SHIFT(1, 0, unsigned int, 1U << 0, false);
+	TEST_ONE_SHIFT(1, 20, unsigned int, 1U << 20, false);
+	TEST_ONE_SHIFT(1, 31, unsigned int, 1U << 31, false);
+	TEST_ONE_SHIFT(0xFFFFU, 16, unsigned int, 0xFFFFU << 16, false);
+	TEST_ONE_SHIFT(1, 0, u32, 1U << 0, false);
+	TEST_ONE_SHIFT(1, 20, u32, 1U << 20, false);
+	TEST_ONE_SHIFT(1, 31, u32, 1U << 31, false);
+	TEST_ONE_SHIFT(0xFFFFU, 16, u32, 0xFFFFU << 16, false);
+	TEST_ONE_SHIFT(1, 0, u64, 1ULL << 0, false);
+	TEST_ONE_SHIFT(1, 40, u64, 1ULL << 40, false);
+	TEST_ONE_SHIFT(1, 63, u64, 1ULL << 63, false);
+	TEST_ONE_SHIFT(0xFFFFFFFFULL, 32, u64, 0xFFFFFFFFULL << 32, false);
 
 	/* Sane shift: start and end with 0, without a too-wide shift. */
-	err |= TEST_ONE_SHIFT(0, 7, u8, 0, false);
-	err |= TEST_ONE_SHIFT(0, 15, u16, 0, false);
-	err |= TEST_ONE_SHIFT(0, 31, unsigned int, 0, false);
-	err |= TEST_ONE_SHIFT(0, 31, u32, 0, false);
-	err |= TEST_ONE_SHIFT(0, 63, u64, 0, false);
+	TEST_ONE_SHIFT(0, 7, u8, 0, false);
+	TEST_ONE_SHIFT(0, 15, u16, 0, false);
+	TEST_ONE_SHIFT(0, 31, unsigned int, 0, false);
+	TEST_ONE_SHIFT(0, 31, u32, 0, false);
+	TEST_ONE_SHIFT(0, 63, u64, 0, false);
 
 	/* Sane shift: start and end with 0, without reaching signed bit. */
-	err |= TEST_ONE_SHIFT(0, 6, s8, 0, false);
-	err |= TEST_ONE_SHIFT(0, 14, s16, 0, false);
-	err |= TEST_ONE_SHIFT(0, 30, int, 0, false);
-	err |= TEST_ONE_SHIFT(0, 30, s32, 0, false);
-	err |= TEST_ONE_SHIFT(0, 62, s64, 0, false);
+	TEST_ONE_SHIFT(0, 6, s8, 0, false);
+	TEST_ONE_SHIFT(0, 14, s16, 0, false);
+	TEST_ONE_SHIFT(0, 30, int, 0, false);
+	TEST_ONE_SHIFT(0, 30, s32, 0, false);
+	TEST_ONE_SHIFT(0, 62, s64, 0, false);
 
 	/* Overflow: shifted the bit off the end. */
-	err |= TEST_ONE_SHIFT(1, 8, u8, 0, true);
-	err |= TEST_ONE_SHIFT(1, 16, u16, 0, true);
-	err |= TEST_ONE_SHIFT(1, 32, unsigned int, 0, true);
-	err |= TEST_ONE_SHIFT(1, 32, u32, 0, true);
-	err |= TEST_ONE_SHIFT(1, 64, u64, 0, true);
+	TEST_ONE_SHIFT(1, 8, u8, 0, true);
+	TEST_ONE_SHIFT(1, 16, u16, 0, true);
+	TEST_ONE_SHIFT(1, 32, unsigned int, 0, true);
+	TEST_ONE_SHIFT(1, 32, u32, 0, true);
+	TEST_ONE_SHIFT(1, 64, u64, 0, true);
 
 	/* Overflow: shifted into the signed bit. */
-	err |= TEST_ONE_SHIFT(1, 7, s8, 0, true);
-	err |= TEST_ONE_SHIFT(1, 15, s16, 0, true);
-	err |= TEST_ONE_SHIFT(1, 31, int, 0, true);
-	err |= TEST_ONE_SHIFT(1, 31, s32, 0, true);
-	err |= TEST_ONE_SHIFT(1, 63, s64, 0, true);
+	TEST_ONE_SHIFT(1, 7, s8, 0, true);
+	TEST_ONE_SHIFT(1, 15, s16, 0, true);
+	TEST_ONE_SHIFT(1, 31, int, 0, true);
+	TEST_ONE_SHIFT(1, 31, s32, 0, true);
+	TEST_ONE_SHIFT(1, 63, s64, 0, true);
 
 	/* Overflow: high bit falls off unsigned types. */
 	/* 10010110 */
-	err |= TEST_ONE_SHIFT(150, 1, u8, 0, true);
+	TEST_ONE_SHIFT(150, 1, u8, 0, true);
 	/* 1000100010010110 */
-	err |= TEST_ONE_SHIFT(34966, 1, u16, 0, true);
+	TEST_ONE_SHIFT(34966, 1, u16, 0, true);
 	/* 10000100000010001000100010010110 */
-	err |= TEST_ONE_SHIFT(2215151766U, 1, u32, 0, true);
-	err |= TEST_ONE_SHIFT(2215151766U, 1, unsigned int, 0, true);
+	TEST_ONE_SHIFT(2215151766U, 1, u32, 0, true);
+	TEST_ONE_SHIFT(2215151766U, 1, unsigned int, 0, true);
 	/* 1000001000010000010000000100000010000100000010001000100010010110 */
-	err |= TEST_ONE_SHIFT(9372061470395238550ULL, 1, u64, 0, true);
+	TEST_ONE_SHIFT(9372061470395238550ULL, 1, u64, 0, true);
 
 	/* Overflow: bit shifted into signed bit on signed types. */
 	/* 01001011 */
-	err |= TEST_ONE_SHIFT(75, 1, s8, 0, true);
+	TEST_ONE_SHIFT(75, 1, s8, 0, true);
 	/* 0100010001001011 */
-	err |= TEST_ONE_SHIFT(17483, 1, s16, 0, true);
+	TEST_ONE_SHIFT(17483, 1, s16, 0, true);
 	/* 01000010000001000100010001001011 */
-	err |= TEST_ONE_SHIFT(1107575883, 1, s32, 0, true);
-	err |= TEST_ONE_SHIFT(1107575883, 1, int, 0, true);
+	TEST_ONE_SHIFT(1107575883, 1, s32, 0, true);
+	TEST_ONE_SHIFT(1107575883, 1, int, 0, true);
 	/* 0100000100001000001000000010000001000010000001000100010001001011 */
-	err |= TEST_ONE_SHIFT(4686030735197619275LL, 1, s64, 0, true);
+	TEST_ONE_SHIFT(4686030735197619275LL, 1, s64, 0, true);
 
 	/* Overflow: bit shifted past signed bit on signed types. */
 	/* 01001011 */
-	err |= TEST_ONE_SHIFT(75, 2, s8, 0, true);
+	TEST_ONE_SHIFT(75, 2, s8, 0, true);
 	/* 0100010001001011 */
-	err |= TEST_ONE_SHIFT(17483, 2, s16, 0, true);
+	TEST_ONE_SHIFT(17483, 2, s16, 0, true);
 	/* 01000010000001000100010001001011 */
-	err |= TEST_ONE_SHIFT(1107575883, 2, s32, 0, true);
-	err |= TEST_ONE_SHIFT(1107575883, 2, int, 0, true);
+	TEST_ONE_SHIFT(1107575883, 2, s32, 0, true);
+	TEST_ONE_SHIFT(1107575883, 2, int, 0, true);
 	/* 0100000100001000001000000010000001000010000001000100010001001011 */
-	err |= TEST_ONE_SHIFT(4686030735197619275LL, 2, s64, 0, true);
+	TEST_ONE_SHIFT(4686030735197619275LL, 2, s64, 0, true);
 
 	/* Overflow: values larger than destination type. */
-	err |= TEST_ONE_SHIFT(0x100, 0, u8, 0, true);
-	err |= TEST_ONE_SHIFT(0xFF, 0, s8, 0, true);
-	err |= TEST_ONE_SHIFT(0x10000U, 0, u16, 0, true);
-	err |= TEST_ONE_SHIFT(0xFFFFU, 0, s16, 0, true);
-	err |= TEST_ONE_SHIFT(0x100000000ULL, 0, u32, 0, true);
-	err |= TEST_ONE_SHIFT(0x100000000ULL, 0, unsigned int, 0, true);
-	err |= TEST_ONE_SHIFT(0xFFFFFFFFUL, 0, s32, 0, true);
-	err |= TEST_ONE_SHIFT(0xFFFFFFFFUL, 0, int, 0, true);
-	err |= TEST_ONE_SHIFT(0xFFFFFFFFFFFFFFFFULL, 0, s64, 0, true);
+	TEST_ONE_SHIFT(0x100, 0, u8, 0, true);
+	TEST_ONE_SHIFT(0xFF, 0, s8, 0, true);
+	TEST_ONE_SHIFT(0x10000U, 0, u16, 0, true);
+	TEST_ONE_SHIFT(0xFFFFU, 0, s16, 0, true);
+	TEST_ONE_SHIFT(0x100000000ULL, 0, u32, 0, true);
+	TEST_ONE_SHIFT(0x100000000ULL, 0, unsigned int, 0, true);
+	TEST_ONE_SHIFT(0xFFFFFFFFUL, 0, s32, 0, true);
+	TEST_ONE_SHIFT(0xFFFFFFFFUL, 0, int, 0, true);
+	TEST_ONE_SHIFT(0xFFFFFFFFFFFFFFFFULL, 0, s64, 0, true);
 
 	/* Nonsense: negative initial value. */
-	err |= TEST_ONE_SHIFT(-1, 0, s8, 0, true);
-	err |= TEST_ONE_SHIFT(-1, 0, u8, 0, true);
-	err |= TEST_ONE_SHIFT(-5, 0, s16, 0, true);
-	err |= TEST_ONE_SHIFT(-5, 0, u16, 0, true);
-	err |= TEST_ONE_SHIFT(-10, 0, int, 0, true);
-	err |= TEST_ONE_SHIFT(-10, 0, unsigned int, 0, true);
-	err |= TEST_ONE_SHIFT(-100, 0, s32, 0, true);
-	err |= TEST_ONE_SHIFT(-100, 0, u32, 0, true);
-	err |= TEST_ONE_SHIFT(-10000, 0, s64, 0, true);
-	err |= TEST_ONE_SHIFT(-10000, 0, u64, 0, true);
+	TEST_ONE_SHIFT(-1, 0, s8, 0, true);
+	TEST_ONE_SHIFT(-1, 0, u8, 0, true);
+	TEST_ONE_SHIFT(-5, 0, s16, 0, true);
+	TEST_ONE_SHIFT(-5, 0, u16, 0, true);
+	TEST_ONE_SHIFT(-10, 0, int, 0, true);
+	TEST_ONE_SHIFT(-10, 0, unsigned int, 0, true);
+	TEST_ONE_SHIFT(-100, 0, s32, 0, true);
+	TEST_ONE_SHIFT(-100, 0, u32, 0, true);
+	TEST_ONE_SHIFT(-10000, 0, s64, 0, true);
+	TEST_ONE_SHIFT(-10000, 0, u64, 0, true);
 
 	/* Nonsense: negative shift values. */
-	err |= TEST_ONE_SHIFT(0, -5, s8, 0, true);
-	err |= TEST_ONE_SHIFT(0, -5, u8, 0, true);
-	err |= TEST_ONE_SHIFT(0, -10, s16, 0, true);
-	err |= TEST_ONE_SHIFT(0, -10, u16, 0, true);
-	err |= TEST_ONE_SHIFT(0, -15, int, 0, true);
-	err |= TEST_ONE_SHIFT(0, -15, unsigned int, 0, true);
-	err |= TEST_ONE_SHIFT(0, -20, s32, 0, true);
-	err |= TEST_ONE_SHIFT(0, -20, u32, 0, true);
-	err |= TEST_ONE_SHIFT(0, -30, s64, 0, true);
-	err |= TEST_ONE_SHIFT(0, -30, u64, 0, true);
+	TEST_ONE_SHIFT(0, -5, s8, 0, true);
+	TEST_ONE_SHIFT(0, -5, u8, 0, true);
+	TEST_ONE_SHIFT(0, -10, s16, 0, true);
+	TEST_ONE_SHIFT(0, -10, u16, 0, true);
+	TEST_ONE_SHIFT(0, -15, int, 0, true);
+	TEST_ONE_SHIFT(0, -15, unsigned int, 0, true);
+	TEST_ONE_SHIFT(0, -20, s32, 0, true);
+	TEST_ONE_SHIFT(0, -20, u32, 0, true);
+	TEST_ONE_SHIFT(0, -30, s64, 0, true);
+	TEST_ONE_SHIFT(0, -30, u64, 0, true);
 
 	/* Overflow: shifted at or beyond entire type's bit width. */
-	err |= TEST_ONE_SHIFT(0, 8, u8, 0, true);
-	err |= TEST_ONE_SHIFT(0, 9, u8, 0, true);
-	err |= TEST_ONE_SHIFT(0, 8, s8, 0, true);
-	err |= TEST_ONE_SHIFT(0, 9, s8, 0, true);
-	err |= TEST_ONE_SHIFT(0, 16, u16, 0, true);
-	err |= TEST_ONE_SHIFT(0, 17, u16, 0, true);
-	err |= TEST_ONE_SHIFT(0, 16, s16, 0, true);
-	err |= TEST_ONE_SHIFT(0, 17, s16, 0, true);
-	err |= TEST_ONE_SHIFT(0, 32, u32, 0, true);
-	err |= TEST_ONE_SHIFT(0, 33, u32, 0, true);
-	err |= TEST_ONE_SHIFT(0, 32, int, 0, true);
-	err |= TEST_ONE_SHIFT(0, 33, int, 0, true);
-	err |= TEST_ONE_SHIFT(0, 32, s32, 0, true);
-	err |= TEST_ONE_SHIFT(0, 33, s32, 0, true);
-	err |= TEST_ONE_SHIFT(0, 64, u64, 0, true);
-	err |= TEST_ONE_SHIFT(0, 65, u64, 0, true);
-	err |= TEST_ONE_SHIFT(0, 64, s64, 0, true);
-	err |= TEST_ONE_SHIFT(0, 65, s64, 0, true);
+	TEST_ONE_SHIFT(0, 8, u8, 0, true);
+	TEST_ONE_SHIFT(0, 9, u8, 0, true);
+	TEST_ONE_SHIFT(0, 8, s8, 0, true);
+	TEST_ONE_SHIFT(0, 9, s8, 0, true);
+	TEST_ONE_SHIFT(0, 16, u16, 0, true);
+	TEST_ONE_SHIFT(0, 17, u16, 0, true);
+	TEST_ONE_SHIFT(0, 16, s16, 0, true);
+	TEST_ONE_SHIFT(0, 17, s16, 0, true);
+	TEST_ONE_SHIFT(0, 32, u32, 0, true);
+	TEST_ONE_SHIFT(0, 33, u32, 0, true);
+	TEST_ONE_SHIFT(0, 32, int, 0, true);
+	TEST_ONE_SHIFT(0, 33, int, 0, true);
+	TEST_ONE_SHIFT(0, 32, s32, 0, true);
+	TEST_ONE_SHIFT(0, 33, s32, 0, true);
+	TEST_ONE_SHIFT(0, 64, u64, 0, true);
+	TEST_ONE_SHIFT(0, 65, u64, 0, true);
+	TEST_ONE_SHIFT(0, 64, s64, 0, true);
+	TEST_ONE_SHIFT(0, 65, s64, 0, true);
 
 	/*
 	 * Corner case: for unsigned types, we fail when we've shifted
@@ -472,17 +440,14 @@ static int __init test_overflow_shift(void)
 	 * signed bit). So, for now, we will test this condition but
 	 * mark it as not expected to overflow.
 	 */
-	err |= TEST_ONE_SHIFT(0, 7, s8, 0, false);
-	err |= TEST_ONE_SHIFT(0, 15, s16, 0, false);
-	err |= TEST_ONE_SHIFT(0, 31, int, 0, false);
-	err |= TEST_ONE_SHIFT(0, 31, s32, 0, false);
-	err |= TEST_ONE_SHIFT(0, 63, s64, 0, false);
-
-	pr_info("%d shift tests finished\n", count);
+	TEST_ONE_SHIFT(0, 7, s8, 0, false);
+	TEST_ONE_SHIFT(0, 15, s16, 0, false);
+	TEST_ONE_SHIFT(0, 31, int, 0, false);
+	TEST_ONE_SHIFT(0, 31, s32, 0, false);
+	TEST_ONE_SHIFT(0, 63, s64, 0, false);
 
+	kunit_info(test, "%d shift tests finished\n", count);
 #undef TEST_ONE_SHIFT
-
-	return err;
 }
 
 /*
@@ -502,7 +467,7 @@ static int __init test_overflow_shift(void)
 #define TEST_SIZE		(5 * 4096)
 
 #define DEFINE_TEST_ALLOC(func, free_func, want_arg, want_gfp, want_node)\
-static int __init test_ ## func (void *arg)				\
+static void test_ ## func (struct kunit *test, void *arg)		\
 {									\
 	volatile size_t a = TEST_SIZE;					\
 	volatile size_t b = (SIZE_MAX / TEST_SIZE) + 1;			\
@@ -510,30 +475,24 @@ static int __init test_ ## func (void *arg)				\
 									\
 	/* Tiny allocation test. */					\
 	ptr = alloc ## want_arg ## want_gfp ## want_node (func, arg, 1);\
-	if (!ptr) {							\
-		pr_warn(#func " failed regular allocation?!\n");	\
-		return 1;						\
-	}								\
+	KUNIT_ASSERT_NOT_ERR_OR_NULL_MSG(test, ptr,			\
+			    #func " failed regular allocation?!\n");	\
 	free ## want_arg (free_func, arg, ptr);				\
 									\
 	/* Wrapped allocation test. */					\
 	ptr = alloc ## want_arg ## want_gfp ## want_node (func, arg,	\
 							  a * b);	\
-	if (!ptr) {							\
-		pr_warn(#func " unexpectedly failed bad wrapping?!\n");	\
-		return 1;						\
-	}								\
+	KUNIT_ASSERT_NOT_ERR_OR_NULL_MSG(test, ptr,			\
+			    #func " unexpectedly failed bad wrapping?!\n"); \
 	free ## want_arg (free_func, arg, ptr);				\
 									\
 	/* Saturated allocation test. */				\
 	ptr = alloc ## want_arg ## want_gfp ## want_node (func, arg,	\
 						   array_size(a, b));	\
 	if (ptr) {							\
-		pr_warn(#func " missed saturation!\n");			\
+		KUNIT_FAIL(test, #func " missed saturation!\n");	\
 		free ## want_arg (free_func, arg, ptr);			\
-		return 1;						\
 	}								\
-	return 0;							\
 }
 
 /*
@@ -554,44 +513,38 @@ DEFINE_TEST_ALLOC(kvzalloc_node, kvfree,     0, 1, 1);
 DEFINE_TEST_ALLOC(devm_kmalloc,  devm_kfree, 1, 1, 0);
 DEFINE_TEST_ALLOC(devm_kzalloc,  devm_kfree, 1, 1, 0);
 
-static int __init test_overflow_allocation(void)
+static void overflow_allocation_test(struct kunit *test)
 {
 	const char device_name[] = "overflow-test";
 	struct device *dev;
 	int count = 0;
-	int err = 0;
 
-#define check_allocation_overflow(alloc)	({	\
+#define check_allocation_overflow(alloc)	do {	\
 	count++;					\
-	test_ ## alloc(dev);				\
-})
+	test_ ## alloc(test, dev);			\
+} while (0)
 
 	/* Create dummy device for devm_kmalloc()-family tests. */
 	dev = root_device_register(device_name);
-	if (IS_ERR(dev)) {
-		pr_warn("Cannot register test device\n");
-		return 1;
-	}
-
-	err |= check_allocation_overflow(kmalloc);
-	err |= check_allocation_overflow(kmalloc_node);
-	err |= check_allocation_overflow(kzalloc);
-	err |= check_allocation_overflow(kzalloc_node);
-	err |= check_allocation_overflow(__vmalloc);
-	err |= check_allocation_overflow(kvmalloc);
-	err |= check_allocation_overflow(kvmalloc_node);
-	err |= check_allocation_overflow(kvzalloc);
-	err |= check_allocation_overflow(kvzalloc_node);
-	err |= check_allocation_overflow(devm_kmalloc);
-	err |= check_allocation_overflow(devm_kzalloc);
+	KUNIT_ASSERT_FALSE_MSG(test, IS_ERR(dev),
+			       "Cannot register test device\n");
+
+	check_allocation_overflow(kmalloc);
+	check_allocation_overflow(kmalloc_node);
+	check_allocation_overflow(kzalloc);
+	check_allocation_overflow(kzalloc_node);
+	check_allocation_overflow(__vmalloc);
+	check_allocation_overflow(kvmalloc);
+	check_allocation_overflow(kvmalloc_node);
+	check_allocation_overflow(kvzalloc);
+	check_allocation_overflow(kvzalloc_node);
+	check_allocation_overflow(devm_kmalloc);
+	check_allocation_overflow(devm_kzalloc);
 
 	device_unregister(dev);
 
-	pr_info("%d allocation overflow tests finished\n", count);
-
+	kunit_info(test, "%d allocation overflow tests finished\n", count);
 #undef check_allocation_overflow
-
-	return err;
 }
 
 struct __test_flex_array {
@@ -600,127 +553,118 @@ struct __test_flex_array {
 	unsigned long data[];
 };
 
-static int __init test_overflow_size_helpers(void)
+static void overflow_size_helpers_test(struct kunit *test)
 {
 	/* Make sure struct_size() can be used in a constant expression. */
 	u8 ce_array[struct_size((struct __test_flex_array *)0, data, 55)];
 	struct __test_flex_array *obj;
 	int count = 0;
-	int err = 0;
 	int var;
 	volatile int unconst = 0;
 
 	/* Verify constant expression against runtime version. */
 	var = 55;
 	OPTIMIZER_HIDE_VAR(var);
-	err |= sizeof(ce_array) != struct_size(obj, data, var);
+	KUNIT_EXPECT_EQ(test, sizeof(ce_array), struct_size(obj, data, var));
 
-#define check_one_size_helper(expected, func, args...)	({	\
-	bool __failure = false;					\
-	size_t _r;						\
-								\
-	_r = func(args);					\
-	if (_r != (expected)) {					\
-		pr_warn("expected " #func "(" #args ") "	\
-			"to return %zu but got %zu instead\n",	\
-			(size_t)(expected), _r);		\
-		__failure = true;				\
-	}							\
+#define check_one_size_helper(expected, func, args...)	do {	\
+	size_t _r = func(args);					\
+	KUNIT_EXPECT_EQ_MSG(test, _r, expected,			\
+		"expected " #func "(" #args ") to return %zu but got %zu instead\n", \
+		(size_t)(expected), _r);			\
 	count++;						\
-	__failure;						\
-})
+} while (0)
 
 	var = 4;
-	err |= check_one_size_helper(20,       size_mul, var++, 5);
-	err |= check_one_size_helper(20,       size_mul, 4, var++);
-	err |= check_one_size_helper(0,	       size_mul, 0, 3);
-	err |= check_one_size_helper(0,	       size_mul, 3, 0);
-	err |= check_one_size_helper(6,	       size_mul, 2, 3);
-	err |= check_one_size_helper(SIZE_MAX, size_mul, SIZE_MAX,  1);
-	err |= check_one_size_helper(SIZE_MAX, size_mul, SIZE_MAX,  3);
-	err |= check_one_size_helper(SIZE_MAX, size_mul, SIZE_MAX, -3);
+	check_one_size_helper(20,	size_mul, var++, 5);
+	check_one_size_helper(20,	size_mul, 4, var++);
+	check_one_size_helper(0,	size_mul, 0, 3);
+	check_one_size_helper(0,	size_mul, 3, 0);
+	check_one_size_helper(6,	size_mul, 2, 3);
+	check_one_size_helper(SIZE_MAX,	size_mul, SIZE_MAX,  1);
+	check_one_size_helper(SIZE_MAX,	size_mul, SIZE_MAX,  3);
+	check_one_size_helper(SIZE_MAX,	size_mul, SIZE_MAX, -3);
 
 	var = 4;
-	err |= check_one_size_helper(9,        size_add, var++, 5);
-	err |= check_one_size_helper(9,        size_add, 4, var++);
-	err |= check_one_size_helper(9,	       size_add, 9, 0);
-	err |= check_one_size_helper(9,	       size_add, 0, 9);
-	err |= check_one_size_helper(5,	       size_add, 2, 3);
-	err |= check_one_size_helper(SIZE_MAX, size_add, SIZE_MAX,  1);
-	err |= check_one_size_helper(SIZE_MAX, size_add, SIZE_MAX,  3);
-	err |= check_one_size_helper(SIZE_MAX, size_add, SIZE_MAX, -3);
+	check_one_size_helper(9,	size_add, var++, 5);
+	check_one_size_helper(9,	size_add, 4, var++);
+	check_one_size_helper(9,	size_add, 9, 0);
+	check_one_size_helper(9,	size_add, 0, 9);
+	check_one_size_helper(5,	size_add, 2, 3);
+	check_one_size_helper(SIZE_MAX, size_add, SIZE_MAX,  1);
+	check_one_size_helper(SIZE_MAX, size_add, SIZE_MAX,  3);
+	check_one_size_helper(SIZE_MAX, size_add, SIZE_MAX, -3);
 
 	var = 4;
-	err |= check_one_size_helper(1,        size_sub, var--, 3);
-	err |= check_one_size_helper(1,        size_sub, 4, var--);
-	err |= check_one_size_helper(1,        size_sub, 3, 2);
-	err |= check_one_size_helper(9,	       size_sub, 9, 0);
-	err |= check_one_size_helper(SIZE_MAX, size_sub, 9, -3);
-	err |= check_one_size_helper(SIZE_MAX, size_sub, 0, 9);
-	err |= check_one_size_helper(SIZE_MAX, size_sub, 2, 3);
-	err |= check_one_size_helper(SIZE_MAX, size_sub, SIZE_MAX,  0);
-	err |= check_one_size_helper(SIZE_MAX, size_sub, SIZE_MAX, 10);
-	err |= check_one_size_helper(SIZE_MAX, size_sub, 0,  SIZE_MAX);
-	err |= check_one_size_helper(SIZE_MAX, size_sub, 14, SIZE_MAX);
-	err |= check_one_size_helper(SIZE_MAX - 2, size_sub, SIZE_MAX - 1,  1);
-	err |= check_one_size_helper(SIZE_MAX - 4, size_sub, SIZE_MAX - 1,  3);
-	err |= check_one_size_helper(1,		size_sub, SIZE_MAX - 1, -3);
+	check_one_size_helper(1,	size_sub, var--, 3);
+	check_one_size_helper(1,	size_sub, 4, var--);
+	check_one_size_helper(1,	size_sub, 3, 2);
+	check_one_size_helper(9,	size_sub, 9, 0);
+	check_one_size_helper(SIZE_MAX, size_sub, 9, -3);
+	check_one_size_helper(SIZE_MAX, size_sub, 0, 9);
+	check_one_size_helper(SIZE_MAX, size_sub, 2, 3);
+	check_one_size_helper(SIZE_MAX, size_sub, SIZE_MAX,  0);
+	check_one_size_helper(SIZE_MAX, size_sub, SIZE_MAX, 10);
+	check_one_size_helper(SIZE_MAX, size_sub, 0,  SIZE_MAX);
+	check_one_size_helper(SIZE_MAX, size_sub, 14, SIZE_MAX);
+	check_one_size_helper(SIZE_MAX - 2, size_sub, SIZE_MAX - 1,  1);
+	check_one_size_helper(SIZE_MAX - 4, size_sub, SIZE_MAX - 1,  3);
+	check_one_size_helper(1,		size_sub, SIZE_MAX - 1, -3);
 
 	var = 4;
-	err |= check_one_size_helper(4 * sizeof(*obj->data),
-				     flex_array_size, obj, data, var++);
-	err |= check_one_size_helper(5 * sizeof(*obj->data),
-				     flex_array_size, obj, data, var++);
-	err |= check_one_size_helper(0, flex_array_size, obj, data, 0 + unconst);
-	err |= check_one_size_helper(sizeof(*obj->data),
-				     flex_array_size, obj, data, 1 + unconst);
-	err |= check_one_size_helper(7 * sizeof(*obj->data),
-				     flex_array_size, obj, data, 7 + unconst);
-	err |= check_one_size_helper(SIZE_MAX,
-				     flex_array_size, obj, data, -1 + unconst);
-	err |= check_one_size_helper(SIZE_MAX,
-				     flex_array_size, obj, data, SIZE_MAX - 4 + unconst);
+	check_one_size_helper(4 * sizeof(*obj->data),
+			      flex_array_size, obj, data, var++);
+	check_one_size_helper(5 * sizeof(*obj->data),
+			      flex_array_size, obj, data, var++);
+	check_one_size_helper(0, flex_array_size, obj, data, 0 + unconst);
+	check_one_size_helper(sizeof(*obj->data),
+			      flex_array_size, obj, data, 1 + unconst);
+	check_one_size_helper(7 * sizeof(*obj->data),
+			      flex_array_size, obj, data, 7 + unconst);
+	check_one_size_helper(SIZE_MAX,
+			      flex_array_size, obj, data, -1 + unconst);
+	check_one_size_helper(SIZE_MAX,
+			      flex_array_size, obj, data, SIZE_MAX - 4 + unconst);
 
 	var = 4;
-	err |= check_one_size_helper(sizeof(*obj) + (4 * sizeof(*obj->data)),
-				     struct_size, obj, data, var++);
-	err |= check_one_size_helper(sizeof(*obj) + (5 * sizeof(*obj->data)),
-				     struct_size, obj, data, var++);
-	err |= check_one_size_helper(sizeof(*obj), struct_size, obj, data, 0 + unconst);
-	err |= check_one_size_helper(sizeof(*obj) + sizeof(*obj->data),
-				     struct_size, obj, data, 1 + unconst);
-	err |= check_one_size_helper(SIZE_MAX,
-				     struct_size, obj, data, -3 + unconst);
-	err |= check_one_size_helper(SIZE_MAX,
-				     struct_size, obj, data, SIZE_MAX - 3 + unconst);
-
-	pr_info("%d overflow size helper tests finished\n", count);
-
-	return err;
+	check_one_size_helper(sizeof(*obj) + (4 * sizeof(*obj->data)),
+			      struct_size, obj, data, var++);
+	check_one_size_helper(sizeof(*obj) + (5 * sizeof(*obj->data)),
+			      struct_size, obj, data, var++);
+	check_one_size_helper(sizeof(*obj), struct_size, obj, data, 0 + unconst);
+	check_one_size_helper(sizeof(*obj) + sizeof(*obj->data),
+			      struct_size, obj, data, 1 + unconst);
+	check_one_size_helper(SIZE_MAX,
+			      struct_size, obj, data, -3 + unconst);
+	check_one_size_helper(SIZE_MAX,
+			      struct_size, obj, data, SIZE_MAX - 3 + unconst);
+
+	kunit_info(test, "%d overflow size helper tests finished\n", count);
+#undef check_one_size_helper
 }
 
-static int __init test_module_init(void)
-{
-	int err = 0;
-
-	err |= test_overflow_calculation();
-	err |= test_overflow_shift();
-	err |= test_overflow_size_helpers();
-	err |= test_overflow_allocation();
-
-	if (err) {
-		pr_warn("FAIL!\n");
-		err = -EINVAL;
-	} else {
-		pr_info("all tests passed\n");
-	}
+static struct kunit_case overflow_test_cases[] = {
+	KUNIT_CASE(u8_overflow_test),
+	KUNIT_CASE(s8_overflow_test),
+	KUNIT_CASE(u16_overflow_test),
+	KUNIT_CASE(s16_overflow_test),
+	KUNIT_CASE(u32_overflow_test),
+	KUNIT_CASE(s32_overflow_test),
+#if BITS_PER_LONG == 64
+	KUNIT_CASE(u64_overflow_test),
+	KUNIT_CASE(s64_overflow_test),
+#endif
+	KUNIT_CASE(overflow_shift_test),
+	KUNIT_CASE(overflow_allocation_test),
+	KUNIT_CASE(overflow_size_helpers_test),
+	{}
+};
 
-	return err;
-}
+static struct kunit_suite overflow_test_suite = {
+	.name = "overflow",
+	.test_cases = overflow_test_cases,
+};
 
-static void __exit test_module_exit(void)
-{ }
+kunit_test_suite(overflow_test_suite);
 
-module_init(test_module_init);
-module_exit(test_module_exit);
 MODULE_LICENSE("Dual MIT/GPL");
-- 
GitLab


From e52432e164230929fe1f7b5a67bda0cc870f66d5 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 19 Jan 2022 10:43:54 -0800
Subject: [PATCH 0690/1586] tpm: vtpm_proxy: Check length to avoid compiler
 warning

When building with -Warray-bounds under GCC 11.2, this warning was
emitted:

In function 'memset',
    inlined from 'vtpm_proxy_fops_read' at drivers/char/tpm/tpm_vtpm_proxy.c:102:2:
./include/linux/fortify-string.h:43:33: warning: '__builtin_memset' pointer overflow between offset 164 and size [2147483648, 4294967295]
[-Warray-bounds]
   43 | #define __underlying_memset     __builtin_memset
      |                                 ^

This warning appears to be triggered due to the "count < len"
check in vtpm_proxy_fops_read() splitting the CFG[1], and the compiler
attempting to reason about the possible value range in len compared
to the buffer size.

In order to silence this warning, and to keep this code robust if the
use of proxy_dev->req_len ever changes in the future, explicitly check
the size of len before reaching the memset().

[1] https://lore.kernel.org/lkml/CAG48ez1iTF9KegKJrW5a3WzXgCPZJ73nS2_e5esKJRppdzvv8g@mail.gmail.com

Cc: Peter Huewe <peterhuewe@gmx.de>
Cc: Jarkko Sakkinen <jarkko@kernel.org>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: linux-integrity@vger.kernel.org
Reviewed-by: Stefan Berger <stefanb@linux.ibm.com>
Link: https://lore.kernel.org/lkml/4b59d305-6858-1514-751a-37853ad777be@linux.ibm.com
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20220119184354.3367603-1-keescook@chromium.org
---
 drivers/char/tpm/tpm_vtpm_proxy.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/char/tpm/tpm_vtpm_proxy.c b/drivers/char/tpm/tpm_vtpm_proxy.c
index 91c772e38bb54..5c865987ba5c1 100644
--- a/drivers/char/tpm/tpm_vtpm_proxy.c
+++ b/drivers/char/tpm/tpm_vtpm_proxy.c
@@ -91,7 +91,7 @@ static ssize_t vtpm_proxy_fops_read(struct file *filp, char __user *buf,
 
 	len = proxy_dev->req_len;
 
-	if (count < len) {
+	if (count < len || len > sizeof(proxy_dev->buffer)) {
 		mutex_unlock(&proxy_dev->buf_lock);
 		pr_debug("Invalid size in recv: count=%zd, req_len=%zd\n",
 			 count, len);
-- 
GitLab


From fad278388e01e3658a356118bed8ee2c2408d280 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Mon, 24 May 2021 21:15:15 -0700
Subject: [PATCH 0691/1586] media: omap3isp: Use struct_group() for memcpy()
 region

In preparation for FORTIFY_SOURCE performing compile-time and run-time
field bounds checking for memcpy(), memmove(), and memset(), avoid
intentionally writing across neighboring fields. Wrap the target region
in struct_group(). This additionally fixes a theoretical misalignment
of the copy (since the size of "buf" changes between 64-bit and 32-bit,
but this is likely never built for 64-bit).

FWIW, I think this code is totally broken on 64-bit (which appears to
not be a "real" build configuration): it would either always fail (with
an uninitialized data->buf_size) or would cause corruption in userspace
due to the copy_to_user() in the call path against an uninitialized
data->buf value:

omap3isp_stat_request_statistics_time32(...)
    struct omap3isp_stat_data data64;
    ...
    omap3isp_stat_request_statistics(stat, &data64);

int omap3isp_stat_request_statistics(struct ispstat *stat,
                                     struct omap3isp_stat_data *data)
    ...
    buf = isp_stat_buf_get(stat, data);

static struct ispstat_buffer *isp_stat_buf_get(struct ispstat *stat,
                                               struct omap3isp_stat_data *data)
...
    if (buf->buf_size > data->buf_size) {
            ...
            return ERR_PTR(-EINVAL);
    }
    ...
    rval = copy_to_user(data->buf,
                        buf->virt_addr,
                        buf->buf_size);

Regardless, additionally initialize data64 to be zero-filled to avoid
undefined behavior.

Cc: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Cc: Mauro Carvalho Chehab <mchehab@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Sakari Ailus <sakari.ailus@linux.intel.com>
Cc: linux-media@vger.kernel.org
Fixes: 378e3f81cb56 ("media: omap3isp: support 64-bit version of omap3isp_stat_data")
Cc: stable@vger.kernel.org
Reviewed-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Link: https://lore.kernel.org/lkml/20211215220505.GB21862@embeddedor
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 drivers/media/platform/omap3isp/ispstat.c |  5 +++--
 include/uapi/linux/omap3isp.h             | 21 +++++++++++++--------
 2 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/drivers/media/platform/omap3isp/ispstat.c b/drivers/media/platform/omap3isp/ispstat.c
index 5b9b57f4d9bf8..68cf68dbcace2 100644
--- a/drivers/media/platform/omap3isp/ispstat.c
+++ b/drivers/media/platform/omap3isp/ispstat.c
@@ -512,7 +512,7 @@ int omap3isp_stat_request_statistics(struct ispstat *stat,
 int omap3isp_stat_request_statistics_time32(struct ispstat *stat,
 					struct omap3isp_stat_data_time32 *data)
 {
-	struct omap3isp_stat_data data64;
+	struct omap3isp_stat_data data64 = { };
 	int ret;
 
 	ret = omap3isp_stat_request_statistics(stat, &data64);
@@ -521,7 +521,8 @@ int omap3isp_stat_request_statistics_time32(struct ispstat *stat,
 
 	data->ts.tv_sec = data64.ts.tv_sec;
 	data->ts.tv_usec = data64.ts.tv_usec;
-	memcpy(&data->buf, &data64.buf, sizeof(*data) - sizeof(data->ts));
+	data->buf = (uintptr_t)data64.buf;
+	memcpy(&data->frame, &data64.frame, sizeof(data->frame));
 
 	return 0;
 }
diff --git a/include/uapi/linux/omap3isp.h b/include/uapi/linux/omap3isp.h
index 87b55755f4ffe..d9db7ad438908 100644
--- a/include/uapi/linux/omap3isp.h
+++ b/include/uapi/linux/omap3isp.h
@@ -162,6 +162,7 @@ struct omap3isp_h3a_aewb_config {
  * struct omap3isp_stat_data - Statistic data sent to or received from user
  * @ts: Timestamp of returned framestats.
  * @buf: Pointer to pass to user.
+ * @buf_size: Size of buffer.
  * @frame_number: Frame number of requested stats.
  * @cur_frame: Current frame number being processed.
  * @config_counter: Number of the configuration associated with the data.
@@ -176,10 +177,12 @@ struct omap3isp_stat_data {
 	struct timeval ts;
 #endif
 	void __user *buf;
-	__u32 buf_size;
-	__u16 frame_number;
-	__u16 cur_frame;
-	__u16 config_counter;
+	__struct_group(/* no tag */, frame, /* no attrs */,
+		__u32 buf_size;
+		__u16 frame_number;
+		__u16 cur_frame;
+		__u16 config_counter;
+	);
 };
 
 #ifdef __KERNEL__
@@ -189,10 +192,12 @@ struct omap3isp_stat_data_time32 {
 		__s32	tv_usec;
 	} ts;
 	__u32 buf;
-	__u32 buf_size;
-	__u16 frame_number;
-	__u16 cur_frame;
-	__u16 config_counter;
+	__struct_group(/* no tag */, frame, /* no attrs */,
+		__u32 buf_size;
+		__u16 frame_number;
+		__u16 cur_frame;
+		__u16 config_counter;
+	);
 };
 #endif
 
-- 
GitLab


From 451f0b6f4c44d7b649ae609157b114b71f6d7875 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 25 Feb 2022 19:14:40 +0100
Subject: [PATCH 0692/1586] block: default BLOCK_LEGACY_AUTOLOAD to y

As Luis reported, losetup currently doesn't properly create the loop
device without this if the device node already exists because old
scripts created it manually.  So default to y for now and remove the
aggressive removal schedule.

Reported-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Link: https://lore.kernel.org/r/20220225181440.1351591-1-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/Kconfig | 8 +++-----
 block/bdev.c  | 2 +-
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/block/Kconfig b/block/Kconfig
index 168b873eb666d..7eb5d6d53b3fc 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -28,15 +28,13 @@ if BLOCK
 
 config BLOCK_LEGACY_AUTOLOAD
 	bool "Legacy autoloading support"
+	default y
 	help
 	  Enable loading modules and creating block device instances based on
 	  accesses through their device special file.  This is a historic Linux
 	  feature and makes no sense in a udev world where device files are
-	  created on demand.
-
-	  Say N here unless booting or other functionality broke without it, in
-	  which case you should also send a report to your distribution and
-	  linux-block@vger.kernel.org.
+	  created on demand, but scripts that manually create device nodes and
+	  then call losetup might rely on this behavior.
 
 config BLK_RQ_ALLOC_TIME
 	bool
diff --git a/block/bdev.c b/block/bdev.c
index c687726445660..a3632317c8aae 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -738,7 +738,7 @@ struct block_device *blkdev_get_no_open(dev_t dev)
 		inode = ilookup(blockdev_superblock, dev);
 		if (inode)
 			pr_warn_ratelimited(
-"block device autoloading is deprecated. It will be removed in Linux 5.19\n");
+"block device autoloading is deprecated and will be removed.\n");
 	}
 	if (!inode)
 		return NULL;
-- 
GitLab


From 4a09a845c1773fe3cd96c2afe737b58a88e3d30b Mon Sep 17 00:00:00 2001
From: Gioh Kim <gi-oh.kim@ionos.com>
Date: Fri, 14 Jan 2022 16:58:54 +0100
Subject: [PATCH 0693/1586] block/rnbd-clt: fix CHECK:BRACES warning

This patch fix the "CHECK:BRACES: braces {} should be used on all
arms of this statement" warning from checkpatch

Signed-off-by: Gioh Kim <gi-oh.kim@ionos.com>
Signed-off-by: Md Haris Iqbal <haris.iqbal@ionos.com>
Link: https://lore.kernel.org/r/20220114155855.984144-2-haris.iqbal@ionos.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/rnbd/rnbd-clt.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c
index c08971de369fc..f0370a345c3e7 100644
--- a/drivers/block/rnbd/rnbd-clt.c
+++ b/drivers/block/rnbd/rnbd-clt.c
@@ -1262,9 +1262,9 @@ find_and_get_or_create_sess(const char *sessname,
 	struct rtrs_clt_ops rtrs_ops;
 
 	sess = find_or_create_sess(sessname, &first);
-	if (sess == ERR_PTR(-ENOMEM))
+	if (sess == ERR_PTR(-ENOMEM)) {
 		return ERR_PTR(-ENOMEM);
-	else if ((nr_poll_queues && !first) ||  (!nr_poll_queues && sess->nr_poll_queues)) {
+	} else if ((nr_poll_queues && !first) ||  (!nr_poll_queues && sess->nr_poll_queues)) {
 		/*
 		 * A device MUST have its own session to use the polling-mode.
 		 * It must fail to map new device with the same session.
-- 
GitLab


From 030ce8ba97d2e85e5310b1ae8236cd640bd384d0 Mon Sep 17 00:00:00 2001
From: Gioh Kim <gi-oh.kim@ionos.com>
Date: Fri, 14 Jan 2022 16:58:55 +0100
Subject: [PATCH 0694/1586] block/rnbd: client device does not care
 queue/rotational

On client side, the device is a network device. There is no reason
to set rotational even-if the target device on server is rotational.

Signed-off-by: Gioh Kim <gi-oh.kim@ionos.com>
Signed-off-by: Jack Wang <jinpu.wang@ionos.com>
Signed-off-by: Md Haris Iqbal <haris.iqbal@ionos.com>
Link: https://lore.kernel.org/r/20220114155855.984144-3-haris.iqbal@ionos.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/rnbd/rnbd-clt.c   | 11 ++++++-----
 drivers/block/rnbd/rnbd-clt.h   |  1 -
 drivers/block/rnbd/rnbd-proto.h |  4 ++--
 drivers/block/rnbd/rnbd-srv.c   |  1 -
 4 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c
index f0370a345c3e7..9a880d559ab86 100644
--- a/drivers/block/rnbd/rnbd-clt.c
+++ b/drivers/block/rnbd/rnbd-clt.c
@@ -87,7 +87,6 @@ static int rnbd_clt_set_dev_attr(struct rnbd_clt_dev *dev,
 	dev->discard_granularity    = le32_to_cpu(rsp->discard_granularity);
 	dev->discard_alignment	    = le32_to_cpu(rsp->discard_alignment);
 	dev->secure_discard	    = le16_to_cpu(rsp->secure_discard);
-	dev->rotational		    = rsp->rotational;
 	dev->wc			    = !!(rsp->cache_policy & RNBD_WRITEBACK);
 	dev->fua		    = !!(rsp->cache_policy & RNBD_FUA);
 
@@ -1410,8 +1409,10 @@ static int rnbd_clt_setup_gen_disk(struct rnbd_clt_dev *dev, int idx)
 		dev->read_only = false;
 	}
 
-	if (!dev->rotational)
-		blk_queue_flag_set(QUEUE_FLAG_NONROT, dev->queue);
+	/*
+	 * Network device does not need rotational
+	 */
+	blk_queue_flag_set(QUEUE_FLAG_NONROT, dev->queue);
 	err = add_disk(dev->gd);
 	if (err)
 		blk_cleanup_disk(dev->gd);
@@ -1610,13 +1611,13 @@ struct rnbd_clt_dev *rnbd_clt_map_device(const char *sessname,
 	}
 
 	rnbd_clt_info(dev,
-		       "map_device: Device mapped as %s (nsectors: %zu, logical_block_size: %d, physical_block_size: %d, max_write_same_sectors: %d, max_discard_sectors: %d, discard_granularity: %d, discard_alignment: %d, secure_discard: %d, max_segments: %d, max_hw_sectors: %d, rotational: %d, wc: %d, fua: %d)\n",
+		       "map_device: Device mapped as %s (nsectors: %zu, logical_block_size: %d, physical_block_size: %d, max_write_same_sectors: %d, max_discard_sectors: %d, discard_granularity: %d, discard_alignment: %d, secure_discard: %d, max_segments: %d, max_hw_sectors: %d, wc: %d, fua: %d)\n",
 		       dev->gd->disk_name, dev->nsectors,
 		       dev->logical_block_size, dev->physical_block_size,
 		       dev->max_write_same_sectors, dev->max_discard_sectors,
 		       dev->discard_granularity, dev->discard_alignment,
 		       dev->secure_discard, dev->max_segments,
-		       dev->max_hw_sectors, dev->rotational, dev->wc, dev->fua);
+		       dev->max_hw_sectors, dev->wc, dev->fua);
 
 	mutex_unlock(&dev->lock);
 	rnbd_clt_put_sess(sess);
diff --git a/drivers/block/rnbd/rnbd-clt.h b/drivers/block/rnbd/rnbd-clt.h
index 0c2cae7f39b9f..62bf7c3fa63c6 100644
--- a/drivers/block/rnbd/rnbd-clt.h
+++ b/drivers/block/rnbd/rnbd-clt.h
@@ -118,7 +118,6 @@ struct rnbd_clt_dev {
 	enum rnbd_access_mode	access_mode;
 	u32			nr_poll_queues;
 	bool			read_only;
-	bool			rotational;
 	bool			wc;
 	bool			fua;
 	u32			max_hw_sectors;
diff --git a/drivers/block/rnbd/rnbd-proto.h b/drivers/block/rnbd/rnbd-proto.h
index de5d5a8df81d7..c4a68b3a1cbe8 100644
--- a/drivers/block/rnbd/rnbd-proto.h
+++ b/drivers/block/rnbd/rnbd-proto.h
@@ -128,7 +128,7 @@ enum rnbd_cache_policy {
  * @logical_block_size: logical block size device supports in bytes
  * @max_segments:	max segments hardware support in one transfer
  * @secure_discard:	supports secure discard
- * @rotation:		is a rotational disc?
+ * @obsolete_rotational: obsolete, not in used.
  * @cache_policy: 	support write-back caching or FUA?
  */
 struct rnbd_msg_open_rsp {
@@ -144,7 +144,7 @@ struct rnbd_msg_open_rsp {
 	__le16			logical_block_size;
 	__le16			max_segments;
 	__le16			secure_discard;
-	u8			rotational;
+	u8			obsolete_rotational;
 	u8			cache_policy;
 	u8			reserved[10];
 };
diff --git a/drivers/block/rnbd/rnbd-srv.c b/drivers/block/rnbd/rnbd-srv.c
index 132e950685d59..6499efae5c43e 100644
--- a/drivers/block/rnbd/rnbd-srv.c
+++ b/drivers/block/rnbd/rnbd-srv.c
@@ -558,7 +558,6 @@ static void rnbd_srv_fill_msg_open_rsp(struct rnbd_msg_open_rsp *rsp,
 		cpu_to_le32(rnbd_dev_get_discard_alignment(rnbd_dev));
 	rsp->secure_discard =
 		cpu_to_le16(rnbd_dev_get_secure_discard(rnbd_dev));
-	rsp->rotational = !blk_queue_nonrot(q);
 	rsp->cache_policy = 0;
 	if (test_bit(QUEUE_FLAG_WC, &q->queue_flags))
 		rsp->cache_policy |= RNBD_WRITEBACK;
-- 
GitLab


From 24afc15dbe218f860994f627b4ba1fb09225a298 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Mon, 7 Feb 2022 21:48:19 +0100
Subject: [PATCH 0695/1586] block/rnbd: Remove a useless mutex

According to lib/idr.c,
   The IDA handles its own locking.  It is safe to call any of the IDA
   functions without synchronisation in your code.

so the 'ida_lock' mutex can just be removed.
It is here only to protect some ida_simple_get()/ida_simple_remove() calls.

While at it, switch to ida_alloc_XXX()/ida_free() instead to
ida_simple_get()/ida_simple_remove().
The latter is deprecated and more verbose.

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Acked-by: Jack Wang <jinpu.wang@ionos.com>
Link: https://lore.kernel.org/r/7f9eccd8b1fce1bac45ac9b01a78cf72f54c0a61.1644266862.git.christophe.jaillet@wanadoo.fr
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/rnbd/rnbd-clt.c | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c
index 9a880d559ab86..1f63f308eb394 100644
--- a/drivers/block/rnbd/rnbd-clt.c
+++ b/drivers/block/rnbd/rnbd-clt.c
@@ -23,7 +23,6 @@ MODULE_LICENSE("GPL");
 
 static int rnbd_client_major;
 static DEFINE_IDA(index_ida);
-static DEFINE_MUTEX(ida_lock);
 static DEFINE_MUTEX(sess_lock);
 static LIST_HEAD(sess_list);
 
@@ -55,9 +54,7 @@ static void rnbd_clt_put_dev(struct rnbd_clt_dev *dev)
 	if (!refcount_dec_and_test(&dev->refcount))
 		return;
 
-	mutex_lock(&ida_lock);
-	ida_simple_remove(&index_ida, dev->clt_device_id);
-	mutex_unlock(&ida_lock);
+	ida_free(&index_ida, dev->clt_device_id);
 	kfree(dev->hw_queues);
 	kfree(dev->pathname);
 	rnbd_clt_put_sess(dev->sess);
@@ -1460,10 +1457,8 @@ static struct rnbd_clt_dev *init_dev(struct rnbd_clt_session *sess,
 		goto out_alloc;
 	}
 
-	mutex_lock(&ida_lock);
-	ret = ida_simple_get(&index_ida, 0, 1 << (MINORBITS - RNBD_PART_BITS),
-			     GFP_KERNEL);
-	mutex_unlock(&ida_lock);
+	ret = ida_alloc_max(&index_ida, 1 << (MINORBITS - RNBD_PART_BITS),
+			    GFP_KERNEL);
 	if (ret < 0) {
 		pr_err("Failed to initialize device '%s' from session %s, allocating idr failed, err: %d\n",
 		       pathname, sess->sessname, ret);
-- 
GitLab


From d9a74051a73c4fbd065ae806b8da151cbded84f1 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Tue, 8 Feb 2022 11:46:56 +0000
Subject: [PATCH 0696/1586] loop: clean up grammar in warning message

The phrase "has still" should be "still has" to clean up the grammar.

Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Link: https://lore.kernel.org/r/20220208114656.61629-1-colin.i.king@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/loop.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 01cbbfc4e9e24..bdea448d24194 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1262,7 +1262,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
 	if (size_changed && lo->lo_device->bd_inode->i_mapping->nrpages) {
 		/* If any pages were dirtied after invalidate_bdev(), try again */
 		err = -EAGAIN;
-		pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n",
+		pr_warn("%s: loop%d (%s) still has dirty pages (nrpages=%lu)\n",
 			__func__, lo->lo_number, lo->lo_file_name,
 			lo->lo_device->bd_inode->i_mapping->nrpages);
 		goto out_unfreeze;
@@ -1482,7 +1482,7 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg)
 	/* invalidate_bdev should have truncated all the pages */
 	if (lo->lo_device->bd_inode->i_mapping->nrpages) {
 		err = -EAGAIN;
-		pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n",
+		pr_warn("%s: loop%d (%s) still has dirty pages (nrpages=%lu)\n",
 			__func__, lo->lo_number, lo->lo_file_name,
 			lo->lo_device->bd_inode->i_mapping->nrpages);
 		goto out_unfreeze;
-- 
GitLab


From a75110c3b36959d04d3a586ba43510ddf9b410b5 Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <kch@nvidia.com>
Date: Tue, 15 Feb 2022 03:59:51 -0800
Subject: [PATCH 0697/1586] null_blk: fix return value from null_add_dev()

The function nullb_device_power_store() returns -ENOMEM when
null_add_dev() fails. null_add_dev() can fail with return value
other than -ENOMEM such as -EINVAL when Zoned Block Device option
is used, see :

nullb_device_power_store()
 null_add_dev()
  null_init_zoned_dev()
	return -EINVAL;

When trying to load the module having -ENOMEM value returned on the
command line creates confusion when pleanty of memory is free on the
machine.

Instead of hardcoding -ENOMEM return the value of null_add_dev()
function.

Signed-off-by: Chaitanya Kulkarni <kch@nvidia.com>
Link: https://lore.kernel.org/r/20220215115951.15945-1-kch@nvidia.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/null_blk/main.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
index 13004beb48cab..90b6bd2a114b2 100644
--- a/drivers/block/null_blk/main.c
+++ b/drivers/block/null_blk/main.c
@@ -431,9 +431,10 @@ static ssize_t nullb_device_power_store(struct config_item *item,
 	if (!dev->power && newp) {
 		if (test_and_set_bit(NULLB_DEV_FL_UP, &dev->flags))
 			return count;
-		if (null_add_dev(dev)) {
+		ret = null_add_dev(dev);
+		if (ret) {
 			clear_bit(NULLB_DEV_FL_UP, &dev->flags);
-			return -ENOMEM;
+			return ret;
 		}
 
 		set_bit(NULLB_DEV_FL_CONFIGURED, &dev->flags);
-- 
GitLab


From b27824d31f09ea7b4a6ba2c1b18bd328df3e8bed Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <kch@nvidia.com>
Date: Tue, 15 Feb 2022 13:33:07 -0800
Subject: [PATCH 0698/1586] loop: use sysfs_emit() in the sysfs xxx show()

sprintf does not know the PAGE_SIZE maximum of the temporary buffer
used for outputting sysfs content and it's possible to overrun the
PAGE_SIZE buffer length.

Use a generic sysfs_emit function that knows the size of the
temporary buffer and ensures that no overrun is done for offset
attribute in
loop_attr_[offset|sizelimit|autoclear|partscan|dio]_show() callbacks.

Signed-off-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Link: https://lore.kernel.org/r/20220215213310.7264-2-kch@nvidia.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/loop.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index bdea448d24194..a55e5eda1d174 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -680,33 +680,33 @@ static ssize_t loop_attr_backing_file_show(struct loop_device *lo, char *buf)
 
 static ssize_t loop_attr_offset_show(struct loop_device *lo, char *buf)
 {
-	return sprintf(buf, "%llu\n", (unsigned long long)lo->lo_offset);
+	return sysfs_emit(buf, "%llu\n", (unsigned long long)lo->lo_offset);
 }
 
 static ssize_t loop_attr_sizelimit_show(struct loop_device *lo, char *buf)
 {
-	return sprintf(buf, "%llu\n", (unsigned long long)lo->lo_sizelimit);
+	return sysfs_emit(buf, "%llu\n", (unsigned long long)lo->lo_sizelimit);
 }
 
 static ssize_t loop_attr_autoclear_show(struct loop_device *lo, char *buf)
 {
 	int autoclear = (lo->lo_flags & LO_FLAGS_AUTOCLEAR);
 
-	return sprintf(buf, "%s\n", autoclear ? "1" : "0");
+	return sysfs_emit(buf, "%s\n", autoclear ? "1" : "0");
 }
 
 static ssize_t loop_attr_partscan_show(struct loop_device *lo, char *buf)
 {
 	int partscan = (lo->lo_flags & LO_FLAGS_PARTSCAN);
 
-	return sprintf(buf, "%s\n", partscan ? "1" : "0");
+	return sysfs_emit(buf, "%s\n", partscan ? "1" : "0");
 }
 
 static ssize_t loop_attr_dio_show(struct loop_device *lo, char *buf)
 {
 	int dio = (lo->lo_flags & LO_FLAGS_DIRECT_IO);
 
-	return sprintf(buf, "%s\n", dio ? "1" : "0");
+	return sysfs_emit(buf, "%s\n", dio ? "1" : "0");
 }
 
 LOOP_ATTR_RO(backing_file);
-- 
GitLab


From 0aab29b85478b994422f1551ce36e5640b09db2b Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <kch@nvidia.com>
Date: Tue, 15 Feb 2022 13:33:08 -0800
Subject: [PATCH 0699/1586] loop: remove extra variable in lo_fallocate()

The local variable q is used to pass it to the blk_queue_discard(). We
can get away with using lo->lo_queue instead of storing in a local
variable which is not used anywhere else.

No functional change in this patch.

Signed-off-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Link: https://lore.kernel.org/r/20220215213310.7264-3-kch@nvidia.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/loop.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index a55e5eda1d174..77c61eaaa6e4e 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -308,12 +308,11 @@ static int lo_fallocate(struct loop_device *lo, struct request *rq, loff_t pos,
 	 * a.k.a. discard/zerorange.
 	 */
 	struct file *file = lo->lo_backing_file;
-	struct request_queue *q = lo->lo_queue;
 	int ret;
 
 	mode |= FALLOC_FL_KEEP_SIZE;
 
-	if (!blk_queue_discard(q)) {
+	if (!blk_queue_discard(lo->lo_queue)) {
 		ret = -EOPNOTSUPP;
 		goto out;
 	}
-- 
GitLab


From 9c64e38cc639537ad9b542635af1733e8cb5e19b Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <kch@nvidia.com>
Date: Tue, 15 Feb 2022 13:33:09 -0800
Subject: [PATCH 0700/1586] loop: remove extra variable in lo_req_flush

The local variable file is used to pass it to the vfs_fsync(). We can
get away with using lo->lo_backing_file instead of storing in a local
variable which is not used anywhere else.

No functional change in this patch.

Signed-off-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Link: https://lore.kernel.org/r/20220215213310.7264-4-kch@nvidia.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/loop.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 77c61eaaa6e4e..18b30a56bfc47 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -326,8 +326,7 @@ static int lo_fallocate(struct loop_device *lo, struct request *rq, loff_t pos,
 
 static int lo_req_flush(struct loop_device *lo, struct request *rq)
 {
-	struct file *file = lo->lo_backing_file;
-	int ret = vfs_fsync(file, 0);
+	int ret = vfs_fsync(lo->lo_backing_file, 0);
 	if (unlikely(ret && ret != -EINVAL))
 		ret = -EIO;
 
-- 
GitLab


From ef44c50837ab7818920bd9994b36d6e573312abc Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <kch@nvidia.com>
Date: Tue, 15 Feb 2022 13:33:10 -0800
Subject: [PATCH 0701/1586] loop: allow user to set the queue depth

Instead of hardcoding queue depth allow user to set the hw queue depth
using module parameter. Set default value to 128 to retain the existing
behavior.

Signed-off-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Link: https://lore.kernel.org/r/20220215213310.7264-5-kch@nvidia.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/loop.c | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 18b30a56bfc47..8fb89d0624fc5 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -85,6 +85,7 @@
 #include <linux/uaccess.h>
 
 #define LOOP_IDLE_WORKER_TIMEOUT (60 * HZ)
+#define LOOP_DEFAULT_HW_Q_DEPTH (128)
 
 static DEFINE_IDR(loop_index_idr);
 static DEFINE_MUTEX(loop_ctl_mutex);
@@ -1785,6 +1786,24 @@ module_param(max_loop, int, 0444);
 MODULE_PARM_DESC(max_loop, "Maximum number of loop devices");
 module_param(max_part, int, 0444);
 MODULE_PARM_DESC(max_part, "Maximum number of partitions per loop device");
+
+static int hw_queue_depth = LOOP_DEFAULT_HW_Q_DEPTH;
+
+static int loop_set_hw_queue_depth(const char *s, const struct kernel_param *p)
+{
+	int ret = kstrtoint(s, 10, &hw_queue_depth);
+
+	return (ret || (hw_queue_depth < 1)) ? -EINVAL : 0;
+}
+
+static const struct kernel_param_ops loop_hw_qdepth_param_ops = {
+	.set	= loop_set_hw_queue_depth,
+	.get	= param_get_int,
+};
+
+device_param_cb(hw_queue_depth, &loop_hw_qdepth_param_ops, &hw_queue_depth, 0444);
+MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. Default: 128");
+
 MODULE_LICENSE("GPL");
 MODULE_ALIAS_BLOCKDEV_MAJOR(LOOP_MAJOR);
 
@@ -1979,7 +1998,7 @@ static int loop_add(int i)
 
 	lo->tag_set.ops = &loop_mq_ops;
 	lo->tag_set.nr_hw_queues = 1;
-	lo->tag_set.queue_depth = 128;
+	lo->tag_set.queue_depth = hw_queue_depth;
 	lo->tag_set.numa_node = NUMA_NO_NODE;
 	lo->tag_set.cmd_size = sizeof(struct loop_cmd);
 	lo->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_STACKING |
-- 
GitLab


From 3d3472f3ed419fe1a9d3f881a4905fa8e03d750c Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <kch@nvidia.com>
Date: Wed, 16 Feb 2022 09:29:45 -0800
Subject: [PATCH 0702/1586] null_blk: remove hardcoded alloc_cmd() parameter

Only caller of alloc_cmd() is null_submit_bio() unconditionally sets
second parameter to true and that is statically hard-coded in null_blk.
There is no point in having statically hardcoded function parameter.

Remove the unnecessary parameter can_wait and adjust the code so it
can retain existing behavior of waiting when we don't get valid
nullb_cmd from __alloc_cmd() in alloc_cmd().

The restructured code avoids multiple return statements, multiple
calls to __alloc_cmd() and resulting a fast path call to
prepare_to_wait() due to removal of first alloc_cmd() call.

Follow the pattern that we have in bio_alloc() to set the structure
members in the structure allocation function in alloc_cmd() and pass
bio to initialize newly allocated cmd->bio member.

Follow the pattern in copy_to_nullb() to use result of one function call
(null_cache_active()) to be used as a parameter to another function call
(null_insert_page()), use result of alloc_cmd() as a first parameter to
the null_handle_cmd() in null_submit_bio() function. This allow us to
remove the local variable cmd on stack in null_submit_bio() that is in
fast path.

Signed-off-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Link: https://lore.kernel.org/r/20220216172945.31124-2-kch@nvidia.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/null_blk/main.c | 29 ++++++++++++-----------------
 1 file changed, 12 insertions(+), 17 deletions(-)

diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
index 90b6bd2a114b2..29e183719e773 100644
--- a/drivers/block/null_blk/main.c
+++ b/drivers/block/null_blk/main.c
@@ -720,26 +720,25 @@ static struct nullb_cmd *__alloc_cmd(struct nullb_queue *nq)
 	return NULL;
 }
 
-static struct nullb_cmd *alloc_cmd(struct nullb_queue *nq, int can_wait)
+static struct nullb_cmd *alloc_cmd(struct nullb_queue *nq, struct bio *bio)
 {
 	struct nullb_cmd *cmd;
 	DEFINE_WAIT(wait);
 
-	cmd = __alloc_cmd(nq);
-	if (cmd || !can_wait)
-		return cmd;
-
 	do {
-		prepare_to_wait(&nq->wait, &wait, TASK_UNINTERRUPTIBLE);
+		/*
+		 * This avoids multiple return statements, multiple calls to
+		 * __alloc_cmd() and a fast path call to prepare_to_wait().
+		 */
 		cmd = __alloc_cmd(nq);
-		if (cmd)
-			break;
-
+		if (cmd) {
+			cmd->bio = bio;
+			return cmd;
+		}
+		prepare_to_wait(&nq->wait, &wait, TASK_UNINTERRUPTIBLE);
 		io_schedule();
+		finish_wait(&nq->wait, &wait);
 	} while (1);
-
-	finish_wait(&nq->wait, &wait);
-	return cmd;
 }
 
 static void end_cmd(struct nullb_cmd *cmd)
@@ -1477,12 +1476,8 @@ static void null_submit_bio(struct bio *bio)
 	sector_t nr_sectors = bio_sectors(bio);
 	struct nullb *nullb = bio->bi_bdev->bd_disk->private_data;
 	struct nullb_queue *nq = nullb_to_queue(nullb);
-	struct nullb_cmd *cmd;
-
-	cmd = alloc_cmd(nq, 1);
-	cmd->bio = bio;
 
-	null_handle_cmd(cmd, sector, nr_sectors, bio_op(bio));
+	null_handle_cmd(alloc_cmd(nq, bio), sector, nr_sectors, bio_op(bio));
 }
 
 static bool should_timeout_request(struct request *rq)
-- 
GitLab


From c90b6b50b42dfdfeb27b02e670e812b69ff364f5 Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <kch@nvidia.com>
Date: Tue, 22 Feb 2022 07:28:51 -0800
Subject: [PATCH 0703/1586] null_blk: remove hardcoded null_alloc_page() param

Only caller of null_alloc_page() is null_insert_page() unconditionally
sets only parameter to GFP_NOIO and that is statically hard-coded in
null_blk. There is no point in having statically hardcoded function
parameter.

Remove the unnecessary parameter gfp_flags and adjust the code, so it
can retain existing behavior null_alloc_page() with GFP_NOIO.

Signed-off-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220222152852.26043-2-kch@nvidia.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/null_blk/main.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
index 29e183719e773..80f9a6ba376d5 100644
--- a/drivers/block/null_blk/main.c
+++ b/drivers/block/null_blk/main.c
@@ -777,15 +777,15 @@ static void null_complete_rq(struct request *rq)
 	end_cmd(blk_mq_rq_to_pdu(rq));
 }
 
-static struct nullb_page *null_alloc_page(gfp_t gfp_flags)
+static struct nullb_page *null_alloc_page(void)
 {
 	struct nullb_page *t_page;
 
-	t_page = kmalloc(sizeof(struct nullb_page), gfp_flags);
+	t_page = kmalloc(sizeof(struct nullb_page), GFP_NOIO);
 	if (!t_page)
 		goto out;
 
-	t_page->page = alloc_pages(gfp_flags, 0);
+	t_page->page = alloc_pages(GFP_NOIO, 0);
 	if (!t_page->page)
 		goto out_freepage;
 
@@ -932,7 +932,7 @@ static struct nullb_page *null_insert_page(struct nullb *nullb,
 
 	spin_unlock_irq(&nullb->lock);
 
-	t_page = null_alloc_page(GFP_NOIO);
+	t_page = null_alloc_page();
 	if (!t_page)
 		goto out_lock;
 
-- 
GitLab


From df00b1d26c3c3ff9dae4b572a6ad878ab65334e1 Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <kch@nvidia.com>
Date: Tue, 22 Feb 2022 07:28:52 -0800
Subject: [PATCH 0704/1586] null_blk: null_alloc_page() cleanup

Remove goto labels and use direct returns as error unwinding code only
needs to free t_page variable if we alloc_pages() call fails as having
two labels for one kfree() can be avoided easily.

Signed-off-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220222152852.26043-3-kch@nvidia.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/null_blk/main.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
index 80f9a6ba376d5..05b1120e66234 100644
--- a/drivers/block/null_blk/main.c
+++ b/drivers/block/null_blk/main.c
@@ -783,18 +783,16 @@ static struct nullb_page *null_alloc_page(void)
 
 	t_page = kmalloc(sizeof(struct nullb_page), GFP_NOIO);
 	if (!t_page)
-		goto out;
+		return NULL;
 
 	t_page->page = alloc_pages(GFP_NOIO, 0);
-	if (!t_page->page)
-		goto out_freepage;
+	if (!t_page->page) {
+		kfree(t_page);
+		return NULL;
+	}
 
 	memset(t_page->bitmap, 0, sizeof(t_page->bitmap));
 	return t_page;
-out_freepage:
-	kfree(t_page);
-out:
-	return NULL;
 }
 
 static void null_free_page(struct nullb_page *t_page)
-- 
GitLab


From 77c436de01c0f29ba0b745196160184b22adaf60 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 22 Feb 2022 16:46:32 +0100
Subject: [PATCH 0705/1586] mpage: pass the operation to bio_alloc

Refactor the mpage read/write page code to pass the op to bio_alloc
instead of setting it just before the submission.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220222154634.597067-2-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/mpage.c | 50 +++++++++++++++++++++-----------------------------
 1 file changed, 21 insertions(+), 29 deletions(-)

diff --git a/fs/mpage.c b/fs/mpage.c
index dbfc02e23d97f..6c4b810a21d0a 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -57,10 +57,9 @@ static void mpage_end_io(struct bio *bio)
 	bio_put(bio);
 }
 
-static struct bio *mpage_bio_submit(int op, int op_flags, struct bio *bio)
+static struct bio *mpage_bio_submit(struct bio *bio)
 {
 	bio->bi_end_io = mpage_end_io;
-	bio_set_op_attrs(bio, op, op_flags);
 	guard_bio_eod(bio);
 	submit_bio(bio);
 	return NULL;
@@ -146,16 +145,15 @@ static struct bio *do_mpage_readpage(struct mpage_readpage_args *args)
 	struct block_device *bdev = NULL;
 	int length;
 	int fully_mapped = 1;
-	int op_flags;
+	int op = REQ_OP_READ;
 	unsigned nblocks;
 	unsigned relative_block;
 	gfp_t gfp;
 
 	if (args->is_readahead) {
-		op_flags = REQ_RAHEAD;
+		op |= REQ_RAHEAD;
 		gfp = readahead_gfp_mask(page->mapping);
 	} else {
-		op_flags = 0;
 		gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL);
 	}
 
@@ -264,7 +262,7 @@ static struct bio *do_mpage_readpage(struct mpage_readpage_args *args)
 	 * This page will go to BIO.  Do we need to send this BIO off first?
 	 */
 	if (args->bio && (args->last_block_in_bio != blocks[0] - 1))
-		args->bio = mpage_bio_submit(REQ_OP_READ, op_flags, args->bio);
+		args->bio = mpage_bio_submit(args->bio);
 
 alloc_new:
 	if (args->bio == NULL) {
@@ -273,7 +271,7 @@ alloc_new:
 								page))
 				goto out;
 		}
-		args->bio = bio_alloc(bdev, bio_max_segs(args->nr_pages), 0,
+		args->bio = bio_alloc(bdev, bio_max_segs(args->nr_pages), op,
 				      gfp);
 		if (args->bio == NULL)
 			goto confused;
@@ -282,7 +280,7 @@ alloc_new:
 
 	length = first_hole << blkbits;
 	if (bio_add_page(args->bio, page, length, 0) < length) {
-		args->bio = mpage_bio_submit(REQ_OP_READ, op_flags, args->bio);
+		args->bio = mpage_bio_submit(args->bio);
 		goto alloc_new;
 	}
 
@@ -290,7 +288,7 @@ alloc_new:
 	nblocks = map_bh->b_size >> blkbits;
 	if ((buffer_boundary(map_bh) && relative_block == nblocks) ||
 	    (first_hole != blocks_per_page))
-		args->bio = mpage_bio_submit(REQ_OP_READ, op_flags, args->bio);
+		args->bio = mpage_bio_submit(args->bio);
 	else
 		args->last_block_in_bio = blocks[blocks_per_page - 1];
 out:
@@ -298,7 +296,7 @@ out:
 
 confused:
 	if (args->bio)
-		args->bio = mpage_bio_submit(REQ_OP_READ, op_flags, args->bio);
+		args->bio = mpage_bio_submit(args->bio);
 	if (!PageUptodate(page))
 		block_read_full_page(page, args->get_block);
 	else
@@ -361,7 +359,7 @@ void mpage_readahead(struct readahead_control *rac, get_block_t get_block)
 		put_page(page);
 	}
 	if (args.bio)
-		mpage_bio_submit(REQ_OP_READ, REQ_RAHEAD, args.bio);
+		mpage_bio_submit(args.bio);
 }
 EXPORT_SYMBOL(mpage_readahead);
 
@@ -378,7 +376,7 @@ int mpage_readpage(struct page *page, get_block_t get_block)
 
 	args.bio = do_mpage_readpage(&args);
 	if (args.bio)
-		mpage_bio_submit(REQ_OP_READ, 0, args.bio);
+		mpage_bio_submit(args.bio);
 	return 0;
 }
 EXPORT_SYMBOL(mpage_readpage);
@@ -469,7 +467,6 @@ static int __mpage_writepage(struct page *page, struct writeback_control *wbc,
 	struct buffer_head map_bh;
 	loff_t i_size = i_size_read(inode);
 	int ret = 0;
-	int op_flags = wbc_to_write_flags(wbc);
 
 	if (page_has_buffers(page)) {
 		struct buffer_head *head = page_buffers(page);
@@ -577,7 +574,7 @@ page_is_mapped:
 	 * This page will go to BIO.  Do we need to send this BIO off first?
 	 */
 	if (bio && mpd->last_block_in_bio != blocks[0] - 1)
-		bio = mpage_bio_submit(REQ_OP_WRITE, op_flags, bio);
+		bio = mpage_bio_submit(bio);
 
 alloc_new:
 	if (bio == NULL) {
@@ -586,9 +583,10 @@ alloc_new:
 								page, wbc))
 				goto out;
 		}
-		bio = bio_alloc(bdev, BIO_MAX_VECS, 0, GFP_NOFS);
+		bio = bio_alloc(bdev, BIO_MAX_VECS,
+				REQ_OP_WRITE | wbc_to_write_flags(wbc),
+				GFP_NOFS);
 		bio->bi_iter.bi_sector = blocks[0] << (blkbits - 9);
-
 		wbc_init_bio(wbc, bio);
 		bio->bi_write_hint = inode->i_write_hint;
 	}
@@ -601,7 +599,7 @@ alloc_new:
 	wbc_account_cgroup_owner(wbc, page, PAGE_SIZE);
 	length = first_unmapped << blkbits;
 	if (bio_add_page(bio, page, length, 0) < length) {
-		bio = mpage_bio_submit(REQ_OP_WRITE, op_flags, bio);
+		bio = mpage_bio_submit(bio);
 		goto alloc_new;
 	}
 
@@ -611,7 +609,7 @@ alloc_new:
 	set_page_writeback(page);
 	unlock_page(page);
 	if (boundary || (first_unmapped != blocks_per_page)) {
-		bio = mpage_bio_submit(REQ_OP_WRITE, op_flags, bio);
+		bio = mpage_bio_submit(bio);
 		if (boundary_block) {
 			write_boundary_block(boundary_bdev,
 					boundary_block, 1 << blkbits);
@@ -623,7 +621,7 @@ alloc_new:
 
 confused:
 	if (bio)
-		bio = mpage_bio_submit(REQ_OP_WRITE, op_flags, bio);
+		bio = mpage_bio_submit(bio);
 
 	if (mpd->use_writepage) {
 		ret = mapping->a_ops->writepage(page, wbc);
@@ -679,11 +677,8 @@ mpage_writepages(struct address_space *mapping,
 		};
 
 		ret = write_cache_pages(mapping, wbc, __mpage_writepage, &mpd);
-		if (mpd.bio) {
-			int op_flags = (wbc->sync_mode == WB_SYNC_ALL ?
-				  REQ_SYNC : 0);
-			mpage_bio_submit(REQ_OP_WRITE, op_flags, mpd.bio);
-		}
+		if (mpd.bio)
+			mpage_bio_submit(mpd.bio);
 	}
 	blk_finish_plug(&plug);
 	return ret;
@@ -700,11 +695,8 @@ int mpage_writepage(struct page *page, get_block_t get_block,
 		.use_writepage = 0,
 	};
 	int ret = __mpage_writepage(page, wbc, &mpd);
-	if (mpd.bio) {
-		int op_flags = (wbc->sync_mode == WB_SYNC_ALL ?
-			  REQ_SYNC : 0);
-		mpage_bio_submit(REQ_OP_WRITE, op_flags, mpd.bio);
-	}
+	if (mpd.bio)
+		mpage_bio_submit(mpd.bio);
 	return ret;
 }
 EXPORT_SYMBOL(mpage_writepage);
-- 
GitLab


From 4c4dad11ff85e25b64dd62adee63463e40cc596b Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 22 Feb 2022 16:46:33 +0100
Subject: [PATCH 0706/1586] ext4: pass the operation to bio_alloc

Refactor the readpage code to pass the op to bio_alloc instead of setting
it just before the submission.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Theodore Ts'o <tytso@mit.edu>
Link: https://lore.kernel.org/r/20220222154634.597067-3-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/ext4/page-io.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 1253982268730..35ada7baf41e5 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -372,10 +372,9 @@ void ext4_io_submit(struct ext4_io_submit *io)
 	struct bio *bio = io->io_bio;
 
 	if (bio) {
-		int io_op_flags = io->io_wbc->sync_mode == WB_SYNC_ALL ?
-				  REQ_SYNC : 0;
+		if (io->io_wbc->sync_mode == WB_SYNC_ALL)
+			io->io_bio->bi_opf |= REQ_SYNC;
 		io->io_bio->bi_write_hint = io->io_end->inode->i_write_hint;
-		bio_set_op_attrs(io->io_bio, REQ_OP_WRITE, io_op_flags);
 		submit_bio(io->io_bio);
 	}
 	io->io_bio = NULL;
@@ -398,7 +397,7 @@ static void io_submit_init_bio(struct ext4_io_submit *io,
 	 * bio_alloc will _always_ be able to allocate a bio if
 	 * __GFP_DIRECT_RECLAIM is set, see comments for bio_alloc_bioset().
 	 */
-	bio = bio_alloc(bh->b_bdev, BIO_MAX_VECS, 0, GFP_NOIO);
+	bio = bio_alloc(bh->b_bdev, BIO_MAX_VECS, REQ_OP_WRITE, GFP_NOIO);
 	fscrypt_set_bio_crypt_ctx_bh(bio, bh, GFP_NOIO);
 	bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
 	bio->bi_end_io = ext4_end_bio;
-- 
GitLab


From fbe7c2ef5e1d1c218190c059432cdc87043c1275 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 22 Feb 2022 16:46:34 +0100
Subject: [PATCH 0707/1586] nilfs2: pass the operation to bio_alloc

Refactor the segbuf write code to pass the op to bio_alloc instead of
setting it just before the submission.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
Link: https://lore.kernel.org/r/20220222154634.597067-4-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/nilfs2/segbuf.c | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index 4f71faacd8253..a3bb0c856ec80 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -337,8 +337,7 @@ static void nilfs_end_bio_write(struct bio *bio)
 }
 
 static int nilfs_segbuf_submit_bio(struct nilfs_segment_buffer *segbuf,
-				   struct nilfs_write_info *wi, int mode,
-				   int mode_flags)
+				   struct nilfs_write_info *wi)
 {
 	struct bio *bio = wi->bio;
 	int err;
@@ -356,7 +355,6 @@ static int nilfs_segbuf_submit_bio(struct nilfs_segment_buffer *segbuf,
 
 	bio->bi_end_io = nilfs_end_bio_write;
 	bio->bi_private = segbuf;
-	bio_set_op_attrs(bio, mode, mode_flags);
 	submit_bio(bio);
 	segbuf->sb_nbio++;
 
@@ -384,15 +382,15 @@ static void nilfs_segbuf_prepare_write(struct nilfs_segment_buffer *segbuf,
 
 static int nilfs_segbuf_submit_bh(struct nilfs_segment_buffer *segbuf,
 				  struct nilfs_write_info *wi,
-				  struct buffer_head *bh, int mode)
+				  struct buffer_head *bh)
 {
 	int len, err;
 
 	BUG_ON(wi->nr_vecs <= 0);
  repeat:
 	if (!wi->bio) {
-		wi->bio = bio_alloc(wi->nilfs->ns_bdev, wi->nr_vecs, 0,
-				    GFP_NOIO);
+		wi->bio = bio_alloc(wi->nilfs->ns_bdev, wi->nr_vecs,
+				    REQ_OP_WRITE, GFP_NOIO);
 		wi->bio->bi_iter.bi_sector = (wi->blocknr + wi->end) <<
 			(wi->nilfs->ns_blocksize_bits - 9);
 	}
@@ -403,7 +401,7 @@ static int nilfs_segbuf_submit_bh(struct nilfs_segment_buffer *segbuf,
 		return 0;
 	}
 	/* bio is FULL */
-	err = nilfs_segbuf_submit_bio(segbuf, wi, mode, 0);
+	err = nilfs_segbuf_submit_bio(segbuf, wi);
 	/* never submit current bh */
 	if (likely(!err))
 		goto repeat;
@@ -433,13 +431,13 @@ static int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf,
 	nilfs_segbuf_prepare_write(segbuf, &wi);
 
 	list_for_each_entry(bh, &segbuf->sb_segsum_buffers, b_assoc_buffers) {
-		res = nilfs_segbuf_submit_bh(segbuf, &wi, bh, REQ_OP_WRITE);
+		res = nilfs_segbuf_submit_bh(segbuf, &wi, bh);
 		if (unlikely(res))
 			goto failed_bio;
 	}
 
 	list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) {
-		res = nilfs_segbuf_submit_bh(segbuf, &wi, bh, REQ_OP_WRITE);
+		res = nilfs_segbuf_submit_bh(segbuf, &wi, bh);
 		if (unlikely(res))
 			goto failed_bio;
 	}
@@ -449,8 +447,8 @@ static int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf,
 		 * Last BIO is always sent through the following
 		 * submission.
 		 */
-		res = nilfs_segbuf_submit_bio(segbuf, &wi, REQ_OP_WRITE,
-					      REQ_SYNC);
+		wi.bio->bi_opf |= REQ_SYNC;
+		res = nilfs_segbuf_submit_bio(segbuf, &wi);
 	}
 
  failed_bio:
-- 
GitLab


From 483546c11d702fd6f74c8c3f8123b7672def10b2 Mon Sep 17 00:00:00 2001
From: Nian Yanchuan <yanchuan@nfschina.com>
Date: Mon, 28 Feb 2022 01:01:24 +0800
Subject: [PATCH 0708/1586] block: remove redundant semicolon

Remove redundant semicolon from block/bdev.c

Signed-off-by: Nian Yanchuan <yanchuan@nfschina.com>
Link: https://lore.kernel.org/r/20220227170124.GA14658@localhost.localdomain
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bdev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/bdev.c b/block/bdev.c
index a3632317c8aae..ce8de42a89bea 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -678,7 +678,7 @@ static int blkdev_get_whole(struct block_device *bdev, fmode_t mode)
 	if (test_bit(GD_NEED_PART_SCAN, &disk->state))
 		bdev_disk_changed(disk, false);
 	bdev->bd_openers++;
-	return 0;;
+	return 0;
 }
 
 static void blkdev_put_whole(struct block_device *bdev, fmode_t mode)
-- 
GitLab


From 686d303ee6301261b422ea51e64833d7909a2c36 Mon Sep 17 00:00:00 2001
From: Patrick Rudolph <patrick.rudolph@9elements.com>
Date: Fri, 25 Feb 2022 17:06:09 +0100
Subject: [PATCH 0709/1586] hwmon: (pmbus) Add mutex to regulator ops

On PMBUS devices with multiple pages, the regulator ops need to be
protected with the update mutex. This prevents accidentally changing
the page in a separate thread while operating on the PMBUS_OPERATION
register.

Tested on Infineon xdpe11280 while a separate thread polls for sensor
data.

Signed-off-by: Patrick Rudolph <patrick.rudolph@9elements.com>
Signed-off-by: Marcello Sylvester Bauer <sylv@sylv.io>
Link: https://lore.kernel.org/r/b991506bcbf665f7af185945f70bf9d5cf04637c.1645804976.git.sylv@sylv.io
Fixes: ddbb4db4ced1b ("hwmon: (pmbus) Add regulator support")
Cc: Alan Tull <atull@opensource.altera.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/pmbus/pmbus_core.c | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c
index ac2fbee1ba9c0..b1386a4df4cc4 100644
--- a/drivers/hwmon/pmbus/pmbus_core.c
+++ b/drivers/hwmon/pmbus/pmbus_core.c
@@ -2391,10 +2391,14 @@ static int pmbus_regulator_is_enabled(struct regulator_dev *rdev)
 {
 	struct device *dev = rdev_get_dev(rdev);
 	struct i2c_client *client = to_i2c_client(dev->parent);
+	struct pmbus_data *data = i2c_get_clientdata(client);
 	u8 page = rdev_get_id(rdev);
 	int ret;
 
+	mutex_lock(&data->update_lock);
 	ret = pmbus_read_byte_data(client, page, PMBUS_OPERATION);
+	mutex_unlock(&data->update_lock);
+
 	if (ret < 0)
 		return ret;
 
@@ -2405,11 +2409,17 @@ static int _pmbus_regulator_on_off(struct regulator_dev *rdev, bool enable)
 {
 	struct device *dev = rdev_get_dev(rdev);
 	struct i2c_client *client = to_i2c_client(dev->parent);
+	struct pmbus_data *data = i2c_get_clientdata(client);
 	u8 page = rdev_get_id(rdev);
+	int ret;
 
-	return pmbus_update_byte_data(client, page, PMBUS_OPERATION,
-				      PB_OPERATION_CONTROL_ON,
-				      enable ? PB_OPERATION_CONTROL_ON : 0);
+	mutex_lock(&data->update_lock);
+	ret = pmbus_update_byte_data(client, page, PMBUS_OPERATION,
+				     PB_OPERATION_CONTROL_ON,
+				     enable ? PB_OPERATION_CONTROL_ON : 0);
+	mutex_unlock(&data->update_lock);
+
+	return ret;
 }
 
 static int pmbus_regulator_enable(struct regulator_dev *rdev)
-- 
GitLab


From 7f3cc8f897634b7e2d79bc2b7105e9ae6eaf4ac2 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Mon, 10 Jan 2022 14:56:25 -0800
Subject: [PATCH 0710/1586] hwmon: Report attribute name with udev events

Up to now udev events only report the affected hwmon device if an alert
is reported. This requires userspace to read all attributes if it wants
to know what triggered the event. Provide the attribute name with the
NAME property to help userspace find the attribute causing the event.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/hwmon.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/hwmon/hwmon.c b/drivers/hwmon/hwmon.c
index 3ae961986fc31..0b79c4e1bf99f 100644
--- a/drivers/hwmon/hwmon.c
+++ b/drivers/hwmon/hwmon.c
@@ -625,7 +625,9 @@ static const int __templates_size[] = {
 int hwmon_notify_event(struct device *dev, enum hwmon_sensor_types type,
 		       u32 attr, int channel)
 {
+	char event[MAX_SYSFS_ATTR_NAME_LENGTH + 5];
 	char sattr[MAX_SYSFS_ATTR_NAME_LENGTH];
+	char *envp[] = { event, NULL };
 	const char * const *templates;
 	const char *template;
 	int base;
@@ -641,8 +643,9 @@ int hwmon_notify_event(struct device *dev, enum hwmon_sensor_types type,
 	base = hwmon_attr_base(type);
 
 	scnprintf(sattr, MAX_SYSFS_ATTR_NAME_LENGTH, template, base + channel);
+	scnprintf(event, sizeof(event), "NAME=%s", sattr);
 	sysfs_notify(&dev->kobj, NULL, sattr);
-	kobject_uevent(&dev->kobj, KOBJ_CHANGE);
+	kobject_uevent_env(&dev->kobj, KOBJ_CHANGE, envp);
 
 	if (type == hwmon_temp)
 		hwmon_thermal_notify(dev, channel);
-- 
GitLab


From 7c68c2c761d157203b64ebbb61dd7b5b6c32df61 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Wed, 22 Dec 2021 11:42:38 -0800
Subject: [PATCH 0711/1586] hwmon: (lm83) Reorder include files to be in
 alphabetic order

Reorder include files to be in alphabetic order to simplify
driver maintenance.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/lm83.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/hwmon/lm83.c b/drivers/hwmon/lm83.c
index 74fd7aa373a3e..44d720af24731 100644
--- a/drivers/hwmon/lm83.c
+++ b/drivers/hwmon/lm83.c
@@ -18,15 +18,15 @@
  * http://www.national.com/pf/LM/LM82.html
  */
 
-#include <linux/module.h>
+#include <linux/err.h>
+#include <linux/i2c.h>
 #include <linux/init.h>
-#include <linux/slab.h>
 #include <linux/jiffies.h>
-#include <linux/i2c.h>
-#include <linux/hwmon-sysfs.h>
 #include <linux/hwmon.h>
-#include <linux/err.h>
+#include <linux/hwmon-sysfs.h>
+#include <linux/module.h>
 #include <linux/mutex.h>
+#include <linux/slab.h>
 #include <linux/sysfs.h>
 
 /*
-- 
GitLab


From 11e3377b9a439a5cf989bdb2b16d90e237542ec2 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Wed, 22 Dec 2021 11:49:04 -0800
Subject: [PATCH 0712/1586] hwmon: (lm83) Move lm83_id to avoid forward
 declaration

There is no need to keep lm83_id at the end of the driver. Move it
forward to where it is needed to avoid a forward declaration.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/lm83.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/drivers/hwmon/lm83.c b/drivers/hwmon/lm83.c
index 44d720af24731..2bb4bceef551a 100644
--- a/drivers/hwmon/lm83.c
+++ b/drivers/hwmon/lm83.c
@@ -317,7 +317,12 @@ static int lm83_detect(struct i2c_client *new_client,
 	return 0;
 }
 
-static const struct i2c_device_id lm83_id[];
+static const struct i2c_device_id lm83_id[] = {
+	{ "lm83", lm83 },
+	{ "lm82", lm82 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, lm83_id);
 
 static int lm83_probe(struct i2c_client *new_client)
 {
@@ -352,13 +357,6 @@ static int lm83_probe(struct i2c_client *new_client)
  * Driver data (common to all clients)
  */
 
-static const struct i2c_device_id lm83_id[] = {
-	{ "lm83", lm83 },
-	{ "lm82", lm82 },
-	{ }
-};
-MODULE_DEVICE_TABLE(i2c, lm83_id);
-
 static struct i2c_driver lm83_driver = {
 	.class		= I2C_CLASS_HWMON,
 	.driver = {
-- 
GitLab


From 81de0eea2bbc1b1334d40c4bb420219b603b4c45 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Wed, 22 Dec 2021 16:23:31 -0800
Subject: [PATCH 0713/1586] hwmon: (lm83) Replace new_client with client

It has no value to name a variable 'new_client' in probe and detect
functions; it is obvious that the client is new. Use 'client' as
variable name instead.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/lm83.c | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/drivers/hwmon/lm83.c b/drivers/hwmon/lm83.c
index 2bb4bceef551a..fdd89cc481fa2 100644
--- a/drivers/hwmon/lm83.c
+++ b/drivers/hwmon/lm83.c
@@ -272,10 +272,10 @@ static const struct attribute_group lm83_group_opt = {
  */
 
 /* Return 0 if detection is successful, -ENODEV otherwise */
-static int lm83_detect(struct i2c_client *new_client,
+static int lm83_detect(struct i2c_client *client,
 		       struct i2c_board_info *info)
 {
-	struct i2c_adapter *adapter = new_client->adapter;
+	struct i2c_adapter *adapter = client->adapter;
 	const char *name;
 	u8 man_id, chip_id;
 
@@ -283,20 +283,20 @@ static int lm83_detect(struct i2c_client *new_client,
 		return -ENODEV;
 
 	/* Detection */
-	if ((i2c_smbus_read_byte_data(new_client, LM83_REG_R_STATUS1) & 0xA8) ||
-	    (i2c_smbus_read_byte_data(new_client, LM83_REG_R_STATUS2) & 0x48) ||
-	    (i2c_smbus_read_byte_data(new_client, LM83_REG_R_CONFIG) & 0x41)) {
+	if ((i2c_smbus_read_byte_data(client, LM83_REG_R_STATUS1) & 0xA8) ||
+	    (i2c_smbus_read_byte_data(client, LM83_REG_R_STATUS2) & 0x48) ||
+	    (i2c_smbus_read_byte_data(client, LM83_REG_R_CONFIG) & 0x41)) {
 		dev_dbg(&adapter->dev, "LM83 detection failed at 0x%02x\n",
-			new_client->addr);
+			client->addr);
 		return -ENODEV;
 	}
 
 	/* Identification */
-	man_id = i2c_smbus_read_byte_data(new_client, LM83_REG_R_MAN_ID);
+	man_id = i2c_smbus_read_byte_data(client, LM83_REG_R_MAN_ID);
 	if (man_id != 0x01)	/* National Semiconductor */
 		return -ENODEV;
 
-	chip_id = i2c_smbus_read_byte_data(new_client, LM83_REG_R_CHIP_ID);
+	chip_id = i2c_smbus_read_byte_data(client, LM83_REG_R_CHIP_ID);
 	switch (chip_id) {
 	case 0x03:
 		name = "lm83";
@@ -324,17 +324,17 @@ static const struct i2c_device_id lm83_id[] = {
 };
 MODULE_DEVICE_TABLE(i2c, lm83_id);
 
-static int lm83_probe(struct i2c_client *new_client)
+static int lm83_probe(struct i2c_client *client)
 {
 	struct device *hwmon_dev;
 	struct lm83_data *data;
 
-	data = devm_kzalloc(&new_client->dev, sizeof(struct lm83_data),
+	data = devm_kzalloc(&client->dev, sizeof(struct lm83_data),
 			    GFP_KERNEL);
 	if (!data)
 		return -ENOMEM;
 
-	data->client = new_client;
+	data->client = client;
 	mutex_init(&data->update_lock);
 
 	/*
@@ -344,11 +344,11 @@ static int lm83_probe(struct i2c_client *new_client)
 	 * declare 1 and 3 common, and then 2 and 4 only for the LM83.
 	 */
 	data->groups[0] = &lm83_group;
-	if (i2c_match_id(lm83_id, new_client)->driver_data == lm83)
+	if (i2c_match_id(lm83_id, client)->driver_data == lm83)
 		data->groups[1] = &lm83_group_opt;
 
-	hwmon_dev = devm_hwmon_device_register_with_groups(&new_client->dev,
-							   new_client->name,
+	hwmon_dev = devm_hwmon_device_register_with_groups(&client->dev,
+							   client->name,
 							   data, data->groups);
 	return PTR_ERR_OR_ZERO(hwmon_dev);
 }
-- 
GitLab


From 719af4f1a40bdf46cab7e4db216af7084a70897b Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Wed, 22 Dec 2021 16:22:00 -0800
Subject: [PATCH 0714/1586] hwmon: (lm83) Use regmap

Using local caching in this driver had few benefits. It used cached values
for two seconds and then re-read all registers from the chip even if the
user only accessed a single attribute. On top of that, alarm attributes
were stale for up to four seconds (the first status register read reports
and clears an alarm, the second reports it cleared). Use regmap instead
for caching. Do not re-read non-volatile registers, and do not cache
volatile registers.

As part of this change, handle register read and write address differences
in regmap code. This is necessary to avoid problems with caching in the
regmap core, and ultimately simplifies the code.

Also, errors observed when reading from and writing to registers are no
longer ignored.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/Kconfig |   1 +
 drivers/hwmon/lm83.c  | 176 ++++++++++++++++++++++++++----------------
 2 files changed, 112 insertions(+), 65 deletions(-)

diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index 8df25f1079bac..01ab80a2cc4a1 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -1288,6 +1288,7 @@ config SENSORS_LM80
 config SENSORS_LM83
 	tristate "National Semiconductor LM83 and compatibles"
 	depends on I2C
+	select REGMAP
 	help
 	  If you say yes here you get support for National Semiconductor
 	  LM82 and LM83 sensor chips.
diff --git a/drivers/hwmon/lm83.c b/drivers/hwmon/lm83.c
index fdd89cc481fa2..c9605957e400e 100644
--- a/drivers/hwmon/lm83.c
+++ b/drivers/hwmon/lm83.c
@@ -21,11 +21,11 @@
 #include <linux/err.h>
 #include <linux/i2c.h>
 #include <linux/init.h>
-#include <linux/jiffies.h>
 #include <linux/hwmon.h>
 #include <linux/hwmon-sysfs.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
+#include <linux/regmap.h>
 #include <linux/slab.h>
 #include <linux/sysfs.h>
 
@@ -77,7 +77,7 @@ enum chips { lm83, lm82 };
 				 (val) < 0 ? ((val) - 500) / 1000 : \
 				 ((val) + 500) / 1000)
 
-static const u8 LM83_REG_R_TEMP[] = {
+static const u8 LM83_REG_TEMP[] = {
 	LM83_REG_R_LOCAL_TEMP,
 	LM83_REG_R_REMOTE1_TEMP,
 	LM83_REG_R_REMOTE2_TEMP,
@@ -89,62 +89,82 @@ static const u8 LM83_REG_R_TEMP[] = {
 	LM83_REG_R_TCRIT,
 };
 
-static const u8 LM83_REG_W_HIGH[] = {
-	LM83_REG_W_LOCAL_HIGH,
-	LM83_REG_W_REMOTE1_HIGH,
-	LM83_REG_W_REMOTE2_HIGH,
-	LM83_REG_W_REMOTE3_HIGH,
-	LM83_REG_W_TCRIT,
-};
-
 /*
  * Client data (each client gets its own)
  */
 
 struct lm83_data {
-	struct i2c_client *client;
+	struct regmap *regmap;
 	const struct attribute_group *groups[3];
-	struct mutex update_lock;
-	bool valid; /* false until following fields are valid */
-	unsigned long last_updated; /* in jiffies */
-
-	/* registers values */
-	s8 temp[9];	/* 0..3: input 1-4,
-			   4..7: high limit 1-4,
-			   8   : critical limit */
-	u16 alarms; /* bitvector, combined */
 };
 
-static struct lm83_data *lm83_update_device(struct device *dev)
+/* regmap code */
+
+static int lm83_regmap_reg_read(void *context, unsigned int reg, unsigned int *val)
 {
-	struct lm83_data *data = dev_get_drvdata(dev);
-	struct i2c_client *client = data->client;
-
-	mutex_lock(&data->update_lock);
-
-	if (time_after(jiffies, data->last_updated + HZ * 2) || !data->valid) {
-		int nr;
-
-		dev_dbg(&client->dev, "Updating lm83 data.\n");
-		for (nr = 0; nr < 9; nr++) {
-			data->temp[nr] =
-			    i2c_smbus_read_byte_data(client,
-			    LM83_REG_R_TEMP[nr]);
-		}
-		data->alarms =
-		    i2c_smbus_read_byte_data(client, LM83_REG_R_STATUS1)
-		    + (i2c_smbus_read_byte_data(client, LM83_REG_R_STATUS2)
-		    << 8);
-
-		data->last_updated = jiffies;
-		data->valid = true;
+	struct i2c_client *client = context;
+	int ret;
+
+	ret = i2c_smbus_read_byte_data(client, reg);
+	if (ret < 0)
+		return ret;
+
+	*val = ret;
+	return 0;
+}
+
+/*
+ * The regmap write function maps read register addresses to write register
+ * addresses. This is necessary for regmap register caching to work.
+ * An alternative would be to clear the regmap cache whenever a register is
+ * written, but that would be much more expensive.
+ */
+static int lm83_regmap_reg_write(void *context, unsigned int reg, unsigned int val)
+{
+	struct i2c_client *client = context;
+
+	switch (reg) {
+	case LM83_REG_R_CONFIG:
+	case LM83_REG_R_LOCAL_HIGH:
+	case LM83_REG_R_REMOTE2_HIGH:
+		reg += 0x06;
+		break;
+	case LM83_REG_R_REMOTE1_HIGH:
+	case LM83_REG_R_REMOTE3_HIGH:
+	case LM83_REG_R_TCRIT:
+		reg += 0x18;
+		break;
+	default:
+		break;
 	}
 
-	mutex_unlock(&data->update_lock);
+	return i2c_smbus_write_byte_data(client, reg, val);
+}
 
-	return data;
+static bool lm83_regmap_is_volatile(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case LM83_REG_R_LOCAL_TEMP:
+	case LM83_REG_R_REMOTE1_TEMP:
+	case LM83_REG_R_REMOTE2_TEMP:
+	case LM83_REG_R_REMOTE3_TEMP:
+	case LM83_REG_R_STATUS1:
+	case LM83_REG_R_STATUS2:
+		return true;
+	default:
+		return false;
+	}
 }
 
+static const struct regmap_config lm83_regmap_config = {
+	.reg_bits = 8,
+	.val_bits = 8,
+	.cache_type = REGCACHE_RBTREE,
+	.volatile_reg = lm83_regmap_is_volatile,
+	.reg_read = lm83_regmap_reg_read,
+	.reg_write = lm83_regmap_reg_write,
+};
+
 /*
  * Sysfs stuff
  */
@@ -153,8 +173,15 @@ static ssize_t temp_show(struct device *dev, struct device_attribute *devattr,
 			 char *buf)
 {
 	struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
-	struct lm83_data *data = lm83_update_device(dev);
-	return sprintf(buf, "%d\n", TEMP_FROM_REG(data->temp[attr->index]));
+	struct lm83_data *data = dev_get_drvdata(dev);
+	unsigned int regval;
+	int ret;
+
+	ret = regmap_read(data->regmap, LM83_REG_TEMP[attr->index], &regval);
+	if (ret)
+		return ret;
+
+	return sprintf(buf, "%d\n", TEMP_FROM_REG((s8)regval));
 }
 
 static ssize_t temp_store(struct device *dev,
@@ -163,38 +190,57 @@ static ssize_t temp_store(struct device *dev,
 {
 	struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
 	struct lm83_data *data = dev_get_drvdata(dev);
-	struct i2c_client *client = data->client;
+	unsigned int regval;
 	long val;
-	int nr = attr->index;
 	int err;
 
 	err = kstrtol(buf, 10, &val);
 	if (err < 0)
 		return err;
 
-	mutex_lock(&data->update_lock);
-	data->temp[nr] = TEMP_TO_REG(val);
-	i2c_smbus_write_byte_data(client, LM83_REG_W_HIGH[nr - 4],
-				  data->temp[nr]);
-	mutex_unlock(&data->update_lock);
-	return count;
+	regval = TEMP_TO_REG(val);
+	err = regmap_write(data->regmap, LM83_REG_TEMP[attr->index], regval);
+	return err ? : count;
 }
 
 static ssize_t alarms_show(struct device *dev, struct device_attribute *dummy,
 			   char *buf)
 {
-	struct lm83_data *data = lm83_update_device(dev);
-	return sprintf(buf, "%d\n", data->alarms);
+	struct lm83_data *data = dev_get_drvdata(dev);
+	unsigned int alarms, regval;
+	int err;
+
+	err = regmap_read(data->regmap, LM83_REG_R_STATUS1, &regval);
+	if (err < 0)
+		return err;
+	alarms = regval;
+	err = regmap_read(data->regmap, LM83_REG_R_STATUS2, &regval);
+	if (err < 0)
+		return err;
+	alarms |= regval << 8;
+
+	return sprintf(buf, "%u\n", alarms);
 }
 
 static ssize_t alarm_show(struct device *dev,
 			  struct device_attribute *devattr, char *buf)
 {
 	struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
-	struct lm83_data *data = lm83_update_device(dev);
+	struct lm83_data *data = dev_get_drvdata(dev);
 	int bitnr = attr->index;
-
-	return sprintf(buf, "%d\n", (data->alarms >> bitnr) & 1);
+	unsigned int alarm;
+	int reg, err;
+
+	if (bitnr < 8) {
+		reg = LM83_REG_R_STATUS1;
+	} else {
+		reg = LM83_REG_R_STATUS2;
+		bitnr -= 8;
+	}
+	err = regmap_read(data->regmap, reg, &alarm);
+	if (err < 0)
+		return err;
+	return sprintf(buf, "%d\n", (alarm >> bitnr) & 1);
 }
 
 static SENSOR_DEVICE_ATTR_RO(temp1_input, temp, 0);
@@ -326,16 +372,17 @@ MODULE_DEVICE_TABLE(i2c, lm83_id);
 
 static int lm83_probe(struct i2c_client *client)
 {
+	struct device *dev = &client->dev;
 	struct device *hwmon_dev;
 	struct lm83_data *data;
 
-	data = devm_kzalloc(&client->dev, sizeof(struct lm83_data),
-			    GFP_KERNEL);
+	data = devm_kzalloc(dev, sizeof(struct lm83_data), GFP_KERNEL);
 	if (!data)
 		return -ENOMEM;
 
-	data->client = client;
-	mutex_init(&data->update_lock);
+	data->regmap = devm_regmap_init(dev, NULL, client, &lm83_regmap_config);
+	if (IS_ERR(data->regmap))
+		return PTR_ERR(data->regmap);
 
 	/*
 	 * Register sysfs hooks
@@ -347,8 +394,7 @@ static int lm83_probe(struct i2c_client *client)
 	if (i2c_match_id(lm83_id, client)->driver_data == lm83)
 		data->groups[1] = &lm83_group_opt;
 
-	hwmon_dev = devm_hwmon_device_register_with_groups(&client->dev,
-							   client->name,
+	hwmon_dev = devm_hwmon_device_register_with_groups(dev, client->name,
 							   data, data->groups);
 	return PTR_ERR_OR_ZERO(hwmon_dev);
 }
-- 
GitLab


From 362c5663e8760b145a67adb3a7704de401f6ceab Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Wed, 22 Dec 2021 19:03:04 -0800
Subject: [PATCH 0715/1586] hwmon: (lm83) Replace temperature conversion macros
 with standard functions

Replace TEMP_FROM_REG with direct calculation and TEMP_TO_REG
with standard functions/macros.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/lm83.c | 15 ++-------------
 1 file changed, 2 insertions(+), 13 deletions(-)

diff --git a/drivers/hwmon/lm83.c b/drivers/hwmon/lm83.c
index c9605957e400e..434bd5b903d2a 100644
--- a/drivers/hwmon/lm83.c
+++ b/drivers/hwmon/lm83.c
@@ -66,17 +66,6 @@ enum chips { lm83, lm82 };
 #define LM83_REG_R_TCRIT		0x42
 #define LM83_REG_W_TCRIT		0x5A
 
-/*
- * Conversions and various macros
- * The LM83 uses signed 8-bit values with LSB = 1 degree Celsius.
- */
-
-#define TEMP_FROM_REG(val)	((val) * 1000)
-#define TEMP_TO_REG(val)	((val) <= -128000 ? -128 : \
-				 (val) >= 127000 ? 127 : \
-				 (val) < 0 ? ((val) - 500) / 1000 : \
-				 ((val) + 500) / 1000)
-
 static const u8 LM83_REG_TEMP[] = {
 	LM83_REG_R_LOCAL_TEMP,
 	LM83_REG_R_REMOTE1_TEMP,
@@ -181,7 +170,7 @@ static ssize_t temp_show(struct device *dev, struct device_attribute *devattr,
 	if (ret)
 		return ret;
 
-	return sprintf(buf, "%d\n", TEMP_FROM_REG((s8)regval));
+	return sprintf(buf, "%d\n", (s8)regval * 1000);
 }
 
 static ssize_t temp_store(struct device *dev,
@@ -198,7 +187,7 @@ static ssize_t temp_store(struct device *dev,
 	if (err < 0)
 		return err;
 
-	regval = TEMP_TO_REG(val);
+	regval = DIV_ROUND_CLOSEST(clamp_val(val, -128000, 127000), 1000);
 	err = regmap_write(data->regmap, LM83_REG_TEMP[attr->index], regval);
 	return err ? : count;
 }
-- 
GitLab


From 4d63c2d31a8f4c68458422fc5d0639a16237b426 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Wed, 22 Dec 2021 23:49:59 -0800
Subject: [PATCH 0716/1586] hwmon: (lm83) Demote log message if chip
 identification fails

There should be no message in the kernel function if the detect function
fails to identify a chip; this is perfectly normal and does not warrant
a kernel log entry. Demote message to debug.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/lm83.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/hwmon/lm83.c b/drivers/hwmon/lm83.c
index 434bd5b903d2a..82d7ef264f6f1 100644
--- a/drivers/hwmon/lm83.c
+++ b/drivers/hwmon/lm83.c
@@ -341,9 +341,9 @@ static int lm83_detect(struct i2c_client *client,
 		break;
 	default:
 		/* identification failed */
-		dev_info(&adapter->dev,
-			 "Unsupported chip (man_id=0x%02X, chip_id=0x%02X)\n",
-			 man_id, chip_id);
+		dev_dbg(&adapter->dev,
+			"Unsupported chip (man_id=0x%02X, chip_id=0x%02X)\n",
+			man_id, chip_id);
 		return -ENODEV;
 	}
 
-- 
GitLab


From 913ac02ade57493fae1fa030f5a33934ef80a254 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Fri, 24 Dec 2021 11:31:32 -0800
Subject: [PATCH 0717/1586] hwmon: (lm83) Explain why LM82 may be misdetected
 as LM83

According to the March 2013 revision of the LM82 datasheet, the latest
LM82 die revision is 0x03. This was confirmed and observed with a real
chip. Further details in this revision of the LM82 datasheet suggest that
LM82 is now just a repackaged LM83. Such versions of LM82 will be detected
as LM83. Add comment to the code explaining why this may happen.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/lm83.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/hwmon/lm83.c b/drivers/hwmon/lm83.c
index 82d7ef264f6f1..d9ee01ca8aed3 100644
--- a/drivers/hwmon/lm83.c
+++ b/drivers/hwmon/lm83.c
@@ -334,6 +334,14 @@ static int lm83_detect(struct i2c_client *client,
 	chip_id = i2c_smbus_read_byte_data(client, LM83_REG_R_CHIP_ID);
 	switch (chip_id) {
 	case 0x03:
+		/*
+		 * According to the LM82 datasheet dated March 2013, recent
+		 * revisions of LM82 have a die revision of 0x03. This was
+		 * confirmed with a real chip. Further details in this revision
+		 * of the LM82 datasheet strongly suggest that LM82 is just a
+		 * repackaged LM83. It is therefore impossible to distinguish
+		 * those chips from LM83, and they will be misdetected as LM83.
+		 */
 		name = "lm83";
 		break;
 	case 0x01:
-- 
GitLab


From c291f612a813a1f295266a3de5cb418e48f41440 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Wed, 22 Dec 2021 20:24:02 -0800
Subject: [PATCH 0718/1586] hwmon: (lm83) Convert to use with_info API

Use with_info API to reduce code size and simplify the code.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/lm83.c | 315 +++++++++++++++++++++++++------------------
 1 file changed, 183 insertions(+), 132 deletions(-)

diff --git a/drivers/hwmon/lm83.c b/drivers/hwmon/lm83.c
index d9ee01ca8aed3..12370dcefa6ab 100644
--- a/drivers/hwmon/lm83.c
+++ b/drivers/hwmon/lm83.c
@@ -18,11 +18,11 @@
  * http://www.national.com/pf/LM/LM82.html
  */
 
+#include <linux/bits.h>
 #include <linux/err.h>
 #include <linux/i2c.h>
 #include <linux/init.h>
 #include <linux/hwmon.h>
-#include <linux/hwmon-sysfs.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/regmap.h>
@@ -71,11 +71,30 @@ static const u8 LM83_REG_TEMP[] = {
 	LM83_REG_R_REMOTE1_TEMP,
 	LM83_REG_R_REMOTE2_TEMP,
 	LM83_REG_R_REMOTE3_TEMP,
+};
+
+static const u8 LM83_REG_MAX[] = {
 	LM83_REG_R_LOCAL_HIGH,
 	LM83_REG_R_REMOTE1_HIGH,
 	LM83_REG_R_REMOTE2_HIGH,
 	LM83_REG_R_REMOTE3_HIGH,
-	LM83_REG_R_TCRIT,
+};
+
+/* alarm and fault registers and bits, indexed by channel */
+static const u8 LM83_ALARM_REG[] = {
+	LM83_REG_R_STATUS1, LM83_REG_R_STATUS2, LM83_REG_R_STATUS1, LM83_REG_R_STATUS2
+};
+
+static const u8 LM83_MAX_ALARM_BIT[] = {
+	BIT(6), BIT(7), BIT(4), BIT(4)
+};
+
+static const u8 LM83_CRIT_ALARM_BIT[] = {
+	BIT(0), BIT(0), BIT(1), BIT(1)
+};
+
+static const u8 LM83_FAULT_BIT[] = {
+	0, BIT(5), BIT(2), BIT(2)
 };
 
 /*
@@ -84,7 +103,7 @@ static const u8 LM83_REG_TEMP[] = {
 
 struct lm83_data {
 	struct regmap *regmap;
-	const struct attribute_group *groups[3];
+	enum chips type;
 };
 
 /* regmap code */
@@ -154,157 +173,197 @@ static const struct regmap_config lm83_regmap_config = {
 	.reg_write = lm83_regmap_reg_write,
 };
 
-/*
- * Sysfs stuff
- */
+/* hwmon API */
 
-static ssize_t temp_show(struct device *dev, struct device_attribute *devattr,
-			 char *buf)
+static int lm83_temp_read(struct device *dev, u32 attr, int channel, long *val)
 {
-	struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
 	struct lm83_data *data = dev_get_drvdata(dev);
 	unsigned int regval;
-	int ret;
-
-	ret = regmap_read(data->regmap, LM83_REG_TEMP[attr->index], &regval);
-	if (ret)
-		return ret;
+	int err;
 
-	return sprintf(buf, "%d\n", (s8)regval * 1000);
+	switch (attr) {
+	case hwmon_temp_input:
+		err = regmap_read(data->regmap, LM83_REG_TEMP[channel], &regval);
+		if (err < 0)
+			return err;
+		*val = (s8)regval * 1000;
+		break;
+	case hwmon_temp_max:
+		err = regmap_read(data->regmap, LM83_REG_MAX[channel], &regval);
+		if (err < 0)
+			return err;
+		*val = (s8)regval * 1000;
+		break;
+	case hwmon_temp_crit:
+		err = regmap_read(data->regmap, LM83_REG_R_TCRIT, &regval);
+		if (err < 0)
+			return err;
+		*val = (s8)regval * 1000;
+		break;
+	case hwmon_temp_max_alarm:
+		err = regmap_read(data->regmap, LM83_ALARM_REG[channel], &regval);
+		if (err < 0)
+			return err;
+		*val = !!(regval & LM83_MAX_ALARM_BIT[channel]);
+		break;
+	case hwmon_temp_crit_alarm:
+		err = regmap_read(data->regmap, LM83_ALARM_REG[channel], &regval);
+		if (err < 0)
+			return err;
+		*val = !!(regval & LM83_CRIT_ALARM_BIT[channel]);
+		break;
+	case hwmon_temp_fault:
+		err = regmap_read(data->regmap, LM83_ALARM_REG[channel], &regval);
+		if (err < 0)
+			return err;
+		*val = !!(regval & LM83_FAULT_BIT[channel]);
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+	return 0;
 }
 
-static ssize_t temp_store(struct device *dev,
-			  struct device_attribute *devattr, const char *buf,
-			  size_t count)
+static int lm83_temp_write(struct device *dev, u32 attr, int channel, long val)
 {
-	struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
 	struct lm83_data *data = dev_get_drvdata(dev);
 	unsigned int regval;
-	long val;
 	int err;
 
-	err = kstrtol(buf, 10, &val);
-	if (err < 0)
-		return err;
-
 	regval = DIV_ROUND_CLOSEST(clamp_val(val, -128000, 127000), 1000);
-	err = regmap_write(data->regmap, LM83_REG_TEMP[attr->index], regval);
-	return err ? : count;
+
+	switch (attr) {
+	case hwmon_temp_max:
+		err = regmap_write(data->regmap, LM83_REG_MAX[channel], regval);
+		if (err < 0)
+			return err;
+		break;
+	case hwmon_temp_crit:
+		err = regmap_write(data->regmap, LM83_REG_R_TCRIT, regval);
+		if (err < 0)
+			return err;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+	return 0;
 }
 
-static ssize_t alarms_show(struct device *dev, struct device_attribute *dummy,
-			   char *buf)
+static int lm83_chip_read(struct device *dev, u32 attr, int channel, long *val)
 {
 	struct lm83_data *data = dev_get_drvdata(dev);
-	unsigned int alarms, regval;
+	unsigned int regval;
 	int err;
 
-	err = regmap_read(data->regmap, LM83_REG_R_STATUS1, &regval);
-	if (err < 0)
-		return err;
-	alarms = regval;
-	err = regmap_read(data->regmap, LM83_REG_R_STATUS2, &regval);
-	if (err < 0)
-		return err;
-	alarms |= regval << 8;
+	switch (attr) {
+	case hwmon_chip_alarms:
+		err = regmap_read(data->regmap, LM83_REG_R_STATUS1, &regval);
+		if (err < 0)
+			return err;
+		*val = regval;
+		err = regmap_read(data->regmap, LM83_REG_R_STATUS2, &regval);
+		if (err < 0)
+			return err;
+		*val |= regval << 8;
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
 
-	return sprintf(buf, "%u\n", alarms);
+	return 0;
 }
 
-static ssize_t alarm_show(struct device *dev,
-			  struct device_attribute *devattr, char *buf)
+static int lm83_read(struct device *dev, enum hwmon_sensor_types type,
+		     u32 attr, int channel, long *val)
 {
-	struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
-	struct lm83_data *data = dev_get_drvdata(dev);
-	int bitnr = attr->index;
-	unsigned int alarm;
-	int reg, err;
-
-	if (bitnr < 8) {
-		reg = LM83_REG_R_STATUS1;
-	} else {
-		reg = LM83_REG_R_STATUS2;
-		bitnr -= 8;
+	switch (type) {
+	case hwmon_chip:
+		return lm83_chip_read(dev, attr, channel, val);
+	case hwmon_temp:
+		return lm83_temp_read(dev, attr, channel, val);
+	default:
+		return -EOPNOTSUPP;
 	}
-	err = regmap_read(data->regmap, reg, &alarm);
-	if (err < 0)
-		return err;
-	return sprintf(buf, "%d\n", (alarm >> bitnr) & 1);
 }
 
-static SENSOR_DEVICE_ATTR_RO(temp1_input, temp, 0);
-static SENSOR_DEVICE_ATTR_RO(temp2_input, temp, 1);
-static SENSOR_DEVICE_ATTR_RO(temp3_input, temp, 2);
-static SENSOR_DEVICE_ATTR_RO(temp4_input, temp, 3);
-static SENSOR_DEVICE_ATTR_RW(temp1_max, temp, 4);
-static SENSOR_DEVICE_ATTR_RW(temp2_max, temp, 5);
-static SENSOR_DEVICE_ATTR_RW(temp3_max, temp, 6);
-static SENSOR_DEVICE_ATTR_RW(temp4_max, temp, 7);
-static SENSOR_DEVICE_ATTR_RO(temp1_crit, temp, 8);
-static SENSOR_DEVICE_ATTR_RO(temp2_crit, temp, 8);
-static SENSOR_DEVICE_ATTR_RW(temp3_crit, temp, 8);
-static SENSOR_DEVICE_ATTR_RO(temp4_crit, temp, 8);
-
-/* Individual alarm files */
-static SENSOR_DEVICE_ATTR_RO(temp1_crit_alarm, alarm, 0);
-static SENSOR_DEVICE_ATTR_RO(temp3_crit_alarm, alarm, 1);
-static SENSOR_DEVICE_ATTR_RO(temp3_fault, alarm, 2);
-static SENSOR_DEVICE_ATTR_RO(temp3_max_alarm, alarm, 4);
-static SENSOR_DEVICE_ATTR_RO(temp1_max_alarm, alarm, 6);
-static SENSOR_DEVICE_ATTR_RO(temp2_crit_alarm, alarm, 8);
-static SENSOR_DEVICE_ATTR_RO(temp4_crit_alarm, alarm, 9);
-static SENSOR_DEVICE_ATTR_RO(temp4_fault, alarm, 10);
-static SENSOR_DEVICE_ATTR_RO(temp4_max_alarm, alarm, 12);
-static SENSOR_DEVICE_ATTR_RO(temp2_fault, alarm, 13);
-static SENSOR_DEVICE_ATTR_RO(temp2_max_alarm, alarm, 15);
-/* Raw alarm file for compatibility */
-static DEVICE_ATTR_RO(alarms);
-
-static struct attribute *lm83_attributes[] = {
-	&sensor_dev_attr_temp1_input.dev_attr.attr,
-	&sensor_dev_attr_temp3_input.dev_attr.attr,
-	&sensor_dev_attr_temp1_max.dev_attr.attr,
-	&sensor_dev_attr_temp3_max.dev_attr.attr,
-	&sensor_dev_attr_temp1_crit.dev_attr.attr,
-	&sensor_dev_attr_temp3_crit.dev_attr.attr,
-
-	&sensor_dev_attr_temp1_crit_alarm.dev_attr.attr,
-	&sensor_dev_attr_temp3_crit_alarm.dev_attr.attr,
-	&sensor_dev_attr_temp3_fault.dev_attr.attr,
-	&sensor_dev_attr_temp3_max_alarm.dev_attr.attr,
-	&sensor_dev_attr_temp1_max_alarm.dev_attr.attr,
-	&dev_attr_alarms.attr,
-	NULL
-};
+static int lm83_write(struct device *dev, enum hwmon_sensor_types type,
+		      u32 attr, int channel, long val)
+{
+	switch (type) {
+	case hwmon_temp:
+		return lm83_temp_write(dev, attr, channel, val);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
 
-static const struct attribute_group lm83_group = {
-	.attrs = lm83_attributes,
-};
+static umode_t lm83_is_visible(const void *_data, enum hwmon_sensor_types type,
+			       u32 attr, int channel)
+{
+	const struct lm83_data *data = _data;
 
-static struct attribute *lm83_attributes_opt[] = {
-	&sensor_dev_attr_temp2_input.dev_attr.attr,
-	&sensor_dev_attr_temp4_input.dev_attr.attr,
-	&sensor_dev_attr_temp2_max.dev_attr.attr,
-	&sensor_dev_attr_temp4_max.dev_attr.attr,
-	&sensor_dev_attr_temp2_crit.dev_attr.attr,
-	&sensor_dev_attr_temp4_crit.dev_attr.attr,
-
-	&sensor_dev_attr_temp2_crit_alarm.dev_attr.attr,
-	&sensor_dev_attr_temp4_crit_alarm.dev_attr.attr,
-	&sensor_dev_attr_temp4_fault.dev_attr.attr,
-	&sensor_dev_attr_temp4_max_alarm.dev_attr.attr,
-	&sensor_dev_attr_temp2_fault.dev_attr.attr,
-	&sensor_dev_attr_temp2_max_alarm.dev_attr.attr,
+	/*
+	 * LM82 only supports a single external channel, modeled as channel 2.
+	 */
+	if (data->type == lm82 && (channel == 1 || channel == 3))
+		return 0;
+
+	switch (type) {
+	case hwmon_chip:
+		if (attr == hwmon_chip_alarms)
+			return 0444;
+		break;
+	case hwmon_temp:
+		switch (attr) {
+		case hwmon_temp_input:
+		case hwmon_temp_max_alarm:
+		case hwmon_temp_crit_alarm:
+			return 0444;
+		case hwmon_temp_fault:
+			if (channel)
+				return 0444;
+			break;
+		case hwmon_temp_max:
+			return 0644;
+		case hwmon_temp_crit:
+			if (channel == 2)
+				return 0644;
+			return 0444;
+		default:
+			break;
+		}
+		break;
+	default:
+		break;
+	}
+	return 0;
+}
+
+static const struct hwmon_channel_info *lm83_info[] = {
+	HWMON_CHANNEL_INFO(chip, HWMON_C_ALARMS),
+	HWMON_CHANNEL_INFO(temp,
+			   HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_CRIT |
+			   HWMON_T_MAX_ALARM | HWMON_T_CRIT_ALARM,
+			   HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_CRIT |
+			   HWMON_T_MAX_ALARM | HWMON_T_CRIT_ALARM | HWMON_T_FAULT,
+			   HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_CRIT |
+			   HWMON_T_MAX_ALARM | HWMON_T_CRIT_ALARM | HWMON_T_FAULT,
+			   HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_CRIT |
+			   HWMON_T_MAX_ALARM | HWMON_T_CRIT_ALARM | HWMON_T_FAULT
+			   ),
 	NULL
 };
 
-static const struct attribute_group lm83_group_opt = {
-	.attrs = lm83_attributes_opt,
+static const struct hwmon_ops lm83_hwmon_ops = {
+	.is_visible = lm83_is_visible,
+	.read = lm83_read,
+	.write = lm83_write,
 };
 
-/*
- * Real code
- */
+static const struct hwmon_chip_info lm83_chip_info = {
+	.ops = &lm83_hwmon_ops,
+	.info = lm83_info,
+};
 
 /* Return 0 if detection is successful, -ENODEV otherwise */
 static int lm83_detect(struct i2c_client *client,
@@ -381,18 +440,10 @@ static int lm83_probe(struct i2c_client *client)
 	if (IS_ERR(data->regmap))
 		return PTR_ERR(data->regmap);
 
-	/*
-	 * Register sysfs hooks
-	 * The LM82 can only monitor one external diode which is
-	 * at the same register as the LM83 temp3 entry - so we
-	 * declare 1 and 3 common, and then 2 and 4 only for the LM83.
-	 */
-	data->groups[0] = &lm83_group;
-	if (i2c_match_id(lm83_id, client)->driver_data == lm83)
-		data->groups[1] = &lm83_group_opt;
+	data->type = i2c_match_id(lm83_id, client)->driver_data;
 
-	hwmon_dev = devm_hwmon_device_register_with_groups(dev, client->name,
-							   data, data->groups);
+	hwmon_dev = devm_hwmon_device_register_with_info(dev, client->name,
+							 data, &lm83_chip_info, NULL);
 	return PTR_ERR_OR_ZERO(hwmon_dev);
 }
 
-- 
GitLab


From b68437ace4b8636c5c8686c0334cbe5d029557bc Mon Sep 17 00:00:00 2001
From: Zev Weiss <zev@bewilderbeest.net>
Date: Thu, 13 Jan 2022 08:46:29 -0800
Subject: [PATCH 0719/1586] hwmon: (nct6775) add support for TSI temperature
 registers

These registers report CPU temperatures (and, depending on the system,
sometimes chipset temperatures) via the TSI interface on AMD systems.
They're distinct from most of the other Super-IO temperature readings
(CPUTIN, SYSTIN, etc.) in that they're not a selectable source for
monitoring and are in a different (higher resolution) format, but can
still provide useful temperature data.

Signed-off-by: Zev Weiss <zev@bewilderbeest.net>
Tested-by: Renze Nicolai <renze@rnplus.nl>
Link: https://lore.kernel.org/r/20220113164629.21924-1-zev@bewilderbeest.net
Tested-by: Oleksandr Natalenko <oleksandr@natalenko.name>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/nct6775.c | 129 ++++++++++++++++++++++++++++++++++++++--
 1 file changed, 123 insertions(+), 6 deletions(-)

diff --git a/drivers/hwmon/nct6775.c b/drivers/hwmon/nct6775.c
index 098d12b9ecdad..63e15b5f93a0e 100644
--- a/drivers/hwmon/nct6775.c
+++ b/drivers/hwmon/nct6775.c
@@ -308,6 +308,7 @@ static void superio_exit(struct nct6775_sio_data *sio_data)
 
 #define NUM_TEMP	10	/* Max number of temp attribute sets w/ limits*/
 #define NUM_TEMP_FIXED	6	/* Max number of fixed temp attribute sets */
+#define NUM_TSI_TEMP	8	/* Max number of TSI temp register pairs */
 
 #define NUM_REG_ALARM	7	/* Max number of alarm registers */
 #define NUM_REG_BEEP	5	/* Max number of beep registers */
@@ -498,6 +499,8 @@ static const u16 NCT6775_REG_TEMP_CRIT[32] = {
 	[11] = 0xa07
 };
 
+static const u16 NCT6775_REG_TSI_TEMP[] = { 0x669 };
+
 /* NCT6776 specific data */
 
 /* STEP_UP_TIME and STEP_DOWN_TIME regs are swapped for all chips but NCT6775 */
@@ -581,6 +584,9 @@ static const u16 NCT6776_REG_TEMP_CRIT[32] = {
 	[12] = 0x70a,
 };
 
+static const u16 NCT6776_REG_TSI_TEMP[] = {
+	0x409, 0x40b, 0x40d, 0x40f, 0x411, 0x413, 0x415, 0x417 };
+
 /* NCT6779 specific data */
 
 static const u16 NCT6779_REG_IN[] = {
@@ -864,6 +870,8 @@ static const char *const nct6796_temp_label[] = {
 #define NCT6796_TEMP_MASK	0xbfff0ffe
 #define NCT6796_VIRT_TEMP_MASK	0x80000c00
 
+static const u16 NCT6796_REG_TSI_TEMP[] = { 0x409, 0x40b };
+
 static const char *const nct6798_temp_label[] = {
 	"",
 	"SYSTIN",
@@ -1005,6 +1013,8 @@ static const u16 NCT6106_REG_TEMP_CRIT[32] = {
 	[12] = 0x205,
 };
 
+static const u16 NCT6106_REG_TSI_TEMP[] = { 0x59, 0x5b, 0x5d, 0x5f, 0x61, 0x63, 0x65, 0x67 };
+
 /* NCT6112D/NCT6114D/NCT6116D specific data */
 
 static const u16 NCT6116_REG_FAN[] = { 0x20, 0x22, 0x24, 0x26, 0x28 };
@@ -1069,6 +1079,8 @@ static const s8 NCT6116_BEEP_BITS[] = {
 	34, -1				/* intrusion0, intrusion1 */
 };
 
+static const u16 NCT6116_REG_TSI_TEMP[] = { 0x59, 0x5b };
+
 static enum pwm_enable reg_to_pwm_enable(int pwm, int mode)
 {
 	if (mode == 0 && pwm == 255)
@@ -1169,6 +1181,12 @@ static inline u8 in_to_reg(u32 val, u8 nr)
 	return clamp_val(DIV_ROUND_CLOSEST(val * 100, scale_in[nr]), 0, 255);
 }
 
+/* TSI temperatures are in 8.3 format */
+static inline unsigned int tsi_temp_from_reg(unsigned int reg)
+{
+	return (reg >> 5) * 125;
+}
+
 /*
  * Data structures and manipulation thereof
  */
@@ -1179,7 +1197,7 @@ struct nct6775_data {
 	enum kinds kind;
 	const char *name;
 
-	const struct attribute_group *groups[6];
+	const struct attribute_group *groups[7];
 
 	u16 reg_temp[5][NUM_TEMP]; /* 0=temp, 1=temp_over, 2=temp_hyst,
 				    * 3=temp_crit, 4=temp_lcrit
@@ -1240,6 +1258,8 @@ struct nct6775_data {
 	const u16 *REG_ALARM;
 	const u16 *REG_BEEP;
 
+	const u16 *REG_TSI_TEMP;
+
 	unsigned int (*fan_from_reg)(u16 reg, unsigned int divreg);
 	unsigned int (*fan_from_reg_min)(u16 reg, unsigned int divreg);
 
@@ -1267,6 +1287,7 @@ struct nct6775_data {
 	s8 temp_offset[NUM_TEMP_FIXED];
 	s16 temp[5][NUM_TEMP]; /* 0=temp, 1=temp_over, 2=temp_hyst,
 				* 3=temp_crit, 4=temp_lcrit */
+	s16 tsi_temp[NUM_TSI_TEMP];
 	u64 alarms;
 	u64 beeps;
 
@@ -1315,6 +1336,7 @@ struct nct6775_data {
 
 	u16 have_temp;
 	u16 have_temp_fixed;
+	u16 have_tsi_temp;
 	u16 have_in;
 
 	/* Remember extra register values over suspend/resume */
@@ -1464,13 +1486,15 @@ static bool is_word_sized(struct nct6775_data *data, u16 reg)
 	switch (data->kind) {
 	case nct6106:
 		return reg == 0x20 || reg == 0x22 || reg == 0x24 ||
+		  (reg >= 0x59 && reg < 0x69 && (reg & 1)) ||
 		  reg == 0xe0 || reg == 0xe2 || reg == 0xe4 ||
 		  reg == 0x111 || reg == 0x121 || reg == 0x131;
 	case nct6116:
 		return reg == 0x20 || reg == 0x22 || reg == 0x24 ||
-		  reg == 0x26 || reg == 0x28 || reg == 0xe0 || reg == 0xe2 ||
-		  reg == 0xe4 || reg == 0xe6 || reg == 0xe8 || reg == 0x111 ||
-		  reg == 0x121 || reg == 0x131 || reg == 0x191 || reg == 0x1a1;
+		  reg == 0x26 || reg == 0x28 || reg == 0x59 || reg == 0x5b ||
+		  reg == 0xe0 || reg == 0xe2 || reg == 0xe4 || reg == 0xe6 ||
+		  reg == 0xe8 || reg == 0x111 || reg == 0x121 || reg == 0x131 ||
+		  reg == 0x191 || reg == 0x1a1;
 	case nct6775:
 		return (((reg & 0xff00) == 0x100 ||
 		    (reg & 0xff00) == 0x200) &&
@@ -1479,7 +1503,7 @@ static bool is_word_sized(struct nct6775_data *data, u16 reg)
 		    (reg & 0x00ff) == 0x55)) ||
 		  (reg & 0xfff0) == 0x630 ||
 		  reg == 0x640 || reg == 0x642 ||
-		  reg == 0x662 ||
+		  reg == 0x662 || reg == 0x669 ||
 		  ((reg & 0xfff0) == 0x650 && (reg & 0x000f) >= 0x06) ||
 		  reg == 0x73 || reg == 0x75 || reg == 0x77;
 	case nct6776:
@@ -1490,6 +1514,7 @@ static bool is_word_sized(struct nct6775_data *data, u16 reg)
 		    (reg & 0x00ff) == 0x55)) ||
 		  (reg & 0xfff0) == 0x630 ||
 		  reg == 0x402 ||
+		  (reg >= 0x409 && reg < 0x419 && (reg & 1)) ||
 		  reg == 0x640 || reg == 0x642 ||
 		  ((reg & 0xfff0) == 0x650 && (reg & 0x000f) >= 0x06) ||
 		  reg == 0x73 || reg == 0x75 || reg == 0x77;
@@ -1504,6 +1529,7 @@ static bool is_word_sized(struct nct6775_data *data, u16 reg)
 		return reg == 0x150 || reg == 0x153 || reg == 0x155 ||
 		  (reg & 0xfff0) == 0x4c0 ||
 		  reg == 0x402 ||
+		  (reg >= 0x409 && reg < 0x419 && (reg & 1)) ||
 		  reg == 0x63a || reg == 0x63c || reg == 0x63e ||
 		  reg == 0x640 || reg == 0x642 || reg == 0x64a ||
 		  reg == 0x64c ||
@@ -1987,6 +2013,12 @@ static struct nct6775_data *nct6775_update_device(struct device *dev)
 								   data->REG_TEMP_OFFSET[i]);
 		}
 
+		for (i = 0; i < NUM_TSI_TEMP; i++) {
+			if (!(data->have_tsi_temp & BIT(i)))
+				continue;
+			data->tsi_temp[i] = data->read_value(data, data->REG_TSI_TEMP[i]);
+		}
+
 		data->alarms = 0;
 		for (i = 0; i < NUM_REG_ALARM; i++) {
 			u8 alarm;
@@ -2670,6 +2702,44 @@ static const struct sensor_template_group nct6775_temp_template_group = {
 	.base = 1,
 };
 
+static ssize_t show_tsi_temp(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct nct6775_data *data = nct6775_update_device(dev);
+	struct sensor_device_attribute *sattr = to_sensor_dev_attr(attr);
+
+	return sysfs_emit(buf, "%u\n", tsi_temp_from_reg(data->tsi_temp[sattr->index]));
+}
+
+static ssize_t show_tsi_temp_label(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct sensor_device_attribute *sattr = to_sensor_dev_attr(attr);
+
+	return sysfs_emit(buf, "TSI%d_TEMP\n", sattr->index);
+}
+
+SENSOR_TEMPLATE(tsi_temp_input, "temp%d_input", 0444, show_tsi_temp, NULL, 0);
+SENSOR_TEMPLATE(tsi_temp_label, "temp%d_label", 0444, show_tsi_temp_label, NULL, 0);
+
+static umode_t nct6775_tsi_temp_is_visible(struct kobject *kobj, struct attribute *attr,
+					       int index)
+{
+	struct device *dev = kobj_to_dev(kobj);
+	struct nct6775_data *data = dev_get_drvdata(dev);
+	int temp = index / 2;
+
+	return (data->have_tsi_temp & BIT(temp)) ? attr->mode : 0;
+}
+
+/*
+ * The index calculation in nct6775_tsi_temp_is_visible() must be kept in
+ * sync with the size of this array.
+ */
+static struct sensor_device_template *nct6775_tsi_temp_template[] = {
+	&sensor_dev_template_tsi_temp_input,
+	&sensor_dev_template_tsi_temp_label,
+	NULL
+};
+
 static ssize_t
 show_pwm_mode(struct device *dev, struct device_attribute *attr, char *buf)
 {
@@ -3948,10 +4018,11 @@ static int nct6775_probe(struct platform_device *pdev)
 	const u16 *reg_temp, *reg_temp_over, *reg_temp_hyst, *reg_temp_config;
 	const u16 *reg_temp_mon, *reg_temp_alternate, *reg_temp_crit;
 	const u16 *reg_temp_crit_l = NULL, *reg_temp_crit_h = NULL;
-	int num_reg_temp, num_reg_temp_mon;
+	int num_reg_temp, num_reg_temp_mon, num_reg_tsi_temp;
 	u8 cr2a;
 	struct attribute_group *group;
 	struct device *hwmon_dev;
+	struct sensor_template_group tsi_temp_tg;
 	int num_attr_groups = 0;
 
 	if (sio_data->access == access_direct) {
@@ -4043,11 +4114,13 @@ static int nct6775_probe(struct platform_device *pdev)
 		data->ALARM_BITS = NCT6106_ALARM_BITS;
 		data->REG_BEEP = NCT6106_REG_BEEP;
 		data->BEEP_BITS = NCT6106_BEEP_BITS;
+		data->REG_TSI_TEMP = NCT6106_REG_TSI_TEMP;
 
 		reg_temp = NCT6106_REG_TEMP;
 		reg_temp_mon = NCT6106_REG_TEMP_MON;
 		num_reg_temp = ARRAY_SIZE(NCT6106_REG_TEMP);
 		num_reg_temp_mon = ARRAY_SIZE(NCT6106_REG_TEMP_MON);
+		num_reg_tsi_temp = ARRAY_SIZE(NCT6106_REG_TSI_TEMP);
 		reg_temp_over = NCT6106_REG_TEMP_OVER;
 		reg_temp_hyst = NCT6106_REG_TEMP_HYST;
 		reg_temp_config = NCT6106_REG_TEMP_CONFIG;
@@ -4116,11 +4189,13 @@ static int nct6775_probe(struct platform_device *pdev)
 		data->ALARM_BITS = NCT6116_ALARM_BITS;
 		data->REG_BEEP = NCT6106_REG_BEEP;
 		data->BEEP_BITS = NCT6116_BEEP_BITS;
+		data->REG_TSI_TEMP = NCT6116_REG_TSI_TEMP;
 
 		reg_temp = NCT6106_REG_TEMP;
 		reg_temp_mon = NCT6106_REG_TEMP_MON;
 		num_reg_temp = ARRAY_SIZE(NCT6106_REG_TEMP);
 		num_reg_temp_mon = ARRAY_SIZE(NCT6106_REG_TEMP_MON);
+		num_reg_tsi_temp = ARRAY_SIZE(NCT6116_REG_TSI_TEMP);
 		reg_temp_over = NCT6106_REG_TEMP_OVER;
 		reg_temp_hyst = NCT6106_REG_TEMP_HYST;
 		reg_temp_config = NCT6106_REG_TEMP_CONFIG;
@@ -4191,11 +4266,13 @@ static int nct6775_probe(struct platform_device *pdev)
 		data->REG_WEIGHT_TEMP[2] = NCT6775_REG_WEIGHT_TEMP_BASE;
 		data->REG_ALARM = NCT6775_REG_ALARM;
 		data->REG_BEEP = NCT6775_REG_BEEP;
+		data->REG_TSI_TEMP = NCT6775_REG_TSI_TEMP;
 
 		reg_temp = NCT6775_REG_TEMP;
 		reg_temp_mon = NCT6775_REG_TEMP_MON;
 		num_reg_temp = ARRAY_SIZE(NCT6775_REG_TEMP);
 		num_reg_temp_mon = ARRAY_SIZE(NCT6775_REG_TEMP_MON);
+		num_reg_tsi_temp = ARRAY_SIZE(NCT6775_REG_TSI_TEMP);
 		reg_temp_over = NCT6775_REG_TEMP_OVER;
 		reg_temp_hyst = NCT6775_REG_TEMP_HYST;
 		reg_temp_config = NCT6775_REG_TEMP_CONFIG;
@@ -4264,11 +4341,13 @@ static int nct6775_probe(struct platform_device *pdev)
 		data->REG_WEIGHT_TEMP[2] = NCT6775_REG_WEIGHT_TEMP_BASE;
 		data->REG_ALARM = NCT6775_REG_ALARM;
 		data->REG_BEEP = NCT6776_REG_BEEP;
+		data->REG_TSI_TEMP = NCT6776_REG_TSI_TEMP;
 
 		reg_temp = NCT6775_REG_TEMP;
 		reg_temp_mon = NCT6775_REG_TEMP_MON;
 		num_reg_temp = ARRAY_SIZE(NCT6775_REG_TEMP);
 		num_reg_temp_mon = ARRAY_SIZE(NCT6775_REG_TEMP_MON);
+		num_reg_tsi_temp = ARRAY_SIZE(NCT6776_REG_TSI_TEMP);
 		reg_temp_over = NCT6775_REG_TEMP_OVER;
 		reg_temp_hyst = NCT6775_REG_TEMP_HYST;
 		reg_temp_config = NCT6776_REG_TEMP_CONFIG;
@@ -4341,11 +4420,13 @@ static int nct6775_probe(struct platform_device *pdev)
 		data->REG_WEIGHT_TEMP[2] = NCT6775_REG_WEIGHT_TEMP_BASE;
 		data->REG_ALARM = NCT6779_REG_ALARM;
 		data->REG_BEEP = NCT6776_REG_BEEP;
+		data->REG_TSI_TEMP = NCT6776_REG_TSI_TEMP;
 
 		reg_temp = NCT6779_REG_TEMP;
 		reg_temp_mon = NCT6779_REG_TEMP_MON;
 		num_reg_temp = ARRAY_SIZE(NCT6779_REG_TEMP);
 		num_reg_temp_mon = ARRAY_SIZE(NCT6779_REG_TEMP_MON);
+		num_reg_tsi_temp = ARRAY_SIZE(NCT6776_REG_TSI_TEMP);
 		reg_temp_over = NCT6779_REG_TEMP_OVER;
 		reg_temp_hyst = NCT6779_REG_TEMP_HYST;
 		reg_temp_config = NCT6779_REG_TEMP_CONFIG;
@@ -4460,6 +4541,24 @@ static int nct6775_probe(struct platform_device *pdev)
 			data->REG_BEEP = NCT6776_REG_BEEP;
 		else
 			data->REG_BEEP = NCT6792_REG_BEEP;
+		switch (data->kind) {
+		case nct6791:
+		case nct6792:
+		case nct6793:
+			data->REG_TSI_TEMP = NCT6776_REG_TSI_TEMP;
+			num_reg_tsi_temp = ARRAY_SIZE(NCT6776_REG_TSI_TEMP);
+			break;
+		case nct6795:
+		case nct6796:
+		case nct6797:
+		case nct6798:
+			data->REG_TSI_TEMP = NCT6796_REG_TSI_TEMP;
+			num_reg_tsi_temp = ARRAY_SIZE(NCT6796_REG_TSI_TEMP);
+			break;
+		default:
+			num_reg_tsi_temp = 0;
+			break;
+		}
 
 		reg_temp = NCT6779_REG_TEMP;
 		num_reg_temp = ARRAY_SIZE(NCT6779_REG_TEMP);
@@ -4659,6 +4758,12 @@ static int nct6775_probe(struct platform_device *pdev)
 	}
 #endif /* USE_ALTERNATE */
 
+	/* Check which TSIx_TEMP registers are active */
+	for (i = 0; i < num_reg_tsi_temp; i++) {
+		if (data->read_value(data, data->REG_TSI_TEMP[i]))
+			data->have_tsi_temp |= BIT(i);
+	}
+
 	/* Initialize the chip */
 	nct6775_init_device(data);
 
@@ -4766,6 +4871,18 @@ static int nct6775_probe(struct platform_device *pdev)
 		return PTR_ERR(group);
 
 	data->groups[num_attr_groups++] = group;
+
+	if (data->have_tsi_temp) {
+		tsi_temp_tg.templates = nct6775_tsi_temp_template;
+		tsi_temp_tg.is_visible = nct6775_tsi_temp_is_visible;
+		tsi_temp_tg.base = fls(data->have_temp) + 1;
+		group = nct6775_create_attr_group(dev, &tsi_temp_tg, fls(data->have_tsi_temp));
+		if (IS_ERR(group))
+			return PTR_ERR(group);
+
+		data->groups[num_attr_groups++] = group;
+	}
+
 	data->groups[num_attr_groups++] = &nct6775_group_other;
 
 	hwmon_dev = devm_hwmon_device_register_with_groups(dev, data->name,
-- 
GitLab


From 32b9a19a1966fb3124d7ddd60ebd08688be2f3fa Mon Sep 17 00:00:00 2001
From: Paul Cercueil <paul@crapouillou.net>
Date: Mon, 10 Jan 2022 18:22:55 +0000
Subject: [PATCH 0720/1586] ABI: hwmon: Document "label" sysfs attribute

Add the "label" sysfs attribute, which can contain a descriptive label
that allows to uniquely identify a device within the system.

Signed-off-by: Paul Cercueil <paul@crapouillou.net>
Link: https://lore.kernel.org/r/20220110182256.30763-2-paul@crapouillou.net
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 Documentation/ABI/testing/sysfs-class-hwmon | 8 ++++++++
 Documentation/hwmon/sysfs-interface.rst     | 4 ++++
 2 files changed, 12 insertions(+)

diff --git a/Documentation/ABI/testing/sysfs-class-hwmon b/Documentation/ABI/testing/sysfs-class-hwmon
index 1f20687def44c..653d4c75eddb3 100644
--- a/Documentation/ABI/testing/sysfs-class-hwmon
+++ b/Documentation/ABI/testing/sysfs-class-hwmon
@@ -9,6 +9,14 @@ Description:
 
 		RO
 
+What:		/sys/class/hwmon/hwmonX/label
+Description:
+		A descriptive label that allows to uniquely identify a
+		device within the system.
+		The contents of the label are free-form.
+
+		RO
+
 What:		/sys/class/hwmon/hwmonX/update_interval
 Description:
 		The interval at which the chip will update readings.
diff --git a/Documentation/hwmon/sysfs-interface.rst b/Documentation/hwmon/sysfs-interface.rst
index 85652a6aaa3e7..209626fb24052 100644
--- a/Documentation/hwmon/sysfs-interface.rst
+++ b/Documentation/hwmon/sysfs-interface.rst
@@ -99,6 +99,10 @@ Global attributes
 `name`
 		The chip name.
 
+`label`
+		A descriptive label that allows to uniquely identify a device
+		within the system.
+
 `update_interval`
 		The interval at which the chip will update readings.
 
-- 
GitLab


From e1c9d6d61ddf3f34f14d3de51d6eea68683b5841 Mon Sep 17 00:00:00 2001
From: Paul Cercueil <paul@crapouillou.net>
Date: Mon, 10 Jan 2022 18:22:56 +0000
Subject: [PATCH 0721/1586] hwmon: Add "label" attribute

If a label is defined in the device tree for this device add that
to the device specific attributes. This is useful for userspace to
be able to identify an individual device when multiple identical
chips are present in the system.

Signed-off-by: Paul Cercueil <paul@crapouillou.net>
Tested-by: Cosmin Tanislav <cosmin.tanislav@analog.com>
Link: https://lore.kernel.org/r/20220110182256.30763-3-paul@crapouillou.net
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/hwmon.c | 34 +++++++++++++++++++++++++++++++---
 1 file changed, 31 insertions(+), 3 deletions(-)

diff --git a/drivers/hwmon/hwmon.c b/drivers/hwmon/hwmon.c
index 0b79c4e1bf99f..6c3a8c65390d8 100644
--- a/drivers/hwmon/hwmon.c
+++ b/drivers/hwmon/hwmon.c
@@ -18,6 +18,7 @@
 #include <linux/list.h>
 #include <linux/module.h>
 #include <linux/pci.h>
+#include <linux/property.h>
 #include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/thermal.h>
@@ -30,6 +31,7 @@
 
 struct hwmon_device {
 	const char *name;
+	const char *label;
 	struct device dev;
 	const struct hwmon_chip_info *chip;
 	struct list_head tzdata;
@@ -71,17 +73,29 @@ name_show(struct device *dev, struct device_attribute *attr, char *buf)
 }
 static DEVICE_ATTR_RO(name);
 
+static ssize_t
+label_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%s\n", to_hwmon_device(dev)->label);
+}
+static DEVICE_ATTR_RO(label);
+
 static struct attribute *hwmon_dev_attrs[] = {
 	&dev_attr_name.attr,
+	&dev_attr_label.attr,
 	NULL
 };
 
-static umode_t hwmon_dev_name_is_visible(struct kobject *kobj,
+static umode_t hwmon_dev_attr_is_visible(struct kobject *kobj,
 					 struct attribute *attr, int n)
 {
 	struct device *dev = kobj_to_dev(kobj);
+	struct hwmon_device *hdev = to_hwmon_device(dev);
 
-	if (to_hwmon_device(dev)->name == NULL)
+	if (attr == &dev_attr_name.attr && hdev->name == NULL)
+		return 0;
+
+	if (attr == &dev_attr_label.attr && hdev->label == NULL)
 		return 0;
 
 	return attr->mode;
@@ -89,7 +103,7 @@ static umode_t hwmon_dev_name_is_visible(struct kobject *kobj,
 
 static const struct attribute_group hwmon_dev_attr_group = {
 	.attrs		= hwmon_dev_attrs,
-	.is_visible	= hwmon_dev_name_is_visible,
+	.is_visible	= hwmon_dev_attr_is_visible,
 };
 
 static const struct attribute_group *hwmon_dev_attr_groups[] = {
@@ -117,6 +131,7 @@ static void hwmon_dev_release(struct device *dev)
 	if (hwdev->group.attrs)
 		hwmon_free_attrs(hwdev->group.attrs);
 	kfree(hwdev->groups);
+	kfree(hwdev->label);
 	kfree(hwdev);
 }
 
@@ -738,6 +753,7 @@ __hwmon_device_register(struct device *dev, const char *name, void *drvdata,
 			const struct attribute_group **groups)
 {
 	struct hwmon_device *hwdev;
+	const char *label;
 	struct device *hdev;
 	int i, err, id;
 
@@ -793,6 +809,18 @@ __hwmon_device_register(struct device *dev, const char *name, void *drvdata,
 		hdev->groups = groups;
 	}
 
+	if (device_property_present(dev, "label")) {
+		err = device_property_read_string(dev, "label", &label);
+		if (err < 0)
+			goto free_hwmon;
+
+		hwdev->label = kstrdup(label, GFP_KERNEL);
+		if (hwdev->label == NULL) {
+			err = -ENOMEM;
+			goto free_hwmon;
+		}
+	}
+
 	hwdev->name = name;
 	hdev->class = &hwmon_class;
 	hdev->parent = dev;
-- 
GitLab


From 23a8d76e5ec5fea267d5c822477b8b11681c2b45 Mon Sep 17 00:00:00 2001
From: Denis Pauk <pauk.denis@gmail.com>
Date: Wed, 12 Jan 2022 23:50:13 +0200
Subject: [PATCH 0722/1586] hwmon: (nct6775) add ASUS Pro B550M-C/PRIME B550M-A
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ASUS Pro B550M-C/PRIME B550M-A  boards have got an nct6775 chip, but
by default there's no use of it because of resource conflict with WMI
method.

This commit adds "Pro B550M-C" and "PRIME B550M-A" to
the list of boards that can be monitored using ASUS WMI.

BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=204807
Signed-off-by: Denis Pauk <pauk.denis@gmail.com>
Tested-by: Gregory P. Smith <greg@krypto.org>
Tested-by: Joel Wirāmu <jwp@redhat.com>
Tested-by: Jonathan Farrugia <jonfarr87@gmail.com>
Link: https://lore.kernel.org/r/20220112215013.11694-1-pauk.denis@gmail.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/nct6775.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/hwmon/nct6775.c b/drivers/hwmon/nct6775.c
index 63e15b5f93a0e..4ef468b413a1f 100644
--- a/drivers/hwmon/nct6775.c
+++ b/drivers/hwmon/nct6775.c
@@ -5102,9 +5102,12 @@ static struct platform_device *pdev[2];
 
 static const char * const asus_wmi_boards[] = {
 	"ProArt X570-CREATOR WIFI",
+	"Pro B550M-C",
 	"Pro WS X570-ACE",
 	"PRIME B360-PLUS",
 	"PRIME B460-PLUS",
+	"PRIME B550M-A",
+	"PRIME B550M-A (WI-FI)",
 	"PRIME X570-PRO",
 	"ROG CROSSHAIR VIII DARK HERO",
 	"ROG CROSSHAIR VIII FORMULA",
-- 
GitLab


From de00c068c4ac9391f3c604ad2f06ac1e73005754 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Mon, 10 Jan 2022 05:47:12 +0300
Subject: [PATCH 0723/1586] hwmon: (nct6775) add PRIME B550-PLUS motherboard to
 whitelist

Asus PRIME B550-PLUS motherboards have got an nct6775 chip. Its resource
range is covered by the \AMW0.SHWM OpRegion, so the chip is unusable
when using SIO. However ASUS WMI access works.

Add PRIME B550-PLUS to the list of motherboards using ASUS WMI to read
data.

Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://lore.kernel.org/r/20220110024712.753492-1-dmitry.baryshkov@linaro.org
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/nct6775.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/hwmon/nct6775.c b/drivers/hwmon/nct6775.c
index 4ef468b413a1f..fbf6266c7ba7b 100644
--- a/drivers/hwmon/nct6775.c
+++ b/drivers/hwmon/nct6775.c
@@ -5106,6 +5106,7 @@ static const char * const asus_wmi_boards[] = {
 	"Pro WS X570-ACE",
 	"PRIME B360-PLUS",
 	"PRIME B460-PLUS",
+	"PRIME B550-PLUS",
 	"PRIME B550M-A",
 	"PRIME B550M-A (WI-FI)",
 	"PRIME X570-PRO",
-- 
GitLab


From 1b089084ec654c05df202896d54df8d92b16fc1e Mon Sep 17 00:00:00 2001
From: Armin Wolf <W_Armin@gmx.de>
Date: Sun, 9 Jan 2022 22:42:46 +0100
Subject: [PATCH 0724/1586] Documentation: admin-guide: Update i8k driver name

The driver should be called dell_smm_hwmon, i8k is only
an alias now.

Signed-off-by: Armin Wolf <W_Armin@gmx.de>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Link: https://lore.kernel.org/r/20220109214248.61759-2-W_Armin@gmx.de
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 .../admin-guide/kernel-parameters.txt         | 29 ++++++++++++-------
 1 file changed, 18 insertions(+), 11 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index f5a27f067db9e..7189402e9fc7c 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -944,6 +944,24 @@
 			dump out devices still on the deferred probe list after
 			retrying.
 
+	dell_smm_hwmon.ignore_dmi=
+			[HW] Continue probing hardware even if DMI data
+			indicates that the driver is running on unsupported
+			hardware.
+
+	dell_smm_hwmon.force=
+			[HW] Activate driver even if SMM BIOS signature does
+			not match list of supported models and enable otherwise
+			blacklisted features.
+
+	dell_smm_hwmon.power_status=
+			[HW] Report power status in /proc/i8k
+			(disabled by default).
+
+	dell_smm_hwmon.restricted=
+			[HW] Allow controlling fans only if SYS_ADMIN
+			capability is set.
+
 	dfltcc=		[HW,S390]
 			Format: { on | off | def_only | inf_only | always }
 			on:       s390 zlib hardware support for compression on
@@ -1703,17 +1721,6 @@
 
 	i810=		[HW,DRM]
 
-	i8k.ignore_dmi	[HW] Continue probing hardware even if DMI data
-			indicates that the driver is running on unsupported
-			hardware.
-	i8k.force	[HW] Activate i8k driver even if SMM BIOS signature
-			does not match list of supported models.
-	i8k.power_status
-			[HW] Report power status in /proc/i8k
-			(disabled by default)
-	i8k.restricted	[HW] Allow controlling fans only if SYS_ADMIN
-			capability is set.
-
 	i915.invert_brightness=
 			[DRM] Invert the sense of the variable that is used to
 			set the brightness of the panel backlight. Normally a
-- 
GitLab


From 99fdc5875b005b6014be2bd2a7c6aaf0abe2896d Mon Sep 17 00:00:00 2001
From: Armin Wolf <W_Armin@gmx.de>
Date: Sun, 9 Jan 2022 22:42:47 +0100
Subject: [PATCH 0725/1586] Documentation: admin-guide: Add Documentation for
 undocumented dell_smm_hwmon parameters

Add documentation for fan_mult and fan_max.

Signed-off-by: Armin Wolf <W_Armin@gmx.de>
Link: https://lore.kernel.org/r/20220109214248.61759-3-W_Armin@gmx.de
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 Documentation/admin-guide/kernel-parameters.txt | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 7189402e9fc7c..d68053db21cc6 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -962,6 +962,12 @@
 			[HW] Allow controlling fans only if SYS_ADMIN
 			capability is set.
 
+	dell_smm_hwmon.fan_mult=
+			[HW] Factor to multiply fan speed with.
+
+	dell_smm_hwmon.fan_max=
+			[HW] Maximum configurable fan speed.
+
 	dfltcc=		[HW,S390]
 			Format: { on | off | def_only | inf_only | always }
 			on:       s390 zlib hardware support for compression on
-- 
GitLab


From 34781a6bec382dd11254caff0d379c7be70576fd Mon Sep 17 00:00:00 2001
From: Armin Wolf <W_Armin@gmx.de>
Date: Sun, 9 Jan 2022 22:42:48 +0100
Subject: [PATCH 0726/1586] Documentation: ABI: Add ABI file for legacy
 /proc/i8k interface

Add ABI file for informing remaining users of the
deprecation of the legacy /proc/i8k interface.

Signed-off-by: Armin Wolf <W_Armin@gmx.de>
Link: https://lore.kernel.org/r/20220109214248.61759-4-W_Armin@gmx.de
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 Documentation/ABI/obsolete/procfs-i8k | 10 ++++++++++
 MAINTAINERS                           |  1 +
 2 files changed, 11 insertions(+)
 create mode 100644 Documentation/ABI/obsolete/procfs-i8k

diff --git a/Documentation/ABI/obsolete/procfs-i8k b/Documentation/ABI/obsolete/procfs-i8k
new file mode 100644
index 0000000000000..32df4d5bdd153
--- /dev/null
+++ b/Documentation/ABI/obsolete/procfs-i8k
@@ -0,0 +1,10 @@
+What:		/proc/i8k
+Date:		November 2001
+KernelVersion:	2.4.14
+Contact:	Pali Rohár <pali@kernel.org>
+Description:	Legacy interface for getting/setting sensor information like
+		fan speed, temperature, serial number, hotkey status etc
+		on Dell Laptops.
+		Since the driver is now using the standard hwmon sysfs interface,
+		the procfs interface is deprecated.
+Users:		https://github.com/vitorafsr/i8kutils
diff --git a/MAINTAINERS b/MAINTAINERS
index 1ba1e4af2cbc8..293c925b6beb7 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5410,6 +5410,7 @@ F:	drivers/platform/x86/dell/dell-rbtn.*
 DELL LAPTOP SMM DRIVER
 M:	Pali Rohár <pali@kernel.org>
 S:	Maintained
+F:	Documentation/ABI/obsolete/procfs-i8k
 F:	drivers/hwmon/dell-smm-hwmon.c
 F:	include/uapi/linux/i8k.h
 
-- 
GitLab


From 848da7b58796a2583e88dfa0c974bcaa7e40fbec Mon Sep 17 00:00:00 2001
From: Anthony DeRossi <ajderossi@gmail.com>
Date: Mon, 10 Jan 2022 21:18:42 -0800
Subject: [PATCH 0727/1586] hwmon: (asus_wmi_ec_sensors) Support T_Sensor on
 Prime X570-Pro

Asus Prime X570-Pro motherboards have a T_Sensor header that can be
connected to an optional temperature probe.

Signed-off-by: Anthony DeRossi <ajderossi@gmail.com>
Link: https://lore.kernel.org/r/20220111051842.25634-1-ajderossi@gmail.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/asus_wmi_ec_sensors.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/hwmon/asus_wmi_ec_sensors.c b/drivers/hwmon/asus_wmi_ec_sensors.c
index 22a1459305a79..a3a2f014dec03 100644
--- a/drivers/hwmon/asus_wmi_ec_sensors.c
+++ b/drivers/hwmon/asus_wmi_ec_sensors.c
@@ -112,7 +112,8 @@ struct asus_wmi_data {
 /* boards with EC support */
 static struct asus_wmi_data sensors_board_PW_X570_P = {
 	.known_board_sensors = {
-		SENSOR_TEMP_CHIPSET, SENSOR_TEMP_CPU, SENSOR_TEMP_MB, SENSOR_TEMP_VRM,
+		SENSOR_TEMP_CHIPSET, SENSOR_TEMP_CPU, SENSOR_TEMP_MB,
+		SENSOR_TEMP_T_SENSOR, SENSOR_TEMP_VRM,
 		SENSOR_FAN_CHIPSET,
 		SENSOR_MAX
 	},
-- 
GitLab


From 6dd0ea4c565f73dca55a20bc1ab1587a6587d687 Mon Sep 17 00:00:00 2001
From: Denis Pauk <pauk.denis@gmail.com>
Date: Wed, 12 Jan 2022 23:49:17 +0200
Subject: [PATCH 0728/1586] hwmon: (asus_wmi_sensors) add ASUS ROG STRIX B450-F
 GAMING II

ASUS ROG STRIX B450-F GAMING II has support of the same WMI
monitoring method as ASUS ROG STRIX B450-F GAMING.

This commit adds "ASUS ROG STRIX B450-F GAMING II" to
the list of boards that can be monitored using ASUS WMI.

BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=204807
Signed-off-by: Denis Pauk <pauk.denis@gmail.com>
Tested-by: Aleksa Savic <savicaleksa83@gmail.com>
Link: https://lore.kernel.org/r/20220112214917.11662-1-pauk.denis@gmail.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/asus_wmi_sensors.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/hwmon/asus_wmi_sensors.c b/drivers/hwmon/asus_wmi_sensors.c
index c80eee874b6c0..8fdcb62ae52de 100644
--- a/drivers/hwmon/asus_wmi_sensors.c
+++ b/drivers/hwmon/asus_wmi_sensors.c
@@ -77,6 +77,7 @@ static const struct dmi_system_id asus_wmi_dmi_table[] = {
 	DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG CROSSHAIR VII HERO (WI-FI)"),
 	DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG STRIX B450-E GAMING"),
 	DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG STRIX B450-F GAMING"),
+	DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG STRIX B450-F GAMING II"),
 	DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG STRIX B450-I GAMING"),
 	DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG STRIX X399-E GAMING"),
 	DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG STRIX X470-F GAMING"),
-- 
GitLab


From e505e44fb09aa2024c7f1df0551e7c6b50b0f8db Mon Sep 17 00:00:00 2001
From: Marcello Sylvester Bauer <sylv@sylv.io>
Date: Mon, 17 Jan 2022 17:12:49 +0100
Subject: [PATCH 0729/1586] hwmon: (pmbus) Remove trailing whitespaces from
 Kconfig file

Fix checkpatch issues by removing trailing whitespaces in Kconfig.

Signed-off-by: Marcello Sylvester Bauer <sylv@sylv.io>
Link: https://lore.kernel.org/r/c984b88b136a1cde16ce52c5f818886653b0f84a.1642434222.git.sylv@sylv.io
[groeck: Updated subject]
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/pmbus/Kconfig | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/hwmon/pmbus/Kconfig b/drivers/hwmon/pmbus/Kconfig
index 41f6cbf96d3b9..c96f7b7338bd7 100644
--- a/drivers/hwmon/pmbus/Kconfig
+++ b/drivers/hwmon/pmbus/Kconfig
@@ -189,8 +189,8 @@ config SENSORS_LTC2978_REGULATOR
 	depends on SENSORS_LTC2978 && REGULATOR
 	help
 	  If you say yes here you get regulator support for Linear Technology
-	  LTC3880, LTC3883, LTC3884, LTC3886, LTC3887, LTC3889, LTC7880, 
-	  LTM4644, LTM4675, LTM4676, LTM4677, LTM4678, LTM4680, LTM4686, 
+	  LTC3880, LTC3883, LTC3884, LTC3886, LTC3887, LTC3889, LTC7880,
+	  LTM4644, LTM4675, LTM4676, LTM4677, LTM4678, LTM4680, LTM4686,
 	  and LTM4700.
 
 config SENSORS_LTC3815
-- 
GitLab


From f86380b797834bed6cd55c6c1f49a590356ec828 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Tue, 18 Jan 2022 19:55:13 -0600
Subject: [PATCH 0730/1586] dt-bindings: hwmon: lm90: Drop Tegra specifics from
 example

There's no need to complicate examples with a platform specific macro.
It also complicates example parsing to figure out the number of
interrupt cells in examples (based on bracketing).

Signed-off-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20220119015514.2441231-1-robh@kernel.org
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 Documentation/devicetree/bindings/hwmon/national,lm90.yaml | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/Documentation/devicetree/bindings/hwmon/national,lm90.yaml b/Documentation/devicetree/bindings/hwmon/national,lm90.yaml
index 6e1d54ff5d5b6..30db92977937b 100644
--- a/Documentation/devicetree/bindings/hwmon/national,lm90.yaml
+++ b/Documentation/devicetree/bindings/hwmon/national,lm90.yaml
@@ -60,7 +60,6 @@ additionalProperties: false
 
 examples:
   - |
-    #include <dt-bindings/gpio/tegra-gpio.h>
     #include <dt-bindings/interrupt-controller/irq.h>
 
     i2c {
@@ -71,8 +70,7 @@ examples:
             compatible = "onnn,nct1008";
             reg = <0x4c>;
             vcc-supply = <&palmas_ldo6_reg>;
-            interrupt-parent = <&gpio>;
-            interrupts = <TEGRA_GPIO(O, 4) IRQ_TYPE_LEVEL_LOW>;
+            interrupts = <4 IRQ_TYPE_LEVEL_LOW>;
             #thermal-sensor-cells = <1>;
         };
     };
-- 
GitLab


From 15b1c188f8cf2ff9f296c9781d1e4fc061aaf371 Mon Sep 17 00:00:00 2001
From: Michael Shych <michaelsh@nvidia.com>
Date: Tue, 18 Jan 2022 09:56:09 +0200
Subject: [PATCH 0731/1586] hwmon: (powr1220) Cosmetic changes

Update code alignments.

Signed-off-by: Michael Shych <michaelsh@nvidia.com>
Link: https://lore.kernel.org/r/20220118075611.10665-2-michaelsh@nvidia.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/powr1220.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/hwmon/powr1220.c b/drivers/hwmon/powr1220.c
index 9e086338dcbaa..542e4a7b02341 100644
--- a/drivers/hwmon/powr1220.c
+++ b/drivers/hwmon/powr1220.c
@@ -111,7 +111,7 @@ static int powr1220_read_adc(struct device *dev, int ch_num)
 	mutex_lock(&data->update_lock);
 
 	if (time_after(jiffies, data->adc_last_updated[ch_num] + HZ) ||
-			!data->adc_valid[ch_num]) {
+	    !data->adc_valid[ch_num]) {
 		/*
 		 * figure out if we need to use the attenuator for
 		 * high inputs or inputs that we don't yet have a measurement
@@ -119,12 +119,12 @@ static int powr1220_read_adc(struct device *dev, int ch_num)
 		 * max reading.
 		 */
 		if (data->adc_maxes[ch_num] > ADC_MAX_LOW_MEASUREMENT_MV ||
-				data->adc_maxes[ch_num] == 0)
+		    data->adc_maxes[ch_num] == 0)
 			adc_range = 1 << 4;
 
 		/* set the attenuator and mux */
 		result = i2c_smbus_write_byte_data(data->client, ADC_MUX,
-				adc_range | ch_num);
+						   adc_range | ch_num);
 		if (result)
 			goto exit;
 
-- 
GitLab


From 915d4664b7158d8d0f44da810186742c69300f02 Mon Sep 17 00:00:00 2001
From: Michael Shych <michaelsh@nvidia.com>
Date: Tue, 18 Jan 2022 09:56:10 +0200
Subject: [PATCH 0732/1586] hwmon: (powr1220) Upgrade driver to support hwmon
 info infrastructure

Reduce code by using devm_hwmon_device_register_with_groups() API by
devm_hwmon_device_register_with_info() API.
The motivation is to reduce code and to allow easy support for similar
devices by the same driver.

Signed-off-by: Michael Shych <michaelsh@nvidia.com>
Reviewed-by: Vadim Pasternak <vadimp@nvidia.com>
Link: https://lore.kernel.org/r/20220118075611.10665-3-michaelsh@nvidia.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/powr1220.c | 207 +++++++++++++++++----------------------
 1 file changed, 92 insertions(+), 115 deletions(-)

diff --git a/drivers/hwmon/powr1220.c b/drivers/hwmon/powr1220.c
index 542e4a7b02341..0fa1a136eec84 100644
--- a/drivers/hwmon/powr1220.c
+++ b/drivers/hwmon/powr1220.c
@@ -167,135 +167,109 @@ exit:
 	return result;
 }
 
-/* Shows the voltage associated with the specified ADC channel */
-static ssize_t powr1220_voltage_show(struct device *dev,
-				     struct device_attribute *dev_attr,
-				     char *buf)
+static umode_t
+powr1220_is_visible(const void *data, enum hwmon_sensor_types type, u32
+		    attr, int channel)
 {
-	struct sensor_device_attribute *attr = to_sensor_dev_attr(dev_attr);
-	int adc_val = powr1220_read_adc(dev, attr->index);
-
-	if (adc_val < 0)
-		return adc_val;
+	switch (type) {
+	case hwmon_in:
+		switch (attr) {
+		case hwmon_in_input:
+		case hwmon_in_highest:
+		case hwmon_in_label:
+			return 0444;
+		default:
+			break;
+		}
+		break;
+	default:
+		break;
+	}
 
-	return sprintf(buf, "%d\n", adc_val);
+	return 0;
 }
 
-/* Shows the maximum setting associated with the specified ADC channel */
-static ssize_t powr1220_max_show(struct device *dev,
-				 struct device_attribute *dev_attr, char *buf)
+static int
+powr1220_read_string(struct device *dev, enum hwmon_sensor_types type, u32 attr,
+		     int channel, const char **str)
 {
-	struct sensor_device_attribute *attr = to_sensor_dev_attr(dev_attr);
-	struct powr1220_data *data = dev_get_drvdata(dev);
+	switch (type) {
+	case hwmon_in:
+		switch (attr) {
+		case hwmon_in_label:
+			*str = input_names[channel];
+			return 0;
+		default:
+			return -EOPNOTSUPP;
+		}
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
 
-	return sprintf(buf, "%d\n", data->adc_maxes[attr->index]);
+	return -EOPNOTSUPP;
 }
 
-/* Shows the label associated with the specified ADC channel */
-static ssize_t powr1220_label_show(struct device *dev,
-				   struct device_attribute *dev_attr,
-				   char *buf)
+static int
+powr1220_read(struct device *dev, enum hwmon_sensor_types type, u32
+	      attr, int channel, long *val)
 {
-	struct sensor_device_attribute *attr = to_sensor_dev_attr(dev_attr);
+	struct powr1220_data *data = dev_get_drvdata(dev);
+	int ret;
+
+	switch (type) {
+	case hwmon_in:
+		switch (attr) {
+		case hwmon_in_input:
+			ret = powr1220_read_adc(dev, channel);
+			if (ret < 0)
+				return ret;
+			*val = ret;
+			break;
+		case hwmon_in_highest:
+			*val = data->adc_maxes[channel];
+			break;
+		default:
+			return -EOPNOTSUPP;
+		}
+		break;
+	default:
+		return -EOPNOTSUPP;
+}
 
-	return sprintf(buf, "%s\n", input_names[attr->index]);
+	return 0;
 }
 
-static SENSOR_DEVICE_ATTR_RO(in0_input, powr1220_voltage, VMON1);
-static SENSOR_DEVICE_ATTR_RO(in1_input, powr1220_voltage, VMON2);
-static SENSOR_DEVICE_ATTR_RO(in2_input, powr1220_voltage, VMON3);
-static SENSOR_DEVICE_ATTR_RO(in3_input, powr1220_voltage, VMON4);
-static SENSOR_DEVICE_ATTR_RO(in4_input, powr1220_voltage, VMON5);
-static SENSOR_DEVICE_ATTR_RO(in5_input, powr1220_voltage, VMON6);
-static SENSOR_DEVICE_ATTR_RO(in6_input, powr1220_voltage, VMON7);
-static SENSOR_DEVICE_ATTR_RO(in7_input, powr1220_voltage, VMON8);
-static SENSOR_DEVICE_ATTR_RO(in8_input, powr1220_voltage, VMON9);
-static SENSOR_DEVICE_ATTR_RO(in9_input, powr1220_voltage, VMON10);
-static SENSOR_DEVICE_ATTR_RO(in10_input, powr1220_voltage, VMON11);
-static SENSOR_DEVICE_ATTR_RO(in11_input, powr1220_voltage, VMON12);
-static SENSOR_DEVICE_ATTR_RO(in12_input, powr1220_voltage, VCCA);
-static SENSOR_DEVICE_ATTR_RO(in13_input, powr1220_voltage, VCCINP);
-
-static SENSOR_DEVICE_ATTR_RO(in0_highest, powr1220_max, VMON1);
-static SENSOR_DEVICE_ATTR_RO(in1_highest, powr1220_max, VMON2);
-static SENSOR_DEVICE_ATTR_RO(in2_highest, powr1220_max, VMON3);
-static SENSOR_DEVICE_ATTR_RO(in3_highest, powr1220_max, VMON4);
-static SENSOR_DEVICE_ATTR_RO(in4_highest, powr1220_max, VMON5);
-static SENSOR_DEVICE_ATTR_RO(in5_highest, powr1220_max, VMON6);
-static SENSOR_DEVICE_ATTR_RO(in6_highest, powr1220_max, VMON7);
-static SENSOR_DEVICE_ATTR_RO(in7_highest, powr1220_max, VMON8);
-static SENSOR_DEVICE_ATTR_RO(in8_highest, powr1220_max, VMON9);
-static SENSOR_DEVICE_ATTR_RO(in9_highest, powr1220_max, VMON10);
-static SENSOR_DEVICE_ATTR_RO(in10_highest, powr1220_max, VMON11);
-static SENSOR_DEVICE_ATTR_RO(in11_highest, powr1220_max, VMON12);
-static SENSOR_DEVICE_ATTR_RO(in12_highest, powr1220_max, VCCA);
-static SENSOR_DEVICE_ATTR_RO(in13_highest, powr1220_max, VCCINP);
-
-static SENSOR_DEVICE_ATTR_RO(in0_label, powr1220_label, VMON1);
-static SENSOR_DEVICE_ATTR_RO(in1_label, powr1220_label, VMON2);
-static SENSOR_DEVICE_ATTR_RO(in2_label, powr1220_label, VMON3);
-static SENSOR_DEVICE_ATTR_RO(in3_label, powr1220_label, VMON4);
-static SENSOR_DEVICE_ATTR_RO(in4_label, powr1220_label, VMON5);
-static SENSOR_DEVICE_ATTR_RO(in5_label, powr1220_label, VMON6);
-static SENSOR_DEVICE_ATTR_RO(in6_label, powr1220_label, VMON7);
-static SENSOR_DEVICE_ATTR_RO(in7_label, powr1220_label, VMON8);
-static SENSOR_DEVICE_ATTR_RO(in8_label, powr1220_label, VMON9);
-static SENSOR_DEVICE_ATTR_RO(in9_label, powr1220_label, VMON10);
-static SENSOR_DEVICE_ATTR_RO(in10_label, powr1220_label, VMON11);
-static SENSOR_DEVICE_ATTR_RO(in11_label, powr1220_label, VMON12);
-static SENSOR_DEVICE_ATTR_RO(in12_label, powr1220_label, VCCA);
-static SENSOR_DEVICE_ATTR_RO(in13_label, powr1220_label, VCCINP);
-
-static struct attribute *powr1220_attrs[] = {
-	&sensor_dev_attr_in0_input.dev_attr.attr,
-	&sensor_dev_attr_in1_input.dev_attr.attr,
-	&sensor_dev_attr_in2_input.dev_attr.attr,
-	&sensor_dev_attr_in3_input.dev_attr.attr,
-	&sensor_dev_attr_in4_input.dev_attr.attr,
-	&sensor_dev_attr_in5_input.dev_attr.attr,
-	&sensor_dev_attr_in6_input.dev_attr.attr,
-	&sensor_dev_attr_in7_input.dev_attr.attr,
-	&sensor_dev_attr_in8_input.dev_attr.attr,
-	&sensor_dev_attr_in9_input.dev_attr.attr,
-	&sensor_dev_attr_in10_input.dev_attr.attr,
-	&sensor_dev_attr_in11_input.dev_attr.attr,
-	&sensor_dev_attr_in12_input.dev_attr.attr,
-	&sensor_dev_attr_in13_input.dev_attr.attr,
-
-	&sensor_dev_attr_in0_highest.dev_attr.attr,
-	&sensor_dev_attr_in1_highest.dev_attr.attr,
-	&sensor_dev_attr_in2_highest.dev_attr.attr,
-	&sensor_dev_attr_in3_highest.dev_attr.attr,
-	&sensor_dev_attr_in4_highest.dev_attr.attr,
-	&sensor_dev_attr_in5_highest.dev_attr.attr,
-	&sensor_dev_attr_in6_highest.dev_attr.attr,
-	&sensor_dev_attr_in7_highest.dev_attr.attr,
-	&sensor_dev_attr_in8_highest.dev_attr.attr,
-	&sensor_dev_attr_in9_highest.dev_attr.attr,
-	&sensor_dev_attr_in10_highest.dev_attr.attr,
-	&sensor_dev_attr_in11_highest.dev_attr.attr,
-	&sensor_dev_attr_in12_highest.dev_attr.attr,
-	&sensor_dev_attr_in13_highest.dev_attr.attr,
-
-	&sensor_dev_attr_in0_label.dev_attr.attr,
-	&sensor_dev_attr_in1_label.dev_attr.attr,
-	&sensor_dev_attr_in2_label.dev_attr.attr,
-	&sensor_dev_attr_in3_label.dev_attr.attr,
-	&sensor_dev_attr_in4_label.dev_attr.attr,
-	&sensor_dev_attr_in5_label.dev_attr.attr,
-	&sensor_dev_attr_in6_label.dev_attr.attr,
-	&sensor_dev_attr_in7_label.dev_attr.attr,
-	&sensor_dev_attr_in8_label.dev_attr.attr,
-	&sensor_dev_attr_in9_label.dev_attr.attr,
-	&sensor_dev_attr_in10_label.dev_attr.attr,
-	&sensor_dev_attr_in11_label.dev_attr.attr,
-	&sensor_dev_attr_in12_label.dev_attr.attr,
-	&sensor_dev_attr_in13_label.dev_attr.attr,
+static const struct hwmon_channel_info *powr1220_info[] = {
+	HWMON_CHANNEL_INFO(in,
+			   HWMON_I_INPUT | HWMON_I_HIGHEST | HWMON_I_LABEL,
+			   HWMON_I_INPUT | HWMON_I_HIGHEST | HWMON_I_LABEL,
+			   HWMON_I_INPUT | HWMON_I_HIGHEST | HWMON_I_LABEL,
+			   HWMON_I_INPUT | HWMON_I_HIGHEST | HWMON_I_LABEL,
+			   HWMON_I_INPUT | HWMON_I_HIGHEST | HWMON_I_LABEL,
+			   HWMON_I_INPUT | HWMON_I_HIGHEST | HWMON_I_LABEL,
+			   HWMON_I_INPUT | HWMON_I_HIGHEST | HWMON_I_LABEL,
+			   HWMON_I_INPUT | HWMON_I_HIGHEST | HWMON_I_LABEL,
+			   HWMON_I_INPUT | HWMON_I_HIGHEST | HWMON_I_LABEL,
+			   HWMON_I_INPUT | HWMON_I_HIGHEST | HWMON_I_LABEL,
+			   HWMON_I_INPUT | HWMON_I_HIGHEST | HWMON_I_LABEL,
+			   HWMON_I_INPUT | HWMON_I_HIGHEST | HWMON_I_LABEL,
+			   HWMON_I_INPUT | HWMON_I_HIGHEST | HWMON_I_LABEL,
+			   HWMON_I_INPUT | HWMON_I_HIGHEST | HWMON_I_LABEL),
 
 	NULL
 };
 
-ATTRIBUTE_GROUPS(powr1220);
+static const struct hwmon_ops powr1220_hwmon_ops = {
+	.read = powr1220_read,
+	.read_string = powr1220_read_string,
+	.is_visible = powr1220_is_visible,
+};
+
+static const struct hwmon_chip_info powr1220_chip_info = {
+	.ops = &powr1220_hwmon_ops,
+	.info = powr1220_info,
+};
 
 static int powr1220_probe(struct i2c_client *client)
 {
@@ -312,8 +286,11 @@ static int powr1220_probe(struct i2c_client *client)
 	mutex_init(&data->update_lock);
 	data->client = client;
 
-	hwmon_dev = devm_hwmon_device_register_with_groups(&client->dev,
-			client->name, data, powr1220_groups);
+	hwmon_dev = devm_hwmon_device_register_with_info(&client->dev,
+							 client->name,
+							 data,
+							 &powr1220_chip_info,
+							 NULL);
 
 	return PTR_ERR_OR_ZERO(hwmon_dev);
 }
-- 
GitLab


From 9f93aa1005fa1b960f10e0ee3ed8c4e697526053 Mon Sep 17 00:00:00 2001
From: Michael Shych <michaelsh@nvidia.com>
Date: Tue, 18 Jan 2022 09:56:11 +0200
Subject: [PATCH 0733/1586] hwmon: (powr1220) Add support for Lattice's
 POWR1014 power manager IC

This patch adds support for Lattice's POWR1014 power manager IC.
Read access to all the ADCs on the chip are supported through
the "hwmon" "sysfs" files.

The main differences of POWR1014 compared to POWR1220 are
amount of VMON input lines: 10 on POWR1014 and 12 lines on POWR1220 and
number of output control signals: 14 on POWR1014 and 20 on POWR1220.

Signed-off-by: Michael Shych <michaelsh@nvidia.com>
Reviewed-by: Vadim Pasternak <vadimp@nvidia.com>
Link: https://lore.kernel.org/r/20220118075611.10665-4-michaelsh@nvidia.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/powr1220.c | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/drivers/hwmon/powr1220.c b/drivers/hwmon/powr1220.c
index 0fa1a136eec84..f77dc6db31ac5 100644
--- a/drivers/hwmon/powr1220.c
+++ b/drivers/hwmon/powr1220.c
@@ -22,6 +22,8 @@
 #define ADC_STEP_MV			2
 #define ADC_MAX_LOW_MEASUREMENT_MV	2000
 
+enum powr1xxx_chips { powr1014, powr1220 };
+
 enum powr1220_regs {
 	VMON_STATUS0,
 	VMON_STATUS1,
@@ -74,6 +76,7 @@ enum powr1220_adc_values {
 struct powr1220_data {
 	struct i2c_client *client;
 	struct mutex update_lock;
+	u8 max_channels;
 	bool adc_valid[MAX_POWR1220_ADC_VALUES];
 	 /* the next value is in jiffies */
 	unsigned long adc_last_updated[MAX_POWR1220_ADC_VALUES];
@@ -171,6 +174,11 @@ static umode_t
 powr1220_is_visible(const void *data, enum hwmon_sensor_types type, u32
 		    attr, int channel)
 {
+	struct powr1220_data *chip_data = (struct powr1220_data *)data;
+
+	if (channel >= chip_data->max_channels)
+		return 0;
+
 	switch (type) {
 	case hwmon_in:
 		switch (attr) {
@@ -271,6 +279,8 @@ static const struct hwmon_chip_info powr1220_chip_info = {
 	.info = powr1220_info,
 };
 
+static const struct i2c_device_id powr1220_ids[];
+
 static int powr1220_probe(struct i2c_client *client)
 {
 	struct powr1220_data *data;
@@ -283,6 +293,15 @@ static int powr1220_probe(struct i2c_client *client)
 	if (!data)
 		return -ENOMEM;
 
+	switch (i2c_match_id(powr1220_ids, client)->driver_data) {
+	case powr1014:
+		data->max_channels = 10;
+		break;
+	default:
+		data->max_channels = 12;
+		break;
+	}
+
 	mutex_init(&data->update_lock);
 	data->client = client;
 
@@ -296,7 +315,8 @@ static int powr1220_probe(struct i2c_client *client)
 }
 
 static const struct i2c_device_id powr1220_ids[] = {
-	{ "powr1220", 0, },
+	{ "powr1014", powr1014, },
+	{ "powr1220", powr1220, },
 	{ }
 };
 
-- 
GitLab


From 07320c91565658e117f2f86a190eec9bb64abeb6 Mon Sep 17 00:00:00 2001
From: Paul Cercueil <paul@crapouillou.net>
Date: Mon, 31 Jan 2022 16:27:40 +0000
Subject: [PATCH 0734/1586] hwmon: Fix possible NULL pointer

The recent addition of the label attribute added some code that read the
"label" device property, without checking first that "dev" was non-NULL.

Fix this issue by first checking that "dev" is non-NULL.

Fixes: ccd98cba6a18 ("hwmon: Add "label" attribute")
Signed-off-by: Paul Cercueil <paul@crapouillou.net>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/hwmon.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hwmon/hwmon.c b/drivers/hwmon/hwmon.c
index 6c3a8c65390d8..0d6c6809f26cc 100644
--- a/drivers/hwmon/hwmon.c
+++ b/drivers/hwmon/hwmon.c
@@ -809,7 +809,7 @@ __hwmon_device_register(struct device *dev, const char *name, void *drvdata,
 		hdev->groups = groups;
 	}
 
-	if (device_property_present(dev, "label")) {
+	if (dev && device_property_present(dev, "label")) {
 		err = device_property_read_string(dev, "label", &label);
 		if (err < 0)
 			goto free_hwmon;
-- 
GitLab


From 4db3c09228a0899cdd791b4e34cd102a3288fd8e Mon Sep 17 00:00:00 2001
From: Armin Wolf <W_Armin@gmx.de>
Date: Mon, 31 Jan 2022 22:19:32 +0100
Subject: [PATCH 0735/1586] hwmon: (sch56xx) Autoload modules on platform
 device creation

Right now, when sch56xx-common has detected a SCH5627/SCH5636
superio chip, the corresponding module is not automatically
loaded.
Fix that by adding the necessary device tables to both modules.

Tested on a Fujitsu Esprimo P720.

Signed-off-by: Armin Wolf <W_Armin@gmx.de>
Link: https://lore.kernel.org/r/20220131211935.3656-2-W_Armin@gmx.de
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/sch5627.c | 10 ++++++++++
 drivers/hwmon/sch5636.c | 10 ++++++++++
 2 files changed, 20 insertions(+)

diff --git a/drivers/hwmon/sch5627.c b/drivers/hwmon/sch5627.c
index 8f1b569c69e7b..72c3f6757e348 100644
--- a/drivers/hwmon/sch5627.c
+++ b/drivers/hwmon/sch5627.c
@@ -7,6 +7,7 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/jiffies.h>
@@ -456,11 +457,20 @@ static int sch5627_probe(struct platform_device *pdev)
 	return 0;
 }
 
+static const struct platform_device_id sch5627_device_id[] = {
+	{
+		.name = "sch5627",
+	},
+	{ }
+};
+MODULE_DEVICE_TABLE(platform, sch5627_device_id);
+
 static struct platform_driver sch5627_driver = {
 	.driver = {
 		.name	= DRVNAME,
 	},
 	.probe		= sch5627_probe,
+	.id_table	= sch5627_device_id,
 };
 
 module_platform_driver(sch5627_driver);
diff --git a/drivers/hwmon/sch5636.c b/drivers/hwmon/sch5636.c
index 39ff1c9b1df54..269757bc3a9e0 100644
--- a/drivers/hwmon/sch5636.c
+++ b/drivers/hwmon/sch5636.c
@@ -7,6 +7,7 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/jiffies.h>
@@ -501,12 +502,21 @@ error:
 	return err;
 }
 
+static const struct platform_device_id sch5636_device_id[] = {
+	{
+		.name = "sch5636",
+	},
+	{ }
+};
+MODULE_DEVICE_TABLE(platform, sch5636_device_id);
+
 static struct platform_driver sch5636_driver = {
 	.driver = {
 		.name	= DRVNAME,
 	},
 	.probe		= sch5636_probe,
 	.remove		= sch5636_remove,
+	.id_table	= sch5636_device_id,
 };
 
 module_platform_driver(sch5636_driver);
-- 
GitLab


From 393935baa45e5ccb9603cf7f9f020ed1bc0915f7 Mon Sep 17 00:00:00 2001
From: Armin Wolf <W_Armin@gmx.de>
Date: Mon, 31 Jan 2022 22:19:33 +0100
Subject: [PATCH 0736/1586] hwmon: (sch56xx-common) Add automatic module
 loading on supported devices

This patch enables the sch56xx-common module to get automatically
loaded on supported machines.
If a machine supports Fujitsu's SCH56XX-based hardware monitoring
solutions, it contains a "Antiope"/" Antiope" dmi onboard device
in case of the sch5627 or a "Theseus"/" Theseus" dmi onboard device
in case of the sch5636.
Since some machines like the Esprimo C700 have a seemingly faulty
DMI table containing both onboard devices, the driver still needs
to probe for the individual superio chip, which in presence of at
least one DMI onboard device however can be considered safe.
Also add a module parameter allowing for bypassing the
DMI check.

Tested on a Fujitsu Esprimo P720.

Signed-off-by: Armin Wolf <W_Armin@gmx.de>
Link: https://lore.kernel.org/r/20220131211935.3656-3-W_Armin@gmx.de
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/sch56xx-common.c | 40 ++++++++++++++++++++++++++++++++--
 1 file changed, 38 insertions(+), 2 deletions(-)

diff --git a/drivers/hwmon/sch56xx-common.c b/drivers/hwmon/sch56xx-common.c
index 40cdadad35e52..0172aa16dc0cc 100644
--- a/drivers/hwmon/sch56xx-common.c
+++ b/drivers/hwmon/sch56xx-common.c
@@ -7,8 +7,10 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/init.h>
 #include <linux/platform_device.h>
+#include <linux/dmi.h>
 #include <linux/err.h>
 #include <linux/io.h>
 #include <linux/acpi.h>
@@ -19,7 +21,10 @@
 #include <linux/slab.h>
 #include "sch56xx-common.h"
 
-/* Insmod parameters */
+static bool ignore_dmi;
+module_param(ignore_dmi, bool, 0);
+MODULE_PARM_DESC(ignore_dmi, "Omit DMI check for supported devices (default=0)");
+
 static bool nowayout = WATCHDOG_NOWAYOUT;
 module_param(nowayout, bool, 0);
 MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default="
@@ -518,11 +523,42 @@ static int __init sch56xx_device_add(int address, const char *name)
 	return PTR_ERR_OR_ZERO(sch56xx_pdev);
 }
 
+/* For autoloading only */
+static const struct dmi_system_id sch56xx_dmi_table[] __initconst = {
+	{
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
+		},
+	},
+	{ }
+};
+MODULE_DEVICE_TABLE(dmi, sch56xx_dmi_table);
+
 static int __init sch56xx_init(void)
 {
-	int address;
 	const char *name = NULL;
+	int address;
 
+	if (!ignore_dmi) {
+		if (!dmi_check_system(sch56xx_dmi_table))
+			return -ENODEV;
+
+		/*
+		 * Some machines like the Esprimo P720 and Esprimo C700 have
+		 * onboard devices named " Antiope"/" Theseus" instead of
+		 * "Antiope"/"Theseus", so we need to check for both.
+		 */
+		if (!dmi_find_device(DMI_DEV_TYPE_OTHER, "Antiope", NULL) &&
+		    !dmi_find_device(DMI_DEV_TYPE_OTHER, " Antiope", NULL) &&
+		    !dmi_find_device(DMI_DEV_TYPE_OTHER, "Theseus", NULL) &&
+		    !dmi_find_device(DMI_DEV_TYPE_OTHER, " Theseus", NULL))
+			return -ENODEV;
+	}
+
+	/*
+	 * Some devices like the Esprimo C700 have both onboard devices,
+	 * so we still have to check manually
+	 */
 	address = sch56xx_find(0x4e, &name);
 	if (address < 0)
 		address = sch56xx_find(0x2e, &name);
-- 
GitLab


From 799c3e1e5348effda4312764bb484a084d561949 Mon Sep 17 00:00:00 2001
From: Armin Wolf <W_Armin@gmx.de>
Date: Mon, 31 Jan 2022 22:19:34 +0100
Subject: [PATCH 0737/1586] hwmon: (sch56xx-common) Replace msleep() with
 usleep_range()

msleep(1) will often sleep more than 20ms, slowing down sensor
and watchdog reads/writes. Use usleep_range() as recommended
in timers-howto.rst to fix that.

Tested on a Fujitsu Esprimo P720.

Signed-off-by: Armin Wolf <W_Armin@gmx.de>
Link: https://lore.kernel.org/r/20220131211935.3656-4-W_Armin@gmx.de
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/sch56xx-common.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hwmon/sch56xx-common.c b/drivers/hwmon/sch56xx-common.c
index 0172aa16dc0cc..82602b74c7edb 100644
--- a/drivers/hwmon/sch56xx-common.c
+++ b/drivers/hwmon/sch56xx-common.c
@@ -139,7 +139,7 @@ static int sch56xx_send_cmd(u16 addr, u8 cmd, u16 reg, u8 v)
 	/* EM Interface Polling "Algorithm" */
 	for (i = 0; i < max_busy_polls + max_lazy_polls; i++) {
 		if (i >= max_busy_polls)
-			msleep(1);
+			usleep_range(1000, 2000);
 		/* Read Interrupt source Register */
 		val = inb(addr + 8);
 		/* Write Clear the interrupt source bits */
-- 
GitLab


From 647d6f09bea7dacf4cdb6d4ea7e3051883955297 Mon Sep 17 00:00:00 2001
From: Armin Wolf <W_Armin@gmx.de>
Date: Mon, 31 Jan 2022 22:19:35 +0100
Subject: [PATCH 0738/1586] hwmon: (sch56xx-common) Replace WDOG_ACTIVE with
 WDOG_HW_RUNNING

If the watchdog was already enabled by the BIOS after booting, the
watchdog infrastructure needs to regularly send keepalives to
prevent a unexpected reset.
WDOG_ACTIVE only serves as an status indicator for userspace,
we want to use WDOG_HW_RUNNING instead.

Since my Fujitsu Esprimo P720 does not support the watchdog,
this change is compile-tested only.

Suggested-by: Guenter Roeck <linux@roeck-us.net>
Fixes: fb551405c0f8 (watchdog: sch56xx: Use watchdog core)
Signed-off-by: Armin Wolf <W_Armin@gmx.de>
Link: https://lore.kernel.org/r/20220131211935.3656-5-W_Armin@gmx.de
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/sch56xx-common.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hwmon/sch56xx-common.c b/drivers/hwmon/sch56xx-common.c
index 82602b74c7edb..3ece53adabd62 100644
--- a/drivers/hwmon/sch56xx-common.c
+++ b/drivers/hwmon/sch56xx-common.c
@@ -427,7 +427,7 @@ void sch56xx_watchdog_register(struct device *parent, u16 addr, u32 revision,
 	data->wddev.max_timeout = 255 * 60;
 	watchdog_set_nowayout(&data->wddev, nowayout);
 	if (output_enable & SCH56XX_WDOG_OUTPUT_ENABLE)
-		set_bit(WDOG_ACTIVE, &data->wddev.status);
+		set_bit(WDOG_HW_RUNNING, &data->wddev.status);
 
 	/* Since the watchdog uses a downcounter there is no register to read
 	   the BIOS set timeout from (if any was set at all) ->
-- 
GitLab


From d0ddfd241e5719d696bc0b081e260db69d368668 Mon Sep 17 00:00:00 2001
From: Eugene Shalygin <eugene.shalygin@gmail.com>
Date: Mon, 24 Jan 2022 02:56:43 +0100
Subject: [PATCH 0739/1586] hwmon: (asus-ec-sensors) add driver for ASUS EC

This driver provides the same data as the asus_wmi_ec_sensors driver
(and gets it from the same source) but does not use WMI, polling
the ACPI EC directly.

That provides two enhancements: sensor reading became quicker (on some
systems or kernel configuration it took almost a full second to read
all the sensors, that transfers less than 15 bytes of data), the driver
became more flexible. The driver now relies on ACPI mutex to lock access
to the EC in the same way as the WMI code does.

Signed-off-by: Eugene Shalygin <eugene.shalygin@gmail.com>
Link: https://lore.kernel.org/r/20220124015658.687309-2-eugene.shalygin@gmail.com
Tested-by: Oleksandr Natalenko <oleksandr@natalenko.name>
Tested-by: Denis Pauk <pauk.denis@gmail.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 MAINTAINERS                     |   6 +
 drivers/hwmon/Kconfig           |  11 +
 drivers/hwmon/Makefile          |   1 +
 drivers/hwmon/asus-ec-sensors.c | 694 ++++++++++++++++++++++++++++++++
 4 files changed, 712 insertions(+)
 create mode 100644 drivers/hwmon/asus-ec-sensors.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 293c925b6beb7..27a87a6646357 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3056,6 +3056,12 @@ L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	drivers/hwmon/asus_wmi_ec_sensors.c
 
+ASUS EC HARDWARE MONITOR DRIVER
+M:	Eugene Shalygin <eugene.shalygin@gmail.com>
+L:	linux-hwmon@vger.kernel.org
+S:	Maintained
+F:	drivers/hwmon/asus-ec-sensors.c
+
 ASUS WIRELESS RADIO CONTROL DRIVER
 M:	João Paulo Rechi Vita <jprvita@gmail.com>
 L:	platform-driver-x86@vger.kernel.org
diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index 01ab80a2cc4a1..b75ec220da5ad 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -2263,6 +2263,17 @@ config SENSORS_ASUS_WMI_EC
 	  This driver can also be built as a module. If so, the module
 	  will be called asus_wmi_sensors_ec.
 
+config SENSORS_ASUS_EC
+	tristate "ASUS EC Sensors"
+	help
+	  If you say yes here you get support for the ACPI embedded controller
+	  hardware monitoring interface found in ASUS motherboards. The driver
+	  currently supports B550/X570 boards, although other ASUS boards might
+	  provide this monitoring interface as well.
+
+	  This driver can also be built as a module. If so, the module
+	  will be called asus_ec_sensors.
+
 endif # ACPI
 
 endif # HWMON
diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile
index 185f946d698b0..7bba3415ca3df 100644
--- a/drivers/hwmon/Makefile
+++ b/drivers/hwmon/Makefile
@@ -9,6 +9,7 @@ obj-$(CONFIG_HWMON_VID)		+= hwmon-vid.o
 # APCI drivers
 obj-$(CONFIG_SENSORS_ACPI_POWER) += acpi_power_meter.o
 obj-$(CONFIG_SENSORS_ATK0110)	+= asus_atk0110.o
+obj-$(CONFIG_SENSORS_ASUS_EC)	+= asus-ec-sensors.o
 obj-$(CONFIG_SENSORS_ASUS_WMI)	+= asus_wmi_sensors.o
 obj-$(CONFIG_SENSORS_ASUS_WMI_EC)	+= asus_wmi_ec_sensors.o
 
diff --git a/drivers/hwmon/asus-ec-sensors.c b/drivers/hwmon/asus-ec-sensors.c
new file mode 100644
index 0000000000000..7285334c7d801
--- /dev/null
+++ b/drivers/hwmon/asus-ec-sensors.c
@@ -0,0 +1,694 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * HWMON driver for ASUS motherboards that publish some sensor values
+ * via the embedded controller registers.
+ *
+ * Copyright (C) 2021 Eugene Shalygin <eugene.shalygin@gmail.com>
+
+ * EC provides:
+ * - Chipset temperature
+ * - CPU temperature
+ * - Motherboard temperature
+ * - T_Sensor temperature
+ * - VRM temperature
+ * - Water In temperature
+ * - Water Out temperature
+ * - CPU Optional fan RPM
+ * - Chipset fan RPM
+ * - VRM Heat Sink fan RPM
+ * - Water Flow fan RPM
+ * - CPU current
+ */
+
+#include <linux/acpi.h>
+#include <linux/bitops.h>
+#include <linux/dev_printk.h>
+#include <linux/dmi.h>
+#include <linux/hwmon.h>
+#include <linux/init.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/sort.h>
+#include <linux/units.h>
+
+#include <asm/unaligned.h>
+
+static char *mutex_path_override;
+
+/* Writing to this EC register switches EC bank */
+#define ASUS_EC_BANK_REGISTER	0xff
+#define SENSOR_LABEL_LEN	16
+
+/*
+ * Arbitrary set max. allowed bank number. Required for sorting banks and
+ * currently is overkill with just 2 banks used at max, but for the sake
+ * of alignment let's set it to a higher value.
+ */
+#define ASUS_EC_MAX_BANK	3
+
+#define ACPI_LOCK_DELAY_MS	500
+
+/* ACPI mutex for locking access to the EC for the firmware */
+#define ASUS_HW_ACCESS_MUTEX_ASMX	"\\AMW0.ASMX"
+
+/* There are two variants of the vendor spelling */
+#define VENDOR_ASUS_UPPER_CASE	"ASUSTeK COMPUTER INC."
+
+typedef union {
+	u32 value;
+	struct {
+		u8 index;
+		u8 bank;
+		u8 size;
+		u8 dummy;
+	} components;
+} sensor_address;
+
+#define MAKE_SENSOR_ADDRESS(size, bank, index) {                               \
+		.value = (size << 16) + (bank << 8) + index                    \
+	}
+
+static u32 hwmon_attributes[hwmon_max] = {
+	[hwmon_chip] = HWMON_C_REGISTER_TZ,
+	[hwmon_temp] = HWMON_T_INPUT | HWMON_T_LABEL,
+	[hwmon_in] = HWMON_I_INPUT | HWMON_I_LABEL,
+	[hwmon_curr] = HWMON_C_INPUT | HWMON_C_LABEL,
+	[hwmon_fan] = HWMON_F_INPUT | HWMON_F_LABEL,
+};
+
+struct ec_sensor_info {
+	char label[SENSOR_LABEL_LEN];
+	enum hwmon_sensor_types type;
+	sensor_address addr;
+};
+
+#define EC_SENSOR(sensor_label, sensor_type, size, bank, index) {              \
+		.label = sensor_label, .type = sensor_type,                    \
+		.addr = MAKE_SENSOR_ADDRESS(size, bank, index),                \
+	}
+
+enum ec_sensors {
+	/* chipset temperature [℃] */
+	ec_sensor_temp_chipset,
+	/* CPU temperature [℃] */
+	ec_sensor_temp_cpu,
+	/* motherboard temperature [℃] */
+	ec_sensor_temp_mb,
+	/* "T_Sensor" temperature sensor reading [℃] */
+	ec_sensor_temp_t_sensor,
+	/* VRM temperature [℃] */
+	ec_sensor_temp_vrm,
+	/* CPU_Opt fan [RPM] */
+	ec_sensor_fan_cpu_opt,
+	/* VRM heat sink fan [RPM] */
+	ec_sensor_fan_vrm_hs,
+	/* Chipset fan [RPM] */
+	ec_sensor_fan_chipset,
+	/* Water flow sensor reading [RPM] */
+	ec_sensor_fan_water_flow,
+	/* CPU current [A] */
+	ec_sensor_curr_cpu,
+	/* "Water_In" temperature sensor reading [℃] */
+	ec_sensor_temp_water_in,
+	/* "Water_Out" temperature sensor reading [℃] */
+	ec_sensor_temp_water_out,
+};
+
+#define SENSOR_TEMP_CHIPSET BIT(ec_sensor_temp_chipset)
+#define SENSOR_TEMP_CPU BIT(ec_sensor_temp_cpu)
+#define SENSOR_TEMP_MB BIT(ec_sensor_temp_mb)
+#define SENSOR_TEMP_T_SENSOR BIT(ec_sensor_temp_t_sensor)
+#define SENSOR_TEMP_VRM BIT(ec_sensor_temp_vrm)
+#define SENSOR_FAN_CPU_OPT BIT(ec_sensor_fan_cpu_opt)
+#define SENSOR_FAN_VRM_HS BIT(ec_sensor_fan_vrm_hs)
+#define SENSOR_FAN_CHIPSET BIT(ec_sensor_fan_chipset)
+#define SENSOR_FAN_WATER_FLOW BIT(ec_sensor_fan_water_flow)
+#define SENSOR_CURR_CPU BIT(ec_sensor_curr_cpu)
+#define SENSOR_TEMP_WATER_IN BIT(ec_sensor_temp_water_in)
+#define SENSOR_TEMP_WATER_OUT BIT(ec_sensor_temp_water_out)
+
+/* All the known sensors for ASUS EC controllers */
+static const struct ec_sensor_info known_ec_sensors[] = {
+	[ec_sensor_temp_chipset] =
+		EC_SENSOR("Chipset", hwmon_temp, 1, 0x00, 0x3a),
+	[ec_sensor_temp_cpu] = EC_SENSOR("CPU", hwmon_temp, 1, 0x00, 0x3b),
+	[ec_sensor_temp_mb] =
+		EC_SENSOR("Motherboard", hwmon_temp, 1, 0x00, 0x3c),
+	[ec_sensor_temp_t_sensor] =
+		EC_SENSOR("T_Sensor", hwmon_temp, 1, 0x00, 0x3d),
+	[ec_sensor_temp_vrm] = EC_SENSOR("VRM", hwmon_temp, 1, 0x00, 0x3e),
+	[ec_sensor_fan_cpu_opt] =
+		EC_SENSOR("CPU_Opt", hwmon_fan, 2, 0x00, 0xb0),
+	[ec_sensor_fan_vrm_hs] = EC_SENSOR("VRM HS", hwmon_fan, 2, 0x00, 0xb2),
+	[ec_sensor_fan_chipset] =
+		EC_SENSOR("Chipset", hwmon_fan, 2, 0x00, 0xb4),
+	[ec_sensor_fan_water_flow] =
+		EC_SENSOR("Water_Flow", hwmon_fan, 2, 0x00, 0xbc),
+	[ec_sensor_curr_cpu] = EC_SENSOR("CPU", hwmon_curr, 1, 0x00, 0xf4),
+	[ec_sensor_temp_water_in] =
+		EC_SENSOR("Water_In", hwmon_temp, 1, 0x01, 0x00),
+	[ec_sensor_temp_water_out] =
+		EC_SENSOR("Water_Out", hwmon_temp, 1, 0x01, 0x01),
+};
+
+/* Shortcuts for common combinations */
+#define SENSOR_SET_TEMP_CHIPSET_CPU_MB                                         \
+	(SENSOR_TEMP_CHIPSET | SENSOR_TEMP_CPU | SENSOR_TEMP_MB)
+#define SENSOR_SET_TEMP_WATER (SENSOR_TEMP_WATER_IN | SENSOR_TEMP_WATER_OUT)
+
+#define DMI_EXACT_MATCH_BOARD(vendor, name, sensors) {                         \
+	.matches = {                                                           \
+		DMI_EXACT_MATCH(DMI_BOARD_VENDOR, vendor),                     \
+		DMI_EXACT_MATCH(DMI_BOARD_NAME, name),                         \
+	},                                                                     \
+	.driver_data = (void *)(sensors), \
+}
+
+static const struct dmi_system_id asus_ec_dmi_table[] __initconst = {
+	DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "PRIME X570-PRO",
+		SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_VRM |
+		SENSOR_TEMP_T_SENSOR | SENSOR_FAN_CHIPSET),
+	DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "Pro WS X570-ACE",
+		SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_VRM |
+		SENSOR_FAN_CHIPSET | SENSOR_CURR_CPU),
+	DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE,
+			      "ROG CROSSHAIR VIII DARK HERO",
+		SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_T_SENSOR |
+		SENSOR_TEMP_VRM | SENSOR_SET_TEMP_WATER |
+		SENSOR_FAN_CPU_OPT | SENSOR_FAN_WATER_FLOW | SENSOR_CURR_CPU),
+	DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE,
+			      "ROG CROSSHAIR VIII FORMULA",
+		SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_T_SENSOR |
+		SENSOR_TEMP_VRM | SENSOR_FAN_CPU_OPT | SENSOR_FAN_CHIPSET |
+		SENSOR_CURR_CPU),
+	DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "ROG CROSSHAIR VIII HERO",
+		SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_T_SENSOR |
+		SENSOR_TEMP_VRM | SENSOR_SET_TEMP_WATER |
+		SENSOR_FAN_CPU_OPT | SENSOR_FAN_CHIPSET |
+		SENSOR_FAN_WATER_FLOW | SENSOR_CURR_CPU),
+	DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE,
+			      "ROG CROSSHAIR VIII IMPACT",
+		SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_T_SENSOR |
+		SENSOR_TEMP_VRM | SENSOR_FAN_CHIPSET | SENSOR_CURR_CPU),
+	DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "ROG STRIX B550-E GAMING",
+		SENSOR_SET_TEMP_CHIPSET_CPU_MB |
+		SENSOR_TEMP_T_SENSOR |
+		SENSOR_TEMP_VRM | SENSOR_FAN_CPU_OPT),
+	DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "ROG STRIX B550-I GAMING",
+		SENSOR_SET_TEMP_CHIPSET_CPU_MB |
+		SENSOR_TEMP_T_SENSOR |
+		SENSOR_TEMP_VRM | SENSOR_FAN_VRM_HS | SENSOR_CURR_CPU),
+	DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "ROG STRIX X570-E GAMING",
+		SENSOR_SET_TEMP_CHIPSET_CPU_MB |
+		SENSOR_TEMP_T_SENSOR |
+		SENSOR_TEMP_VRM | SENSOR_FAN_CHIPSET | SENSOR_CURR_CPU),
+	DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "ROG STRIX X570-F GAMING",
+		SENSOR_SET_TEMP_CHIPSET_CPU_MB |
+		SENSOR_TEMP_T_SENSOR | SENSOR_FAN_CHIPSET),
+	DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "ROG STRIX X570-I GAMING",
+		SENSOR_TEMP_T_SENSOR | SENSOR_FAN_VRM_HS |
+		SENSOR_FAN_CHIPSET | SENSOR_CURR_CPU),
+	{}
+};
+
+struct ec_sensor {
+	unsigned int info_index;
+	u32 cached_value;
+};
+
+struct ec_sensors_data {
+	unsigned long board_sensors;
+	struct ec_sensor *sensors;
+	/* EC registers to read from */
+	u16 *registers;
+	u8 *read_buffer;
+	/* sorted list of unique register banks */
+	u8 banks[ASUS_EC_MAX_BANK + 1];
+	/* in jiffies */
+	unsigned long last_updated;
+	acpi_handle aml_mutex;
+	/* number of board EC sensors */
+	u8 nr_sensors;
+	/*
+	 * number of EC registers to read
+	 * (sensor might span more than 1 register)
+	 */
+	u8 nr_registers;
+	/* number of unique register banks */
+	u8 nr_banks;
+};
+
+static u8 register_bank(u16 reg)
+{
+	return reg >> 8;
+}
+
+static u8 register_index(u16 reg)
+{
+	return reg & 0x00ff;
+}
+
+static const struct ec_sensor_info *
+get_sensor_info(const struct ec_sensors_data *state, int index)
+{
+	return &known_ec_sensors[state->sensors[index].info_index];
+}
+
+static int find_ec_sensor_index(const struct ec_sensors_data *ec,
+				enum hwmon_sensor_types type, int channel)
+{
+	unsigned int i;
+
+	for (i = 0; i < ec->nr_sensors; i++) {
+		if (get_sensor_info(ec, i)->type == type) {
+			if (channel == 0)
+				return i;
+			channel--;
+		}
+	}
+	return -ENOENT;
+}
+
+static int __init bank_compare(const void *a, const void *b)
+{
+	return *((const s8 *)a) - *((const s8 *)b);
+}
+
+static int __init board_sensors_count(unsigned long sensors)
+{
+	return hweight_long(sensors);
+}
+
+static void __init setup_sensor_data(struct ec_sensors_data *ec)
+{
+	struct ec_sensor *s = ec->sensors;
+	bool bank_found;
+	int i, j;
+	u8 bank;
+
+	ec->nr_banks = 0;
+	ec->nr_registers = 0;
+
+	for_each_set_bit(i, &ec->board_sensors,
+			  BITS_PER_TYPE(ec->board_sensors)) {
+		s->info_index = i;
+		s->cached_value = 0;
+		ec->nr_registers +=
+			known_ec_sensors[s->info_index].addr.components.size;
+		bank_found = false;
+		bank = known_ec_sensors[s->info_index].addr.components.bank;
+		for (j = 0; j < ec->nr_banks; j++) {
+			if (ec->banks[j] == bank) {
+				bank_found = true;
+				break;
+			}
+		}
+		if (!bank_found) {
+			ec->banks[ec->nr_banks++] = bank;
+		}
+		s++;
+	}
+	sort(ec->banks, ec->nr_banks, 1, bank_compare, NULL);
+}
+
+static void __init fill_ec_registers(struct ec_sensors_data *ec)
+{
+	const struct ec_sensor_info *si;
+	unsigned int i, j, register_idx = 0;
+
+	for (i = 0; i < ec->nr_sensors; ++i) {
+		si = get_sensor_info(ec, i);
+		for (j = 0; j < si->addr.components.size; ++j, ++register_idx) {
+			ec->registers[register_idx] =
+				(si->addr.components.bank << 8) +
+				si->addr.components.index + j;
+		}
+	}
+}
+
+static acpi_handle __init asus_hw_access_mutex(struct device *dev)
+{
+	const char *mutex_path;
+	acpi_handle res;
+	int status;
+
+	mutex_path = mutex_path_override ?
+		mutex_path_override : ASUS_HW_ACCESS_MUTEX_ASMX;
+
+	status = acpi_get_handle(NULL, (acpi_string)mutex_path, &res);
+	if (ACPI_FAILURE(status)) {
+		dev_err(dev,
+			"Could not get hardware access guard mutex '%s': error %d",
+			mutex_path, status);
+		return NULL;
+	}
+	return res;
+}
+
+static int asus_ec_bank_switch(u8 bank, u8 *old)
+{
+	int status = 0;
+
+	if (old) {
+		status = ec_read(ASUS_EC_BANK_REGISTER, old);
+	}
+	if (status || (old && (*old == bank)))
+		return status;
+	return ec_write(ASUS_EC_BANK_REGISTER, bank);
+}
+
+static int asus_ec_block_read(const struct device *dev,
+			      struct ec_sensors_data *ec)
+{
+	int ireg, ibank, status;
+	u8 bank, reg_bank, prev_bank;
+
+	bank = 0;
+	status = asus_ec_bank_switch(bank, &prev_bank);
+	if (status) {
+		dev_warn(dev, "EC bank switch failed");
+		return status;
+	}
+
+	if (prev_bank) {
+		/* oops... somebody else is working with the EC too */
+		dev_warn(dev,
+			"Concurrent access to the ACPI EC detected.\nRace condition possible.");
+	}
+
+	/* read registers minimizing bank switches. */
+	for (ibank = 0; ibank < ec->nr_banks; ibank++) {
+		if (bank != ec->banks[ibank]) {
+			bank = ec->banks[ibank];
+			if (asus_ec_bank_switch(bank, NULL)) {
+				dev_warn(dev, "EC bank switch to %d failed",
+					 bank);
+				break;
+			}
+		}
+		for (ireg = 0; ireg < ec->nr_registers; ireg++) {
+			reg_bank = register_bank(ec->registers[ireg]);
+			if (reg_bank < bank) {
+				continue;
+			}
+			ec_read(register_index(ec->registers[ireg]),
+				ec->read_buffer + ireg);
+		}
+	}
+
+	status = asus_ec_bank_switch(prev_bank, NULL);
+	return status;
+}
+
+static inline u32 get_sensor_value(const struct ec_sensor_info *si, u8 *data)
+{
+	switch (si->addr.components.size) {
+	case 1:
+		return *data;
+	case 2:
+		return get_unaligned_be16(data);
+	case 4:
+		return get_unaligned_be32(data);
+	default:
+		return 0;
+	}
+}
+
+static void update_sensor_values(struct ec_sensors_data *ec, u8 *data)
+{
+	const struct ec_sensor_info *si;
+	struct ec_sensor *s;
+
+	for (s = ec->sensors; s != ec->sensors + ec->nr_sensors; s++) {
+		si = &known_ec_sensors[s->info_index];
+		s->cached_value = get_sensor_value(si, data);
+		data += si->addr.components.size;
+	}
+}
+
+static int update_ec_sensors(const struct device *dev,
+			     struct ec_sensors_data *ec)
+{
+	int status;
+
+	/*
+	 * ASUS DSDT does not specify that access to the EC has to be guarded,
+	 * but firmware does access it via ACPI
+	 */
+	if (ACPI_FAILURE(acpi_acquire_mutex(ec->aml_mutex, NULL,
+					    ACPI_LOCK_DELAY_MS))) {
+		dev_err(dev, "Failed to acquire AML mutex");
+		status = -EBUSY;
+		goto cleanup;
+	}
+
+	status = asus_ec_block_read(dev, ec);
+
+	if (!status) {
+		update_sensor_values(ec, ec->read_buffer);
+	}
+	if (ACPI_FAILURE(acpi_release_mutex(ec->aml_mutex, NULL))) {
+		dev_err(dev, "Failed to release AML mutex");
+	}
+cleanup:
+	return status;
+}
+
+static int scale_sensor_value(u32 value, int data_type)
+{
+	switch (data_type) {
+	case hwmon_curr:
+	case hwmon_temp:
+	case hwmon_in:
+		return value * MILLI;
+	default:
+		return value;
+	}
+}
+
+static int get_cached_value_or_update(const struct device *dev,
+				      int sensor_index,
+				      struct ec_sensors_data *state, u32 *value)
+{
+	if (time_after(jiffies, state->last_updated + HZ)) {
+		if (update_ec_sensors(dev, state)) {
+			dev_err(dev, "update_ec_sensors() failure\n");
+			return -EIO;
+		}
+
+		state->last_updated = jiffies;
+	}
+
+	*value = state->sensors[sensor_index].cached_value;
+	return 0;
+}
+
+/*
+ * Now follow the functions that implement the hwmon interface
+ */
+
+static int asus_ec_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
+			      u32 attr, int channel, long *val)
+{
+	int ret;
+	u32 value = 0;
+
+	struct ec_sensors_data *state = dev_get_drvdata(dev);
+	int sidx = find_ec_sensor_index(state, type, channel);
+
+	if (sidx < 0) {
+		return sidx;
+	}
+
+	ret = get_cached_value_or_update(dev, sidx, state, &value);
+	if (!ret) {
+		*val = scale_sensor_value(value,
+					  get_sensor_info(state, sidx)->type);
+	}
+
+	return ret;
+}
+
+static int asus_ec_hwmon_read_string(struct device *dev,
+				     enum hwmon_sensor_types type, u32 attr,
+				     int channel, const char **str)
+{
+	struct ec_sensors_data *state = dev_get_drvdata(dev);
+	int sensor_index = find_ec_sensor_index(state, type, channel);
+	*str = get_sensor_info(state, sensor_index)->label;
+
+	return 0;
+}
+
+static umode_t asus_ec_hwmon_is_visible(const void *drvdata,
+					enum hwmon_sensor_types type, u32 attr,
+					int channel)
+{
+	const struct ec_sensors_data *state = drvdata;
+
+	return find_ec_sensor_index(state, type, channel) >= 0 ? S_IRUGO : 0;
+}
+
+static int __init
+asus_ec_hwmon_add_chan_info(struct hwmon_channel_info *asus_ec_hwmon_chan,
+			     struct device *dev, int num,
+			     enum hwmon_sensor_types type, u32 config)
+{
+	int i;
+	u32 *cfg = devm_kcalloc(dev, num + 1, sizeof(*cfg), GFP_KERNEL);
+
+	if (!cfg)
+		return -ENOMEM;
+
+	asus_ec_hwmon_chan->type = type;
+	asus_ec_hwmon_chan->config = cfg;
+	for (i = 0; i < num; i++, cfg++)
+		*cfg = config;
+
+	return 0;
+}
+
+static const struct hwmon_ops asus_ec_hwmon_ops = {
+	.is_visible = asus_ec_hwmon_is_visible,
+	.read = asus_ec_hwmon_read,
+	.read_string = asus_ec_hwmon_read_string,
+};
+
+static struct hwmon_chip_info asus_ec_chip_info = {
+	.ops = &asus_ec_hwmon_ops,
+};
+
+static unsigned long __init
+get_board_sensors(const struct device *dev)
+{
+	const struct dmi_system_id *dmi_entry;
+
+	dmi_entry = dmi_first_match(asus_ec_dmi_table);
+	if (!dmi_entry) {
+		dev_info(dev, "Unsupported board");
+		return 0;
+	}
+
+	return (unsigned long)dmi_entry->driver_data;
+}
+
+static int __init configure_sensor_setup(struct device *dev)
+{
+	struct ec_sensors_data *ec_data = dev_get_drvdata(dev);
+	int nr_count[hwmon_max] = { 0 }, nr_types = 0;
+	struct device *hwdev;
+	struct hwmon_channel_info *asus_ec_hwmon_chan;
+	const struct hwmon_channel_info **ptr_asus_ec_ci;
+	const struct hwmon_chip_info *chip_info;
+	const struct ec_sensor_info *si;
+	enum hwmon_sensor_types type;
+	unsigned int i;
+
+	ec_data->board_sensors = get_board_sensors(dev);
+	if (!ec_data->board_sensors) {
+		return -ENODEV;
+	}
+
+	ec_data->nr_sensors = board_sensors_count(ec_data->board_sensors);
+	ec_data->sensors = devm_kcalloc(dev, ec_data->nr_sensors,
+					sizeof(struct ec_sensor), GFP_KERNEL);
+
+	setup_sensor_data(ec_data);
+	ec_data->registers = devm_kcalloc(dev, ec_data->nr_registers,
+					  sizeof(u16), GFP_KERNEL);
+	ec_data->read_buffer = devm_kcalloc(dev, ec_data->nr_registers,
+					    sizeof(u8), GFP_KERNEL);
+
+	if (!ec_data->registers || !ec_data->read_buffer) {
+		return -ENOMEM;
+	}
+
+	fill_ec_registers(ec_data);
+
+	ec_data->aml_mutex = asus_hw_access_mutex(dev);
+
+	for (i = 0; i < ec_data->nr_sensors; ++i) {
+		si = get_sensor_info(ec_data, i);
+		if (!nr_count[si->type])
+			++nr_types;
+		++nr_count[si->type];
+	}
+
+	if (nr_count[hwmon_temp])
+		nr_count[hwmon_chip]++, nr_types++;
+
+	asus_ec_hwmon_chan = devm_kcalloc(
+		dev, nr_types, sizeof(*asus_ec_hwmon_chan), GFP_KERNEL);
+	if (!asus_ec_hwmon_chan)
+		return -ENOMEM;
+
+	ptr_asus_ec_ci = devm_kcalloc(dev, nr_types + 1,
+				       sizeof(*ptr_asus_ec_ci), GFP_KERNEL);
+	if (!ptr_asus_ec_ci)
+		return -ENOMEM;
+
+	asus_ec_chip_info.info = ptr_asus_ec_ci;
+	chip_info = &asus_ec_chip_info;
+
+	for (type = 0; type < hwmon_max; ++type) {
+		if (!nr_count[type])
+			continue;
+
+		asus_ec_hwmon_add_chan_info(asus_ec_hwmon_chan, dev,
+					     nr_count[type], type,
+					     hwmon_attributes[type]);
+		*ptr_asus_ec_ci++ = asus_ec_hwmon_chan++;
+	}
+
+	dev_info(dev, "board has %d EC sensors that span %d registers",
+		 ec_data->nr_sensors, ec_data->nr_registers);
+
+	hwdev = devm_hwmon_device_register_with_info(dev, "asusec",
+						     ec_data, chip_info, NULL);
+
+	return PTR_ERR_OR_ZERO(hwdev);
+}
+
+static int __init asus_ec_probe(struct platform_device *pdev)
+{
+	struct asus_ec_sensors *state;
+	int status = 0;
+
+	state = devm_kzalloc(&pdev->dev, sizeof(struct ec_sensors_data),
+			     GFP_KERNEL);
+
+	if (!state) {
+		return -ENOMEM;
+	}
+
+	dev_set_drvdata(&pdev->dev, state);
+	status = configure_sensor_setup(&pdev->dev);
+	return status;
+}
+
+static const struct acpi_device_id acpi_ec_ids[] = {
+	/* Embedded Controller Device */
+	{ "PNP0C09", 0 },
+	{}
+};
+
+static struct platform_driver asus_ec_sensors_platform_driver = {
+	.driver = {
+		.name	= "asus-ec-sensors",
+		.acpi_match_table = acpi_ec_ids,
+	},
+};
+
+MODULE_DEVICE_TABLE(dmi, asus_ec_dmi_table);
+module_platform_driver_probe(asus_ec_sensors_platform_driver, asus_ec_probe);
+
+module_param_named(mutex_path, mutex_path_override, charp, 0);
+MODULE_PARM_DESC(mutex_path,
+		 "Override ACPI mutex path used to guard access to hardware");
+
+MODULE_AUTHOR("Eugene Shalygin <eugene.shalygin@gmail.com>");
+MODULE_DESCRIPTION(
+	"HWMON driver for sensors accessible via ACPI EC in ASUS motherboards");
+MODULE_LICENSE("GPL");
-- 
GitLab


From 0314c6ac9e98d7551554fe422128726cd2849a8e Mon Sep 17 00:00:00 2001
From: Eugene Shalygin <eugene.shalygin@gmail.com>
Date: Mon, 24 Jan 2022 02:56:44 +0100
Subject: [PATCH 0740/1586] hwmon: (asus-ec-sensors) update documentation

Signed-off-by: Eugene Shalygin <eugene.shalygin@gmail.com>
Link: https://lore.kernel.org/r/20220124015658.687309-3-eugene.shalygin@gmail.com
Tested-by: Oleksandr Natalenko <oleksandr@natalenko.name>
Tested-by: Denis Pauk <pauk.denis@gmail.com>
[groeck: update index.rst, do not drop asus_wmi_ec_sensors.rst]
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 Documentation/hwmon/asus_ec_sensors.rst | 52 +++++++++++++++++++++++++
 Documentation/hwmon/index.rst           |  1 +
 2 files changed, 53 insertions(+)
 create mode 100644 Documentation/hwmon/asus_ec_sensors.rst

diff --git a/Documentation/hwmon/asus_ec_sensors.rst b/Documentation/hwmon/asus_ec_sensors.rst
new file mode 100644
index 0000000000000..b12ac7ebeb1aa
--- /dev/null
+++ b/Documentation/hwmon/asus_ec_sensors.rst
@@ -0,0 +1,52 @@
+.. SPDX-License-Identifier: GPL-2.0-or-later
+
+Kernel driver asus_ec_sensors
+=================================
+
+Supported boards:
+ * PRIME X570-PRO,
+ * Pro WS X570-ACE,
+ * ROG CROSSHAIR VIII DARK HERO,
+ * ROG CROSSHAIR VIII FORMULA,
+ * ROG CROSSHAIR VIII HERO,
+ * ROG CROSSHAIR VIII IMPACT,
+ * ROG STRIX B550-E GAMING,
+ * ROG STRIX B550-I GAMING,
+ * ROG STRIX X570-E GAMING,
+ * ROG STRIX X570-F GAMING,
+ * ROG STRIX X570-I GAMING
+
+Authors:
+    - Eugene Shalygin <eugene.shalygin@gmail.com>
+
+Description:
+------------
+ASUS mainboards publish hardware monitoring information via Super I/O
+chip and the ACPI embedded controller (EC) registers. Some of the sensors
+are only available via the EC.
+
+The driver is aware of and reads the following sensors:
+
+1. Chipset (PCH) temperature
+2. CPU package temperature
+3. Motherboard temperature
+4. Readings from the T_Sensor header
+5. VRM temperature
+6. CPU_Opt fan RPM
+7. VRM heatsink fan RPM
+8. Chipset fan RPM
+9. Readings from the "Water flow meter" header (RPM)
+10. Readings from the "Water In" and "Water Out" temperature headers
+11. CPU current
+
+Sensor values are read from EC registers, and to avoid race with the board
+firmware the driver acquires ACPI mutex, the one used by the WMI when its
+methods access the EC.
+
+Module Parameters
+-----------------
+ * mutex_path: string
+		The driver holds path to the ACPI mutex for each board (actually,
+		the path is mostly identical for them). If ASUS changes this path
+		in a future BIOS update, this parameter can be used to override
+		the stored in the driver value until it gets updated.
diff --git a/Documentation/hwmon/index.rst b/Documentation/hwmon/index.rst
index df20022c741f1..b69fdaf1af82e 100644
--- a/Documentation/hwmon/index.rst
+++ b/Documentation/hwmon/index.rst
@@ -43,6 +43,7 @@ Hardware Monitoring Kernel Drivers
    asb100
    asc7621
    aspeed-pwm-tacho
+   asus_ec_sensors
    asus_wmi_ec_sensors
    asus_wmi_sensors
    bcm54140
-- 
GitLab


From 4408d3600f5af8abc3258900526ae8c16b106e81 Mon Sep 17 00:00:00 2001
From: Eugene Shalygin <eugene.shalygin@gmail.com>
Date: Mon, 24 Jan 2022 02:56:45 +0100
Subject: [PATCH 0741/1586] hwmon: deprecate asis_wmi_ec_sensors driver

Deprecate the asus_wmi_ec_sensors driver in favor of the asus_ec_sensors

Signed-off-by: Eugene Shalygin <eugene.shalygin@gmail.com>
Reviewed-by: Oleksandr Natalenko <oleksandr@natalenko.name>
Link: https://lore.kernel.org/r/20220124015658.687309-4-eugene.shalygin@gmail.com
Tested-by: Oleksandr Natalenko <oleksandr@natalenko.name>
Tested-by: Denis Pauk <pauk.denis@gmail.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/Kconfig | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index b75ec220da5ad..5f1506cca68f9 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -2253,13 +2253,16 @@ config SENSORS_ASUS_WMI
 
 config SENSORS_ASUS_WMI_EC
 	tristate "ASUS WMI B550/X570"
-	depends on ACPI_WMI
+	depends on ACPI_WMI && SENSORS_ASUS_EC=n
 	help
 	  If you say yes here you get support for the ACPI embedded controller
 	  hardware monitoring interface found in B550/X570 ASUS motherboards.
 	  This driver will provide readings of fans, voltages and temperatures
 	  through the system firmware.
 
+	  This driver is deprecated in favor of the ASUS EC Sensors driver
+	  which provides fully compatible output.
+
 	  This driver can also be built as a module. If so, the module
 	  will be called asus_wmi_sensors_ec.
 
-- 
GitLab


From da74944d3a469ffc0e8229520afbf41ad01219b6 Mon Sep 17 00:00:00 2001
From: Vadim Pasternak <vadimp@nvidia.com>
Date: Wed, 26 Jan 2022 16:18:25 +0200
Subject: [PATCH 0742/1586] hwmon: (mlxreg-fan) Use pwm attribute for setting
 fan speed low limit

Recently 'cur_state' user space 'sysfs' interface 'sysfs' has been
deprecated. This interface is used in Nvidia systems for setting fan
speed limit. Currently fan speed limit is set from the user space by
setting 'sysfs' 'cur_state' attribute to 'max_state + n', where 'n' is
required limit, for example: 15 for 50% speed limit, 20 for full fan
speed enforcement.
The purpose of this feature is to provides ability to limit fan speed
according to some system wise considerations, like absence of some
replaceable units (PSU or line cards), high system ambient temperature,
unreliable transceivers temperature sensing or some other factors which
indirectly impacts system's airflow.

The motivation is to support fan low limit feature through 'hwmon'
interface.

Use 'hwmon' 'pwm' attribute for setting low limit for fan speed in
case 'thermal' subsystem is configured in kernel. In this case setting
fan speed through 'hwmon' will never let the 'thermal' subsystem to
select a lower duty cycle than the duty cycle selected with the 'pwm'
attribute.
From other side, fan speed is to be updated in hardware through 'pwm'
only in case the requested fan speed is above last speed set by
'thermal' subsystem, otherwise requested fan speed will be just stored
with no PWM update.

Signed-off-by: Vadim Pasternak <vadimp@nvidia.com>
Link: https://lore.kernel.org/r/20220126141825.13545-1-vadimp@nvidia.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/mlxreg-fan.c | 84 ++++++++++++--------------------------
 1 file changed, 27 insertions(+), 57 deletions(-)

diff --git a/drivers/hwmon/mlxreg-fan.c b/drivers/hwmon/mlxreg-fan.c
index 4a8becdb0d582..b48bd7c961d66 100644
--- a/drivers/hwmon/mlxreg-fan.c
+++ b/drivers/hwmon/mlxreg-fan.c
@@ -18,15 +18,6 @@
 #define MLXREG_FAN_MAX_STATE		10
 #define MLXREG_FAN_MIN_DUTY		51	/* 20% */
 #define MLXREG_FAN_MAX_DUTY		255	/* 100% */
-/*
- * Minimum and maximum FAN allowed speed in percent: from 20% to 100%. Values
- * MLXREG_FAN_MAX_STATE + x, where x is between 2 and 10 are used for
- * setting FAN speed dynamic minimum. For example, if value is set to 14 (40%)
- * cooling levels vector will be set to 4, 4, 4, 4, 4, 5, 6, 7, 8, 9, 10 to
- * introduce PWM speed in percent: 40, 40, 40, 40, 40, 50, 60. 70, 80, 90, 100.
- */
-#define MLXREG_FAN_SPEED_MIN			(MLXREG_FAN_MAX_STATE + 2)
-#define MLXREG_FAN_SPEED_MAX			(MLXREG_FAN_MAX_STATE * 2)
 #define MLXREG_FAN_SPEED_MIN_LEVEL		2	/* 20 percent */
 #define MLXREG_FAN_TACHO_SAMPLES_PER_PULSE_DEF	44
 #define MLXREG_FAN_TACHO_DIV_MIN		283
@@ -87,13 +78,16 @@ struct mlxreg_fan_tacho {
  * @connected: indicates if PWM is connected;
  * @reg: register offset;
  * @cooling: cooling device levels;
+ * @last_hwmon_state: last cooling state set by hwmon subsystem;
+ * @last_thermal_state: last cooling state set by thermal subsystem;
  * @cdev: cooling device;
  */
 struct mlxreg_fan_pwm {
 	struct mlxreg_fan *fan;
 	bool connected;
 	u32 reg;
-	u8 cooling_levels[MLXREG_FAN_MAX_STATE + 1];
+	unsigned long last_hwmon_state;
+	unsigned long last_thermal_state;
 	struct thermal_cooling_device *cdev;
 };
 
@@ -119,6 +113,9 @@ struct mlxreg_fan {
 	int divider;
 };
 
+static int mlxreg_fan_set_cur_state(struct thermal_cooling_device *cdev,
+				    unsigned long state);
+
 static int
 mlxreg_fan_read(struct device *dev, enum hwmon_sensor_types type, u32 attr,
 		int channel, long *val)
@@ -213,6 +210,18 @@ mlxreg_fan_write(struct device *dev, enum hwmon_sensor_types type, u32 attr,
 			    val > MLXREG_FAN_MAX_DUTY)
 				return -EINVAL;
 			pwm = &fan->pwm[channel];
+			/* If thermal is configured - handle PWM limit setting. */
+			if (IS_REACHABLE(CONFIG_THERMAL)) {
+				pwm->last_hwmon_state = MLXREG_FAN_PWM_DUTY2STATE(val);
+				/*
+				 * Update PWM only in case requested state is not less than the
+				 * last thermal state.
+				 */
+				if (pwm->last_hwmon_state >= pwm->last_thermal_state)
+					return mlxreg_fan_set_cur_state(pwm->cdev,
+									pwm->last_hwmon_state);
+				return 0;
+			}
 			return regmap_write(fan->regmap, pwm->reg, val);
 		default:
 			return -EOPNOTSUPP;
@@ -338,58 +347,22 @@ static int mlxreg_fan_set_cur_state(struct thermal_cooling_device *cdev,
 {
 	struct mlxreg_fan_pwm *pwm = cdev->devdata;
 	struct mlxreg_fan *fan = pwm->fan;
-	unsigned long cur_state;
-	int i, config = 0;
-	u32 regval;
 	int err;
 
-	/*
-	 * Verify if this request is for changing allowed FAN dynamical
-	 * minimum. If it is - update cooling levels accordingly and update
-	 * state, if current state is below the newly requested minimum state.
-	 * For example, if current state is 5, and minimal state is to be
-	 * changed from 4 to 6, fan->cooling_levels[0 to 5] will be changed all
-	 * from 4 to 6. And state 5 (fan->cooling_levels[4]) should be
-	 * overwritten.
-	 */
-	if (state >= MLXREG_FAN_SPEED_MIN && state <= MLXREG_FAN_SPEED_MAX) {
-		/*
-		 * This is configuration change, which is only supported through sysfs.
-		 * For configuration non-zero value is to be returned to avoid thermal
-		 * statistics update.
-		 */
-		config = 1;
-		state -= MLXREG_FAN_MAX_STATE;
-		for (i = 0; i < state; i++)
-			pwm->cooling_levels[i] = state;
-		for (i = state; i <= MLXREG_FAN_MAX_STATE; i++)
-			pwm->cooling_levels[i] = i;
-
-		err = regmap_read(fan->regmap, pwm->reg, &regval);
-		if (err) {
-			dev_err(fan->dev, "Failed to query PWM duty\n");
-			return err;
-		}
-
-		cur_state = MLXREG_FAN_PWM_DUTY2STATE(regval);
-		if (state < cur_state)
-			return config;
-
-		state = cur_state;
-	}
-
 	if (state > MLXREG_FAN_MAX_STATE)
 		return -EINVAL;
 
-	/* Normalize the state to the valid speed range. */
-	state = pwm->cooling_levels[state];
+	/* Save thermal state. */
+	pwm->last_thermal_state = state;
+
+	state = max_t(unsigned long, state, pwm->last_hwmon_state);
 	err = regmap_write(fan->regmap, pwm->reg,
 			   MLXREG_FAN_PWM_STATE2DUTY(state));
 	if (err) {
 		dev_err(fan->dev, "Failed to write PWM duty\n");
 		return err;
 	}
-	return config;
+	return 0;
 }
 
 static const struct thermal_cooling_device_ops mlxreg_fan_cooling_ops = {
@@ -564,7 +537,7 @@ static int mlxreg_fan_config(struct mlxreg_fan *fan,
 
 static int mlxreg_fan_cooling_config(struct device *dev, struct mlxreg_fan *fan)
 {
-	int i, j;
+	int i;
 
 	for (i = 0; i < MLXREG_FAN_MAX_PWM; i++) {
 		struct mlxreg_fan_pwm *pwm = &fan->pwm[i];
@@ -579,11 +552,8 @@ static int mlxreg_fan_cooling_config(struct device *dev, struct mlxreg_fan *fan)
 			return PTR_ERR(pwm->cdev);
 		}
 
-		/* Init cooling levels per PWM state. */
-		for (j = 0; j < MLXREG_FAN_SPEED_MIN_LEVEL; j++)
-			pwm->cooling_levels[j] = MLXREG_FAN_SPEED_MIN_LEVEL;
-		for (j = MLXREG_FAN_SPEED_MIN_LEVEL; j <= MLXREG_FAN_MAX_STATE; j++)
-			pwm->cooling_levels[j] = j;
+		/* Set minimal PWM speed. */
+		pwm->last_hwmon_state = MLXREG_FAN_PWM_DUTY2STATE(MLXREG_FAN_MIN_DUTY);
 	}
 
 	return 0;
-- 
GitLab


From 06f7d6e75d1232fefc2c3c8567159963c38ec628 Mon Sep 17 00:00:00 2001
From: Marcello Sylvester Bauer <sylv@sylv.io>
Date: Thu, 27 Jan 2022 17:17:27 +0100
Subject: [PATCH 0743/1586] hwmon: (max6639) Update Datasheet URL

The old Datasheet does not exist anymore.

Signed-off-by: Marcello Sylvester Bauer <sylv@sylv.io>
Link: https://lore.kernel.org/r/76025f40d2684dc0d3ec02c8899b726b07a0e7da.1643299570.git.sylv@sylv.io
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 Documentation/hwmon/max6639.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/hwmon/max6639.rst b/Documentation/hwmon/max6639.rst
index 3da54225f83cd..c85d285a3489d 100644
--- a/Documentation/hwmon/max6639.rst
+++ b/Documentation/hwmon/max6639.rst
@@ -9,7 +9,7 @@ Supported chips:
 
     Addresses scanned: I2C 0x2c, 0x2e, 0x2f
 
-    Datasheet: http://pdfserv.maxim-ic.com/en/ds/MAX6639.pdf
+    Datasheet: https://datasheets.maximintegrated.com/en/ds/MAX6639-MAX6639F.pdf
 
 Authors:
     - He Changqing <hechangqing@semptian.com>
-- 
GitLab


From 4e2271ea275193a24ea1c905a38fa000b0f8b822 Mon Sep 17 00:00:00 2001
From: Marcello Sylvester Bauer <sylv@sylv.io>
Date: Thu, 27 Jan 2022 17:17:28 +0100
Subject: [PATCH 0744/1586] hwmon: (max6639) Add regulator support

Add regulator support for boards where the fan-supply have to be
powered up before it can be used.

Signed-off-by: Patrick Rudolph <patrick.rudolph@9elements.com>
Signed-off-by: Marcello Sylvester Bauer <sylv@sylv.io>
Link: https://lore.kernel.org/r/2cb9ed600fb43cdc604799746fbde2e2942cdca6.1643299570.git.sylv@sylv.io
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/max6639.c | 62 +++++++++++++++++++++++++++++++++++------
 1 file changed, 54 insertions(+), 8 deletions(-)

diff --git a/drivers/hwmon/max6639.c b/drivers/hwmon/max6639.c
index ccc0f047bd446..14bb7726f8d7e 100644
--- a/drivers/hwmon/max6639.c
+++ b/drivers/hwmon/max6639.c
@@ -87,6 +87,9 @@ struct max6639_data {
 	/* Register values initialized only once */
 	u8 ppr;			/* Pulses per rotation 0..3 for 1..4 ppr */
 	u8 rpm_range;		/* Index in above rpm_ranges table */
+
+	/* Optional regulator for FAN supply */
+	struct regulator *reg;
 };
 
 static struct max6639_data *max6639_update_device(struct device *dev)
@@ -516,6 +519,11 @@ static int max6639_detect(struct i2c_client *client,
 	return 0;
 }
 
+static void max6639_regulator_disable(void *data)
+{
+	regulator_disable(data);
+}
+
 static int max6639_probe(struct i2c_client *client)
 {
 	struct device *dev = &client->dev;
@@ -528,6 +536,28 @@ static int max6639_probe(struct i2c_client *client)
 		return -ENOMEM;
 
 	data->client = client;
+
+	data->reg = devm_regulator_get_optional(dev, "fan");
+	if (IS_ERR(data->reg)) {
+		if (PTR_ERR(data->reg) != -ENODEV)
+			return PTR_ERR(data->reg);
+
+		data->reg = NULL;
+	} else {
+		/* Spin up fans */
+		err = regulator_enable(data->reg);
+		if (err) {
+			dev_err(dev, "Failed to enable fan supply: %d\n", err);
+			return err;
+		}
+		err = devm_add_action_or_reset(dev, max6639_regulator_disable,
+					       data->reg);
+		if (err) {
+			dev_err(dev, "Failed to register action: %d\n", err);
+			return err;
+		}
+	}
+
 	mutex_init(&data->update_lock);
 
 	/* Initialize the max6639 chip */
@@ -545,23 +575,39 @@ static int max6639_probe(struct i2c_client *client)
 static int max6639_suspend(struct device *dev)
 {
 	struct i2c_client *client = to_i2c_client(dev);
-	int data = i2c_smbus_read_byte_data(client, MAX6639_REG_GCONFIG);
-	if (data < 0)
-		return data;
+	struct max6639_data *data = dev_get_drvdata(dev);
+	int ret = i2c_smbus_read_byte_data(client, MAX6639_REG_GCONFIG);
+
+	if (ret < 0)
+		return ret;
+
+	if (data->reg)
+		regulator_disable(data->reg);
 
 	return i2c_smbus_write_byte_data(client,
-			MAX6639_REG_GCONFIG, data | MAX6639_GCONFIG_STANDBY);
+			MAX6639_REG_GCONFIG, ret | MAX6639_GCONFIG_STANDBY);
 }
 
 static int max6639_resume(struct device *dev)
 {
 	struct i2c_client *client = to_i2c_client(dev);
-	int data = i2c_smbus_read_byte_data(client, MAX6639_REG_GCONFIG);
-	if (data < 0)
-		return data;
+	struct max6639_data *data = dev_get_drvdata(dev);
+	int ret;
+
+	if (data->reg) {
+		ret = regulator_enable(data->reg);
+		if (ret) {
+			dev_err(dev, "Failed to enable fan supply: %d\n", ret);
+			return ret;
+		}
+	}
+
+	ret = i2c_smbus_read_byte_data(client, MAX6639_REG_GCONFIG);
+	if (ret < 0)
+		return ret;
 
 	return i2c_smbus_write_byte_data(client,
-			MAX6639_REG_GCONFIG, data & ~MAX6639_GCONFIG_STANDBY);
+			MAX6639_REG_GCONFIG, ret & ~MAX6639_GCONFIG_STANDBY);
 }
 #endif /* CONFIG_PM_SLEEP */
 
-- 
GitLab


From 2f66cb5bf35265a8c9413a019166a0c29823ce57 Mon Sep 17 00:00:00 2001
From: Eugene Shalygin <eugene.shalygin@gmail.com>
Date: Thu, 3 Feb 2022 21:30:52 +0100
Subject: [PATCH 0745/1586] hwmon: (asus-ec-sensors) Add Crosshair VIII Hero
 WiFi

The Wi-Fi variant of Crosshair VIII Hero provides the same sensors,
which was tested by a Libre Hardware Monitor user [1].

[1] https://github.com/LibreHardwareMonitor/LibreHardwareMonitor/pull/453#issuecomment-1028398487

Signed-off-by: Eugene Shalygin <eugene.shalygin@gmail.com>
Link: https://lore.kernel.org/r/20220203203052.441712-1-eugene.shalygin@gmail.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 Documentation/hwmon/asus_ec_sensors.rst | 1 +
 drivers/hwmon/asus-ec-sensors.c         | 6 ++++++
 2 files changed, 7 insertions(+)

diff --git a/Documentation/hwmon/asus_ec_sensors.rst b/Documentation/hwmon/asus_ec_sensors.rst
index b12ac7ebeb1aa..22de1b037cfba 100644
--- a/Documentation/hwmon/asus_ec_sensors.rst
+++ b/Documentation/hwmon/asus_ec_sensors.rst
@@ -7,6 +7,7 @@ Supported boards:
  * PRIME X570-PRO,
  * Pro WS X570-ACE,
  * ROG CROSSHAIR VIII DARK HERO,
+ * ROG CROSSHAIR VIII HERO (WI-FI)
  * ROG CROSSHAIR VIII FORMULA,
  * ROG CROSSHAIR VIII HERO,
  * ROG CROSSHAIR VIII IMPACT,
diff --git a/drivers/hwmon/asus-ec-sensors.c b/drivers/hwmon/asus-ec-sensors.c
index 7285334c7d801..05244209c0c6e 100644
--- a/drivers/hwmon/asus-ec-sensors.c
+++ b/drivers/hwmon/asus-ec-sensors.c
@@ -188,6 +188,12 @@ static const struct dmi_system_id asus_ec_dmi_table[] __initconst = {
 		SENSOR_TEMP_VRM | SENSOR_SET_TEMP_WATER |
 		SENSOR_FAN_CPU_OPT | SENSOR_FAN_CHIPSET |
 		SENSOR_FAN_WATER_FLOW | SENSOR_CURR_CPU),
+	DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE,
+			      "ROG CROSSHAIR VIII HERO (WI-FI)",
+		SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_T_SENSOR |
+		SENSOR_TEMP_VRM | SENSOR_SET_TEMP_WATER |
+		SENSOR_FAN_CPU_OPT | SENSOR_FAN_CHIPSET |
+		SENSOR_FAN_WATER_FLOW | SENSOR_CURR_CPU),
 	DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE,
 			      "ROG CROSSHAIR VIII IMPACT",
 		SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_T_SENSOR |
-- 
GitLab


From 88846ff7422089f576e29b68153034babbc68ce6 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Sat, 5 Feb 2022 12:20:15 +0300
Subject: [PATCH 0746/1586] hwmon: (asus-ec-sensors) fix a typo in
 asus_ec_probe()

There is no such struct as "asus_ec_sensors", it was supposed to be
"ec_sensors_data".  This typo does not affect either build or runtime.

Fixes: c4b1687d6897 ("hwmon: (asus-ec-sensors) add driver for ASUS EC")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Link: https://lore.kernel.org/r/20220205092015.GA612@kili
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/asus-ec-sensors.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hwmon/asus-ec-sensors.c b/drivers/hwmon/asus-ec-sensors.c
index 05244209c0c6e..7330cce46b14a 100644
--- a/drivers/hwmon/asus-ec-sensors.c
+++ b/drivers/hwmon/asus-ec-sensors.c
@@ -659,7 +659,7 @@ static int __init configure_sensor_setup(struct device *dev)
 
 static int __init asus_ec_probe(struct platform_device *pdev)
 {
-	struct asus_ec_sensors *state;
+	struct ec_sensors_data *state;
 	int status = 0;
 
 	state = devm_kzalloc(&pdev->dev, sizeof(struct ec_sensors_data),
-- 
GitLab


From 339f8a998f58e8693f73429e7597f2ffc9b93ec5 Mon Sep 17 00:00:00 2001
From: Eugene Shalygin <eugene.shalygin@gmail.com>
Date: Fri, 4 Feb 2022 17:30:45 +0100
Subject: [PATCH 0747/1586] hwmon: (asus-ec-sensors) read sensors as signed
 ints

Temperature sensor readings are signed, which is hinted by their blank
value (oxd8, 216 as unsigned and -40 as signed). T_Sensor, Crosshair
VIII Hero, and a freezer were used to confirm that.

Here we read fan sensors as signed too, because with their typical
values and 2-byte width, I can't tell a difference between signed and
unsigned, as I don't have a high speed chipset fan.

Signed-off-by: Eugene Shalygin <eugene.shalygin@gmail.com>
Link: https://lore.kernel.org/r/20220204163045.576903-1-eugene.shalygin@gmail.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/asus-ec-sensors.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/hwmon/asus-ec-sensors.c b/drivers/hwmon/asus-ec-sensors.c
index 7330cce46b14a..dd7b207d062fd 100644
--- a/drivers/hwmon/asus-ec-sensors.c
+++ b/drivers/hwmon/asus-ec-sensors.c
@@ -221,7 +221,7 @@ static const struct dmi_system_id asus_ec_dmi_table[] __initconst = {
 
 struct ec_sensor {
 	unsigned int info_index;
-	u32 cached_value;
+	s32 cached_value;
 };
 
 struct ec_sensors_data {
@@ -408,15 +408,15 @@ static int asus_ec_block_read(const struct device *dev,
 	return status;
 }
 
-static inline u32 get_sensor_value(const struct ec_sensor_info *si, u8 *data)
+static inline s32 get_sensor_value(const struct ec_sensor_info *si, u8 *data)
 {
 	switch (si->addr.components.size) {
 	case 1:
-		return *data;
+		return (s8)*data;
 	case 2:
-		return get_unaligned_be16(data);
+		return (s16)get_unaligned_be16(data);
 	case 4:
-		return get_unaligned_be32(data);
+		return (s32)get_unaligned_be32(data);
 	default:
 		return 0;
 	}
@@ -462,7 +462,7 @@ cleanup:
 	return status;
 }
 
-static int scale_sensor_value(u32 value, int data_type)
+static long scale_sensor_value(s32 value, int data_type)
 {
 	switch (data_type) {
 	case hwmon_curr:
@@ -476,7 +476,7 @@ static int scale_sensor_value(u32 value, int data_type)
 
 static int get_cached_value_or_update(const struct device *dev,
 				      int sensor_index,
-				      struct ec_sensors_data *state, u32 *value)
+				      struct ec_sensors_data *state, s32 *value)
 {
 	if (time_after(jiffies, state->last_updated + HZ)) {
 		if (update_ec_sensors(dev, state)) {
@@ -499,7 +499,7 @@ static int asus_ec_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
 			      u32 attr, int channel, long *val)
 {
 	int ret;
-	u32 value = 0;
+	s32 value = 0;
 
 	struct ec_sensors_data *state = dev_get_drvdata(dev);
 	int sidx = find_ec_sensor_index(state, type, channel);
-- 
GitLab


From 7979a30ddc560b372c0b9549f0587f56365cf6bf Mon Sep 17 00:00:00 2001
From: Denis Pauk <pauk.denis@gmail.com>
Date: Mon, 7 Feb 2022 23:48:15 +0200
Subject: [PATCH 0748/1586] hwmon: (nct6775) add ASUS ROG STRIX
 Z390/Z490/X570-* / PRIME X570-P

Boards such as
* PRIME X570-P,
* ROG STRIX B550-F GAMING WIFI II,
* ROG STRIX B550-XE GAMING (WI-FI),
* ROG STRIX X570-E GAMING,
* ROG STRIX Z390-F GAMING,
* ROG STRIX Z390-H GAMING,
* ROG STRIX Z390-I GAMING,
* ROG STRIX Z490-A GAMING,
* ROG STRIX Z490-E GAMING,
* ROG STRIX Z490-F GAMING,
* ROG STRIX Z490-G GAMING,
* ROG STRIX Z490-G GAMING (WI-FI),
* ROG STRIX Z490-H GAMING
have got a nct6775 chip, but by default there's no use of it
because of resource conflict with WMI method.

This commit adds such boards to the WMI monitoring list.

BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=204807
Signed-off-by: Denis Pauk <pauk.denis@gmail.com>
Tested-by: Per Melin <kernel@melin.net>
Tested-by: Jaap de Haan <jaap.dehaan@freenet.de>
Link: https://lore.kernel.org/r/20220207214815.10995-1-pauk.denis@gmail.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/nct6775.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/drivers/hwmon/nct6775.c b/drivers/hwmon/nct6775.c
index fbf6266c7ba7b..2b91f7e05126e 100644
--- a/drivers/hwmon/nct6775.c
+++ b/drivers/hwmon/nct6775.c
@@ -5109,6 +5109,7 @@ static const char * const asus_wmi_boards[] = {
 	"PRIME B550-PLUS",
 	"PRIME B550M-A",
 	"PRIME B550M-A (WI-FI)",
+	"PRIME X570-P",
 	"PRIME X570-PRO",
 	"ROG CROSSHAIR VIII DARK HERO",
 	"ROG CROSSHAIR VIII FORMULA",
@@ -5118,10 +5119,22 @@ static const char * const asus_wmi_boards[] = {
 	"ROG STRIX B550-E GAMING",
 	"ROG STRIX B550-F GAMING",
 	"ROG STRIX B550-F GAMING (WI-FI)",
+	"ROG STRIX B550-F GAMING WIFI II",
 	"ROG STRIX B550-I GAMING",
+	"ROG STRIX B550-XE GAMING (WI-FI)",
+	"ROG STRIX X570-E GAMING",
 	"ROG STRIX X570-F GAMING",
 	"ROG STRIX X570-I GAMING",
 	"ROG STRIX Z390-E GAMING",
+	"ROG STRIX Z390-F GAMING",
+	"ROG STRIX Z390-H GAMING",
+	"ROG STRIX Z390-I GAMING",
+	"ROG STRIX Z490-A GAMING",
+	"ROG STRIX Z490-E GAMING",
+	"ROG STRIX Z490-F GAMING",
+	"ROG STRIX Z490-G GAMING",
+	"ROG STRIX Z490-G GAMING (WI-FI)",
+	"ROG STRIX Z490-H GAMING",
 	"ROG STRIX Z490-I GAMING",
 	"TUF GAMING B550M-PLUS",
 	"TUF GAMING B550M-PLUS (WI-FI)",
-- 
GitLab


From f53207017fd99aaeecef6e61b4a3d8e868f53756 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Thu, 23 Dec 2021 10:32:05 -0800
Subject: [PATCH 0749/1586] hwmon: (adt7x10) Convert to use regmap

Using regmap lets us use the regmap subsystem for SPI vs. I2C register
accesses. It lets us hide access differences in backend code and lets
the common code just access registers without knowing their size.
We can also use regmap for register caching.

Tested-by: Cosmin Tanislav <cosmin.tanislav@analog.com>
Reviewed-by: Cosmin Tanislav <cosmin.tanislav@analog.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/Kconfig   |   1 +
 drivers/hwmon/adt7310.c |  88 +++++++++++++---
 drivers/hwmon/adt7410.c |  77 ++++++++++----
 drivers/hwmon/adt7x10.c | 225 +++++++++++++++-------------------------
 drivers/hwmon/adt7x10.h |  10 +-
 5 files changed, 213 insertions(+), 188 deletions(-)

diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index 5f1506cca68f9..387253ae6c149 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -174,6 +174,7 @@ config SENSORS_ADM9240
 
 config SENSORS_ADT7X10
 	tristate
+	select REGMAP
 	help
 	  This module contains common code shared by the ADT7310/ADT7320 and
 	  ADT7410/ADT7420 temperature monitoring chip drivers.
diff --git a/drivers/hwmon/adt7310.c b/drivers/hwmon/adt7310.c
index c40cac16af683..a83092470bce4 100644
--- a/drivers/hwmon/adt7310.c
+++ b/drivers/hwmon/adt7310.c
@@ -8,6 +8,7 @@
 
 #include <linux/module.h>
 #include <linux/init.h>
+#include <linux/regmap.h>
 #include <linux/spi/spi.h>
 #include <asm/unaligned.h>
 
@@ -38,16 +39,13 @@ static const u8 adt7310_reg_table[] = {
 
 #define AD7310_COMMAND(reg) (adt7310_reg_table[(reg)] << ADT7310_CMD_REG_OFFSET)
 
-static int adt7310_spi_read_word(struct device *dev, u8 reg)
+static int adt7310_spi_read_word(struct spi_device *spi, u8 reg)
 {
-	struct spi_device *spi = to_spi_device(dev);
-
 	return spi_w8r16be(spi, AD7310_COMMAND(reg) | ADT7310_CMD_READ);
 }
 
-static int adt7310_spi_write_word(struct device *dev, u8 reg, u16 data)
+static int adt7310_spi_write_word(struct spi_device *spi, u8 reg, u16 data)
 {
-	struct spi_device *spi = to_spi_device(dev);
 	u8 buf[3];
 
 	buf[0] = AD7310_COMMAND(reg);
@@ -56,17 +54,13 @@ static int adt7310_spi_write_word(struct device *dev, u8 reg, u16 data)
 	return spi_write(spi, buf, sizeof(buf));
 }
 
-static int adt7310_spi_read_byte(struct device *dev, u8 reg)
+static int adt7310_spi_read_byte(struct spi_device *spi, u8 reg)
 {
-	struct spi_device *spi = to_spi_device(dev);
-
 	return spi_w8r8(spi, AD7310_COMMAND(reg) | ADT7310_CMD_READ);
 }
 
-static int adt7310_spi_write_byte(struct device *dev, u8 reg,
-	u8 data)
+static int adt7310_spi_write_byte(struct spi_device *spi, u8 reg, u8 data)
 {
-	struct spi_device *spi = to_spi_device(dev);
 	u8 buf[2];
 
 	buf[0] = AD7310_COMMAND(reg);
@@ -75,17 +69,77 @@ static int adt7310_spi_write_byte(struct device *dev, u8 reg,
 	return spi_write(spi, buf, sizeof(buf));
 }
 
-static const struct adt7x10_ops adt7310_spi_ops = {
-	.read_word = adt7310_spi_read_word,
-	.write_word = adt7310_spi_write_word,
-	.read_byte = adt7310_spi_read_byte,
-	.write_byte = adt7310_spi_write_byte,
+static bool adt7310_regmap_is_volatile(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case ADT7X10_TEMPERATURE:
+	case ADT7X10_STATUS:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static int adt7310_reg_read(void *context, unsigned int reg, unsigned int *val)
+{
+	struct spi_device *spi = context;
+	int regval;
+
+	switch (reg) {
+	case ADT7X10_TEMPERATURE:
+	case ADT7X10_T_ALARM_HIGH:
+	case ADT7X10_T_ALARM_LOW:
+	case ADT7X10_T_CRIT:
+		regval = adt7310_spi_read_word(spi, reg);
+		break;
+	default:
+		regval = adt7310_spi_read_byte(spi, reg);
+		break;
+	}
+	if (regval < 0)
+		return regval;
+	*val = regval;
+	return 0;
+}
+
+static int adt7310_reg_write(void *context, unsigned int reg, unsigned int val)
+{
+	struct spi_device *spi = context;
+	int ret;
+
+	switch (reg) {
+	case ADT7X10_TEMPERATURE:
+	case ADT7X10_T_ALARM_HIGH:
+	case ADT7X10_T_ALARM_LOW:
+	case ADT7X10_T_CRIT:
+		ret = adt7310_spi_write_word(spi, reg, val);
+		break;
+	default:
+		ret = adt7310_spi_write_byte(spi, reg, val);
+		break;
+	}
+	return ret;
+}
+
+static const struct regmap_config adt7310_regmap_config = {
+	.reg_bits = 8,
+	.val_bits = 16,
+	.cache_type = REGCACHE_RBTREE,
+	.volatile_reg = adt7310_regmap_is_volatile,
+	.reg_read = adt7310_reg_read,
+	.reg_write = adt7310_reg_write,
 };
 
 static int adt7310_spi_probe(struct spi_device *spi)
 {
+	struct regmap *regmap;
+
+	regmap = devm_regmap_init(&spi->dev, NULL, spi, &adt7310_regmap_config);
+	if (IS_ERR(regmap))
+		return PTR_ERR(regmap);
+
 	return adt7x10_probe(&spi->dev, spi_get_device_id(spi)->name, spi->irq,
-			&adt7310_spi_ops);
+			     regmap);
 }
 
 static int adt7310_spi_remove(struct spi_device *spi)
diff --git a/drivers/hwmon/adt7410.c b/drivers/hwmon/adt7410.c
index 973db057427be..b1f4497dca051 100644
--- a/drivers/hwmon/adt7410.c
+++ b/drivers/hwmon/adt7410.c
@@ -9,43 +9,82 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/i2c.h>
+#include <linux/regmap.h>
 
 #include "adt7x10.h"
 
-static int adt7410_i2c_read_word(struct device *dev, u8 reg)
+static bool adt7410_regmap_is_volatile(struct device *dev, unsigned int reg)
 {
-	return i2c_smbus_read_word_swapped(to_i2c_client(dev), reg);
+	switch (reg) {
+	case ADT7X10_TEMPERATURE:
+	case ADT7X10_STATUS:
+		return true;
+	default:
+		return false;
+	}
 }
 
-static int adt7410_i2c_write_word(struct device *dev, u8 reg, u16 data)
+static int adt7410_reg_read(void *context, unsigned int reg, unsigned int *val)
 {
-	return i2c_smbus_write_word_swapped(to_i2c_client(dev), reg, data);
-}
+	struct i2c_client *client = context;
+	int regval;
 
-static int adt7410_i2c_read_byte(struct device *dev, u8 reg)
-{
-	return i2c_smbus_read_byte_data(to_i2c_client(dev), reg);
+	switch (reg) {
+	case ADT7X10_TEMPERATURE:
+	case ADT7X10_T_ALARM_HIGH:
+	case ADT7X10_T_ALARM_LOW:
+	case ADT7X10_T_CRIT:
+		regval = i2c_smbus_read_word_swapped(client, reg);
+		break;
+	default:
+		regval = i2c_smbus_read_byte_data(client, reg);
+		break;
+	}
+	if (regval < 0)
+		return regval;
+	*val = regval;
+	return 0;
 }
 
-static int adt7410_i2c_write_byte(struct device *dev, u8 reg, u8 data)
+static int adt7410_reg_write(void *context, unsigned int reg, unsigned int val)
 {
-	return i2c_smbus_write_byte_data(to_i2c_client(dev), reg, data);
+	struct i2c_client *client = context;
+	int ret;
+
+	switch (reg) {
+	case ADT7X10_TEMPERATURE:
+	case ADT7X10_T_ALARM_HIGH:
+	case ADT7X10_T_ALARM_LOW:
+	case ADT7X10_T_CRIT:
+		ret = i2c_smbus_write_word_swapped(client, reg, val);
+		break;
+	default:
+		ret = i2c_smbus_write_byte_data(client, reg, val);
+		break;
+	}
+	return ret;
 }
 
-static const struct adt7x10_ops adt7410_i2c_ops = {
-	.read_word = adt7410_i2c_read_word,
-	.write_word = adt7410_i2c_write_word,
-	.read_byte = adt7410_i2c_read_byte,
-	.write_byte = adt7410_i2c_write_byte,
+static const struct regmap_config adt7410_regmap_config = {
+	.reg_bits = 8,
+	.val_bits = 16,
+	.max_register = ADT7X10_ID,
+	.cache_type = REGCACHE_RBTREE,
+	.volatile_reg = adt7410_regmap_is_volatile,
+	.reg_read = adt7410_reg_read,
+	.reg_write = adt7410_reg_write,
 };
 
 static int adt7410_i2c_probe(struct i2c_client *client)
 {
-	if (!i2c_check_functionality(client->adapter,
-			I2C_FUNC_SMBUS_BYTE_DATA | I2C_FUNC_SMBUS_WORD_DATA))
-		return -ENODEV;
+	struct regmap *regmap;
+
+	regmap = devm_regmap_init(&client->dev, NULL, client,
+				  &adt7410_regmap_config);
+	if (IS_ERR(regmap))
+		return PTR_ERR(regmap);
 
-	return adt7x10_probe(&client->dev, NULL, client->irq, &adt7410_i2c_ops);
+	return adt7x10_probe(&client->dev, NULL, client->irq, regmap);
 }
 
 static int adt7410_i2c_remove(struct i2c_client *client)
diff --git a/drivers/hwmon/adt7x10.c b/drivers/hwmon/adt7x10.c
index e9d33aa78a193..05dd48b707b44 100644
--- a/drivers/hwmon/adt7x10.c
+++ b/drivers/hwmon/adt7x10.c
@@ -18,6 +18,7 @@
 #include <linux/mutex.h>
 #include <linux/delay.h>
 #include <linux/interrupt.h>
+#include <linux/regmap.h>
 
 #include "adt7x10.h"
 
@@ -53,46 +54,15 @@
 
 /* Each client has this additional data */
 struct adt7x10_data {
-	const struct adt7x10_ops *ops;
+	struct regmap		*regmap;
 	const char		*name;
 	struct device		*hwmon_dev;
 	struct mutex		update_lock;
 	u8			config;
 	u8			oldconfig;
-	bool			valid;		/* true if registers valid */
-	unsigned long		last_updated;	/* In jiffies */
-	s16			temp[4];	/* Register values,
-						   0 = input
-						   1 = high
-						   2 = low
-						   3 = critical */
-	u8			hyst;		/* hysteresis offset */
+	bool			valid;		/* true if temperature valid */
 };
 
-static int adt7x10_read_byte(struct device *dev, u8 reg)
-{
-	struct adt7x10_data *d = dev_get_drvdata(dev);
-	return d->ops->read_byte(dev, reg);
-}
-
-static int adt7x10_write_byte(struct device *dev, u8 reg, u8 data)
-{
-	struct adt7x10_data *d = dev_get_drvdata(dev);
-	return d->ops->write_byte(dev, reg, data);
-}
-
-static int adt7x10_read_word(struct device *dev, u8 reg)
-{
-	struct adt7x10_data *d = dev_get_drvdata(dev);
-	return d->ops->read_word(dev, reg);
-}
-
-static int adt7x10_write_word(struct device *dev, u8 reg, u16 data)
-{
-	struct adt7x10_data *d = dev_get_drvdata(dev);
-	return d->ops->write_word(dev, reg, data);
-}
-
 static const u8 ADT7X10_REG_TEMP[4] = {
 	ADT7X10_TEMPERATURE,		/* input */
 	ADT7X10_T_ALARM_HIGH,		/* high */
@@ -103,10 +73,12 @@ static const u8 ADT7X10_REG_TEMP[4] = {
 static irqreturn_t adt7x10_irq_handler(int irq, void *private)
 {
 	struct device *dev = private;
-	int status;
+	struct adt7x10_data *d = dev_get_drvdata(dev);
+	unsigned int status;
+	int ret;
 
-	status = adt7x10_read_byte(dev, ADT7X10_STATUS);
-	if (status < 0)
+	ret = regmap_read(d->regmap, ADT7X10_STATUS, &status);
+	if (ret < 0)
 		return IRQ_HANDLED;
 
 	if (status & ADT7X10_STAT_T_HIGH)
@@ -119,14 +91,15 @@ static irqreturn_t adt7x10_irq_handler(int irq, void *private)
 	return IRQ_HANDLED;
 }
 
-static int adt7x10_temp_ready(struct device *dev)
+static int adt7x10_temp_ready(struct regmap *regmap)
 {
-	int i, status;
+	unsigned int status;
+	int i, ret;
 
 	for (i = 0; i < 6; i++) {
-		status = adt7x10_read_byte(dev, ADT7X10_STATUS);
-		if (status < 0)
-			return status;
+		ret = regmap_read(regmap, ADT7X10_STATUS, &status);
+		if (ret < 0)
+			return ret;
 		if (!(status & ADT7X10_STAT_NOT_RDY))
 			return 0;
 		msleep(60);
@@ -134,71 +107,10 @@ static int adt7x10_temp_ready(struct device *dev)
 	return -ETIMEDOUT;
 }
 
-static int adt7x10_update_temp(struct device *dev)
-{
-	struct adt7x10_data *data = dev_get_drvdata(dev);
-	int ret = 0;
-
-	mutex_lock(&data->update_lock);
-
-	if (time_after(jiffies, data->last_updated + HZ + HZ / 2)
-	    || !data->valid) {
-		int temp;
-
-		dev_dbg(dev, "Starting update\n");
-
-		ret = adt7x10_temp_ready(dev); /* check for new value */
-		if (ret)
-			goto abort;
-
-		temp = adt7x10_read_word(dev, ADT7X10_REG_TEMP[0]);
-		if (temp < 0) {
-			ret = temp;
-			dev_dbg(dev, "Failed to read value: reg %d, error %d\n",
-				ADT7X10_REG_TEMP[0], ret);
-			goto abort;
-		}
-		data->temp[0] = temp;
-		data->last_updated = jiffies;
-		data->valid = true;
-	}
-
-abort:
-	mutex_unlock(&data->update_lock);
-	return ret;
-}
-
-static int adt7x10_fill_cache(struct device *dev)
-{
-	struct adt7x10_data *data = dev_get_drvdata(dev);
-	int ret;
-	int i;
-
-	for (i = 1; i < ARRAY_SIZE(data->temp); i++) {
-		ret = adt7x10_read_word(dev, ADT7X10_REG_TEMP[i]);
-		if (ret < 0) {
-			dev_dbg(dev, "Failed to read value: reg %d, error %d\n",
-				ADT7X10_REG_TEMP[i], ret);
-			return ret;
-		}
-		data->temp[i] = ret;
-	}
-
-	ret = adt7x10_read_byte(dev, ADT7X10_T_HYST);
-	if (ret < 0) {
-		dev_dbg(dev, "Failed to read value: reg %d, error %d\n",
-				ADT7X10_T_HYST, ret);
-		return ret;
-	}
-	data->hyst = ret;
-
-	return 0;
-}
-
 static s16 ADT7X10_TEMP_TO_REG(long temp)
 {
 	return DIV_ROUND_CLOSEST(clamp_val(temp, ADT7X10_TEMP_MIN,
-					       ADT7X10_TEMP_MAX) * 128, 1000);
+					   ADT7X10_TEMP_MAX) * 128, 1000);
 }
 
 static int ADT7X10_REG_TO_TEMP(struct adt7x10_data *data, s16 reg)
@@ -222,18 +134,26 @@ static ssize_t adt7x10_temp_show(struct device *dev,
 {
 	struct sensor_device_attribute *attr = to_sensor_dev_attr(da);
 	struct adt7x10_data *data = dev_get_drvdata(dev);
+	unsigned int val;
+	int ret;
 
-
-	if (attr->index == 0) {
-		int ret;
-
-		ret = adt7x10_update_temp(dev);
-		if (ret)
+	mutex_lock(&data->update_lock);
+	if (attr->index == 0 && !data->valid) {
+		/* wait for valid temperature */
+		ret = adt7x10_temp_ready(data->regmap);
+		if (ret) {
+			mutex_unlock(&data->update_lock);
 			return ret;
+		}
+		data->valid = true;
 	}
+	mutex_unlock(&data->update_lock);
 
-	return sprintf(buf, "%d\n", ADT7X10_REG_TO_TEMP(data,
-		       data->temp[attr->index]));
+	ret = regmap_read(data->regmap, ADT7X10_REG_TEMP[attr->index], &val);
+	if (ret)
+		return ret;
+
+	return sprintf(buf, "%d\n", ADT7X10_REG_TO_TEMP(data, val));
 }
 
 static ssize_t adt7x10_temp_store(struct device *dev,
@@ -251,12 +171,10 @@ static ssize_t adt7x10_temp_store(struct device *dev,
 		return ret;
 
 	mutex_lock(&data->update_lock);
-	data->temp[nr] = ADT7X10_TEMP_TO_REG(temp);
-	ret = adt7x10_write_word(dev, ADT7X10_REG_TEMP[nr], data->temp[nr]);
-	if (ret)
-		count = ret;
+	ret = regmap_write(data->regmap, ADT7X10_REG_TEMP[nr],
+			   ADT7X10_TEMP_TO_REG(temp));
 	mutex_unlock(&data->update_lock);
-	return count;
+	return ret ? : count;
 }
 
 static ssize_t adt7x10_t_hyst_show(struct device *dev,
@@ -265,9 +183,21 @@ static ssize_t adt7x10_t_hyst_show(struct device *dev,
 	struct sensor_device_attribute *attr = to_sensor_dev_attr(da);
 	struct adt7x10_data *data = dev_get_drvdata(dev);
 	int nr = attr->index;
-	int hyst;
+	int hyst, temp, ret;
+
+	mutex_lock(&data->update_lock);
+	ret = regmap_read(data->regmap, ADT7X10_T_HYST, &hyst);
+	if (ret) {
+		mutex_unlock(&data->update_lock);
+		return ret;
+	}
+
+	ret = regmap_read(data->regmap, ADT7X10_REG_TEMP[nr], &temp);
+	mutex_unlock(&data->update_lock);
+	if (ret)
+		return ret;
 
-	hyst = (data->hyst & ADT7X10_T_HYST_MASK) * 1000;
+	hyst = (hyst & ADT7X10_T_HYST_MASK) * 1000;
 
 	/*
 	 * hysteresis is stored as a 4 bit offset in the device, convert it
@@ -275,8 +205,8 @@ static ssize_t adt7x10_t_hyst_show(struct device *dev,
 	 */
 	if (nr == 2)	/* min has positive offset, others have negative */
 		hyst = -hyst;
-	return sprintf(buf, "%d\n",
-		       ADT7X10_REG_TO_TEMP(data, data->temp[nr]) - hyst);
+
+	return sprintf(buf, "%d\n", ADT7X10_REG_TO_TEMP(data, temp) - hyst);
 }
 
 static ssize_t adt7x10_t_hyst_store(struct device *dev,
@@ -284,35 +214,45 @@ static ssize_t adt7x10_t_hyst_store(struct device *dev,
 				    const char *buf, size_t count)
 {
 	struct adt7x10_data *data = dev_get_drvdata(dev);
+	unsigned int regval;
 	int limit, ret;
 	long hyst;
 
 	ret = kstrtol(buf, 10, &hyst);
 	if (ret)
 		return ret;
+
+	mutex_lock(&data->update_lock);
+
 	/* convert absolute hysteresis value to a 4 bit delta value */
-	limit = ADT7X10_REG_TO_TEMP(data, data->temp[1]);
-	hyst = clamp_val(hyst, ADT7X10_TEMP_MIN, ADT7X10_TEMP_MAX);
-	data->hyst = clamp_val(DIV_ROUND_CLOSEST(limit - hyst, 1000),
-				   0, ADT7X10_T_HYST_MASK);
-	ret = adt7x10_write_byte(dev, ADT7X10_T_HYST, data->hyst);
-	if (ret)
-		return ret;
+	ret = regmap_read(data->regmap, ADT7X10_T_ALARM_HIGH, &regval);
+	if (ret < 0)
+		goto abort;
 
-	return count;
+	limit = ADT7X10_REG_TO_TEMP(data, regval);
+
+	hyst = clamp_val(hyst, ADT7X10_TEMP_MIN, ADT7X10_TEMP_MAX);
+	regval = clamp_val(DIV_ROUND_CLOSEST(limit - hyst, 1000), 0,
+			   ADT7X10_T_HYST_MASK);
+	ret = regmap_write(data->regmap, ADT7X10_T_HYST, regval);
+abort:
+	mutex_unlock(&data->update_lock);
+	return ret ? : count;
 }
 
 static ssize_t adt7x10_alarm_show(struct device *dev,
 				  struct device_attribute *da, char *buf)
 {
 	struct sensor_device_attribute *attr = to_sensor_dev_attr(da);
+	struct adt7x10_data *data = dev_get_drvdata(dev);
+	unsigned int status;
 	int ret;
 
-	ret = adt7x10_read_byte(dev, ADT7X10_STATUS);
+	ret = regmap_read(data->regmap, ADT7X10_STATUS, &status);
 	if (ret < 0)
 		return ret;
 
-	return sprintf(buf, "%d\n", !!(ret & attr->index));
+	return sprintf(buf, "%d\n", !!(status & attr->index));
 }
 
 static ssize_t name_show(struct device *dev, struct device_attribute *da,
@@ -357,28 +297,29 @@ static const struct attribute_group adt7x10_group = {
 };
 
 int adt7x10_probe(struct device *dev, const char *name, int irq,
-		  const struct adt7x10_ops *ops)
+		  struct regmap *regmap)
 {
 	struct adt7x10_data *data;
+	unsigned int config;
 	int ret;
 
 	data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
 	if (!data)
 		return -ENOMEM;
 
-	data->ops = ops;
+	data->regmap = regmap;
 	data->name = name;
 
 	dev_set_drvdata(dev, data);
 	mutex_init(&data->update_lock);
 
 	/* configure as specified */
-	ret = adt7x10_read_byte(dev, ADT7X10_CONFIG);
+	ret = regmap_read(regmap, ADT7X10_CONFIG, &config);
 	if (ret < 0) {
 		dev_dbg(dev, "Can't read config? %d\n", ret);
 		return ret;
 	}
-	data->oldconfig = ret;
+	data->oldconfig = config;
 
 	/*
 	 * Set to 16 bit resolution, continous conversion and comparator mode.
@@ -389,16 +330,12 @@ int adt7x10_probe(struct device *dev, const char *name, int irq,
 	data->config |= ADT7X10_FULL | ADT7X10_RESOLUTION | ADT7X10_EVENT_MODE;
 
 	if (data->config != data->oldconfig) {
-		ret = adt7x10_write_byte(dev, ADT7X10_CONFIG, data->config);
+		ret = regmap_write(regmap, ADT7X10_CONFIG, data->config);
 		if (ret)
 			return ret;
 	}
 	dev_dbg(dev, "Config %02x\n", data->config);
 
-	ret = adt7x10_fill_cache(dev);
-	if (ret)
-		goto exit_restore;
-
 	/* Register sysfs hooks */
 	ret = sysfs_create_group(&dev->kobj, &adt7x10_group);
 	if (ret)
@@ -439,7 +376,7 @@ exit_remove_name:
 exit_remove:
 	sysfs_remove_group(&dev->kobj, &adt7x10_group);
 exit_restore:
-	adt7x10_write_byte(dev, ADT7X10_CONFIG, data->oldconfig);
+	regmap_write(regmap, ADT7X10_CONFIG, data->oldconfig);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(adt7x10_probe);
@@ -456,7 +393,7 @@ void adt7x10_remove(struct device *dev, int irq)
 		device_remove_file(dev, &dev_attr_name);
 	sysfs_remove_group(&dev->kobj, &adt7x10_group);
 	if (data->oldconfig != data->config)
-		adt7x10_write_byte(dev, ADT7X10_CONFIG, data->oldconfig);
+		regmap_write(data->regmap, ADT7X10_CONFIG, data->oldconfig);
 }
 EXPORT_SYMBOL_GPL(adt7x10_remove);
 
@@ -466,15 +403,15 @@ static int adt7x10_suspend(struct device *dev)
 {
 	struct adt7x10_data *data = dev_get_drvdata(dev);
 
-	return adt7x10_write_byte(dev, ADT7X10_CONFIG,
-		data->config | ADT7X10_PD);
+	return regmap_write(data->regmap, ADT7X10_CONFIG,
+			    data->config | ADT7X10_PD);
 }
 
 static int adt7x10_resume(struct device *dev)
 {
 	struct adt7x10_data *data = dev_get_drvdata(dev);
 
-	return adt7x10_write_byte(dev, ADT7X10_CONFIG, data->config);
+	return regmap_write(data->regmap, ADT7X10_CONFIG, data->config);
 }
 
 SIMPLE_DEV_PM_OPS(adt7x10_dev_pm_ops, adt7x10_suspend, adt7x10_resume);
diff --git a/drivers/hwmon/adt7x10.h b/drivers/hwmon/adt7x10.h
index a1ae682eb32e6..55ff08bfe9462 100644
--- a/drivers/hwmon/adt7x10.h
+++ b/drivers/hwmon/adt7x10.h
@@ -17,15 +17,9 @@
 
 struct device;
 
-struct adt7x10_ops {
-	int (*read_byte)(struct device *, u8 reg);
-	int (*write_byte)(struct device *, u8 reg, u8 data);
-	int (*read_word)(struct device *, u8 reg);
-	int (*write_word)(struct device *, u8 reg, u16 data);
-};
-
 int adt7x10_probe(struct device *dev, const char *name, int irq,
-	const struct adt7x10_ops *ops);
+		  struct regmap *regmap);
+
 void adt7x10_remove(struct device *dev, int irq);
 
 #ifdef CONFIG_PM_SLEEP
-- 
GitLab


From af910e92886c35461ccc5ea234e17f448a46b876 Mon Sep 17 00:00:00 2001
From: Cosmin Tanislav <cosmin.tanislav@analog.com>
Date: Tue, 21 Dec 2021 23:58:36 +0200
Subject: [PATCH 0750/1586] hwmon: (adt7x10) Add device managed action for
 restoring config

To simplify the core driver remove function.

Signed-off-by: Cosmin Tanislav <cosmin.tanislav@analog.com>
Link: https://lore.kernel.org/r/20211221215841.2641417-3-demonsingur@gmail.com
[groeck: Adjust to use regmap; only register restore function if needed]
Tested-by: Cosmin Tanislav <cosmin.tanislav@analog.com>
Reviewed-by: Cosmin Tanislav <cosmin.tanislav@analog.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/adt7x10.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/drivers/hwmon/adt7x10.c b/drivers/hwmon/adt7x10.c
index 05dd48b707b44..47ce1a88a03fa 100644
--- a/drivers/hwmon/adt7x10.c
+++ b/drivers/hwmon/adt7x10.c
@@ -296,6 +296,13 @@ static const struct attribute_group adt7x10_group = {
 	.attrs = adt7x10_attributes,
 };
 
+static void adt7x10_restore_config(void *private)
+{
+	struct adt7x10_data *data = private;
+
+	regmap_write(data->regmap, ADT7X10_CONFIG, data->oldconfig);
+}
+
 int adt7x10_probe(struct device *dev, const char *name, int irq,
 		  struct regmap *regmap)
 {
@@ -333,13 +340,16 @@ int adt7x10_probe(struct device *dev, const char *name, int irq,
 		ret = regmap_write(regmap, ADT7X10_CONFIG, data->config);
 		if (ret)
 			return ret;
+		ret = devm_add_action_or_reset(dev, adt7x10_restore_config, data);
+		if (ret)
+			return ret;
 	}
 	dev_dbg(dev, "Config %02x\n", data->config);
 
 	/* Register sysfs hooks */
 	ret = sysfs_create_group(&dev->kobj, &adt7x10_group);
 	if (ret)
-		goto exit_restore;
+		return ret;
 
 	/*
 	 * The I2C device will already have it's own 'name' attribute, but for
@@ -375,8 +385,6 @@ exit_remove_name:
 		device_remove_file(dev, &dev_attr_name);
 exit_remove:
 	sysfs_remove_group(&dev->kobj, &adt7x10_group);
-exit_restore:
-	regmap_write(regmap, ADT7X10_CONFIG, data->oldconfig);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(adt7x10_probe);
@@ -392,8 +400,6 @@ void adt7x10_remove(struct device *dev, int irq)
 	if (data->name)
 		device_remove_file(dev, &dev_attr_name);
 	sysfs_remove_group(&dev->kobj, &adt7x10_group);
-	if (data->oldconfig != data->config)
-		regmap_write(data->regmap, ADT7X10_CONFIG, data->oldconfig);
 }
 EXPORT_SYMBOL_GPL(adt7x10_remove);
 
-- 
GitLab


From a748d30c37bb7d238e5869f4f4d6037367493ebe Mon Sep 17 00:00:00 2001
From: Cosmin Tanislav <cosmin.tanislav@analog.com>
Date: Tue, 21 Dec 2021 23:58:37 +0200
Subject: [PATCH 0751/1586] hwmon: (adt7x10) Use
 devm_hwmon_device_register_with_info

Describe the only available channel, implement read, write
and is_visible callbacks.
Also, pass name to core driver for the i2c device so that
it can be used to register hwmon device.

Signed-off-by: Cosmin Tanislav <cosmin.tanislav@analog.com>
Link: https://lore.kernel.org/r/20211221215841.2641417-4-demonsingur@gmail.com
[groeck: Adjusted to use regmap]
Tested-by: Cosmin Tanislav <cosmin.tanislav@analog.com>
Reviewed-by: Cosmin Tanislav <cosmin.tanislav@analog.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/adt7410.c |   2 +-
 drivers/hwmon/adt7x10.c | 249 ++++++++++++++++++++--------------------
 2 files changed, 125 insertions(+), 126 deletions(-)

diff --git a/drivers/hwmon/adt7410.c b/drivers/hwmon/adt7410.c
index b1f4497dca051..81950a079c2f8 100644
--- a/drivers/hwmon/adt7410.c
+++ b/drivers/hwmon/adt7410.c
@@ -84,7 +84,7 @@ static int adt7410_i2c_probe(struct i2c_client *client)
 	if (IS_ERR(regmap))
 		return PTR_ERR(regmap);
 
-	return adt7x10_probe(&client->dev, NULL, client->irq, regmap);
+	return adt7x10_probe(&client->dev, client->name, client->irq, regmap);
 }
 
 static int adt7410_i2c_remove(struct i2c_client *client)
diff --git a/drivers/hwmon/adt7x10.c b/drivers/hwmon/adt7x10.c
index 47ce1a88a03fa..9482fd8fff41f 100644
--- a/drivers/hwmon/adt7x10.c
+++ b/drivers/hwmon/adt7x10.c
@@ -8,12 +8,12 @@
  * and adt7410.c from iio-staging by Sonic Zhang <sonic.zhang@analog.com>
  */
 
+#include <linux/device.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/jiffies.h>
 #include <linux/hwmon.h>
-#include <linux/hwmon-sysfs.h>
 #include <linux/err.h>
 #include <linux/mutex.h>
 #include <linux/delay.h>
@@ -55,19 +55,24 @@
 /* Each client has this additional data */
 struct adt7x10_data {
 	struct regmap		*regmap;
-	const char		*name;
-	struct device		*hwmon_dev;
 	struct mutex		update_lock;
 	u8			config;
 	u8			oldconfig;
 	bool			valid;		/* true if temperature valid */
 };
 
-static const u8 ADT7X10_REG_TEMP[4] = {
-	ADT7X10_TEMPERATURE,		/* input */
-	ADT7X10_T_ALARM_HIGH,		/* high */
-	ADT7X10_T_ALARM_LOW,		/* low */
-	ADT7X10_T_CRIT,			/* critical */
+enum {
+	adt7x10_temperature = 0,
+	adt7x10_t_alarm_high,
+	adt7x10_t_alarm_low,
+	adt7x10_t_crit,
+};
+
+static const u8 ADT7X10_REG_TEMP[] = {
+	[adt7x10_temperature] = ADT7X10_TEMPERATURE,		/* input */
+	[adt7x10_t_alarm_high] = ADT7X10_T_ALARM_HIGH,		/* high */
+	[adt7x10_t_alarm_low] = ADT7X10_T_ALARM_LOW,		/* low */
+	[adt7x10_t_crit] = ADT7X10_T_CRIT,			/* critical */
 };
 
 static irqreturn_t adt7x10_irq_handler(int irq, void *private)
@@ -127,18 +132,13 @@ static int ADT7X10_REG_TO_TEMP(struct adt7x10_data *data, s16 reg)
 
 /*-----------------------------------------------------------------------*/
 
-/* sysfs attributes for hwmon */
-
-static ssize_t adt7x10_temp_show(struct device *dev,
-				 struct device_attribute *da, char *buf)
+static int adt7x10_temp_read(struct adt7x10_data *data, int index, long *val)
 {
-	struct sensor_device_attribute *attr = to_sensor_dev_attr(da);
-	struct adt7x10_data *data = dev_get_drvdata(dev);
-	unsigned int val;
+	unsigned int regval;
 	int ret;
 
 	mutex_lock(&data->update_lock);
-	if (attr->index == 0 && !data->valid) {
+	if (index == adt7x10_temperature && !data->valid) {
 		/* wait for valid temperature */
 		ret = adt7x10_temp_ready(data->regmap);
 		if (ret) {
@@ -149,40 +149,27 @@ static ssize_t adt7x10_temp_show(struct device *dev,
 	}
 	mutex_unlock(&data->update_lock);
 
-	ret = regmap_read(data->regmap, ADT7X10_REG_TEMP[attr->index], &val);
+	ret = regmap_read(data->regmap, ADT7X10_REG_TEMP[index], &regval);
 	if (ret)
 		return ret;
 
-	return sprintf(buf, "%d\n", ADT7X10_REG_TO_TEMP(data, val));
+	*val = ADT7X10_REG_TO_TEMP(data, regval);
+	return 0;
 }
 
-static ssize_t adt7x10_temp_store(struct device *dev,
-				  struct device_attribute *da,
-				  const char *buf, size_t count)
+static int adt7x10_temp_write(struct adt7x10_data *data, int index, long temp)
 {
-	struct sensor_device_attribute *attr = to_sensor_dev_attr(da);
-	struct adt7x10_data *data = dev_get_drvdata(dev);
-	int nr = attr->index;
-	long temp;
 	int ret;
 
-	ret = kstrtol(buf, 10, &temp);
-	if (ret)
-		return ret;
-
 	mutex_lock(&data->update_lock);
-	ret = regmap_write(data->regmap, ADT7X10_REG_TEMP[nr],
+	ret = regmap_write(data->regmap, ADT7X10_REG_TEMP[index],
 			   ADT7X10_TEMP_TO_REG(temp));
 	mutex_unlock(&data->update_lock);
-	return ret ? : count;
+	return ret;
 }
 
-static ssize_t adt7x10_t_hyst_show(struct device *dev,
-				   struct device_attribute *da, char *buf)
+static int adt7x10_hyst_read(struct adt7x10_data *data, int index, long *val)
 {
-	struct sensor_device_attribute *attr = to_sensor_dev_attr(da);
-	struct adt7x10_data *data = dev_get_drvdata(dev);
-	int nr = attr->index;
 	int hyst, temp, ret;
 
 	mutex_lock(&data->update_lock);
@@ -192,7 +179,7 @@ static ssize_t adt7x10_t_hyst_show(struct device *dev,
 		return ret;
 	}
 
-	ret = regmap_read(data->regmap, ADT7X10_REG_TEMP[nr], &temp);
+	ret = regmap_read(data->regmap, ADT7X10_REG_TEMP[index], &temp);
 	mutex_unlock(&data->update_lock);
 	if (ret)
 		return ret;
@@ -203,24 +190,18 @@ static ssize_t adt7x10_t_hyst_show(struct device *dev,
 	 * hysteresis is stored as a 4 bit offset in the device, convert it
 	 * to an absolute value
 	 */
-	if (nr == 2)	/* min has positive offset, others have negative */
+	/* min has positive offset, others have negative */
+	if (index == adt7x10_t_alarm_low)
 		hyst = -hyst;
 
-	return sprintf(buf, "%d\n", ADT7X10_REG_TO_TEMP(data, temp) - hyst);
+	*val = ADT7X10_REG_TO_TEMP(data, temp) - hyst;
+	return 0;
 }
 
-static ssize_t adt7x10_t_hyst_store(struct device *dev,
-				    struct device_attribute *da,
-				    const char *buf, size_t count)
+static int adt7x10_hyst_write(struct adt7x10_data *data, long hyst)
 {
-	struct adt7x10_data *data = dev_get_drvdata(dev);
 	unsigned int regval;
 	int limit, ret;
-	long hyst;
-
-	ret = kstrtol(buf, 10, &hyst);
-	if (ret)
-		return ret;
 
 	mutex_lock(&data->update_lock);
 
@@ -237,14 +218,11 @@ static ssize_t adt7x10_t_hyst_store(struct device *dev,
 	ret = regmap_write(data->regmap, ADT7X10_T_HYST, regval);
 abort:
 	mutex_unlock(&data->update_lock);
-	return ret ? : count;
+	return ret;
 }
 
-static ssize_t adt7x10_alarm_show(struct device *dev,
-				  struct device_attribute *da, char *buf)
+static int adt7x10_alarm_read(struct adt7x10_data *data, int index, long *val)
 {
-	struct sensor_device_attribute *attr = to_sensor_dev_attr(da);
-	struct adt7x10_data *data = dev_get_drvdata(dev);
 	unsigned int status;
 	int ret;
 
@@ -252,48 +230,102 @@ static ssize_t adt7x10_alarm_show(struct device *dev,
 	if (ret < 0)
 		return ret;
 
-	return sprintf(buf, "%d\n", !!(status & attr->index));
+	*val = !!(status & index);
+
+	return 0;
 }
 
-static ssize_t name_show(struct device *dev, struct device_attribute *da,
-			 char *buf)
+static umode_t adt7x10_is_visible(const void *data,
+				  enum hwmon_sensor_types type,
+				  u32 attr, int channel)
+{
+	switch (attr) {
+	case hwmon_temp_max:
+	case hwmon_temp_min:
+	case hwmon_temp_crit:
+	case hwmon_temp_max_hyst:
+		return 0644;
+	case hwmon_temp_input:
+	case hwmon_temp_min_alarm:
+	case hwmon_temp_max_alarm:
+	case hwmon_temp_crit_alarm:
+	case hwmon_temp_min_hyst:
+	case hwmon_temp_crit_hyst:
+		return 0444;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+static int adt7x10_read(struct device *dev, enum hwmon_sensor_types type,
+			u32 attr, int channel, long *val)
 {
 	struct adt7x10_data *data = dev_get_drvdata(dev);
 
-	return sprintf(buf, "%s\n", data->name);
+	switch (attr) {
+	case hwmon_temp_input:
+		return adt7x10_temp_read(data, adt7x10_temperature, val);
+	case hwmon_temp_max:
+		return adt7x10_temp_read(data, adt7x10_t_alarm_high, val);
+	case hwmon_temp_min:
+		return adt7x10_temp_read(data, adt7x10_t_alarm_low, val);
+	case hwmon_temp_crit:
+		return adt7x10_temp_read(data, adt7x10_t_crit, val);
+	case hwmon_temp_max_hyst:
+		return adt7x10_hyst_read(data, adt7x10_t_alarm_high, val);
+	case hwmon_temp_min_hyst:
+		return adt7x10_hyst_read(data, adt7x10_t_alarm_low, val);
+	case hwmon_temp_crit_hyst:
+		return adt7x10_hyst_read(data, adt7x10_t_crit, val);
+	case hwmon_temp_min_alarm:
+		return adt7x10_alarm_read(data, ADT7X10_STAT_T_LOW, val);
+	case hwmon_temp_max_alarm:
+		return adt7x10_alarm_read(data, ADT7X10_STAT_T_HIGH, val);
+	case hwmon_temp_crit_alarm:
+		return adt7x10_alarm_read(data, ADT7X10_STAT_T_CRIT, val);
+	default:
+		return -EOPNOTSUPP;
+	}
 }
 
-static SENSOR_DEVICE_ATTR_RO(temp1_input, adt7x10_temp, 0);
-static SENSOR_DEVICE_ATTR_RW(temp1_max, adt7x10_temp, 1);
-static SENSOR_DEVICE_ATTR_RW(temp1_min, adt7x10_temp, 2);
-static SENSOR_DEVICE_ATTR_RW(temp1_crit, adt7x10_temp, 3);
-static SENSOR_DEVICE_ATTR_RW(temp1_max_hyst, adt7x10_t_hyst, 1);
-static SENSOR_DEVICE_ATTR_RO(temp1_min_hyst, adt7x10_t_hyst, 2);
-static SENSOR_DEVICE_ATTR_RO(temp1_crit_hyst, adt7x10_t_hyst, 3);
-static SENSOR_DEVICE_ATTR_RO(temp1_min_alarm, adt7x10_alarm,
-			     ADT7X10_STAT_T_LOW);
-static SENSOR_DEVICE_ATTR_RO(temp1_max_alarm, adt7x10_alarm,
-			     ADT7X10_STAT_T_HIGH);
-static SENSOR_DEVICE_ATTR_RO(temp1_crit_alarm, adt7x10_alarm,
-			     ADT7X10_STAT_T_CRIT);
-static DEVICE_ATTR_RO(name);
-
-static struct attribute *adt7x10_attributes[] = {
-	&sensor_dev_attr_temp1_input.dev_attr.attr,
-	&sensor_dev_attr_temp1_max.dev_attr.attr,
-	&sensor_dev_attr_temp1_min.dev_attr.attr,
-	&sensor_dev_attr_temp1_crit.dev_attr.attr,
-	&sensor_dev_attr_temp1_max_hyst.dev_attr.attr,
-	&sensor_dev_attr_temp1_min_hyst.dev_attr.attr,
-	&sensor_dev_attr_temp1_crit_hyst.dev_attr.attr,
-	&sensor_dev_attr_temp1_min_alarm.dev_attr.attr,
-	&sensor_dev_attr_temp1_max_alarm.dev_attr.attr,
-	&sensor_dev_attr_temp1_crit_alarm.dev_attr.attr,
-	NULL
+static int adt7x10_write(struct device *dev, enum hwmon_sensor_types type,
+			 u32 attr, int channel, long val)
+{
+	struct adt7x10_data *data = dev_get_drvdata(dev);
+
+	switch (attr) {
+	case hwmon_temp_max:
+		return adt7x10_temp_write(data, adt7x10_t_alarm_high, val);
+	case hwmon_temp_min:
+		return adt7x10_temp_write(data, adt7x10_t_alarm_low, val);
+	case hwmon_temp_crit:
+		return adt7x10_temp_write(data, adt7x10_t_crit, val);
+	case hwmon_temp_max_hyst:
+		return adt7x10_hyst_write(data, val);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static const struct hwmon_channel_info *adt7x10_info[] = {
+	HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN |
+			   HWMON_T_CRIT | HWMON_T_MAX_HYST | HWMON_T_MIN_HYST |
+			   HWMON_T_CRIT_HYST | HWMON_T_MIN_ALARM |
+			   HWMON_T_MAX_ALARM | HWMON_T_CRIT_ALARM),
+	NULL,
+};
+
+static const struct hwmon_ops adt7x10_hwmon_ops = {
+	.is_visible = adt7x10_is_visible,
+	.read = adt7x10_read,
+	.write = adt7x10_write,
 };
 
-static const struct attribute_group adt7x10_group = {
-	.attrs = adt7x10_attributes,
+static const struct hwmon_chip_info adt7x10_chip_info = {
+	.ops = &adt7x10_hwmon_ops,
+	.info = adt7x10_info,
 };
 
 static void adt7x10_restore_config(void *private)
@@ -308,6 +340,7 @@ int adt7x10_probe(struct device *dev, const char *name, int irq,
 {
 	struct adt7x10_data *data;
 	unsigned int config;
+	struct device *hdev;
 	int ret;
 
 	data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
@@ -315,7 +348,6 @@ int adt7x10_probe(struct device *dev, const char *name, int irq,
 		return -ENOMEM;
 
 	data->regmap = regmap;
-	data->name = name;
 
 	dev_set_drvdata(dev, data);
 	mutex_init(&data->update_lock);
@@ -346,60 +378,27 @@ int adt7x10_probe(struct device *dev, const char *name, int irq,
 	}
 	dev_dbg(dev, "Config %02x\n", data->config);
 
-	/* Register sysfs hooks */
-	ret = sysfs_create_group(&dev->kobj, &adt7x10_group);
-	if (ret)
-		return ret;
-
-	/*
-	 * The I2C device will already have it's own 'name' attribute, but for
-	 * the SPI device we need to register it. name will only be non NULL if
-	 * the device doesn't register the 'name' attribute on its own.
-	 */
-	if (name) {
-		ret = device_create_file(dev, &dev_attr_name);
-		if (ret)
-			goto exit_remove;
-	}
-
-	data->hwmon_dev = hwmon_device_register(dev);
-	if (IS_ERR(data->hwmon_dev)) {
-		ret = PTR_ERR(data->hwmon_dev);
-		goto exit_remove_name;
-	}
+	hdev = devm_hwmon_device_register_with_info(dev, name, data,
+						    &adt7x10_chip_info, NULL);
+	if (IS_ERR(hdev))
+		return PTR_ERR(hdev);
 
 	if (irq > 0) {
 		ret = request_threaded_irq(irq, NULL, adt7x10_irq_handler,
 				IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
 				dev_name(dev), dev);
 		if (ret)
-			goto exit_hwmon_device_unregister;
+			return ret;
 	}
 
 	return 0;
-
-exit_hwmon_device_unregister:
-	hwmon_device_unregister(data->hwmon_dev);
-exit_remove_name:
-	if (name)
-		device_remove_file(dev, &dev_attr_name);
-exit_remove:
-	sysfs_remove_group(&dev->kobj, &adt7x10_group);
-	return ret;
 }
 EXPORT_SYMBOL_GPL(adt7x10_probe);
 
 void adt7x10_remove(struct device *dev, int irq)
 {
-	struct adt7x10_data *data = dev_get_drvdata(dev);
-
 	if (irq > 0)
 		free_irq(irq, dev);
-
-	hwmon_device_unregister(data->hwmon_dev);
-	if (data->name)
-		device_remove_file(dev, &dev_attr_name);
-	sysfs_remove_group(&dev->kobj, &adt7x10_group);
 }
 EXPORT_SYMBOL_GPL(adt7x10_remove);
 
-- 
GitLab


From f691adc3e95ecd6ae01830f7f6140dce3bd730a2 Mon Sep 17 00:00:00 2001
From: Cosmin Tanislav <cosmin.tanislav@analog.com>
Date: Tue, 21 Dec 2021 23:58:38 +0200
Subject: [PATCH 0752/1586] hwmon: (adt7x10) Use devm_request_threaded_irq

To simplify the core driver remove function.

Signed-off-by: Cosmin Tanislav <cosmin.tanislav@analog.com>
Link: https://lore.kernel.org/r/20211221215841.2641417-5-demonsingur@gmail.com
Tested-by: Cosmin Tanislav <cosmin.tanislav@analog.com>
Reviewed-by: Cosmin Tanislav <cosmin.tanislav@analog.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/adt7x10.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/hwmon/adt7x10.c b/drivers/hwmon/adt7x10.c
index 9482fd8fff41f..147c28b241675 100644
--- a/drivers/hwmon/adt7x10.c
+++ b/drivers/hwmon/adt7x10.c
@@ -384,9 +384,11 @@ int adt7x10_probe(struct device *dev, const char *name, int irq,
 		return PTR_ERR(hdev);
 
 	if (irq > 0) {
-		ret = request_threaded_irq(irq, NULL, adt7x10_irq_handler,
-				IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
-				dev_name(dev), dev);
+		ret = devm_request_threaded_irq(dev, irq, NULL,
+						adt7x10_irq_handler,
+						IRQF_TRIGGER_FALLING |
+						IRQF_ONESHOT,
+						dev_name(dev), dev);
 		if (ret)
 			return ret;
 	}
@@ -397,8 +399,6 @@ EXPORT_SYMBOL_GPL(adt7x10_probe);
 
 void adt7x10_remove(struct device *dev, int irq)
 {
-	if (irq > 0)
-		free_irq(irq, dev);
 }
 EXPORT_SYMBOL_GPL(adt7x10_remove);
 
-- 
GitLab


From 8331585ab3700662c0979244fb06522aa0d36fbc Mon Sep 17 00:00:00 2001
From: Cosmin Tanislav <cosmin.tanislav@analog.com>
Date: Tue, 21 Dec 2021 23:58:39 +0200
Subject: [PATCH 0753/1586] hwmon: (adt7x10) Remove empty driver removal
 callback

Not used to do anything anymore.

Signed-off-by: Cosmin Tanislav <cosmin.tanislav@analog.com>
Link: https://lore.kernel.org/r/20211221215841.2641417-6-demonsingur@gmail.com
Tested-by: Cosmin Tanislav <cosmin.tanislav@analog.com>
Reviewed-by: Cosmin Tanislav <cosmin.tanislav@analog.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/adt7310.c | 7 -------
 drivers/hwmon/adt7410.c | 7 -------
 drivers/hwmon/adt7x10.c | 5 -----
 drivers/hwmon/adt7x10.h | 2 --
 4 files changed, 21 deletions(-)

diff --git a/drivers/hwmon/adt7310.c b/drivers/hwmon/adt7310.c
index a83092470bce4..1efc0bdcceabd 100644
--- a/drivers/hwmon/adt7310.c
+++ b/drivers/hwmon/adt7310.c
@@ -142,12 +142,6 @@ static int adt7310_spi_probe(struct spi_device *spi)
 			     regmap);
 }
 
-static int adt7310_spi_remove(struct spi_device *spi)
-{
-	adt7x10_remove(&spi->dev, spi->irq);
-	return 0;
-}
-
 static const struct spi_device_id adt7310_id[] = {
 	{ "adt7310", 0 },
 	{ "adt7320", 0 },
@@ -161,7 +155,6 @@ static struct spi_driver adt7310_driver = {
 		.pm	= ADT7X10_DEV_PM_OPS,
 	},
 	.probe		= adt7310_spi_probe,
-	.remove		= adt7310_spi_remove,
 	.id_table	= adt7310_id,
 };
 module_spi_driver(adt7310_driver);
diff --git a/drivers/hwmon/adt7410.c b/drivers/hwmon/adt7410.c
index 81950a079c2f8..aede5baca7b9f 100644
--- a/drivers/hwmon/adt7410.c
+++ b/drivers/hwmon/adt7410.c
@@ -87,12 +87,6 @@ static int adt7410_i2c_probe(struct i2c_client *client)
 	return adt7x10_probe(&client->dev, client->name, client->irq, regmap);
 }
 
-static int adt7410_i2c_remove(struct i2c_client *client)
-{
-	adt7x10_remove(&client->dev, client->irq);
-	return 0;
-}
-
 static const struct i2c_device_id adt7410_ids[] = {
 	{ "adt7410", 0 },
 	{ "adt7420", 0 },
@@ -107,7 +101,6 @@ static struct i2c_driver adt7410_driver = {
 		.pm	= ADT7X10_DEV_PM_OPS,
 	},
 	.probe_new	= adt7410_i2c_probe,
-	.remove		= adt7410_i2c_remove,
 	.id_table	= adt7410_ids,
 	.address_list	= I2C_ADDRS(0x48, 0x49, 0x4a, 0x4b),
 };
diff --git a/drivers/hwmon/adt7x10.c b/drivers/hwmon/adt7x10.c
index 147c28b241675..ea8cd918bc226 100644
--- a/drivers/hwmon/adt7x10.c
+++ b/drivers/hwmon/adt7x10.c
@@ -397,11 +397,6 @@ int adt7x10_probe(struct device *dev, const char *name, int irq,
 }
 EXPORT_SYMBOL_GPL(adt7x10_probe);
 
-void adt7x10_remove(struct device *dev, int irq)
-{
-}
-EXPORT_SYMBOL_GPL(adt7x10_remove);
-
 #ifdef CONFIG_PM_SLEEP
 
 static int adt7x10_suspend(struct device *dev)
diff --git a/drivers/hwmon/adt7x10.h b/drivers/hwmon/adt7x10.h
index 55ff08bfe9462..ba22c32c83552 100644
--- a/drivers/hwmon/adt7x10.h
+++ b/drivers/hwmon/adt7x10.h
@@ -20,8 +20,6 @@ struct device;
 int adt7x10_probe(struct device *dev, const char *name, int irq,
 		  struct regmap *regmap);
 
-void adt7x10_remove(struct device *dev, int irq);
-
 #ifdef CONFIG_PM_SLEEP
 extern const struct dev_pm_ops adt7x10_dev_pm_ops;
 #define ADT7X10_DEV_PM_OPS (&adt7x10_dev_pm_ops)
-- 
GitLab


From a7a5731a09521a1053bb409647bf3e52cdf350de Mon Sep 17 00:00:00 2001
From: Cosmin Tanislav <cosmin.tanislav@analog.com>
Date: Tue, 21 Dec 2021 23:58:41 +0200
Subject: [PATCH 0754/1586] hwmon: (adt7x10) Use hwmon_notify_event

The hwmon subsystem provides means of notifying userspace
about events. Use it.

Signed-off-by: Cosmin Tanislav <cosmin.tanislav@analog.com>
Link: https://lore.kernel.org/r/20211221215841.2641417-8-demonsingur@gmail.com
[groeck: Pass hwmon device to interrupt handler]
Tested-by: Cosmin Tanislav <cosmin.tanislav@analog.com>
Reviewed-by: Cosmin Tanislav <cosmin.tanislav@analog.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/adt7x10.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/hwmon/adt7x10.c b/drivers/hwmon/adt7x10.c
index ea8cd918bc226..ce54bffab2ec2 100644
--- a/drivers/hwmon/adt7x10.c
+++ b/drivers/hwmon/adt7x10.c
@@ -87,11 +87,11 @@ static irqreturn_t adt7x10_irq_handler(int irq, void *private)
 		return IRQ_HANDLED;
 
 	if (status & ADT7X10_STAT_T_HIGH)
-		sysfs_notify(&dev->kobj, NULL, "temp1_max_alarm");
+		hwmon_notify_event(dev, hwmon_temp, hwmon_temp_max_alarm, 0);
 	if (status & ADT7X10_STAT_T_LOW)
-		sysfs_notify(&dev->kobj, NULL, "temp1_min_alarm");
+		hwmon_notify_event(dev, hwmon_temp, hwmon_temp_min_alarm, 0);
 	if (status & ADT7X10_STAT_T_CRIT)
-		sysfs_notify(&dev->kobj, NULL, "temp1_crit_alarm");
+		hwmon_notify_event(dev, hwmon_temp, hwmon_temp_crit_alarm, 0);
 
 	return IRQ_HANDLED;
 }
@@ -388,7 +388,7 @@ int adt7x10_probe(struct device *dev, const char *name, int irq,
 						adt7x10_irq_handler,
 						IRQF_TRIGGER_FALLING |
 						IRQF_ONESHOT,
-						dev_name(dev), dev);
+						dev_name(dev), hdev);
 		if (ret)
 			return ret;
 	}
-- 
GitLab


From f545a2fd473606809cb4a5045919a10194d86d36 Mon Sep 17 00:00:00 2001
From: Eugene Shalygin <eugene.shalygin@gmail.com>
Date: Tue, 8 Feb 2022 10:42:43 +0100
Subject: [PATCH 0755/1586] hwmon: (asus-ec-sensors) add CPU core voltage

A user discovered [1] the CPU Core voltage sensor, which spans 2
registers and provides output in mV. Althroug the discovery was made
with a X470 chipset, the sensor is present in X570 (tested with C8H).
For now simply add it to each board with the CPU current sensor present.

[1] https://github.com/zeule/asus-ec-sensors/issues/12

Signed-off-by: Eugene Shalygin <eugene.shalygin@gmail.com>
Tested-by: Oleksandr Natalenko <oleksandr@natalenko.name>
Tested-by: Denis Pauk <pauk.denis@gmail.com>
Link: https://lore.kernel.org/r/20220208094244.1106312-1-eugene.shalygin@gmail.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 Documentation/hwmon/asus_ec_sensors.rst |  1 +
 drivers/hwmon/asus-ec-sensors.c         | 29 ++++++++++++++++---------
 2 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/Documentation/hwmon/asus_ec_sensors.rst b/Documentation/hwmon/asus_ec_sensors.rst
index 22de1b037cfba..e7e8f1640f457 100644
--- a/Documentation/hwmon/asus_ec_sensors.rst
+++ b/Documentation/hwmon/asus_ec_sensors.rst
@@ -39,6 +39,7 @@ The driver is aware of and reads the following sensors:
 9. Readings from the "Water flow meter" header (RPM)
 10. Readings from the "Water In" and "Water Out" temperature headers
 11. CPU current
+12. CPU core voltage
 
 Sensor values are read from EC registers, and to avoid race with the board
 firmware the driver acquires ACPI mutex, the one used by the WMI when its
diff --git a/drivers/hwmon/asus-ec-sensors.c b/drivers/hwmon/asus-ec-sensors.c
index dd7b207d062fd..bfac08a5dc57d 100644
--- a/drivers/hwmon/asus-ec-sensors.c
+++ b/drivers/hwmon/asus-ec-sensors.c
@@ -18,6 +18,7 @@
  * - VRM Heat Sink fan RPM
  * - Water Flow fan RPM
  * - CPU current
+ * - CPU core voltage
  */
 
 #include <linux/acpi.h>
@@ -100,6 +101,8 @@ enum ec_sensors {
 	ec_sensor_temp_t_sensor,
 	/* VRM temperature [℃] */
 	ec_sensor_temp_vrm,
+	/* CPU Core voltage [mV] */
+	ec_sensor_in_cpu_core,
 	/* CPU_Opt fan [RPM] */
 	ec_sensor_fan_cpu_opt,
 	/* VRM heat sink fan [RPM] */
@@ -121,6 +124,7 @@ enum ec_sensors {
 #define SENSOR_TEMP_MB BIT(ec_sensor_temp_mb)
 #define SENSOR_TEMP_T_SENSOR BIT(ec_sensor_temp_t_sensor)
 #define SENSOR_TEMP_VRM BIT(ec_sensor_temp_vrm)
+#define SENSOR_IN_CPU_CORE BIT(ec_sensor_in_cpu_core)
 #define SENSOR_FAN_CPU_OPT BIT(ec_sensor_fan_cpu_opt)
 #define SENSOR_FAN_VRM_HS BIT(ec_sensor_fan_vrm_hs)
 #define SENSOR_FAN_CHIPSET BIT(ec_sensor_fan_chipset)
@@ -139,6 +143,8 @@ static const struct ec_sensor_info known_ec_sensors[] = {
 	[ec_sensor_temp_t_sensor] =
 		EC_SENSOR("T_Sensor", hwmon_temp, 1, 0x00, 0x3d),
 	[ec_sensor_temp_vrm] = EC_SENSOR("VRM", hwmon_temp, 1, 0x00, 0x3e),
+	[ec_sensor_in_cpu_core] =
+		EC_SENSOR("CPU Core", hwmon_in, 2, 0x00, 0xa2),
 	[ec_sensor_fan_cpu_opt] =
 		EC_SENSOR("CPU_Opt", hwmon_fan, 2, 0x00, 0xb0),
 	[ec_sensor_fan_vrm_hs] = EC_SENSOR("VRM HS", hwmon_fan, 2, 0x00, 0xb2),
@@ -172,32 +178,34 @@ static const struct dmi_system_id asus_ec_dmi_table[] __initconst = {
 		SENSOR_TEMP_T_SENSOR | SENSOR_FAN_CHIPSET),
 	DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "Pro WS X570-ACE",
 		SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_VRM |
-		SENSOR_FAN_CHIPSET | SENSOR_CURR_CPU),
+		SENSOR_FAN_CHIPSET | SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE),
 	DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE,
 			      "ROG CROSSHAIR VIII DARK HERO",
 		SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_T_SENSOR |
 		SENSOR_TEMP_VRM | SENSOR_SET_TEMP_WATER |
-		SENSOR_FAN_CPU_OPT | SENSOR_FAN_WATER_FLOW | SENSOR_CURR_CPU),
+		SENSOR_FAN_CPU_OPT | SENSOR_FAN_WATER_FLOW |
+		SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE),
 	DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE,
 			      "ROG CROSSHAIR VIII FORMULA",
 		SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_T_SENSOR |
 		SENSOR_TEMP_VRM | SENSOR_FAN_CPU_OPT | SENSOR_FAN_CHIPSET |
-		SENSOR_CURR_CPU),
+		SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE),
 	DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "ROG CROSSHAIR VIII HERO",
 		SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_T_SENSOR |
 		SENSOR_TEMP_VRM | SENSOR_SET_TEMP_WATER |
 		SENSOR_FAN_CPU_OPT | SENSOR_FAN_CHIPSET |
-		SENSOR_FAN_WATER_FLOW | SENSOR_CURR_CPU),
+		SENSOR_FAN_WATER_FLOW | SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE),
 	DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE,
 			      "ROG CROSSHAIR VIII HERO (WI-FI)",
 		SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_T_SENSOR |
 		SENSOR_TEMP_VRM | SENSOR_SET_TEMP_WATER |
 		SENSOR_FAN_CPU_OPT | SENSOR_FAN_CHIPSET |
-		SENSOR_FAN_WATER_FLOW | SENSOR_CURR_CPU),
+		SENSOR_FAN_WATER_FLOW | SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE),
 	DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE,
 			      "ROG CROSSHAIR VIII IMPACT",
 		SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_T_SENSOR |
-		SENSOR_TEMP_VRM | SENSOR_FAN_CHIPSET | SENSOR_CURR_CPU),
+		SENSOR_TEMP_VRM | SENSOR_FAN_CHIPSET |
+		SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE),
 	DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "ROG STRIX B550-E GAMING",
 		SENSOR_SET_TEMP_CHIPSET_CPU_MB |
 		SENSOR_TEMP_T_SENSOR |
@@ -205,17 +213,19 @@ static const struct dmi_system_id asus_ec_dmi_table[] __initconst = {
 	DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "ROG STRIX B550-I GAMING",
 		SENSOR_SET_TEMP_CHIPSET_CPU_MB |
 		SENSOR_TEMP_T_SENSOR |
-		SENSOR_TEMP_VRM | SENSOR_FAN_VRM_HS | SENSOR_CURR_CPU),
+		SENSOR_TEMP_VRM | SENSOR_FAN_VRM_HS |
+		SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE),
 	DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "ROG STRIX X570-E GAMING",
 		SENSOR_SET_TEMP_CHIPSET_CPU_MB |
 		SENSOR_TEMP_T_SENSOR |
-		SENSOR_TEMP_VRM | SENSOR_FAN_CHIPSET | SENSOR_CURR_CPU),
+		SENSOR_TEMP_VRM | SENSOR_FAN_CHIPSET |
+		SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE),
 	DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "ROG STRIX X570-F GAMING",
 		SENSOR_SET_TEMP_CHIPSET_CPU_MB |
 		SENSOR_TEMP_T_SENSOR | SENSOR_FAN_CHIPSET),
 	DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "ROG STRIX X570-I GAMING",
 		SENSOR_TEMP_T_SENSOR | SENSOR_FAN_VRM_HS |
-		SENSOR_FAN_CHIPSET | SENSOR_CURR_CPU),
+		SENSOR_FAN_CHIPSET | SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE),
 	{}
 };
 
@@ -467,7 +477,6 @@ static long scale_sensor_value(s32 value, int data_type)
 	switch (data_type) {
 	case hwmon_curr:
 	case hwmon_temp:
-	case hwmon_in:
 		return value * MILLI;
 	default:
 		return value;
-- 
GitLab


From a7a6f65a39a75a5821eb9aad157326c30f8bbb07 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mateusz=20Jo=C5=84czyk?= <mat.jonczyk@o2.pl>
Date: Sat, 12 Feb 2022 13:56:53 +0100
Subject: [PATCH 0756/1586] x86/Kconfig: move and modify CONFIG_I8K
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In Kconfig, inside the "Processor type and features" menu, there is
the CONFIG_I8K option: "Dell i8k legacy laptop support". This is
very confusing - enabling CONFIG_I8K is not required for the kernel to
support old Dell laptops. This option is specific to the dell-smm-hwmon
driver, which mostly exports some hardware monitoring information and
allows the user to change fan speed.

This option is misplaced, so move CONFIG_I8K to drivers/hwmon/Kconfig,
where it belongs.

Also, modify the dependency order - change
        select SENSORS_DELL_SMM
to
        depends on SENSORS_DELL_SMM
as it is just a configuration option of dell-smm-hwmon. This includes
changing the option type from tristate to bool. It was tristate because
it could select CONFIG_SENSORS_DELL_SMM=m .

When running "make oldconfig" on configurations with
CONFIG_SENSORS_DELL_SMM enabled , this change will result in an
additional question (which could be printed several times during
bisecting). I think that tidying up the configuration is worth it,
though.

Next patch tweaks the description of CONFIG_I8K.

Signed-off-by: Mateusz Jończyk <mat.jonczyk@o2.pl>
Cc: Pali Rohár <pali@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Jean Delvare <jdelvare@suse.com>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Mark Gross <markgross@kernel.org>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Randy Dunlap <rdunlap@infradead.org>
Acked-by: Borislav Petkov <bp@suse.de>
Link: https://lore.kernel.org/r/20220212125654.357408-1-mat.jonczyk@o2.pl
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 arch/x86/Kconfig      | 17 -----------------
 drivers/hwmon/Kconfig | 16 ++++++++++++++++
 2 files changed, 16 insertions(+), 17 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 9f5bd41bf660c..71d4ddd48c027 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1275,23 +1275,6 @@ config TOSHIBA
 	  Say Y if you intend to run this kernel on a Toshiba portable.
 	  Say N otherwise.
 
-config I8K
-	tristate "Dell i8k legacy laptop support"
-	depends on HWMON
-	depends on PROC_FS
-	select SENSORS_DELL_SMM
-	help
-	  This option enables legacy /proc/i8k userspace interface in hwmon
-	  dell-smm-hwmon driver. Character file /proc/i8k reports bios version,
-	  temperature and allows controlling fan speeds of Dell laptops via
-	  System Management Mode. For old Dell laptops (like Dell Inspiron 8000)
-	  it reports also power and hotkey status. For fan speed control is
-	  needed userspace package i8kutils.
-
-	  Say Y if you intend to run this kernel on old Dell laptops or want to
-	  use userspace package i8kutils.
-	  Say N otherwise.
-
 config X86_REBOOTFIXUPS
 	bool "Enable X86 board specific fixups for reboot"
 	depends on X86_32
diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index 387253ae6c149..fceea537101de 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -506,6 +506,22 @@ config SENSORS_DELL_SMM
 	  When option I8K is also enabled this driver provides legacy /proc/i8k
 	  userspace interface for i8kutils package.
 
+config I8K
+	bool "Dell i8k legacy laptop support"
+	depends on SENSORS_DELL_SMM
+	depends on PROC_FS
+	help
+	  This option enables legacy /proc/i8k userspace interface in hwmon
+	  dell-smm-hwmon driver. Character file /proc/i8k reports bios version,
+	  temperature and allows controlling fan speeds of Dell laptops via
+	  System Management Mode. For old Dell laptops (like Dell Inspiron 8000)
+	  it reports also power and hotkey status. For fan speed control is
+	  needed userspace package i8kutils.
+
+	  Say Y if you intend to run this kernel on old Dell laptops or want to
+	  use userspace package i8kutils.
+	  Say N otherwise.
+
 config SENSORS_DA9052_ADC
 	tristate "Dialog DA9052/DA9053 ADC"
 	depends on PMIC_DA9052
-- 
GitLab


From 5f86cce61c1d12219b2ea25b9404bf263a77dd72 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mateusz=20Jo=C5=84czyk?= <mat.jonczyk@o2.pl>
Date: Sat, 12 Feb 2022 13:56:54 +0100
Subject: [PATCH 0757/1586] hwmon: (dell-smm) rewrite CONFIG_I8K description
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It is not the laptops, but the /proc/i8k interface that is legacy (or so
I think was the intention of the help text author). The old description
was confusing, fix this.

The phrase "Say Y if you intend to run this kernel on old Dell laptops
or want to use userspace package i8kutils." was introduced in 2015, in
commit 039ae58503f3 ("hwmon: Allow to compile dell-smm-hwmon driver without /proc/i8k")

I think that "old laptops" was about hotkey and Fn key support - this
driver in the 2.4 kernels' era apparently had these capabilities
(see: https://github.com/vitorafsr/i8kutils , description of
"repeat_rate" kernel module parameter).

Signed-off-by: Mateusz Jończyk <mat.jonczyk@o2.pl>
Cc: Pali Rohár <pali@kernel.org>
Cc: Jean Delvare <jdelvare@suse.com>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Mark Gross <markgross@kernel.org>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Randy Dunlap <rdunlap@infradead.org>
Link: https://lore.kernel.org/r/20220212125654.357408-2-mat.jonczyk@o2.pl
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/Kconfig | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index fceea537101de..ce9149e0648f9 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -507,19 +507,18 @@ config SENSORS_DELL_SMM
 	  userspace interface for i8kutils package.
 
 config I8K
-	bool "Dell i8k legacy laptop support"
+	bool "Legacy /proc/i8k interface of Dell laptop SMM BIOS hwmon driver"
 	depends on SENSORS_DELL_SMM
 	depends on PROC_FS
 	help
-	  This option enables legacy /proc/i8k userspace interface in hwmon
-	  dell-smm-hwmon driver. Character file /proc/i8k reports bios version,
-	  temperature and allows controlling fan speeds of Dell laptops via
-	  System Management Mode. For old Dell laptops (like Dell Inspiron 8000)
-	  it reports also power and hotkey status. For fan speed control is
-	  needed userspace package i8kutils.
+	  This option enables the legacy /proc/i8k userspace interface of the
+	  dell-smm-hwmon driver. The character file /proc/i8k exposes the BIOS
+	  version, temperatures and allows control of fan speeds of some Dell
+	  laptops. Sometimes it also reports power and hotkey status.
 
-	  Say Y if you intend to run this kernel on old Dell laptops or want to
-	  use userspace package i8kutils.
+	  This interface is required to run programs from the i8kutils package.
+
+	  Say Y if you intend to run userspace programs that use this interface.
 	  Say N otherwise.
 
 config SENSORS_DA9052_ADC
-- 
GitLab


From 99cb5e9f7a78857657220f65533dce550331d629 Mon Sep 17 00:00:00 2001
From: Christian Lamparter <chunkeey@gmail.com>
Date: Sun, 13 Feb 2022 01:47:33 +0100
Subject: [PATCH 0758/1586] hwmon: (tc654) Add thermal_cooling device support

Adds thermal_cooling device support to the tc654/tc655
driver. This make it possible to integrate it into a
device-tree supported thermal-zone node as a
cooling device.

I have been using this patch as part of the Netgear WNDR4700
Centria NAS Router support within OpenWrt since 2016.

Signed-off-by: Christian Lamparter <chunkeey@gmail.com>
Link: https://lore.kernel.org/r/20220213004733.2421193-1-chunkeey@gmail.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/tc654.c | 104 ++++++++++++++++++++++++++++++++++++------
 1 file changed, 89 insertions(+), 15 deletions(-)

diff --git a/drivers/hwmon/tc654.c b/drivers/hwmon/tc654.c
index a52ca72af1201..54cd33d096884 100644
--- a/drivers/hwmon/tc654.c
+++ b/drivers/hwmon/tc654.c
@@ -15,6 +15,7 @@
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/slab.h>
+#include <linux/thermal.h>
 #include <linux/util_macros.h>
 
 enum tc654_regs {
@@ -379,28 +380,20 @@ static ssize_t pwm_show(struct device *dev, struct device_attribute *da,
 	return sprintf(buf, "%d\n", pwm);
 }
 
-static ssize_t pwm_store(struct device *dev, struct device_attribute *da,
-			 const char *buf, size_t count)
+static int _set_pwm(struct tc654_data *data, unsigned long val)
 {
-	struct tc654_data *data = dev_get_drvdata(dev);
 	struct i2c_client *client = data->client;
-	unsigned long val;
 	int ret;
 
-	if (kstrtoul(buf, 10, &val))
-		return -EINVAL;
-	if (val > 255)
-		return -EINVAL;
-
 	mutex_lock(&data->update_lock);
 
-	if (val == 0)
+	if (val == 0) {
 		data->config |= TC654_REG_CONFIG_SDM;
-	else
+		data->duty_cycle = 0;
+	} else {
 		data->config &= ~TC654_REG_CONFIG_SDM;
-
-	data->duty_cycle = find_closest(val, tc654_pwm_map,
-					ARRAY_SIZE(tc654_pwm_map));
+		data->duty_cycle = val - 1;
+	}
 
 	ret = i2c_smbus_write_byte_data(client, TC654_REG_CONFIG, data->config);
 	if (ret < 0)
@@ -411,6 +404,24 @@ static ssize_t pwm_store(struct device *dev, struct device_attribute *da,
 
 out:
 	mutex_unlock(&data->update_lock);
+	return ret;
+}
+
+static ssize_t pwm_store(struct device *dev, struct device_attribute *da,
+			 const char *buf, size_t count)
+{
+	struct tc654_data *data = dev_get_drvdata(dev);
+	unsigned long val;
+	int ret;
+
+	if (kstrtoul(buf, 10, &val))
+		return -EINVAL;
+	if (val > 255)
+		return -EINVAL;
+	if (val > 0)
+		val = find_closest(val, tc654_pwm_map, ARRAY_SIZE(tc654_pwm_map)) + 1;
+
+	ret = _set_pwm(data, val);
 	return ret < 0 ? ret : count;
 }
 
@@ -442,6 +453,58 @@ static struct attribute *tc654_attrs[] = {
 
 ATTRIBUTE_GROUPS(tc654);
 
+/*
+ * thermal cooling device functions
+ *
+ * Account for the "ShutDown Mode (SDM)" state by offsetting
+ * the 16 PWM duty cycle states by 1.
+ *
+ * State  0 =   0% PWM | Shutdown - Fan(s) are off
+ * State  1 =  30% PWM | duty_cycle =  0
+ * State  2 = ~35% PWM | duty_cycle =  1
+ * [...]
+ * State 15 = ~95% PWM | duty_cycle = 14
+ * State 16 = 100% PWM | duty_cycle = 15
+ */
+#define TC654_MAX_COOLING_STATE	16
+
+static int tc654_get_max_state(struct thermal_cooling_device *cdev, unsigned long *state)
+{
+	*state = TC654_MAX_COOLING_STATE;
+	return 0;
+}
+
+static int tc654_get_cur_state(struct thermal_cooling_device *cdev, unsigned long *state)
+{
+	struct tc654_data *data = tc654_update_client(cdev->devdata);
+
+	if (IS_ERR(data))
+		return PTR_ERR(data);
+
+	if (data->config & TC654_REG_CONFIG_SDM)
+		*state = 0;	/* FAN is off */
+	else
+		*state = data->duty_cycle + 1;	/* offset PWM States by 1 */
+
+	return 0;
+}
+
+static int tc654_set_cur_state(struct thermal_cooling_device *cdev, unsigned long state)
+{
+	struct tc654_data *data = tc654_update_client(cdev->devdata);
+
+	if (IS_ERR(data))
+		return PTR_ERR(data);
+
+	return _set_pwm(data, clamp_val(state, 0, TC654_MAX_COOLING_STATE));
+}
+
+static const struct thermal_cooling_device_ops tc654_fan_cool_ops = {
+	.get_max_state = tc654_get_max_state,
+	.get_cur_state = tc654_get_cur_state,
+	.set_cur_state = tc654_set_cur_state,
+};
+
 /*
  * device probe and removal
  */
@@ -472,7 +535,18 @@ static int tc654_probe(struct i2c_client *client)
 	hwmon_dev =
 	    devm_hwmon_device_register_with_groups(dev, client->name, data,
 						   tc654_groups);
-	return PTR_ERR_OR_ZERO(hwmon_dev);
+	if (IS_ERR(hwmon_dev))
+		return PTR_ERR(hwmon_dev);
+
+	if (IS_ENABLED(CONFIG_THERMAL)) {
+		struct thermal_cooling_device *cdev;
+
+		cdev = devm_thermal_of_cooling_device_register(dev, dev->of_node, client->name,
+							       hwmon_dev, &tc654_fan_cool_ops);
+		return PTR_ERR_OR_ZERO(cdev);
+	}
+
+	return 0;
 }
 
 static const struct i2c_device_id tc654_id[] = {
-- 
GitLab


From 8aba9ca62677570abf8c6b62611adb85bf1580ca Mon Sep 17 00:00:00 2001
From: Eugene Shalygin <eugene.shalygin@gmail.com>
Date: Fri, 11 Feb 2022 17:48:55 +0100
Subject: [PATCH 0759/1586] hwmon: (asus-ec-sensors) deduce sensor signedness
 from its type

Reading DSDT code for ASUS X470-based boards (the ones served by the
asus_wmi_Sensors driver), where ASUS put hardware monitoring functions
into the WMI code, reveals that fan and current sensors data is
unsigned. For the current sensor that was confirmed by a user who showed
high enough current value for overflow.

Thus let's assume that the signedness of the sensors is determined by its
type and that only temperature ones provide signed numbers.

Signed-off-by: Eugene Shalygin <eugene.shalygin@gmail.com>
Link: https://lore.kernel.org/r/20220211164855.265698-1-eugene.shalygin@gmail.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/asus-ec-sensors.c | 40 +++++++++++++++++++++++++--------
 1 file changed, 31 insertions(+), 9 deletions(-)

diff --git a/drivers/hwmon/asus-ec-sensors.c b/drivers/hwmon/asus-ec-sensors.c
index bfac08a5dc57d..d2b84578d2af2 100644
--- a/drivers/hwmon/asus-ec-sensors.c
+++ b/drivers/hwmon/asus-ec-sensors.c
@@ -266,6 +266,15 @@ static u8 register_index(u16 reg)
 	return reg & 0x00ff;
 }
 
+static bool is_sensor_data_signed(const struct ec_sensor_info *si)
+{
+	/*
+	 * guessed from WMI functions in DSDT code for boards
+	 * of the X470 generation
+	 */
+	return si->type == hwmon_temp;
+}
+
 static const struct ec_sensor_info *
 get_sensor_info(const struct ec_sensors_data *state, int index)
 {
@@ -420,15 +429,28 @@ static int asus_ec_block_read(const struct device *dev,
 
 static inline s32 get_sensor_value(const struct ec_sensor_info *si, u8 *data)
 {
-	switch (si->addr.components.size) {
-	case 1:
-		return (s8)*data;
-	case 2:
-		return (s16)get_unaligned_be16(data);
-	case 4:
-		return (s32)get_unaligned_be32(data);
-	default:
-		return 0;
+	if (is_sensor_data_signed(si)) {
+		switch (si->addr.components.size) {
+		case 1:
+			return (s8)*data;
+		case 2:
+			return (s16)get_unaligned_be16(data);
+		case 4:
+			return (s32)get_unaligned_be32(data);
+		default:
+			return 0;
+		}
+	} else {
+		switch (si->addr.components.size) {
+		case 1:
+			return *data;
+		case 2:
+			return get_unaligned_be16(data);
+		case 4:
+			return get_unaligned_be32(data);
+		default:
+			return 0;
+		}
 	}
 }
 
-- 
GitLab


From 1298184b387fd71fe68ecb7dc2001cb63ffa17c5 Mon Sep 17 00:00:00 2001
From: Eugene Shalygin <eugene.shalygin@gmail.com>
Date: Wed, 16 Feb 2022 20:19:58 +0100
Subject: [PATCH 0760/1586] hwmon: (asus-ec-sensors) merge setup functions

Merge configure_sensor_setup() into probe().

Changes:
 - v2: add local struct device *dev = &pdev->dev;
 - v3: initialize dev at declaration
 - v4: fix checkpatch warning
 - v5: fix formatting
 - v6: code style fixes
Signed-off-by: Eugene Shalygin <eugene.shalygin@gmail.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/asus-ec-sensors.c | 41 +++++++++++++--------------------
 1 file changed, 16 insertions(+), 25 deletions(-)

diff --git a/drivers/hwmon/asus-ec-sensors.c b/drivers/hwmon/asus-ec-sensors.c
index d2b84578d2af2..0701ade162270 100644
--- a/drivers/hwmon/asus-ec-sensors.c
+++ b/drivers/hwmon/asus-ec-sensors.c
@@ -611,23 +611,31 @@ get_board_sensors(const struct device *dev)
 	return (unsigned long)dmi_entry->driver_data;
 }
 
-static int __init configure_sensor_setup(struct device *dev)
+static int __init asus_ec_probe(struct platform_device *pdev)
 {
-	struct ec_sensors_data *ec_data = dev_get_drvdata(dev);
+	const struct hwmon_channel_info **ptr_asus_ec_ci;
 	int nr_count[hwmon_max] = { 0 }, nr_types = 0;
-	struct device *hwdev;
 	struct hwmon_channel_info *asus_ec_hwmon_chan;
-	const struct hwmon_channel_info **ptr_asus_ec_ci;
 	const struct hwmon_chip_info *chip_info;
+	struct device *dev = &pdev->dev;
+	struct ec_sensors_data *ec_data;
 	const struct ec_sensor_info *si;
 	enum hwmon_sensor_types type;
+	unsigned long board_sensors;
+	struct device *hwdev;
 	unsigned int i;
 
-	ec_data->board_sensors = get_board_sensors(dev);
-	if (!ec_data->board_sensors) {
+	board_sensors = get_board_sensors(dev);
+	if (!board_sensors)
 		return -ENODEV;
-	}
 
+	ec_data = devm_kzalloc(dev, sizeof(struct ec_sensors_data),
+			       GFP_KERNEL);
+	if (!ec_data)
+		return -ENOMEM;
+
+	dev_set_drvdata(dev, ec_data);
+	ec_data->board_sensors = board_sensors;
 	ec_data->nr_sensors = board_sensors_count(ec_data->board_sensors);
 	ec_data->sensors = devm_kcalloc(dev, ec_data->nr_sensors,
 					sizeof(struct ec_sensor), GFP_KERNEL);
@@ -638,9 +646,8 @@ static int __init configure_sensor_setup(struct device *dev)
 	ec_data->read_buffer = devm_kcalloc(dev, ec_data->nr_registers,
 					    sizeof(u8), GFP_KERNEL);
 
-	if (!ec_data->registers || !ec_data->read_buffer) {
+	if (!ec_data->registers || !ec_data->read_buffer)
 		return -ENOMEM;
-	}
 
 	fill_ec_registers(ec_data);
 
@@ -688,22 +695,6 @@ static int __init configure_sensor_setup(struct device *dev)
 	return PTR_ERR_OR_ZERO(hwdev);
 }
 
-static int __init asus_ec_probe(struct platform_device *pdev)
-{
-	struct ec_sensors_data *state;
-	int status = 0;
-
-	state = devm_kzalloc(&pdev->dev, sizeof(struct ec_sensors_data),
-			     GFP_KERNEL);
-
-	if (!state) {
-		return -ENOMEM;
-	}
-
-	dev_set_drvdata(&pdev->dev, state);
-	status = configure_sensor_setup(&pdev->dev);
-	return status;
-}
 
 static const struct acpi_device_id acpi_ec_ids[] = {
 	/* Embedded Controller Device */
-- 
GitLab


From 034dadfbd329231fc771984d4897c71f73a4434d Mon Sep 17 00:00:00 2001
From: Eugene Shalygin <eugene.shalygin@gmail.com>
Date: Thu, 17 Feb 2022 08:32:38 +0100
Subject: [PATCH 0761/1586] hwmon: (asus-ec-sensors) depend on X86 in KConfig

All the supported mainboards are for the X86 platform

Signed-off-by: Eugene Shalygin <eugene.shalygin@gmail.com>
Link: https://lore.kernel.org/r/20220217073238.2479005-1-eugene.shalygin@gmail.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index ce9149e0648f9..b3597ba66ddb9 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -2284,6 +2284,7 @@ config SENSORS_ASUS_WMI_EC
 
 config SENSORS_ASUS_EC
 	tristate "ASUS EC Sensors"
+	depends on X86
 	help
 	  If you say yes here you get support for the ACPI embedded controller
 	  hardware monitoring interface found in ASUS motherboards. The driver
-- 
GitLab


From e0f0307ac16fe69c6f9dcc8d14b532bc608fc989 Mon Sep 17 00:00:00 2001
From: Zev Weiss <zev@bewilderbeest.net>
Date: Fri, 18 Feb 2022 16:03:59 -0800
Subject: [PATCH 0762/1586] hwmon: (pmbus) Add get_error_flags support to
 regulator ops

The various PMBus status bits don't all map perfectly to the more
limited set of REGULATOR_ERROR_* flags, but there's a reasonable
number where they correspond well enough.

Signed-off-by: Zev Weiss <zev@bewilderbeest.net>
Link: https://lore.kernel.org/r/20220219000359.19985-1-zev@bewilderbeest.net
[groeck: Added missing locking]
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/pmbus/pmbus_core.c | 114 +++++++++++++++++++++++++++++++
 1 file changed, 114 insertions(+)

diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c
index b1386a4df4cc4..8eed7968a50ef 100644
--- a/drivers/hwmon/pmbus/pmbus_core.c
+++ b/drivers/hwmon/pmbus/pmbus_core.c
@@ -2432,10 +2432,124 @@ static int pmbus_regulator_disable(struct regulator_dev *rdev)
 	return _pmbus_regulator_on_off(rdev, 0);
 }
 
+/* A PMBus status flag and the corresponding REGULATOR_ERROR_* flag */
+struct pmbus_regulator_status_assoc {
+	int pflag, rflag;
+};
+
+/* PMBus->regulator bit mappings for a PMBus status register */
+struct pmbus_regulator_status_category {
+	int func;
+	int reg;
+	const struct pmbus_regulator_status_assoc *bits; /* zero-terminated */
+};
+
+static const struct pmbus_regulator_status_category pmbus_regulator_flag_map[] = {
+	{
+		.func = PMBUS_HAVE_STATUS_VOUT,
+		.reg = PMBUS_STATUS_VOUT,
+		.bits = (const struct pmbus_regulator_status_assoc[]) {
+			{ PB_VOLTAGE_UV_WARNING, REGULATOR_ERROR_UNDER_VOLTAGE_WARN },
+			{ PB_VOLTAGE_UV_FAULT,   REGULATOR_ERROR_UNDER_VOLTAGE },
+			{ PB_VOLTAGE_OV_WARNING, REGULATOR_ERROR_OVER_VOLTAGE_WARN },
+			{ PB_VOLTAGE_OV_FAULT,   REGULATOR_ERROR_REGULATION_OUT },
+			{ },
+		},
+	}, {
+		.func = PMBUS_HAVE_STATUS_IOUT,
+		.reg = PMBUS_STATUS_IOUT,
+		.bits = (const struct pmbus_regulator_status_assoc[]) {
+			{ PB_IOUT_OC_WARNING,    REGULATOR_ERROR_OVER_CURRENT_WARN },
+			{ PB_IOUT_OC_FAULT,      REGULATOR_ERROR_OVER_CURRENT },
+			{ PB_IOUT_OC_LV_FAULT,   REGULATOR_ERROR_OVER_CURRENT },
+			{ },
+		},
+	}, {
+		.func = PMBUS_HAVE_STATUS_TEMP,
+		.reg = PMBUS_STATUS_TEMPERATURE,
+		.bits = (const struct pmbus_regulator_status_assoc[]) {
+			{ PB_TEMP_OT_WARNING,    REGULATOR_ERROR_OVER_TEMP_WARN },
+			{ PB_TEMP_OT_FAULT,      REGULATOR_ERROR_OVER_TEMP },
+			{ },
+		},
+	},
+};
+
+static int pmbus_regulator_get_error_flags(struct regulator_dev *rdev, unsigned int *flags)
+{
+	int i, status;
+	const struct pmbus_regulator_status_category *cat;
+	const struct pmbus_regulator_status_assoc *bit;
+	struct device *dev = rdev_get_dev(rdev);
+	struct i2c_client *client = to_i2c_client(dev->parent);
+	struct pmbus_data *data = i2c_get_clientdata(client);
+	u8 page = rdev_get_id(rdev);
+	int func = data->info->func[page];
+
+	*flags = 0;
+
+	mutex_lock(&data->update_lock);
+
+	for (i = 0; i < ARRAY_SIZE(pmbus_regulator_flag_map); i++) {
+		cat = &pmbus_regulator_flag_map[i];
+		if (!(func & cat->func))
+			continue;
+
+		status = pmbus_read_byte_data(client, page, cat->reg);
+		if (status < 0) {
+			mutex_unlock(&data->update_lock);
+			return status;
+		}
+
+		for (bit = cat->bits; bit->pflag; bit++) {
+			if (status & bit->pflag)
+				*flags |= bit->rflag;
+		}
+	}
+
+	/*
+	 * Map what bits of STATUS_{WORD,BYTE} we can to REGULATOR_ERROR_*
+	 * bits.  Some of the other bits are tempting (especially for cases
+	 * where we don't have the relevant PMBUS_HAVE_STATUS_*
+	 * functionality), but there's an unfortunate ambiguity in that
+	 * they're defined as indicating a fault *or* a warning, so we can't
+	 * easily determine whether to report REGULATOR_ERROR_<foo> or
+	 * REGULATOR_ERROR_<foo>_WARN.
+	 */
+	status = pmbus_get_status(client, page, PMBUS_STATUS_WORD);
+	mutex_unlock(&data->update_lock);
+	if (status < 0)
+		return status;
+
+	if (pmbus_regulator_is_enabled(rdev) && (status & PB_STATUS_OFF))
+		*flags |= REGULATOR_ERROR_FAIL;
+
+	/*
+	 * Unlike most other status bits, PB_STATUS_{IOUT_OC,VOUT_OV} are
+	 * defined strictly as fault indicators (not warnings).
+	 */
+	if (status & PB_STATUS_IOUT_OC)
+		*flags |= REGULATOR_ERROR_OVER_CURRENT;
+	if (status & PB_STATUS_VOUT_OV)
+		*flags |= REGULATOR_ERROR_REGULATION_OUT;
+
+	/*
+	 * If we haven't discovered any thermal faults or warnings via
+	 * PMBUS_STATUS_TEMPERATURE, map PB_STATUS_TEMPERATURE to a warning as
+	 * a (conservative) best-effort interpretation.
+	 */
+	if (!(*flags & (REGULATOR_ERROR_OVER_TEMP | REGULATOR_ERROR_OVER_TEMP_WARN)) &&
+	    (status & PB_STATUS_TEMPERATURE))
+		*flags |= REGULATOR_ERROR_OVER_TEMP_WARN;
+
+	return 0;
+}
+
 const struct regulator_ops pmbus_regulator_ops = {
 	.enable = pmbus_regulator_enable,
 	.disable = pmbus_regulator_disable,
 	.is_enabled = pmbus_regulator_is_enabled,
+	.get_error_flags = pmbus_regulator_get_error_flags,
 };
 EXPORT_SYMBOL_NS_GPL(pmbus_regulator_ops, PMBUS);
 
-- 
GitLab


From da78ad2b6b9b7308c5835428442fef8d73cea2e3 Mon Sep 17 00:00:00 2001
From: Zev Weiss <zev@bewilderbeest.net>
Date: Fri, 18 Feb 2022 16:07:42 -0800
Subject: [PATCH 0763/1586] hwmon: (pmbus/lm25066) Add regulator support

While these chips aren't strictly advertised as voltage regulators per
se, they (aside from the lm25056) support the PMBus OPERATION command
to enable and disable their outputs and have status bits for reporting
various warnings and faults, and can hence usefully support all the
pmbus_regulator_ops operations.

Signed-off-by: Zev Weiss <zev@bewilderbeest.net>
Link: https://lore.kernel.org/r/20220219000742.20126-1-zev@bewilderbeest.net
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/pmbus/Kconfig   |  7 +++++++
 drivers/hwmon/pmbus/lm25066.c | 14 ++++++++++++++
 2 files changed, 21 insertions(+)

diff --git a/drivers/hwmon/pmbus/Kconfig b/drivers/hwmon/pmbus/Kconfig
index c96f7b7338bd7..c73aa50c76157 100644
--- a/drivers/hwmon/pmbus/Kconfig
+++ b/drivers/hwmon/pmbus/Kconfig
@@ -174,6 +174,13 @@ config SENSORS_LM25066
 	  This driver can also be built as a module. If so, the module will
 	  be called lm25066.
 
+config SENSORS_LM25066_REGULATOR
+	bool "Regulator support for LM25066 and compatibles"
+	depends on SENSORS_LM25066 && REGULATOR
+	help
+	  If you say yes here you get regulator support for National
+	  Semiconductor LM25066, LM5064, and LM5066.
+
 config SENSORS_LTC2978
 	tristate "Linear Technologies LTC2978 and compatibles"
 	help
diff --git a/drivers/hwmon/pmbus/lm25066.c b/drivers/hwmon/pmbus/lm25066.c
index 8402b41520eb3..09792cd03d9fd 100644
--- a/drivers/hwmon/pmbus/lm25066.c
+++ b/drivers/hwmon/pmbus/lm25066.c
@@ -435,6 +435,12 @@ static int lm25066_write_word_data(struct i2c_client *client, int page, int reg,
 	return ret;
 }
 
+#if IS_ENABLED(CONFIG_SENSORS_LM25066_REGULATOR)
+static const struct regulator_desc lm25066_reg_desc[] = {
+	PMBUS_REGULATOR("vout", 0),
+};
+#endif
+
 static const struct i2c_device_id lm25066_id[] = {
 	{"lm25056", lm25056},
 	{"lm25066", lm25066},
@@ -545,6 +551,14 @@ static int lm25066_probe(struct i2c_client *client)
 	info->m[PSC_CURRENT_IN] = info->m[PSC_CURRENT_IN] * shunt / 1000;
 	info->m[PSC_POWER] = info->m[PSC_POWER] * shunt / 1000;
 
+#if IS_ENABLED(CONFIG_SENSORS_LM25066_REGULATOR)
+	/* LM25056 doesn't support OPERATION */
+	if (data->id != lm25056) {
+		info->num_regulators = ARRAY_SIZE(lm25066_reg_desc);
+		info->reg_desc = lm25066_reg_desc;
+	}
+#endif
+
 	return pmbus_do_probe(client, info);
 }
 
-- 
GitLab


From 567e77a2c213f0103912378891f25442b665cbfb Mon Sep 17 00:00:00 2001
From: Eugene Shalygin <eugene.shalygin@gmail.com>
Date: Thu, 17 Feb 2022 20:43:18 +0100
Subject: [PATCH 0764/1586] hwmon: (asus-ec-sensors) do not print from .probe()

Remove the call to dev_info() from the board detection function, which
is called from probe(), not only to be in line with hwmon driver rules, but
also because the message duplicates the error code returned from probe()
for that case (ENODEV).

Changes in:
 - v2: add missing newline (style).

Signed-off-by: Eugene Shalygin <eugene.shalygin@gmail.com>
Link: https://lore.kernel.org/r/20220217194318.2960472-1-eugene.shalygin@gmail.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/asus-ec-sensors.c | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

diff --git a/drivers/hwmon/asus-ec-sensors.c b/drivers/hwmon/asus-ec-sensors.c
index 0701ade162270..b5cf0136360cd 100644
--- a/drivers/hwmon/asus-ec-sensors.c
+++ b/drivers/hwmon/asus-ec-sensors.c
@@ -597,18 +597,12 @@ static struct hwmon_chip_info asus_ec_chip_info = {
 	.ops = &asus_ec_hwmon_ops,
 };
 
-static unsigned long __init
-get_board_sensors(const struct device *dev)
+static unsigned long __init get_board_sensors(void)
 {
-	const struct dmi_system_id *dmi_entry;
+	const struct dmi_system_id *dmi_entry =
+		dmi_first_match(asus_ec_dmi_table);
 
-	dmi_entry = dmi_first_match(asus_ec_dmi_table);
-	if (!dmi_entry) {
-		dev_info(dev, "Unsupported board");
-		return 0;
-	}
-
-	return (unsigned long)dmi_entry->driver_data;
+	return dmi_entry ? (unsigned long)dmi_entry->driver_data : 0;
 }
 
 static int __init asus_ec_probe(struct platform_device *pdev)
@@ -625,7 +619,7 @@ static int __init asus_ec_probe(struct platform_device *pdev)
 	struct device *hwdev;
 	unsigned int i;
 
-	board_sensors = get_board_sensors(dev);
+	board_sensors = get_board_sensors();
 	if (!board_sensors)
 		return -ENODEV;
 
-- 
GitLab


From 6109c3e1905c3f8d0a3909c5f6a9ad5186822b2b Mon Sep 17 00:00:00 2001
From: Eddie James <eajames@linux.ibm.com>
Date: Tue, 15 Feb 2022 09:10:19 -0600
Subject: [PATCH 0765/1586] hwmon: (occ) Add sysfs entry for IPS (Idle Power
 Saver) status

BMC control applications need to check the Idle Power Saver status
byte returned by the OCC poll response, so export it in sysfs with
the other OCC-specific data.

Signed-off-by: Eddie James <eajames@linux.ibm.com>
Reviewed-by: Joel Stanley <joel@jms.id.au>
Link: https://lore.kernel.org/r/20220215151022.7498-2-eajames@linux.ibm.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/occ/common.h |  1 +
 drivers/hwmon/occ/sysfs.c  | 11 +++++++++++
 2 files changed, 12 insertions(+)

diff --git a/drivers/hwmon/occ/common.h b/drivers/hwmon/occ/common.h
index 5020117be740f..a88c66d36e38a 100644
--- a/drivers/hwmon/occ/common.h
+++ b/drivers/hwmon/occ/common.h
@@ -119,6 +119,7 @@ struct occ {
 	u8 prev_stat;
 	u8 prev_ext_stat;
 	u8 prev_occs_present;
+	u8 prev_ips_status;
 };
 
 int occ_setup(struct occ *occ, const char *name);
diff --git a/drivers/hwmon/occ/sysfs.c b/drivers/hwmon/occ/sysfs.c
index 03b16abef67fd..6dc69c9aa4c29 100644
--- a/drivers/hwmon/occ/sysfs.c
+++ b/drivers/hwmon/occ/sysfs.c
@@ -63,6 +63,9 @@ static ssize_t occ_sysfs_show(struct device *dev,
 		else
 			val = 1;
 		break;
+	case 8:
+		val = header->ips_status;
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -88,6 +91,7 @@ static SENSOR_DEVICE_ATTR(occ_mem_throttle, 0444, occ_sysfs_show, NULL, 4);
 static SENSOR_DEVICE_ATTR(occ_quick_pwr_drop, 0444, occ_sysfs_show, NULL, 5);
 static SENSOR_DEVICE_ATTR(occ_state, 0444, occ_sysfs_show, NULL, 6);
 static SENSOR_DEVICE_ATTR(occs_present, 0444, occ_sysfs_show, NULL, 7);
+static SENSOR_DEVICE_ATTR(occ_ips_status, 0444, occ_sysfs_show, NULL, 8);
 static DEVICE_ATTR_RO(occ_error);
 
 static struct attribute *occ_attributes[] = {
@@ -99,6 +103,7 @@ static struct attribute *occ_attributes[] = {
 	&sensor_dev_attr_occ_quick_pwr_drop.dev_attr.attr,
 	&sensor_dev_attr_occ_state.dev_attr.attr,
 	&sensor_dev_attr_occs_present.dev_attr.attr,
+	&sensor_dev_attr_occ_ips_status.dev_attr.attr,
 	&dev_attr_occ_error.attr,
 	NULL
 };
@@ -162,6 +167,11 @@ void occ_sysfs_poll_done(struct occ *occ)
 		sysfs_notify(&occ->bus_dev->kobj, NULL, name);
 	}
 
+	if (header->ips_status != occ->prev_ips_status) {
+		name = sensor_dev_attr_occ_ips_status.dev_attr.attr.name;
+		sysfs_notify(&occ->bus_dev->kobj, NULL, name);
+	}
+
 	if (occ->error && occ->error != occ->prev_error) {
 		name = dev_attr_occ_error.attr.name;
 		sysfs_notify(&occ->bus_dev->kobj, NULL, name);
@@ -174,6 +184,7 @@ done:
 	occ->prev_stat = header->status;
 	occ->prev_ext_stat = header->ext_status;
 	occ->prev_occs_present = header->occs_present;
+	occ->prev_ips_status = header->ips_status;
 }
 
 int occ_setup_sysfs(struct occ *occ)
-- 
GitLab


From a25126fc32890a2a03ba5ddd1189aa8a4bcf4591 Mon Sep 17 00:00:00 2001
From: Eddie James <eajames@linux.ibm.com>
Date: Tue, 15 Feb 2022 09:10:20 -0600
Subject: [PATCH 0766/1586] hwmon: (occ) Add sysfs entry for OCC mode

BMC control applications need to check the OCC mode returned by the
OCC poll response, so export it in sysfs with the other OCC-specific
data.

Signed-off-by: Eddie James <eajames@linux.ibm.com>
Reviewed-by: Joel Stanley <joel@jms.id.au>
Link: https://lore.kernel.org/r/20220215151022.7498-3-eajames@linux.ibm.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/occ/common.h |  1 +
 drivers/hwmon/occ/sysfs.c  | 11 +++++++++++
 2 files changed, 12 insertions(+)

diff --git a/drivers/hwmon/occ/common.h b/drivers/hwmon/occ/common.h
index a88c66d36e38a..2dd4a4d240c0f 100644
--- a/drivers/hwmon/occ/common.h
+++ b/drivers/hwmon/occ/common.h
@@ -120,6 +120,7 @@ struct occ {
 	u8 prev_ext_stat;
 	u8 prev_occs_present;
 	u8 prev_ips_status;
+	u8 prev_mode;
 };
 
 int occ_setup(struct occ *occ, const char *name);
diff --git a/drivers/hwmon/occ/sysfs.c b/drivers/hwmon/occ/sysfs.c
index 6dc69c9aa4c29..88f655887c958 100644
--- a/drivers/hwmon/occ/sysfs.c
+++ b/drivers/hwmon/occ/sysfs.c
@@ -66,6 +66,9 @@ static ssize_t occ_sysfs_show(struct device *dev,
 	case 8:
 		val = header->ips_status;
 		break;
+	case 9:
+		val = header->mode;
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -92,6 +95,7 @@ static SENSOR_DEVICE_ATTR(occ_quick_pwr_drop, 0444, occ_sysfs_show, NULL, 5);
 static SENSOR_DEVICE_ATTR(occ_state, 0444, occ_sysfs_show, NULL, 6);
 static SENSOR_DEVICE_ATTR(occs_present, 0444, occ_sysfs_show, NULL, 7);
 static SENSOR_DEVICE_ATTR(occ_ips_status, 0444, occ_sysfs_show, NULL, 8);
+static SENSOR_DEVICE_ATTR(occ_mode, 0444, occ_sysfs_show, NULL, 9);
 static DEVICE_ATTR_RO(occ_error);
 
 static struct attribute *occ_attributes[] = {
@@ -104,6 +108,7 @@ static struct attribute *occ_attributes[] = {
 	&sensor_dev_attr_occ_state.dev_attr.attr,
 	&sensor_dev_attr_occs_present.dev_attr.attr,
 	&sensor_dev_attr_occ_ips_status.dev_attr.attr,
+	&sensor_dev_attr_occ_mode.dev_attr.attr,
 	&dev_attr_occ_error.attr,
 	NULL
 };
@@ -172,6 +177,11 @@ void occ_sysfs_poll_done(struct occ *occ)
 		sysfs_notify(&occ->bus_dev->kobj, NULL, name);
 	}
 
+	if (header->mode != occ->prev_mode) {
+		name = sensor_dev_attr_occ_mode.dev_attr.attr.name;
+		sysfs_notify(&occ->bus_dev->kobj, NULL, name);
+	}
+
 	if (occ->error && occ->error != occ->prev_error) {
 		name = dev_attr_occ_error.attr.name;
 		sysfs_notify(&occ->bus_dev->kobj, NULL, name);
@@ -185,6 +195,7 @@ done:
 	occ->prev_ext_stat = header->ext_status;
 	occ->prev_occs_present = header->occs_present;
 	occ->prev_ips_status = header->ips_status;
+	occ->prev_mode = header->mode;
 }
 
 int occ_setup_sysfs(struct occ *occ)
-- 
GitLab


From a03d8969887325167d5fe47ead99d280bbbc6b06 Mon Sep 17 00:00:00 2001
From: Eddie James <eajames@linux.ibm.com>
Date: Tue, 15 Feb 2022 09:10:21 -0600
Subject: [PATCH 0767/1586] hwmon: (occ) Add sysfs entries for additional
 extended status bits

Add sysfs entries for DVFS due to a VRM Vdd over-temperature condition,
and add the GPU throttling condition bits (such that if bit 1 is set,
GPU1 is throttling).

Signed-off-by: Eddie James <eajames@linux.ibm.com>
Reviewed-by: Joel Stanley <joel@jms.id.au>
Link: https://lore.kernel.org/r/20220215151022.7498-4-eajames@linux.ibm.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/occ/sysfs.c | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/drivers/hwmon/occ/sysfs.c b/drivers/hwmon/occ/sysfs.c
index 88f655887c958..b2f788a777469 100644
--- a/drivers/hwmon/occ/sysfs.c
+++ b/drivers/hwmon/occ/sysfs.c
@@ -19,6 +19,8 @@
 #define OCC_EXT_STAT_DVFS_POWER		BIT(6)
 #define OCC_EXT_STAT_MEM_THROTTLE	BIT(5)
 #define OCC_EXT_STAT_QUICK_DROP		BIT(4)
+#define OCC_EXT_STAT_DVFS_VDD		BIT(3)
+#define OCC_EXT_STAT_GPU_THROTTLE	GENMASK(2, 0)
 
 static ssize_t occ_sysfs_show(struct device *dev,
 			      struct device_attribute *attr, char *buf)
@@ -69,6 +71,12 @@ static ssize_t occ_sysfs_show(struct device *dev,
 	case 9:
 		val = header->mode;
 		break;
+	case 10:
+		val = !!(header->ext_status & OCC_EXT_STAT_DVFS_VDD);
+		break;
+	case 11:
+		val = header->ext_status & OCC_EXT_STAT_GPU_THROTTLE;
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -96,6 +104,8 @@ static SENSOR_DEVICE_ATTR(occ_state, 0444, occ_sysfs_show, NULL, 6);
 static SENSOR_DEVICE_ATTR(occs_present, 0444, occ_sysfs_show, NULL, 7);
 static SENSOR_DEVICE_ATTR(occ_ips_status, 0444, occ_sysfs_show, NULL, 8);
 static SENSOR_DEVICE_ATTR(occ_mode, 0444, occ_sysfs_show, NULL, 9);
+static SENSOR_DEVICE_ATTR(occ_dvfs_vdd, 0444, occ_sysfs_show, NULL, 10);
+static SENSOR_DEVICE_ATTR(occ_gpu_throttle, 0444, occ_sysfs_show, NULL, 11);
 static DEVICE_ATTR_RO(occ_error);
 
 static struct attribute *occ_attributes[] = {
@@ -109,6 +119,8 @@ static struct attribute *occ_attributes[] = {
 	&sensor_dev_attr_occs_present.dev_attr.attr,
 	&sensor_dev_attr_occ_ips_status.dev_attr.attr,
 	&sensor_dev_attr_occ_mode.dev_attr.attr,
+	&sensor_dev_attr_occ_dvfs_vdd.dev_attr.attr,
+	&sensor_dev_attr_occ_gpu_throttle.dev_attr.attr,
 	&dev_attr_occ_error.attr,
 	NULL
 };
@@ -166,6 +178,18 @@ void occ_sysfs_poll_done(struct occ *occ)
 		sysfs_notify(&occ->bus_dev->kobj, NULL, name);
 	}
 
+	if ((header->ext_status & OCC_EXT_STAT_DVFS_VDD) !=
+	    (occ->prev_ext_stat & OCC_EXT_STAT_DVFS_VDD)) {
+		name = sensor_dev_attr_occ_dvfs_vdd.dev_attr.attr.name;
+		sysfs_notify(&occ->bus_dev->kobj, NULL, name);
+	}
+
+	if ((header->ext_status & OCC_EXT_STAT_GPU_THROTTLE) !=
+	    (occ->prev_ext_stat & OCC_EXT_STAT_GPU_THROTTLE)) {
+		name = sensor_dev_attr_occ_gpu_throttle.dev_attr.attr.name;
+		sysfs_notify(&occ->bus_dev->kobj, NULL, name);
+	}
+
 	if ((header->status & OCC_STAT_MASTER) &&
 	    header->occs_present != occ->prev_occs_present) {
 		name = sensor_dev_attr_occs_present.dev_attr.attr.name;
-- 
GitLab


From 7cd682b027ee1ed8bcf1e79e208c2b6b1a4d31b7 Mon Sep 17 00:00:00 2001
From: Armin Wolf <W_Armin@gmx.de>
Date: Tue, 15 Feb 2022 20:11:13 +0100
Subject: [PATCH 0768/1586] hwmon: (dell-smm) Reword and mark parameter "force"
 as unsafe
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When enabling said module parameter, the driver ignores
all feature blacklists on relevant models, which has the
potential for strange side effects. Also there seems to
be a slight chance for unsupported devices to behave
badly when probed for features.
In such cases, the kernel should be tainted to inform
people that these issues might have been caused by
the dell_smm_hwmon driver with "force" enabled.
Also reword the parameter description to remind users
that enabling "force" also enables blacklisted features.

Tested on a Dell Inspiron 3505.

Signed-off-by: Armin Wolf <W_Armin@gmx.de>
Reviewed-by: Pali Rohár <pali@kernel.org>
Link: https://lore.kernel.org/r/20220215191113.16640-8-W_Armin@gmx.de
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/dell-smm-hwmon.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/hwmon/dell-smm-hwmon.c b/drivers/hwmon/dell-smm-hwmon.c
index 9949eeb79378e..1fd672610aeba 100644
--- a/drivers/hwmon/dell-smm-hwmon.c
+++ b/drivers/hwmon/dell-smm-hwmon.c
@@ -86,8 +86,8 @@ MODULE_LICENSE("GPL");
 MODULE_ALIAS("i8k");
 
 static bool force;
-module_param(force, bool, 0);
-MODULE_PARM_DESC(force, "Force loading without checking for supported models");
+module_param_unsafe(force, bool, 0);
+MODULE_PARM_DESC(force, "Force loading without checking for supported models and features");
 
 static bool ignore_dmi;
 module_param(ignore_dmi, bool, 0);
-- 
GitLab


From ec3db1ecf63e02265b4e93ddb1ba5cc6858c3972 Mon Sep 17 00:00:00 2001
From: Armin Wolf <W_Armin@gmx.de>
Date: Tue, 15 Feb 2022 20:11:12 +0100
Subject: [PATCH 0769/1586] hwmon: (dell-smm) Add SMM interface documentation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Document the SMM interface as requested by Pali Rohar.
Since Dell does not offer any offical documentation
regarding the SMM interface, the necessary information
was extracted from the dell_smm_hwmon driver and other
sources.

Suggested-by: Pali Rohár <pali@kernel.org>
Signed-off-by: Armin Wolf <W_Armin@gmx.de>
Reviewed-by: Pali Rohár <pali@kernel.org>
Link: https://lore.kernel.org/r/20220215191113.16640-7-W_Armin@gmx.de
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 Documentation/hwmon/dell-smm-hwmon.rst | 180 +++++++++++++++++++++++++
 1 file changed, 180 insertions(+)

diff --git a/Documentation/hwmon/dell-smm-hwmon.rst b/Documentation/hwmon/dell-smm-hwmon.rst
index beec884911715..d3323a96665d6 100644
--- a/Documentation/hwmon/dell-smm-hwmon.rst
+++ b/Documentation/hwmon/dell-smm-hwmon.rst
@@ -165,3 +165,183 @@ obtain the same information and to control the fan status. The ioctl
 interface can be accessed from C programs or from shell using the
 i8kctl utility. See the source file of ``i8kutils`` for more
 information on how to use the ioctl interface.
+
+SMM Interface
+-------------
+
+.. warning:: The SMM interface was reverse-engineered by trial-and-error
+             since Dell did not provide any Documentation,
+             please keep that in mind.
+
+The driver uses the SMM interface to send commands to the system BIOS.
+This interface is normally used by Dell's 32-bit diagnostic program or
+on newer notebook models by the buildin BIOS diagnostics.
+The SMM is triggered by writing to the special ioports ``0xb2`` and ``0x84``,
+and may cause short hangs when the BIOS code is taking too long to
+execute.
+
+The SMM handler inside the system BIOS looks at the contents of the
+``eax``, ``ebx``, ``ecx``, ``edx``, ``esi`` and ``edi`` registers.
+Each register has a special purpose:
+
+=============== ==================================
+Register        Purpose
+=============== ==================================
+eax             Holds the command code before SMM,
+                holds the first result after SMM.
+ebx             Holds the arguments.
+ecx             Unknown, set to 0.
+edx             Holds the second result after SMM.
+esi             Unknown, set to 0.
+edi             Unknown, set to 0.
+=============== ==================================
+
+The SMM handler can signal a failure by either:
+
+- setting the lower sixteen bits of ``eax`` to ``0xffff``
+- not modifying ``eax`` at all
+- setting the carry flag
+
+SMM command codes
+-----------------
+
+=============== ======================= ================================================
+Command Code    Command Name            Description
+=============== ======================= ================================================
+``0x0025``      Get Fn key status       Returns the Fn key pressed after SMM:
+
+                                        - 9th bit in ``eax`` indicates Volume up
+                                        - 10th bit in ``eax`` indicates Volume down
+                                        - both bits indicate Volume mute
+
+``0xa069``      Get power status        Returns current power status after SMM:
+
+                                        - 1st bit in ``eax`` indicates Battery connected
+                                        - 3th bit in ``eax`` indicates AC connected
+
+``0x00a3``      Get fan state           Returns current fan state after SMM:
+
+                                        - 1st byte in ``eax`` holds the current
+                                          fan state (0 - 2 or 3)
+
+``0x01a3``      Set fan state           Sets the fan speed:
+
+                                        - 1st byte in ``ebx`` holds the fan number
+                                        - 2nd byte in ``ebx`` holds the desired
+                                          fan state (0 - 2 or 3)
+
+``0x02a3``      Get fan speed           Returns the current fan speed in RPM:
+
+                                        - 1st byte in ``ebx`` holds the fan number
+                                        - 1st word in ``eax`` holds the current
+                                          fan speed in RPM (after SMM)
+
+``0x03a3``      Get fan type            Returns the fan type:
+
+                                        - 1st byte in ``ebx`` holds the fan number
+                                        - 1st byte in ``eax`` holds the
+                                          fan type (after SMM):
+
+                                          - 5th bit indicates docking fan
+                                          - 1 indicates Processor fan
+                                          - 2 indicates Motherboard fan
+                                          - 3 indicates Video fan
+                                          - 4 indicates Power supply fan
+                                          - 5 indicates Chipset fan
+                                          - 6 indicates other fan type
+
+``0x04a3``      Get nominal fan speed   Returns the nominal RPM in each fan state:
+
+                                        - 1st byte in ``ebx`` holds the fan number
+                                        - 2nd byte in ``ebx`` holds the fan state
+                                          in question (0 - 2 or 3)
+                                        - 1st word in ``eax`` holds the nominal
+                                          fan speed in RPM (after SMM)
+
+``0x05a3``      Get fan speed tolerance Returns the speed tolerance for each fan state:
+
+                                        - 1st byte in ``ebx`` holds the fan number
+                                        - 2nd byte in ``ebx`` holds the fan state
+                                          in question (0 - 2 or 3)
+                                        - 1st byte in ``eax`` returns the speed
+                                          tolerance
+
+``0x10a3``      Get sensor temperature  Returns the measured temperature:
+
+                                        - 1st byte in ``ebx`` holds the sensor number
+                                        - 1st byte in ``eax`` holds the measured
+                                          temperature (after SMM)
+
+``0x11a3``      Get sensor type         Returns the sensor type:
+
+                                        - 1st byte in ``ebx`` holds the sensor number
+                                        - 1st byte in ``eax`` holds the
+                                          temperature type (after SMM):
+
+                                          - 1 indicates CPU sensor
+                                          - 2 indicates GPU sensor
+                                          - 3 indicates SODIMM sensor
+                                          - 4 indicates other sensor type
+                                          - 5 indicates Ambient sensor
+                                          - 6 indicates other sensor type
+
+``0xfea3``      Get SMM signature       Returns Dell signature if interface
+                                        is supported (after SMM):
+
+                                        - ``eax`` holds 1145651527
+                                          (0x44494147 or "DIAG")
+                                        - ``edx`` holds 1145392204
+                                          (0x44454c4c or "DELL")
+
+``0xffa3``      Get SMM signature       Same as ``0xfea3``, check both.
+=============== ======================= ================================================
+
+There are additional commands for enabling (``0x31a3`` or ``0x35a3``) and
+disabling (``0x30a3`` or ``0x34a3``) automatic fan speed control.
+The commands are however causing severe sideeffects on many machines, so
+they are not used by default.
+
+On several machines (Inspiron 3505, Precision 490, Vostro 1720, ...), the
+fans supports a 4th "magic" state, which signals the BIOS that automatic
+fan control should be enabled for a specific fan.
+However there are also some machines who do support a 4th regular fan state too,
+but in case of the "magic" state, the nominal RPM reported for this state is a
+placeholder value, which however is not always detectable.
+
+Firmware Bugs
+-------------
+
+The SMM calls can behave erratic on some machines:
+
+======================================================= =================
+Firmware Bug                                            Affected Machines
+======================================================= =================
+Reading of fan states return spurious errors.           Precision 490
+
+Reading of fan types causes erratic fan behaviour.      Studio XPS 8000
+
+                                                        Studio XPS 8100
+
+                                                        Inspiron 580
+
+Fan-related SMM calls take too long (about 500ms).      Inspiron 7720
+
+                                                        Vostro 3360
+
+                                                        XPS 13 9333
+
+                                                        XPS 15 L502X
+======================================================= =================
+
+In case you experience similar issues on your Dell machine, please
+submit a bugreport on bugzilla to we can apply workarounds.
+
+Limitations
+-----------
+
+The SMM calls can take too long to execute on some machines, causing
+short hangs and/or audio glitches.
+Also the fan state needs to be restored after suspend, as well as
+the automatic mode settings.
+When reading a temperature sensor, values above 127 degrees indicate
+a BIOS read error or a deactivated sensor.
-- 
GitLab


From 4d9983dee5d4839008f7d1a75e866bccaa5877e8 Mon Sep 17 00:00:00 2001
From: Armin Wolf <W_Armin@gmx.de>
Date: Tue, 15 Feb 2022 20:11:09 +0100
Subject: [PATCH 0770/1586] hwmon: (dell-smm) Make fan/temp sensor number a u8
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Right now, we only use bits 0 to 7 of the fan/temp sensor number
by doing number & 0xff. Passing the value as a u8 makes this
step unnecessary. Also add checks to the ioctl handler since
users might get confused when passing 0x00000101 does the same
as passing 0x00000001.

Tested on a Dell Inspiron 3505.

Signed-off-by: Armin Wolf <W_Armin@gmx.de>
Reviewed-by: Pali Rohár <pali@kernel.org>
Link: https://lore.kernel.org/r/20220215191113.16640-4-W_Armin@gmx.de
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/dell-smm-hwmon.c | 68 ++++++++++++++++++++++------------
 1 file changed, 45 insertions(+), 23 deletions(-)

diff --git a/drivers/hwmon/dell-smm-hwmon.c b/drivers/hwmon/dell-smm-hwmon.c
index 1fd672610aeba..1cde005ff4e46 100644
--- a/drivers/hwmon/dell-smm-hwmon.c
+++ b/drivers/hwmon/dell-smm-hwmon.c
@@ -21,6 +21,7 @@
 #include <linux/errno.h>
 #include <linux/hwmon.h>
 #include <linux/init.h>
+#include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/platform_device.h>
@@ -250,46 +251,52 @@ static int i8k_smm(struct smm_regs *regs)
 /*
  * Read the fan status.
  */
-static int i8k_get_fan_status(const struct dell_smm_data *data, int fan)
+static int i8k_get_fan_status(const struct dell_smm_data *data, u8 fan)
 {
-	struct smm_regs regs = { .eax = I8K_SMM_GET_FAN, };
+	struct smm_regs regs = {
+		.eax = I8K_SMM_GET_FAN,
+		.ebx = fan,
+	};
 
 	if (data->disallow_fan_support)
 		return -EINVAL;
 
-	regs.ebx = fan & 0xff;
 	return i8k_smm(&regs) ? : regs.eax & 0xff;
 }
 
 /*
  * Read the fan speed in RPM.
  */
-static int i8k_get_fan_speed(const struct dell_smm_data *data, int fan)
+static int i8k_get_fan_speed(const struct dell_smm_data *data, u8 fan)
 {
-	struct smm_regs regs = { .eax = I8K_SMM_GET_SPEED, };
+	struct smm_regs regs = {
+		.eax = I8K_SMM_GET_SPEED,
+		.ebx = fan,
+	};
 
 	if (data->disallow_fan_support)
 		return -EINVAL;
 
-	regs.ebx = fan & 0xff;
 	return i8k_smm(&regs) ? : (regs.eax & 0xffff) * data->i8k_fan_mult;
 }
 
 /*
  * Read the fan type.
  */
-static int _i8k_get_fan_type(const struct dell_smm_data *data, int fan)
+static int _i8k_get_fan_type(const struct dell_smm_data *data, u8 fan)
 {
-	struct smm_regs regs = { .eax = I8K_SMM_GET_FAN_TYPE, };
+	struct smm_regs regs = {
+		.eax = I8K_SMM_GET_FAN_TYPE,
+		.ebx = fan,
+	};
 
 	if (data->disallow_fan_support || data->disallow_fan_type_call)
 		return -EINVAL;
 
-	regs.ebx = fan & 0xff;
 	return i8k_smm(&regs) ? : regs.eax & 0xff;
 }
 
-static int i8k_get_fan_type(struct dell_smm_data *data, int fan)
+static int i8k_get_fan_type(struct dell_smm_data *data, u8 fan)
 {
 	/* I8K_SMM_GET_FAN_TYPE SMM call is expensive, so cache values */
 	if (data->fan_type[fan] == INT_MIN)
@@ -301,14 +308,16 @@ static int i8k_get_fan_type(struct dell_smm_data *data, int fan)
 /*
  * Read the fan nominal rpm for specific fan speed.
  */
-static int __init i8k_get_fan_nominal_speed(const struct dell_smm_data *data, int fan, int speed)
+static int __init i8k_get_fan_nominal_speed(const struct dell_smm_data *data, u8 fan, int speed)
 {
-	struct smm_regs regs = { .eax = I8K_SMM_GET_NOM_SPEED, };
+	struct smm_regs regs = {
+		.eax = I8K_SMM_GET_NOM_SPEED,
+		.ebx = fan | (speed << 8),
+	};
 
 	if (data->disallow_fan_support)
 		return -EINVAL;
 
-	regs.ebx = (fan & 0xff) | (speed << 8);
 	return i8k_smm(&regs) ? : (regs.eax & 0xffff) * data->i8k_fan_mult;
 }
 
@@ -329,7 +338,7 @@ static int i8k_enable_fan_auto_mode(const struct dell_smm_data *data, bool enabl
 /*
  * Set the fan speed (off, low, high, ...).
  */
-static int i8k_set_fan(const struct dell_smm_data *data, int fan, int speed)
+static int i8k_set_fan(const struct dell_smm_data *data, u8 fan, int speed)
 {
 	struct smm_regs regs = { .eax = I8K_SMM_SET_FAN, };
 
@@ -337,33 +346,35 @@ static int i8k_set_fan(const struct dell_smm_data *data, int fan, int speed)
 		return -EINVAL;
 
 	speed = (speed < 0) ? 0 : ((speed > data->i8k_fan_max) ? data->i8k_fan_max : speed);
-	regs.ebx = (fan & 0xff) | (speed << 8);
+	regs.ebx = fan | (speed << 8);
 
 	return i8k_smm(&regs);
 }
 
-static int __init i8k_get_temp_type(int sensor)
+static int __init i8k_get_temp_type(u8 sensor)
 {
-	struct smm_regs regs = { .eax = I8K_SMM_GET_TEMP_TYPE, };
+	struct smm_regs regs = {
+		.eax = I8K_SMM_GET_TEMP_TYPE,
+		.ebx = sensor,
+	};
 
-	regs.ebx = sensor & 0xff;
 	return i8k_smm(&regs) ? : regs.eax & 0xff;
 }
 
 /*
  * Read the cpu temperature.
  */
-static int _i8k_get_temp(int sensor)
+static int _i8k_get_temp(u8 sensor)
 {
 	struct smm_regs regs = {
 		.eax = I8K_SMM_GET_TEMP,
-		.ebx = sensor & 0xff,
+		.ebx = sensor,
 	};
 
 	return i8k_smm(&regs) ? : regs.eax & 0xff;
 }
 
-static int i8k_get_temp(int sensor)
+static int i8k_get_temp(u8 sensor)
 {
 	int temp = _i8k_get_temp(sensor);
 
@@ -496,6 +507,9 @@ static long i8k_ioctl(struct file *fp, unsigned int cmd, unsigned long arg)
 		if (copy_from_user(&val, argp, sizeof(int)))
 			return -EFAULT;
 
+		if (val > U8_MAX || val < 0)
+			return -EINVAL;
+
 		val = i8k_get_fan_speed(data, val);
 		break;
 
@@ -503,6 +517,9 @@ static long i8k_ioctl(struct file *fp, unsigned int cmd, unsigned long arg)
 		if (copy_from_user(&val, argp, sizeof(int)))
 			return -EFAULT;
 
+		if (val > U8_MAX || val < 0)
+			return -EINVAL;
+
 		val = i8k_get_fan_status(data, val);
 		break;
 
@@ -513,6 +530,9 @@ static long i8k_ioctl(struct file *fp, unsigned int cmd, unsigned long arg)
 		if (copy_from_user(&val, argp, sizeof(int)))
 			return -EFAULT;
 
+		if (val > U8_MAX || val < 0)
+			return -EINVAL;
+
 		if (copy_from_user(&speed, argp + 1, sizeof(int)))
 			return -EFAULT;
 
@@ -920,7 +940,8 @@ static int __init dell_smm_init_hwmon(struct device *dev)
 {
 	struct dell_smm_data *data = dev_get_drvdata(dev);
 	struct device *dell_smm_hwmon_dev;
-	int i, state, err;
+	int state, err;
+	u8 i;
 
 	for (i = 0; i < DELL_SMM_NO_TEMP; i++) {
 		data->temp_type[i] = i8k_get_temp_type(i);
@@ -1236,7 +1257,8 @@ static int __init dell_smm_probe(struct platform_device *pdev)
 {
 	struct dell_smm_data *data;
 	const struct dmi_system_id *id, *fan_control;
-	int fan, ret;
+	int ret;
+	u8 fan;
 
 	data = devm_kzalloc(&pdev->dev, sizeof(struct dell_smm_data), GFP_KERNEL);
 	if (!data)
-- 
GitLab


From c82fdd42fb50d281f6c7268e136178f096af6c69 Mon Sep 17 00:00:00 2001
From: Armin Wolf <W_Armin@gmx.de>
Date: Tue, 15 Feb 2022 20:11:10 +0100
Subject: [PATCH 0771/1586] hwmon: (dell-smm) Improve temperature sensors
 detection

On the Dell Inspiron 3505, three temperature sensors are
available through the SMM interface. However since they
do not have an associated type, they are not detected.
Probe for those sensors in case no type was detected.
_i8k_get_temp() is used instead of i8k_get_temp()
since it is sometimes faster and the result is
easier to check (no -ENODATA) since we do not
care about the actual temp value.

Tested on a Dell Inspiron 3505.

Signed-off-by: Armin Wolf <W_Armin@gmx.de>
Link: https://lore.kernel.org/r/20220215191113.16640-5-W_Armin@gmx.de
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/dell-smm-hwmon.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/hwmon/dell-smm-hwmon.c b/drivers/hwmon/dell-smm-hwmon.c
index 1cde005ff4e46..c5939e68586d0 100644
--- a/drivers/hwmon/dell-smm-hwmon.c
+++ b/drivers/hwmon/dell-smm-hwmon.c
@@ -651,6 +651,11 @@ static umode_t dell_smm_is_visible(const void *drvdata, enum hwmon_sensor_types
 	case hwmon_temp:
 		switch (attr) {
 		case hwmon_temp_input:
+			/* _i8k_get_temp() is fine since we do not care about the actual value */
+			if (data->temp_type[channel] >= 0 || _i8k_get_temp(channel) >= 0)
+				return 0444;
+
+			break;
 		case hwmon_temp_label:
 			if (data->temp_type[channel] >= 0)
 				return 0444;
-- 
GitLab


From 54cc3dbfc10dc3db7cb1cf49aee4477a8398fbde Mon Sep 17 00:00:00 2001
From: Marcello Sylvester Bauer <sylv@sylv.io>
Date: Mon, 21 Feb 2022 12:09:56 +0100
Subject: [PATCH 0772/1586] hwmon: (pmbus) Add regulator supply into macro

Add regulator supply into PWBUS_REGULATOR macro. This makes it optional
to define a vin-supply in DT. Not defining a supply will add a dummy
regulator supply instead and only cause the following debug output:

```
Looking up vin-supply property in node [...] failed
```

Signed-off-by: Marcello Sylvester Bauer <sylv@sylv.io>
Link: https://lore.kernel.org/r/58f2ff7b90233fad3d7ae2e9d66d5192e2c1ac01.1645437439.git.sylv@sylv.io
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/pmbus/pmbus.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/hwmon/pmbus/pmbus.h b/drivers/hwmon/pmbus/pmbus.h
index e0aa8aa46d8c4..38f049d68d329 100644
--- a/drivers/hwmon/pmbus/pmbus.h
+++ b/drivers/hwmon/pmbus/pmbus.h
@@ -464,6 +464,7 @@ extern const struct regulator_ops pmbus_regulator_ops;
 #define PMBUS_REGULATOR(_name, _id)				\
 	[_id] = {						\
 		.name = (_name # _id),				\
+		.supply_name = "vin",				\
 		.id = (_id),					\
 		.of_match = of_match_ptr(_name # _id),		\
 		.regulators_node = of_match_ptr("regulators"),	\
-- 
GitLab


From 84dc9e8a7eec2cdff00728baedf0fb35fc7c11e8 Mon Sep 17 00:00:00 2001
From: Eddie James <eajames@linux.ibm.com>
Date: Tue, 15 Feb 2022 09:10:22 -0600
Subject: [PATCH 0773/1586] hwmon: (occ) Add soft minimum power cap attribute

Export the power caps data for the soft minimum power cap through hwmon.

Signed-off-by: Eddie James <eajames@linux.ibm.com>
Reviewed-by: Joel Stanley <joel@jms.id.au>
Link: https://lore.kernel.org/r/20220215151022.7498-5-eajames@linux.ibm.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/occ/common.c | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/drivers/hwmon/occ/common.c b/drivers/hwmon/occ/common.c
index 0cb4a0a6cbc10..f00cd59f1d19f 100644
--- a/drivers/hwmon/occ/common.c
+++ b/drivers/hwmon/occ/common.c
@@ -674,6 +674,9 @@ static ssize_t occ_show_caps_3(struct device *dev,
 	case 7:
 		val = caps->user_source;
 		break;
+	case 8:
+		val = get_unaligned_be16(&caps->soft_min) * 1000000ULL;
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -835,12 +838,13 @@ static int occ_setup_sensor_attrs(struct occ *occ)
 	case 1:
 		num_attrs += (sensors->caps.num_sensors * 7);
 		break;
-	case 3:
-		show_caps = occ_show_caps_3;
-		fallthrough;
 	case 2:
 		num_attrs += (sensors->caps.num_sensors * 8);
 		break;
+	case 3:
+		show_caps = occ_show_caps_3;
+		num_attrs += (sensors->caps.num_sensors * 9);
+		break;
 	default:
 		sensors->caps.num_sensors = 0;
 	}
@@ -1047,6 +1051,15 @@ static int occ_setup_sensor_attrs(struct occ *occ)
 			attr->sensor = OCC_INIT_ATTR(attr->name, 0444,
 						     show_caps, NULL, 7, 0);
 			attr++;
+
+			if (sensors->caps.version > 2) {
+				snprintf(attr->name, sizeof(attr->name),
+					 "power%d_cap_min_soft", s);
+				attr->sensor = OCC_INIT_ATTR(attr->name, 0444,
+							     show_caps, NULL,
+							     8, 0);
+				attr++;
+			}
 		}
 	}
 
-- 
GitLab


From 4b1dd41cf211c2c1b93cab77aebbb0a01157d3ff Mon Sep 17 00:00:00 2001
From: Marcello Sylvester Bauer <sylv@sylv.io>
Date: Mon, 21 Feb 2022 10:42:04 +0100
Subject: [PATCH 0774/1586] dt-bindings: vendor-prefixes: add Vicor Corporation

Add vendor prefix for Vicor Corporation.

Signed-off-by: Marcello Sylvester Bauer <sylv@sylv.io>
Acked-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/5b487a0b68d58b64022662f1e527ac80a6ad81ef.1645435888.git.sylv@sylv.io
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 Documentation/devicetree/bindings/vendor-prefixes.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml
index 294093d45a230..047a83a089cee 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.yaml
+++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml
@@ -1298,6 +1298,8 @@ patternProperties:
     description: Vertexcom Technologies, Inc.
   "^via,.*":
     description: VIA Technologies, Inc.
+  "^vicor,.*":
+    description: Vicor Corporation
   "^videostrong,.*":
     description: Videostrong Technology Co., Ltd.
   "^virtio,.*":
-- 
GitLab


From b7b94f15e55223e75b588863075a10ae79445db4 Mon Sep 17 00:00:00 2001
From: Marcello Sylvester Bauer <sylv@sylv.io>
Date: Mon, 21 Feb 2022 10:42:05 +0100
Subject: [PATCH 0775/1586] dt-bindings:trivial-devices: Add pli1209bc

Add trivial device entry for PLI1209BC Digital Supervisor from Vicor
Corporation.

Signed-off-by: Marcello Sylvester Bauer <sylv@sylv.io>
Acked-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/15a9fcfb5b9592c6d87f12c2a4c77fd069f5cfff.1645435888.git.sylv@sylv.io
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 Documentation/devicetree/bindings/trivial-devices.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Documentation/devicetree/bindings/trivial-devices.yaml b/Documentation/devicetree/bindings/trivial-devices.yaml
index 091792ba993e8..d03d90360aa09 100644
--- a/Documentation/devicetree/bindings/trivial-devices.yaml
+++ b/Documentation/devicetree/bindings/trivial-devices.yaml
@@ -354,6 +354,8 @@ properties:
           - ti,tps544c25
             # Winbond/Nuvoton H/W Monitor
           - winbond,w83793
+            # Vicor Corporation Digital Supervisor
+          - vicor,pli1209bc
             # i2c trusted platform module (TPM)
           - winbond,wpct301
 
-- 
GitLab


From d0cd978513f2e37c353ec2dcfbd863f97edb1dcd Mon Sep 17 00:00:00 2001
From: Marcello Sylvester Bauer <sylv@sylv.io>
Date: Mon, 21 Feb 2022 10:42:06 +0100
Subject: [PATCH 0776/1586] hwmon: (pmbus) Add support for pli1209bc

PLI1209BC is a Digital Supervisor from Vicor Corporation.

Signed-off-by: Marcello Sylvester Bauer <sylv@sylv.io>
Link: https://lore.kernel.org/r/4e016e66275bc46c90974aec18b150c874e64787.1645435888.git.sylv@sylv.io
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 Documentation/hwmon/index.rst     |   1 +
 Documentation/hwmon/pli1209bc.rst |  75 +++++++++++++++++++
 drivers/hwmon/pmbus/Kconfig       |   9 +++
 drivers/hwmon/pmbus/Makefile      |   1 +
 drivers/hwmon/pmbus/pli1209bc.c   | 115 ++++++++++++++++++++++++++++++
 5 files changed, 201 insertions(+)
 create mode 100644 Documentation/hwmon/pli1209bc.rst
 create mode 100644 drivers/hwmon/pmbus/pli1209bc.c

diff --git a/Documentation/hwmon/index.rst b/Documentation/hwmon/index.rst
index b69fdaf1af82e..075d1defcd204 100644
--- a/Documentation/hwmon/index.rst
+++ b/Documentation/hwmon/index.rst
@@ -161,6 +161,7 @@ Hardware Monitoring Kernel Drivers
    pc87427
    pcf8591
    pim4328
+   pli1209bc
    pm6764tr
    pmbus
    powr1220
diff --git a/Documentation/hwmon/pli1209bc.rst b/Documentation/hwmon/pli1209bc.rst
new file mode 100644
index 0000000000000..ea5b3f68a5154
--- /dev/null
+++ b/Documentation/hwmon/pli1209bc.rst
@@ -0,0 +1,75 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Kernel driver pli1209bc
+=======================
+
+Supported chips:
+
+  * Digital Supervisor PLI1209BC
+
+    Prefix: 'pli1209bc'
+
+    Addresses scanned: 0x50 - 0x5F
+
+    Datasheet: https://www.vicorpower.com/documents/datasheets/ds-PLI1209BCxyzz-VICOR.pdf
+
+Authors:
+    - Marcello Sylvester Bauer <sylv@sylv.io>
+
+Description
+-----------
+
+The Vicor PLI1209BC is an isolated digital power system supervisor that provides
+a communication interface between a host processor and one Bus Converter Module
+(BCM). The PLI communicates with a system controller via a PMBus compatible
+interface over an isolated UART interface. Through the PLI, the host processor
+can configure, set protection limits, and monitor the BCM.
+
+Sysfs entries
+-------------
+
+======================= ========================================================
+in1_label		"vin2"
+in1_input		Input voltage.
+in1_rated_min		Minimum rated input voltage.
+in1_rated_max		Maximum rated input voltage.
+in1_max			Maximum input voltage.
+in1_max_alarm		Input voltage high alarm.
+in1_crit		Critical input voltage.
+in1_crit_alarm		Input voltage critical alarm.
+
+in2_label		"vout2"
+in2_input		Output voltage.
+in2_rated_min		Minimum rated output voltage.
+in2_rated_max		Maximum rated output voltage.
+in2_alarm		Output voltage alarm
+
+curr1_label		"iin2"
+curr1_input		Input current.
+curr1_max		Maximum input current.
+curr1_max_alarm		Maximum input current high alarm.
+curr1_crit		Critical input current.
+curr1_crit_alarm	Input current critical alarm.
+
+curr2_label		"iout2"
+curr2_input		Output current.
+curr2_crit		Critical output current.
+curr2_crit_alarm	Output current critical alarm.
+curr2_max		Maximum output current.
+curr2_max_alarm		Output current high alarm.
+
+power1_label		"pin2"
+power1_input		Input power.
+power1_alarm		Input power alarm.
+
+power2_label		"pout2"
+power2_input		Output power.
+power2_rated_max	Maximum rated output power.
+
+temp1_input		Die temperature.
+temp1_alarm		Die temperature alarm.
+temp1_max		Maximum die temperature.
+temp1_max_alarm		Die temperature high alarm.
+temp1_crit		Critical die temperature.
+temp1_crit_alarm	Die temperature critical alarm.
+======================= ========================================================
diff --git a/drivers/hwmon/pmbus/Kconfig b/drivers/hwmon/pmbus/Kconfig
index c73aa50c76157..f18f67a946974 100644
--- a/drivers/hwmon/pmbus/Kconfig
+++ b/drivers/hwmon/pmbus/Kconfig
@@ -317,6 +317,15 @@ config SENSORS_PIM4328
 	  This driver can also be built as a module. If so, the module will
 	  be called pim4328.
 
+config SENSORS_PLI1209BC
+	tristate "Vicor PLI1209BC"
+	help
+	  If you say yes here you get hardware monitoring support for Vicor
+	  PLI1209BC Digital Supervisor.
+
+	  This driver can also be built as a module. If so, the module will
+	  be called pli1209bc.
+
 config SENSORS_PM6764TR
 	tristate "ST PM6764TR"
 	help
diff --git a/drivers/hwmon/pmbus/Makefile b/drivers/hwmon/pmbus/Makefile
index e5935f70c9e01..a4a96ac71de79 100644
--- a/drivers/hwmon/pmbus/Makefile
+++ b/drivers/hwmon/pmbus/Makefile
@@ -33,6 +33,7 @@ obj-$(CONFIG_SENSORS_MAX8688)	+= max8688.o
 obj-$(CONFIG_SENSORS_MP2888)	+= mp2888.o
 obj-$(CONFIG_SENSORS_MP2975)	+= mp2975.o
 obj-$(CONFIG_SENSORS_MP5023)	+= mp5023.o
+obj-$(CONFIG_SENSORS_PLI1209BC)	+= pli1209bc.o
 obj-$(CONFIG_SENSORS_PM6764TR)	+= pm6764tr.o
 obj-$(CONFIG_SENSORS_PXE1610)	+= pxe1610.o
 obj-$(CONFIG_SENSORS_Q54SJ108A2)	+= q54sj108a2.o
diff --git a/drivers/hwmon/pmbus/pli1209bc.c b/drivers/hwmon/pmbus/pli1209bc.c
new file mode 100644
index 0000000000000..5f8847307e559
--- /dev/null
+++ b/drivers/hwmon/pmbus/pli1209bc.c
@@ -0,0 +1,115 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Hardware monitoring driver for Vicor PLI1209BC Digital Supervisor
+ *
+ * Copyright (c) 2022 9elements GmbH
+ */
+
+#include <linux/i2c.h>
+#include <linux/module.h>
+#include <linux/pmbus.h>
+#include "pmbus.h"
+
+/*
+ * The capability command is only supported at page 0. Probing the device while
+ * the page register is set to 1 will falsely enable PEC support. Disable
+ * capability probing accordingly, since the PLI1209BC does not have any
+ * additional capabilities.
+ */
+static struct pmbus_platform_data pli1209bc_plat_data = {
+	.flags = PMBUS_NO_CAPABILITY,
+};
+
+static int pli1209bc_read_word_data(struct i2c_client *client, int page,
+				    int phase, int reg)
+{
+	int data;
+
+	switch (reg) {
+	/* PMBUS_READ_POUT uses a direct format with R=0 */
+	case PMBUS_READ_POUT:
+		data = pmbus_read_word_data(client, page, phase, reg);
+		if (data < 0)
+			return data;
+		data = sign_extend32(data, 15) * 10;
+		return clamp_val(data, -32768, 32767) & 0xffff;
+	default:
+		return -ENODATA;
+	}
+}
+
+static struct pmbus_driver_info pli1209bc_info = {
+	.pages = 2,
+	.format[PSC_VOLTAGE_IN] = direct,
+	.format[PSC_VOLTAGE_OUT] = direct,
+	.format[PSC_CURRENT_IN] = direct,
+	.format[PSC_CURRENT_OUT] = direct,
+	.format[PSC_POWER] = direct,
+	.format[PSC_TEMPERATURE] = direct,
+	.m[PSC_VOLTAGE_IN] = 1,
+	.b[PSC_VOLTAGE_IN] = 0,
+	.R[PSC_VOLTAGE_IN] = 1,
+	.m[PSC_VOLTAGE_OUT] = 1,
+	.b[PSC_VOLTAGE_OUT] = 0,
+	.R[PSC_VOLTAGE_OUT] = 1,
+	.m[PSC_CURRENT_IN] = 1,
+	.b[PSC_CURRENT_IN] = 0,
+	.R[PSC_CURRENT_IN] = 3,
+	.m[PSC_CURRENT_OUT] = 1,
+	.b[PSC_CURRENT_OUT] = 0,
+	.R[PSC_CURRENT_OUT] = 2,
+	.m[PSC_POWER] = 1,
+	.b[PSC_POWER] = 0,
+	.R[PSC_POWER] = 1,
+	.m[PSC_TEMPERATURE] = 1,
+	.b[PSC_TEMPERATURE] = 0,
+	.R[PSC_TEMPERATURE] = 0,
+	/*
+	 * Page 0 sums up all attributes except voltage readings.
+	 * The pli1209 digital supervisor only contains a single BCM, making
+	 * page 0 redundant.
+	 */
+	.func[1] = PMBUS_HAVE_VIN | PMBUS_HAVE_VOUT
+	    | PMBUS_HAVE_IIN | PMBUS_HAVE_IOUT
+	    | PMBUS_HAVE_PIN | PMBUS_HAVE_POUT
+	    | PMBUS_HAVE_TEMP | PMBUS_HAVE_STATUS_TEMP
+	    | PMBUS_HAVE_STATUS_IOUT | PMBUS_HAVE_STATUS_INPUT,
+	.read_word_data = pli1209bc_read_word_data,
+};
+
+static int pli1209bc_probe(struct i2c_client *client)
+{
+	client->dev.platform_data = &pli1209bc_plat_data;
+	return pmbus_do_probe(client, &pli1209bc_info);
+}
+
+static const struct i2c_device_id pli1209bc_id[] = {
+	{"pli1209bc", 0},
+	{}
+};
+
+MODULE_DEVICE_TABLE(i2c, pli1209bc_id);
+
+#ifdef CONFIG_OF
+static const struct of_device_id pli1209bc_of_match[] = {
+	{ .compatible = "vicor,pli1209bc" },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, pli1209bc_of_match);
+#endif
+
+static struct i2c_driver pli1209bc_driver = {
+	.driver = {
+		   .name = "pli1209bc",
+		   .of_match_table = of_match_ptr(pli1209bc_of_match),
+		   },
+	.probe_new = pli1209bc_probe,
+	.id_table = pli1209bc_id,
+};
+
+module_i2c_driver(pli1209bc_driver);
+
+MODULE_AUTHOR("Marcello Sylvester Bauer <sylv@sylv.io>");
+MODULE_DESCRIPTION("PMBus driver for Vicor PLI1209BC");
+MODULE_LICENSE("GPL");
+MODULE_IMPORT_NS(PMBUS);
-- 
GitLab


From ba1d263af1c034baf479bca14d6f715b713214b1 Mon Sep 17 00:00:00 2001
From: Marcello Sylvester Bauer <sylv@sylv.io>
Date: Mon, 21 Feb 2022 10:42:07 +0100
Subject: [PATCH 0777/1586] hwmon: (pmbus/pli1209bc) Add regulator support

Add regulator support for PLI1209BC Digital Supervisor.

Signed-off-by: Marcello Sylvester Bauer <sylv@sylv.io>
Link: https://lore.kernel.org/r/21b0cdb6dd72654effa451d3b1636ecd07b160e9.1645435888.git.sylv@sylv.io
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/pmbus/Kconfig     |  7 +++++++
 drivers/hwmon/pmbus/pli1209bc.c | 31 +++++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+)

diff --git a/drivers/hwmon/pmbus/Kconfig b/drivers/hwmon/pmbus/Kconfig
index f18f67a946974..6552467c588d1 100644
--- a/drivers/hwmon/pmbus/Kconfig
+++ b/drivers/hwmon/pmbus/Kconfig
@@ -326,6 +326,13 @@ config SENSORS_PLI1209BC
 	  This driver can also be built as a module. If so, the module will
 	  be called pli1209bc.
 
+config SENSORS_PLI1209BC_REGULATOR
+	bool "Regulator support for PLI1209BC"
+	depends on SENSORS_PLI1209BC && REGULATOR
+	help
+	  If you say yes here you get regulator support for Vicor PLI1209BC
+	  Digital Supervisor.
+
 config SENSORS_PM6764TR
 	tristate "ST PM6764TR"
 	help
diff --git a/drivers/hwmon/pmbus/pli1209bc.c b/drivers/hwmon/pmbus/pli1209bc.c
index 5f8847307e559..05b4ee35ba270 100644
--- a/drivers/hwmon/pmbus/pli1209bc.c
+++ b/drivers/hwmon/pmbus/pli1209bc.c
@@ -8,6 +8,7 @@
 #include <linux/i2c.h>
 #include <linux/module.h>
 #include <linux/pmbus.h>
+#include <linux/regulator/driver.h>
 #include "pmbus.h"
 
 /*
@@ -33,11 +34,37 @@ static int pli1209bc_read_word_data(struct i2c_client *client, int page,
 			return data;
 		data = sign_extend32(data, 15) * 10;
 		return clamp_val(data, -32768, 32767) & 0xffff;
+	/*
+	 * PMBUS_READ_VOUT and PMBUS_READ_TEMPERATURE_1 return invalid data
+	 * when the BCM is turned off. Since it is not possible to return
+	 * ENODATA error, return zero instead.
+	 */
+	case PMBUS_READ_VOUT:
+	case PMBUS_READ_TEMPERATURE_1:
+		data = pmbus_read_word_data(client, page, phase,
+					    PMBUS_STATUS_WORD);
+		if (data < 0)
+			return data;
+		if (data & PB_STATUS_POWER_GOOD_N)
+			return 0;
+		return pmbus_read_word_data(client, page, phase, reg);
 	default:
 		return -ENODATA;
 	}
 }
 
+#if IS_ENABLED(CONFIG_SENSORS_PLI1209BC_REGULATOR)
+static const struct regulator_desc pli1209bc_reg_desc = {
+	.name = "vout2",
+	.id = 1,
+	.of_match = of_match_ptr("vout2"),
+	.regulators_node = of_match_ptr("regulators"),
+	.ops = &pmbus_regulator_ops,
+	.type = REGULATOR_VOLTAGE,
+	.owner = THIS_MODULE,
+};
+#endif
+
 static struct pmbus_driver_info pli1209bc_info = {
 	.pages = 2,
 	.format[PSC_VOLTAGE_IN] = direct,
@@ -75,6 +102,10 @@ static struct pmbus_driver_info pli1209bc_info = {
 	    | PMBUS_HAVE_TEMP | PMBUS_HAVE_STATUS_TEMP
 	    | PMBUS_HAVE_STATUS_IOUT | PMBUS_HAVE_STATUS_INPUT,
 	.read_word_data = pli1209bc_read_word_data,
+#if IS_ENABLED(CONFIG_SENSORS_PLI1209BC_REGULATOR)
+	.num_regulators = 1,
+	.reg_desc = &pli1209bc_reg_desc,
+#endif
 };
 
 static int pli1209bc_probe(struct i2c_client *client)
-- 
GitLab


From 64b631fb0c6f7e5fbbe0d641556e07e7a8a272ef Mon Sep 17 00:00:00 2001
From: Christian Lamparter <chunkeey@gmail.com>
Date: Fri, 18 Feb 2022 10:06:42 +0100
Subject: [PATCH 0778/1586] dt-bindings: Add ti,tmp125 temperature sensor
 binding
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

From the freely available Texas Instruments' TMP125 datasheet:

"The TMP125 is an SPI-compatible temperature sensor available in the
tiny SOT23-6 package. Requiring no external components, the TMP125
is capable of measuring temperatures within 2 degree C of accuracy
over a temperature range of −25 degree C to +85 degree C and
2.5 degree C of accuracy over −40 degree C to +125 degree C."

The TMP125 is very similar to the TMP121/TMP122 series of familiar
chips.

Signed-off-by: Christian Lamparter <chunkeey@gmail.com>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Acked-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/d3538ba9beededfe3a9ad5dab4903a6a01834822.1645175187.git.chunkeey@gmail.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 Documentation/devicetree/bindings/trivial-devices.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/trivial-devices.yaml b/Documentation/devicetree/bindings/trivial-devices.yaml
index d03d90360aa09..1c49bbc654eed 100644
--- a/Documentation/devicetree/bindings/trivial-devices.yaml
+++ b/Documentation/devicetree/bindings/trivial-devices.yaml
@@ -337,6 +337,7 @@ properties:
             # Thermometer with SPI interface
           - ti,tmp121
           - ti,tmp122
+          - ti,tmp125
             # Digital Temperature Sensor
           - ti,tmp275
             # TI DC-DC converter on PMBus
-- 
GitLab


From cd929672a9ef644aca12de59a75de5f061d5983d Mon Sep 17 00:00:00 2001
From: Christian Lamparter <chunkeey@gmail.com>
Date: Fri, 18 Feb 2022 10:06:43 +0100
Subject: [PATCH 0779/1586] hwmon: (lm70) Add ti,tmp125 support

The TMP125 is a 2 degree Celsius accurate Digital
Temperature Sensor with a SPI interface.

The temperature register is a 16-bit, read-only register.
The MSB (Bit 15) is a leading zero and never set. Bits 14
to 5 are the 1+9 temperature data bits in a two's
complement format. Bits 4 to 0 are useless copies of
Bit 5 value and therefore ignored.

This was tested on a Aerohive HiveAP-350.

Bonus: lm70 supports TMP122/TMP124 as well.
I added them to the Kconfig module description.

Signed-off-by: Christian Lamparter <chunkeey@gmail.com>
Link: https://lore.kernel.org/r/43b19cbd4e7f51e9509e561b02b5d8d0e7079fac.1645175187.git.chunkeey@gmail.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 Documentation/hwmon/lm70.rst |  7 +++++++
 drivers/hwmon/Kconfig        |  4 ++--
 drivers/hwmon/lm70.c         | 16 ++++++++++++++++
 3 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/Documentation/hwmon/lm70.rst b/Documentation/hwmon/lm70.rst
index 6ddc5b67ccb54..11303a7e16a80 100644
--- a/Documentation/hwmon/lm70.rst
+++ b/Documentation/hwmon/lm70.rst
@@ -15,6 +15,10 @@ Supported chips:
 
     Information: https://www.ti.com/product/tmp122
 
+  * Texas Instruments TMP125
+
+    Information: https://www.ti.com/product/tmp125
+
   * National Semiconductor LM71
 
     Datasheet: https://www.ti.com/product/LM71
@@ -53,6 +57,9 @@ The LM74 and TMP121/TMP122/TMP123/TMP124 are very similar; main difference is
 
 The TMP122/TMP124 also feature configurable temperature thresholds.
 
+The TMP125 is less accurate and provides 10-bit temperature data
+with 0.25 degrees Celsius resolution.
+
 The LM71 is also very similar; main difference is 14-bit temperature
 data (0.03125 degrees celsius resolution).
 
diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index b3597ba66ddb9..463d5b110a85f 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -1224,8 +1224,8 @@ config SENSORS_LM70
 	depends on SPI_MASTER
 	help
 	  If you say yes here you get support for the National Semiconductor
-	  LM70, LM71, LM74 and Texas Instruments TMP121/TMP123 digital tempera-
-	  ture sensor chips.
+	  LM70, LM71, LM74 and Texas Instruments TMP121/TMP123, TMP122/TMP124,
+	  TMP125 digital temperature sensor chips.
 
 	  This driver can also be built as a module. If so, the module
 	  will be called lm70.
diff --git a/drivers/hwmon/lm70.c b/drivers/hwmon/lm70.c
index d2a60de5b8de9..c20a749fc7f21 100644
--- a/drivers/hwmon/lm70.c
+++ b/drivers/hwmon/lm70.c
@@ -34,6 +34,7 @@
 #define LM70_CHIP_LM71		2	/* NS LM71 */
 #define LM70_CHIP_LM74		3	/* NS LM74 */
 #define LM70_CHIP_TMP122	4	/* TI TMP122/TMP124 */
+#define LM70_CHIP_TMP125	5	/* TI TMP125 */
 
 struct lm70 {
 	struct spi_device *spi;
@@ -87,6 +88,12 @@ static ssize_t temp1_input_show(struct device *dev,
 	 * LM71:
 	 * 14 bits of 2's complement data, discard LSB 2 bits,
 	 * resolution 0.0312 degrees celsius.
+	 *
+	 * TMP125:
+	 * MSB/D15 is a leading zero. D14 is the sign-bit. This is
+	 * followed by 9 temperature bits (D13..D5) in 2's complement
+	 * data format with a resolution of 0.25 degrees celsius per unit.
+	 * LSB 5 bits (D4..D0) share the same value as D5 and get discarded.
 	 */
 	switch (p_lm70->chip) {
 	case LM70_CHIP_LM70:
@@ -102,6 +109,10 @@ static ssize_t temp1_input_show(struct device *dev,
 	case LM70_CHIP_LM71:
 		val = ((int)raw / 4) * 3125 / 100;
 		break;
+
+	case LM70_CHIP_TMP125:
+		val = (sign_extend32(raw, 14) / 32) * 250;
+		break;
 	}
 
 	status = sprintf(buf, "%d\n", val); /* millidegrees Celsius */
@@ -135,6 +146,10 @@ static const struct of_device_id lm70_of_ids[] = {
 		.compatible = "ti,tmp122",
 		.data = (void *) LM70_CHIP_TMP122,
 	},
+	{
+		.compatible = "ti,tmp125",
+		.data = (void *) LM70_CHIP_TMP125,
+	},
 	{
 		.compatible = "ti,lm71",
 		.data = (void *) LM70_CHIP_LM71,
@@ -184,6 +199,7 @@ static const struct spi_device_id lm70_ids[] = {
 	{ "lm70",   LM70_CHIP_LM70 },
 	{ "tmp121", LM70_CHIP_TMP121 },
 	{ "tmp122", LM70_CHIP_TMP122 },
+	{ "tmp125", LM70_CHIP_TMP125 },
 	{ "lm71",   LM70_CHIP_LM71 },
 	{ "lm74",   LM70_CHIP_LM74 },
 	{ },
-- 
GitLab


From e75d16e58467c5703821e12536c7dc438f3c425d Mon Sep 17 00:00:00 2001
From: Armin Wolf <W_Armin@gmx.de>
Date: Thu, 24 Feb 2022 07:12:09 +0100
Subject: [PATCH 0780/1586] hwmon: (core) Add support for pwm auto channels
 attribute

pwm[1-*]_auto_channels_temp is documented as an official
hwmon sysfs attribute, yet there is no support for it in
the new with_info-API. Fix that.

Signed-off-by: Armin Wolf <W_Armin@gmx.de>
Link: https://lore.kernel.org/r/20220224061210.16452-2-W_Armin@gmx.de
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/hwmon.c | 1 +
 include/linux/hwmon.h | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/drivers/hwmon/hwmon.c b/drivers/hwmon/hwmon.c
index 0d6c6809f26cc..989e2c8496dd2 100644
--- a/drivers/hwmon/hwmon.c
+++ b/drivers/hwmon/hwmon.c
@@ -604,6 +604,7 @@ static const char * const hwmon_pwm_attr_templates[] = {
 	[hwmon_pwm_enable] = "pwm%d_enable",
 	[hwmon_pwm_mode] = "pwm%d_mode",
 	[hwmon_pwm_freq] = "pwm%d_freq",
+	[hwmon_pwm_auto_channels_temp] = "pwm%d_auto_channels_temp",
 };
 
 static const char * const hwmon_intrusion_attr_templates[] = {
diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h
index fad1f1df26df7..eba380b76d157 100644
--- a/include/linux/hwmon.h
+++ b/include/linux/hwmon.h
@@ -332,12 +332,14 @@ enum hwmon_pwm_attributes {
 	hwmon_pwm_enable,
 	hwmon_pwm_mode,
 	hwmon_pwm_freq,
+	hwmon_pwm_auto_channels_temp,
 };
 
 #define HWMON_PWM_INPUT			BIT(hwmon_pwm_input)
 #define HWMON_PWM_ENABLE		BIT(hwmon_pwm_enable)
 #define HWMON_PWM_MODE			BIT(hwmon_pwm_mode)
 #define HWMON_PWM_FREQ			BIT(hwmon_pwm_freq)
+#define HWMON_PWM_AUTO_CHANNELS_TEMP	BIT(hwmon_pwm_auto_channels_temp)
 
 enum hwmon_intrusion_attributes {
 	hwmon_intrusion_alarm,
-- 
GitLab


From aa9f833dfc128169a1162261c5641aa516b4a231 Mon Sep 17 00:00:00 2001
From: Armin Wolf <W_Armin@gmx.de>
Date: Thu, 24 Feb 2022 07:12:10 +0100
Subject: [PATCH 0781/1586] hwmon: (sch5627) Add pwmX_auto_channels_temp
 support

After doing some research, it seems that Fujitsu's
hardware monitoring solution exports data describing
which temperature sensors affect which fans, similar
to the data in fan_source of the ftsteutates driver.
Writing 0 into these registers forces the fans to
full speed.
Export this data with standard attributes.

Signed-off-by: Armin Wolf <W_Armin@gmx.de>
Link: https://lore.kernel.org/r/20220224061210.16452-3-W_Armin@gmx.de
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 Documentation/hwmon/sch5627.rst |  4 +++
 drivers/hwmon/sch5627.c         | 61 +++++++++++++++++++++++++++++++++
 2 files changed, 65 insertions(+)

diff --git a/Documentation/hwmon/sch5627.rst b/Documentation/hwmon/sch5627.rst
index 187682e99114f..ecb4fc84d0451 100644
--- a/Documentation/hwmon/sch5627.rst
+++ b/Documentation/hwmon/sch5627.rst
@@ -20,6 +20,10 @@ Description
 SMSC SCH5627 Super I/O chips include complete hardware monitoring
 capabilities. They can monitor up to 5 voltages, 4 fans and 8 temperatures.
 
+In addition, the SCH5627 exports data describing which temperature sensors
+affect the speed of each fan. Setting pwmX_auto_channels_temp to 0 forces
+the corresponding fan to full speed until another value is written.
+
 The SMSC SCH5627 hardware monitoring part also contains an integrated
 watchdog. In order for this watchdog to function some motherboard specific
 initialization most be done by the BIOS, so if the watchdog is not enabled
diff --git a/drivers/hwmon/sch5627.c b/drivers/hwmon/sch5627.c
index 72c3f6757e348..25fbbd4c9a2b3 100644
--- a/drivers/hwmon/sch5627.c
+++ b/drivers/hwmon/sch5627.c
@@ -52,6 +52,9 @@ static const u16 SCH5627_REG_FAN[SCH5627_NO_FANS] = {
 static const u16 SCH5627_REG_FAN_MIN[SCH5627_NO_FANS] = {
 	0x62, 0x64, 0x66, 0x68 };
 
+static const u16 SCH5627_REG_PWM_MAP[SCH5627_NO_FANS] = {
+	0xA0, 0xA1, 0xA2, 0xA3 };
+
 static const u16 SCH5627_REG_IN_MSB[SCH5627_NO_IN] = {
 	0x22, 0x23, 0x24, 0x25, 0x189 };
 static const u16 SCH5627_REG_IN_LSN[SCH5627_NO_IN] = {
@@ -223,6 +226,9 @@ static int reg_to_rpm(u16 reg)
 static umode_t sch5627_is_visible(const void *drvdata, enum hwmon_sensor_types type, u32 attr,
 				  int channel)
 {
+	if (type == hwmon_pwm && attr == hwmon_pwm_auto_channels_temp)
+		return 0644;
+
 	return 0444;
 }
 
@@ -278,6 +284,23 @@ static int sch5627_read(struct device *dev, enum hwmon_sensor_types type, u32 at
 			break;
 		}
 		break;
+	case hwmon_pwm:
+		switch (attr) {
+		case hwmon_pwm_auto_channels_temp:
+			mutex_lock(&data->update_lock);
+			ret = sch56xx_read_virtual_reg(data->addr, SCH5627_REG_PWM_MAP[channel]);
+			mutex_unlock(&data->update_lock);
+
+			if (ret < 0)
+				return ret;
+
+			*val = ret;
+
+			return 0;
+		default:
+			break;
+		}
+		break;
 	case hwmon_in:
 		ret = sch5627_update_in(data);
 		if (ret < 0)
@@ -318,10 +341,42 @@ static int sch5627_read_string(struct device *dev, enum hwmon_sensor_types type,
 	return -EOPNOTSUPP;
 }
 
+static int sch5627_write(struct device *dev, enum hwmon_sensor_types type, u32 attr, int channel,
+			 long val)
+{
+	struct sch5627_data *data = dev_get_drvdata(dev);
+	int ret;
+
+	switch (type) {
+	case hwmon_pwm:
+		switch (attr) {
+		case hwmon_pwm_auto_channels_temp:
+			/* registers are 8 bit wide */
+			if (val > U8_MAX || val < 0)
+				return -EINVAL;
+
+			mutex_lock(&data->update_lock);
+			ret = sch56xx_write_virtual_reg(data->addr, SCH5627_REG_PWM_MAP[channel],
+							val);
+			mutex_unlock(&data->update_lock);
+
+			return ret;
+		default:
+			break;
+		}
+		break;
+	default:
+		break;
+	}
+
+	return -EOPNOTSUPP;
+}
+
 static const struct hwmon_ops sch5627_ops = {
 	.is_visible = sch5627_is_visible,
 	.read = sch5627_read,
 	.read_string = sch5627_read_string,
+	.write = sch5627_write,
 };
 
 static const struct hwmon_channel_info *sch5627_info[] = {
@@ -342,6 +397,12 @@ static const struct hwmon_channel_info *sch5627_info[] = {
 			   HWMON_F_INPUT | HWMON_F_MIN | HWMON_F_FAULT,
 			   HWMON_F_INPUT | HWMON_F_MIN | HWMON_F_FAULT
 			   ),
+	HWMON_CHANNEL_INFO(pwm,
+			   HWMON_PWM_AUTO_CHANNELS_TEMP,
+			   HWMON_PWM_AUTO_CHANNELS_TEMP,
+			   HWMON_PWM_AUTO_CHANNELS_TEMP,
+			   HWMON_PWM_AUTO_CHANNELS_TEMP
+			   ),
 	HWMON_CHANNEL_INFO(in,
 			   HWMON_I_INPUT | HWMON_I_LABEL,
 			   HWMON_I_INPUT | HWMON_I_LABEL,
-- 
GitLab


From 2fd3eec19c6e0a2c218853db9df27d4e74921673 Mon Sep 17 00:00:00 2001
From: Aleksa Savic <savicaleksa83@gmail.com>
Date: Sun, 27 Feb 2022 21:56:25 +0100
Subject: [PATCH 0782/1586] hwmon: (aquacomputer_d5next) Add support for
 Aquacomputer Farbwerk 360

Extend aquacomputer_d5next driver to expose hardware temperature sensors
of the Aquacomputer Farbwerk 360 RGB controller, which communicates through
a proprietary USB HID protocol.

Four temperature sensors are available. Additionally, serial number and
firmware version are exposed through debugfs.

This driver has been tested on x86_64.

Signed-off-by: Aleksa Savic <savicaleksa83@gmail.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 Documentation/hwmon/aquacomputer_d5next.rst |  49 ++-
 drivers/hwmon/aquacomputer_d5next.c         | 379 +++++++++++++-------
 2 files changed, 278 insertions(+), 150 deletions(-)

diff --git a/Documentation/hwmon/aquacomputer_d5next.rst b/Documentation/hwmon/aquacomputer_d5next.rst
index 1f4bb4ba2e4bf..3373e27b707d6 100644
--- a/Documentation/hwmon/aquacomputer_d5next.rst
+++ b/Documentation/hwmon/aquacomputer_d5next.rst
@@ -6,22 +6,21 @@ Kernel driver aquacomputer-d5next
 Supported devices:
 
 * Aquacomputer D5 Next watercooling pump
+* Aquacomputer Farbwerk 360 RGB controller
 
 Author: Aleksa Savic
 
 Description
 -----------
 
-This driver exposes hardware sensors of the Aquacomputer D5 Next watercooling
-pump, which communicates through a proprietary USB HID protocol.
+This driver exposes hardware sensors of listed Aquacomputer devices, which
+communicate through proprietary USB HID protocols.
 
-Available sensors are pump and fan speed, power, voltage and current, as
-well as coolant temperature. Also available through debugfs are the serial
-number, firmware version and power-on count.
-
-Attaching a fan is optional and allows it to be controlled using temperature
-curves directly from the pump. If it's not connected, the fan-related sensors
-will report zeroes.
+For the D5 Next pump, available sensors are pump and fan speed, power, voltage
+and current, as well as coolant temperature. Also available through debugfs are
+the serial number, firmware version and power-on count. Attaching a fan to it is
+optional and allows it to be controlled using temperature curves directly from the
+pump. If it's not connected, the fan-related sensors will report zeroes.
 
 The pump can be configured either through software or via its physical
 interface. Configuring the pump through this driver is not implemented, as it
@@ -29,33 +28,31 @@ seems to require sending it a complete configuration. That includes addressable
 RGB LEDs, for which there is no standard sysfs interface. Thus, that task is
 better suited for userspace tools.
 
+The Farbwerk 360 exposes four temperature sensors. Depending on the device,
+not all sysfs and debugfs entries will be available.
+
 Usage notes
 -----------
 
-The pump communicates via HID reports. The driver is loaded automatically by
+The devices communicate via HID reports. The driver is loaded automatically by
 the kernel and supports hotswapping.
 
 Sysfs entries
 -------------
 
-============ =============================================
-temp1_input  Coolant temperature (in millidegrees Celsius)
-fan1_input   Pump speed (in RPM)
-fan2_input   Fan speed (in RPM)
-power1_input Pump power (in micro Watts)
-power2_input Fan power (in micro Watts)
-in0_input    Pump voltage (in milli Volts)
-in1_input    Fan voltage (in milli Volts)
-in2_input    +5V rail voltage (in milli Volts)
-curr1_input  Pump current (in milli Amperes)
-curr2_input  Fan current (in milli Amperes)
-============ =============================================
+================ =============================================
+temp[1-4]_input  Temperature sensors (in millidegrees Celsius)
+fan[1-2]_input   Pump/fan speed (in RPM)
+power[1-2]_input Pump/fan power (in micro Watts)
+in[0-2]_input    Pump/fan voltage (in milli Volts)
+curr[1-2]_input  Pump/fan current (in milli Amperes)
+================ =============================================
 
 Debugfs entries
 ---------------
 
-================ ===============================================
-serial_number    Serial number of the pump
+================ =================================================
+serial_number    Serial number of the device
 firmware_version Version of installed firmware
-power_cycles     Count of how many times the pump was powered on
-================ ===============================================
+power_cycles     Count of how many times the device was powered on
+================ =================================================
diff --git a/drivers/hwmon/aquacomputer_d5next.c b/drivers/hwmon/aquacomputer_d5next.c
index fb9341a530511..525809cf7c952 100644
--- a/drivers/hwmon/aquacomputer_d5next.c
+++ b/drivers/hwmon/aquacomputer_d5next.c
@@ -1,32 +1,41 @@
 // SPDX-License-Identifier: GPL-2.0+
 /*
- * hwmon driver for Aquacomputer D5 Next watercooling pump
+ * hwmon driver for Aquacomputer devices (D5 Next, Farbwerk 360)
  *
- * The D5 Next sends HID reports (with ID 0x01) every second to report sensor values
- * (coolant temperature, pump and fan speed, voltage, current and power). It responds to
- * Get_Report requests, but returns a dummy value of no use.
+ * Aquacomputer devices send HID reports (with ID 0x01) every second to report
+ * sensor values.
  *
  * Copyright 2021 Aleksa Savic <savicaleksa83@gmail.com>
  */
 
-#include <asm/unaligned.h>
 #include <linux/debugfs.h>
 #include <linux/hid.h>
 #include <linux/hwmon.h>
 #include <linux/jiffies.h>
 #include <linux/module.h>
 #include <linux/seq_file.h>
+#include <asm/unaligned.h>
 
-#define DRIVER_NAME			"aquacomputer-d5next"
+#define USB_VENDOR_ID_AQUACOMPUTER	0x0c70
+#define USB_PRODUCT_ID_D5NEXT		0xf00e
+#define USB_PRODUCT_ID_FARBWERK360	0xf010
 
-#define D5NEXT_STATUS_REPORT_ID	0x01
-#define D5NEXT_STATUS_UPDATE_INTERVAL	(2 * HZ) /* In seconds */
+enum kinds { d5next, farbwerk360 };
 
-/* Register offsets for the D5 Next pump */
+static const char *const aqc_device_names[] = {
+	[d5next] = "d5next",
+	[farbwerk360] = "farbwerk360"
+};
 
-#define D5NEXT_SERIAL_FIRST_PART	3
-#define D5NEXT_SERIAL_SECOND_PART	5
-#define D5NEXT_FIRMWARE_VERSION	13
+#define DRIVER_NAME			"aquacomputer_d5next"
+
+#define STATUS_REPORT_ID		0x01
+#define STATUS_UPDATE_INTERVAL		(2 * HZ)	/* In seconds */
+#define SERIAL_FIRST_PART		3
+#define SERIAL_SECOND_PART		5
+#define FIRMWARE_VERSION		13
+
+/* Register offsets for the D5 Next pump */
 #define D5NEXT_POWER_CYCLES		24
 
 #define D5NEXT_COOLANT_TEMP		87
@@ -44,76 +53,118 @@
 #define D5NEXT_PUMP_CURRENT		112
 #define D5NEXT_FAN_CURRENT		99
 
-/* Labels for provided values */
+/* Register offsets for the Farbwerk 360 RGB controller */
+#define FARBWERK360_NUM_SENSORS		4
+#define FARBWERK360_SENSOR_START		0x32
+#define FARBWERK360_SENSOR_SIZE		0x02
+#define FARBWERK360_SENSOR_DISCONNECTED	0x7FFF
 
-#define L_COOLANT_TEMP			"Coolant temp"
+/* Labels for D5 Next */
+#define L_D5NEXT_COOLANT_TEMP		"Coolant temp"
 
-#define L_PUMP_SPEED			"Pump speed"
-#define L_FAN_SPEED			"Fan speed"
-
-#define L_PUMP_POWER			"Pump power"
-#define L_FAN_POWER			"Fan power"
-
-#define L_PUMP_VOLTAGE			"Pump voltage"
-#define L_FAN_VOLTAGE			"Fan voltage"
-#define L_5V_VOLTAGE			"+5V voltage"
-
-#define L_PUMP_CURRENT			"Pump current"
-#define L_FAN_CURRENT			"Fan current"
+static const char *const label_d5next_speeds[] = {
+	"Pump speed",
+	"Fan speed"
+};
 
-static const char *const label_speeds[] = {
-	L_PUMP_SPEED,
-	L_FAN_SPEED,
+static const char *const label_d5next_power[] = {
+	"Pump power",
+	"Fan power"
 };
 
-static const char *const label_power[] = {
-	L_PUMP_POWER,
-	L_FAN_POWER,
+static const char *const label_d5next_voltages[] = {
+	"Pump voltage",
+	"Fan voltage",
+	"+5V voltage"
 };
 
-static const char *const label_voltages[] = {
-	L_PUMP_VOLTAGE,
-	L_FAN_VOLTAGE,
-	L_5V_VOLTAGE,
+static const char *const label_d5next_current[] = {
+	"Pump current",
+	"Fan current"
 };
 
-static const char *const label_current[] = {
-	L_PUMP_CURRENT,
-	L_FAN_CURRENT,
+/* Labels for Farbwerk 360 temperature sensors */
+static const char *const label_temp_sensors[] = {
+	"Sensor 1",
+	"Sensor 2",
+	"Sensor 3",
+	"Sensor 4"
 };
 
-struct d5next_data {
+struct aqc_data {
 	struct hid_device *hdev;
 	struct device *hwmon_dev;
 	struct dentry *debugfs;
-	s32 temp_input;
+	enum kinds kind;
+	const char *name;
+
+	/* General info, same across all devices */
+	u32 serial_number[2];
+	u16 firmware_version;
+
+	/* D5 Next specific - how many times the device was powered on */
+	u32 power_cycles;
+
+	/* Sensor values */
+	s32 temp_input[4];
 	u16 speed_input[2];
 	u32 power_input[2];
 	u16 voltage_input[3];
 	u16 current_input[2];
-	u32 serial_number[2];
-	u16 firmware_version;
-	u32 power_cycles; /* How many times the device was powered on */
+
 	unsigned long updated;
 };
 
-static umode_t d5next_is_visible(const void *data, enum hwmon_sensor_types type, u32 attr,
-				 int channel)
+static umode_t aqc_is_visible(const void *data, enum hwmon_sensor_types type, u32 attr,
+			      int channel)
 {
-	return 0444;
+	const struct aqc_data *priv = data;
+
+	switch (type) {
+	case hwmon_temp:
+		switch (priv->kind) {
+		case d5next:
+			if (channel == 0)
+				return 0444;
+			break;
+		case farbwerk360:
+			return 0444;
+		default:
+			break;
+		}
+		break;
+	case hwmon_fan:
+	case hwmon_power:
+	case hwmon_in:
+	case hwmon_curr:
+		switch (priv->kind) {
+		case d5next:
+			return 0444;
+		default:
+			break;
+		}
+		break;
+	default:
+		break;
+	}
+
+	return 0;
 }
 
-static int d5next_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, int channel,
-		       long *val)
+static int aqc_read(struct device *dev, enum hwmon_sensor_types type, u32 attr,
+		    int channel, long *val)
 {
-	struct d5next_data *priv = dev_get_drvdata(dev);
+	struct aqc_data *priv = dev_get_drvdata(dev);
 
-	if (time_after(jiffies, priv->updated + D5NEXT_STATUS_UPDATE_INTERVAL))
+	if (time_after(jiffies, priv->updated + STATUS_UPDATE_INTERVAL))
 		return -ENODATA;
 
 	switch (type) {
 	case hwmon_temp:
-		*val = priv->temp_input;
+		if (priv->temp_input[channel] == -ENODATA)
+			return -ENODATA;
+
+		*val = priv->temp_input[channel];
 		break;
 	case hwmon_fan:
 		*val = priv->speed_input[channel];
@@ -134,24 +185,59 @@ static int d5next_read(struct device *dev, enum hwmon_sensor_types type, u32 att
 	return 0;
 }
 
-static int d5next_read_string(struct device *dev, enum hwmon_sensor_types type, u32 attr,
-			      int channel, const char **str)
+static int aqc_read_string(struct device *dev, enum hwmon_sensor_types type, u32 attr,
+			   int channel, const char **str)
 {
+	struct aqc_data *priv = dev_get_drvdata(dev);
+
 	switch (type) {
 	case hwmon_temp:
-		*str = L_COOLANT_TEMP;
+		switch (priv->kind) {
+		case d5next:
+			*str = L_D5NEXT_COOLANT_TEMP;
+			break;
+		case farbwerk360:
+			*str = label_temp_sensors[channel];
+			break;
+		default:
+			break;
+		}
 		break;
 	case hwmon_fan:
-		*str = label_speeds[channel];
+		switch (priv->kind) {
+		case d5next:
+			*str = label_d5next_speeds[channel];
+			break;
+		default:
+			break;
+		}
 		break;
 	case hwmon_power:
-		*str = label_power[channel];
+		switch (priv->kind) {
+		case d5next:
+			*str = label_d5next_power[channel];
+			break;
+		default:
+			break;
+		}
 		break;
 	case hwmon_in:
-		*str = label_voltages[channel];
+		switch (priv->kind) {
+		case d5next:
+			*str = label_d5next_voltages[channel];
+			break;
+		default:
+			break;
+		}
 		break;
 	case hwmon_curr:
-		*str = label_current[channel];
+		switch (priv->kind) {
+		case d5next:
+			*str = label_d5next_current[channel];
+			break;
+		default:
+			break;
+		}
 		break;
 	default:
 		return -EOPNOTSUPP;
@@ -160,60 +246,89 @@ static int d5next_read_string(struct device *dev, enum hwmon_sensor_types type,
 	return 0;
 }
 
-static const struct hwmon_ops d5next_hwmon_ops = {
-	.is_visible = d5next_is_visible,
-	.read = d5next_read,
-	.read_string = d5next_read_string,
+static const struct hwmon_ops aqc_hwmon_ops = {
+	.is_visible = aqc_is_visible,
+	.read = aqc_read,
+	.read_string = aqc_read_string,
 };
 
-static const struct hwmon_channel_info *d5next_info[] = {
-	HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT | HWMON_T_LABEL),
-	HWMON_CHANNEL_INFO(fan, HWMON_F_INPUT | HWMON_F_LABEL, HWMON_F_INPUT | HWMON_F_LABEL),
-	HWMON_CHANNEL_INFO(power, HWMON_P_INPUT | HWMON_P_LABEL, HWMON_P_INPUT | HWMON_P_LABEL),
-	HWMON_CHANNEL_INFO(in, HWMON_I_INPUT | HWMON_I_LABEL, HWMON_I_INPUT | HWMON_I_LABEL,
+static const struct hwmon_channel_info *aqc_info[] = {
+	HWMON_CHANNEL_INFO(temp,
+			   HWMON_T_INPUT | HWMON_T_LABEL,
+			   HWMON_T_INPUT | HWMON_T_LABEL,
+			   HWMON_T_INPUT | HWMON_T_LABEL,
+			   HWMON_T_INPUT | HWMON_T_LABEL),
+	HWMON_CHANNEL_INFO(fan,
+			   HWMON_F_INPUT | HWMON_F_LABEL,
+			   HWMON_F_INPUT | HWMON_F_LABEL),
+	HWMON_CHANNEL_INFO(power,
+			   HWMON_P_INPUT | HWMON_P_LABEL,
+			   HWMON_P_INPUT | HWMON_P_LABEL),
+	HWMON_CHANNEL_INFO(in,
+			   HWMON_I_INPUT | HWMON_I_LABEL,
+			   HWMON_I_INPUT | HWMON_I_LABEL,
 			   HWMON_I_INPUT | HWMON_I_LABEL),
-	HWMON_CHANNEL_INFO(curr, HWMON_C_INPUT | HWMON_C_LABEL, HWMON_C_INPUT | HWMON_C_LABEL),
+	HWMON_CHANNEL_INFO(curr,
+			   HWMON_C_INPUT | HWMON_C_LABEL,
+			   HWMON_C_INPUT | HWMON_C_LABEL),
 	NULL
 };
 
-static const struct hwmon_chip_info d5next_chip_info = {
-	.ops = &d5next_hwmon_ops,
-	.info = d5next_info,
+static const struct hwmon_chip_info aqc_chip_info = {
+	.ops = &aqc_hwmon_ops,
+	.info = aqc_info,
 };
 
-static int d5next_raw_event(struct hid_device *hdev, struct hid_report *report, u8 *data, int size)
+static int aqc_raw_event(struct hid_device *hdev, struct hid_report *report, u8 *data,
+			 int size)
 {
-	struct d5next_data *priv;
+	int i, sensor_value;
+	struct aqc_data *priv;
 
-	if (report->id != D5NEXT_STATUS_REPORT_ID)
+	if (report->id != STATUS_REPORT_ID)
 		return 0;
 
 	priv = hid_get_drvdata(hdev);
 
 	/* Info provided with every report */
-
-	priv->serial_number[0] = get_unaligned_be16(data + D5NEXT_SERIAL_FIRST_PART);
-	priv->serial_number[1] = get_unaligned_be16(data + D5NEXT_SERIAL_SECOND_PART);
-
-	priv->firmware_version = get_unaligned_be16(data + D5NEXT_FIRMWARE_VERSION);
-	priv->power_cycles = get_unaligned_be32(data + D5NEXT_POWER_CYCLES);
+	priv->serial_number[0] = get_unaligned_be16(data + SERIAL_FIRST_PART);
+	priv->serial_number[1] = get_unaligned_be16(data + SERIAL_SECOND_PART);
+	priv->firmware_version = get_unaligned_be16(data + FIRMWARE_VERSION);
 
 	/* Sensor readings */
+	switch (priv->kind) {
+	case d5next:
+		priv->power_cycles = get_unaligned_be32(data + D5NEXT_POWER_CYCLES);
 
-	priv->temp_input = get_unaligned_be16(data + D5NEXT_COOLANT_TEMP) * 10;
+		priv->temp_input[0] = get_unaligned_be16(data + D5NEXT_COOLANT_TEMP) * 10;
 
-	priv->speed_input[0] = get_unaligned_be16(data + D5NEXT_PUMP_SPEED);
-	priv->speed_input[1] = get_unaligned_be16(data + D5NEXT_FAN_SPEED);
+		priv->speed_input[0] = get_unaligned_be16(data + D5NEXT_PUMP_SPEED);
+		priv->speed_input[1] = get_unaligned_be16(data + D5NEXT_FAN_SPEED);
 
-	priv->power_input[0] = get_unaligned_be16(data + D5NEXT_PUMP_POWER) * 10000;
-	priv->power_input[1] = get_unaligned_be16(data + D5NEXT_FAN_POWER) * 10000;
+		priv->power_input[0] = get_unaligned_be16(data + D5NEXT_PUMP_POWER) * 10000;
+		priv->power_input[1] = get_unaligned_be16(data + D5NEXT_FAN_POWER) * 10000;
 
-	priv->voltage_input[0] = get_unaligned_be16(data + D5NEXT_PUMP_VOLTAGE) * 10;
-	priv->voltage_input[1] = get_unaligned_be16(data + D5NEXT_FAN_VOLTAGE) * 10;
-	priv->voltage_input[2] = get_unaligned_be16(data + D5NEXT_5V_VOLTAGE) * 10;
+		priv->voltage_input[0] = get_unaligned_be16(data + D5NEXT_PUMP_VOLTAGE) * 10;
+		priv->voltage_input[1] = get_unaligned_be16(data + D5NEXT_FAN_VOLTAGE) * 10;
+		priv->voltage_input[2] = get_unaligned_be16(data + D5NEXT_5V_VOLTAGE) * 10;
 
-	priv->current_input[0] = get_unaligned_be16(data + D5NEXT_PUMP_CURRENT);
-	priv->current_input[1] = get_unaligned_be16(data + D5NEXT_FAN_CURRENT);
+		priv->current_input[0] = get_unaligned_be16(data + D5NEXT_PUMP_CURRENT);
+		priv->current_input[1] = get_unaligned_be16(data + D5NEXT_FAN_CURRENT);
+		break;
+	case farbwerk360:
+		/* Temperature sensor readings */
+		for (i = 0; i < FARBWERK360_NUM_SENSORS; i++) {
+			sensor_value = get_unaligned_be16(data + FARBWERK360_SENSOR_START +
+							  i * FARBWERK360_SENSOR_SIZE);
+			if (sensor_value == FARBWERK360_SENSOR_DISCONNECTED)
+				priv->temp_input[i] = -ENODATA;
+			else
+				priv->temp_input[i] = sensor_value * 10;
+		}
+		break;
+	default:
+		break;
+	}
 
 	priv->updated = jiffies;
 
@@ -224,7 +339,7 @@ static int d5next_raw_event(struct hid_device *hdev, struct hid_report *report,
 
 static int serial_number_show(struct seq_file *seqf, void *unused)
 {
-	struct d5next_data *priv = seqf->private;
+	struct aqc_data *priv = seqf->private;
 
 	seq_printf(seqf, "%05u-%05u\n", priv->serial_number[0], priv->serial_number[1]);
 
@@ -234,7 +349,7 @@ DEFINE_SHOW_ATTRIBUTE(serial_number);
 
 static int firmware_version_show(struct seq_file *seqf, void *unused)
 {
-	struct d5next_data *priv = seqf->private;
+	struct aqc_data *priv = seqf->private;
 
 	seq_printf(seqf, "%u\n", priv->firmware_version);
 
@@ -244,7 +359,7 @@ DEFINE_SHOW_ATTRIBUTE(firmware_version);
 
 static int power_cycles_show(struct seq_file *seqf, void *unused)
 {
-	struct d5next_data *priv = seqf->private;
+	struct aqc_data *priv = seqf->private;
 
 	seq_printf(seqf, "%u\n", priv->power_cycles);
 
@@ -252,29 +367,32 @@ static int power_cycles_show(struct seq_file *seqf, void *unused)
 }
 DEFINE_SHOW_ATTRIBUTE(power_cycles);
 
-static void d5next_debugfs_init(struct d5next_data *priv)
+static void aqc_debugfs_init(struct aqc_data *priv)
 {
-	char name[32];
+	char name[64];
 
-	scnprintf(name, sizeof(name), "%s-%s", DRIVER_NAME, dev_name(&priv->hdev->dev));
+	scnprintf(name, sizeof(name), "%s_%s-%s", "aquacomputer", priv->name,
+		  dev_name(&priv->hdev->dev));
 
 	priv->debugfs = debugfs_create_dir(name, NULL);
 	debugfs_create_file("serial_number", 0444, priv->debugfs, priv, &serial_number_fops);
 	debugfs_create_file("firmware_version", 0444, priv->debugfs, priv, &firmware_version_fops);
-	debugfs_create_file("power_cycles", 0444, priv->debugfs, priv, &power_cycles_fops);
+
+	if (priv->kind == d5next)
+		debugfs_create_file("power_cycles", 0444, priv->debugfs, priv, &power_cycles_fops);
 }
 
 #else
 
-static void d5next_debugfs_init(struct d5next_data *priv)
+static void aqc_debugfs_init(struct aqc_data *priv)
 {
 }
 
 #endif
 
-static int d5next_probe(struct hid_device *hdev, const struct hid_device_id *id)
+static int aqc_probe(struct hid_device *hdev, const struct hid_device_id *id)
 {
-	struct d5next_data *priv;
+	struct aqc_data *priv;
 	int ret;
 
 	priv = devm_kzalloc(&hdev->dev, sizeof(*priv), GFP_KERNEL);
@@ -284,7 +402,7 @@ static int d5next_probe(struct hid_device *hdev, const struct hid_device_id *id)
 	priv->hdev = hdev;
 	hid_set_drvdata(hdev, priv);
 
-	priv->updated = jiffies - D5NEXT_STATUS_UPDATE_INTERVAL;
+	priv->updated = jiffies - STATUS_UPDATE_INTERVAL;
 
 	ret = hid_parse(hdev);
 	if (ret)
@@ -298,15 +416,28 @@ static int d5next_probe(struct hid_device *hdev, const struct hid_device_id *id)
 	if (ret)
 		goto fail_and_stop;
 
-	priv->hwmon_dev = hwmon_device_register_with_info(&hdev->dev, "d5next", priv,
-							  &d5next_chip_info, NULL);
+	switch (hdev->product) {
+	case USB_PRODUCT_ID_D5NEXT:
+		priv->kind = d5next;
+		break;
+	case USB_PRODUCT_ID_FARBWERK360:
+		priv->kind = farbwerk360;
+		break;
+	default:
+		break;
+	}
+
+	priv->name = aqc_device_names[priv->kind];
+
+	priv->hwmon_dev = hwmon_device_register_with_info(&hdev->dev, priv->name, priv,
+							  &aqc_chip_info, NULL);
 
 	if (IS_ERR(priv->hwmon_dev)) {
 		ret = PTR_ERR(priv->hwmon_dev);
 		goto fail_and_close;
 	}
 
-	d5next_debugfs_init(priv);
+	aqc_debugfs_init(priv);
 
 	return 0;
 
@@ -317,9 +448,9 @@ fail_and_stop:
 	return ret;
 }
 
-static void d5next_remove(struct hid_device *hdev)
+static void aqc_remove(struct hid_device *hdev)
 {
-	struct d5next_data *priv = hid_get_drvdata(hdev);
+	struct aqc_data *priv = hid_get_drvdata(hdev);
 
 	debugfs_remove_recursive(priv->debugfs);
 	hwmon_device_unregister(priv->hwmon_dev);
@@ -328,36 +459,36 @@ static void d5next_remove(struct hid_device *hdev)
 	hid_hw_stop(hdev);
 }
 
-static const struct hid_device_id d5next_table[] = {
-	{ HID_USB_DEVICE(0x0c70, 0xf00e) }, /* Aquacomputer D5 Next */
-	{},
+static const struct hid_device_id aqc_table[] = {
+	{ HID_USB_DEVICE(USB_VENDOR_ID_AQUACOMPUTER, USB_PRODUCT_ID_D5NEXT) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_AQUACOMPUTER, USB_PRODUCT_ID_FARBWERK360) },
+	{ }
 };
 
-MODULE_DEVICE_TABLE(hid, d5next_table);
+MODULE_DEVICE_TABLE(hid, aqc_table);
 
-static struct hid_driver d5next_driver = {
+static struct hid_driver aqc_driver = {
 	.name = DRIVER_NAME,
-	.id_table = d5next_table,
-	.probe = d5next_probe,
-	.remove = d5next_remove,
-	.raw_event = d5next_raw_event,
+	.id_table = aqc_table,
+	.probe = aqc_probe,
+	.remove = aqc_remove,
+	.raw_event = aqc_raw_event,
 };
 
-static int __init d5next_init(void)
+static int __init aqc_init(void)
 {
-	return hid_register_driver(&d5next_driver);
+	return hid_register_driver(&aqc_driver);
 }
 
-static void __exit d5next_exit(void)
+static void __exit aqc_exit(void)
 {
-	hid_unregister_driver(&d5next_driver);
+	hid_unregister_driver(&aqc_driver);
 }
 
 /* Request to initialize after the HID bus to ensure it's not being loaded before */
-
-late_initcall(d5next_init);
-module_exit(d5next_exit);
+late_initcall(aqc_init);
+module_exit(aqc_exit);
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Aleksa Savic <savicaleksa83@gmail.com>");
-MODULE_DESCRIPTION("Hwmon driver for Aquacomputer D5 Next pump");
+MODULE_DESCRIPTION("Hwmon driver for Aquacomputer devices");
-- 
GitLab


From ee55c9c5734e34d7bf40c4f91d181622630626ff Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Mon, 7 Feb 2022 14:10:08 +0100
Subject: [PATCH 0783/1586] m68k: defconfig: Update defconfigs for v5.17-rc3

  - Drop CONFIG_NFT_COUNTER=m (removed in commit 023223dfbfb34fcc
    ("netfilter: nf_tables: make counter support built-in")),
  - Drop CONFIG_NF_FLOW_TABLE_IPV4=m and CONFIG_NF_FLOW_TABLE_IPV6=m
    (can no longer be enabled since commit c42ba4290b2147aa ("netfilter:
    flowtable: remove ipv4/ipv6 modules")),
  - Enable the new option for legacy fbcon hardware acceleration code on
    all platforms that have a frame buffer device that can make use of
    it,
  - Drop CONFIG_TEST_HASH=m (replaced by auto-modular
    CONFIG_HASH_KUNIT_TEST in commit 0acc968f352336a4 ("test_hash.c:
    refactor into kunit")),
  - Enable modular build of the new siphash selftest.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Link: https://lore.kernel.org/r/20220123125543.2658445-1-geert@linux-m68k.org
Link: https://lore.kernel.org/r/20220207131008.2202977-1-geert@linux-m68k.org
---
 arch/m68k/configs/amiga_defconfig    | 6 ++----
 arch/m68k/configs/apollo_defconfig   | 6 ++----
 arch/m68k/configs/atari_defconfig    | 5 +----
 arch/m68k/configs/bvme6000_defconfig | 5 +----
 arch/m68k/configs/hp300_defconfig    | 6 ++----
 arch/m68k/configs/mac_defconfig      | 5 +----
 arch/m68k/configs/multi_defconfig    | 6 ++----
 arch/m68k/configs/mvme147_defconfig  | 5 +----
 arch/m68k/configs/mvme16x_defconfig  | 5 +----
 arch/m68k/configs/q40_defconfig      | 5 +----
 arch/m68k/configs/sun3_defconfig     | 5 +----
 arch/m68k/configs/sun3x_defconfig    | 5 +----
 12 files changed, 16 insertions(+), 48 deletions(-)

diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig
index bc9952f8be667..49f301c57df5f 100644
--- a/arch/m68k/configs/amiga_defconfig
+++ b/arch/m68k/configs/amiga_defconfig
@@ -104,7 +104,6 @@ CONFIG_NF_TABLES_NETDEV=y
 CONFIG_NFT_NUMGEN=m
 CONFIG_NFT_CT=m
 CONFIG_NFT_FLOW_OFFLOAD=m
-CONFIG_NFT_COUNTER=m
 CONFIG_NFT_CONNLIMIT=m
 CONFIG_NFT_LOG=m
 CONFIG_NFT_LIMIT=m
@@ -204,7 +203,6 @@ CONFIG_IP_SET_LIST_SET=m
 CONFIG_NFT_DUP_IPV4=m
 CONFIG_NFT_FIB_IPV4=m
 CONFIG_NF_TABLES_ARP=y
-CONFIG_NF_FLOW_TABLE_IPV4=m
 CONFIG_NF_LOG_ARP=m
 CONFIG_NF_LOG_IPV4=m
 CONFIG_IP_NF_IPTABLES=m
@@ -229,7 +227,6 @@ CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NFT_DUP_IPV6=m
 CONFIG_NFT_FIB_IPV6=m
-CONFIG_NF_FLOW_TABLE_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
 CONFIG_IP6_NF_MATCH_EUI64=m
@@ -435,6 +432,7 @@ CONFIG_FB_AMIGA_ECS=y
 CONFIG_FB_AMIGA_AGA=y
 CONFIG_FB_FM2=y
 CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_FRAMEBUFFER_CONSOLE_LEGACY_ACCELERATION=y
 CONFIG_LOGO=y
 CONFIG_SOUND=m
 CONFIG_DMASOUND_PAULA=m
@@ -643,7 +641,7 @@ CONFIG_TEST_UUID=m
 CONFIG_TEST_XARRAY=m
 CONFIG_TEST_OVERFLOW=m
 CONFIG_TEST_RHASHTABLE=m
-CONFIG_TEST_HASH=m
+CONFIG_TEST_SIPHASH=m
 CONFIG_TEST_IDA=m
 CONFIG_TEST_BITOPS=m
 CONFIG_TEST_VMALLOC=m
diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig
index a77269c6e5bac..405997b614472 100644
--- a/arch/m68k/configs/apollo_defconfig
+++ b/arch/m68k/configs/apollo_defconfig
@@ -100,7 +100,6 @@ CONFIG_NF_TABLES_NETDEV=y
 CONFIG_NFT_NUMGEN=m
 CONFIG_NFT_CT=m
 CONFIG_NFT_FLOW_OFFLOAD=m
-CONFIG_NFT_COUNTER=m
 CONFIG_NFT_CONNLIMIT=m
 CONFIG_NFT_LOG=m
 CONFIG_NFT_LIMIT=m
@@ -200,7 +199,6 @@ CONFIG_IP_SET_LIST_SET=m
 CONFIG_NFT_DUP_IPV4=m
 CONFIG_NFT_FIB_IPV4=m
 CONFIG_NF_TABLES_ARP=y
-CONFIG_NF_FLOW_TABLE_IPV4=m
 CONFIG_NF_LOG_ARP=m
 CONFIG_NF_LOG_IPV4=m
 CONFIG_IP_NF_IPTABLES=m
@@ -225,7 +223,6 @@ CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NFT_DUP_IPV6=m
 CONFIG_NFT_FIB_IPV6=m
-CONFIG_NF_FLOW_TABLE_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
 CONFIG_IP6_NF_MATCH_EUI64=m
@@ -393,6 +390,7 @@ CONFIG_PTP_1588_CLOCK=m
 # CONFIG_HWMON is not set
 CONFIG_FB=y
 CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_FRAMEBUFFER_CONSOLE_LEGACY_ACCELERATION=y
 CONFIG_LOGO=y
 # CONFIG_LOGO_LINUX_VGA16 is not set
 # CONFIG_LOGO_LINUX_CLUT224 is not set
@@ -599,7 +597,7 @@ CONFIG_TEST_UUID=m
 CONFIG_TEST_XARRAY=m
 CONFIG_TEST_OVERFLOW=m
 CONFIG_TEST_RHASHTABLE=m
-CONFIG_TEST_HASH=m
+CONFIG_TEST_SIPHASH=m
 CONFIG_TEST_IDA=m
 CONFIG_TEST_BITOPS=m
 CONFIG_TEST_VMALLOC=m
diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig
index 7a74efa6b9a1a..eb342a33b73e8 100644
--- a/arch/m68k/configs/atari_defconfig
+++ b/arch/m68k/configs/atari_defconfig
@@ -107,7 +107,6 @@ CONFIG_NF_TABLES_NETDEV=y
 CONFIG_NFT_NUMGEN=m
 CONFIG_NFT_CT=m
 CONFIG_NFT_FLOW_OFFLOAD=m
-CONFIG_NFT_COUNTER=m
 CONFIG_NFT_CONNLIMIT=m
 CONFIG_NFT_LOG=m
 CONFIG_NFT_LIMIT=m
@@ -207,7 +206,6 @@ CONFIG_IP_SET_LIST_SET=m
 CONFIG_NFT_DUP_IPV4=m
 CONFIG_NFT_FIB_IPV4=m
 CONFIG_NF_TABLES_ARP=y
-CONFIG_NF_FLOW_TABLE_IPV4=m
 CONFIG_NF_LOG_ARP=m
 CONFIG_NF_LOG_IPV4=m
 CONFIG_IP_NF_IPTABLES=m
@@ -232,7 +230,6 @@ CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NFT_DUP_IPV6=m
 CONFIG_NFT_FIB_IPV6=m
-CONFIG_NF_FLOW_TABLE_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
 CONFIG_IP6_NF_MATCH_EUI64=m
@@ -621,7 +618,7 @@ CONFIG_TEST_UUID=m
 CONFIG_TEST_XARRAY=m
 CONFIG_TEST_OVERFLOW=m
 CONFIG_TEST_RHASHTABLE=m
-CONFIG_TEST_HASH=m
+CONFIG_TEST_SIPHASH=m
 CONFIG_TEST_IDA=m
 CONFIG_TEST_BITOPS=m
 CONFIG_TEST_VMALLOC=m
diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig
index a5323bf2eb333..e6de6b4dff86e 100644
--- a/arch/m68k/configs/bvme6000_defconfig
+++ b/arch/m68k/configs/bvme6000_defconfig
@@ -97,7 +97,6 @@ CONFIG_NF_TABLES_NETDEV=y
 CONFIG_NFT_NUMGEN=m
 CONFIG_NFT_CT=m
 CONFIG_NFT_FLOW_OFFLOAD=m
-CONFIG_NFT_COUNTER=m
 CONFIG_NFT_CONNLIMIT=m
 CONFIG_NFT_LOG=m
 CONFIG_NFT_LIMIT=m
@@ -197,7 +196,6 @@ CONFIG_IP_SET_LIST_SET=m
 CONFIG_NFT_DUP_IPV4=m
 CONFIG_NFT_FIB_IPV4=m
 CONFIG_NF_TABLES_ARP=y
-CONFIG_NF_FLOW_TABLE_IPV4=m
 CONFIG_NF_LOG_ARP=m
 CONFIG_NF_LOG_IPV4=m
 CONFIG_IP_NF_IPTABLES=m
@@ -222,7 +220,6 @@ CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NFT_DUP_IPV6=m
 CONFIG_NFT_FIB_IPV6=m
-CONFIG_NF_FLOW_TABLE_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
 CONFIG_IP6_NF_MATCH_EUI64=m
@@ -592,7 +589,7 @@ CONFIG_TEST_UUID=m
 CONFIG_TEST_XARRAY=m
 CONFIG_TEST_OVERFLOW=m
 CONFIG_TEST_RHASHTABLE=m
-CONFIG_TEST_HASH=m
+CONFIG_TEST_SIPHASH=m
 CONFIG_TEST_IDA=m
 CONFIG_TEST_BITOPS=m
 CONFIG_TEST_VMALLOC=m
diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig
index 5e80aa0869d54..048d9b114eb43 100644
--- a/arch/m68k/configs/hp300_defconfig
+++ b/arch/m68k/configs/hp300_defconfig
@@ -99,7 +99,6 @@ CONFIG_NF_TABLES_NETDEV=y
 CONFIG_NFT_NUMGEN=m
 CONFIG_NFT_CT=m
 CONFIG_NFT_FLOW_OFFLOAD=m
-CONFIG_NFT_COUNTER=m
 CONFIG_NFT_CONNLIMIT=m
 CONFIG_NFT_LOG=m
 CONFIG_NFT_LIMIT=m
@@ -199,7 +198,6 @@ CONFIG_IP_SET_LIST_SET=m
 CONFIG_NFT_DUP_IPV4=m
 CONFIG_NFT_FIB_IPV4=m
 CONFIG_NF_TABLES_ARP=y
-CONFIG_NF_FLOW_TABLE_IPV4=m
 CONFIG_NF_LOG_ARP=m
 CONFIG_NF_LOG_IPV4=m
 CONFIG_IP_NF_IPTABLES=m
@@ -224,7 +222,6 @@ CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NFT_DUP_IPV6=m
 CONFIG_NFT_FIB_IPV6=m
-CONFIG_NF_FLOW_TABLE_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
 CONFIG_IP6_NF_MATCH_EUI64=m
@@ -395,6 +392,7 @@ CONFIG_PTP_1588_CLOCK=m
 # CONFIG_HWMON is not set
 CONFIG_FB=y
 CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_FRAMEBUFFER_CONSOLE_LEGACY_ACCELERATION=y
 CONFIG_LOGO=y
 # CONFIG_LOGO_LINUX_MONO is not set
 # CONFIG_LOGO_LINUX_VGA16 is not set
@@ -601,7 +599,7 @@ CONFIG_TEST_UUID=m
 CONFIG_TEST_XARRAY=m
 CONFIG_TEST_OVERFLOW=m
 CONFIG_TEST_RHASHTABLE=m
-CONFIG_TEST_HASH=m
+CONFIG_TEST_SIPHASH=m
 CONFIG_TEST_IDA=m
 CONFIG_TEST_BITOPS=m
 CONFIG_TEST_VMALLOC=m
diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig
index e84326a3f62db..4e5b32ba00dfc 100644
--- a/arch/m68k/configs/mac_defconfig
+++ b/arch/m68k/configs/mac_defconfig
@@ -98,7 +98,6 @@ CONFIG_NF_TABLES_NETDEV=y
 CONFIG_NFT_NUMGEN=m
 CONFIG_NFT_CT=m
 CONFIG_NFT_FLOW_OFFLOAD=m
-CONFIG_NFT_COUNTER=m
 CONFIG_NFT_CONNLIMIT=m
 CONFIG_NFT_LOG=m
 CONFIG_NFT_LIMIT=m
@@ -198,7 +197,6 @@ CONFIG_IP_SET_LIST_SET=m
 CONFIG_NFT_DUP_IPV4=m
 CONFIG_NFT_FIB_IPV4=m
 CONFIG_NF_TABLES_ARP=y
-CONFIG_NF_FLOW_TABLE_IPV4=m
 CONFIG_NF_LOG_ARP=m
 CONFIG_NF_LOG_IPV4=m
 CONFIG_IP_NF_IPTABLES=m
@@ -223,7 +221,6 @@ CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NFT_DUP_IPV6=m
 CONFIG_NFT_FIB_IPV6=m
-CONFIG_NF_FLOW_TABLE_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
 CONFIG_IP6_NF_MATCH_EUI64=m
@@ -623,7 +620,7 @@ CONFIG_TEST_UUID=m
 CONFIG_TEST_XARRAY=m
 CONFIG_TEST_OVERFLOW=m
 CONFIG_TEST_RHASHTABLE=m
-CONFIG_TEST_HASH=m
+CONFIG_TEST_SIPHASH=m
 CONFIG_TEST_IDA=m
 CONFIG_TEST_BITOPS=m
 CONFIG_TEST_VMALLOC=m
diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig
index 337552f433390..7df61e743591a 100644
--- a/arch/m68k/configs/multi_defconfig
+++ b/arch/m68k/configs/multi_defconfig
@@ -118,7 +118,6 @@ CONFIG_NF_TABLES_NETDEV=y
 CONFIG_NFT_NUMGEN=m
 CONFIG_NFT_CT=m
 CONFIG_NFT_FLOW_OFFLOAD=m
-CONFIG_NFT_COUNTER=m
 CONFIG_NFT_CONNLIMIT=m
 CONFIG_NFT_LOG=m
 CONFIG_NFT_LIMIT=m
@@ -218,7 +217,6 @@ CONFIG_IP_SET_LIST_SET=m
 CONFIG_NFT_DUP_IPV4=m
 CONFIG_NFT_FIB_IPV4=m
 CONFIG_NF_TABLES_ARP=y
-CONFIG_NF_FLOW_TABLE_IPV4=m
 CONFIG_NF_LOG_ARP=m
 CONFIG_NF_LOG_IPV4=m
 CONFIG_IP_NF_IPTABLES=m
@@ -243,7 +241,6 @@ CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NFT_DUP_IPV6=m
 CONFIG_NFT_FIB_IPV6=m
-CONFIG_NF_FLOW_TABLE_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
 CONFIG_IP6_NF_MATCH_EUI64=m
@@ -497,6 +494,7 @@ CONFIG_FB_ATARI=y
 CONFIG_FB_VALKYRIE=y
 CONFIG_FB_MAC=y
 CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_FRAMEBUFFER_CONSOLE_LEGACY_ACCELERATION=y
 CONFIG_LOGO=y
 CONFIG_SOUND=m
 CONFIG_DMASOUND_ATARI=m
@@ -708,7 +706,7 @@ CONFIG_TEST_UUID=m
 CONFIG_TEST_XARRAY=m
 CONFIG_TEST_OVERFLOW=m
 CONFIG_TEST_RHASHTABLE=m
-CONFIG_TEST_HASH=m
+CONFIG_TEST_SIPHASH=m
 CONFIG_TEST_IDA=m
 CONFIG_TEST_BITOPS=m
 CONFIG_TEST_VMALLOC=m
diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig
index 7b688f7d272a2..80922fe80d9de 100644
--- a/arch/m68k/configs/mvme147_defconfig
+++ b/arch/m68k/configs/mvme147_defconfig
@@ -96,7 +96,6 @@ CONFIG_NF_TABLES_NETDEV=y
 CONFIG_NFT_NUMGEN=m
 CONFIG_NFT_CT=m
 CONFIG_NFT_FLOW_OFFLOAD=m
-CONFIG_NFT_COUNTER=m
 CONFIG_NFT_CONNLIMIT=m
 CONFIG_NFT_LOG=m
 CONFIG_NFT_LIMIT=m
@@ -196,7 +195,6 @@ CONFIG_IP_SET_LIST_SET=m
 CONFIG_NFT_DUP_IPV4=m
 CONFIG_NFT_FIB_IPV4=m
 CONFIG_NF_TABLES_ARP=y
-CONFIG_NF_FLOW_TABLE_IPV4=m
 CONFIG_NF_LOG_ARP=m
 CONFIG_NF_LOG_IPV4=m
 CONFIG_IP_NF_IPTABLES=m
@@ -221,7 +219,6 @@ CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NFT_DUP_IPV6=m
 CONFIG_NFT_FIB_IPV6=m
-CONFIG_NF_FLOW_TABLE_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
 CONFIG_IP6_NF_MATCH_EUI64=m
@@ -591,7 +588,7 @@ CONFIG_TEST_UUID=m
 CONFIG_TEST_XARRAY=m
 CONFIG_TEST_OVERFLOW=m
 CONFIG_TEST_RHASHTABLE=m
-CONFIG_TEST_HASH=m
+CONFIG_TEST_SIPHASH=m
 CONFIG_TEST_IDA=m
 CONFIG_TEST_BITOPS=m
 CONFIG_TEST_VMALLOC=m
diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig
index 7c2cb31d63dd8..530c4cf7c59ba 100644
--- a/arch/m68k/configs/mvme16x_defconfig
+++ b/arch/m68k/configs/mvme16x_defconfig
@@ -97,7 +97,6 @@ CONFIG_NF_TABLES_NETDEV=y
 CONFIG_NFT_NUMGEN=m
 CONFIG_NFT_CT=m
 CONFIG_NFT_FLOW_OFFLOAD=m
-CONFIG_NFT_COUNTER=m
 CONFIG_NFT_CONNLIMIT=m
 CONFIG_NFT_LOG=m
 CONFIG_NFT_LIMIT=m
@@ -197,7 +196,6 @@ CONFIG_IP_SET_LIST_SET=m
 CONFIG_NFT_DUP_IPV4=m
 CONFIG_NFT_FIB_IPV4=m
 CONFIG_NF_TABLES_ARP=y
-CONFIG_NF_FLOW_TABLE_IPV4=m
 CONFIG_NF_LOG_ARP=m
 CONFIG_NF_LOG_IPV4=m
 CONFIG_IP_NF_IPTABLES=m
@@ -222,7 +220,6 @@ CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NFT_DUP_IPV6=m
 CONFIG_NFT_FIB_IPV6=m
-CONFIG_NF_FLOW_TABLE_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
 CONFIG_IP6_NF_MATCH_EUI64=m
@@ -592,7 +589,7 @@ CONFIG_TEST_UUID=m
 CONFIG_TEST_XARRAY=m
 CONFIG_TEST_OVERFLOW=m
 CONFIG_TEST_RHASHTABLE=m
-CONFIG_TEST_HASH=m
+CONFIG_TEST_SIPHASH=m
 CONFIG_TEST_IDA=m
 CONFIG_TEST_BITOPS=m
 CONFIG_TEST_VMALLOC=m
diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig
index ca43897af26de..d3f371e490ec0 100644
--- a/arch/m68k/configs/q40_defconfig
+++ b/arch/m68k/configs/q40_defconfig
@@ -98,7 +98,6 @@ CONFIG_NF_TABLES_NETDEV=y
 CONFIG_NFT_NUMGEN=m
 CONFIG_NFT_CT=m
 CONFIG_NFT_FLOW_OFFLOAD=m
-CONFIG_NFT_COUNTER=m
 CONFIG_NFT_CONNLIMIT=m
 CONFIG_NFT_LOG=m
 CONFIG_NFT_LIMIT=m
@@ -198,7 +197,6 @@ CONFIG_IP_SET_LIST_SET=m
 CONFIG_NFT_DUP_IPV4=m
 CONFIG_NFT_FIB_IPV4=m
 CONFIG_NF_TABLES_ARP=y
-CONFIG_NF_FLOW_TABLE_IPV4=m
 CONFIG_NF_LOG_ARP=m
 CONFIG_NF_LOG_IPV4=m
 CONFIG_IP_NF_IPTABLES=m
@@ -223,7 +221,6 @@ CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NFT_DUP_IPV6=m
 CONFIG_NFT_FIB_IPV6=m
-CONFIG_NF_FLOW_TABLE_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
 CONFIG_IP6_NF_MATCH_EUI64=m
@@ -610,7 +607,7 @@ CONFIG_TEST_UUID=m
 CONFIG_TEST_XARRAY=m
 CONFIG_TEST_OVERFLOW=m
 CONFIG_TEST_RHASHTABLE=m
-CONFIG_TEST_HASH=m
+CONFIG_TEST_SIPHASH=m
 CONFIG_TEST_IDA=m
 CONFIG_TEST_BITOPS=m
 CONFIG_TEST_VMALLOC=m
diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig
index e3d515f37144a..f214f277c55a1 100644
--- a/arch/m68k/configs/sun3_defconfig
+++ b/arch/m68k/configs/sun3_defconfig
@@ -94,7 +94,6 @@ CONFIG_NF_TABLES_NETDEV=y
 CONFIG_NFT_NUMGEN=m
 CONFIG_NFT_CT=m
 CONFIG_NFT_FLOW_OFFLOAD=m
-CONFIG_NFT_COUNTER=m
 CONFIG_NFT_CONNLIMIT=m
 CONFIG_NFT_LOG=m
 CONFIG_NFT_LIMIT=m
@@ -194,7 +193,6 @@ CONFIG_IP_SET_LIST_SET=m
 CONFIG_NFT_DUP_IPV4=m
 CONFIG_NFT_FIB_IPV4=m
 CONFIG_NF_TABLES_ARP=y
-CONFIG_NF_FLOW_TABLE_IPV4=m
 CONFIG_NF_LOG_ARP=m
 CONFIG_NF_LOG_IPV4=m
 CONFIG_IP_NF_IPTABLES=m
@@ -219,7 +217,6 @@ CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NFT_DUP_IPV6=m
 CONFIG_NFT_FIB_IPV6=m
-CONFIG_NF_FLOW_TABLE_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
 CONFIG_IP6_NF_MATCH_EUI64=m
@@ -593,7 +590,7 @@ CONFIG_TEST_UUID=m
 CONFIG_TEST_XARRAY=m
 CONFIG_TEST_OVERFLOW=m
 CONFIG_TEST_RHASHTABLE=m
-CONFIG_TEST_HASH=m
+CONFIG_TEST_SIPHASH=m
 CONFIG_TEST_IDA=m
 CONFIG_TEST_BITOPS=m
 CONFIG_TEST_VMALLOC=m
diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig
index d601606c969b8..73b4c61394a2a 100644
--- a/arch/m68k/configs/sun3x_defconfig
+++ b/arch/m68k/configs/sun3x_defconfig
@@ -94,7 +94,6 @@ CONFIG_NF_TABLES_NETDEV=y
 CONFIG_NFT_NUMGEN=m
 CONFIG_NFT_CT=m
 CONFIG_NFT_FLOW_OFFLOAD=m
-CONFIG_NFT_COUNTER=m
 CONFIG_NFT_CONNLIMIT=m
 CONFIG_NFT_LOG=m
 CONFIG_NFT_LIMIT=m
@@ -194,7 +193,6 @@ CONFIG_IP_SET_LIST_SET=m
 CONFIG_NFT_DUP_IPV4=m
 CONFIG_NFT_FIB_IPV4=m
 CONFIG_NF_TABLES_ARP=y
-CONFIG_NF_FLOW_TABLE_IPV4=m
 CONFIG_NF_LOG_ARP=m
 CONFIG_NF_LOG_IPV4=m
 CONFIG_IP_NF_IPTABLES=m
@@ -219,7 +217,6 @@ CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NFT_DUP_IPV6=m
 CONFIG_NFT_FIB_IPV6=m
-CONFIG_NF_FLOW_TABLE_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
 CONFIG_IP6_NF_MATCH_EUI64=m
@@ -593,7 +590,7 @@ CONFIG_TEST_UUID=m
 CONFIG_TEST_XARRAY=m
 CONFIG_TEST_OVERFLOW=m
 CONFIG_TEST_RHASHTABLE=m
-CONFIG_TEST_HASH=m
+CONFIG_TEST_SIPHASH=m
 CONFIG_TEST_IDA=m
 CONFIG_TEST_BITOPS=m
 CONFIG_TEST_VMALLOC=m
-- 
GitLab


From 0d52a01a266ba21840c7cb02e1de39c503759dda Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Mon, 7 Feb 2022 14:11:23 +0100
Subject: [PATCH 0784/1586] m68k: defconfig: Disable fbdev on Sun3/3x

Commit e019630e78e3482c ("[PATCH] remove broken video drivers") in
v2.6.21 removed Sun3 framebuffer support, hence there are no more frame
buffer device drivers available on the Sun3 and Sun3x platforms.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Link: https://lore.kernel.org/r/20220207131123.2203141-1-geert@linux-m68k.org
---
 arch/m68k/configs/sun3_defconfig  | 3 ---
 arch/m68k/configs/sun3x_defconfig | 3 ---
 2 files changed, 6 deletions(-)

diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig
index f214f277c55a1..db6769790bdb6 100644
--- a/arch/m68k/configs/sun3_defconfig
+++ b/arch/m68k/configs/sun3_defconfig
@@ -385,9 +385,6 @@ CONFIG_NTP_PPS=y
 CONFIG_PPS_CLIENT_LDISC=m
 CONFIG_PTP_1588_CLOCK=m
 # CONFIG_HWMON is not set
-CONFIG_FB=y
-CONFIG_FRAMEBUFFER_CONSOLE=y
-CONFIG_LOGO=y
 CONFIG_HID=m
 CONFIG_HIDRAW=y
 CONFIG_UHID=m
diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig
index 73b4c61394a2a..e9c362683666a 100644
--- a/arch/m68k/configs/sun3x_defconfig
+++ b/arch/m68k/configs/sun3x_defconfig
@@ -384,9 +384,6 @@ CONFIG_NTP_PPS=y
 CONFIG_PPS_CLIENT_LDISC=m
 CONFIG_PTP_1588_CLOCK=m
 # CONFIG_HWMON is not set
-CONFIG_FB=y
-CONFIG_FRAMEBUFFER_CONSOLE=y
-CONFIG_LOGO=y
 CONFIG_HID=m
 CONFIG_HIDRAW=y
 CONFIG_UHID=m
-- 
GitLab


From e65b831a1e191caff3fc0d06bc7019cdaf8f868e Mon Sep 17 00:00:00 2001
From: Qinghua Jin <qhjin.dev@gmail.com>
Date: Fri, 7 Jan 2022 10:22:58 +0800
Subject: [PATCH 0785/1586] nvme-fc: fix a typo

subsytem -> subsystem

Signed-off-by: Qinghua Jin <qhjin.dev@gmail.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/linux/nvme-fc-driver.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h
index cb909edb76c48..5358a5facdee6 100644
--- a/include/linux/nvme-fc-driver.h
+++ b/include/linux/nvme-fc-driver.h
@@ -721,7 +721,7 @@ enum {
  *
  * Fields with static values for the port. Initialized by the
  * port_info struct supplied to the registration call.
- * @port_num:  NVME-FC transport subsytem port number
+ * @port_num:  NVME-FC transport subsystem port number
  * @node_name: FC WWNN for the port
  * @port_name: FC WWPN for the port
  * @private:   pointer to memory allocated alongside the local port
-- 
GitLab


From 50ab19d89feaf4ebeca6872b46da4b503eee20c1 Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <kch@nvidia.com>
Date: Tue, 18 Jan 2022 23:49:54 -0800
Subject: [PATCH 0786/1586] nvme-core: remove unnecessary semicolon

It is not a good practice to have a semicolon at the end of the
function definition. Remove it from nvme_pr_type().

Signed-off-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 5e0bfda04bd7b..a1d793e799825 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1979,7 +1979,7 @@ static char nvme_pr_type(enum pr_type type)
 	default:
 		return 0;
 	}
-};
+}
 
 static int nvme_send_ns_head_pr_command(struct block_device *bdev,
 		struct nvme_command *c, u8 data[16])
-- 
GitLab


From ba3266434d6615ff8015b01846d0e7756c9ad936 Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <kch@nvidia.com>
Date: Fri, 21 Jan 2022 21:05:39 -0800
Subject: [PATCH 0787/1586] nvme-core: remove unnecessary function parameter

In function nvme_execute_rq() we don't use gendisk parameter at all.
Remove the unsed parameter and adjust the calls.

Signed-off-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/core.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index a1d793e799825..dd18861f77c05 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1049,8 +1049,7 @@ EXPORT_SYMBOL_GPL(nvme_setup_cmd);
  * >0: nvme controller's cqe status response
  * <0: kernel error in lieu of controller response
  */
-static int nvme_execute_rq(struct gendisk *disk, struct request *rq,
-		bool at_head)
+static int nvme_execute_rq(struct request *rq, bool at_head)
 {
 	blk_status_t status;
 
@@ -1090,7 +1089,7 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
 			goto out;
 	}
 
-	ret = nvme_execute_rq(NULL, req, at_head);
+	ret = nvme_execute_rq(req, at_head);
 	if (result && ret >= 0)
 		*result = nvme_req(req)->result;
  out:
@@ -1206,12 +1205,11 @@ int nvme_execute_passthru_rq(struct request *rq)
 	struct nvme_command *cmd = nvme_req(rq)->cmd;
 	struct nvme_ctrl *ctrl = nvme_req(rq)->ctrl;
 	struct nvme_ns *ns = rq->q->queuedata;
-	struct gendisk *disk = ns ? ns->disk : NULL;
 	u32 effects;
 	int  ret;
 
 	effects = nvme_passthru_start(ctrl, ns, cmd->common.opcode);
-	ret = nvme_execute_rq(disk, rq, false);
+	ret = nvme_execute_rq(rq, false);
 	if (effects) /* nothing to be done for zero cmd effects */
 		nvme_passthru_end(ctrl, effects, cmd, ret);
 
-- 
GitLab


From 572c97355bdc0e7cdd5979304b8554712e26ceb8 Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <kch@nvidia.com>
Date: Tue, 11 Jan 2022 22:20:59 -0800
Subject: [PATCH 0788/1586] nvme-fabrics: use unsigned int type

Loop variable i will never have a negative value, so use
unsigned int type instaed of int.

Signed-off-by: Chaitanya Kulkarni <kch@nvidia.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/fabrics.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index f79a66d4e22ce..9f81a0562e3e9 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -873,7 +873,7 @@ static int nvmf_check_required_opts(struct nvmf_ctrl_options *opts,
 		unsigned int required_opts)
 {
 	if ((opts->mask & required_opts) != required_opts) {
-		int i;
+		unsigned int i;
 
 		for (i = 0; i < ARRAY_SIZE(opt_tokens); i++) {
 			if ((opt_tokens[i].token & required_opts) &&
-- 
GitLab


From 0801a4b630ab0949ddf0fc3f26cb17976e2d3afb Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <kch@nvidia.com>
Date: Tue, 11 Jan 2022 22:21:00 -0800
Subject: [PATCH 0789/1586] nvme-fabrics: use unsigned int type

Loop variable i will never have a negative value, so use
unsigned int type instaed of int.

Signed-off-by: Chaitanya Kulkarni <kch@nvidia.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/fabrics.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 9f81a0562e3e9..131e78e62d00d 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -923,7 +923,7 @@ static int nvmf_check_allowed_opts(struct nvmf_ctrl_options *opts,
 		unsigned int allowed_opts)
 {
 	if (opts->mask & ~allowed_opts) {
-		int i;
+		unsigned int i;
 
 		for (i = 0; i < ARRAY_SIZE(opt_tokens); i++) {
 			if ((opt_tokens[i].token & opts->mask) &&
-- 
GitLab


From 72b3eab456ba57bf933b25e1b2adea3e3eafd153 Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <kch@nvidia.com>
Date: Tue, 11 Jan 2022 22:20:57 -0800
Subject: [PATCH 0790/1586] nvme-fabrics: use consistent zeroout pattern

Remove zeroout memeset call & zeroout local variable cmd at the time
of declaration in nvmf_ref_read32() similar to what we have done in
nvmf_reg_read64(), nvmf_reg_write32(), nvmf_connect_admin_queue(), and
nvmf_connect_io_queue().

Signed-off-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/fabrics.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 131e78e62d00d..f4aa401247209 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -144,11 +144,10 @@ EXPORT_SYMBOL_GPL(nvmf_get_address);
  */
 int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val)
 {
-	struct nvme_command cmd;
+	struct nvme_command cmd = { };
 	union nvme_result res;
 	int ret;
 
-	memset(&cmd, 0, sizeof(cmd));
 	cmd.prop_get.opcode = nvme_fabrics_command;
 	cmd.prop_get.fctype = nvme_fabrics_type_property_get;
 	cmd.prop_get.offset = cpu_to_le32(off);
-- 
GitLab


From 581f19dd72b9d148ce667b6dcb20d3ef16b9a1cf Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <kch@nvidia.com>
Date: Tue, 11 Jan 2022 22:20:58 -0800
Subject: [PATCH 0791/1586] nvme-fabrics: remove unnecessary braces for case

Braces are not required for enum value NVME_SC_CONNECT_INVALID_PARAM
when used on the switch-case statement, remove the braces.

Signed-off-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/fabrics.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index f4aa401247209..ee79a6d639b4b 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -271,7 +271,7 @@ static void nvmf_log_connect_error(struct nvme_ctrl *ctrl,
 	int err_sctype = errval & ~NVME_SC_DNR;
 
 	switch (err_sctype) {
-	case (NVME_SC_CONNECT_INVALID_PARAM):
+	case NVME_SC_CONNECT_INVALID_PARAM:
 		if (offset >> 16) {
 			char *inv_data = "Connect Invalid Data Parameter";
 
-- 
GitLab


From 2caecd62ea5160803b25d96cb1a14ce755c2c259 Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <kch@nvidia.com>
Date: Wed, 2 Feb 2022 01:04:44 -0800
Subject: [PATCH 0792/1586] nvmet: use i_size_read() to set size for file-ns

Instead of calling vfs_getattr() use i_size_read() to read the size of
file so we can read the size of not only file type but also block type
with one call. This is needed to implement buffered_io support for the
NVMeOF block device backend.

We also change return type of function nvmet_file_ns_revalidate() from
int to void, since this function does not return any meaning value.

Signed-off-by: Chaitanya Kulkarni <kch@nvidia.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/io-cmd-file.c | 17 ++++-------------
 drivers/nvme/target/nvmet.h       |  2 +-
 2 files changed, 5 insertions(+), 14 deletions(-)

diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c
index 6be6e59d273bb..6485dc8eb9740 100644
--- a/drivers/nvme/target/io-cmd-file.c
+++ b/drivers/nvme/target/io-cmd-file.c
@@ -14,16 +14,9 @@
 #define NVMET_MAX_MPOOL_BVEC		16
 #define NVMET_MIN_MPOOL_OBJ		16
 
-int nvmet_file_ns_revalidate(struct nvmet_ns *ns)
+void nvmet_file_ns_revalidate(struct nvmet_ns *ns)
 {
-	struct kstat stat;
-	int ret;
-
-	ret = vfs_getattr(&ns->file->f_path, &stat, STATX_SIZE,
-			  AT_STATX_FORCE_SYNC);
-	if (!ret)
-		ns->size = stat.size;
-	return ret;
+	ns->size = i_size_read(ns->file->f_mapping->host);
 }
 
 void nvmet_file_ns_disable(struct nvmet_ns *ns)
@@ -43,7 +36,7 @@ void nvmet_file_ns_disable(struct nvmet_ns *ns)
 int nvmet_file_ns_enable(struct nvmet_ns *ns)
 {
 	int flags = O_RDWR | O_LARGEFILE;
-	int ret;
+	int ret = 0;
 
 	if (!ns->buffered_io)
 		flags |= O_DIRECT;
@@ -57,9 +50,7 @@ int nvmet_file_ns_enable(struct nvmet_ns *ns)
 		return ret;
 	}
 
-	ret = nvmet_file_ns_revalidate(ns);
-	if (ret)
-		goto err;
+	nvmet_file_ns_revalidate(ns);
 
 	/*
 	 * i_blkbits can be greater than the universally accepted upper bound,
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index af193423c10bb..69637bf8f8e19 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -541,7 +541,7 @@ u16 nvmet_bdev_flush(struct nvmet_req *req);
 u16 nvmet_file_flush(struct nvmet_req *req);
 void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid);
 void nvmet_bdev_ns_revalidate(struct nvmet_ns *ns);
-int nvmet_file_ns_revalidate(struct nvmet_ns *ns);
+void nvmet_file_ns_revalidate(struct nvmet_ns *ns);
 void nvmet_ns_revalidate(struct nvmet_ns *ns);
 u16 blk_to_nvme_status(struct nvmet_req *req, blk_status_t blk_sts);
 
-- 
GitLab


From 6f6d604b4ef896793901f213db3b45eefa7288fa Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <kch@nvidia.com>
Date: Wed, 2 Feb 2022 01:04:45 -0800
Subject: [PATCH 0793/1586] nvmet: allow bdev in buffered_io mode

Allow block device to be configured in the buffered I/O mode by using
the file backend. In this way now we can use cache for the block
device namespace which shows significant performance improvement.

We update the block device ns enable function and return early when
buffered_io flag is set.

Signed-off-by: Chaitanya Kulkarni <kch@nvidia.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/io-cmd-bdev.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c
index 95c2bbb0b2f5f..e9194804ddee4 100644
--- a/drivers/nvme/target/io-cmd-bdev.c
+++ b/drivers/nvme/target/io-cmd-bdev.c
@@ -76,6 +76,14 @@ int nvmet_bdev_ns_enable(struct nvmet_ns *ns)
 {
 	int ret;
 
+	/*
+	 * When buffered_io namespace attribute is enabled that means user want
+	 * this block device to be used as a file, so block device can take
+	 * an advantage of cache.
+	 */
+	if (ns->buffered_io)
+		return -ENOTBLK;
+
 	ns->bdev = blkdev_get_by_path(ns->device_path,
 			FMODE_READ | FMODE_WRITE, NULL);
 	if (IS_ERR(ns->bdev)) {
-- 
GitLab


From 8b850475c08caa9545c460d7dc823d0a8c7eafd3 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Mon, 14 Feb 2022 11:07:27 +0200
Subject: [PATCH 0794/1586] nvme: replace ida_simple[get|remove] with the
 simler ida_[alloc|free]

ida_simple_[get|remove] are wrappers anyways.

Also, use ida_alloc_min with the ns_ida as namespace
enumeration starts with 1.

Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/core.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index dd18861f77c05..da0e19148177c 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -561,7 +561,7 @@ static void nvme_free_ns_head(struct kref *ref)
 		container_of(ref, struct nvme_ns_head, ref);
 
 	nvme_mpath_remove_disk(head);
-	ida_simple_remove(&head->subsys->ns_ida, head->instance);
+	ida_free(&head->subsys->ns_ida, head->instance);
 	cleanup_srcu_struct(&head->srcu);
 	nvme_put_subsystem(head->subsys);
 	kfree(head);
@@ -2565,7 +2565,7 @@ static void nvme_release_subsystem(struct device *dev)
 		container_of(dev, struct nvme_subsystem, dev);
 
 	if (subsys->instance >= 0)
-		ida_simple_remove(&nvme_instance_ida, subsys->instance);
+		ida_free(&nvme_instance_ida, subsys->instance);
 	kfree(subsys);
 }
 
@@ -3616,7 +3616,7 @@ static int __nvme_check_ids(struct nvme_subsystem *subsys,
 
 static void nvme_cdev_rel(struct device *dev)
 {
-	ida_simple_remove(&nvme_ns_chr_minor_ida, MINOR(dev->devt));
+	ida_free(&nvme_ns_chr_minor_ida, MINOR(dev->devt));
 }
 
 void nvme_cdev_del(struct cdev *cdev, struct device *cdev_device)
@@ -3630,7 +3630,7 @@ int nvme_cdev_add(struct cdev *cdev, struct device *cdev_device,
 {
 	int minor, ret;
 
-	minor = ida_simple_get(&nvme_ns_chr_minor_ida, 0, 0, GFP_KERNEL);
+	minor = ida_alloc(&nvme_ns_chr_minor_ida, GFP_KERNEL);
 	if (minor < 0)
 		return minor;
 	cdev_device->devt = MKDEV(MAJOR(nvme_ns_chr_devt), minor);
@@ -3693,7 +3693,7 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
 	head = kzalloc(size, GFP_KERNEL);
 	if (!head)
 		goto out;
-	ret = ida_simple_get(&ctrl->subsys->ns_ida, 1, 0, GFP_KERNEL);
+	ret = ida_alloc_min(&ctrl->subsys->ns_ida, 1, GFP_KERNEL);
 	if (ret < 0)
 		goto out_free_head;
 	head->instance = ret;
@@ -3732,7 +3732,7 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
 out_cleanup_srcu:
 	cleanup_srcu_struct(&head->srcu);
 out_ida_remove:
-	ida_simple_remove(&ctrl->subsys->ns_ida, head->instance);
+	ida_free(&ctrl->subsys->ns_ida, head->instance);
 out_free_head:
 	kfree(head);
 out:
@@ -4429,7 +4429,7 @@ static void nvme_free_ctrl(struct device *dev)
 	struct nvme_subsystem *subsys = ctrl->subsys;
 
 	if (!subsys || ctrl->instance != subsys->instance)
-		ida_simple_remove(&nvme_instance_ida, ctrl->instance);
+		ida_free(&nvme_instance_ida, ctrl->instance);
 
 	nvme_free_cels(ctrl);
 	nvme_mpath_uninit(ctrl);
@@ -4488,7 +4488,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
 		goto out;
 	}
 
-	ret = ida_simple_get(&nvme_instance_ida, 0, 0, GFP_KERNEL);
+	ret = ida_alloc(&nvme_instance_ida, GFP_KERNEL);
 	if (ret < 0)
 		goto out;
 	ctrl->instance = ret;
@@ -4529,7 +4529,7 @@ out_free_name:
 	nvme_put_ctrl(ctrl);
 	kfree_const(ctrl->device->kobj.name);
 out_release_instance:
-	ida_simple_remove(&nvme_instance_ida, ctrl->instance);
+	ida_free(&nvme_instance_ida, ctrl->instance);
 out:
 	if (ctrl->discard_page)
 		__free_page(ctrl->discard_page);
-- 
GitLab


From 3dd83f4013f0e85d37c059c91015a2721209fe86 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Mon, 14 Feb 2022 11:07:28 +0200
Subject: [PATCH 0795/1586] nvme-fc: replace ida_simple[get|remove] with the
 simler ida_[alloc|free]

ida_simple_[get|remove] are wrappers anyways.

Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/fc.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 71b3108c22f06..96a5d7fc36f7e 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -259,7 +259,7 @@ nvme_fc_free_lport(struct kref *ref)
 		complete(&nvme_fc_unload_proceed);
 	spin_unlock_irqrestore(&nvme_fc_lock, flags);
 
-	ida_simple_remove(&nvme_fc_local_port_cnt, lport->localport.port_num);
+	ida_free(&nvme_fc_local_port_cnt, lport->localport.port_num);
 	ida_destroy(&lport->endp_cnt);
 
 	put_device(lport->dev);
@@ -399,7 +399,7 @@ nvme_fc_register_localport(struct nvme_fc_port_info *pinfo,
 		goto out_reghost_failed;
 	}
 
-	idx = ida_simple_get(&nvme_fc_local_port_cnt, 0, 0, GFP_KERNEL);
+	idx = ida_alloc(&nvme_fc_local_port_cnt, GFP_KERNEL);
 	if (idx < 0) {
 		ret = -ENOSPC;
 		goto out_fail_kfree;
@@ -439,7 +439,7 @@ nvme_fc_register_localport(struct nvme_fc_port_info *pinfo,
 	return 0;
 
 out_ida_put:
-	ida_simple_remove(&nvme_fc_local_port_cnt, idx);
+	ida_free(&nvme_fc_local_port_cnt, idx);
 out_fail_kfree:
 	kfree(newrec);
 out_reghost_failed:
@@ -535,7 +535,7 @@ nvme_fc_free_rport(struct kref *ref)
 	spin_unlock_irqrestore(&nvme_fc_lock, flags);
 
 	WARN_ON(!list_empty(&rport->disc_list));
-	ida_simple_remove(&lport->endp_cnt, rport->remoteport.port_num);
+	ida_free(&lport->endp_cnt, rport->remoteport.port_num);
 
 	kfree(rport);
 
@@ -713,7 +713,7 @@ nvme_fc_register_remoteport(struct nvme_fc_local_port *localport,
 		goto out_lport_put;
 	}
 
-	idx = ida_simple_get(&lport->endp_cnt, 0, 0, GFP_KERNEL);
+	idx = ida_alloc(&lport->endp_cnt, GFP_KERNEL);
 	if (idx < 0) {
 		ret = -ENOSPC;
 		goto out_kfree_rport;
@@ -2393,7 +2393,7 @@ nvme_fc_ctrl_free(struct kref *ref)
 	put_device(ctrl->dev);
 	nvme_fc_rport_put(ctrl->rport);
 
-	ida_simple_remove(&nvme_fc_ctrl_cnt, ctrl->cnum);
+	ida_free(&nvme_fc_ctrl_cnt, ctrl->cnum);
 	if (ctrl->ctrl.opts)
 		nvmf_free_options(ctrl->ctrl.opts);
 	kfree(ctrl);
@@ -3472,7 +3472,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 		goto out_fail;
 	}
 
-	idx = ida_simple_get(&nvme_fc_ctrl_cnt, 0, 0, GFP_KERNEL);
+	idx = ida_alloc(&nvme_fc_ctrl_cnt, GFP_KERNEL);
 	if (idx < 0) {
 		ret = -ENOSPC;
 		goto out_free_ctrl;
@@ -3635,7 +3635,7 @@ out_free_queues:
 	kfree(ctrl->queues);
 out_free_ida:
 	put_device(ctrl->dev);
-	ida_simple_remove(&nvme_fc_ctrl_cnt, ctrl->cnum);
+	ida_free(&nvme_fc_ctrl_cnt, ctrl->cnum);
 out_free_ctrl:
 	kfree(ctrl);
 out_fail:
-- 
GitLab


From 22027a9811349de28f81e13e20e83299099acd3a Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Mon, 14 Feb 2022 11:07:29 +0200
Subject: [PATCH 0796/1586] nvmet: replace ida_simple[get|remove] with the
 simler ida_[alloc|free]

ida_simple_[get|remove] are wrappers anyways.

Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 5119c687de683..724a6d373340a 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -1400,7 +1400,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
 	if (subsys->cntlid_min > subsys->cntlid_max)
 		goto out_free_sqs;
 
-	ret = ida_simple_get(&cntlid_ida,
+	ret = ida_alloc_range(&cntlid_ida,
 			     subsys->cntlid_min, subsys->cntlid_max,
 			     GFP_KERNEL);
 	if (ret < 0) {
@@ -1459,7 +1459,7 @@ static void nvmet_ctrl_free(struct kref *ref)
 	flush_work(&ctrl->async_event_work);
 	cancel_work_sync(&ctrl->fatal_err_work);
 
-	ida_simple_remove(&cntlid_ida, ctrl->cntlid);
+	ida_free(&cntlid_ida, ctrl->cntlid);
 
 	nvmet_async_events_free(ctrl);
 	kfree(ctrl->sqs);
-- 
GitLab


From 6dd0f465d57c4522294d0fd42fde7603264d0871 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Mon, 14 Feb 2022 11:07:30 +0200
Subject: [PATCH 0797/1586] nvmet-fc: replace ida_simple[get|remove] with the
 simler ida_[alloc|free]

ida_simple_[get|remove] are wrappers anyways.

Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/fc.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index 22b5108168a6a..cb013516784c1 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -1115,7 +1115,7 @@ nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport, void *hosthandle)
 	if (!assoc)
 		return NULL;
 
-	idx = ida_simple_get(&tgtport->assoc_cnt, 0, 0, GFP_KERNEL);
+	idx = ida_alloc(&tgtport->assoc_cnt, GFP_KERNEL);
 	if (idx < 0)
 		goto out_free_assoc;
 
@@ -1157,7 +1157,7 @@ nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport, void *hosthandle)
 out_put:
 	nvmet_fc_tgtport_put(tgtport);
 out_ida:
-	ida_simple_remove(&tgtport->assoc_cnt, idx);
+	ida_free(&tgtport->assoc_cnt, idx);
 out_free_assoc:
 	kfree(assoc);
 	return NULL;
@@ -1183,7 +1183,7 @@ nvmet_fc_target_assoc_free(struct kref *ref)
 	/* if pending Rcv Disconnect Association LS, send rsp now */
 	if (oldls)
 		nvmet_fc_xmt_ls_rsp(tgtport, oldls);
-	ida_simple_remove(&tgtport->assoc_cnt, assoc->a_id);
+	ida_free(&tgtport->assoc_cnt, assoc->a_id);
 	dev_info(tgtport->dev,
 		"{%d:%d} Association freed\n",
 		tgtport->fc_target_port.port_num, assoc->a_id);
@@ -1383,7 +1383,7 @@ nvmet_fc_register_targetport(struct nvmet_fc_port_info *pinfo,
 		goto out_regtgt_failed;
 	}
 
-	idx = ida_simple_get(&nvmet_fc_tgtport_cnt, 0, 0, GFP_KERNEL);
+	idx = ida_alloc(&nvmet_fc_tgtport_cnt, GFP_KERNEL);
 	if (idx < 0) {
 		ret = -ENOSPC;
 		goto out_fail_kfree;
@@ -1433,7 +1433,7 @@ nvmet_fc_register_targetport(struct nvmet_fc_port_info *pinfo,
 out_free_newrec:
 	put_device(dev);
 out_ida_put:
-	ida_simple_remove(&nvmet_fc_tgtport_cnt, idx);
+	ida_free(&nvmet_fc_tgtport_cnt, idx);
 out_fail_kfree:
 	kfree(newrec);
 out_regtgt_failed:
@@ -1460,7 +1460,7 @@ nvmet_fc_free_tgtport(struct kref *ref)
 	/* let the LLDD know we've finished tearing it down */
 	tgtport->ops->targetport_delete(&tgtport->fc_target_port);
 
-	ida_simple_remove(&nvmet_fc_tgtport_cnt,
+	ida_free(&nvmet_fc_tgtport_cnt,
 			tgtport->fc_target_port.port_num);
 
 	ida_destroy(&tgtport->assoc_cnt);
-- 
GitLab


From 7c2566394f8ceba4b99a373c1bc2411750301587 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Mon, 14 Feb 2022 11:07:31 +0200
Subject: [PATCH 0798/1586] nvmet-rdma: replace ida_simple[get|remove] with the
 simler ida_[alloc|free]

ida_simple_[get|remove] are wrappers anyways.

Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/rdma.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 1deb4043e2425..00656754e3314 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -1356,7 +1356,7 @@ static void nvmet_rdma_free_queue(struct nvmet_rdma_queue *queue)
 				!queue->host_qid);
 	}
 	nvmet_rdma_free_rsps(queue);
-	ida_simple_remove(&nvmet_rdma_queue_ida, queue->idx);
+	ida_free(&nvmet_rdma_queue_ida, queue->idx);
 	kfree(queue);
 }
 
@@ -1459,7 +1459,7 @@ nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev,
 	spin_lock_init(&queue->rsps_lock);
 	INIT_LIST_HEAD(&queue->queue_list);
 
-	queue->idx = ida_simple_get(&nvmet_rdma_queue_ida, 0, 0, GFP_KERNEL);
+	queue->idx = ida_alloc(&nvmet_rdma_queue_ida, GFP_KERNEL);
 	if (queue->idx < 0) {
 		ret = NVME_RDMA_CM_NO_RSC;
 		goto out_destroy_sq;
@@ -1510,7 +1510,7 @@ out_free_cmds:
 out_free_responses:
 	nvmet_rdma_free_rsps(queue);
 out_ida_remove:
-	ida_simple_remove(&nvmet_rdma_queue_ida, queue->idx);
+	ida_free(&nvmet_rdma_queue_ida, queue->idx);
 out_destroy_sq:
 	nvmet_sq_destroy(&queue->nvme_sq);
 out_free_queue:
-- 
GitLab


From 44f331a630bdc7c61de9c6760c4eec0133ee9f04 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Mon, 14 Feb 2022 11:07:32 +0200
Subject: [PATCH 0799/1586] nvmet-tcp: replace ida_simple[get|remove] with the
 simler ida_[alloc|free]

ida_simple_[get|remove] are wrappers anyways.

Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/tcp.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index 7c1c43ce466bc..83ca577f72be2 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -1473,7 +1473,7 @@ static void nvmet_tcp_release_queue_work(struct work_struct *w)
 	nvmet_tcp_free_cmds(queue);
 	if (queue->hdr_digest || queue->data_digest)
 		nvmet_tcp_free_crypto(queue);
-	ida_simple_remove(&nvmet_tcp_queue_ida, queue->idx);
+	ida_free(&nvmet_tcp_queue_ida, queue->idx);
 
 	page = virt_to_head_page(queue->pf_cache.va);
 	__page_frag_cache_drain(page, queue->pf_cache.pagecnt_bias);
@@ -1613,7 +1613,7 @@ static int nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port,
 	init_llist_head(&queue->resp_list);
 	INIT_LIST_HEAD(&queue->resp_send_list);
 
-	queue->idx = ida_simple_get(&nvmet_tcp_queue_ida, 0, 0, GFP_KERNEL);
+	queue->idx = ida_alloc(&nvmet_tcp_queue_ida, GFP_KERNEL);
 	if (queue->idx < 0) {
 		ret = queue->idx;
 		goto out_free_queue;
@@ -1646,7 +1646,7 @@ out_destroy_sq:
 out_free_connect:
 	nvmet_tcp_free_cmd(&queue->connect);
 out_ida_remove:
-	ida_simple_remove(&nvmet_tcp_queue_ida, queue->idx);
+	ida_free(&nvmet_tcp_queue_ida, queue->idx);
 out_free_queue:
 	kfree(queue);
 	return ret;
-- 
GitLab


From 4686af885a9168f9ec70c4063616640911c48b03 Mon Sep 17 00:00:00 2001
From: Max Gurtovoy <mgurtovoy@nvidia.com>
Date: Wed, 9 Feb 2022 10:54:49 +0200
Subject: [PATCH 0800/1586] nvme-rdma: add helpers for mapping/unmapping
 request

Introduce nvme_rdma_dma_map_req/nvme_rdma_dma_unmap_req helper functions
to improve code readability and ease on the error flow.

Reviewed-by: Israel Rukshin <israelr@nvidia.com>
Signed-off-by: Max Gurtovoy <mgurtovoy@nvidia.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/rdma.c | 111 +++++++++++++++++++++++----------------
 1 file changed, 65 insertions(+), 46 deletions(-)

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 850f84d204d05..14ec2c85db060 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1282,6 +1282,22 @@ static int nvme_rdma_inv_rkey(struct nvme_rdma_queue *queue,
 	return ib_post_send(queue->qp, &wr, NULL);
 }
 
+static void nvme_rdma_dma_unmap_req(struct ib_device *ibdev, struct request *rq)
+{
+	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
+
+	if (blk_integrity_rq(rq)) {
+		ib_dma_unmap_sg(ibdev, req->metadata_sgl->sg_table.sgl,
+				req->metadata_sgl->nents, rq_dma_dir(rq));
+		sg_free_table_chained(&req->metadata_sgl->sg_table,
+				      NVME_INLINE_METADATA_SG_CNT);
+	}
+
+	ib_dma_unmap_sg(ibdev, req->data_sgl.sg_table.sgl, req->data_sgl.nents,
+			rq_dma_dir(rq));
+	sg_free_table_chained(&req->data_sgl.sg_table, NVME_INLINE_SG_CNT);
+}
+
 static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue,
 		struct request *rq)
 {
@@ -1293,13 +1309,6 @@ static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue,
 	if (!blk_rq_nr_phys_segments(rq))
 		return;
 
-	if (blk_integrity_rq(rq)) {
-		ib_dma_unmap_sg(ibdev, req->metadata_sgl->sg_table.sgl,
-				req->metadata_sgl->nents, rq_dma_dir(rq));
-		sg_free_table_chained(&req->metadata_sgl->sg_table,
-				      NVME_INLINE_METADATA_SG_CNT);
-	}
-
 	if (req->use_sig_mr)
 		pool = &queue->qp->sig_mrs;
 
@@ -1308,9 +1317,7 @@ static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue,
 		req->mr = NULL;
 	}
 
-	ib_dma_unmap_sg(ibdev, req->data_sgl.sg_table.sgl, req->data_sgl.nents,
-			rq_dma_dir(rq));
-	sg_free_table_chained(&req->data_sgl.sg_table, NVME_INLINE_SG_CNT);
+	nvme_rdma_dma_unmap_req(ibdev, rq);
 }
 
 static int nvme_rdma_set_sg_null(struct nvme_command *c)
@@ -1521,22 +1528,11 @@ mr_put:
 	return -EINVAL;
 }
 
-static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
-		struct request *rq, struct nvme_command *c)
+static int nvme_rdma_dma_map_req(struct ib_device *ibdev, struct request *rq,
+		int *count, int *pi_count)
 {
 	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
-	struct nvme_rdma_device *dev = queue->device;
-	struct ib_device *ibdev = dev->dev;
-	int pi_count = 0;
-	int count, ret;
-
-	req->num_sge = 1;
-	refcount_set(&req->ref, 2); /* send and recv completions */
-
-	c->common.flags |= NVME_CMD_SGL_METABUF;
-
-	if (!blk_rq_nr_phys_segments(rq))
-		return nvme_rdma_set_sg_null(c);
+	int ret;
 
 	req->data_sgl.sg_table.sgl = (struct scatterlist *)(req + 1);
 	ret = sg_alloc_table_chained(&req->data_sgl.sg_table,
@@ -1548,9 +1544,9 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
 	req->data_sgl.nents = blk_rq_map_sg(rq->q, rq,
 					    req->data_sgl.sg_table.sgl);
 
-	count = ib_dma_map_sg(ibdev, req->data_sgl.sg_table.sgl,
-			      req->data_sgl.nents, rq_dma_dir(rq));
-	if (unlikely(count <= 0)) {
+	*count = ib_dma_map_sg(ibdev, req->data_sgl.sg_table.sgl,
+			       req->data_sgl.nents, rq_dma_dir(rq));
+	if (unlikely(*count <= 0)) {
 		ret = -EIO;
 		goto out_free_table;
 	}
@@ -1569,16 +1565,50 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
 
 		req->metadata_sgl->nents = blk_rq_map_integrity_sg(rq->q,
 				rq->bio, req->metadata_sgl->sg_table.sgl);
-		pi_count = ib_dma_map_sg(ibdev,
-					 req->metadata_sgl->sg_table.sgl,
-					 req->metadata_sgl->nents,
-					 rq_dma_dir(rq));
-		if (unlikely(pi_count <= 0)) {
+		*pi_count = ib_dma_map_sg(ibdev,
+					  req->metadata_sgl->sg_table.sgl,
+					  req->metadata_sgl->nents,
+					  rq_dma_dir(rq));
+		if (unlikely(*pi_count <= 0)) {
 			ret = -EIO;
 			goto out_free_pi_table;
 		}
 	}
 
+	return 0;
+
+out_free_pi_table:
+	sg_free_table_chained(&req->metadata_sgl->sg_table,
+			      NVME_INLINE_METADATA_SG_CNT);
+out_unmap_sg:
+	ib_dma_unmap_sg(ibdev, req->data_sgl.sg_table.sgl, req->data_sgl.nents,
+			rq_dma_dir(rq));
+out_free_table:
+	sg_free_table_chained(&req->data_sgl.sg_table, NVME_INLINE_SG_CNT);
+	return ret;
+}
+
+static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
+		struct request *rq, struct nvme_command *c)
+{
+	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
+	struct nvme_rdma_device *dev = queue->device;
+	struct ib_device *ibdev = dev->dev;
+	int pi_count = 0;
+	int count, ret;
+
+	req->num_sge = 1;
+	refcount_set(&req->ref, 2); /* send and recv completions */
+
+	c->common.flags |= NVME_CMD_SGL_METABUF;
+
+	if (!blk_rq_nr_phys_segments(rq))
+		return nvme_rdma_set_sg_null(c);
+
+	ret = nvme_rdma_dma_map_req(ibdev, rq, &count, &pi_count);
+	if (unlikely(ret))
+		return ret;
+
 	if (req->use_sig_mr) {
 		ret = nvme_rdma_map_sg_pi(queue, req, c, count, pi_count);
 		goto out;
@@ -1602,23 +1632,12 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
 	ret = nvme_rdma_map_sg_fr(queue, req, c, count);
 out:
 	if (unlikely(ret))
-		goto out_unmap_pi_sg;
+		goto out_dma_unmap_req;
 
 	return 0;
 
-out_unmap_pi_sg:
-	if (blk_integrity_rq(rq))
-		ib_dma_unmap_sg(ibdev, req->metadata_sgl->sg_table.sgl,
-				req->metadata_sgl->nents, rq_dma_dir(rq));
-out_free_pi_table:
-	if (blk_integrity_rq(rq))
-		sg_free_table_chained(&req->metadata_sgl->sg_table,
-				      NVME_INLINE_METADATA_SG_CNT);
-out_unmap_sg:
-	ib_dma_unmap_sg(ibdev, req->data_sgl.sg_table.sgl, req->data_sgl.nents,
-			rq_dma_dir(rq));
-out_free_table:
-	sg_free_table_chained(&req->data_sgl.sg_table, NVME_INLINE_SG_CNT);
+out_dma_unmap_req:
+	nvme_rdma_dma_unmap_req(ibdev, rq);
 	return ret;
 }
 
-- 
GitLab


From 72e8b5cd7dd387ba3eee81b0a59746ad8ccdcb5f Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <kch@nvidia.com>
Date: Thu, 10 Feb 2022 11:12:36 -0800
Subject: [PATCH 0801/1586] nvme: add a helper to initialize connect_q

Add and use helper to remove duplicate code for fabrics connect_q
initialization and error handling for all the transports.

Signed-off-by: Chaitanya Kulkarni <kch@nvidia.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/fc.c     | 6 ++----
 drivers/nvme/host/nvme.h   | 8 ++++++++
 drivers/nvme/host/rdma.c   | 6 ++----
 drivers/nvme/host/tcp.c    | 6 ++----
 drivers/nvme/target/loop.c | 6 ++----
 5 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 96a5d7fc36f7e..080f85f4105f3 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2916,11 +2916,9 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
 
 	ctrl->ctrl.tagset = &ctrl->tag_set;
 
-	ctrl->ctrl.connect_q = blk_mq_init_queue(&ctrl->tag_set);
-	if (IS_ERR(ctrl->ctrl.connect_q)) {
-		ret = PTR_ERR(ctrl->ctrl.connect_q);
+	ret = nvme_ctrl_init_connect_q(&(ctrl->ctrl));
+	if (ret)
 		goto out_free_tag_set;
-	}
 
 	ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.sqsize + 1);
 	if (ret)
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index a162f6c6da6e1..f8658f984d649 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -894,6 +894,14 @@ static inline int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf)
 }
 #endif
 
+static inline int nvme_ctrl_init_connect_q(struct nvme_ctrl *ctrl)
+{
+	ctrl->connect_q = blk_mq_init_queue(ctrl->tagset);
+	if (IS_ERR(ctrl->connect_q))
+		return PTR_ERR(ctrl->connect_q);
+	return 0;
+}
+
 static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev)
 {
 	return dev_to_disk(dev)->private_data;
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 14ec2c85db060..c49b9c3c46f23 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -978,11 +978,9 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new)
 			goto out_free_io_queues;
 		}
 
-		ctrl->ctrl.connect_q = blk_mq_init_queue(&ctrl->tag_set);
-		if (IS_ERR(ctrl->ctrl.connect_q)) {
-			ret = PTR_ERR(ctrl->ctrl.connect_q);
+		ret = nvme_ctrl_init_connect_q(&(ctrl->ctrl));
+		if (ret)
 			goto out_free_tag_set;
-		}
 	}
 
 	ret = nvme_rdma_start_io_queues(ctrl);
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 4ceb28675fdf6..6cbcc8b4daaf9 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -1825,11 +1825,9 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new)
 			goto out_free_io_queues;
 		}
 
-		ctrl->connect_q = blk_mq_init_queue(ctrl->tagset);
-		if (IS_ERR(ctrl->connect_q)) {
-			ret = PTR_ERR(ctrl->connect_q);
+		ret = nvme_ctrl_init_connect_q(ctrl);
+		if (ret)
 			goto out_free_tag_set;
-		}
 	}
 
 	ret = nvme_tcp_start_io_queues(ctrl);
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index eb1094254c823..23f9d6f888042 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -543,11 +543,9 @@ static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl)
 	if (ret)
 		goto out_destroy_queues;
 
-	ctrl->ctrl.connect_q = blk_mq_init_queue(&ctrl->tag_set);
-	if (IS_ERR(ctrl->ctrl.connect_q)) {
-		ret = PTR_ERR(ctrl->ctrl.connect_q);
+	ret = nvme_ctrl_init_connect_q(&(ctrl->ctrl));
+	if (ret)
 		goto out_free_tagset;
-	}
 
 	ret = nvme_loop_connect_io_queues(ctrl);
 	if (ret)
-- 
GitLab


From bd83fe6f2cd2133beaac7c423fd36c3515048fc8 Mon Sep 17 00:00:00 2001
From: Alan Adamson <alan.adamson@oracle.com>
Date: Thu, 3 Feb 2022 00:11:53 -0800
Subject: [PATCH 0802/1586] nvme: add verbose error logging

Improves logging of NVMe errors.  If NVME_VERBOSE_ERRORS is configured,
a verbose description of the error is logged, otherwise only status
codes/bits is logged.

Signed-off-by: Chaitanya Kulkarni <kch@nvidia.com>
[kch]: fix several nits, cosmetics, and trim down code.
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Alan Adamson <alan.adamson@oracle.com>
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/Kconfig     |   8 ++
 drivers/nvme/host/Makefile    |   2 +-
 drivers/nvme/host/constants.c | 185 ++++++++++++++++++++++++++++++++++
 drivers/nvme/host/core.c      |  33 ++++++
 drivers/nvme/host/nvme.h      |  19 ++++
 include/linux/nvme.h          |   1 +
 6 files changed, 247 insertions(+), 1 deletion(-)
 create mode 100644 drivers/nvme/host/constants.c

diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig
index dc0450ca23a30..d6d056963c062 100644
--- a/drivers/nvme/host/Kconfig
+++ b/drivers/nvme/host/Kconfig
@@ -24,6 +24,14 @@ config NVME_MULTIPATH
 	   /dev/nvmeXnY device will show up for each NVMe namespace,
 	   even if it is accessible through multiple controllers.
 
+config NVME_VERBOSE_ERRORS
+	bool "NVMe verbose error reporting"
+	depends on NVME_CORE
+	help
+	   This option enables verbose reporting for NVMe errors. The
+	   error translation table will grow the kernel image size by
+	   about 4 KB.
+
 config NVME_HWMON
 	bool "NVMe hardware monitoring"
 	depends on (NVME_CORE=y && HWMON=y) || (NVME_CORE=m && HWMON)
diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile
index dfaacd472e5dd..476c5c9884961 100644
--- a/drivers/nvme/host/Makefile
+++ b/drivers/nvme/host/Makefile
@@ -9,7 +9,7 @@ obj-$(CONFIG_NVME_RDMA)			+= nvme-rdma.o
 obj-$(CONFIG_NVME_FC)			+= nvme-fc.o
 obj-$(CONFIG_NVME_TCP)			+= nvme-tcp.o
 
-nvme-core-y				:= core.o ioctl.o
+nvme-core-y				:= core.o ioctl.o constants.o
 nvme-core-$(CONFIG_TRACING)		+= trace.o
 nvme-core-$(CONFIG_NVME_MULTIPATH)	+= multipath.o
 nvme-core-$(CONFIG_BLK_DEV_ZONED)	+= zns.o
diff --git a/drivers/nvme/host/constants.c b/drivers/nvme/host/constants.c
new file mode 100644
index 0000000000000..7d49eb34b348e
--- /dev/null
+++ b/drivers/nvme/host/constants.c
@@ -0,0 +1,185 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * NVM Express device driver verbose errors
+ * Copyright (c) 2022, Oracle and/or its affiliates
+ */
+
+#include <linux/blkdev.h>
+#include "nvme.h"
+
+#ifdef CONFIG_NVME_VERBOSE_ERRORS
+static const char * const nvme_ops[] = {
+	[nvme_cmd_flush] = "Flush",
+	[nvme_cmd_write] = "Write",
+	[nvme_cmd_read] = "Read",
+	[nvme_cmd_write_uncor] = "Write Uncorrectable",
+	[nvme_cmd_compare] = "Compare",
+	[nvme_cmd_write_zeroes] = "Write Zeros",
+	[nvme_cmd_dsm] = "Dataset Management",
+	[nvme_cmd_verify] = "Verify",
+	[nvme_cmd_resv_register] = "Reservation Register",
+	[nvme_cmd_resv_report] = "Reservation Report",
+	[nvme_cmd_resv_acquire] = "Reservation Acquire",
+	[nvme_cmd_resv_release] = "Reservation Release",
+	[nvme_cmd_zone_mgmt_send] = "Zone Management Send",
+	[nvme_cmd_zone_mgmt_recv] = "Zone Management Receive",
+	[nvme_cmd_zone_append] = "Zone Management Append",
+};
+
+static const char * const nvme_admin_ops[] = {
+	[nvme_admin_delete_sq] = "Delete SQ",
+	[nvme_admin_create_sq] = "Create SQ",
+	[nvme_admin_get_log_page] = "Get Log Page",
+	[nvme_admin_delete_cq] = "Delete CQ",
+	[nvme_admin_create_cq] = "Create CQ",
+	[nvme_admin_identify] = "Identify",
+	[nvme_admin_abort_cmd] = "Abort Command",
+	[nvme_admin_set_features] = "Set Features",
+	[nvme_admin_get_features] = "Get Features",
+	[nvme_admin_async_event] = "Async Event",
+	[nvme_admin_ns_mgmt] = "Namespace Management",
+	[nvme_admin_activate_fw] = "Activate Firmware",
+	[nvme_admin_download_fw] = "Download Firmware",
+	[nvme_admin_dev_self_test] = "Device Self Test",
+	[nvme_admin_ns_attach] = "Namespace Attach",
+	[nvme_admin_keep_alive] = "Keep Alive",
+	[nvme_admin_directive_send] = "Directive Send",
+	[nvme_admin_directive_recv] = "Directive Receive",
+	[nvme_admin_virtual_mgmt] = "Virtual Management",
+	[nvme_admin_nvme_mi_send] = "NVMe Send MI",
+	[nvme_admin_nvme_mi_recv] = "NVMe Receive MI",
+	[nvme_admin_dbbuf] = "Doorbell Buffer Config",
+	[nvme_admin_format_nvm] = "Format NVM",
+	[nvme_admin_security_send] = "Security Send",
+	[nvme_admin_security_recv] = "Security Receive",
+	[nvme_admin_sanitize_nvm] = "Sanitize NVM",
+	[nvme_admin_get_lba_status] = "Get LBA Status",
+};
+
+static const char * const nvme_statuses[] = {
+	[NVME_SC_SUCCESS] = "Success",
+	[NVME_SC_INVALID_OPCODE] = "Invalid Command Opcode",
+	[NVME_SC_INVALID_FIELD] = "Invalid Field in Command",
+	[NVME_SC_CMDID_CONFLICT] = "Command ID Conflict",
+	[NVME_SC_DATA_XFER_ERROR] = "Data Transfer Error",
+	[NVME_SC_POWER_LOSS] = "Commands Aborted due to Power Loss Notification",
+	[NVME_SC_INTERNAL] = "Internal Error",
+	[NVME_SC_ABORT_REQ] = "Command Abort Requested",
+	[NVME_SC_ABORT_QUEUE] = "Command Aborted due to SQ Deletion",
+	[NVME_SC_FUSED_FAIL] = "Command Aborted due to Failed Fused Command",
+	[NVME_SC_FUSED_MISSING] = "Command Aborted due to Missing Fused Command",
+	[NVME_SC_INVALID_NS] = "Invalid Namespace or Format",
+	[NVME_SC_CMD_SEQ_ERROR] = "Command Sequence Error",
+	[NVME_SC_SGL_INVALID_LAST] = "Invalid SGL Segment Descriptor",
+	[NVME_SC_SGL_INVALID_COUNT] = "Invalid Number of SGL Descriptors",
+	[NVME_SC_SGL_INVALID_DATA] = "Data SGL Length Invalid",
+	[NVME_SC_SGL_INVALID_METADATA] = "Metadata SGL Length Invalid",
+	[NVME_SC_SGL_INVALID_TYPE] = "SGL Descriptor Type Invalid",
+	[NVME_SC_CMB_INVALID_USE] = "Invalid Use of Controller Memory Buffer",
+	[NVME_SC_PRP_INVALID_OFFSET] = "PRP Offset Invalid",
+	[NVME_SC_ATOMIC_WU_EXCEEDED] = "Atomic Write Unit Exceeded",
+	[NVME_SC_OP_DENIED] = "Operation Denied",
+	[NVME_SC_SGL_INVALID_OFFSET] = "SGL Offset Invalid",
+	[NVME_SC_RESERVED] = "Reserved",
+	[NVME_SC_HOST_ID_INCONSIST] = "Host Identifier Inconsistent Format",
+	[NVME_SC_KA_TIMEOUT_EXPIRED] = "Keep Alive Timeout Expired",
+	[NVME_SC_KA_TIMEOUT_INVALID] = "Keep Alive Timeout Invalid",
+	[NVME_SC_ABORTED_PREEMPT_ABORT] = "Command Aborted due to Preempt and Abort",
+	[NVME_SC_SANITIZE_FAILED] = "Sanitize Failed",
+	[NVME_SC_SANITIZE_IN_PROGRESS] = "Sanitize In Progress",
+	[NVME_SC_SGL_INVALID_GRANULARITY] = "SGL Data Block Granularity Invalid",
+	[NVME_SC_CMD_NOT_SUP_CMB_QUEUE] = "Command Not Supported for Queue in CMB",
+	[NVME_SC_NS_WRITE_PROTECTED] = "Namespace is Write Protected",
+	[NVME_SC_CMD_INTERRUPTED] = "Command Interrupted",
+	[NVME_SC_TRANSIENT_TR_ERR] = "Transient Transport Error",
+	[NVME_SC_INVALID_IO_CMD_SET] = "Invalid IO Command Set",
+	[NVME_SC_LBA_RANGE] = "LBA Out of Range",
+	[NVME_SC_CAP_EXCEEDED] = "Capacity Exceeded",
+	[NVME_SC_NS_NOT_READY] = "Namespace Not Ready",
+	[NVME_SC_RESERVATION_CONFLICT] = "Reservation Conflict",
+	[NVME_SC_FORMAT_IN_PROGRESS] = "Format In Progress",
+	[NVME_SC_CQ_INVALID] = "Completion Queue Invalid",
+	[NVME_SC_QID_INVALID] = "Invalid Queue Identifier",
+	[NVME_SC_QUEUE_SIZE] = "Invalid Queue Size",
+	[NVME_SC_ABORT_LIMIT] = "Abort Command Limit Exceeded",
+	[NVME_SC_ABORT_MISSING] = "Reserved", /* XXX */
+	[NVME_SC_ASYNC_LIMIT] = "Asynchronous Event Request Limit Exceeded",
+	[NVME_SC_FIRMWARE_SLOT] = "Invalid Firmware Slot",
+	[NVME_SC_FIRMWARE_IMAGE] = "Invalid Firmware Image",
+	[NVME_SC_INVALID_VECTOR] = "Invalid Interrupt Vector",
+	[NVME_SC_INVALID_LOG_PAGE] = "Invalid Log Page",
+	[NVME_SC_INVALID_FORMAT] = "Invalid Format",
+	[NVME_SC_FW_NEEDS_CONV_RESET] = "Firmware Activation Requires Conventional Reset",
+	[NVME_SC_INVALID_QUEUE] = "Invalid Queue Deletion",
+	[NVME_SC_FEATURE_NOT_SAVEABLE] = "Feature Identifier Not Saveable",
+	[NVME_SC_FEATURE_NOT_CHANGEABLE] = "Feature Not Changeable",
+	[NVME_SC_FEATURE_NOT_PER_NS] = "Feature Not Namespace Specific",
+	[NVME_SC_FW_NEEDS_SUBSYS_RESET] = "Firmware Activation Requires NVM Subsystem Reset",
+	[NVME_SC_FW_NEEDS_RESET] = "Firmware Activation Requires Reset",
+	[NVME_SC_FW_NEEDS_MAX_TIME] = "Firmware Activation Requires Maximum Time Violation",
+	[NVME_SC_FW_ACTIVATE_PROHIBITED] = "Firmware Activation Prohibited",
+	[NVME_SC_OVERLAPPING_RANGE] = "Overlapping Range",
+	[NVME_SC_NS_INSUFFICIENT_CAP] = "Namespace Insufficient Capacity",
+	[NVME_SC_NS_ID_UNAVAILABLE] = "Namespace Identifier Unavailable",
+	[NVME_SC_NS_ALREADY_ATTACHED] = "Namespace Already Attached",
+	[NVME_SC_NS_IS_PRIVATE] = "Namespace Is Private",
+	[NVME_SC_NS_NOT_ATTACHED] = "Namespace Not Attached",
+	[NVME_SC_THIN_PROV_NOT_SUPP] = "Thin Provisioning Not Supported",
+	[NVME_SC_CTRL_LIST_INVALID] = "Controller List Invalid",
+	[NVME_SC_SELT_TEST_IN_PROGRESS] = "Device Self-test In Progress",
+	[NVME_SC_BP_WRITE_PROHIBITED] = "Boot Partition Write Prohibited",
+	[NVME_SC_CTRL_ID_INVALID] = "Invalid Controller Identifier",
+	[NVME_SC_SEC_CTRL_STATE_INVALID] = "Invalid Secondary Controller State",
+	[NVME_SC_CTRL_RES_NUM_INVALID] = "Invalid Number of Controller Resources",
+	[NVME_SC_RES_ID_INVALID] = "Invalid Resource Identifier",
+	[NVME_SC_PMR_SAN_PROHIBITED] = "Sanitize Prohibited",
+	[NVME_SC_ANA_GROUP_ID_INVALID] = "ANA Group Identifier Invalid",
+	[NVME_SC_ANA_ATTACH_FAILED] = "ANA Attach Failed",
+	[NVME_SC_BAD_ATTRIBUTES] = "Conflicting Attributes",
+	[NVME_SC_INVALID_PI] = "Invalid Protection Information",
+	[NVME_SC_READ_ONLY] = "Attempted Write to Read Only Range",
+	[NVME_SC_ONCS_NOT_SUPPORTED] = "ONCS Not Supported",
+	[NVME_SC_ZONE_BOUNDARY_ERROR] = "Zoned Boundary Error",
+	[NVME_SC_ZONE_FULL] = "Zone Is Full",
+	[NVME_SC_ZONE_READ_ONLY] = "Zone Is Read Only",
+	[NVME_SC_ZONE_OFFLINE] = "Zone Is Offline",
+	[NVME_SC_ZONE_INVALID_WRITE] = "Zone Invalid Write",
+	[NVME_SC_ZONE_TOO_MANY_ACTIVE] = "Too Many Active Zones",
+	[NVME_SC_ZONE_TOO_MANY_OPEN] = "Too Many Open Zones",
+	[NVME_SC_ZONE_INVALID_TRANSITION] = "Invalid Zone State Transition",
+	[NVME_SC_WRITE_FAULT] = "Write Fault",
+	[NVME_SC_READ_ERROR] = "Unrecovered Read Error",
+	[NVME_SC_GUARD_CHECK] = "End-to-end Guard Check Error",
+	[NVME_SC_APPTAG_CHECK] = "End-to-end Application Tag Check Error",
+	[NVME_SC_REFTAG_CHECK] = "End-to-end Reference Tag Check Error",
+	[NVME_SC_COMPARE_FAILED] = "Compare Failure",
+	[NVME_SC_ACCESS_DENIED] = "Access Denied",
+	[NVME_SC_UNWRITTEN_BLOCK] = "Deallocated or Unwritten Logical Block",
+	[NVME_SC_ANA_PERSISTENT_LOSS] = "Asymmetric Access Persistent Loss",
+	[NVME_SC_ANA_INACCESSIBLE] = "Asymmetric Access Inaccessible",
+	[NVME_SC_ANA_TRANSITION] = "Asymmetric Access Transition",
+	[NVME_SC_HOST_PATH_ERROR] = "Host Pathing Error",
+};
+
+const unsigned char *nvme_get_error_status_str(u16 status)
+{
+	status &= 0x7ff;
+	if (status < ARRAY_SIZE(nvme_statuses) && nvme_statuses[status])
+		return nvme_statuses[status & 0x7ff];
+	return "Unknown";
+}
+
+const unsigned char *nvme_get_opcode_str(u8 opcode)
+{
+	if (opcode < ARRAY_SIZE(nvme_ops) && nvme_ops[opcode])
+		return nvme_ops[opcode];
+	return "Unknown";
+}
+
+const unsigned char *nvme_get_admin_opcode_str(u8 opcode)
+{
+	if (opcode < ARRAY_SIZE(nvme_admin_ops) && nvme_admin_ops[opcode])
+		return nvme_admin_ops[opcode];
+	return "Unknown";
+}
+#endif /* CONFIG_NVME_VERBOSE_ERRORS */
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index da0e19148177c..5e55732f3ea1e 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -299,6 +299,37 @@ static void nvme_retry_req(struct request *req)
 	blk_mq_delay_kick_requeue_list(req->q, delay);
 }
 
+static void nvme_log_error(struct request *req)
+{
+	struct nvme_ns *ns = req->q->queuedata;
+	struct nvme_request *nr = nvme_req(req);
+
+	if (ns) {
+		pr_err_ratelimited("%s: %s(0x%x) @ LBA %llu, %llu blocks, %s (sct 0x%x / sc 0x%x) %s%s\n",
+		       ns->disk ? ns->disk->disk_name : "?",
+		       nvme_get_opcode_str(nr->cmd->common.opcode),
+		       nr->cmd->common.opcode,
+		       (unsigned long long)nvme_sect_to_lba(ns, blk_rq_pos(req)),
+		       (unsigned long long)blk_rq_bytes(req) >> ns->lba_shift,
+		       nvme_get_error_status_str(nr->status),
+		       nr->status >> 8 & 7,	/* Status Code Type */
+		       nr->status & 0xff,	/* Status Code */
+		       nr->status & NVME_SC_MORE ? "MORE " : "",
+		       nr->status & NVME_SC_DNR  ? "DNR "  : "");
+		return;
+	}
+
+	pr_err_ratelimited("%s: %s(0x%x), %s (sct 0x%x / sc 0x%x) %s%s\n",
+			   dev_name(nr->ctrl->device),
+			   nvme_get_admin_opcode_str(nr->cmd->common.opcode),
+			   nr->cmd->common.opcode,
+			   nvme_get_error_status_str(nr->status),
+			   nr->status >> 8 & 7,	/* Status Code Type */
+			   nr->status & 0xff,	/* Status Code */
+			   nr->status & NVME_SC_MORE ? "MORE " : "",
+			   nr->status & NVME_SC_DNR  ? "DNR "  : "");
+}
+
 enum nvme_disposition {
 	COMPLETE,
 	RETRY,
@@ -339,6 +370,8 @@ static inline void nvme_end_req(struct request *req)
 {
 	blk_status_t status = nvme_error_status(nvme_req(req)->status);
 
+	if (unlikely(nvme_req(req)->status != NVME_SC_SUCCESS))
+		nvme_log_error(req);
 	nvme_end_req_zoned(req);
 	nvme_trace_bio_complete(req);
 	blk_mq_end_request(req, status);
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index f8658f984d649..08893c04ca898 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -938,4 +938,23 @@ static inline bool nvme_multi_css(struct nvme_ctrl *ctrl)
 	return (ctrl->ctrl_config & NVME_CC_CSS_MASK) == NVME_CC_CSS_CSI;
 }
 
+#ifdef CONFIG_NVME_VERBOSE_ERRORS
+const unsigned char *nvme_get_error_status_str(u16 status);
+const unsigned char *nvme_get_opcode_str(u8 opcode);
+const unsigned char *nvme_get_admin_opcode_str(u8 opcode);
+#else /* CONFIG_NVME_VERBOSE_ERRORS */
+static inline const unsigned char *nvme_get_error_status_str(u16 status)
+{
+	return "I/O Error";
+}
+static inline const unsigned char *nvme_get_opcode_str(u8 opcode)
+{
+	return "I/O Cmd";
+}
+static inline const unsigned char *nvme_get_admin_opcode_str(u8 opcode)
+{
+	return "Admin Cmd";
+}
+#endif /* CONFIG_NVME_VERBOSE_ERRORS */
+
 #endif /* _NVME_H */
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 855dd9b3e84be..1f946e5bf7c12 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -1636,6 +1636,7 @@ enum {
 	NVME_SC_HOST_ABORTED_CMD	= 0x371,
 
 	NVME_SC_CRD			= 0x1800,
+	NVME_SC_MORE			= 0x2000,
 	NVME_SC_DNR			= 0x4000,
 };
 
-- 
GitLab


From 89377bc1975c2993bde4a498a3a4e5817ac0ae2c Mon Sep 17 00:00:00 2001
From: Kanchan Joshi <joshi.k@samsung.com>
Date: Thu, 10 Feb 2022 11:07:55 +0530
Subject: [PATCH 0803/1586] nvme: add vectored-io support for user-passthrough

Add a new NVME_IOCTL_IO64_CMD_VEC ioctl that works like the existing
NVME_IOCTL_IO64_CMD ioctl except that it takes and array of iovecs
and thus supports vectored I/O.

  - cmd.addr is base address of user iovec array
  - cmd.vec_cnt is count of iovec array elements

This patch does not include vectored-variant for admin-commands as most
of them are light on buffers and likely to have low invocation frequency.

Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/ioctl.c       | 35 ++++++++++++++++++++++++---------
 include/uapi/linux/nvme_ioctl.h |  6 +++++-
 2 files changed, 31 insertions(+), 10 deletions(-)

diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index 22314962842da..aaf3dfad26574 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -56,7 +56,7 @@ out:
 static int nvme_submit_user_cmd(struct request_queue *q,
 		struct nvme_command *cmd, void __user *ubuffer,
 		unsigned bufflen, void __user *meta_buffer, unsigned meta_len,
-		u32 meta_seed, u64 *result, unsigned timeout)
+		u32 meta_seed, u64 *result, unsigned timeout, bool vec)
 {
 	bool write = nvme_is_write(cmd);
 	struct nvme_ns *ns = q->queuedata;
@@ -75,8 +75,22 @@ static int nvme_submit_user_cmd(struct request_queue *q,
 	nvme_req(req)->flags |= NVME_REQ_USERCMD;
 
 	if (ubuffer && bufflen) {
-		ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen,
+		if (!vec)
+			ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen,
 				GFP_KERNEL);
+		else {
+			struct iovec fast_iov[UIO_FASTIOV];
+			struct iovec *iov = fast_iov;
+			struct iov_iter iter;
+
+			ret = import_iovec(rq_data_dir(req), ubuffer, bufflen,
+					UIO_FASTIOV, &iov, &iter);
+			if (ret < 0)
+				goto out;
+			ret = blk_rq_map_user_iov(q, req, NULL, &iter,
+					GFP_KERNEL);
+			kfree(iov);
+		}
 		if (ret)
 			goto out;
 		bio = req->bio;
@@ -170,7 +184,8 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 
 	return nvme_submit_user_cmd(ns->queue, &c,
 			nvme_to_user_ptr(io.addr), length,
-			metadata, meta_len, lower_32_bits(io.slba), NULL, 0);
+			metadata, meta_len, lower_32_bits(io.slba), NULL, 0,
+			false);
 }
 
 static bool nvme_validate_passthru_nsid(struct nvme_ctrl *ctrl,
@@ -224,7 +239,7 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 	status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
 			nvme_to_user_ptr(cmd.addr), cmd.data_len,
 			nvme_to_user_ptr(cmd.metadata), cmd.metadata_len,
-			0, &result, timeout);
+			0, &result, timeout, false);
 
 	if (status >= 0) {
 		if (put_user(result, &ucmd->result))
@@ -235,7 +250,7 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 }
 
 static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
-			struct nvme_passthru_cmd64 __user *ucmd)
+			struct nvme_passthru_cmd64 __user *ucmd, bool vec)
 {
 	struct nvme_passthru_cmd64 cmd;
 	struct nvme_command c;
@@ -270,7 +285,7 @@ static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 	status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
 			nvme_to_user_ptr(cmd.addr), cmd.data_len,
 			nvme_to_user_ptr(cmd.metadata), cmd.metadata_len,
-			0, &cmd.result, timeout);
+			0, &cmd.result, timeout, vec);
 
 	if (status >= 0) {
 		if (put_user(cmd.result, &ucmd->result))
@@ -296,7 +311,7 @@ static int nvme_ctrl_ioctl(struct nvme_ctrl *ctrl, unsigned int cmd,
 	case NVME_IOCTL_ADMIN_CMD:
 		return nvme_user_cmd(ctrl, NULL, argp);
 	case NVME_IOCTL_ADMIN64_CMD:
-		return nvme_user_cmd64(ctrl, NULL, argp);
+		return nvme_user_cmd64(ctrl, NULL, argp, false);
 	default:
 		return sed_ioctl(ctrl->opal_dev, cmd, argp);
 	}
@@ -340,7 +355,9 @@ static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd,
 	case NVME_IOCTL_SUBMIT_IO:
 		return nvme_submit_io(ns, argp);
 	case NVME_IOCTL_IO64_CMD:
-		return nvme_user_cmd64(ns->ctrl, ns, argp);
+		return nvme_user_cmd64(ns->ctrl, ns, argp, false);
+	case NVME_IOCTL_IO64_CMD_VEC:
+		return nvme_user_cmd64(ns->ctrl, ns, argp, true);
 	default:
 		return -ENOTTY;
 	}
@@ -480,7 +497,7 @@ long nvme_dev_ioctl(struct file *file, unsigned int cmd,
 	case NVME_IOCTL_ADMIN_CMD:
 		return nvme_user_cmd(ctrl, NULL, argp);
 	case NVME_IOCTL_ADMIN64_CMD:
-		return nvme_user_cmd64(ctrl, NULL, argp);
+		return nvme_user_cmd64(ctrl, NULL, argp, false);
 	case NVME_IOCTL_IO_CMD:
 		return nvme_dev_user_cmd(ctrl, argp);
 	case NVME_IOCTL_RESET:
diff --git a/include/uapi/linux/nvme_ioctl.h b/include/uapi/linux/nvme_ioctl.h
index d99b5a7726980..b2e43185e3b55 100644
--- a/include/uapi/linux/nvme_ioctl.h
+++ b/include/uapi/linux/nvme_ioctl.h
@@ -55,7 +55,10 @@ struct nvme_passthru_cmd64 {
 	__u64	metadata;
 	__u64	addr;
 	__u32	metadata_len;
-	__u32	data_len;
+	union {
+		__u32	data_len; /* for non-vectored io */
+		__u32	vec_cnt; /* for vectored io */
+	};
 	__u32	cdw10;
 	__u32	cdw11;
 	__u32	cdw12;
@@ -78,5 +81,6 @@ struct nvme_passthru_cmd64 {
 #define NVME_IOCTL_RESCAN	_IO('N', 0x46)
 #define NVME_IOCTL_ADMIN64_CMD	_IOWR('N', 0x47, struct nvme_passthru_cmd64)
 #define NVME_IOCTL_IO64_CMD	_IOWR('N', 0x48, struct nvme_passthru_cmd64)
+#define NVME_IOCTL_IO64_CMD_VEC	_IOWR('N', 0x49, struct nvme_passthru_cmd64)
 
 #endif /* _UAPI_LINUX_NVME_IOCTL_H */
-- 
GitLab


From 20d64911e7580f7e29c0086d67860c18307377d7 Mon Sep 17 00:00:00 2001
From: Martin Belanger <martin.belanger@dell.com>
Date: Tue, 8 Feb 2022 14:33:45 -0500
Subject: [PATCH 0804/1586] nvme: send uevent on connection up

When connectivity with a controller is lost, the driver will keep
trying to reconnect once every 10 sec. When connection is restored,
user-space apps need to be informed so that they can take proper
action. For example, TP8010 introduces the DIM PDU, which is used to
register with a discovery controller (DC). The DIM PDU is sent from
user-space.  The DIM PDU must be sent every time a connection is
established with a DC. Therefore, the kernel must tell user-space apps
when connection is restored so that registration can happen.

The uevent sent is a "change" uevent with environmental data
set to: "NVME_EVENT=connected".

Signed-off-by: Martin Belanger <martin.belanger@dell.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: John Meneghini <jmeneghi@redhat.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/core.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 5e55732f3ea1e..b3e70bb2e6dc5 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -4262,6 +4262,13 @@ static int nvme_class_uevent(struct device *dev, struct kobj_uevent_env *env)
 	return ret;
 }
 
+static void nvme_change_uevent(struct nvme_ctrl *ctrl, char *envdata)
+{
+	char *envp[2] = { envdata, NULL };
+
+	kobject_uevent_env(&ctrl->device->kobj, KOBJ_CHANGE, envp);
+}
+
 static void nvme_aen_uevent(struct nvme_ctrl *ctrl)
 {
 	char *envp[2] = { NULL, NULL };
@@ -4429,6 +4436,8 @@ void nvme_start_ctrl(struct nvme_ctrl *ctrl)
 		nvme_queue_scan(ctrl);
 		nvme_start_queues(ctrl);
 	}
+
+	nvme_change_uevent(ctrl, "NVME_EVENT=connected");
 }
 EXPORT_SYMBOL_GPL(nvme_start_ctrl);
 
-- 
GitLab


From 86c2457a8e8112f16af8fd10a3e1dd7a302c3c3e Mon Sep 17 00:00:00 2001
From: Martin Belanger <martin.belanger@dell.com>
Date: Tue, 8 Feb 2022 14:33:46 -0500
Subject: [PATCH 0805/1586] nvme: expose cntrltype and dctype through sysfs

TP8010 introduces the Discovery Controller Type attribute (dctype).
The dctype is returned in the response to the Identify command. This
patch exposes the dctype through the sysfs. Since the dctype depends on
the Controller Type (cntrltype), another attribute of the Identify
response, the patch also exposes the cntrltype as well. The dctype will
only be displayed for discovery controllers.

A note about the naming of this attribute:
Although TP8010 calls this attribute the Discovery Controller Type,
note that the dctype is now part of the response to the Identify
command for all controller types. I/O, Discovery, and Admin controllers
all share the same Identify response PDU structure. Non-discovery
controllers as well as pre-TP8010 discovery controllers will continue
to set this field to 0 (which has always been the default for reserved
bytes). Per TP8010, the value 0 now means "Discovery controller type is
not reported" instead of "Reserved". One could argue that this
definition is correct even for non-discovery controllers, and by
extension, exposing it in the sysfs for non-discovery controllers is
appropriate.

Signed-off-by: Martin Belanger <martin.belanger@dell.com>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: John Meneghini <jmeneghi@redhat.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/core.c | 39 +++++++++++++++++++++++++++++++++++++++
 drivers/nvme/host/nvme.h |  3 +++
 include/linux/nvme.h     | 10 +++++++++-
 3 files changed, 51 insertions(+), 1 deletion(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index b3e70bb2e6dc5..52a49ae5b6730 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3023,6 +3023,9 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl)
 	ctrl->max_namespaces = le32_to_cpu(id->mnan);
 	ctrl->ctratt = le32_to_cpu(id->ctratt);
 
+	ctrl->cntrltype = id->cntrltype;
+	ctrl->dctype = id->dctype;
+
 	if (id->rtd3e) {
 		/* us -> s */
 		u32 transition_time = le32_to_cpu(id->rtd3e) / USEC_PER_SEC;
@@ -3556,6 +3559,40 @@ static ssize_t nvme_ctrl_fast_io_fail_tmo_store(struct device *dev,
 static DEVICE_ATTR(fast_io_fail_tmo, S_IRUGO | S_IWUSR,
 	nvme_ctrl_fast_io_fail_tmo_show, nvme_ctrl_fast_io_fail_tmo_store);
 
+static ssize_t cntrltype_show(struct device *dev,
+			      struct device_attribute *attr, char *buf)
+{
+	static const char * const type[] = {
+		[NVME_CTRL_IO] = "io\n",
+		[NVME_CTRL_DISC] = "discovery\n",
+		[NVME_CTRL_ADMIN] = "admin\n",
+	};
+	struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+
+	if (ctrl->cntrltype > NVME_CTRL_ADMIN || !type[ctrl->cntrltype])
+		return sysfs_emit(buf, "reserved\n");
+
+	return sysfs_emit(buf, type[ctrl->cntrltype]);
+}
+static DEVICE_ATTR_RO(cntrltype);
+
+static ssize_t dctype_show(struct device *dev,
+			   struct device_attribute *attr, char *buf)
+{
+	static const char * const type[] = {
+		[NVME_DCTYPE_NOT_REPORTED] = "none\n",
+		[NVME_DCTYPE_DDC] = "ddc\n",
+		[NVME_DCTYPE_CDC] = "cdc\n",
+	};
+	struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+
+	if (ctrl->dctype > NVME_DCTYPE_CDC || !type[ctrl->dctype])
+		return sysfs_emit(buf, "reserved\n");
+
+	return sysfs_emit(buf, type[ctrl->dctype]);
+}
+static DEVICE_ATTR_RO(dctype);
+
 static struct attribute *nvme_dev_attrs[] = {
 	&dev_attr_reset_controller.attr,
 	&dev_attr_rescan_controller.attr,
@@ -3577,6 +3614,8 @@ static struct attribute *nvme_dev_attrs[] = {
 	&dev_attr_reconnect_delay.attr,
 	&dev_attr_fast_io_fail_tmo.attr,
 	&dev_attr_kato.attr,
+	&dev_attr_cntrltype.attr,
+	&dev_attr_dctype.attr,
 	NULL
 };
 
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 08893c04ca898..4ce48203e63f8 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -349,6 +349,9 @@ struct nvme_ctrl {
 	unsigned long discard_page_busy;
 
 	struct nvme_fault_inject fault_inject;
+
+	enum nvme_ctrl_type cntrltype;
+	enum nvme_dctype dctype;
 };
 
 enum nvme_iopolicy {
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 1f946e5bf7c12..9dbc3ef4daf7c 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -43,6 +43,12 @@ enum nvme_ctrl_type {
 	NVME_CTRL_ADMIN	= 3,		/* Administrative controller */
 };
 
+enum nvme_dctype {
+	NVME_DCTYPE_NOT_REPORTED	= 0,
+	NVME_DCTYPE_DDC			= 1, /* Direct Discovery Controller */
+	NVME_DCTYPE_CDC			= 2, /* Central Discovery Controller */
+};
+
 /* Address Family codes for Discovery Log Page entry ADRFAM field */
 enum {
 	NVMF_ADDR_FAMILY_PCI	= 0,	/* PCIe */
@@ -320,7 +326,9 @@ struct nvme_id_ctrl {
 	__le16			icdoff;
 	__u8			ctrattr;
 	__u8			msdbd;
-	__u8			rsvd1804[244];
+	__u8			rsvd1804[2];
+	__u8			dctype;
+	__u8			rsvd1807[241];
 	struct nvme_id_power_state	psd[32];
 	__u8			vs[1024];
 };
-- 
GitLab


From 1c3adf0de1db86cf354dcb1a2dd1184e5e63a50a Mon Sep 17 00:00:00 2001
From: Keith Busch <kbusch@kernel.org>
Date: Tue, 15 Feb 2022 07:03:08 -0800
Subject: [PATCH 0806/1586] nvme: explicitly set non-error for directives

Stream directives is an optional feature. It is not an error if a
controller doesn't support as many as the kernel can optionally use.
Explicitly set the non-error return value on this condition with a
comment explaining why.

Note, the return value was already 0 in this condition, so the setting
is redundant. This patch should just silence bots that falsely believe
the condition contains an error omission.

Signed-off-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/core.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 52a49ae5b6730..1ce579d583c06 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -809,6 +809,8 @@ static int nvme_configure_directives(struct nvme_ctrl *ctrl)
 	if (ctrl->nssa < BLK_MAX_WRITE_HINTS - 1) {
 		dev_info(ctrl->device, "too few streams (%u) available\n",
 					ctrl->nssa);
+		/* this condition is not an error: streams are optional */
+		ret = 0;
 		goto out_disable_stream;
 	}
 
-- 
GitLab


From 0a9f850061d9126b9a4aaf56ae1810138cac0f51 Mon Sep 17 00:00:00 2001
From: Keith Busch <kbusch@kernel.org>
Date: Tue, 15 Feb 2022 07:37:19 -0800
Subject: [PATCH 0807/1586] nvme: remove nssa from struct nvme_ctrl

The reported number of streams is not used outside the function that
gets it, so no need to stash it in the controller structure. Use a local
variable instead.

Signed-off-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/core.c | 9 +++++----
 drivers/nvme/host/nvme.h | 1 -
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 1ce579d583c06..9cffc4770e737 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -790,6 +790,7 @@ static int nvme_get_stream_params(struct nvme_ctrl *ctrl,
 static int nvme_configure_directives(struct nvme_ctrl *ctrl)
 {
 	struct streams_directive_params s;
+	u16 nssa;
 	int ret;
 
 	if (!(ctrl->oacs & NVME_CTRL_OACS_DIRECTIVES))
@@ -805,16 +806,16 @@ static int nvme_configure_directives(struct nvme_ctrl *ctrl)
 	if (ret)
 		goto out_disable_stream;
 
-	ctrl->nssa = le16_to_cpu(s.nssa);
-	if (ctrl->nssa < BLK_MAX_WRITE_HINTS - 1) {
+	nssa = le16_to_cpu(s.nssa);
+	if (nssa < BLK_MAX_WRITE_HINTS - 1) {
 		dev_info(ctrl->device, "too few streams (%u) available\n",
-					ctrl->nssa);
+					nssa);
 		/* this condition is not an error: streams are optional */
 		ret = 0;
 		goto out_disable_stream;
 	}
 
-	ctrl->nr_streams = min_t(u16, ctrl->nssa, BLK_MAX_WRITE_HINTS - 1);
+	ctrl->nr_streams = min_t(u16, nssa, BLK_MAX_WRITE_HINTS - 1);
 	dev_info(ctrl->device, "Using %u streams\n", ctrl->nr_streams);
 	return 0;
 
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 4ce48203e63f8..587d92df118b7 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -280,7 +280,6 @@ struct nvme_ctrl {
 	u16 crdt[3];
 	u16 oncs;
 	u16 oacs;
-	u16 nssa;
 	u16 nr_streams;
 	u16 sqsize;
 	u32 max_namespaces;
-- 
GitLab


From fd8099e7918cd2df39ef306dd1d1af7178a15b81 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 24 Feb 2022 10:57:15 +0100
Subject: [PATCH 0808/1586] nvme: cleanup __nvme_check_ids

Pass the actual nvme_ns_ids used for the comparison instead of the
ns_head that isn't needed and use a more descriptive function name.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
---
 drivers/nvme/host/core.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 9cffc4770e737..076a03b801b7e 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3673,16 +3673,15 @@ static struct nvme_ns_head *nvme_find_ns_head(struct nvme_subsystem *subsys,
 	return NULL;
 }
 
-static int __nvme_check_ids(struct nvme_subsystem *subsys,
-		struct nvme_ns_head *new)
+static int nvme_subsys_check_duplicate_ids(struct nvme_subsystem *subsys,
+		struct nvme_ns_ids *ids)
 {
 	struct nvme_ns_head *h;
 
 	lockdep_assert_held(&subsys->lock);
 
 	list_for_each_entry(h, &subsys->nsheads, entry) {
-		if (nvme_ns_ids_valid(&new->ids) &&
-		    nvme_ns_ids_equal(&new->ids, &h->ids))
+		if (nvme_ns_ids_valid(ids) && nvme_ns_ids_equal(ids, &h->ids))
 			return -EINVAL;
 	}
 
@@ -3781,7 +3780,7 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
 	head->ids = *ids;
 	kref_init(&head->ref);
 
-	ret = __nvme_check_ids(ctrl->subsys, head);
+	ret = nvme_subsys_check_duplicate_ids(ctrl->subsys, &head->ids);
 	if (ret) {
 		dev_err(ctrl->device,
 			"duplicate IDs for nsid %d\n", nsid);
-- 
GitLab


From e2724cb9f0c406b8fb66efd3aa9e8b3edfd8d5c8 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 24 Feb 2022 11:32:58 +0100
Subject: [PATCH 0809/1586] nvme: fix the check for duplicate unique
 identifiers

nvme_subsys_check_duplicate_ids should needs to return an error if any of
the identifiers matches, not just if all of them match.  But it does not
need to and should not look at the CSI value for this sanity check.

Rewrite the logic to be separate from nvme_ns_ids_equal and optimize it
by reducing duplicate checks for non-present identifiers.

Fixes: ed754e5deeb1 ("nvme: track shared namespaces")
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
---
 drivers/nvme/host/core.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 076a03b801b7e..3e6ac985b24f6 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1716,13 +1716,6 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns)
 		blk_queue_max_write_zeroes_sectors(queue, UINT_MAX);
 }
 
-static bool nvme_ns_ids_valid(struct nvme_ns_ids *ids)
-{
-	return !uuid_is_null(&ids->uuid) ||
-		memchr_inv(ids->nguid, 0, sizeof(ids->nguid)) ||
-		memchr_inv(ids->eui64, 0, sizeof(ids->eui64));
-}
-
 static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b)
 {
 	return uuid_equal(&a->uuid, &b->uuid) &&
@@ -3676,12 +3669,21 @@ static struct nvme_ns_head *nvme_find_ns_head(struct nvme_subsystem *subsys,
 static int nvme_subsys_check_duplicate_ids(struct nvme_subsystem *subsys,
 		struct nvme_ns_ids *ids)
 {
+	bool has_uuid = !uuid_is_null(&ids->uuid);
+	bool has_nguid = memchr_inv(ids->nguid, 0, sizeof(ids->nguid));
+	bool has_eui64 = memchr_inv(ids->eui64, 0, sizeof(ids->eui64));
 	struct nvme_ns_head *h;
 
 	lockdep_assert_held(&subsys->lock);
 
 	list_for_each_entry(h, &subsys->nsheads, entry) {
-		if (nvme_ns_ids_valid(ids) && nvme_ns_ids_equal(ids, &h->ids))
+		if (has_uuid && uuid_equal(&ids->uuid, &h->ids.uuid))
+			return -EINVAL;
+		if (has_nguid &&
+		    memcmp(&ids->nguid, &h->ids.nguid, sizeof(ids->nguid)) == 0)
+			return -EINVAL;
+		if (has_eui64 &&
+		    memcmp(&ids->eui64, &h->ids.eui64, sizeof(ids->eui64)) == 0)
 			return -EINVAL;
 	}
 
-- 
GitLab


From e2d77d2e11c4f1e70a1a24cc8fe63ff3dc9b53ef Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 24 Feb 2022 17:46:50 +0100
Subject: [PATCH 0810/1586] nvme: check for duplicate identifiers earlier

Lift the check for duplicate identifiers into nvme_init_ns_head, which
avoids pointless error unwinding in case they don't match, and also
matches where we check identifier validity for the multipath case.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
---
 drivers/nvme/host/core.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 3e6ac985b24f6..cece987ba1691 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3782,13 +3782,6 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
 	head->ids = *ids;
 	kref_init(&head->ref);
 
-	ret = nvme_subsys_check_duplicate_ids(ctrl->subsys, &head->ids);
-	if (ret) {
-		dev_err(ctrl->device,
-			"duplicate IDs for nsid %d\n", nsid);
-		goto out_cleanup_srcu;
-	}
-
 	if (head->ids.csi) {
 		ret = nvme_get_effects_log(ctrl, head->ids.csi, &head->effects);
 		if (ret)
@@ -3827,6 +3820,12 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid,
 	mutex_lock(&ctrl->subsys->lock);
 	head = nvme_find_ns_head(ctrl->subsys, nsid);
 	if (!head) {
+		ret = nvme_subsys_check_duplicate_ids(ctrl->subsys, ids);
+		if (ret) {
+			dev_err(ctrl->device,
+				"duplicate IDs for nsid %d\n", nsid);
+			goto out_unlock;
+		}
 		head = nvme_alloc_ns_head(ctrl, nsid, ids);
 		if (IS_ERR(head)) {
 			ret = PTR_ERR(head);
-- 
GitLab


From 2079f41ec6ffaad9aa51ca550105b2228467aec7 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 24 Feb 2022 17:48:32 +0100
Subject: [PATCH 0811/1586] nvme: check that EUI/GUID/UUID are globally unique

Add a check to verify that the unique identifiers are unique globally
in addition to the existing check that verifies that they are unique
inside a single subsystem.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
---
 drivers/nvme/host/core.c | 38 ++++++++++++++++++++++++++++++++++++--
 1 file changed, 36 insertions(+), 2 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index cece987ba1691..f8084ded69e50 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3810,12 +3810,45 @@ out:
 	return ERR_PTR(ret);
 }
 
+static int nvme_global_check_duplicate_ids(struct nvme_subsystem *this,
+		struct nvme_ns_ids *ids)
+{
+	struct nvme_subsystem *s;
+	int ret = 0;
+
+	/*
+	 * Note that this check is racy as we try to avoid holding the global
+	 * lock over the whole ns_head creation.  But it is only intended as
+	 * a sanity check anyway.
+	 */
+	mutex_lock(&nvme_subsystems_lock);
+	list_for_each_entry(s, &nvme_subsystems, entry) {
+		if (s == this)
+			continue;
+		mutex_lock(&s->lock);
+		ret = nvme_subsys_check_duplicate_ids(s, ids);
+		mutex_unlock(&s->lock);
+		if (ret)
+			break;
+	}
+	mutex_unlock(&nvme_subsystems_lock);
+
+	return ret;
+}
+
 static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid,
 		struct nvme_ns_ids *ids, bool is_shared)
 {
 	struct nvme_ctrl *ctrl = ns->ctrl;
 	struct nvme_ns_head *head = NULL;
-	int ret = 0;
+	int ret;
+
+	ret = nvme_global_check_duplicate_ids(ctrl->subsys, ids);
+	if (ret) {
+		dev_err(ctrl->device,
+			"globally duplicate IDs for nsid %d\n", nsid);
+		return ret;
+	}
 
 	mutex_lock(&ctrl->subsys->lock);
 	head = nvme_find_ns_head(ctrl->subsys, nsid);
@@ -3823,7 +3856,8 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid,
 		ret = nvme_subsys_check_duplicate_ids(ctrl->subsys, ids);
 		if (ret) {
 			dev_err(ctrl->device,
-				"duplicate IDs for nsid %d\n", nsid);
+				"duplicate IDs in subsystem for nsid %d\n",
+				nsid);
 			goto out_unlock;
 		}
 		head = nvme_alloc_ns_head(ctrl, nsid, ids);
-- 
GitLab


From 78e27f970f73a4ee57dc050a6233e09a56963391 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 25 Feb 2022 19:23:40 +0200
Subject: [PATCH 0812/1586] spi: pxa2xx-pci: Refactor CE4100 to use ->setup()

Refactor CE4100 handling code to use ->setup() instead of spreading
potentially confusing conditional.

Besides that, it will allow to refactor further to avoid intermediate
storage for the used configuration parameters.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20220225172350.69797-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-pxa2xx-pci.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/spi/spi-pxa2xx-pci.c b/drivers/spi/spi-pxa2xx-pci.c
index 6d60972e4e207..bd20379d93424 100644
--- a/drivers/spi/spi-pxa2xx-pci.c
+++ b/drivers/spi/spi-pxa2xx-pci.c
@@ -30,7 +30,7 @@ enum {
 struct pxa_spi_info {
 	enum pxa_ssp_type type;
 	int port_id;
-	int num_chipselect;
+	unsigned int num_chipselect;
 	unsigned long max_clk_rate;
 
 	/* DMA channel request parameters */
@@ -114,6 +114,14 @@ static int lpss_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c)
 	return 0;
 }
 
+static int ce4100_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c)
+{
+	c->num_chipselect = dev->devfn;
+	c->max_clk_rate = 3686400;
+
+	return 0;
+}
+
 static int mrfld_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c)
 {
 	struct dw_dma_slave *tx, *rx;
@@ -163,8 +171,7 @@ static struct pxa_spi_info spi_info_configs[] = {
 	[PORT_CE4100] = {
 		.type = PXA25x_SSP,
 		.port_id =  -1,
-		.num_chipselect = -1,
-		.max_clk_rate = 3686400,
+		.setup = ce4100_spi_setup,
 	},
 	[PORT_BYT] = {
 		.type = LPSS_BYT_SSP,
@@ -248,7 +255,7 @@ static int pxa2xx_spi_pci_probe(struct pci_dev *dev,
 	}
 
 	memset(&spi_pdata, 0, sizeof(spi_pdata));
-	spi_pdata.num_chipselect = (c->num_chipselect > 0) ? c->num_chipselect : dev->devfn;
+	spi_pdata.num_chipselect = c->num_chipselect;
 	spi_pdata.dma_filter = c->dma_filter;
 	spi_pdata.tx_param = c->tx_param;
 	spi_pdata.rx_param = c->rx_param;
-- 
GitLab


From 71ea0e3ac70a50b0c56105e116ed903f8e504e8f Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 25 Feb 2022 19:23:41 +0200
Subject: [PATCH 0813/1586] spi: pxa2xx-pci: Refactor Quark X1000 to use
 ->setup()

Refactor Quark X1000 handling code to use ->setup() instead of using
the configuration data structure directly.

It will allow to refactor further to avoid intermediate storage for
the used configuration parameters.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20220225172350.69797-2-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-pxa2xx-pci.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/spi/spi-pxa2xx-pci.c b/drivers/spi/spi-pxa2xx-pci.c
index bd20379d93424..4d617ad72bcae 100644
--- a/drivers/spi/spi-pxa2xx-pci.c
+++ b/drivers/spi/spi-pxa2xx-pci.c
@@ -167,6 +167,14 @@ static int mrfld_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c)
 	return 0;
 }
 
+static int qrk_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c)
+{
+	c->num_chipselect = 1;
+	c->max_clk_rate = 50000000;
+
+	return 0;
+}
+
 static struct pxa_spi_info spi_info_configs[] = {
 	[PORT_CE4100] = {
 		.type = PXA25x_SSP,
@@ -209,8 +217,7 @@ static struct pxa_spi_info spi_info_configs[] = {
 	[PORT_QUARK_X1000] = {
 		.type = QUARK_X1000_SSP,
 		.port_id = -1,
-		.num_chipselect = 1,
-		.max_clk_rate = 50000000,
+		.setup = qrk_spi_setup,
 	},
 	[PORT_LPT0] = {
 		.type = LPSS_LPT_SSP,
-- 
GitLab


From 1d9d62959f1b52eb939df38b9fda8beea455c751 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 25 Feb 2022 19:23:42 +0200
Subject: [PATCH 0814/1586] spi: pxa2xx-pci: Drop redundant NULL check in
 ->probe()

Since all platforms are using ->setup() function, drop unneeded check.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20220225172350.69797-3-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-pxa2xx-pci.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/spi/spi-pxa2xx-pci.c b/drivers/spi/spi-pxa2xx-pci.c
index 4d617ad72bcae..90b95e49a164f 100644
--- a/drivers/spi/spi-pxa2xx-pci.c
+++ b/drivers/spi/spi-pxa2xx-pci.c
@@ -255,11 +255,9 @@ static int pxa2xx_spi_pci_probe(struct pci_dev *dev,
 		return ret;
 
 	c = &spi_info_configs[ent->driver_data];
-	if (c->setup) {
-		ret = c->setup(dev, c);
-		if (ret)
-			return ret;
-	}
+	ret = c->setup(dev, c);
+	if (ret)
+		return ret;
 
 	memset(&spi_pdata, 0, sizeof(spi_pdata));
 	spi_pdata.num_chipselect = c->num_chipselect;
-- 
GitLab


From 108607ce4e39a51caca51aa97c44c31041a597d1 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 25 Feb 2022 19:23:43 +0200
Subject: [PATCH 0815/1586] spi: pxa2xx-pci: Move port_id assignment to
 ->setup()

Instead of using conditional, move port_id to the corresponding
->setup() functions.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20220225172350.69797-4-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-pxa2xx-pci.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/spi/spi-pxa2xx-pci.c b/drivers/spi/spi-pxa2xx-pci.c
index 90b95e49a164f..87629da3e544d 100644
--- a/drivers/spi/spi-pxa2xx-pci.c
+++ b/drivers/spi/spi-pxa2xx-pci.c
@@ -29,7 +29,7 @@ enum {
 
 struct pxa_spi_info {
 	enum pxa_ssp_type type;
-	int port_id;
+	unsigned int port_id;
 	unsigned int num_chipselect;
 	unsigned long max_clk_rate;
 
@@ -116,6 +116,7 @@ static int lpss_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c)
 
 static int ce4100_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c)
 {
+	c->port_id = dev->devfn;
 	c->num_chipselect = dev->devfn;
 	c->max_clk_rate = 3686400;
 
@@ -169,6 +170,7 @@ static int mrfld_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c)
 
 static int qrk_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c)
 {
+	c->port_id = dev->devfn;
 	c->num_chipselect = 1;
 	c->max_clk_rate = 50000000;
 
@@ -178,7 +180,6 @@ static int qrk_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c)
 static struct pxa_spi_info spi_info_configs[] = {
 	[PORT_CE4100] = {
 		.type = PXA25x_SSP,
-		.port_id =  -1,
 		.setup = ce4100_spi_setup,
 	},
 	[PORT_BYT] = {
@@ -216,7 +217,6 @@ static struct pxa_spi_info spi_info_configs[] = {
 	},
 	[PORT_QUARK_X1000] = {
 		.type = QUARK_X1000_SSP,
-		.port_id = -1,
 		.setup = qrk_spi_setup,
 	},
 	[PORT_LPT0] = {
@@ -271,8 +271,8 @@ static int pxa2xx_spi_pci_probe(struct pci_dev *dev,
 	ssp->dev = &dev->dev;
 	ssp->phys_base = pci_resource_start(dev, 0);
 	ssp->mmio_base = pcim_iomap_table(dev)[0];
-	ssp->port_id = (c->port_id >= 0) ? c->port_id : dev->devfn;
 	ssp->type = c->type;
+	ssp->port_id = c->port_id;
 
 	pci_set_master(dev);
 
-- 
GitLab


From bd2e24de10da015147b02f8c2c4b8ebea8fa9574 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 25 Feb 2022 19:23:44 +0200
Subject: [PATCH 0816/1586] spi: pxa2xx-pci: Move dma_burst_size assignment to
 ->setup()

Instead of using conditional, move dma_burst_size to the corresponding
->setup() function.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20220225172350.69797-5-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-pxa2xx-pci.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/spi/spi-pxa2xx-pci.c b/drivers/spi/spi-pxa2xx-pci.c
index 87629da3e544d..c2cbb002784ac 100644
--- a/drivers/spi/spi-pxa2xx-pci.c
+++ b/drivers/spi/spi-pxa2xx-pci.c
@@ -38,7 +38,7 @@ struct pxa_spi_info {
 	void *tx_param;
 	void *rx_param;
 
-	int dma_burst_size;
+	unsigned int dma_burst_size;
 
 	int (*setup)(struct pci_dev *pdev, struct pxa_spi_info *c);
 };
@@ -111,6 +111,7 @@ static int lpss_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c)
 	}
 
 	c->dma_filter = lpss_dma_filter;
+	c->dma_burst_size = 1;
 	return 0;
 }
 
@@ -265,7 +266,7 @@ static int pxa2xx_spi_pci_probe(struct pci_dev *dev,
 	spi_pdata.tx_param = c->tx_param;
 	spi_pdata.rx_param = c->rx_param;
 	spi_pdata.enable_dma = c->rx_param && c->tx_param;
-	spi_pdata.dma_burst_size = c->dma_burst_size ? c->dma_burst_size : 1;
+	spi_pdata.dma_burst_size = c->dma_burst_size;
 
 	ssp = &spi_pdata.ssp;
 	ssp->dev = &dev->dev;
-- 
GitLab


From 03f8e04e9f9be8d28c52ae801f37d49988f02ce4 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 25 Feb 2022 19:23:45 +0200
Subject: [PATCH 0817/1586] spi: pxa2xx-pci: Move max_clk_rate assignment to
 ->setup()

Move max_clk_rate to the corresponding ->setup() function to unify
with the rest.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20220225172350.69797-6-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-pxa2xx-pci.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/spi/spi-pxa2xx-pci.c b/drivers/spi/spi-pxa2xx-pci.c
index c2cbb002784ac..5ac1487c9b3f5 100644
--- a/drivers/spi/spi-pxa2xx-pci.c
+++ b/drivers/spi/spi-pxa2xx-pci.c
@@ -153,6 +153,8 @@ static int mrfld_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c)
 		return -ENODEV;
 	}
 
+	c->max_clk_rate = 25000000;
+
 	dma_dev = pci_get_slot(dev->bus, PCI_DEVFN(21, 0));
 	ret = devm_add_action_or_reset(&dev->dev, lpss_dma_put_device, dma_dev);
 	if (ret)
@@ -213,7 +215,6 @@ static struct pxa_spi_info spi_info_configs[] = {
 	},
 	[PORT_MRFLD] = {
 		.type = MRFLD_SSP,
-		.max_clk_rate = 25000000,
 		.setup = mrfld_spi_setup,
 	},
 	[PORT_QUARK_X1000] = {
-- 
GitLab


From 7e425c3c3d15241aa5b6c442a83f11b8bc4fee91 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 25 Feb 2022 19:23:46 +0200
Subject: [PATCH 0818/1586] spi: pxa2xx-pci: Replace enum with direct use of
 PCI IDs

Instead of creating an abstraction on top of PCI IDs, just use them directly.
The corresponding enum can be dropped.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20220225172350.69797-7-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-pxa2xx-pci.c | 161 ++++++++++++++++++-----------------
 1 file changed, 83 insertions(+), 78 deletions(-)

diff --git a/drivers/spi/spi-pxa2xx-pci.c b/drivers/spi/spi-pxa2xx-pci.c
index 5ac1487c9b3f5..a0f24e811e9fe 100644
--- a/drivers/spi/spi-pxa2xx-pci.c
+++ b/drivers/spi/spi-pxa2xx-pci.c
@@ -15,17 +15,17 @@
 #include <linux/dmaengine.h>
 #include <linux/platform_data/dma-dw.h>
 
-enum {
-	PORT_QUARK_X1000,
-	PORT_BYT,
-	PORT_MRFLD,
-	PORT_BSW0,
-	PORT_BSW1,
-	PORT_BSW2,
-	PORT_CE4100,
-	PORT_LPT0,
-	PORT_LPT1,
-};
+#define PCI_DEVICE_ID_INTEL_QUARK_X1000		0x0935
+#define PCI_DEVICE_ID_INTEL_BYT			0x0f0e
+#define PCI_DEVICE_ID_INTEL_MRFLD		0x1194
+#define PCI_DEVICE_ID_INTEL_BSW0		0x228e
+#define PCI_DEVICE_ID_INTEL_BSW1		0x2290
+#define PCI_DEVICE_ID_INTEL_BSW2		0x22ac
+#define PCI_DEVICE_ID_INTEL_CE4100		0x2e6a
+#define PCI_DEVICE_ID_INTEL_LPT0_0		0x9c65
+#define PCI_DEVICE_ID_INTEL_LPT0_1		0x9c66
+#define PCI_DEVICE_ID_INTEL_LPT1_0		0x9ce5
+#define PCI_DEVICE_ID_INTEL_LPT1_1		0x9ce6
 
 struct pxa_spi_info {
 	enum pxa_ssp_type type;
@@ -86,6 +86,49 @@ static int lpss_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c)
 	struct pci_dev *dma_dev;
 	int ret;
 
+	switch (dev->device) {
+	case PCI_DEVICE_ID_INTEL_BYT:
+		c->type = LPSS_BYT_SSP;
+		c->port_id = 0;
+		c->tx_param = &byt_tx_param;
+		c->rx_param = &byt_rx_param;
+		break;
+	case PCI_DEVICE_ID_INTEL_BSW0:
+		c->type = LPSS_BSW_SSP;
+		c->port_id = 0;
+		c->tx_param = &bsw0_tx_param;
+		c->rx_param = &bsw0_rx_param;
+		break;
+	case PCI_DEVICE_ID_INTEL_BSW1:
+		c->type = LPSS_BSW_SSP;
+		c->port_id = 1;
+		c->tx_param = &bsw1_tx_param;
+		c->rx_param = &bsw1_rx_param;
+		break;
+	case PCI_DEVICE_ID_INTEL_BSW2:
+		c->type = LPSS_BSW_SSP;
+		c->port_id = 2;
+		c->tx_param = &bsw2_tx_param;
+		c->rx_param = &bsw2_rx_param;
+		break;
+	case PCI_DEVICE_ID_INTEL_LPT0_0:
+	case PCI_DEVICE_ID_INTEL_LPT1_0:
+		c->type = LPSS_LPT_SSP;
+		c->port_id = 0;
+		c->tx_param = &lpt0_tx_param;
+		c->rx_param = &lpt0_rx_param;
+		break;
+	case PCI_DEVICE_ID_INTEL_LPT0_1:
+	case PCI_DEVICE_ID_INTEL_LPT1_1:
+		c->type = LPSS_LPT_SSP;
+		c->port_id = 1;
+		c->tx_param = &lpt1_tx_param;
+		c->rx_param = &lpt1_rx_param;
+		break;
+	default:
+		return -ENODEV;
+	}
+
 	c->num_chipselect = 1;
 	c->max_clk_rate = 50000000;
 
@@ -115,8 +158,13 @@ static int lpss_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c)
 	return 0;
 }
 
+static struct pxa_spi_info lpss_info_config = {
+	.setup = lpss_spi_setup,
+};
+
 static int ce4100_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c)
 {
+	c->type = PXA25x_SSP;
 	c->port_id = dev->devfn;
 	c->num_chipselect = dev->devfn;
 	c->max_clk_rate = 3686400;
@@ -124,6 +172,10 @@ static int ce4100_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c)
 	return 0;
 }
 
+static struct pxa_spi_info ce4100_info_config = {
+	.setup = ce4100_spi_setup,
+};
+
 static int mrfld_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c)
 {
 	struct dw_dma_slave *tx, *rx;
@@ -153,6 +205,7 @@ static int mrfld_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c)
 		return -ENODEV;
 	}
 
+	c->type = MRFLD_SSP;
 	c->max_clk_rate = 25000000;
 
 	dma_dev = pci_get_slot(dev->bus, PCI_DEVFN(21, 0));
@@ -171,8 +224,13 @@ static int mrfld_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c)
 	return 0;
 }
 
+static struct pxa_spi_info mrfld_info_config = {
+	.setup = mrfld_spi_setup,
+};
+
 static int qrk_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c)
 {
+	c->type = QUARK_X1000_SSP;
 	c->port_id = dev->devfn;
 	c->num_chipselect = 1;
 	c->max_clk_rate = 50000000;
@@ -180,61 +238,8 @@ static int qrk_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c)
 	return 0;
 }
 
-static struct pxa_spi_info spi_info_configs[] = {
-	[PORT_CE4100] = {
-		.type = PXA25x_SSP,
-		.setup = ce4100_spi_setup,
-	},
-	[PORT_BYT] = {
-		.type = LPSS_BYT_SSP,
-		.port_id = 0,
-		.setup = lpss_spi_setup,
-		.tx_param = &byt_tx_param,
-		.rx_param = &byt_rx_param,
-	},
-	[PORT_BSW0] = {
-		.type = LPSS_BSW_SSP,
-		.port_id = 0,
-		.setup = lpss_spi_setup,
-		.tx_param = &bsw0_tx_param,
-		.rx_param = &bsw0_rx_param,
-	},
-	[PORT_BSW1] = {
-		.type = LPSS_BSW_SSP,
-		.port_id = 1,
-		.setup = lpss_spi_setup,
-		.tx_param = &bsw1_tx_param,
-		.rx_param = &bsw1_rx_param,
-	},
-	[PORT_BSW2] = {
-		.type = LPSS_BSW_SSP,
-		.port_id = 2,
-		.setup = lpss_spi_setup,
-		.tx_param = &bsw2_tx_param,
-		.rx_param = &bsw2_rx_param,
-	},
-	[PORT_MRFLD] = {
-		.type = MRFLD_SSP,
-		.setup = mrfld_spi_setup,
-	},
-	[PORT_QUARK_X1000] = {
-		.type = QUARK_X1000_SSP,
-		.setup = qrk_spi_setup,
-	},
-	[PORT_LPT0] = {
-		.type = LPSS_LPT_SSP,
-		.port_id = 0,
-		.setup = lpss_spi_setup,
-		.tx_param = &lpt0_tx_param,
-		.rx_param = &lpt0_rx_param,
-	},
-	[PORT_LPT1] = {
-		.type = LPSS_LPT_SSP,
-		.port_id = 1,
-		.setup = lpss_spi_setup,
-		.tx_param = &lpt1_tx_param,
-		.rx_param = &lpt1_rx_param,
-	},
+static struct pxa_spi_info qrk_info_config = {
+	.setup = qrk_spi_setup,
 };
 
 static int pxa2xx_spi_pci_probe(struct pci_dev *dev,
@@ -256,7 +261,7 @@ static int pxa2xx_spi_pci_probe(struct pci_dev *dev,
 	if (ret)
 		return ret;
 
-	c = &spi_info_configs[ent->driver_data];
+	c = (struct pxa_spi_info *)ent->driver_data;
 	ret = c->setup(dev, c);
 	if (ret)
 		return ret;
@@ -320,17 +325,17 @@ static void pxa2xx_spi_pci_remove(struct pci_dev *dev)
 }
 
 static const struct pci_device_id pxa2xx_spi_pci_devices[] = {
-	{ PCI_VDEVICE(INTEL, 0x0935), PORT_QUARK_X1000 },
-	{ PCI_VDEVICE(INTEL, 0x0f0e), PORT_BYT },
-	{ PCI_VDEVICE(INTEL, 0x1194), PORT_MRFLD },
-	{ PCI_VDEVICE(INTEL, 0x228e), PORT_BSW0 },
-	{ PCI_VDEVICE(INTEL, 0x2290), PORT_BSW1 },
-	{ PCI_VDEVICE(INTEL, 0x22ac), PORT_BSW2 },
-	{ PCI_VDEVICE(INTEL, 0x2e6a), PORT_CE4100 },
-	{ PCI_VDEVICE(INTEL, 0x9c65), PORT_LPT0 },
-	{ PCI_VDEVICE(INTEL, 0x9c66), PORT_LPT1 },
-	{ PCI_VDEVICE(INTEL, 0x9ce5), PORT_LPT0 },
-	{ PCI_VDEVICE(INTEL, 0x9ce6), PORT_LPT1 },
+	{ PCI_DEVICE_DATA(INTEL, QUARK_X1000, &qrk_info_config) },
+	{ PCI_DEVICE_DATA(INTEL, BYT, &lpss_info_config) },
+	{ PCI_DEVICE_DATA(INTEL, MRFLD, &mrfld_info_config) },
+	{ PCI_DEVICE_DATA(INTEL, BSW0, &lpss_info_config) },
+	{ PCI_DEVICE_DATA(INTEL, BSW1, &lpss_info_config) },
+	{ PCI_DEVICE_DATA(INTEL, BSW2, &lpss_info_config) },
+	{ PCI_DEVICE_DATA(INTEL, CE4100, &ce4100_info_config) },
+	{ PCI_DEVICE_DATA(INTEL, LPT0_0, &lpss_info_config) },
+	{ PCI_DEVICE_DATA(INTEL, LPT0_1, &lpss_info_config) },
+	{ PCI_DEVICE_DATA(INTEL, LPT1_0, &lpss_info_config) },
+	{ PCI_DEVICE_DATA(INTEL, LPT1_1, &lpss_info_config) },
 	{ }
 };
 MODULE_DEVICE_TABLE(pci, pxa2xx_spi_pci_devices);
-- 
GitLab


From cb50f3f32a044ea45192a43e756b26048d35ba95 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 25 Feb 2022 19:23:47 +0200
Subject: [PATCH 0819/1586] spi: pxa2xx-pci: Drop unneeded checks in
 lpss_spi_setup()

All of the LPSS devices are using DMA and set the parameters up,
hence no need to test for that.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20220225172350.69797-8-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-pxa2xx-pci.c | 23 +++++++++--------------
 1 file changed, 9 insertions(+), 14 deletions(-)

diff --git a/drivers/spi/spi-pxa2xx-pci.c b/drivers/spi/spi-pxa2xx-pci.c
index a0f24e811e9fe..c041a9288d0c0 100644
--- a/drivers/spi/spi-pxa2xx-pci.c
+++ b/drivers/spi/spi-pxa2xx-pci.c
@@ -83,6 +83,7 @@ static void lpss_dma_put_device(void *dma_dev)
 
 static int lpss_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c)
 {
+	struct dw_dma_slave *tx, *rx;
 	struct pci_dev *dma_dev;
 	int ret;
 
@@ -137,21 +138,15 @@ static int lpss_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c)
 	if (ret)
 		return ret;
 
-	if (c->tx_param) {
-		struct dw_dma_slave *slave = c->tx_param;
-
-		slave->dma_dev = &dma_dev->dev;
-		slave->m_master = 0;
-		slave->p_master = 1;
-	}
-
-	if (c->rx_param) {
-		struct dw_dma_slave *slave = c->rx_param;
+	tx = c->tx_param;
+	tx->dma_dev = &dma_dev->dev;
+	tx->m_master = 0;
+	tx->p_master = 1;
 
-		slave->dma_dev = &dma_dev->dev;
-		slave->m_master = 0;
-		slave->p_master = 1;
-	}
+	rx = c->rx_param;
+	rx->dma_dev = &dma_dev->dev;
+	rx->m_master = 0;
+	rx->p_master = 1;
 
 	c->dma_filter = lpss_dma_filter;
 	c->dma_burst_size = 1;
-- 
GitLab


From c3f4fc096b37bc2e4535f16ac3d65d517bbc14eb Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 25 Feb 2022 19:23:48 +0200
Subject: [PATCH 0820/1586] spi: pxa2xx-pci: Extract
 pxa2xx_spi_pci_clk_register()

Extract pxa2xx_spi_pci_clk_register() from ->probe() in order to reuse it
later on for getting rid of max_clk_rate temporary storage.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20220225172350.69797-9-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-pxa2xx-pci.c | 35 ++++++++++++++++++++++-------------
 1 file changed, 22 insertions(+), 13 deletions(-)

diff --git a/drivers/spi/spi-pxa2xx-pci.c b/drivers/spi/spi-pxa2xx-pci.c
index c041a9288d0c0..2dbe08034ad03 100644
--- a/drivers/spi/spi-pxa2xx-pci.c
+++ b/drivers/spi/spi-pxa2xx-pci.c
@@ -65,6 +65,24 @@ static struct dw_dma_slave lpt1_rx_param = { .src_id = 1 };
 static struct dw_dma_slave lpt0_tx_param = { .dst_id = 2 };
 static struct dw_dma_slave lpt0_rx_param = { .src_id = 3 };
 
+static void pxa2xx_spi_pci_clk_unregister(void *clk)
+{
+	clk_unregister(clk);
+}
+
+static int pxa2xx_spi_pci_clk_register(struct pci_dev *dev, struct ssp_device *ssp,
+				       unsigned long rate)
+{
+	char buf[40];
+
+	snprintf(buf, sizeof(buf), "pxa2xx-spi.%d", ssp->port_id);
+	ssp->clk = clk_register_fixed_rate(&dev->dev, buf, NULL, 0, rate);
+	if (IS_ERR(ssp->clk))
+		return PTR_ERR(ssp->clk);
+
+	return devm_add_action_or_reset(&dev->dev, pxa2xx_spi_pci_clk_unregister, ssp->clk);
+}
+
 static bool lpss_dma_filter(struct dma_chan *chan, void *param)
 {
 	struct dw_dma_slave *dws = param;
@@ -246,7 +264,6 @@ static int pxa2xx_spi_pci_probe(struct pci_dev *dev,
 	struct pxa2xx_spi_controller spi_pdata;
 	struct ssp_device *ssp;
 	struct pxa_spi_info *c;
-	char buf[40];
 
 	ret = pcim_enable_device(dev);
 	if (ret)
@@ -283,11 +300,9 @@ static int pxa2xx_spi_pci_probe(struct pci_dev *dev,
 		return ret;
 	ssp->irq = pci_irq_vector(dev, 0);
 
-	snprintf(buf, sizeof(buf), "pxa2xx-spi.%d", ssp->port_id);
-	ssp->clk = clk_register_fixed_rate(&dev->dev, buf, NULL, 0,
-					   c->max_clk_rate);
-	if (IS_ERR(ssp->clk))
-		return PTR_ERR(ssp->clk);
+	ret = pxa2xx_spi_pci_clk_register(dev, ssp, c->max_clk_rate);
+	if (ret)
+		return ret;
 
 	memset(&pi, 0, sizeof(pi));
 	pi.fwnode = dev_fwnode(&dev->dev);
@@ -298,10 +313,8 @@ static int pxa2xx_spi_pci_probe(struct pci_dev *dev,
 	pi.size_data = sizeof(spi_pdata);
 
 	pdev = platform_device_register_full(&pi);
-	if (IS_ERR(pdev)) {
-		clk_unregister(ssp->clk);
+	if (IS_ERR(pdev))
 		return PTR_ERR(pdev);
-	}
 
 	pci_set_drvdata(dev, pdev);
 
@@ -311,12 +324,8 @@ static int pxa2xx_spi_pci_probe(struct pci_dev *dev,
 static void pxa2xx_spi_pci_remove(struct pci_dev *dev)
 {
 	struct platform_device *pdev = pci_get_drvdata(dev);
-	struct pxa2xx_spi_controller *spi_pdata;
-
-	spi_pdata = dev_get_platdata(&pdev->dev);
 
 	platform_device_unregister(pdev);
-	clk_unregister(spi_pdata->ssp.clk);
 }
 
 static const struct pci_device_id pxa2xx_spi_pci_devices[] = {
-- 
GitLab


From ba8d1353d9c2d9190a523860e37bd7cb7b9de31b Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 25 Feb 2022 19:23:49 +0200
Subject: [PATCH 0821/1586] spi: pxa2xx-pci: Drop temporary storage use for a
 handful of members

Instead of using temporary storage, assign the values directly
to the corresponding struct pxa2xx_spi_controller members.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20220225172350.69797-10-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-pxa2xx-pci.c | 110 ++++++++++++++++-------------------
 1 file changed, 49 insertions(+), 61 deletions(-)

diff --git a/drivers/spi/spi-pxa2xx-pci.c b/drivers/spi/spi-pxa2xx-pci.c
index 2dbe08034ad03..3c5d14affa956 100644
--- a/drivers/spi/spi-pxa2xx-pci.c
+++ b/drivers/spi/spi-pxa2xx-pci.c
@@ -28,19 +28,7 @@
 #define PCI_DEVICE_ID_INTEL_LPT1_1		0x9ce6
 
 struct pxa_spi_info {
-	enum pxa_ssp_type type;
-	unsigned int port_id;
-	unsigned int num_chipselect;
-	unsigned long max_clk_rate;
-
-	/* DMA channel request parameters */
-	bool (*dma_filter)(struct dma_chan *chan, void *param);
-	void *tx_param;
-	void *rx_param;
-
-	unsigned int dma_burst_size;
-
-	int (*setup)(struct pci_dev *pdev, struct pxa_spi_info *c);
+	int (*setup)(struct pci_dev *pdev, struct pxa2xx_spi_controller *c);
 };
 
 static struct dw_dma_slave byt_tx_param = { .dst_id = 0 };
@@ -99,48 +87,49 @@ static void lpss_dma_put_device(void *dma_dev)
 	pci_dev_put(dma_dev);
 }
 
-static int lpss_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c)
+static int lpss_spi_setup(struct pci_dev *dev, struct pxa2xx_spi_controller *c)
 {
+	struct ssp_device *ssp = &c->ssp;
 	struct dw_dma_slave *tx, *rx;
 	struct pci_dev *dma_dev;
 	int ret;
 
 	switch (dev->device) {
 	case PCI_DEVICE_ID_INTEL_BYT:
-		c->type = LPSS_BYT_SSP;
-		c->port_id = 0;
+		ssp->type = LPSS_BYT_SSP;
+		ssp->port_id = 0;
 		c->tx_param = &byt_tx_param;
 		c->rx_param = &byt_rx_param;
 		break;
 	case PCI_DEVICE_ID_INTEL_BSW0:
-		c->type = LPSS_BSW_SSP;
-		c->port_id = 0;
+		ssp->type = LPSS_BSW_SSP;
+		ssp->port_id = 0;
 		c->tx_param = &bsw0_tx_param;
 		c->rx_param = &bsw0_rx_param;
 		break;
 	case PCI_DEVICE_ID_INTEL_BSW1:
-		c->type = LPSS_BSW_SSP;
-		c->port_id = 1;
+		ssp->type = LPSS_BSW_SSP;
+		ssp->port_id = 1;
 		c->tx_param = &bsw1_tx_param;
 		c->rx_param = &bsw1_rx_param;
 		break;
 	case PCI_DEVICE_ID_INTEL_BSW2:
-		c->type = LPSS_BSW_SSP;
-		c->port_id = 2;
+		ssp->type = LPSS_BSW_SSP;
+		ssp->port_id = 2;
 		c->tx_param = &bsw2_tx_param;
 		c->rx_param = &bsw2_rx_param;
 		break;
 	case PCI_DEVICE_ID_INTEL_LPT0_0:
 	case PCI_DEVICE_ID_INTEL_LPT1_0:
-		c->type = LPSS_LPT_SSP;
-		c->port_id = 0;
+		ssp->type = LPSS_LPT_SSP;
+		ssp->port_id = 0;
 		c->tx_param = &lpt0_tx_param;
 		c->rx_param = &lpt0_rx_param;
 		break;
 	case PCI_DEVICE_ID_INTEL_LPT0_1:
 	case PCI_DEVICE_ID_INTEL_LPT1_1:
-		c->type = LPSS_LPT_SSP;
-		c->port_id = 1;
+		ssp->type = LPSS_LPT_SSP;
+		ssp->port_id = 1;
 		c->tx_param = &lpt1_tx_param;
 		c->rx_param = &lpt1_rx_param;
 		break;
@@ -149,7 +138,10 @@ static int lpss_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c)
 	}
 
 	c->num_chipselect = 1;
-	c->max_clk_rate = 50000000;
+
+	ret = pxa2xx_spi_pci_clk_register(dev, ssp, 50000000);
+	if (ret)
+		return ret;
 
 	dma_dev = pci_get_slot(dev->bus, PCI_DEVFN(PCI_SLOT(dev->devfn), 0));
 	ret = devm_add_action_or_reset(&dev->dev, lpss_dma_put_device, dma_dev);
@@ -168,6 +160,7 @@ static int lpss_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c)
 
 	c->dma_filter = lpss_dma_filter;
 	c->dma_burst_size = 1;
+	c->enable_dma = 1;
 	return 0;
 }
 
@@ -175,41 +168,45 @@ static struct pxa_spi_info lpss_info_config = {
 	.setup = lpss_spi_setup,
 };
 
-static int ce4100_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c)
+static int ce4100_spi_setup(struct pci_dev *dev, struct pxa2xx_spi_controller *c)
 {
-	c->type = PXA25x_SSP;
-	c->port_id = dev->devfn;
+	struct ssp_device *ssp = &c->ssp;
+
+	ssp->type = PXA25x_SSP;
+	ssp->port_id = dev->devfn;
 	c->num_chipselect = dev->devfn;
-	c->max_clk_rate = 3686400;
 
-	return 0;
+	return pxa2xx_spi_pci_clk_register(dev, ssp, 3686400);
 }
 
 static struct pxa_spi_info ce4100_info_config = {
 	.setup = ce4100_spi_setup,
 };
 
-static int mrfld_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c)
+static int mrfld_spi_setup(struct pci_dev *dev, struct pxa2xx_spi_controller *c)
 {
+	struct ssp_device *ssp = &c->ssp;
 	struct dw_dma_slave *tx, *rx;
 	struct pci_dev *dma_dev;
 	int ret;
 
+	ssp->type = MRFLD_SSP;
+
 	switch (PCI_FUNC(dev->devfn)) {
 	case 0:
-		c->port_id = 3;
+		ssp->port_id = 3;
 		c->num_chipselect = 1;
 		c->tx_param = &mrfld3_tx_param;
 		c->rx_param = &mrfld3_rx_param;
 		break;
 	case 1:
-		c->port_id = 5;
+		ssp->port_id = 5;
 		c->num_chipselect = 4;
 		c->tx_param = &mrfld5_tx_param;
 		c->rx_param = &mrfld5_rx_param;
 		break;
 	case 2:
-		c->port_id = 6;
+		ssp->port_id = 6;
 		c->num_chipselect = 1;
 		c->tx_param = &mrfld6_tx_param;
 		c->rx_param = &mrfld6_rx_param;
@@ -218,8 +215,9 @@ static int mrfld_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c)
 		return -ENODEV;
 	}
 
-	c->type = MRFLD_SSP;
-	c->max_clk_rate = 25000000;
+	ret = pxa2xx_spi_pci_clk_register(dev, ssp, 25000000);
+	if (ret)
+		return ret;
 
 	dma_dev = pci_get_slot(dev->bus, PCI_DEVFN(21, 0));
 	ret = devm_add_action_or_reset(&dev->dev, lpss_dma_put_device, dma_dev);
@@ -234,6 +232,7 @@ static int mrfld_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c)
 
 	c->dma_filter = lpss_dma_filter;
 	c->dma_burst_size = 8;
+	c->enable_dma = 1;
 	return 0;
 }
 
@@ -241,14 +240,15 @@ static struct pxa_spi_info mrfld_info_config = {
 	.setup = mrfld_spi_setup,
 };
 
-static int qrk_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c)
+static int qrk_spi_setup(struct pci_dev *dev, struct pxa2xx_spi_controller *c)
 {
-	c->type = QUARK_X1000_SSP;
-	c->port_id = dev->devfn;
+	struct ssp_device *ssp = &c->ssp;
+
+	ssp->type = QUARK_X1000_SSP;
+	ssp->port_id = dev->devfn;
 	c->num_chipselect = 1;
-	c->max_clk_rate = 50000000;
 
-	return 0;
+	return pxa2xx_spi_pci_clk_register(dev, ssp, 50000000);
 }
 
 static struct pxa_spi_info qrk_info_config = {
@@ -262,8 +262,8 @@ static int pxa2xx_spi_pci_probe(struct pci_dev *dev,
 	int ret;
 	struct platform_device *pdev;
 	struct pxa2xx_spi_controller spi_pdata;
+	struct pxa_spi_info *info;
 	struct ssp_device *ssp;
-	struct pxa_spi_info *c;
 
 	ret = pcim_enable_device(dev);
 	if (ret)
@@ -273,25 +273,17 @@ static int pxa2xx_spi_pci_probe(struct pci_dev *dev,
 	if (ret)
 		return ret;
 
-	c = (struct pxa_spi_info *)ent->driver_data;
-	ret = c->setup(dev, c);
-	if (ret)
-		return ret;
-
 	memset(&spi_pdata, 0, sizeof(spi_pdata));
-	spi_pdata.num_chipselect = c->num_chipselect;
-	spi_pdata.dma_filter = c->dma_filter;
-	spi_pdata.tx_param = c->tx_param;
-	spi_pdata.rx_param = c->rx_param;
-	spi_pdata.enable_dma = c->rx_param && c->tx_param;
-	spi_pdata.dma_burst_size = c->dma_burst_size;
 
 	ssp = &spi_pdata.ssp;
 	ssp->dev = &dev->dev;
 	ssp->phys_base = pci_resource_start(dev, 0);
 	ssp->mmio_base = pcim_iomap_table(dev)[0];
-	ssp->type = c->type;
-	ssp->port_id = c->port_id;
+
+	info = (struct pxa_spi_info *)ent->driver_data;
+	ret = info->setup(dev, &spi_pdata);
+	if (ret)
+		return ret;
 
 	pci_set_master(dev);
 
@@ -300,10 +292,6 @@ static int pxa2xx_spi_pci_probe(struct pci_dev *dev,
 		return ret;
 	ssp->irq = pci_irq_vector(dev, 0);
 
-	ret = pxa2xx_spi_pci_clk_register(dev, ssp, c->max_clk_rate);
-	if (ret)
-		return ret;
-
 	memset(&pi, 0, sizeof(pi));
 	pi.fwnode = dev_fwnode(&dev->dev);
 	pi.parent = &dev->dev;
-- 
GitLab


From fcaaf76ed5f3bbf346db9e49d9d9c0978d8f8dce Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 25 Feb 2022 19:23:50 +0200
Subject: [PATCH 0822/1586] spi: pxa2xx-pci: Constify struct pxa_spi_info
 variables

Now when there are no dynamical changes required, we may
constify struct pxa_spi_info variables.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20220225172350.69797-11-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-pxa2xx-pci.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/spi/spi-pxa2xx-pci.c b/drivers/spi/spi-pxa2xx-pci.c
index 3c5d14affa956..861b21c635043 100644
--- a/drivers/spi/spi-pxa2xx-pci.c
+++ b/drivers/spi/spi-pxa2xx-pci.c
@@ -164,7 +164,7 @@ static int lpss_spi_setup(struct pci_dev *dev, struct pxa2xx_spi_controller *c)
 	return 0;
 }
 
-static struct pxa_spi_info lpss_info_config = {
+static const struct pxa_spi_info lpss_info_config = {
 	.setup = lpss_spi_setup,
 };
 
@@ -179,7 +179,7 @@ static int ce4100_spi_setup(struct pci_dev *dev, struct pxa2xx_spi_controller *c
 	return pxa2xx_spi_pci_clk_register(dev, ssp, 3686400);
 }
 
-static struct pxa_spi_info ce4100_info_config = {
+static const struct pxa_spi_info ce4100_info_config = {
 	.setup = ce4100_spi_setup,
 };
 
@@ -236,7 +236,7 @@ static int mrfld_spi_setup(struct pci_dev *dev, struct pxa2xx_spi_controller *c)
 	return 0;
 }
 
-static struct pxa_spi_info mrfld_info_config = {
+static const struct pxa_spi_info mrfld_info_config = {
 	.setup = mrfld_spi_setup,
 };
 
@@ -251,18 +251,18 @@ static int qrk_spi_setup(struct pci_dev *dev, struct pxa2xx_spi_controller *c)
 	return pxa2xx_spi_pci_clk_register(dev, ssp, 50000000);
 }
 
-static struct pxa_spi_info qrk_info_config = {
+static const struct pxa_spi_info qrk_info_config = {
 	.setup = qrk_spi_setup,
 };
 
 static int pxa2xx_spi_pci_probe(struct pci_dev *dev,
 		const struct pci_device_id *ent)
 {
+	const struct pxa_spi_info *info;
 	struct platform_device_info pi;
 	int ret;
 	struct platform_device *pdev;
 	struct pxa2xx_spi_controller spi_pdata;
-	struct pxa_spi_info *info;
 	struct ssp_device *ssp;
 
 	ret = pcim_enable_device(dev);
-- 
GitLab


From 6bb477df04366e0f69dd2f49e1ae1099069326bc Mon Sep 17 00:00:00 2001
From: Yun Zhou <yun.zhou@windriver.com>
Date: Thu, 17 Feb 2022 22:12:34 +0800
Subject: [PATCH 0823/1586] spi: use specific last_cs instead of last_cs_enable

Commit d40f0b6f2e21 instroduced last_cs_enable to avoid setting
chipselect if it's not necessary, but it also introduces a bug. The
chipselect may not be set correctly on multi-device SPI busses. The
reason is that we can't judge the chipselect by bool last_cs_enable,
since chipselect may be modified after other devices were accessed.

So we should record the specific state of chipselect in case of
confusion.

Signed-off-by: Yun Zhou <yun.zhou@windriver.com>
Link: https://lore.kernel.org/r/20220217141234.72737-1-yun.zhou@windriver.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi.c       | 8 ++++++--
 include/linux/spi/spi.h | 5 +++--
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index cd4dc3131e17f..6326e592fcfdb 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -926,13 +926,14 @@ static void spi_set_cs(struct spi_device *spi, bool enable, bool force)
 	 * Avoid calling into the driver (or doing delays) if the chip select
 	 * isn't actually changing from the last time this was called.
 	 */
-	if (!force && (spi->controller->last_cs_enable == enable) &&
+	if (!force && ((enable && spi->controller->last_cs == spi->chip_select) ||
+				(!enable && spi->controller->last_cs != spi->chip_select)) &&
 	    (spi->controller->last_cs_mode_high == (spi->mode & SPI_CS_HIGH)))
 		return;
 
 	trace_spi_set_cs(spi, activate);
 
-	spi->controller->last_cs_enable = enable;
+	spi->controller->last_cs = enable ? spi->chip_select : -1;
 	spi->controller->last_cs_mode_high = spi->mode & SPI_CS_HIGH;
 
 	if ((spi->cs_gpiod || !spi->controller->set_cs_timing) && !activate) {
@@ -3016,6 +3017,9 @@ int spi_register_controller(struct spi_controller *ctlr)
 		goto free_bus_id;
 	}
 
+	/* setting last_cs to -1 means no chip selected */
+	ctlr->last_cs = -1;
+
 	status = device_add(&ctlr->dev);
 	if (status < 0)
 		goto free_bus_id;
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 579d71cdf6fab..7d005fa4631c3 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -370,7 +370,8 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch
  * @cur_msg_prepared: spi_prepare_message was called for the currently
  *                    in-flight message
  * @cur_msg_mapped: message has been mapped for DMA
- * @last_cs_enable: was enable true on the last call to set_cs.
+ * @last_cs: the last chip_select that is recorded by set_cs, -1 on non chip
+ *           selected
  * @last_cs_mode_high: was (mode & SPI_CS_HIGH) true on the last call to set_cs.
  * @xfer_completion: used by core transfer_one_message()
  * @busy: message pump is busy
@@ -603,7 +604,7 @@ struct spi_controller {
 	bool				auto_runtime_pm;
 	bool                            cur_msg_prepared;
 	bool				cur_msg_mapped;
-	bool				last_cs_enable;
+	char				last_cs;
 	bool				last_cs_mode_high;
 	bool                            fallback;
 	struct completion               xfer_completion;
-- 
GitLab


From f5ec592dd3bcf7c91f7c262a7f5011e001d269cd Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Mon, 24 Jan 2022 13:59:36 -0800
Subject: [PATCH 0824/1586] block: simplify calling convention of
 elv_unregister_queue()

Make elv_unregister_queue() a no-op if q->elevator is NULL or is not
registered.

This simplifies the existing callers, as well as the future caller in
the error path of blk_register_queue().

Also don't bother checking whether q is NULL, since it never is.

Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220124215938.2769-2-ebiggers@kernel.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-sysfs.c | 3 +--
 block/elevator.c  | 8 ++++----
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 4c6b7dff71e5b..49854c7ea29b0 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -960,8 +960,7 @@ void blk_unregister_queue(struct gendisk *disk)
 	blk_trace_remove_sysfs(disk_to_dev(disk));
 
 	mutex_lock(&q->sysfs_lock);
-	if (q->elevator)
-		elv_unregister_queue(q);
+	elv_unregister_queue(q);
 	disk_unregister_independent_access_ranges(disk);
 	mutex_unlock(&q->sysfs_lock);
 	mutex_unlock(&q->sysfs_dir_lock);
diff --git a/block/elevator.c b/block/elevator.c
index 6847ab6e7aa50..a842e4b8ebc66 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -516,9 +516,11 @@ int elv_register_queue(struct request_queue *q, bool uevent)
 
 void elv_unregister_queue(struct request_queue *q)
 {
+	struct elevator_queue *e = q->elevator;
+
 	lockdep_assert_held(&q->sysfs_lock);
 
-	if (q) {
+	if (e && e->registered) {
 		struct elevator_queue *e = q->elevator;
 
 		kobject_uevent(&e->kobj, KOBJ_REMOVE);
@@ -593,9 +595,7 @@ int elevator_switch_mq(struct request_queue *q,
 	lockdep_assert_held(&q->sysfs_lock);
 
 	if (q->elevator) {
-		if (q->elevator->registered)
-			elv_unregister_queue(q);
-
+		elv_unregister_queue(q);
 		ioc_clear_queue(q);
 		blk_mq_sched_free_rqs(q);
 		elevator_exit(q);
-- 
GitLab


From 0f69288253e9fc7c495047720e523b9f1aba5712 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Mon, 24 Jan 2022 13:59:37 -0800
Subject: [PATCH 0825/1586] block: don't delete queue kobject before its
 children

kobjects aren't supposed to be deleted before their child kobjects are
deleted.  Apparently this is usually benign; however, a WARN will be
triggered if one of the child kobjects has a named attribute group:

    sysfs group 'modes' not found for kobject 'crypto'
    WARNING: CPU: 0 PID: 1 at fs/sysfs/group.c:278 sysfs_remove_group+0x72/0x80
    ...
    Call Trace:
      sysfs_remove_groups+0x29/0x40 fs/sysfs/group.c:312
      __kobject_del+0x20/0x80 lib/kobject.c:611
      kobject_cleanup+0xa4/0x140 lib/kobject.c:696
      kobject_release lib/kobject.c:736 [inline]
      kref_put include/linux/kref.h:65 [inline]
      kobject_put+0x53/0x70 lib/kobject.c:753
      blk_crypto_sysfs_unregister+0x10/0x20 block/blk-crypto-sysfs.c:159
      blk_unregister_queue+0xb0/0x110 block/blk-sysfs.c:962
      del_gendisk+0x117/0x250 block/genhd.c:610

Fix this by moving the kobject_del() and the corresponding
kobject_uevent() to the correct place.

Fixes: 2c2086afc2b8 ("block: Protect less code with sysfs_lock in blk_{un,}register_queue()")
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220124215938.2769-3-ebiggers@kernel.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-sysfs.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 49854c7ea29b0..b38ef1bbf389c 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -954,15 +954,17 @@ void blk_unregister_queue(struct gendisk *disk)
 	 */
 	if (queue_is_mq(q))
 		blk_mq_unregister_dev(disk_to_dev(disk), q);
-
-	kobject_uevent(&q->kobj, KOBJ_REMOVE);
-	kobject_del(&q->kobj);
 	blk_trace_remove_sysfs(disk_to_dev(disk));
 
 	mutex_lock(&q->sysfs_lock);
 	elv_unregister_queue(q);
 	disk_unregister_independent_access_ranges(disk);
 	mutex_unlock(&q->sysfs_lock);
+
+	/* Now that we've deleted all child objects, we can delete the queue. */
+	kobject_uevent(&q->kobj, KOBJ_REMOVE);
+	kobject_del(&q->kobj);
+
 	mutex_unlock(&q->sysfs_dir_lock);
 
 	kobject_put(&disk_to_dev(disk)->kobj);
-- 
GitLab


From 20f01f163203666010ee1560852590a0c0572726 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Mon, 24 Jan 2022 13:59:38 -0800
Subject: [PATCH 0826/1586] blk-crypto: show crypto capabilities in sysfs

Add sysfs files that expose the inline encryption capabilities of
request queues:

	/sys/block/$disk/queue/crypto/max_dun_bits
	/sys/block/$disk/queue/crypto/modes/$mode
	/sys/block/$disk/queue/crypto/num_keyslots

Userspace can use these new files to decide what encryption settings to
use, or whether to use inline encryption at all.  This also brings the
crypto capabilities in line with the other queue properties, which are
already discoverable via the queue directory in sysfs.

Design notes:

  - Place the new files in a new subdirectory "crypto" to group them
    together and to avoid complicating the main "queue" directory.  This
    also makes it possible to replace "crypto" with a symlink later if
    we ever make the blk_crypto_profiles into real kobjects (see below).

  - It was necessary to define a new kobject that corresponds to the
    crypto subdirectory.  For now, this kobject just contains a pointer
    to the blk_crypto_profile.  Note that multiple queues (and hence
    multiple such kobjects) may refer to the same blk_crypto_profile.

    An alternative design would more closely match the current kernel
    data structures: the blk_crypto_profile could be a kobject itself,
    located directly under the host controller device's kobject, while
    /sys/block/$disk/queue/crypto would be a symlink to it.

    I decided not to do that for now because it would require a lot more
    changes, such as no longer embedding blk_crypto_profile in other
    structures, and also because I'm not sure we can rule out moving the
    crypto capabilities into 'struct queue_limits' in the future.  (Even
    if multiple queues share the same crypto engine, maybe the supported
    data unit sizes could differ due to other queue properties.)  It
    would also still be possible to switch to that design later without
    breaking userspace, by replacing the directory with a symlink.

  - Use "max_dun_bits" instead of "max_dun_bytes".  Currently, the
    kernel internally stores this value in bytes, but that's an
    implementation detail.  It probably makes more sense to talk about
    this value in bits, and choosing bits is more future-proof.

  - "modes" is a sub-subdirectory, since there may be multiple supported
    crypto modes, sysfs is supposed to have one value per file, and it
    makes sense to group all the mode files together.

  - Each mode had to be named.  The crypto API names like "xts(aes)" are
    not appropriate because they don't specify the key size.  Therefore,
    I assigned new names.  The exact names chosen are arbitrary, but
    they happen to match the names used in log messages in fs/crypto/.

  - The "num_keyslots" file is a bit different from the others in that
    it is only useful to know for performance reasons.  However, it's
    included as it can still be useful.  For example, a user might not
    want to use inline encryption if there aren't very many keyslots.

Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Eric Biggers <ebiggers@google.com>
Link: https://lore.kernel.org/r/20220124215938.2769-4-ebiggers@kernel.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 Documentation/ABI/stable/sysfs-block |  49 ++++++++
 block/Makefile                       |   3 +-
 block/blk-crypto-internal.h          |  12 ++
 block/blk-crypto-sysfs.c             | 172 +++++++++++++++++++++++++++
 block/blk-crypto.c                   |   3 +
 block/blk-sysfs.c                    |   6 +
 include/linux/blkdev.h               |   1 +
 7 files changed, 245 insertions(+), 1 deletion(-)
 create mode 100644 block/blk-crypto-sysfs.c

diff --git a/Documentation/ABI/stable/sysfs-block b/Documentation/ABI/stable/sysfs-block
index 8dd3e84a8aade..e8797cd09aff9 100644
--- a/Documentation/ABI/stable/sysfs-block
+++ b/Documentation/ABI/stable/sysfs-block
@@ -155,6 +155,55 @@ Description:
 		last zone of the device which may be smaller.
 
 
+What:		/sys/block/<disk>/queue/crypto/
+Date:		February 2022
+Contact:	linux-block@vger.kernel.org
+Description:
+		The presence of this subdirectory of /sys/block/<disk>/queue/
+		indicates that the device supports inline encryption.  This
+		subdirectory contains files which describe the inline encryption
+		capabilities of the device.  For more information about inline
+		encryption, refer to Documentation/block/inline-encryption.rst.
+
+
+What:		/sys/block/<disk>/queue/crypto/max_dun_bits
+Date:		February 2022
+Contact:	linux-block@vger.kernel.org
+Description:
+		[RO] This file shows the maximum length, in bits, of data unit
+		numbers accepted by the device in inline encryption requests.
+
+
+What:		/sys/block/<disk>/queue/crypto/modes/<mode>
+Date:		February 2022
+Contact:	linux-block@vger.kernel.org
+Description:
+		[RO] For each crypto mode (i.e., encryption/decryption
+		algorithm) the device supports with inline encryption, a file
+		will exist at this location.  It will contain a hexadecimal
+		number that is a bitmask of the supported data unit sizes, in
+		bytes, for that crypto mode.
+
+		Currently, the crypto modes that may be supported are:
+
+		   * AES-256-XTS
+		   * AES-128-CBC-ESSIV
+		   * Adiantum
+
+		For example, if a device supports AES-256-XTS inline encryption
+		with data unit sizes of 512 and 4096 bytes, the file
+		/sys/block/<disk>/queue/crypto/modes/AES-256-XTS will exist and
+		will contain "0x1200".
+
+
+What:		/sys/block/<disk>/queue/crypto/num_keyslots
+Date:		February 2022
+Contact:	linux-block@vger.kernel.org
+Description:
+		[RO] This file shows the number of keyslots the device has for
+		use with inline encryption.
+
+
 What:		/sys/block/<disk>/queue/dax
 Date:		June 2016
 Contact:	linux-block@vger.kernel.org
diff --git a/block/Makefile b/block/Makefile
index f38eaa6129296..3950ecbc5c263 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -36,6 +36,7 @@ obj-$(CONFIG_BLK_DEBUG_FS)	+= blk-mq-debugfs.o
 obj-$(CONFIG_BLK_DEBUG_FS_ZONED)+= blk-mq-debugfs-zoned.o
 obj-$(CONFIG_BLK_SED_OPAL)	+= sed-opal.o
 obj-$(CONFIG_BLK_PM)		+= blk-pm.o
-obj-$(CONFIG_BLK_INLINE_ENCRYPTION)	+= blk-crypto.o blk-crypto-profile.o
+obj-$(CONFIG_BLK_INLINE_ENCRYPTION)	+= blk-crypto.o blk-crypto-profile.o \
+					   blk-crypto-sysfs.o
 obj-$(CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK)	+= blk-crypto-fallback.o
 obj-$(CONFIG_BLOCK_HOLDER_DEPRECATED)	+= holder.o
diff --git a/block/blk-crypto-internal.h b/block/blk-crypto-internal.h
index 2fb0d65a464ca..e6818ffaddbf8 100644
--- a/block/blk-crypto-internal.h
+++ b/block/blk-crypto-internal.h
@@ -11,6 +11,7 @@
 
 /* Represents a crypto mode supported by blk-crypto  */
 struct blk_crypto_mode {
+	const char *name; /* name of this mode, shown in sysfs */
 	const char *cipher_str; /* crypto API name (for fallback case) */
 	unsigned int keysize; /* key size in bytes */
 	unsigned int ivsize; /* iv size in bytes */
@@ -20,6 +21,10 @@ extern const struct blk_crypto_mode blk_crypto_modes[];
 
 #ifdef CONFIG_BLK_INLINE_ENCRYPTION
 
+int blk_crypto_sysfs_register(struct request_queue *q);
+
+void blk_crypto_sysfs_unregister(struct request_queue *q);
+
 void bio_crypt_dun_increment(u64 dun[BLK_CRYPTO_DUN_ARRAY_SIZE],
 			     unsigned int inc);
 
@@ -62,6 +67,13 @@ static inline bool blk_crypto_rq_is_encrypted(struct request *rq)
 
 #else /* CONFIG_BLK_INLINE_ENCRYPTION */
 
+static inline int blk_crypto_sysfs_register(struct request_queue *q)
+{
+	return 0;
+}
+
+static inline void blk_crypto_sysfs_unregister(struct request_queue *q) { }
+
 static inline bool bio_crypt_rq_ctx_compatible(struct request *rq,
 					       struct bio *bio)
 {
diff --git a/block/blk-crypto-sysfs.c b/block/blk-crypto-sysfs.c
new file mode 100644
index 0000000000000..fd93bd2f33b75
--- /dev/null
+++ b/block/blk-crypto-sysfs.c
@@ -0,0 +1,172 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2021 Google LLC
+ *
+ * sysfs support for blk-crypto.  This file contains the code which exports the
+ * crypto capabilities of devices via /sys/block/$disk/queue/crypto/.
+ */
+
+#include <linux/blk-crypto-profile.h>
+
+#include "blk-crypto-internal.h"
+
+struct blk_crypto_kobj {
+	struct kobject kobj;
+	struct blk_crypto_profile *profile;
+};
+
+struct blk_crypto_attr {
+	struct attribute attr;
+	ssize_t (*show)(struct blk_crypto_profile *profile,
+			struct blk_crypto_attr *attr, char *page);
+};
+
+static struct blk_crypto_profile *kobj_to_crypto_profile(struct kobject *kobj)
+{
+	return container_of(kobj, struct blk_crypto_kobj, kobj)->profile;
+}
+
+static struct blk_crypto_attr *attr_to_crypto_attr(struct attribute *attr)
+{
+	return container_of(attr, struct blk_crypto_attr, attr);
+}
+
+static ssize_t max_dun_bits_show(struct blk_crypto_profile *profile,
+				 struct blk_crypto_attr *attr, char *page)
+{
+	return sysfs_emit(page, "%u\n", 8 * profile->max_dun_bytes_supported);
+}
+
+static ssize_t num_keyslots_show(struct blk_crypto_profile *profile,
+				 struct blk_crypto_attr *attr, char *page)
+{
+	return sysfs_emit(page, "%u\n", profile->num_slots);
+}
+
+#define BLK_CRYPTO_RO_ATTR(_name) \
+	static struct blk_crypto_attr _name##_attr = __ATTR_RO(_name)
+
+BLK_CRYPTO_RO_ATTR(max_dun_bits);
+BLK_CRYPTO_RO_ATTR(num_keyslots);
+
+static struct attribute *blk_crypto_attrs[] = {
+	&max_dun_bits_attr.attr,
+	&num_keyslots_attr.attr,
+	NULL,
+};
+
+static const struct attribute_group blk_crypto_attr_group = {
+	.attrs = blk_crypto_attrs,
+};
+
+/*
+ * The encryption mode attributes.  To avoid hard-coding the list of encryption
+ * modes, these are initialized at boot time by blk_crypto_sysfs_init().
+ */
+static struct blk_crypto_attr __blk_crypto_mode_attrs[BLK_ENCRYPTION_MODE_MAX];
+static struct attribute *blk_crypto_mode_attrs[BLK_ENCRYPTION_MODE_MAX + 1];
+
+static umode_t blk_crypto_mode_is_visible(struct kobject *kobj,
+					  struct attribute *attr, int n)
+{
+	struct blk_crypto_profile *profile = kobj_to_crypto_profile(kobj);
+	struct blk_crypto_attr *a = attr_to_crypto_attr(attr);
+	int mode_num = a - __blk_crypto_mode_attrs;
+
+	if (profile->modes_supported[mode_num])
+		return 0444;
+	return 0;
+}
+
+static ssize_t blk_crypto_mode_show(struct blk_crypto_profile *profile,
+				    struct blk_crypto_attr *attr, char *page)
+{
+	int mode_num = attr - __blk_crypto_mode_attrs;
+
+	return sysfs_emit(page, "0x%x\n", profile->modes_supported[mode_num]);
+}
+
+static const struct attribute_group blk_crypto_modes_attr_group = {
+	.name = "modes",
+	.attrs = blk_crypto_mode_attrs,
+	.is_visible = blk_crypto_mode_is_visible,
+};
+
+static const struct attribute_group *blk_crypto_attr_groups[] = {
+	&blk_crypto_attr_group,
+	&blk_crypto_modes_attr_group,
+	NULL,
+};
+
+static ssize_t blk_crypto_attr_show(struct kobject *kobj,
+				    struct attribute *attr, char *page)
+{
+	struct blk_crypto_profile *profile = kobj_to_crypto_profile(kobj);
+	struct blk_crypto_attr *a = attr_to_crypto_attr(attr);
+
+	return a->show(profile, a, page);
+}
+
+static const struct sysfs_ops blk_crypto_attr_ops = {
+	.show = blk_crypto_attr_show,
+};
+
+static void blk_crypto_release(struct kobject *kobj)
+{
+	kfree(container_of(kobj, struct blk_crypto_kobj, kobj));
+}
+
+static struct kobj_type blk_crypto_ktype = {
+	.default_groups = blk_crypto_attr_groups,
+	.sysfs_ops	= &blk_crypto_attr_ops,
+	.release	= blk_crypto_release,
+};
+
+/*
+ * If the request_queue has a blk_crypto_profile, create the "crypto"
+ * subdirectory in sysfs (/sys/block/$disk/queue/crypto/).
+ */
+int blk_crypto_sysfs_register(struct request_queue *q)
+{
+	struct blk_crypto_kobj *obj;
+	int err;
+
+	if (!q->crypto_profile)
+		return 0;
+
+	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
+	if (!obj)
+		return -ENOMEM;
+	obj->profile = q->crypto_profile;
+
+	err = kobject_init_and_add(&obj->kobj, &blk_crypto_ktype, &q->kobj,
+				   "crypto");
+	if (err) {
+		kobject_put(&obj->kobj);
+		return err;
+	}
+	q->crypto_kobject = &obj->kobj;
+	return 0;
+}
+
+void blk_crypto_sysfs_unregister(struct request_queue *q)
+{
+	kobject_put(q->crypto_kobject);
+}
+
+static int __init blk_crypto_sysfs_init(void)
+{
+	int i;
+
+	BUILD_BUG_ON(BLK_ENCRYPTION_MODE_INVALID != 0);
+	for (i = 1; i < BLK_ENCRYPTION_MODE_MAX; i++) {
+		struct blk_crypto_attr *attr = &__blk_crypto_mode_attrs[i];
+
+		attr->attr.name = blk_crypto_modes[i].name;
+		attr->attr.mode = 0444;
+		attr->show = blk_crypto_mode_show;
+		blk_crypto_mode_attrs[i - 1] = &attr->attr;
+	}
+	return 0;
+}
+subsys_initcall(blk_crypto_sysfs_init);
diff --git a/block/blk-crypto.c b/block/blk-crypto.c
index 773dae4c329ba..a496aaef85ba4 100644
--- a/block/blk-crypto.c
+++ b/block/blk-crypto.c
@@ -19,16 +19,19 @@
 
 const struct blk_crypto_mode blk_crypto_modes[] = {
 	[BLK_ENCRYPTION_MODE_AES_256_XTS] = {
+		.name = "AES-256-XTS",
 		.cipher_str = "xts(aes)",
 		.keysize = 64,
 		.ivsize = 16,
 	},
 	[BLK_ENCRYPTION_MODE_AES_128_CBC_ESSIV] = {
+		.name = "AES-128-CBC-ESSIV",
 		.cipher_str = "essiv(cbc(aes),sha256)",
 		.keysize = 16,
 		.ivsize = 16,
 	},
 	[BLK_ENCRYPTION_MODE_ADIANTUM] = {
+		.name = "Adiantum",
 		.cipher_str = "adiantum(xchacha12,aes)",
 		.keysize = 32,
 		.ivsize = 32,
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index b38ef1bbf389c..241ded62f458f 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -880,6 +880,10 @@ int blk_register_queue(struct gendisk *disk)
 			goto put_dev;
 	}
 
+	ret = blk_crypto_sysfs_register(q);
+	if (ret)
+		goto put_dev;
+
 	blk_queue_flag_set(QUEUE_FLAG_REGISTERED, q);
 	wbt_enable_default(q);
 	blk_throtl_register_queue(q);
@@ -910,6 +914,7 @@ unlock:
 	return ret;
 
 put_dev:
+	elv_unregister_queue(q);
 	disk_unregister_independent_access_ranges(disk);
 	mutex_unlock(&q->sysfs_lock);
 	mutex_unlock(&q->sysfs_dir_lock);
@@ -954,6 +959,7 @@ void blk_unregister_queue(struct gendisk *disk)
 	 */
 	if (queue_is_mq(q))
 		blk_mq_unregister_dev(disk_to_dev(disk), q);
+	blk_crypto_sysfs_unregister(q);
 	blk_trace_remove_sysfs(disk_to_dev(disk));
 
 	mutex_lock(&q->sysfs_lock);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index f757f9c2871f8..e19947d84f128 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -413,6 +413,7 @@ struct request_queue {
 
 #ifdef CONFIG_BLK_INLINE_ENCRYPTION
 	struct blk_crypto_profile *crypto_profile;
+	struct kobject *crypto_kobject;
 #endif
 
 	unsigned int		rq_timeout;
-- 
GitLab


From 25d490eb46486e88c16e64d9eb7cfd33a642d596 Mon Sep 17 00:00:00 2001
From: Wang Kefeng <wangkefeng.wang@huawei.com>
Date: Sat, 18 Dec 2021 09:30:40 +0100
Subject: [PATCH 0827/1586] ARM: 9172/1: amba: Cleanup amba pclk operation

There is no user about amba_pclk_[un]prepare() besides pl330.c,
directly use clk_[un]prepare(). After this, all the function about
amba pclk operation, enable, disable, [un]prepare could be killed.

Acked-by: Vinod Koul <vkoul@kernel.org>
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
---
 drivers/dma/pl330.c      |  4 ++--
 include/linux/amba/bus.h | 20 --------------------
 2 files changed, 2 insertions(+), 22 deletions(-)

diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index 110de8a600588..858400e42ec05 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -2968,7 +2968,7 @@ static int __maybe_unused pl330_suspend(struct device *dev)
 	struct amba_device *pcdev = to_amba_device(dev);
 
 	pm_runtime_force_suspend(dev);
-	amba_pclk_unprepare(pcdev);
+	clk_unprepare(pcdev->pclk);
 
 	return 0;
 }
@@ -2978,7 +2978,7 @@ static int __maybe_unused pl330_resume(struct device *dev)
 	struct amba_device *pcdev = to_amba_device(dev);
 	int ret;
 
-	ret = amba_pclk_prepare(pcdev);
+	ret = clk_prepare(pcdev->pclk);
 	if (ret)
 		return ret;
 
diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h
index 6c7f478469715..09174970b8555 100644
--- a/include/linux/amba/bus.h
+++ b/include/linux/amba/bus.h
@@ -121,26 +121,6 @@ struct amba_device *amba_find_device(const char *, struct device *, unsigned int
 int amba_request_regions(struct amba_device *, const char *);
 void amba_release_regions(struct amba_device *);
 
-static inline int amba_pclk_enable(struct amba_device *dev)
-{
-	return clk_enable(dev->pclk);
-}
-
-static inline void amba_pclk_disable(struct amba_device *dev)
-{
-	clk_disable(dev->pclk);
-}
-
-static inline int amba_pclk_prepare(struct amba_device *dev)
-{
-	return clk_prepare(dev->pclk);
-}
-
-static inline void amba_pclk_unprepare(struct amba_device *dev)
-{
-	clk_unprepare(dev->pclk);
-}
-
 /* Some drivers don't use the struct amba_device */
 #define AMBA_CONFIG_BITS(a) (((a) >> 24) & 0xff)
 #define AMBA_REV_BITS(a) (((a) >> 20) & 0x0f)
-- 
GitLab


From dacf3ca134d0dc105caee77651a349a86bd77456 Mon Sep 17 00:00:00 2001
From: Wang Kefeng <wangkefeng.wang@huawei.com>
Date: Sat, 18 Dec 2021 09:30:41 +0100
Subject: [PATCH 0828/1586] ARM: 9173/1: amba: kill amba_find_match()

There is no one use amba_find_match(), kill it.

Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
---
 drivers/amba/bus.c       | 61 ----------------------------------------
 include/linux/amba/bus.h |  1 -
 2 files changed, 62 deletions(-)

diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c
index e1a5eca3ae3cc..dd0ef65e5c3a5 100644
--- a/drivers/amba/bus.c
+++ b/drivers/amba/bus.c
@@ -669,66 +669,6 @@ void amba_device_unregister(struct amba_device *dev)
 	device_unregister(&dev->dev);
 }
 
-
-struct find_data {
-	struct amba_device *dev;
-	struct device *parent;
-	const char *busid;
-	unsigned int id;
-	unsigned int mask;
-};
-
-static int amba_find_match(struct device *dev, void *data)
-{
-	struct find_data *d = data;
-	struct amba_device *pcdev = to_amba_device(dev);
-	int r;
-
-	r = (pcdev->periphid & d->mask) == d->id;
-	if (d->parent)
-		r &= d->parent == dev->parent;
-	if (d->busid)
-		r &= strcmp(dev_name(dev), d->busid) == 0;
-
-	if (r) {
-		get_device(dev);
-		d->dev = pcdev;
-	}
-
-	return r;
-}
-
-/**
- *	amba_find_device - locate an AMBA device given a bus id
- *	@busid: bus id for device (or NULL)
- *	@parent: parent device (or NULL)
- *	@id: peripheral ID (or 0)
- *	@mask: peripheral ID mask (or 0)
- *
- *	Return the AMBA device corresponding to the supplied parameters.
- *	If no device matches, returns NULL.
- *
- *	NOTE: When a valid device is found, its refcount is
- *	incremented, and must be decremented before the returned
- *	reference.
- */
-struct amba_device *
-amba_find_device(const char *busid, struct device *parent, unsigned int id,
-		 unsigned int mask)
-{
-	struct find_data data;
-
-	data.dev = NULL;
-	data.parent = parent;
-	data.busid = busid;
-	data.id = id;
-	data.mask = mask;
-
-	bus_for_each_dev(&amba_bustype, NULL, &data, amba_find_match);
-
-	return data.dev;
-}
-
 /**
  *	amba_request_regions - request all mem regions associated with device
  *	@dev: amba_device structure for device
@@ -768,6 +708,5 @@ EXPORT_SYMBOL(amba_driver_register);
 EXPORT_SYMBOL(amba_driver_unregister);
 EXPORT_SYMBOL(amba_device_register);
 EXPORT_SYMBOL(amba_device_unregister);
-EXPORT_SYMBOL(amba_find_device);
 EXPORT_SYMBOL(amba_request_regions);
 EXPORT_SYMBOL(amba_release_regions);
diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h
index 09174970b8555..6562f543c3e04 100644
--- a/include/linux/amba/bus.h
+++ b/include/linux/amba/bus.h
@@ -117,7 +117,6 @@ void amba_device_put(struct amba_device *);
 int amba_device_add(struct amba_device *, struct resource *);
 int amba_device_register(struct amba_device *, struct resource *);
 void amba_device_unregister(struct amba_device *);
-struct amba_device *amba_find_device(const char *, struct device *, unsigned int, unsigned int);
 int amba_request_regions(struct amba_device *, const char *);
 void amba_release_regions(struct amba_device *);
 
-- 
GitLab


From a2e7ae86a6eb1e93401214802c4d53872e310d4c Mon Sep 17 00:00:00 2001
From: Wang Kefeng <wangkefeng.wang@huawei.com>
Date: Sat, 18 Dec 2021 09:30:42 +0100
Subject: [PATCH 0829/1586] ARM: 9174/1: amba: Move EXPORT_SYMBOL() closer to
 definition

Some EXPORT_SYMBOL() is at the end of the function, but some is
at the end of file. For reader sanity and be consistent, move all
EXPORT_SYMBOL() declarations just after the end of the function.

Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
---
 drivers/amba/bus.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c
index dd0ef65e5c3a5..d3bd14aaabf6e 100644
--- a/drivers/amba/bus.c
+++ b/drivers/amba/bus.c
@@ -370,6 +370,7 @@ int amba_driver_register(struct amba_driver *drv)
 
 	return driver_register(&drv->drv);
 }
+EXPORT_SYMBOL(amba_driver_register);
 
 /**
  *	amba_driver_unregister - remove an AMBA device driver
@@ -383,7 +384,7 @@ void amba_driver_unregister(struct amba_driver *drv)
 {
 	driver_unregister(&drv->drv);
 }
-
+EXPORT_SYMBOL(amba_driver_unregister);
 
 static void amba_device_release(struct device *dev)
 {
@@ -642,6 +643,7 @@ int amba_device_register(struct amba_device *dev, struct resource *parent)
 
 	return amba_device_add(dev, parent);
 }
+EXPORT_SYMBOL(amba_device_register);
 
 /**
  *	amba_device_put - put an AMBA device
@@ -668,6 +670,7 @@ void amba_device_unregister(struct amba_device *dev)
 {
 	device_unregister(&dev->dev);
 }
+EXPORT_SYMBOL(amba_device_unregister);
 
 /**
  *	amba_request_regions - request all mem regions associated with device
@@ -689,6 +692,7 @@ int amba_request_regions(struct amba_device *dev, const char *name)
 
 	return ret;
 }
+EXPORT_SYMBOL(amba_request_regions);
 
 /**
  *	amba_release_regions - release mem regions associated with device
@@ -703,10 +707,4 @@ void amba_release_regions(struct amba_device *dev)
 	size = resource_size(&dev->res);
 	release_mem_region(dev->res.start, size);
 }
-
-EXPORT_SYMBOL(amba_driver_register);
-EXPORT_SYMBOL(amba_driver_unregister);
-EXPORT_SYMBOL(amba_device_register);
-EXPORT_SYMBOL(amba_device_unregister);
-EXPORT_SYMBOL(amba_request_regions);
 EXPORT_SYMBOL(amba_release_regions);
-- 
GitLab


From e46e45f00d9ea54edc590ee056212295635c9a6e Mon Sep 17 00:00:00 2001
From: Wang Kefeng <wangkefeng.wang@huawei.com>
Date: Wed, 22 Dec 2021 14:18:17 +0100
Subject: [PATCH 0830/1586] ARM: 9175/1: Convert to reserve_initrd_mem()

Covert to the generic reserve_initrd_mem() function.

Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
---
 arch/arm/mm/init.c | 43 +------------------------------------------
 1 file changed, 1 insertion(+), 42 deletions(-)

diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 6d0cb0f7bc54b..fe249ea919083 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -164,47 +164,6 @@ phys_addr_t __init arm_memblock_steal(phys_addr_t size, phys_addr_t align)
 	return phys;
 }
 
-static void __init arm_initrd_init(void)
-{
-#ifdef CONFIG_BLK_DEV_INITRD
-	phys_addr_t start;
-	unsigned long size;
-
-	initrd_start = initrd_end = 0;
-
-	if (!phys_initrd_size)
-		return;
-
-	/*
-	 * Round the memory region to page boundaries as per free_initrd_mem()
-	 * This allows us to detect whether the pages overlapping the initrd
-	 * are in use, but more importantly, reserves the entire set of pages
-	 * as we don't want these pages allocated for other purposes.
-	 */
-	start = round_down(phys_initrd_start, PAGE_SIZE);
-	size = phys_initrd_size + (phys_initrd_start - start);
-	size = round_up(size, PAGE_SIZE);
-
-	if (!memblock_is_region_memory(start, size)) {
-		pr_err("INITRD: 0x%08llx+0x%08lx is not a memory region - disabling initrd\n",
-		       (u64)start, size);
-		return;
-	}
-
-	if (memblock_is_region_reserved(start, size)) {
-		pr_err("INITRD: 0x%08llx+0x%08lx overlaps in-use memory region - disabling initrd\n",
-		       (u64)start, size);
-		return;
-	}
-
-	memblock_reserve(start, size);
-
-	/* Now convert initrd to virtual addresses */
-	initrd_start = __phys_to_virt(phys_initrd_start);
-	initrd_end = initrd_start + phys_initrd_size;
-#endif
-}
-
 #ifdef CONFIG_CPU_ICACHE_MISMATCH_WORKAROUND
 void check_cpu_icache_size(int cpuid)
 {
@@ -226,7 +185,7 @@ void __init arm_memblock_init(const struct machine_desc *mdesc)
 	/* Register the kernel text, kernel data and initrd with memblock. */
 	memblock_reserve(__pa(KERNEL_START), KERNEL_END - KERNEL_START);
 
-	arm_initrd_init();
+	reserve_initrd_mem();
 
 	arm_mm_memblock_reserve();
 
-- 
GitLab


From 9bc19d473014f5b79ae6d4f99150c535781eb19f Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Thu, 20 Jan 2022 06:44:18 +0100
Subject: [PATCH 0831/1586] ARM: 9181/1: vdso: remove -nostdlib compiler flag

The -nostdlib option requests the compiler to not use the standard
system startup files or libraries when linking. It is effective only
when $(CC) is used as a linker driver.

Since commit fe00e50b2db8 ("ARM: 8858/1: vdso: use $(LD) instead of
$(CC) to link VDSO"), $(LD) is directly used, hence -nostdlib is
unneeded.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
---
 arch/arm/vdso/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/vdso/Makefile b/arch/arm/vdso/Makefile
index 7c9e395b77f7c..ec52b776f9267 100644
--- a/arch/arm/vdso/Makefile
+++ b/arch/arm/vdso/Makefile
@@ -18,7 +18,7 @@ ccflags-y += -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO32
 
 ldflags-$(CONFIG_CPU_ENDIAN_BE8) := --be8
 ldflags-y := -Bsymbolic --no-undefined -soname=linux-vdso.so.1 \
-	    -z max-page-size=4096 -nostdlib -shared $(ldflags-y) \
+	    -z max-page-size=4096 -shared $(ldflags-y) \
 	    --hash-style=sysv --build-id=sha1 \
 	    -T
 
-- 
GitLab


From 64276a9939ff414f2f0db38036cf4e1a0a703394 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Thu, 24 Feb 2022 23:04:56 +0100
Subject: [PATCH 0832/1586] random: cleanup UUID handling

Rather than hard coding various lengths, we can use the right constants.
Strings should be `char *` while buffers should be `u8 *`. Rather than
have a nonsensical and unused maxlength, just remove it. Finally, use
snprintf instead of sprintf, just out of good hygiene.

As well, remove the old comment about returning a binary UUID via the
binary sysctl syscall. That syscall was removed from the kernel in 5.5,
and actually, the "uuid_strategy" function and related infrastructure
for even serving it via the binary sysctl syscall was removed with
894d2491153a ("sysctl drivers: Remove dead binary sysctl support") back
in 2.6.33.

Reviewed-by: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/char/random.c | 29 +++++++++++++----------------
 1 file changed, 13 insertions(+), 16 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 536237a0f073b..4cdf39567bae2 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1661,22 +1661,25 @@ const struct file_operations urandom_fops = {
 static int sysctl_random_min_urandom_seed = 60;
 static int sysctl_random_write_wakeup_bits = POOL_MIN_BITS;
 static int sysctl_poolsize = POOL_BITS;
-static char sysctl_bootid[16];
+static u8 sysctl_bootid[UUID_SIZE];
 
 /*
  * This function is used to return both the bootid UUID, and random
- * UUID.  The difference is in whether table->data is NULL; if it is,
+ * UUID. The difference is in whether table->data is NULL; if it is,
  * then a new UUID is generated and returned to the user.
- *
- * If the user accesses this via the proc interface, the UUID will be
- * returned as an ASCII string in the standard UUID format; if via the
- * sysctl system call, as 16 bytes of binary data.
  */
 static int proc_do_uuid(struct ctl_table *table, int write, void *buffer,
 			size_t *lenp, loff_t *ppos)
 {
-	struct ctl_table fake_table;
-	unsigned char buf[64], tmp_uuid[16], *uuid;
+	u8 tmp_uuid[UUID_SIZE], *uuid;
+	char uuid_string[UUID_STRING_LEN + 1];
+	struct ctl_table fake_table = {
+		.data = uuid_string,
+		.maxlen = UUID_STRING_LEN
+	};
+
+	if (write)
+		return -EPERM;
 
 	uuid = table->data;
 	if (!uuid) {
@@ -1691,12 +1694,8 @@ static int proc_do_uuid(struct ctl_table *table, int write, void *buffer,
 		spin_unlock(&bootid_spinlock);
 	}
 
-	sprintf(buf, "%pU", uuid);
-
-	fake_table.data = buf;
-	fake_table.maxlen = sizeof(buf);
-
-	return proc_dostring(&fake_table, write, buffer, lenp, ppos);
+	snprintf(uuid_string, sizeof(uuid_string), "%pU", uuid);
+	return proc_dostring(&fake_table, 0, buffer, lenp, ppos);
 }
 
 static struct ctl_table random_table[] = {
@@ -1731,13 +1730,11 @@ static struct ctl_table random_table[] = {
 	{
 		.procname	= "boot_id",
 		.data		= &sysctl_bootid,
-		.maxlen		= 16,
 		.mode		= 0444,
 		.proc_handler	= proc_do_uuid,
 	},
 	{
 		.procname	= "uuid",
-		.maxlen		= 16,
 		.mode		= 0444,
 		.proc_handler	= proc_do_uuid,
 	},
-- 
GitLab


From abded93ec1e9692920fe309f07f40bd1035f2940 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Thu, 24 Feb 2022 18:30:58 +0100
Subject: [PATCH 0833/1586] random: unify cycles_t and jiffies usage and types

random_get_entropy() returns a cycles_t, not an unsigned long, which is
sometimes 64 bits on various 32-bit platforms, including x86.
Conversely, jiffies is always unsigned long. This commit fixes things to
use cycles_t for fields that use random_get_entropy(), named "cycles",
and unsigned long for fields that use jiffies, named "now". It's also
good to mix in a cycles_t and a jiffies in the same way for both
add_device_randomness and add_timer_randomness, rather than using xor in
one case. Finally, we unify the order of these volatile reads, always
reading the more precise cycles counter, and then jiffies, so that the
cycle counter is as close to the event as possible.

Cc: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/char/random.c | 56 +++++++++++++++++++++----------------------
 1 file changed, 27 insertions(+), 29 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 4cdf39567bae2..3b83a6ac9f596 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1020,12 +1020,6 @@ int __init rand_initialize(void)
 	return 0;
 }
 
-/* There is one of these per entropy source */
-struct timer_rand_state {
-	cycles_t last_time;
-	long last_delta, last_delta2;
-};
-
 /*
  * Add device- or boot-specific data to the input pool to help
  * initialize it.
@@ -1036,19 +1030,26 @@ struct timer_rand_state {
  */
 void add_device_randomness(const void *buf, size_t size)
 {
-	unsigned long time = random_get_entropy() ^ jiffies;
-	unsigned long flags;
+	cycles_t cycles = random_get_entropy();
+	unsigned long flags, now = jiffies;
 
 	if (crng_init == 0 && size)
 		crng_pre_init_inject(buf, size, false, false);
 
 	spin_lock_irqsave(&input_pool.lock, flags);
+	_mix_pool_bytes(&cycles, sizeof(cycles));
+	_mix_pool_bytes(&now, sizeof(now));
 	_mix_pool_bytes(buf, size);
-	_mix_pool_bytes(&time, sizeof(time));
 	spin_unlock_irqrestore(&input_pool.lock, flags);
 }
 EXPORT_SYMBOL(add_device_randomness);
 
+/* There is one of these per entropy source */
+struct timer_rand_state {
+	unsigned long last_time;
+	long last_delta, last_delta2;
+};
+
 /*
  * This function adds entropy to the entropy "pool" by using timing
  * delays.  It uses the timer_rand_state structure to make an estimate
@@ -1057,29 +1058,26 @@ EXPORT_SYMBOL(add_device_randomness);
  * The number "num" is also added to the pool - it should somehow describe
  * the type of event which just happened.  This is currently 0-255 for
  * keyboard scan codes, and 256 upwards for interrupts.
- *
  */
 static void add_timer_randomness(struct timer_rand_state *state, unsigned int num)
 {
-	struct {
-		long jiffies;
-		unsigned int cycles;
-		unsigned int num;
-	} sample;
+	cycles_t cycles = random_get_entropy();
+	unsigned long flags, now = jiffies;
 	long delta, delta2, delta3;
 
-	sample.jiffies = jiffies;
-	sample.cycles = random_get_entropy();
-	sample.num = num;
-	mix_pool_bytes(&sample, sizeof(sample));
+	spin_lock_irqsave(&input_pool.lock, flags);
+	_mix_pool_bytes(&cycles, sizeof(cycles));
+	_mix_pool_bytes(&now, sizeof(now));
+	_mix_pool_bytes(&num, sizeof(num));
+	spin_unlock_irqrestore(&input_pool.lock, flags);
 
 	/*
 	 * Calculate number of bits of randomness we probably added.
 	 * We take into account the first, second and third-order deltas
 	 * in order to make our estimate.
 	 */
-	delta = sample.jiffies - READ_ONCE(state->last_time);
-	WRITE_ONCE(state->last_time, sample.jiffies);
+	delta = now - READ_ONCE(state->last_time);
+	WRITE_ONCE(state->last_time, now);
 
 	delta2 = delta - READ_ONCE(state->last_delta);
 	WRITE_ONCE(state->last_delta, delta);
@@ -1305,10 +1303,10 @@ static void mix_interrupt_randomness(struct work_struct *work)
 void add_interrupt_randomness(int irq)
 {
 	enum { MIX_INFLIGHT = 1U << 31 };
+	cycles_t cycles = random_get_entropy();
+	unsigned long now = jiffies;
 	struct fast_pool *fast_pool = this_cpu_ptr(&irq_randomness);
 	struct pt_regs *regs = get_irq_regs();
-	unsigned long now = jiffies;
-	cycles_t cycles = random_get_entropy();
 	unsigned int new_count;
 
 	if (cycles == 0)
@@ -1383,28 +1381,28 @@ static void entropy_timer(struct timer_list *t)
 static void try_to_generate_entropy(void)
 {
 	struct {
-		unsigned long now;
+		cycles_t cycles;
 		struct timer_list timer;
 	} stack;
 
-	stack.now = random_get_entropy();
+	stack.cycles = random_get_entropy();
 
 	/* Slow counter - or none. Don't even bother */
-	if (stack.now == random_get_entropy())
+	if (stack.cycles == random_get_entropy())
 		return;
 
 	timer_setup_on_stack(&stack.timer, entropy_timer, 0);
 	while (!crng_ready()) {
 		if (!timer_pending(&stack.timer))
 			mod_timer(&stack.timer, jiffies + 1);
-		mix_pool_bytes(&stack.now, sizeof(stack.now));
+		mix_pool_bytes(&stack.cycles, sizeof(stack.cycles));
 		schedule();
-		stack.now = random_get_entropy();
+		stack.cycles = random_get_entropy();
 	}
 
 	del_timer_sync(&stack.timer);
 	destroy_timer_on_stack(&stack.timer);
-	mix_pool_bytes(&stack.now, sizeof(stack.now));
+	mix_pool_bytes(&stack.cycles, sizeof(stack.cycles));
 }
 
 
-- 
GitLab


From c2a7de4feb6e09f23af7accc0f882a8fa92e7ae5 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Sun, 13 Feb 2022 18:25:07 +0100
Subject: [PATCH 0834/1586] random: do crng pre-init loading in worker rather
 than irq

Taking spinlocks from IRQ context is generally problematic for
PREEMPT_RT. That is, in part, why we take trylocks instead. However, a
spin_try_lock() is also problematic since another spin_lock() invocation
can potentially PI-boost the wrong task, as the spin_try_lock() is
invoked from an IRQ-context, so the task on CPU (random task or idle) is
not the actual owner.

Additionally, by deferring the crng pre-init loading to the worker, we
can use the cryptographic hash function rather than xor, which is
perhaps a meaningful difference when considering this data has only been
through the relatively weak fast_mix() function.

The biggest downside of this approach is that the pre-init loading is
now deferred until later, which means things that need random numbers
after interrupts are enabled, but before workqueues are running -- or
before this particular worker manages to run -- are going to get into
trouble. Hopefully in the real world, this window is rather small,
especially since this code won't run until 64 interrupts had occurred.

Cc: Sultan Alsawaf <sultan@kerneltoast.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Eric Biggers <ebiggers@kernel.org>
Cc: Theodore Ts'o <tytso@mit.edu>
Acked-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Reviewed-by: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/char/random.c | 65 +++++++++++++------------------------------
 1 file changed, 19 insertions(+), 46 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 3b83a6ac9f596..8171c3bbf460f 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -443,10 +443,6 @@ static void crng_make_state(u32 chacha_state[CHACHA_STATE_WORDS],
  * boot time when it's better to have something there rather than
  * nothing.
  *
- * There are two paths, a slow one and a fast one. The slow one
- * hashes the input along with the current key. The fast one simply
- * xors it in, and should only be used from interrupt context.
- *
  * If account is set, then the crng_init_cnt counter is incremented.
  * This shouldn't be set by functions like add_device_randomness(),
  * where we can't trust the buffer passed to it is guaranteed to be
@@ -455,19 +451,15 @@ static void crng_make_state(u32 chacha_state[CHACHA_STATE_WORDS],
  * Returns the number of bytes processed from input, which is bounded
  * by CRNG_INIT_CNT_THRESH if account is true.
  */
-static size_t crng_pre_init_inject(const void *input, size_t len,
-				   bool fast, bool account)
+static size_t crng_pre_init_inject(const void *input, size_t len, bool account)
 {
 	static int crng_init_cnt = 0;
+	struct blake2s_state hash;
 	unsigned long flags;
 
-	if (fast) {
-		if (!spin_trylock_irqsave(&base_crng.lock, flags))
-			return 0;
-	} else {
-		spin_lock_irqsave(&base_crng.lock, flags);
-	}
+	blake2s_init(&hash, sizeof(base_crng.key));
 
+	spin_lock_irqsave(&base_crng.lock, flags);
 	if (crng_init != 0) {
 		spin_unlock_irqrestore(&base_crng.lock, flags);
 		return 0;
@@ -476,21 +468,9 @@ static size_t crng_pre_init_inject(const void *input, size_t len,
 	if (account)
 		len = min_t(size_t, len, CRNG_INIT_CNT_THRESH - crng_init_cnt);
 
-	if (fast) {
-		const u8 *src = input;
-		size_t i;
-
-		for (i = 0; i < len; ++i)
-			base_crng.key[(crng_init_cnt + i) %
-				      sizeof(base_crng.key)] ^= src[i];
-	} else {
-		struct blake2s_state hash;
-
-		blake2s_init(&hash, sizeof(base_crng.key));
-		blake2s_update(&hash, base_crng.key, sizeof(base_crng.key));
-		blake2s_update(&hash, input, len);
-		blake2s_final(&hash, base_crng.key);
-	}
+	blake2s_update(&hash, base_crng.key, sizeof(base_crng.key));
+	blake2s_update(&hash, input, len);
+	blake2s_final(&hash, base_crng.key);
 
 	if (account) {
 		crng_init_cnt += len;
@@ -1034,7 +1014,7 @@ void add_device_randomness(const void *buf, size_t size)
 	unsigned long flags, now = jiffies;
 
 	if (crng_init == 0 && size)
-		crng_pre_init_inject(buf, size, false, false);
+		crng_pre_init_inject(buf, size, false);
 
 	spin_lock_irqsave(&input_pool.lock, flags);
 	_mix_pool_bytes(&cycles, sizeof(cycles));
@@ -1155,7 +1135,7 @@ void add_hwgenerator_randomness(const void *buffer, size_t count,
 				size_t entropy)
 {
 	if (unlikely(crng_init == 0)) {
-		size_t ret = crng_pre_init_inject(buffer, count, false, true);
+		size_t ret = crng_pre_init_inject(buffer, count, true);
 		mix_pool_bytes(buffer, ret);
 		count -= ret;
 		buffer += ret;
@@ -1295,8 +1275,14 @@ static void mix_interrupt_randomness(struct work_struct *work)
 	fast_pool->last = jiffies;
 	local_irq_enable();
 
-	mix_pool_bytes(pool, sizeof(pool));
-	credit_entropy_bits(1);
+	if (unlikely(crng_init == 0)) {
+		crng_pre_init_inject(pool, sizeof(pool), true);
+		mix_pool_bytes(pool, sizeof(pool));
+	} else {
+		mix_pool_bytes(pool, sizeof(pool));
+		credit_entropy_bits(1);
+	}
+
 	memzero_explicit(pool, sizeof(pool));
 }
 
@@ -1329,24 +1315,11 @@ void add_interrupt_randomness(int irq)
 	fast_mix(fast_pool->pool32);
 	new_count = ++fast_pool->count;
 
-	if (unlikely(crng_init == 0)) {
-		if (new_count >= 64 &&
-		    crng_pre_init_inject(fast_pool->pool32, sizeof(fast_pool->pool32),
-					 true, true) > 0) {
-			fast_pool->count = 0;
-			fast_pool->last = now;
-			if (spin_trylock(&input_pool.lock)) {
-				_mix_pool_bytes(&fast_pool->pool32, sizeof(fast_pool->pool32));
-				spin_unlock(&input_pool.lock);
-			}
-		}
-		return;
-	}
-
 	if (new_count & MIX_INFLIGHT)
 		return;
 
-	if (new_count < 64 && !time_after(now, fast_pool->last + HZ))
+	if (new_count < 64 && (!time_after(now, fast_pool->last + HZ) ||
+			       unlikely(crng_init == 0)))
 		return;
 
 	if (unlikely(!fast_pool->mix.func))
-- 
GitLab


From f16ed63e53c79070283d3c264de5309794272ae9 Mon Sep 17 00:00:00 2001
From: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
Date: Sun, 27 Feb 2022 22:59:56 +0000
Subject: [PATCH 0835/1586] spi: dt-bindings: renesas,rspi: Document RZ/V2L SoC

Add RSPI binding documentation for Renesas RZ/V2L SoC.

RSPI block is identical to one found on RZ/A, so no driver changes are
required. The fallback compatible string "renesas,rspi-rz" will be used
on RZ/V2L.

Signed-off-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
Reviewed-by: Biju Das <biju.das.jz@bp.renesas.com>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://lore.kernel.org/r/20220227225956.29570-1-prabhakar.mahadev-lad.rj@bp.renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 Documentation/devicetree/bindings/spi/renesas,rspi.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Documentation/devicetree/bindings/spi/renesas,rspi.yaml b/Documentation/devicetree/bindings/spi/renesas,rspi.yaml
index 76e6d9e52fc72..a902f0ca21983 100644
--- a/Documentation/devicetree/bindings/spi/renesas,rspi.yaml
+++ b/Documentation/devicetree/bindings/spi/renesas,rspi.yaml
@@ -22,6 +22,7 @@ properties:
               - renesas,rspi-r7s72100  # RZ/A1H
               - renesas,rspi-r7s9210   # RZ/A2
               - renesas,r9a07g044-rspi # RZ/G2{L,LC}
+              - renesas,r9a07g054-rspi # RZ/V2L
           - const: renesas,rspi-rz     # RZ/A and RZ/G2{L,LC}
 
       - items:
@@ -124,6 +125,7 @@ allOf:
             enum:
               - renesas,qspi
               - renesas,r9a07g044-rspi
+              - renesas,r9a07g054-rspi
     then:
       required:
         - resets
-- 
GitLab


From e377a3e698fb56cb63f6bddbebe7da76dc37e316 Mon Sep 17 00:00:00 2001
From: Ondrej Valousek <ondrej.valousek.xm@renesas.com>
Date: Tue, 11 Jan 2022 13:08:42 +0100
Subject: [PATCH 0836/1586] nfsd: Add support for the birth time attribute

For filesystems that supports "btime" timestamp (i.e. most modern
filesystems do) we share it via kernel nfsd. Btime support for NFS
client has already been added by Trond recently.

Suggested-by: Bruce Fields <bfields@fieldses.org>
Signed-off-by: Ondrej Valousek <ondrej.valousek.xm@renesas.com>
[ cel: addressed some whitespace/checkpatch nits ]
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfs4xdr.c | 10 ++++++++++
 fs/nfsd/nfsd.h    |  2 +-
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 714a3a3bd50c3..da92e7d2ab6a1 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2854,6 +2854,9 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
 	err = vfs_getattr(&path, &stat, STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT);
 	if (err)
 		goto out_nfserr;
+	if (!(stat.result_mask & STATX_BTIME))
+		/* underlying FS does not offer btime so we can't share it */
+		bmval1 &= ~FATTR4_WORD1_TIME_CREATE;
 	if ((bmval0 & (FATTR4_WORD0_FILES_AVAIL | FATTR4_WORD0_FILES_FREE |
 			FATTR4_WORD0_FILES_TOTAL | FATTR4_WORD0_MAXNAME)) ||
 	    (bmval1 & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE |
@@ -3254,6 +3257,13 @@ out_acl:
 		p = xdr_encode_hyper(p, (s64)stat.mtime.tv_sec);
 		*p++ = cpu_to_be32(stat.mtime.tv_nsec);
 	}
+	if (bmval1 & FATTR4_WORD1_TIME_CREATE) {
+		p = xdr_reserve_space(xdr, 12);
+		if (!p)
+			goto out_resource;
+		p = xdr_encode_hyper(p, (s64)stat.btime.tv_sec);
+		*p++ = cpu_to_be32(stat.btime.tv_nsec);
+	}
 	if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) {
 		struct kstat parent_stat;
 		u64 ino = stat.ino;
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 3e5008b475ff0..4fc1fd639527a 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -364,7 +364,7 @@ void		nfsd_lockd_shutdown(void);
  | FATTR4_WORD1_OWNER	        | FATTR4_WORD1_OWNER_GROUP  | FATTR4_WORD1_RAWDEV           \
  | FATTR4_WORD1_SPACE_AVAIL     | FATTR4_WORD1_SPACE_FREE   | FATTR4_WORD1_SPACE_TOTAL      \
  | FATTR4_WORD1_SPACE_USED      | FATTR4_WORD1_TIME_ACCESS  | FATTR4_WORD1_TIME_ACCESS_SET  \
- | FATTR4_WORD1_TIME_DELTA   | FATTR4_WORD1_TIME_METADATA    \
+ | FATTR4_WORD1_TIME_DELTA      | FATTR4_WORD1_TIME_METADATA   | FATTR4_WORD1_TIME_CREATE      \
  | FATTR4_WORD1_TIME_MODIFY     | FATTR4_WORD1_TIME_MODIFY_SET | FATTR4_WORD1_MOUNTED_ON_FILEID)
 
 #define NFSD4_SUPPORTED_ATTRS_WORD2 0
-- 
GitLab


From 378a6109dd142a678f629b740f558365150f60f9 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 30 Sep 2021 19:19:57 -0400
Subject: [PATCH 0837/1586] NFSD: De-duplicate hash bucket indexing

Clean up: The details of finding the right hash bucket are exactly
the same in both nfsd_cache_lookup() and nfsd_cache_update().

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfscache.c | 22 ++++++++++------------
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index a4a69ab6ab280..f79790d367288 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -84,12 +84,6 @@ nfsd_hashsize(unsigned int limit)
 	return roundup_pow_of_two(limit / TARGET_BUCKET_SIZE);
 }
 
-static u32
-nfsd_cache_hash(__be32 xid, struct nfsd_net *nn)
-{
-	return hash_32((__force u32)xid, nn->maskbits);
-}
-
 static struct svc_cacherep *
 nfsd_reply_cache_alloc(struct svc_rqst *rqstp, __wsum csum,
 			struct nfsd_net *nn)
@@ -241,6 +235,14 @@ lru_put_end(struct nfsd_drc_bucket *b, struct svc_cacherep *rp)
 	list_move_tail(&rp->c_lru, &b->lru_head);
 }
 
+static noinline struct nfsd_drc_bucket *
+nfsd_cache_bucket_find(__be32 xid, struct nfsd_net *nn)
+{
+	unsigned int hash = hash_32((__force u32)xid, nn->maskbits);
+
+	return &nn->drc_hashtbl[hash];
+}
+
 static long prune_bucket(struct nfsd_drc_bucket *b, struct nfsd_net *nn,
 			 unsigned int max)
 {
@@ -421,10 +423,8 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp)
 {
 	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 	struct svc_cacherep	*rp, *found;
-	__be32			xid = rqstp->rq_xid;
 	__wsum			csum;
-	u32 hash = nfsd_cache_hash(xid, nn);
-	struct nfsd_drc_bucket *b = &nn->drc_hashtbl[hash];
+	struct nfsd_drc_bucket	*b = nfsd_cache_bucket_find(rqstp->rq_xid, nn);
 	int type = rqstp->rq_cachetype;
 	int rtn = RC_DOIT;
 
@@ -528,7 +528,6 @@ void nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp)
 	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 	struct svc_cacherep *rp = rqstp->rq_cacherep;
 	struct kvec	*resv = &rqstp->rq_res.head[0], *cachv;
-	u32		hash;
 	struct nfsd_drc_bucket *b;
 	int		len;
 	size_t		bufsize = 0;
@@ -536,8 +535,7 @@ void nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp)
 	if (!rp)
 		return;
 
-	hash = nfsd_cache_hash(rp->c_key.k_xid, nn);
-	b = &nn->drc_hashtbl[hash];
+	b = nfsd_cache_bucket_find(rp->c_key.k_xid, nn);
 
 	len = resv->iov_len - ((char*)statp - (char*)resv->iov_base);
 	len >>= 2;
-- 
GitLab


From 0f29ce32fbc56cfdb304eec8a4deb920ccfd89c3 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 28 Sep 2021 11:39:02 -0400
Subject: [PATCH 0838/1586] NFSD: Skip extra computation for RC_NOCACHE case

Force the compiler to skip unneeded initialization for cases that
don't need those values. For example, NFSv4 COMPOUND operations are
RC_NOCACHE.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfscache.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index f79790d367288..34087a7e4f93c 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -421,10 +421,10 @@ out:
  */
 int nfsd_cache_lookup(struct svc_rqst *rqstp)
 {
-	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+	struct nfsd_net		*nn;
 	struct svc_cacherep	*rp, *found;
 	__wsum			csum;
-	struct nfsd_drc_bucket	*b = nfsd_cache_bucket_find(rqstp->rq_xid, nn);
+	struct nfsd_drc_bucket	*b;
 	int type = rqstp->rq_cachetype;
 	int rtn = RC_DOIT;
 
@@ -440,10 +440,12 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp)
 	 * Since the common case is a cache miss followed by an insert,
 	 * preallocate an entry.
 	 */
+	nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 	rp = nfsd_reply_cache_alloc(rqstp, csum, nn);
 	if (!rp)
 		goto out;
 
+	b = nfsd_cache_bucket_find(rqstp->rq_xid, nn);
 	spin_lock(&b->cache_lock);
 	found = nfsd_cache_insert(b, rp, nn);
 	if (found != rp) {
-- 
GitLab


From add1511c38166cf1036765f8c4aa939f0275a799 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 28 Sep 2021 11:40:59 -0400
Subject: [PATCH 0839/1586] NFSD: Streamline the rare "found" case

Move a rarely called function call site out of the hot path.

This is an exceptionally small improvement because the compiler
inlines most of the functions that nfsd_cache_lookup() calls.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfscache.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index 34087a7e4f93c..0b3f12aa37ff5 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -448,11 +448,8 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp)
 	b = nfsd_cache_bucket_find(rqstp->rq_xid, nn);
 	spin_lock(&b->cache_lock);
 	found = nfsd_cache_insert(b, rp, nn);
-	if (found != rp) {
-		nfsd_reply_cache_free_locked(NULL, rp, nn);
-		rp = found;
+	if (found != rp)
 		goto found_entry;
-	}
 
 	nfsd_stats_rc_misses_inc();
 	rqstp->rq_cacherep = rp;
@@ -470,8 +467,10 @@ out:
 
 found_entry:
 	/* We found a matching entry which is either in progress or done. */
+	nfsd_reply_cache_free_locked(NULL, rp, nn);
 	nfsd_stats_rc_hits_inc();
 	rtn = RC_DROPIT;
+	rp = found;
 
 	/* Request being processed */
 	if (rp->c_state == RC_INPROG)
-- 
GitLab


From d07c9ad622474616e94572e59e725c2c4a494fb4 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 11 Jan 2022 12:43:55 -0500
Subject: [PATCH 0840/1586] tracing: Introduce helpers to safely handle
 dynamic-sized sockaddrs

Enable a struct sockaddr to be stored in a trace record as a
dynamically-sized field. The common cases are AF_INET and AF_INET6
which are different sizes, and are vastly smaller than a struct
sockaddr_storage.

These are safer because, when used properly, the size of the
sockaddr destination field in each trace record is now guaranteed
to be the same as the source address that is being copied into it.

Link: https://lore.kernel.org/all/164182978641.8391.8277203495236105391.stgit@bazille.1015granger.net/
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/trace/bpf_probe.h    |  6 ++++
 include/trace/perf.h         |  6 ++++
 include/trace/trace_events.h | 55 ++++++++++++++++++++++++++++++++++--
 3 files changed, 65 insertions(+), 2 deletions(-)

diff --git a/include/trace/bpf_probe.h b/include/trace/bpf_probe.h
index 7660a7846586c..6a13220d2d27b 100644
--- a/include/trace/bpf_probe.h
+++ b/include/trace/bpf_probe.h
@@ -21,6 +21,9 @@
 #undef __get_bitmask
 #define __get_bitmask(field) (char *)__get_dynamic_array(field)
 
+#undef __get_sockaddr
+#define __get_sockaddr(field) ((struct sockaddr *)__get_dynamic_array(field))
+
 #undef __get_rel_dynamic_array
 #define __get_rel_dynamic_array(field)	\
 		((void *)(&__entry->__rel_loc_##field) +	\
@@ -37,6 +40,9 @@
 #undef __get_rel_bitmask
 #define __get_rel_bitmask(field) (char *)__get_rel_dynamic_array(field)
 
+#undef __get_rel_sockaddr
+#define __get_rel_sockaddr(field) ((struct sockaddr *)__get_rel_dynamic_array(field))
+
 #undef __perf_count
 #define __perf_count(c)	(c)
 
diff --git a/include/trace/perf.h b/include/trace/perf.h
index 5d48c46a30083..5800d13146c3d 100644
--- a/include/trace/perf.h
+++ b/include/trace/perf.h
@@ -21,6 +21,9 @@
 #undef __get_bitmask
 #define __get_bitmask(field) (char *)__get_dynamic_array(field)
 
+#undef __get_sockaddr
+#define __get_sockaddr(field) ((struct sockaddr *)__get_dynamic_array(field))
+
 #undef __get_rel_dynamic_array
 #define __get_rel_dynamic_array(field)	\
 		((void *)__entry +					\
@@ -38,6 +41,9 @@
 #undef __get_rel_bitmask
 #define __get_rel_bitmask(field) (char *)__get_rel_dynamic_array(field)
 
+#undef __get_rel_sockaddr
+#define __get_rel_sockaddr(field) ((struct sockaddr *)__get_rel_dynamic_array(field))
+
 #undef __perf_count
 #define __perf_count(c)	(__count = (c))
 
diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h
index 3d29919045af2..7c86cc541c7a6 100644
--- a/include/trace/trace_events.h
+++ b/include/trace/trace_events.h
@@ -108,6 +108,9 @@ TRACE_MAKE_SYSTEM_STR();
 #undef __bitmask
 #define __bitmask(item, nr_bits) __dynamic_array(char, item, -1)
 
+#undef __sockaddr
+#define __sockaddr(field, len) __dynamic_array(u8, field, len)
+
 #undef __rel_dynamic_array
 #define __rel_dynamic_array(type, item, len) u32 __rel_loc_##item;
 
@@ -120,6 +123,9 @@ TRACE_MAKE_SYSTEM_STR();
 #undef __rel_bitmask
 #define __rel_bitmask(item, nr_bits) __rel_dynamic_array(char, item, -1)
 
+#undef __rel_sockaddr
+#define __rel_sockaddr(field, len) __rel_dynamic_array(u8, field, len)
+
 #undef TP_STRUCT__entry
 #define TP_STRUCT__entry(args...) args
 
@@ -212,11 +218,14 @@ TRACE_MAKE_SYSTEM_STR();
 #undef __string
 #define __string(item, src) __dynamic_array(char, item, -1)
 
+#undef __string_len
+#define __string_len(item, src, len) __dynamic_array(char, item, -1)
+
 #undef __bitmask
 #define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1)
 
-#undef __string_len
-#define __string_len(item, src, len) __dynamic_array(char, item, -1)
+#undef __sockaddr
+#define __sockaddr(field, len) __dynamic_array(u8, field, len)
 
 #undef __rel_dynamic_array
 #define __rel_dynamic_array(type, item, len)	u32 item;
@@ -230,6 +239,9 @@ TRACE_MAKE_SYSTEM_STR();
 #undef __rel_bitmask
 #define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, -1)
 
+#undef __rel_sockaddr
+#define __rel_sockaddr(field, len) __rel_dynamic_array(u8, field, len)
+
 #undef DECLARE_EVENT_CLASS
 #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print)	\
 	struct trace_event_data_offsets_##call {			\
@@ -349,6 +361,12 @@ TRACE_MAKE_SYSTEM_STR();
 		trace_print_bitmask_seq(p, __bitmask, __bitmask_size);	\
 	})
 
+#undef __get_sockaddr
+#define __get_sockaddr(field)	((struct sockaddr *)__get_dynamic_array(field))
+
+#undef __get_rel_sockaddr
+#define __get_rel_sockaddr(field)	((struct sockaddr *)__get_rel_dynamic_array(field))
+
 #undef __print_flags
 #define __print_flags(flag, delim, flag_array...)			\
 	({								\
@@ -518,6 +536,9 @@ static struct trace_event_functions trace_event_type_funcs_##call = {	\
 #undef __bitmask
 #define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1)
 
+#undef __sockaddr
+#define __sockaddr(field, len) __dynamic_array(u8, field, len)
+
 #undef __rel_dynamic_array
 #define __rel_dynamic_array(_type, _item, _len) {			\
 	.type = "__rel_loc " #_type "[]", .name = #_item,		\
@@ -533,6 +554,9 @@ static struct trace_event_functions trace_event_type_funcs_##call = {	\
 #undef __rel_bitmask
 #define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, -1)
 
+#undef __rel_sockaddr
+#define __rel_sockaddr(field, len) __rel_dynamic_array(u8, field, len)
+
 #undef DECLARE_EVENT_CLASS
 #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, func, print)	\
 static struct trace_event_fields trace_event_fields_##call[] = {	\
@@ -624,6 +648,12 @@ static struct trace_event_fields trace_event_fields_##call[] = {	\
 #define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item,	\
 					 __bitmask_size_in_longs(nr_bits))
 
+#undef __sockaddr
+#define __sockaddr(field, len) __dynamic_array(u8, field, len)
+
+#undef __rel_sockaddr
+#define __rel_sockaddr(field, len) __rel_dynamic_array(u8, field, len)
+
 #undef DECLARE_EVENT_CLASS
 #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print)	\
 static inline notrace int trace_event_get_offsets_##call(		\
@@ -788,6 +818,15 @@ static inline notrace int trace_event_get_offsets_##call(		\
 #define __assign_bitmask(dst, src, nr_bits)					\
 	memcpy(__get_bitmask(dst), (src), __bitmask_size_in_bytes(nr_bits))
 
+#undef __sockaddr
+#define __sockaddr(field, len) __dynamic_array(u8, field, len)
+
+#undef __get_sockaddr
+#define __get_sockaddr(field)	((struct sockaddr *)__get_dynamic_array(field))
+
+#define __assign_sockaddr(dest, src, len)					\
+	memcpy(__get_dynamic_array(dest), src, len)
+
 #undef __rel_dynamic_array
 #define __rel_dynamic_array(type, item, len)				\
 	__entry->__rel_loc_##item = __data_offsets.item;
@@ -819,6 +858,16 @@ static inline notrace int trace_event_get_offsets_##call(		\
 #define __assign_rel_bitmask(dst, src, nr_bits)					\
 	memcpy(__get_rel_bitmask(dst), (src), __bitmask_size_in_bytes(nr_bits))
 
+#undef __rel_sockaddr
+#define __rel_sockaddr(field, len) __rel_dynamic_array(u8, field, len)
+
+#undef __get_rel_sockaddr
+#define __get_rel_sockaddr(field)	((struct sockaddr *)__get_rel_dynamic_array(field))
+
+#define __assign_rel_sockaddr(dest, src, len)					\
+	memcpy(__get_rel_dynamic_array(dest), src, len)
+
+
 #undef TP_fast_assign
 #define TP_fast_assign(args...) args
 
@@ -883,10 +932,12 @@ static inline void ftrace_test_probe_##call(void)			\
 #undef __get_dynamic_array_len
 #undef __get_str
 #undef __get_bitmask
+#undef __get_sockaddr
 #undef __get_rel_dynamic_array
 #undef __get_rel_dynamic_array_len
 #undef __get_rel_str
 #undef __get_rel_bitmask
+#undef __get_rel_sockaddr
 #undef __print_array
 #undef __print_hex_dump
 
-- 
GitLab


From c6ced22997ad56a05377221bded7bb30973a62f2 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
Date: Tue, 25 Jan 2022 10:44:08 -0500
Subject: [PATCH 0841/1586] tracing: Update print fmt check to handle new
 __get_sockaddr() macro

A helper macro was added to make reading socket addresses easier in trace
events. It pairs %pISpc with __get_sockaddr() that reads the socket
address from the ring buffer into a human readable format.

The boot up check that makes sure that trace events do not reference
pointers to memory that can later be freed when the trace event is read,
incorrectly flagged this as a delayed reference. Update the check to
handle "__get_sockaddr" and not report an error on it.

Link: https://lore.kernel.org/all/20220125160505.068dbb52@canb.auug.org.au/

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 kernel/trace/trace_events.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 3147614c1812a..f527ae807e77d 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -384,6 +384,12 @@ static void test_event_printk(struct trace_event_call *call)
 			if (!(dereference_flags & (1ULL << arg)))
 				goto next_arg;
 
+			/* Check for __get_sockaddr */;
+			if (str_has_prefix(fmt + i, "__get_sockaddr(")) {
+				dereference_flags &= ~(1ULL << arg);
+				goto next_arg;
+			}
+
 			/* Find the REC-> in the argument */
 			c = strchr(fmt + i, ',');
 			r = strstr(fmt + i, "REC->");
-- 
GitLab


From 9db0e15fb32b7418608d9d45011837a049ec4a28 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 11 Jan 2022 12:44:55 -0500
Subject: [PATCH 0842/1586] NFSD: Use __sockaddr field to store socket
 addresses

As an example usage of the new __sockaddr field, convert some NFSD
trace points to use it.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/trace.h | 79 ++++++++++++++++++++++++-------------------------
 1 file changed, 39 insertions(+), 40 deletions(-)

diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index 5889db66409df..f0d2ccb633a68 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -613,20 +613,21 @@ TRACE_EVENT(nfsd_clid_cred_mismatch,
 		__field(u32, cl_id)
 		__field(unsigned long, cl_flavor)
 		__field(unsigned long, new_flavor)
-		__array(unsigned char, addr, sizeof(struct sockaddr_in6))
+		__sockaddr(addr, rqstp->rq_xprt->xpt_remotelen)
 	),
 	TP_fast_assign(
 		__entry->cl_boot = clp->cl_clientid.cl_boot;
 		__entry->cl_id = clp->cl_clientid.cl_id;
 		__entry->cl_flavor = clp->cl_cred.cr_flavor;
 		__entry->new_flavor = rqstp->rq_cred.cr_flavor;
-		memcpy(__entry->addr, &rqstp->rq_xprt->xpt_remote,
-			sizeof(struct sockaddr_in6));
+		__assign_sockaddr(addr, &rqstp->rq_xprt->xpt_remote,
+				  rqstp->rq_xprt->xpt_remotelen);
 	),
 	TP_printk("client %08x:%08x flavor=%s, conflict=%s from addr=%pISpc",
 		__entry->cl_boot, __entry->cl_id,
 		show_nfsd_authflavor(__entry->cl_flavor),
-		show_nfsd_authflavor(__entry->new_flavor), __entry->addr
+		show_nfsd_authflavor(__entry->new_flavor),
+		__get_sockaddr(addr)
 	)
 )
 
@@ -642,7 +643,7 @@ TRACE_EVENT(nfsd_clid_verf_mismatch,
 		__field(u32, cl_id)
 		__array(unsigned char, cl_verifier, NFS4_VERIFIER_SIZE)
 		__array(unsigned char, new_verifier, NFS4_VERIFIER_SIZE)
-		__array(unsigned char, addr, sizeof(struct sockaddr_in6))
+		__sockaddr(addr, rqstp->rq_xprt->xpt_remotelen)
 	),
 	TP_fast_assign(
 		__entry->cl_boot = clp->cl_clientid.cl_boot;
@@ -651,14 +652,14 @@ TRACE_EVENT(nfsd_clid_verf_mismatch,
 		       NFS4_VERIFIER_SIZE);
 		memcpy(__entry->new_verifier, (void *)verf,
 		       NFS4_VERIFIER_SIZE);
-		memcpy(__entry->addr, &rqstp->rq_xprt->xpt_remote,
-			sizeof(struct sockaddr_in6));
+		__assign_sockaddr(addr, &rqstp->rq_xprt->xpt_remote,
+				  rqstp->rq_xprt->xpt_remotelen);
 	),
 	TP_printk("client %08x:%08x verf=0x%s, updated=0x%s from addr=%pISpc",
 		__entry->cl_boot, __entry->cl_id,
 		__print_hex_str(__entry->cl_verifier, NFS4_VERIFIER_SIZE),
 		__print_hex_str(__entry->new_verifier, NFS4_VERIFIER_SIZE),
-		__entry->addr
+		__get_sockaddr(addr)
 	)
 );
 
@@ -908,18 +909,17 @@ TRACE_EVENT(nfsd_cb_args,
 		__field(u32, cl_id)
 		__field(u32, prog)
 		__field(u32, ident)
-		__array(unsigned char, addr, sizeof(struct sockaddr_in6))
+		__sockaddr(addr, conn->cb_addrlen)
 	),
 	TP_fast_assign(
 		__entry->cl_boot = clp->cl_clientid.cl_boot;
 		__entry->cl_id = clp->cl_clientid.cl_id;
 		__entry->prog = conn->cb_prog;
 		__entry->ident = conn->cb_ident;
-		memcpy(__entry->addr, &conn->cb_addr,
-			sizeof(struct sockaddr_in6));
+		__assign_sockaddr(addr, &conn->cb_addr, conn->cb_addrlen);
 	),
 	TP_printk("addr=%pISpc client %08x:%08x prog=%u ident=%u",
-		__entry->addr, __entry->cl_boot, __entry->cl_id,
+		__get_sockaddr(addr), __entry->cl_boot, __entry->cl_id,
 		__entry->prog, __entry->ident)
 );
 
@@ -951,17 +951,17 @@ DECLARE_EVENT_CLASS(nfsd_cb_class,
 		__field(unsigned long, state)
 		__field(u32, cl_boot)
 		__field(u32, cl_id)
-		__array(unsigned char, addr, sizeof(struct sockaddr_in6))
+		__sockaddr(addr, clp->cl_cb_conn.cb_addrlen)
 	),
 	TP_fast_assign(
 		__entry->state = clp->cl_cb_state;
 		__entry->cl_boot = clp->cl_clientid.cl_boot;
 		__entry->cl_id = clp->cl_clientid.cl_id;
-		memcpy(__entry->addr, &clp->cl_cb_conn.cb_addr,
-			sizeof(struct sockaddr_in6));
+		__assign_sockaddr(addr, &clp->cl_cb_conn.cb_addr,
+				  clp->cl_cb_conn.cb_addrlen)
 	),
 	TP_printk("addr=%pISpc client %08x:%08x state=%s",
-		__entry->addr, __entry->cl_boot, __entry->cl_id,
+		__get_sockaddr(addr), __entry->cl_boot, __entry->cl_id,
 		show_cb_state(__entry->state))
 );
 
@@ -1001,7 +1001,7 @@ TRACE_EVENT(nfsd_cb_setup,
 		__field(u32, cl_boot)
 		__field(u32, cl_id)
 		__field(unsigned long, authflavor)
-		__array(unsigned char, addr, sizeof(struct sockaddr_in6))
+		__sockaddr(addr, clp->cl_cb_conn.cb_addrlen)
 		__array(unsigned char, netid, 8)
 	),
 	TP_fast_assign(
@@ -1009,11 +1009,11 @@ TRACE_EVENT(nfsd_cb_setup,
 		__entry->cl_id = clp->cl_clientid.cl_id;
 		strlcpy(__entry->netid, netid, sizeof(__entry->netid));
 		__entry->authflavor = authflavor;
-		memcpy(__entry->addr, &clp->cl_cb_conn.cb_addr,
-			sizeof(struct sockaddr_in6));
+		__assign_sockaddr(addr, &clp->cl_cb_conn.cb_addr,
+				  clp->cl_cb_conn.cb_addrlen)
 	),
 	TP_printk("addr=%pISpc client %08x:%08x proto=%s flavor=%s",
-		__entry->addr, __entry->cl_boot, __entry->cl_id,
+		__get_sockaddr(addr), __entry->cl_boot, __entry->cl_id,
 		__entry->netid, show_nfsd_authflavor(__entry->authflavor))
 );
 
@@ -1027,30 +1027,32 @@ TRACE_EVENT(nfsd_cb_setup_err,
 		__field(long, error)
 		__field(u32, cl_boot)
 		__field(u32, cl_id)
-		__array(unsigned char, addr, sizeof(struct sockaddr_in6))
+		__sockaddr(addr, clp->cl_cb_conn.cb_addrlen)
 	),
 	TP_fast_assign(
 		__entry->error = error;
 		__entry->cl_boot = clp->cl_clientid.cl_boot;
 		__entry->cl_id = clp->cl_clientid.cl_id;
-		memcpy(__entry->addr, &clp->cl_cb_conn.cb_addr,
-			sizeof(struct sockaddr_in6));
+		__assign_sockaddr(addr, &clp->cl_cb_conn.cb_addr,
+				  clp->cl_cb_conn.cb_addrlen)
 	),
 	TP_printk("addr=%pISpc client %08x:%08x error=%ld",
-		__entry->addr, __entry->cl_boot, __entry->cl_id, __entry->error)
+		__get_sockaddr(addr), __entry->cl_boot, __entry->cl_id,
+		__entry->error)
 );
 
-TRACE_EVENT(nfsd_cb_recall,
+TRACE_EVENT_CONDITION(nfsd_cb_recall,
 	TP_PROTO(
 		const struct nfs4_stid *stid
 	),
 	TP_ARGS(stid),
+	TP_CONDITION(stid->sc_client),
 	TP_STRUCT__entry(
 		__field(u32, cl_boot)
 		__field(u32, cl_id)
 		__field(u32, si_id)
 		__field(u32, si_generation)
-		__array(unsigned char, addr, sizeof(struct sockaddr_in6))
+		__sockaddr(addr, stid->sc_client->cl_cb_conn.cb_addrlen)
 	),
 	TP_fast_assign(
 		const stateid_t *stp = &stid->sc_stateid;
@@ -1060,14 +1062,11 @@ TRACE_EVENT(nfsd_cb_recall,
 		__entry->cl_id = stp->si_opaque.so_clid.cl_id;
 		__entry->si_id = stp->si_opaque.so_id;
 		__entry->si_generation = stp->si_generation;
-		if (clp)
-			memcpy(__entry->addr, &clp->cl_cb_conn.cb_addr,
-				sizeof(struct sockaddr_in6));
-		else
-			memset(__entry->addr, 0, sizeof(struct sockaddr_in6));
+		__assign_sockaddr(addr, &clp->cl_cb_conn.cb_addr,
+				  clp->cl_cb_conn.cb_addrlen)
 	),
 	TP_printk("addr=%pISpc client %08x:%08x stateid %08x:%08x",
-		__entry->addr, __entry->cl_boot, __entry->cl_id,
+		__get_sockaddr(addr), __entry->cl_boot, __entry->cl_id,
 		__entry->si_id, __entry->si_generation)
 );
 
@@ -1081,7 +1080,7 @@ TRACE_EVENT(nfsd_cb_notify_lock,
 		__field(u32, cl_boot)
 		__field(u32, cl_id)
 		__field(u32, fh_hash)
-		__array(unsigned char, addr, sizeof(struct sockaddr_in6))
+		__sockaddr(addr, lo->lo_owner.so_client->cl_cb_conn.cb_addrlen)
 	),
 	TP_fast_assign(
 		const struct nfs4_client *clp = lo->lo_owner.so_client;
@@ -1089,11 +1088,11 @@ TRACE_EVENT(nfsd_cb_notify_lock,
 		__entry->cl_boot = clp->cl_clientid.cl_boot;
 		__entry->cl_id = clp->cl_clientid.cl_id;
 		__entry->fh_hash = knfsd_fh_hash(&nbl->nbl_fh);
-		memcpy(__entry->addr, &clp->cl_cb_conn.cb_addr,
-			sizeof(struct sockaddr_in6));
+		__assign_sockaddr(addr, &clp->cl_cb_conn.cb_addr,
+				  clp->cl_cb_conn.cb_addrlen)
 	),
 	TP_printk("addr=%pISpc client %08x:%08x fh_hash=0x%08x",
-		__entry->addr, __entry->cl_boot, __entry->cl_id,
+		__get_sockaddr(addr), __entry->cl_boot, __entry->cl_id,
 		__entry->fh_hash)
 );
 
@@ -1114,7 +1113,7 @@ TRACE_EVENT(nfsd_cb_offload,
 		__field(u32, fh_hash)
 		__field(int, status)
 		__field(u64, count)
-		__array(unsigned char, addr, sizeof(struct sockaddr_in6))
+		__sockaddr(addr, clp->cl_cb_conn.cb_addrlen)
 	),
 	TP_fast_assign(
 		__entry->cl_boot = stp->si_opaque.so_clid.cl_boot;
@@ -1124,11 +1123,11 @@ TRACE_EVENT(nfsd_cb_offload,
 		__entry->fh_hash = knfsd_fh_hash(fh);
 		__entry->status = be32_to_cpu(status);
 		__entry->count = count;
-		memcpy(__entry->addr, &clp->cl_cb_conn.cb_addr,
-			sizeof(struct sockaddr_in6));
+		__assign_sockaddr(addr, &clp->cl_cb_conn.cb_addr,
+				  clp->cl_cb_conn.cb_addrlen)
 	),
 	TP_printk("addr=%pISpc client %08x:%08x stateid %08x:%08x fh_hash=0x%08x count=%llu status=%d",
-		__entry->addr, __entry->cl_boot, __entry->cl_id,
+		__get_sockaddr(addr), __entry->cl_boot, __entry->cl_id,
 		__entry->si_id, __entry->si_generation,
 		__entry->fh_hash, __entry->count, __entry->status)
 );
-- 
GitLab


From c1a3f2ce66c80cd9f2a4376fa35a5c8d05441c73 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Wed, 20 Oct 2021 14:53:30 -0400
Subject: [PATCH 0843/1586] NFSD: Remove NFSD_PROC_ARGS_* macros

Clean up.

The PROC_ARGS macros were added when I thought that NFSD tracepoints
would be reporting endpoint information. However, tracepoints in the
RPC server now report transport endpoint information, so in general
there's no need for the upper layers to do that any more, and these
macros can be retired.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/trace.h | 28 +++++++++-------------------
 1 file changed, 9 insertions(+), 19 deletions(-)

diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index f0d2ccb633a68..242fa123e0e94 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -13,22 +13,6 @@
 #include "export.h"
 #include "nfsfh.h"
 
-#define NFSD_TRACE_PROC_ARG_FIELDS \
-		__field(unsigned int, netns_ino) \
-		__field(u32, xid) \
-		__array(unsigned char, server, sizeof(struct sockaddr_in6)) \
-		__array(unsigned char, client, sizeof(struct sockaddr_in6))
-
-#define NFSD_TRACE_PROC_ARG_ASSIGNMENTS \
-		do { \
-			__entry->netns_ino = SVC_NET(rqstp)->ns.inum; \
-			__entry->xid = be32_to_cpu(rqstp->rq_xid); \
-			memcpy(__entry->server, &rqstp->rq_xprt->xpt_local, \
-			       rqstp->rq_xprt->xpt_locallen); \
-			memcpy(__entry->client, &rqstp->rq_xprt->xpt_remote, \
-			       rqstp->rq_xprt->xpt_remotelen); \
-		} while (0);
-
 #define NFSD_TRACE_PROC_RES_FIELDS \
 		__field(unsigned int, netns_ino) \
 		__field(u32, xid) \
@@ -53,16 +37,22 @@ DECLARE_EVENT_CLASS(nfsd_xdr_err_class,
 	),
 	TP_ARGS(rqstp),
 	TP_STRUCT__entry(
-		NFSD_TRACE_PROC_ARG_FIELDS
-
+		__field(unsigned int, netns_ino)
+		__field(u32, xid)
 		__field(u32, vers)
 		__field(u32, proc)
+		__sockaddr(server, rqstp->rq_xprt->xpt_locallen)
+		__sockaddr(client, rqstp->rq_xprt->xpt_remotelen)
 	),
 	TP_fast_assign(
-		NFSD_TRACE_PROC_ARG_ASSIGNMENTS
+		const struct svc_xprt *xprt = rqstp->rq_xprt;
 
+		__entry->netns_ino = xprt->xpt_net->ns.inum;
+		__entry->xid = be32_to_cpu(rqstp->rq_xid);
 		__entry->vers = rqstp->rq_vers;
 		__entry->proc = rqstp->rq_proc;
+		__assign_sockaddr(server, &xprt->xpt_local, xprt->xpt_locallen);
+		__assign_sockaddr(client, &xprt->xpt_remote, xprt->xpt_remotelen);
 	),
 	TP_printk("xid=0x%08x vers=%u proc=%u",
 		__entry->xid, __entry->vers, __entry->proc
-- 
GitLab


From 26ce14e77a827fd73a650cffea4db7ddcc62ebc4 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 24 Jan 2022 11:31:45 -0500
Subject: [PATCH 0844/1586] SUNRPC: Improve sockaddr handling in the
 svc_xprt_create_error trace point

Clean up: Use the new __sockaddr field to record the socket address.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/trace/events/sunrpc.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index 29982d60b68ab..39ed91dca5aba 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -1774,18 +1774,18 @@ TRACE_EVENT(svc_xprt_create_err,
 		__field(long, error)
 		__string(program, program)
 		__string(protocol, protocol)
-		__array(unsigned char, addr, sizeof(struct sockaddr_in6))
+		__sockaddr(addr, salen)
 	),
 
 	TP_fast_assign(
 		__entry->error = PTR_ERR(xprt);
 		__assign_str(program, program);
 		__assign_str(protocol, protocol);
-		memcpy(__entry->addr, sap, min(salen, sizeof(__entry->addr)));
+		__assign_sockaddr(addr, sap, salen);
 	),
 
 	TP_printk("addr=%pISpc program=%s protocol=%s error=%ld",
-		__entry->addr, __get_str(program), __get_str(protocol),
+		__get_sockaddr(addr), __get_str(program), __get_str(protocol),
 		__entry->error)
 );
 
-- 
GitLab


From aca3ed791553f1f9f994273a12b30c35b40f2769 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 19 Oct 2021 17:07:44 -0400
Subject: [PATCH 0845/1586] SUNRPC: Same as SVC_RQST_ENDPOINT, but without the
 xid

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/trace/events/sunrpc.h | 112 +++++++++++++++++++++-------------
 1 file changed, 71 insertions(+), 41 deletions(-)

diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index 39ed91dca5aba..84f5853196953 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -1789,50 +1789,99 @@ TRACE_EVENT(svc_xprt_create_err,
 		__entry->error)
 );
 
+#define SVC_XPRT_ENDPOINT_FIELDS(x) \
+		__sockaddr(server, (x)->xpt_locallen) \
+		__sockaddr(client, (x)->xpt_remotelen) \
+		__field(unsigned long, flags) \
+		__field(unsigned int, netns_ino)
+
+#define SVC_XPRT_ENDPOINT_ASSIGNMENTS(x) \
+		do { \
+			__assign_sockaddr(server, &(x)->xpt_local, \
+					  (x)->xpt_locallen); \
+			__assign_sockaddr(client, &(x)->xpt_remote, \
+					  (x)->xpt_remotelen); \
+			__entry->flags = (x)->xpt_flags; \
+			__entry->netns_ino = (x)->xpt_net->ns.inum; \
+		} while (0)
+
+#define SVC_XPRT_ENDPOINT_FORMAT \
+		"server=%pISpc client=%pISpc flags=%s"
+
+#define SVC_XPRT_ENDPOINT_VARARGS \
+		__get_sockaddr(server), __get_sockaddr(client), \
+		show_svc_xprt_flags(__entry->flags)
+
 TRACE_EVENT(svc_xprt_enqueue,
-	TP_PROTO(struct svc_xprt *xprt, struct svc_rqst *rqst),
+	TP_PROTO(
+		const struct svc_xprt *xprt,
+		const struct svc_rqst *rqst
+	),
 
 	TP_ARGS(xprt, rqst),
 
 	TP_STRUCT__entry(
+		SVC_XPRT_ENDPOINT_FIELDS(xprt)
+
 		__field(int, pid)
-		__field(unsigned long, flags)
-		__string(addr, xprt->xpt_remotebuf)
 	),
 
 	TP_fast_assign(
+		SVC_XPRT_ENDPOINT_ASSIGNMENTS(xprt);
+
 		__entry->pid = rqst? rqst->rq_task->pid : 0;
-		__entry->flags = xprt->xpt_flags;
-		__assign_str(addr, xprt->xpt_remotebuf);
 	),
 
-	TP_printk("addr=%s pid=%d flags=%s", __get_str(addr),
-		__entry->pid, show_svc_xprt_flags(__entry->flags))
+	TP_printk(SVC_XPRT_ENDPOINT_FORMAT " pid=%d",
+		SVC_XPRT_ENDPOINT_VARARGS, __entry->pid)
+);
+
+TRACE_EVENT(svc_xprt_dequeue,
+	TP_PROTO(
+		const struct svc_rqst *rqst
+	),
+
+	TP_ARGS(rqst),
+
+	TP_STRUCT__entry(
+		SVC_XPRT_ENDPOINT_FIELDS(rqst->rq_xprt)
+
+		__field(unsigned long, wakeup)
+	),
+
+	TP_fast_assign(
+		SVC_XPRT_ENDPOINT_ASSIGNMENTS(rqst->rq_xprt);
+
+		__entry->wakeup = ktime_to_us(ktime_sub(ktime_get(),
+							rqst->rq_qtime));
+	),
+
+	TP_printk(SVC_XPRT_ENDPOINT_FORMAT " wakeup-us=%lu",
+		SVC_XPRT_ENDPOINT_VARARGS, __entry->wakeup)
 );
 
 DECLARE_EVENT_CLASS(svc_xprt_event,
-	TP_PROTO(struct svc_xprt *xprt),
+	TP_PROTO(
+		const struct svc_xprt *xprt
+	),
 
 	TP_ARGS(xprt),
 
 	TP_STRUCT__entry(
-		__field(unsigned long, flags)
-		__string(addr, xprt->xpt_remotebuf)
+		SVC_XPRT_ENDPOINT_FIELDS(xprt)
 	),
 
 	TP_fast_assign(
-		__entry->flags = xprt->xpt_flags;
-		__assign_str(addr, xprt->xpt_remotebuf);
+		SVC_XPRT_ENDPOINT_ASSIGNMENTS(xprt);
 	),
 
-	TP_printk("addr=%s flags=%s", __get_str(addr),
-		show_svc_xprt_flags(__entry->flags))
+	TP_printk(SVC_XPRT_ENDPOINT_FORMAT, SVC_XPRT_ENDPOINT_VARARGS)
 );
 
 #define DEFINE_SVC_XPRT_EVENT(name) \
 	DEFINE_EVENT(svc_xprt_event, svc_xprt_##name, \
 			TP_PROTO( \
-				struct svc_xprt *xprt \
+				const struct svc_xprt *xprt \
 			), \
 			TP_ARGS(xprt))
 
@@ -1850,44 +1899,25 @@ TRACE_EVENT(svc_xprt_accept,
 	TP_ARGS(xprt, service),
 
 	TP_STRUCT__entry(
-		__string(addr, xprt->xpt_remotebuf)
+		SVC_XPRT_ENDPOINT_FIELDS(xprt)
+
 		__string(protocol, xprt->xpt_class->xcl_name)
 		__string(service, service)
 	),
 
 	TP_fast_assign(
-		__assign_str(addr, xprt->xpt_remotebuf);
+		SVC_XPRT_ENDPOINT_ASSIGNMENTS(xprt);
+
 		__assign_str(protocol, xprt->xpt_class->xcl_name);
 		__assign_str(service, service);
 	),
 
-	TP_printk("addr=%s protocol=%s service=%s",
-		__get_str(addr), __get_str(protocol), __get_str(service)
+	TP_printk(SVC_XPRT_ENDPOINT_FORMAT " protocol=%s service=%s",
+		SVC_XPRT_ENDPOINT_VARARGS,
+		__get_str(protocol), __get_str(service)
 	)
 );
 
-TRACE_EVENT(svc_xprt_dequeue,
-	TP_PROTO(struct svc_rqst *rqst),
-
-	TP_ARGS(rqst),
-
-	TP_STRUCT__entry(
-		__field(unsigned long, flags)
-		__field(unsigned long, wakeup)
-		__string(addr, rqst->rq_xprt->xpt_remotebuf)
-	),
-
-	TP_fast_assign(
-		__entry->flags = rqst->rq_xprt->xpt_flags;
-		__entry->wakeup = ktime_to_us(ktime_sub(ktime_get(),
-							rqst->rq_qtime));
-		__assign_str(addr, rqst->rq_xprt->xpt_remotebuf);
-	),
-
-	TP_printk("addr=%s flags=%s wakeup-us=%lu", __get_str(addr),
-		show_svc_xprt_flags(__entry->flags), __entry->wakeup)
-);
-
 TRACE_EVENT(svc_wake_up,
 	TP_PROTO(int pid),
 
-- 
GitLab


From 70a60cbfb613d8f6ffd1d9ade187d0a868066500 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 19 Oct 2021 13:47:24 -0400
Subject: [PATCH 0846/1586] SUNRPC: Record endpoint information in trace log

To make server-side trace events more useful in container-ized
environments, capture not just the remote's IP address, but the
local IP address and network namespace as well.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/trace/events/sunrpc.h | 126 +++++++++++++++++++++-------------
 1 file changed, 78 insertions(+), 48 deletions(-)

diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index 84f5853196953..ab8ae1f6ba84d 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -1625,26 +1625,53 @@ TRACE_DEFINE_ENUM(SVC_COMPLETE);
 		{ SVC_PENDING,	"SVC_PENDING" },	\
 		{ SVC_COMPLETE,	"SVC_COMPLETE" })
 
+#define SVC_RQST_ENDPOINT_FIELDS(r) \
+		__sockaddr(server, (r)->rq_xprt->xpt_locallen) \
+		__sockaddr(client, (r)->rq_xprt->xpt_remotelen) \
+		__field(unsigned int, netns_ino) \
+		__field(u32, xid)
+
+#define SVC_RQST_ENDPOINT_ASSIGNMENTS(r) \
+		do { \
+			struct svc_xprt *xprt = (r)->rq_xprt; \
+			__assign_sockaddr(server, &xprt->xpt_local, \
+					  xprt->xpt_locallen); \
+			__assign_sockaddr(client, &xprt->xpt_remote, \
+					  xprt->xpt_remotelen); \
+			__entry->netns_ino = xprt->xpt_net->ns.inum; \
+			__entry->xid = be32_to_cpu((r)->rq_xid); \
+		} while (0)
+
+#define SVC_RQST_ENDPOINT_FORMAT \
+		"xid=0x%08x server=%pISpc client=%pISpc"
+
+#define SVC_RQST_ENDPOINT_VARARGS \
+		__entry->xid, __get_sockaddr(server), __get_sockaddr(client)
+
 TRACE_EVENT(svc_authenticate,
 	TP_PROTO(const struct svc_rqst *rqst, int auth_res),
 
 	TP_ARGS(rqst, auth_res),
 
 	TP_STRUCT__entry(
-		__field(u32, xid)
+		SVC_RQST_ENDPOINT_FIELDS(rqst)
+
 		__field(unsigned long, svc_status)
 		__field(unsigned long, auth_stat)
 	),
 
 	TP_fast_assign(
-		__entry->xid = be32_to_cpu(rqst->rq_xid);
+		SVC_RQST_ENDPOINT_ASSIGNMENTS(rqst);
+
 		__entry->svc_status = auth_res;
 		__entry->auth_stat = be32_to_cpu(rqst->rq_auth_stat);
 	),
 
-	TP_printk("xid=0x%08x auth_res=%s auth_stat=%s",
-			__entry->xid, svc_show_status(__entry->svc_status),
-			rpc_show_auth_stat(__entry->auth_stat))
+	TP_printk(SVC_RQST_ENDPOINT_FORMAT
+		" auth_res=%s auth_stat=%s",
+		SVC_RQST_ENDPOINT_VARARGS,
+		svc_show_status(__entry->svc_status),
+		rpc_show_auth_stat(__entry->auth_stat))
 );
 
 TRACE_EVENT(svc_process,
@@ -1680,7 +1707,6 @@ TRACE_EVENT(svc_process,
 );
 
 DECLARE_EVENT_CLASS(svc_rqst_event,
-
 	TP_PROTO(
 		const struct svc_rqst *rqst
 	),
@@ -1688,20 +1714,20 @@ DECLARE_EVENT_CLASS(svc_rqst_event,
 	TP_ARGS(rqst),
 
 	TP_STRUCT__entry(
-		__field(u32, xid)
+		SVC_RQST_ENDPOINT_FIELDS(rqst)
+
 		__field(unsigned long, flags)
-		__string(addr, rqst->rq_xprt->xpt_remotebuf)
 	),
 
 	TP_fast_assign(
-		__entry->xid = be32_to_cpu(rqst->rq_xid);
+		SVC_RQST_ENDPOINT_ASSIGNMENTS(rqst);
+
 		__entry->flags = rqst->rq_flags;
-		__assign_str(addr, rqst->rq_xprt->xpt_remotebuf);
 	),
 
-	TP_printk("addr=%s xid=0x%08x flags=%s",
-			__get_str(addr), __entry->xid,
-			show_rqstp_flags(__entry->flags))
+	TP_printk(SVC_RQST_ENDPOINT_FORMAT " flags=%s",
+		SVC_RQST_ENDPOINT_VARARGS,
+		show_rqstp_flags(__entry->flags))
 );
 #define DEFINE_SVC_RQST_EVENT(name) \
 	DEFINE_EVENT(svc_rqst_event, svc_##name, \
@@ -1714,34 +1740,63 @@ DEFINE_SVC_RQST_EVENT(defer);
 DEFINE_SVC_RQST_EVENT(drop);
 
 DECLARE_EVENT_CLASS(svc_rqst_status,
-
-	TP_PROTO(struct svc_rqst *rqst, int status),
+	TP_PROTO(
+		const struct svc_rqst *rqst,
+		int status
+	),
 
 	TP_ARGS(rqst, status),
 
 	TP_STRUCT__entry(
-		__field(u32, xid)
+		SVC_RQST_ENDPOINT_FIELDS(rqst)
+
 		__field(int, status)
 		__field(unsigned long, flags)
-		__string(addr, rqst->rq_xprt->xpt_remotebuf)
 	),
 
 	TP_fast_assign(
-		__entry->xid = be32_to_cpu(rqst->rq_xid);
+		SVC_RQST_ENDPOINT_ASSIGNMENTS(rqst);
+
 		__entry->status = status;
 		__entry->flags = rqst->rq_flags;
-		__assign_str(addr, rqst->rq_xprt->xpt_remotebuf);
 	),
 
-	TP_printk("addr=%s xid=0x%08x status=%d flags=%s",
-		  __get_str(addr), __entry->xid,
-		  __entry->status, show_rqstp_flags(__entry->flags))
+	TP_printk(SVC_RQST_ENDPOINT_FORMAT " status=%d flags=%s",
+		SVC_RQST_ENDPOINT_VARARGS,
+		__entry->status, show_rqstp_flags(__entry->flags))
 );
 
 DEFINE_EVENT(svc_rqst_status, svc_send,
-	TP_PROTO(struct svc_rqst *rqst, int status),
+	TP_PROTO(const struct svc_rqst *rqst, int status),
 	TP_ARGS(rqst, status));
 
+TRACE_EVENT(svc_stats_latency,
+	TP_PROTO(
+		const struct svc_rqst *rqst
+	),
+
+	TP_ARGS(rqst),
+
+	TP_STRUCT__entry(
+		SVC_RQST_ENDPOINT_FIELDS(rqst)
+
+		__field(unsigned long, execute)
+		__string(procedure, svc_proc_name(rqst))
+	),
+
+	TP_fast_assign(
+		SVC_RQST_ENDPOINT_ASSIGNMENTS(rqst);
+
+		__entry->execute = ktime_to_us(ktime_sub(ktime_get(),
+							 rqst->rq_stime));
+		__assign_str(procedure, svc_proc_name(rqst));
+	),
+
+	TP_printk(SVC_RQST_ENDPOINT_FORMAT " proc=%s execute-us=%lu",
+		SVC_RQST_ENDPOINT_VARARGS,
+		__get_str(procedure), __entry->execute)
+);
+
 #define show_svc_xprt_flags(flags)					\
 	__print_flags(flags, "|",					\
 		{ (1UL << XPT_BUSY),		"XPT_BUSY"},		\
@@ -1952,31 +2007,6 @@ TRACE_EVENT(svc_alloc_arg_err,
 	TP_printk("pages=%u", __entry->pages)
 );
 
-TRACE_EVENT(svc_stats_latency,
-	TP_PROTO(const struct svc_rqst *rqst),
-
-	TP_ARGS(rqst),
-
-	TP_STRUCT__entry(
-		__field(u32, xid)
-		__field(unsigned long, execute)
-		__string(procedure, svc_proc_name(rqst))
-		__string(addr, rqst->rq_xprt->xpt_remotebuf)
-	),
-
-	TP_fast_assign(
-		__entry->xid = be32_to_cpu(rqst->rq_xid);
-		__entry->execute = ktime_to_us(ktime_sub(ktime_get(),
-							 rqst->rq_stime));
-		__assign_str(procedure, svc_proc_name(rqst));
-		__assign_str(addr, rqst->rq_xprt->xpt_remotebuf);
-	),
-
-	TP_printk("addr=%s xid=0x%08x proc=%s execute-us=%lu",
-		__get_str(addr), __entry->xid, __get_str(procedure),
-		__entry->execute)
-);
-
 DECLARE_EVENT_CLASS(svc_deferred_event,
 	TP_PROTO(
 		const struct svc_deferred_req *dr
-- 
GitLab


From a9ff2e99e9fa501ec965da03c18a5422b37a2f44 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 25 Jan 2022 10:17:59 -0500
Subject: [PATCH 0847/1586] SUNRPC: Remove the .svo_enqueue_xprt method

We have never been able to track down and address the underlying
cause of the performance issues with workqueue-based service
support. svo_enqueue_xprt is called multiple times per RPC, so
it adds instruction path length, but always ends up at the same
function: svc_xprt_do_enqueue(). We do not anticipate needing
this flexibility for dynamic nfsd thread management support.

As a micro-optimization, remove .svo_enqueue_xprt because
Spectre/Meltdown makes virtual function calls more costly.

This change essentially reverts commit b9e13cdfac70 ("nfsd/sunrpc:
turn enqueueing a svc_xprt into a svc_serv operation").

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/lockd/svc.c                  |  1 -
 fs/nfs/callback.c               |  2 --
 fs/nfsd/nfssvc.c                |  1 -
 include/linux/sunrpc/svc.h      |  3 ---
 include/linux/sunrpc/svc_xprt.h |  1 -
 net/sunrpc/svc_xprt.c           | 10 +++++-----
 6 files changed, 5 insertions(+), 13 deletions(-)

diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 0475c5a5d061e..3a05af8736259 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -353,7 +353,6 @@ static struct notifier_block lockd_inet6addr_notifier = {
 static const struct svc_serv_ops lockd_sv_ops = {
 	.svo_shutdown		= svc_rpcb_cleanup,
 	.svo_function		= lockd,
-	.svo_enqueue_xprt	= svc_xprt_do_enqueue,
 	.svo_module		= THIS_MODULE,
 };
 
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 054cc1255fac6..7a810f8850632 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -234,13 +234,11 @@ err_bind:
 
 static const struct svc_serv_ops nfs40_cb_sv_ops = {
 	.svo_function		= nfs4_callback_svc,
-	.svo_enqueue_xprt	= svc_xprt_do_enqueue,
 	.svo_module		= THIS_MODULE,
 };
 #if defined(CONFIG_NFS_V4_1)
 static const struct svc_serv_ops nfs41_cb_sv_ops = {
 	.svo_function		= nfs41_callback_svc,
-	.svo_enqueue_xprt	= svc_xprt_do_enqueue,
 	.svo_module		= THIS_MODULE,
 };
 
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index b8c682b62d299..aeeac6de1f0ac 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -615,7 +615,6 @@ static int nfsd_get_default_max_blksize(void)
 static const struct svc_serv_ops nfsd_thread_sv_ops = {
 	.svo_shutdown		= nfsd_last_thread,
 	.svo_function		= nfsd,
-	.svo_enqueue_xprt	= svc_xprt_do_enqueue,
 	.svo_module		= THIS_MODULE,
 };
 
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index f35c22b3355ff..6ef9c1cafd0b2 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -61,9 +61,6 @@ struct svc_serv_ops {
 	/* function for service threads to run */
 	int		(*svo_function)(void *);
 
-	/* queue up a transport for servicing */
-	void		(*svo_enqueue_xprt)(struct svc_xprt *);
-
 	/* optional module to count when adding threads.
 	 * Thread function must call module_put_and_kthread_exit() to exit.
 	 */
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index 571f605bc91ef..a3ba027fb4ba7 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -131,7 +131,6 @@ int	svc_create_xprt(struct svc_serv *, const char *, struct net *,
 			const int, const unsigned short, int,
 			const struct cred *);
 void	svc_xprt_received(struct svc_xprt *xprt);
-void	svc_xprt_do_enqueue(struct svc_xprt *xprt);
 void	svc_xprt_enqueue(struct svc_xprt *xprt);
 void	svc_xprt_put(struct svc_xprt *xprt);
 void	svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt *xprt);
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index b21ad79941474..9fce4f7774bb9 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -32,6 +32,7 @@ static int svc_deferred_recv(struct svc_rqst *rqstp);
 static struct cache_deferred_req *svc_defer(struct cache_req *req);
 static void svc_age_temp_xprts(struct timer_list *t);
 static void svc_delete_xprt(struct svc_xprt *xprt);
+static void svc_xprt_do_enqueue(struct svc_xprt *xprt);
 
 /* apparently the "standard" is that clients close
  * idle connections after 5 minutes, servers after
@@ -266,12 +267,12 @@ void svc_xprt_received(struct svc_xprt *xprt)
 	}
 
 	/* As soon as we clear busy, the xprt could be closed and
-	 * 'put', so we need a reference to call svc_enqueue_xprt with:
+	 * 'put', so we need a reference to call svc_xprt_do_enqueue with:
 	 */
 	svc_xprt_get(xprt);
 	smp_mb__before_atomic();
 	clear_bit(XPT_BUSY, &xprt->xpt_flags);
-	xprt->xpt_server->sv_ops->svo_enqueue_xprt(xprt);
+	svc_xprt_do_enqueue(xprt);
 	svc_xprt_put(xprt);
 }
 EXPORT_SYMBOL_GPL(svc_xprt_received);
@@ -423,7 +424,7 @@ static bool svc_xprt_ready(struct svc_xprt *xprt)
 	return false;
 }
 
-void svc_xprt_do_enqueue(struct svc_xprt *xprt)
+static void svc_xprt_do_enqueue(struct svc_xprt *xprt)
 {
 	struct svc_pool *pool;
 	struct svc_rqst	*rqstp = NULL;
@@ -467,7 +468,6 @@ out_unlock:
 	put_cpu();
 	trace_svc_xprt_enqueue(xprt, rqstp);
 }
-EXPORT_SYMBOL_GPL(svc_xprt_do_enqueue);
 
 /*
  * Queue up a transport with data pending. If there are idle nfsd
@@ -478,7 +478,7 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
 {
 	if (test_bit(XPT_BUSY, &xprt->xpt_flags))
 		return;
-	xprt->xpt_server->sv_ops->svo_enqueue_xprt(xprt);
+	svc_xprt_do_enqueue(xprt);
 }
 EXPORT_SYMBOL_GPL(svc_xprt_enqueue);
 
-- 
GitLab


From c0219c499799c1e92bd570c15a47e6257a27bb15 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 25 Jan 2022 17:57:23 -0500
Subject: [PATCH 0848/1586] SUNRPC: Merge svc_do_enqueue_xprt() into
 svc_enqueue_xprt()

Neil says:
"These functions were separated in commit 0971374e2818 ("SUNRPC:
Reduce contention in svc_xprt_enqueue()") so that the XPT_BUSY check
happened before taking any spinlocks.

We have since moved or removed the spinlocks so the extra test is
fairly pointless."

I've made this a separate patch in case the XPT_BUSY change has
unexpected consequences and needs to be reverted.

Suggested-by: Neil Brown <neilb@suse.de>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 net/sunrpc/svc_xprt.c | 26 ++++++++++----------------
 1 file changed, 10 insertions(+), 16 deletions(-)

diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 9fce4f7774bb9..1c2295209d083 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -32,7 +32,6 @@ static int svc_deferred_recv(struct svc_rqst *rqstp);
 static struct cache_deferred_req *svc_defer(struct cache_req *req);
 static void svc_age_temp_xprts(struct timer_list *t);
 static void svc_delete_xprt(struct svc_xprt *xprt);
-static void svc_xprt_do_enqueue(struct svc_xprt *xprt);
 
 /* apparently the "standard" is that clients close
  * idle connections after 5 minutes, servers after
@@ -267,12 +266,12 @@ void svc_xprt_received(struct svc_xprt *xprt)
 	}
 
 	/* As soon as we clear busy, the xprt could be closed and
-	 * 'put', so we need a reference to call svc_xprt_do_enqueue with:
+	 * 'put', so we need a reference to call svc_xprt_enqueue with:
 	 */
 	svc_xprt_get(xprt);
 	smp_mb__before_atomic();
 	clear_bit(XPT_BUSY, &xprt->xpt_flags);
-	svc_xprt_do_enqueue(xprt);
+	svc_xprt_enqueue(xprt);
 	svc_xprt_put(xprt);
 }
 EXPORT_SYMBOL_GPL(svc_xprt_received);
@@ -412,6 +411,8 @@ static bool svc_xprt_ready(struct svc_xprt *xprt)
 	smp_rmb();
 	xpt_flags = READ_ONCE(xprt->xpt_flags);
 
+	if (xpt_flags & BIT(XPT_BUSY))
+		return false;
 	if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE)))
 		return true;
 	if (xpt_flags & (BIT(XPT_DATA) | BIT(XPT_DEFERRED))) {
@@ -424,7 +425,12 @@ static bool svc_xprt_ready(struct svc_xprt *xprt)
 	return false;
 }
 
-static void svc_xprt_do_enqueue(struct svc_xprt *xprt)
+/**
+ * svc_xprt_enqueue - Queue a transport on an idle nfsd thread
+ * @xprt: transport with data pending
+ *
+ */
+void svc_xprt_enqueue(struct svc_xprt *xprt)
 {
 	struct svc_pool *pool;
 	struct svc_rqst	*rqstp = NULL;
@@ -468,18 +474,6 @@ out_unlock:
 	put_cpu();
 	trace_svc_xprt_enqueue(xprt, rqstp);
 }
-
-/*
- * Queue up a transport with data pending. If there are idle nfsd
- * processes, wake 'em up.
- *
- */
-void svc_xprt_enqueue(struct svc_xprt *xprt)
-{
-	if (test_bit(XPT_BUSY, &xprt->xpt_flags))
-		return;
-	svc_xprt_do_enqueue(xprt);
-}
 EXPORT_SYMBOL_GPL(svc_xprt_enqueue);
 
 /*
-- 
GitLab


From 87cdd8641c8a1ec6afd2468265e20840a57fd888 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 25 Jan 2022 13:49:29 -0500
Subject: [PATCH 0849/1586] SUNRPC: Remove svo_shutdown method

Clean up. Neil observed that "any code that calls svc_shutdown_net()
knows what the shutdown function should be, and so can call it
directly."

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Reviewed-by: NeilBrown <neilb@suse.de>
---
 fs/lockd/svc.c             | 5 ++---
 fs/nfsd/nfssvc.c           | 2 +-
 include/linux/sunrpc/svc.h | 3 ---
 net/sunrpc/svc.c           | 3 ---
 4 files changed, 3 insertions(+), 10 deletions(-)

diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 3a05af8736259..f5b688a844aa5 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -249,6 +249,7 @@ out_err:
 		printk(KERN_WARNING
 			"lockd_up: makesock failed, error=%d\n", err);
 	svc_shutdown_net(serv, net);
+	svc_rpcb_cleanup(serv, net);
 	return err;
 }
 
@@ -287,8 +288,7 @@ static void lockd_down_net(struct svc_serv *serv, struct net *net)
 			cancel_delayed_work_sync(&ln->grace_period_end);
 			locks_end_grace(&ln->lockd_manager);
 			svc_shutdown_net(serv, net);
-			dprintk("%s: per-net data destroyed; net=%x\n",
-				__func__, net->ns.inum);
+			svc_rpcb_cleanup(serv, net);
 		}
 	} else {
 		pr_err("%s: no users! net=%x\n",
@@ -351,7 +351,6 @@ static struct notifier_block lockd_inet6addr_notifier = {
 #endif
 
 static const struct svc_serv_ops lockd_sv_ops = {
-	.svo_shutdown		= svc_rpcb_cleanup,
 	.svo_function		= lockd,
 	.svo_module		= THIS_MODULE,
 };
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index aeeac6de1f0ac..0c6b216e439ec 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -613,7 +613,6 @@ static int nfsd_get_default_max_blksize(void)
 }
 
 static const struct svc_serv_ops nfsd_thread_sv_ops = {
-	.svo_shutdown		= nfsd_last_thread,
 	.svo_function		= nfsd,
 	.svo_module		= THIS_MODULE,
 };
@@ -724,6 +723,7 @@ void nfsd_put(struct net *net)
 
 	if (kref_put(&nn->nfsd_serv->sv_refcnt, nfsd_noop)) {
 		svc_shutdown_net(nn->nfsd_serv, net);
+		nfsd_last_thread(nn->nfsd_serv, net);
 		svc_destroy(&nn->nfsd_serv->sv_refcnt);
 		spin_lock(&nfsd_notifier_lock);
 		nn->nfsd_serv = NULL;
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 6ef9c1cafd0b2..63794d772eb33 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -55,9 +55,6 @@ struct svc_pool {
 struct svc_serv;
 
 struct svc_serv_ops {
-	/* Callback to use when last thread exits. */
-	void		(*svo_shutdown)(struct svc_serv *, struct net *);
-
 	/* function for service threads to run */
 	int		(*svo_function)(void *);
 
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 2aabec2b4becc..74a75a22da9a5 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -539,9 +539,6 @@ EXPORT_SYMBOL_GPL(svc_create_pooled);
 void svc_shutdown_net(struct svc_serv *serv, struct net *net)
 {
 	svc_close_net(serv, net);
-
-	if (serv->sv_ops->svo_shutdown)
-		serv->sv_ops->svo_shutdown(serv, net);
 }
 EXPORT_SYMBOL_GPL(svc_shutdown_net);
 
-- 
GitLab


From 352ad31448fecc78a2e9b78da64eea5d63b8d0ce Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Wed, 26 Jan 2022 11:42:08 -0500
Subject: [PATCH 0850/1586] SUNRPC: Rename svc_create_xprt()

Clean up: Use the "svc_xprt_<task>" function naming convention as
is used for other external APIs.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/lockd/svc.c                  |  4 ++--
 fs/nfs/callback.c               | 12 ++++++------
 fs/nfsd/nfsctl.c                |  8 ++++----
 fs/nfsd/nfssvc.c                |  8 ++++----
 include/linux/sunrpc/svc_xprt.h |  7 ++++---
 net/sunrpc/svc_xprt.c           | 24 +++++++++++++++++++-----
 6 files changed, 39 insertions(+), 24 deletions(-)

diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index f5b688a844aa5..bba6f2b45b64a 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -197,8 +197,8 @@ static int create_lockd_listener(struct svc_serv *serv, const char *name,
 
 	xprt = svc_find_xprt(serv, name, net, family, 0);
 	if (xprt == NULL)
-		return svc_create_xprt(serv, name, net, family, port,
-						SVC_SOCK_DEFAULTS, cred);
+		return svc_xprt_create(serv, name, net, family, port,
+				       SVC_SOCK_DEFAULTS, cred);
 	svc_xprt_put(xprt);
 	return 0;
 }
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 7a810f8850632..c1a8767100ae9 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -45,18 +45,18 @@ static int nfs4_callback_up_net(struct svc_serv *serv, struct net *net)
 	int ret;
 	struct nfs_net *nn = net_generic(net, nfs_net_id);
 
-	ret = svc_create_xprt(serv, "tcp", net, PF_INET,
-				nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS,
-				cred);
+	ret = svc_xprt_create(serv, "tcp", net, PF_INET,
+			      nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS,
+			      cred);
 	if (ret <= 0)
 		goto out_err;
 	nn->nfs_callback_tcpport = ret;
 	dprintk("NFS: Callback listener port = %u (af %u, net %x)\n",
 		nn->nfs_callback_tcpport, PF_INET, net->ns.inum);
 
-	ret = svc_create_xprt(serv, "tcp", net, PF_INET6,
-				nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS,
-				cred);
+	ret = svc_xprt_create(serv, "tcp", net, PF_INET6,
+			      nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS,
+			      cred);
 	if (ret > 0) {
 		nn->nfs_callback_tcpport6 = ret;
 		dprintk("NFS: Callback listener port = %u (af %u, net %x)\n",
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 68b020f2002b7..8fec779994f7b 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -772,13 +772,13 @@ static ssize_t __write_ports_addxprt(char *buf, struct net *net, const struct cr
 	if (err != 0)
 		return err;
 
-	err = svc_create_xprt(nn->nfsd_serv, transport, net,
-				PF_INET, port, SVC_SOCK_ANONYMOUS, cred);
+	err = svc_xprt_create(nn->nfsd_serv, transport, net,
+			      PF_INET, port, SVC_SOCK_ANONYMOUS, cred);
 	if (err < 0)
 		goto out_err;
 
-	err = svc_create_xprt(nn->nfsd_serv, transport, net,
-				PF_INET6, port, SVC_SOCK_ANONYMOUS, cred);
+	err = svc_xprt_create(nn->nfsd_serv, transport, net,
+			      PF_INET6, port, SVC_SOCK_ANONYMOUS, cred);
 	if (err < 0 && err != -EAFNOSUPPORT)
 		goto out_close;
 
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 0c6b216e439ec..ae25b7b3af99e 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -293,13 +293,13 @@ static int nfsd_init_socks(struct net *net, const struct cred *cred)
 	if (!list_empty(&nn->nfsd_serv->sv_permsocks))
 		return 0;
 
-	error = svc_create_xprt(nn->nfsd_serv, "udp", net, PF_INET, NFS_PORT,
-					SVC_SOCK_DEFAULTS, cred);
+	error = svc_xprt_create(nn->nfsd_serv, "udp", net, PF_INET, NFS_PORT,
+				SVC_SOCK_DEFAULTS, cred);
 	if (error < 0)
 		return error;
 
-	error = svc_create_xprt(nn->nfsd_serv, "tcp", net, PF_INET, NFS_PORT,
-					SVC_SOCK_DEFAULTS, cred);
+	error = svc_xprt_create(nn->nfsd_serv, "tcp", net, PF_INET, NFS_PORT,
+				SVC_SOCK_DEFAULTS, cred);
 	if (error < 0)
 		return error;
 
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index a3ba027fb4ba7..a7f6f17c3dc5e 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -127,9 +127,10 @@ int	svc_reg_xprt_class(struct svc_xprt_class *);
 void	svc_unreg_xprt_class(struct svc_xprt_class *);
 void	svc_xprt_init(struct net *, struct svc_xprt_class *, struct svc_xprt *,
 		      struct svc_serv *);
-int	svc_create_xprt(struct svc_serv *, const char *, struct net *,
-			const int, const unsigned short, int,
-			const struct cred *);
+int	svc_xprt_create(struct svc_serv *serv, const char *xprt_name,
+			struct net *net, const int family,
+			const unsigned short port, int flags,
+			const struct cred *cred);
 void	svc_xprt_received(struct svc_xprt *xprt);
 void	svc_xprt_enqueue(struct svc_xprt *xprt);
 void	svc_xprt_put(struct svc_xprt *xprt);
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 1c2295209d083..44be7193cd9b4 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -285,7 +285,7 @@ void svc_add_new_perm_xprt(struct svc_serv *serv, struct svc_xprt *new)
 	svc_xprt_received(new);
 }
 
-static int _svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
+static int _svc_xprt_create(struct svc_serv *serv, const char *xprt_name,
 			    struct net *net, const int family,
 			    const unsigned short port, int flags,
 			    const struct cred *cred)
@@ -321,21 +321,35 @@ static int _svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
 	return -EPROTONOSUPPORT;
 }
 
-int svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
+/**
+ * svc_xprt_create - Add a new listener to @serv
+ * @serv: target RPC service
+ * @xprt_name: transport class name
+ * @net: network namespace
+ * @family: network address family
+ * @port: listener port
+ * @flags: SVC_SOCK flags
+ * @cred: credential to bind to this transport
+ *
+ * Return values:
+ *   %0: New listener added successfully
+ *   %-EPROTONOSUPPORT: Requested transport type not supported
+ */
+int svc_xprt_create(struct svc_serv *serv, const char *xprt_name,
 		    struct net *net, const int family,
 		    const unsigned short port, int flags,
 		    const struct cred *cred)
 {
 	int err;
 
-	err = _svc_create_xprt(serv, xprt_name, net, family, port, flags, cred);
+	err = _svc_xprt_create(serv, xprt_name, net, family, port, flags, cred);
 	if (err == -EPROTONOSUPPORT) {
 		request_module("svc%s", xprt_name);
-		err = _svc_create_xprt(serv, xprt_name, net, family, port, flags, cred);
+		err = _svc_xprt_create(serv, xprt_name, net, family, port, flags, cred);
 	}
 	return err;
 }
-EXPORT_SYMBOL_GPL(svc_create_xprt);
+EXPORT_SYMBOL_GPL(svc_xprt_create);
 
 /*
  * Copy the local and remote xprt addresses to the rqstp structure
-- 
GitLab


From 4355d767a21b9445958fc11bce9a9701f76529d3 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 31 Jan 2022 13:34:29 -0500
Subject: [PATCH 0851/1586] SUNRPC: Rename svc_close_xprt()

Clean up: Use the "svc_xprt_<task>" function naming convention as
is used for other external APIs.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfsctl.c                           | 2 +-
 include/linux/sunrpc/svc_xprt.h            | 2 +-
 net/sunrpc/svc.c                           | 2 +-
 net/sunrpc/svc_xprt.c                      | 9 +++++++--
 net/sunrpc/xprtrdma/svc_rdma_backchannel.c | 2 +-
 5 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 8fec779994f7b..16920e4512bde 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -790,7 +790,7 @@ static ssize_t __write_ports_addxprt(char *buf, struct net *net, const struct cr
 out_close:
 	xprt = svc_find_xprt(nn->nfsd_serv, transport, net, PF_INET, port);
 	if (xprt != NULL) {
-		svc_close_xprt(xprt);
+		svc_xprt_close(xprt);
 		svc_xprt_put(xprt);
 	}
 out_err:
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index a7f6f17c3dc5e..bf7d029fb48c0 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -135,7 +135,7 @@ void	svc_xprt_received(struct svc_xprt *xprt);
 void	svc_xprt_enqueue(struct svc_xprt *xprt);
 void	svc_xprt_put(struct svc_xprt *xprt);
 void	svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt *xprt);
-void	svc_close_xprt(struct svc_xprt *xprt);
+void	svc_xprt_close(struct svc_xprt *xprt);
 int	svc_port_is_privileged(struct sockaddr *sin);
 int	svc_print_xprts(char *buf, int maxlen);
 struct	svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name,
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 74a75a22da9a5..53efef3db3a9c 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1352,7 +1352,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 	svc_authorise(rqstp);
 close_xprt:
 	if (rqstp->rq_xprt && test_bit(XPT_TEMP, &rqstp->rq_xprt->xpt_flags))
-		svc_close_xprt(rqstp->rq_xprt);
+		svc_xprt_close(rqstp->rq_xprt);
 	dprintk("svc: svc_process close\n");
 	return 0;
 
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 44be7193cd9b4..6809116c996a6 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -1068,7 +1068,12 @@ static void svc_delete_xprt(struct svc_xprt *xprt)
 	svc_xprt_put(xprt);
 }
 
-void svc_close_xprt(struct svc_xprt *xprt)
+/**
+ * svc_xprt_close - Close a client connection
+ * @xprt: transport to disconnect
+ *
+ */
+void svc_xprt_close(struct svc_xprt *xprt)
 {
 	trace_svc_xprt_close(xprt);
 	set_bit(XPT_CLOSE, &xprt->xpt_flags);
@@ -1083,7 +1088,7 @@ void svc_close_xprt(struct svc_xprt *xprt)
 	 */
 	svc_delete_xprt(xprt);
 }
-EXPORT_SYMBOL_GPL(svc_close_xprt);
+EXPORT_SYMBOL_GPL(svc_xprt_close);
 
 static int svc_close_list(struct svc_serv *serv, struct list_head *xprt_list, struct net *net)
 {
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index 16897fcb659c1..85c8cdda98b18 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -198,7 +198,7 @@ static int xprt_rdma_bc_send_request(struct rpc_rqst *rqst)
 
 	ret = rpcrdma_bc_send_request(rdma, rqst);
 	if (ret == -ENOTCONN)
-		svc_close_xprt(sxprt);
+		svc_xprt_close(sxprt);
 	return ret;
 }
 
-- 
GitLab


From c7d7ec8f043e53ad16e30f5ebb8b9df415ec0f2b Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Wed, 26 Jan 2022 11:30:55 -0500
Subject: [PATCH 0852/1586] SUNRPC: Remove svc_shutdown_net()

Clean up: svc_shutdown_net() now does nothing but call
svc_close_net(). Replace all external call sites.

svc_close_net() is renamed to be the inverse of svc_xprt_create().

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/lockd/svc.c                  | 4 ++--
 fs/nfs/callback.c               | 2 +-
 fs/nfsd/nfssvc.c                | 2 +-
 include/linux/sunrpc/svc.h      | 1 -
 include/linux/sunrpc/svc_xprt.h | 1 +
 net/sunrpc/svc.c                | 6 ------
 net/sunrpc/svc_xprt.c           | 9 +++++++--
 7 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index bba6f2b45b64a..c83ec4a375bc1 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -248,7 +248,7 @@ out_err:
 	if (warned++ == 0)
 		printk(KERN_WARNING
 			"lockd_up: makesock failed, error=%d\n", err);
-	svc_shutdown_net(serv, net);
+	svc_xprt_destroy_all(serv, net);
 	svc_rpcb_cleanup(serv, net);
 	return err;
 }
@@ -287,7 +287,7 @@ static void lockd_down_net(struct svc_serv *serv, struct net *net)
 			nlm_shutdown_hosts_net(net);
 			cancel_delayed_work_sync(&ln->grace_period_end);
 			locks_end_grace(&ln->lockd_manager);
-			svc_shutdown_net(serv, net);
+			svc_xprt_destroy_all(serv, net);
 			svc_rpcb_cleanup(serv, net);
 		}
 	} else {
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index c1a8767100ae9..c98c68513590f 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -189,7 +189,7 @@ static void nfs_callback_down_net(u32 minorversion, struct svc_serv *serv, struc
 		return;
 
 	dprintk("NFS: destroy per-net callback data; net=%x\n", net->ns.inum);
-	svc_shutdown_net(serv, net);
+	svc_xprt_destroy_all(serv, net);
 }
 
 static int nfs_callback_up_net(int minorversion, struct svc_serv *serv,
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index ae25b7b3af99e..b92d272f4ba6b 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -722,7 +722,7 @@ void nfsd_put(struct net *net)
 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
 	if (kref_put(&nn->nfsd_serv->sv_refcnt, nfsd_noop)) {
-		svc_shutdown_net(nn->nfsd_serv, net);
+		svc_xprt_destroy_all(nn->nfsd_serv, net);
 		nfsd_last_thread(nn->nfsd_serv, net);
 		svc_destroy(&nn->nfsd_serv->sv_refcnt);
 		spin_lock(&nfsd_notifier_lock);
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 63794d772eb33..5603158b2aa71 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -508,7 +508,6 @@ struct svc_serv *  svc_create_pooled(struct svc_program *, unsigned int,
 			const struct svc_serv_ops *);
 int		   svc_set_num_threads(struct svc_serv *, struct svc_pool *, int);
 int		   svc_pool_stats_open(struct svc_serv *serv, struct file *file);
-void		   svc_shutdown_net(struct svc_serv *, struct net *);
 int		   svc_process(struct svc_rqst *);
 int		   bc_svc_process(struct svc_serv *, struct rpc_rqst *,
 			struct svc_rqst *);
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index bf7d029fb48c0..42e113742429b 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -131,6 +131,7 @@ int	svc_xprt_create(struct svc_serv *serv, const char *xprt_name,
 			struct net *net, const int family,
 			const unsigned short port, int flags,
 			const struct cred *cred);
+void	svc_xprt_destroy_all(struct svc_serv *serv, struct net *net);
 void	svc_xprt_received(struct svc_xprt *xprt);
 void	svc_xprt_enqueue(struct svc_xprt *xprt);
 void	svc_xprt_put(struct svc_xprt *xprt);
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 53efef3db3a9c..08d6847464529 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -536,12 +536,6 @@ out_err:
 }
 EXPORT_SYMBOL_GPL(svc_create_pooled);
 
-void svc_shutdown_net(struct svc_serv *serv, struct net *net)
-{
-	svc_close_net(serv, net);
-}
-EXPORT_SYMBOL_GPL(svc_shutdown_net);
-
 /*
  * Destroy an RPC service. Should be called with appropriate locking to
  * protect sv_permsocks and sv_tempsocks.
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 6809116c996a6..0c117d3bfda80 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -1140,7 +1140,11 @@ static void svc_clean_up_xprts(struct svc_serv *serv, struct net *net)
 	}
 }
 
-/*
+/**
+ * svc_xprt_destroy_all - Destroy transports associated with @serv
+ * @serv: RPC service to be shut down
+ * @net: target network namespace
+ *
  * Server threads may still be running (especially in the case where the
  * service is still running in other network namespaces).
  *
@@ -1152,7 +1156,7 @@ static void svc_clean_up_xprts(struct svc_serv *serv, struct net *net)
  * threads, we may need to wait a little while and then check again to
  * see if they're done.
  */
-void svc_close_net(struct svc_serv *serv, struct net *net)
+void svc_xprt_destroy_all(struct svc_serv *serv, struct net *net)
 {
 	int delay = 0;
 
@@ -1163,6 +1167,7 @@ void svc_close_net(struct svc_serv *serv, struct net *net)
 		msleep(delay++);
 	}
 }
+EXPORT_SYMBOL_GPL(svc_xprt_destroy_all);
 
 /*
  * Handle defer and revisit of requests
-- 
GitLab


From f49169c97fceb21ad6a0aaf671c50b0f520f15a5 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Wed, 16 Feb 2022 12:31:09 -0500
Subject: [PATCH 0853/1586] NFSD: Remove svc_serv_ops::svo_module

struct svc_serv_ops is about to be removed.

Neil Brown says:
> I suspect svo_module can go as well - I don't think the thread is
> ever the thing that primarily keeps a module active.

A random sample of kthread_create() callers shows sunrpc is the only
one that manages module reference count in this way.

Suggested-by: Neil Brown <neilb@suse.de>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/lockd/svc.c             | 4 +---
 fs/nfs/callback.c          | 7 ++-----
 fs/nfs/nfs4state.c         | 1 -
 fs/nfsd/nfssvc.c           | 3 ---
 include/linux/sunrpc/svc.h | 5 -----
 kernel/module.c            | 2 +-
 net/sunrpc/svc.c           | 2 --
 7 files changed, 4 insertions(+), 20 deletions(-)

diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index c83ec4a375bc1..bfde31124f3af 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -184,8 +184,7 @@ lockd(void *vrqstp)
 	dprintk("lockd_down: service stopped\n");
 
 	svc_exit_thread(rqstp);
-
-	module_put_and_kthread_exit(0);
+	return 0;
 }
 
 static int create_lockd_listener(struct svc_serv *serv, const char *name,
@@ -352,7 +351,6 @@ static struct notifier_block lockd_inet6addr_notifier = {
 
 static const struct svc_serv_ops lockd_sv_ops = {
 	.svo_function		= lockd,
-	.svo_module		= THIS_MODULE,
 };
 
 static int lockd_get(void)
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index c98c68513590f..a494f9e7bd0a0 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -17,7 +17,6 @@
 #include <linux/errno.h>
 #include <linux/mutex.h>
 #include <linux/freezer.h>
-#include <linux/kthread.h>
 #include <linux/sunrpc/svcauth_gss.h>
 #include <linux/sunrpc/bc_xprt.h>
 
@@ -92,8 +91,8 @@ nfs4_callback_svc(void *vrqstp)
 			continue;
 		svc_process(rqstp);
 	}
+
 	svc_exit_thread(rqstp);
-	module_put_and_kthread_exit(0);
 	return 0;
 }
 
@@ -136,8 +135,8 @@ nfs41_callback_svc(void *vrqstp)
 			finish_wait(&serv->sv_cb_waitq, &wq);
 		}
 	}
+
 	svc_exit_thread(rqstp);
-	module_put_and_kthread_exit(0);
 	return 0;
 }
 
@@ -234,12 +233,10 @@ err_bind:
 
 static const struct svc_serv_ops nfs40_cb_sv_ops = {
 	.svo_function		= nfs4_callback_svc,
-	.svo_module		= THIS_MODULE,
 };
 #if defined(CONFIG_NFS_V4_1)
 static const struct svc_serv_ops nfs41_cb_sv_ops = {
 	.svo_function		= nfs41_callback_svc,
-	.svo_module		= THIS_MODULE,
 };
 
 static const struct svc_serv_ops *nfs4_cb_sv_ops[] = {
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index f5a62c0d999b4..02a899e4390fa 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -2697,6 +2697,5 @@ static int nfs4_run_state_manager(void *ptr)
 	allow_signal(SIGKILL);
 	nfs4_state_manager(clp);
 	nfs_put_client(clp);
-	module_put_and_kthread_exit(0);
 	return 0;
 }
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index b92d272f4ba6b..544187a8a22bb 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -614,7 +614,6 @@ static int nfsd_get_default_max_blksize(void)
 
 static const struct svc_serv_ops nfsd_thread_sv_ops = {
 	.svo_function		= nfsd,
-	.svo_module		= THIS_MODULE,
 };
 
 void nfsd_shutdown_threads(struct net *net)
@@ -1018,8 +1017,6 @@ out:
 		msleep(20);
 	}
 
-	/* Release module */
-	module_put_and_kthread_exit(0);
 	return 0;
 }
 
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 5603158b2aa71..dfc9283f412f9 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -57,11 +57,6 @@ struct svc_serv;
 struct svc_serv_ops {
 	/* function for service threads to run */
 	int		(*svo_function)(void *);
-
-	/* optional module to count when adding threads.
-	 * Thread function must call module_put_and_kthread_exit() to exit.
-	 */
-	struct module	*svo_module;
 };
 
 /*
diff --git a/kernel/module.c b/kernel/module.c
index 46a5c2ed19285..6cea788fd965c 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -335,7 +335,7 @@ static inline void add_taint_module(struct module *mod, unsigned flag,
 
 /*
  * A thread that wants to hold a reference to a module only while it
- * is running can call this to safely exit.  nfsd and lockd use this.
+ * is running can call this to safely exit.
  */
 void __noreturn __module_put_and_kthread_exit(struct module *mod, long code)
 {
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 08d6847464529..a90d555aa163a 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -736,11 +736,9 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
 		if (IS_ERR(rqstp))
 			return PTR_ERR(rqstp);
 
-		__module_get(serv->sv_ops->svo_module);
 		task = kthread_create_on_node(serv->sv_ops->svo_function, rqstp,
 					      node, "%s", serv->sv_name);
 		if (IS_ERR(task)) {
-			module_put(serv->sv_ops->svo_module);
 			svc_exit_thread(rqstp);
 			return PTR_ERR(task);
 		}
-- 
GitLab


From 37902c6313090235c847af89c5515591261ee338 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Wed, 16 Feb 2022 12:16:27 -0500
Subject: [PATCH 0854/1586] NFSD: Move svc_serv_ops::svo_function into struct
 svc_serv

Hoist svo_function back into svc_serv and remove struct
svc_serv_ops, since the struct is now devoid of fields.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/lockd/svc.c             |  6 +-----
 fs/nfs/callback.c          | 43 ++++++++++----------------------------
 fs/nfsd/nfssvc.c           |  7 +------
 include/linux/sunrpc/svc.h | 14 ++++---------
 net/sunrpc/svc.c           | 37 ++++++++++++++++++++++----------
 5 files changed, 43 insertions(+), 64 deletions(-)

diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index bfde31124f3af..59ef8a1f843f3 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -349,10 +349,6 @@ static struct notifier_block lockd_inet6addr_notifier = {
 };
 #endif
 
-static const struct svc_serv_ops lockd_sv_ops = {
-	.svo_function		= lockd,
-};
-
 static int lockd_get(void)
 {
 	struct svc_serv *serv;
@@ -376,7 +372,7 @@ static int lockd_get(void)
 		nlm_timeout = LOCKD_DFLT_TIMEO;
 	nlmsvc_timeout = nlm_timeout * HZ;
 
-	serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, &lockd_sv_ops);
+	serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, lockd);
 	if (!serv) {
 		printk(KERN_WARNING "lockd_up: create service failed\n");
 		return -ENOMEM;
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index a494f9e7bd0a0..456af7d230cf1 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -231,29 +231,10 @@ err_bind:
 	return ret;
 }
 
-static const struct svc_serv_ops nfs40_cb_sv_ops = {
-	.svo_function		= nfs4_callback_svc,
-};
-#if defined(CONFIG_NFS_V4_1)
-static const struct svc_serv_ops nfs41_cb_sv_ops = {
-	.svo_function		= nfs41_callback_svc,
-};
-
-static const struct svc_serv_ops *nfs4_cb_sv_ops[] = {
-	[0] = &nfs40_cb_sv_ops,
-	[1] = &nfs41_cb_sv_ops,
-};
-#else
-static const struct svc_serv_ops *nfs4_cb_sv_ops[] = {
-	[0] = &nfs40_cb_sv_ops,
-	[1] = NULL,
-};
-#endif
-
 static struct svc_serv *nfs_callback_create_svc(int minorversion)
 {
 	struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion];
-	const struct svc_serv_ops *sv_ops;
+	int (*threadfn)(void *data);
 	struct svc_serv *serv;
 
 	/*
@@ -262,17 +243,6 @@ static struct svc_serv *nfs_callback_create_svc(int minorversion)
 	if (cb_info->serv)
 		return svc_get(cb_info->serv);
 
-	switch (minorversion) {
-	case 0:
-		sv_ops = nfs4_cb_sv_ops[0];
-		break;
-	default:
-		sv_ops = nfs4_cb_sv_ops[1];
-	}
-
-	if (sv_ops == NULL)
-		return ERR_PTR(-ENOTSUPP);
-
 	/*
 	 * Sanity check: if there's no task,
 	 * we should be the first user ...
@@ -281,7 +251,16 @@ static struct svc_serv *nfs_callback_create_svc(int minorversion)
 		printk(KERN_WARNING "nfs_callback_create_svc: no kthread, %d users??\n",
 			cb_info->users);
 
-	serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, sv_ops);
+	threadfn = nfs4_callback_svc;
+#if defined(CONFIG_NFS_V4_1)
+	if (minorversion)
+		threadfn = nfs41_callback_svc;
+#else
+	if (minorversion)
+		return ERR_PTR(-ENOTSUPP);
+#endif
+	serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE,
+			  threadfn);
 	if (!serv) {
 		printk(KERN_ERR "nfs_callback_create_svc: create service failed\n");
 		return ERR_PTR(-ENOMEM);
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 544187a8a22bb..5abbe5d1c77ff 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -612,10 +612,6 @@ static int nfsd_get_default_max_blksize(void)
 	return ret;
 }
 
-static const struct svc_serv_ops nfsd_thread_sv_ops = {
-	.svo_function		= nfsd,
-};
-
 void nfsd_shutdown_threads(struct net *net)
 {
 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
@@ -654,8 +650,7 @@ int nfsd_create_serv(struct net *net)
 	if (nfsd_max_blksize == 0)
 		nfsd_max_blksize = nfsd_get_default_max_blksize();
 	nfsd_reset_versions(nn);
-	serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize,
-				 &nfsd_thread_sv_ops);
+	serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, nfsd);
 	if (serv == NULL)
 		return -ENOMEM;
 
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index dfc9283f412f9..a5dda4987e8ba 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -52,13 +52,6 @@ struct svc_pool {
 	unsigned long		sp_flags;
 } ____cacheline_aligned_in_smp;
 
-struct svc_serv;
-
-struct svc_serv_ops {
-	/* function for service threads to run */
-	int		(*svo_function)(void *);
-};
-
 /*
  * RPC service.
  *
@@ -91,7 +84,8 @@ struct svc_serv {
 
 	unsigned int		sv_nrpools;	/* number of thread pools */
 	struct svc_pool *	sv_pools;	/* array of thread pools */
-	const struct svc_serv_ops *sv_ops;	/* server operations */
+	int			(*sv_threadfn)(void *data);
+
 #if defined(CONFIG_SUNRPC_BACKCHANNEL)
 	struct list_head	sv_cb_list;	/* queue for callback requests
 						 * that arrive over the same
@@ -492,7 +486,7 @@ int svc_rpcb_setup(struct svc_serv *serv, struct net *net);
 void svc_rpcb_cleanup(struct svc_serv *serv, struct net *net);
 int svc_bind(struct svc_serv *serv, struct net *net);
 struct svc_serv *svc_create(struct svc_program *, unsigned int,
-			    const struct svc_serv_ops *);
+			    int (*threadfn)(void *data));
 struct svc_rqst *svc_rqst_alloc(struct svc_serv *serv,
 					struct svc_pool *pool, int node);
 void		   svc_rqst_replace_page(struct svc_rqst *rqstp,
@@ -500,7 +494,7 @@ void		   svc_rqst_replace_page(struct svc_rqst *rqstp,
 void		   svc_rqst_free(struct svc_rqst *);
 void		   svc_exit_thread(struct svc_rqst *);
 struct svc_serv *  svc_create_pooled(struct svc_program *, unsigned int,
-			const struct svc_serv_ops *);
+				     int (*threadfn)(void *data));
 int		   svc_set_num_threads(struct svc_serv *, struct svc_pool *, int);
 int		   svc_pool_stats_open(struct svc_serv *serv, struct file *file);
 int		   svc_process(struct svc_rqst *);
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index a90d555aa163a..5570040175480 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -448,7 +448,7 @@ __svc_init_bc(struct svc_serv *serv)
  */
 static struct svc_serv *
 __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
-	     const struct svc_serv_ops *ops)
+	     int (*threadfn)(void *data))
 {
 	struct svc_serv	*serv;
 	unsigned int vers;
@@ -465,7 +465,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
 		bufsize = RPCSVC_MAXPAYLOAD;
 	serv->sv_max_payload = bufsize? bufsize : 4096;
 	serv->sv_max_mesg  = roundup(serv->sv_max_payload + PAGE_SIZE, PAGE_SIZE);
-	serv->sv_ops = ops;
+	serv->sv_threadfn = threadfn;
 	xdrsize = 0;
 	while (prog) {
 		prog->pg_lovers = prog->pg_nvers-1;
@@ -511,22 +511,37 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
 	return serv;
 }
 
-struct svc_serv *
-svc_create(struct svc_program *prog, unsigned int bufsize,
-	   const struct svc_serv_ops *ops)
+/**
+ * svc_create - Create an RPC service
+ * @prog: the RPC program the new service will handle
+ * @bufsize: maximum message size for @prog
+ * @threadfn: a function to service RPC requests for @prog
+ *
+ * Returns an instantiated struct svc_serv object or NULL.
+ */
+struct svc_serv *svc_create(struct svc_program *prog, unsigned int bufsize,
+			    int (*threadfn)(void *data))
 {
-	return __svc_create(prog, bufsize, /*npools*/1, ops);
+	return __svc_create(prog, bufsize, 1, threadfn);
 }
 EXPORT_SYMBOL_GPL(svc_create);
 
-struct svc_serv *
-svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
-		  const struct svc_serv_ops *ops)
+/**
+ * svc_create_pooled - Create an RPC service with pooled threads
+ * @prog: the RPC program the new service will handle
+ * @bufsize: maximum message size for @prog
+ * @threadfn: a function to service RPC requests for @prog
+ *
+ * Returns an instantiated struct svc_serv object or NULL.
+ */
+struct svc_serv *svc_create_pooled(struct svc_program *prog,
+				   unsigned int bufsize,
+				   int (*threadfn)(void *data))
 {
 	struct svc_serv *serv;
 	unsigned int npools = svc_pool_map_get();
 
-	serv = __svc_create(prog, bufsize, npools, ops);
+	serv = __svc_create(prog, bufsize, npools, threadfn);
 	if (!serv)
 		goto out_err;
 	return serv;
@@ -736,7 +751,7 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
 		if (IS_ERR(rqstp))
 			return PTR_ERR(rqstp);
 
-		task = kthread_create_on_node(serv->sv_ops->svo_function, rqstp,
+		task = kthread_create_on_node(serv->sv_threadfn, rqstp,
 					      node, "%s", serv->sv_name);
 		if (IS_ERR(task)) {
 			svc_exit_thread(rqstp);
-- 
GitLab


From 74aaf96feaca80285912cc6f19575b3e97177918 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 22 Feb 2022 13:10:52 -0500
Subject: [PATCH 0855/1586] SUNRPC: Teach server to recognize RPC_AUTH_TLS

Initial support for the RPC_AUTH_TLS authentication flavor enables
NFSD to eventually accept an RPC_AUTH_TLS probe from clients. This
patch simply prevents NFSD from rejecting these probes completely.

In the meantime, graft this support in now so that RPC_AUTH_TLS
support keeps up with generic code and API changes in the RPC
server.

Down the road, server-side transport implementations will populate
xpo_start_tls when they can support RPC-with-TLS. For example, TCP
will eventually populate it, but RDMA won't.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/svc_xprt.h |  1 +
 net/sunrpc/svcauth.c            |  2 ++
 net/sunrpc/svcauth_unix.c       | 60 +++++++++++++++++++++++++++++++++
 3 files changed, 63 insertions(+)

diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index 42e113742429b..20068ccfd0cc0 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -28,6 +28,7 @@ struct svc_xprt_ops {
 	void		(*xpo_free)(struct svc_xprt *);
 	void		(*xpo_secure_port)(struct svc_rqst *rqstp);
 	void		(*xpo_kill_temp_xprt)(struct svc_xprt *);
+	void		(*xpo_start_tls)(struct svc_xprt *);
 };
 
 struct svc_xprt_class {
diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c
index 5a8b8e03fdd42..e72ba2f13f6c6 100644
--- a/net/sunrpc/svcauth.c
+++ b/net/sunrpc/svcauth.c
@@ -31,10 +31,12 @@
  */
 extern struct auth_ops svcauth_null;
 extern struct auth_ops svcauth_unix;
+extern struct auth_ops svcauth_tls;
 
 static struct auth_ops __rcu *authtab[RPC_AUTH_MAXFLAVOR] = {
 	[RPC_AUTH_NULL] = (struct auth_ops __force __rcu *)&svcauth_null,
 	[RPC_AUTH_UNIX] = (struct auth_ops __force __rcu *)&svcauth_unix,
+	[RPC_AUTH_TLS]  = (struct auth_ops __force __rcu *)&svcauth_tls,
 };
 
 static struct auth_ops *
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index d7ed7d49115ac..b1efc34db6ed8 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -37,6 +37,7 @@ struct unix_domain {
 
 extern struct auth_ops svcauth_null;
 extern struct auth_ops svcauth_unix;
+extern struct auth_ops svcauth_tls;
 
 static void svcauth_unix_domain_release_rcu(struct rcu_head *head)
 {
@@ -788,6 +789,65 @@ struct auth_ops svcauth_null = {
 };
 
 
+static int
+svcauth_tls_accept(struct svc_rqst *rqstp)
+{
+	struct svc_cred	*cred = &rqstp->rq_cred;
+	struct kvec *argv = rqstp->rq_arg.head;
+	struct kvec *resv = rqstp->rq_res.head;
+
+	if (argv->iov_len < XDR_UNIT * 3)
+		return SVC_GARBAGE;
+
+	/* Call's cred length */
+	if (svc_getu32(argv) != xdr_zero) {
+		rqstp->rq_auth_stat = rpc_autherr_badcred;
+		return SVC_DENIED;
+	}
+
+	/* Call's verifier flavor and its length */
+	if (svc_getu32(argv) != rpc_auth_null ||
+	    svc_getu32(argv) != xdr_zero) {
+		rqstp->rq_auth_stat = rpc_autherr_badverf;
+		return SVC_DENIED;
+	}
+
+	/* AUTH_TLS is not valid on non-NULL procedures */
+	if (rqstp->rq_proc != 0) {
+		rqstp->rq_auth_stat = rpc_autherr_badcred;
+		return SVC_DENIED;
+	}
+
+	/* Mapping to nobody uid/gid is required */
+	cred->cr_uid = INVALID_UID;
+	cred->cr_gid = INVALID_GID;
+	cred->cr_group_info = groups_alloc(0);
+	if (cred->cr_group_info == NULL)
+		return SVC_CLOSE; /* kmalloc failure - client must retry */
+
+	/* Reply's verifier */
+	svc_putnl(resv, RPC_AUTH_NULL);
+	if (rqstp->rq_xprt->xpt_ops->xpo_start_tls) {
+		svc_putnl(resv, 8);
+		memcpy(resv->iov_base + resv->iov_len, "STARTTLS", 8);
+		resv->iov_len += 8;
+	} else
+		svc_putnl(resv, 0);
+
+	rqstp->rq_cred.cr_flavor = RPC_AUTH_TLS;
+	return SVC_OK;
+}
+
+struct auth_ops svcauth_tls = {
+	.name		= "tls",
+	.owner		= THIS_MODULE,
+	.flavour	= RPC_AUTH_TLS,
+	.accept 	= svcauth_tls_accept,
+	.release	= svcauth_null_release,
+	.set_client	= svcauth_unix_set_client,
+};
+
+
 static int
 svcauth_unix_accept(struct svc_rqst *rqstp)
 {
-- 
GitLab


From 4d2eeafecd6c83b4444db3dc0ada201c89b1aa44 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Thu, 24 Feb 2022 18:17:05 +0200
Subject: [PATCH 0856/1586] nfsd: more robust allocation failure handling in
 nfsd_file_cache_init

The nfsd file cache table can be pretty large and its allocation
may require as many as 80 contigious pages.

Employ the same fix that was employed for similar issue that was
reported for the reply cache hash table allocation several years ago
by commit 8f97514b423a ("nfsd: more robust allocation failure handling
in nfsd_reply_cache_init").

Fixes: 65294c1f2c5e ("nfsd: add a new struct file caching facility to nfsd")
Link: https://lore.kernel.org/linux-nfs/e3cdaeec85a6cfec980e87fc294327c0381c1778.camel@kernel.org/
Suggested-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Tested-by: Amir Goldstein <amir73il@gmail.com>
---
 fs/nfsd/filecache.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
index 8bc807c5fea4c..cc2831cec6695 100644
--- a/fs/nfsd/filecache.c
+++ b/fs/nfsd/filecache.c
@@ -632,7 +632,7 @@ nfsd_file_cache_init(void)
 	if (!nfsd_filecache_wq)
 		goto out;
 
-	nfsd_file_hashtbl = kcalloc(NFSD_FILE_HASH_SIZE,
+	nfsd_file_hashtbl = kvcalloc(NFSD_FILE_HASH_SIZE,
 				sizeof(*nfsd_file_hashtbl), GFP_KERNEL);
 	if (!nfsd_file_hashtbl) {
 		pr_err("nfsd: unable to allocate nfsd_file_hashtbl\n");
@@ -700,7 +700,7 @@ out_err:
 	nfsd_file_slab = NULL;
 	kmem_cache_destroy(nfsd_file_mark_slab);
 	nfsd_file_mark_slab = NULL;
-	kfree(nfsd_file_hashtbl);
+	kvfree(nfsd_file_hashtbl);
 	nfsd_file_hashtbl = NULL;
 	destroy_workqueue(nfsd_filecache_wq);
 	nfsd_filecache_wq = NULL;
@@ -811,7 +811,7 @@ nfsd_file_cache_shutdown(void)
 	fsnotify_wait_marks_destroyed();
 	kmem_cache_destroy(nfsd_file_mark_slab);
 	nfsd_file_mark_slab = NULL;
-	kfree(nfsd_file_hashtbl);
+	kvfree(nfsd_file_hashtbl);
 	nfsd_file_hashtbl = NULL;
 	destroy_workqueue(nfsd_filecache_wq);
 	nfsd_filecache_wq = NULL;
-- 
GitLab


From 70868c6b8fd80db585da57a264c50a69af8fd3c3 Mon Sep 17 00:00:00 2001
From: Wan Jiabing <wanjiabing@vivo.com>
Date: Mon, 28 Feb 2022 10:56:41 +0800
Subject: [PATCH 0857/1586] docs: fix 'make htmldocs' warning in SCTP.rst

Fix following 'make htmldocs' warnings:
./Documentation/security/SCTP.rst:123: WARNING: Title underline too short.
security_sctp_assoc_established()
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
./Documentation/security/SCTP.rst:123: WARNING: Title underline too short.
security_sctp_assoc_established()
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
./Documentation/security/SCTP.rst:273: WARNING: Title underline too short.
security_sctp_assoc_established()
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
./Documentation/security/SCTP.rst:273: WARNING: Title underline too short.
security_sctp_assoc_established()
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Fixes: 5e50f5d4ff31 ("security: add sctp_assoc_established hook")
Signed-off-by: Wan Jiabing <wanjiabing@vivo.com>
Reviewed-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 Documentation/security/SCTP.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/security/SCTP.rst b/Documentation/security/SCTP.rst
index 406cc68b88087..b73eb764a0017 100644
--- a/Documentation/security/SCTP.rst
+++ b/Documentation/security/SCTP.rst
@@ -120,7 +120,7 @@ calls **sctp_peeloff**\(3).
 
 
 security_sctp_assoc_established()
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Called when a COOKIE ACK is received, and the peer secid will be
 saved into ``@asoc->peer_secid`` for client::
 
@@ -270,7 +270,7 @@ sockets sid and peer sid to that contained in the ``@asoc sid`` and
 
 
 security_sctp_assoc_established()
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Called when a COOKIE ACK is received where it sets the connection's peer sid
 to that in ``@skb``::
 
-- 
GitLab


From a5cd1ab7ab679d252a6d2f483eee7d45ebf2040c Mon Sep 17 00:00:00 2001
From: Casey Schaufler <casey@schaufler-ca.com>
Date: Mon, 28 Feb 2022 15:45:32 -0800
Subject: [PATCH 0858/1586] Fix incorrect type in assignment of ipv6 port for
 audit

Remove inappropriate use of ntohs() and assign the
port value directly.

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Casey Schaufler <casey@schaufler-ca.com>
---
 security/smack/smack_lsm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 14b279cc75c96..6207762dbdb13 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -2510,7 +2510,7 @@ static int smk_ipv6_check(struct smack_known *subject,
 #ifdef CONFIG_AUDIT
 	smk_ad_init_net(&ad, __func__, LSM_AUDIT_DATA_NET, &net);
 	ad.a.u.net->family = PF_INET6;
-	ad.a.u.net->dport = ntohs(address->sin6_port);
+	ad.a.u.net->dport = address->sin6_port;
 	if (act == SMK_RECEIVING)
 		ad.a.u.net->v6info.saddr = address->sin6_addr;
 	else
-- 
GitLab


From 4a48b4c428dc92b5e0b19de83e7eb8d530dddd48 Mon Sep 17 00:00:00 2001
From: Mimi Zohar <zohar@linux.ibm.com>
Date: Wed, 23 Feb 2022 11:41:18 -0500
Subject: [PATCH 0859/1586] MAINTAINERS: add missing
 security/integrity/platform_certs

Define a new KEYS/KEYRINGS_INTEGRITY record so that any changes
to platform_certs/ are posted on the linux-integrity mailing list
as well.

Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
---
 MAINTAINERS | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index fdf0420ba477e..6328cd4535bcf 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10660,6 +10660,14 @@ F:	include/linux/keyctl.h
 F:	include/uapi/linux/keyctl.h
 F:	security/keys/
 
+KEYS/KEYRINGS_INTEGRITY
+M:	Jarkko Sakkinen <jarkko@kernel.org>
+M:	Mimi Zohar <zohar@linux.ibm.com>
+L:	linux-integrity@vger.kernel.org
+L:	keyrings@vger.kernel.org
+S:	Supported
+F:	security/integrity/platform_certs
+
 KFENCE
 M:	Alexander Potapenko <glider@google.com>
 M:	Marco Elver <elver@google.com>
-- 
GitLab


From 9feaf8b387ee0ece9c1d7add308776b502a35d0c Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Mon, 28 Feb 2022 20:18:51 -0800
Subject: [PATCH 0860/1586] efi: fix return value of __setup handlers

When "dump_apple_properties" is used on the kernel boot command line,
it causes an Unknown parameter message and the string is added to init's
argument strings:

  Unknown kernel command line parameters "dump_apple_properties
    BOOT_IMAGE=/boot/bzImage-517rc6 efivar_ssdt=newcpu_ssdt", will be
    passed to user space.

 Run /sbin/init as init process
   with arguments:
     /sbin/init
     dump_apple_properties
   with environment:
     HOME=/
     TERM=linux
     BOOT_IMAGE=/boot/bzImage-517rc6
     efivar_ssdt=newcpu_ssdt

Similarly when "efivar_ssdt=somestring" is used, it is added to the
Unknown parameter message and to init's environment strings, polluting
them (see examples above).

Change the return value of the __setup functions to 1 to indicate
that the __setup options have been handled.

Fixes: 58c5475aba67 ("x86/efi: Retrieve and assign Apple device properties")
Fixes: 475fb4e8b2f4 ("efi / ACPI: load SSTDs from EFI variables")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Reported-by: Igor Zhbanov <i.zhbanov@omprussia.ru>
Link: lore.kernel.org/r/64644a2f-4a20-bab3-1e15-3b2cdd0defe3@omprussia.ru
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: linux-efi@vger.kernel.org
Cc: Lukas Wunner <lukas@wunner.de>
Cc: Octavian Purdila <octavian.purdila@intel.com>
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Cc: Matt Fleming <matt@codeblueprint.co.uk>
Link: https://lore.kernel.org/r/20220301041851.12459-1-rdunlap@infradead.org
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 drivers/firmware/efi/apple-properties.c | 2 +-
 drivers/firmware/efi/efi.c              | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/firmware/efi/apple-properties.c b/drivers/firmware/efi/apple-properties.c
index 4c3201e290e29..ea84108035eb0 100644
--- a/drivers/firmware/efi/apple-properties.c
+++ b/drivers/firmware/efi/apple-properties.c
@@ -24,7 +24,7 @@ static bool dump_properties __initdata;
 static int __init dump_properties_enable(char *arg)
 {
 	dump_properties = true;
-	return 0;
+	return 1;
 }
 
 __setup("dump_apple_properties", dump_properties_enable);
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index 7de3f5b6e8d0a..5502e176d51be 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -212,7 +212,7 @@ static int __init efivar_ssdt_setup(char *str)
 		memcpy(efivar_ssdt, str, strlen(str));
 	else
 		pr_warn("efivar_ssdt: name too long: %s\n", str);
-	return 0;
+	return 1;
 }
 __setup("efivar_ssdt=", efivar_ssdt_setup);
 
-- 
GitLab


From 8d4c998919320206f8832dc413e23fdd27ef2274 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Thu, 24 Feb 2022 10:12:22 +0000
Subject: [PATCH 0861/1586] irqchip/qcom-pdc: Kill PDC_NO_PARENT_IRQ

PDC_NO_PARENT_IRQ is pretty pointless, as all it indicates is
that the PDC terminates the interrupt hierarchy. Which is
exactly the same as not having a mapping in the GIC space.
This is also bad practice to treat the absence of a hwirq
as a hwirq itself.

Just explicitly use the region mapping pointer, and drop
the definition.

Signed-off-by: Marc Zyngier <maz@kernel.org>
Reviewed-by: Maulik Shah <quic_mkshah@quicinc.com>
Link: https://lore.kernel.org/r/20220224101226.88373-2-maz@kernel.org
---
 drivers/irqchip/qcom-pdc.c | 34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/drivers/irqchip/qcom-pdc.c b/drivers/irqchip/qcom-pdc.c
index 173e6520e06ec..3b214c4e6755a 100644
--- a/drivers/irqchip/qcom-pdc.c
+++ b/drivers/irqchip/qcom-pdc.c
@@ -30,14 +30,14 @@
 #define IRQ_ENABLE_BANK		0x10
 #define IRQ_i_CFG		0x110
 
-#define PDC_NO_PARENT_IRQ	~0UL
-
 struct pdc_pin_region {
 	u32 pin_base;
 	u32 parent_base;
 	u32 cnt;
 };
 
+#define pin_to_hwirq(r, p)	((r)->parent_base + (p) - (r)->pin_base)
+
 static DEFINE_RAW_SPINLOCK(pdc_lock);
 static void __iomem *pdc_base;
 static struct pdc_pin_region *pdc_region;
@@ -186,19 +186,17 @@ static struct irq_chip qcom_pdc_gic_chip = {
 	.irq_set_affinity	= irq_chip_set_affinity_parent,
 };
 
-static irq_hw_number_t get_parent_hwirq(int pin)
+static struct pdc_pin_region *get_pin_region(int pin)
 {
 	int i;
-	struct pdc_pin_region *region;
 
 	for (i = 0; i < pdc_region_cnt; i++) {
-		region = &pdc_region[i];
-		if (pin >= region->pin_base &&
-		    pin < region->pin_base + region->cnt)
-			return (region->parent_base + pin - region->pin_base);
+		if (pin >= pdc_region[i].pin_base &&
+		    pin < pdc_region[i].pin_base + pdc_region[i].cnt)
+			return &pdc_region[i];
 	}
 
-	return PDC_NO_PARENT_IRQ;
+	return NULL;
 }
 
 static int qcom_pdc_translate(struct irq_domain *d, struct irq_fwspec *fwspec,
@@ -221,7 +219,8 @@ static int qcom_pdc_alloc(struct irq_domain *domain, unsigned int virq,
 {
 	struct irq_fwspec *fwspec = data;
 	struct irq_fwspec parent_fwspec;
-	irq_hw_number_t hwirq, parent_hwirq;
+	struct pdc_pin_region *region;
+	irq_hw_number_t hwirq;
 	unsigned int type;
 	int ret;
 
@@ -234,8 +233,8 @@ static int qcom_pdc_alloc(struct irq_domain *domain, unsigned int virq,
 	if (ret)
 		return ret;
 
-	parent_hwirq = get_parent_hwirq(hwirq);
-	if (parent_hwirq == PDC_NO_PARENT_IRQ)
+	region = get_pin_region(hwirq);
+	if (!region)
 		return irq_domain_disconnect_hierarchy(domain->parent, virq);
 
 	if (type & IRQ_TYPE_EDGE_BOTH)
@@ -247,7 +246,7 @@ static int qcom_pdc_alloc(struct irq_domain *domain, unsigned int virq,
 	parent_fwspec.fwnode      = domain->parent->fwnode;
 	parent_fwspec.param_count = 3;
 	parent_fwspec.param[0]    = 0;
-	parent_fwspec.param[1]    = parent_hwirq;
+	parent_fwspec.param[1]    = pin_to_hwirq(region, hwirq);
 	parent_fwspec.param[2]    = type;
 
 	return irq_domain_alloc_irqs_parent(domain, virq, nr_irqs,
@@ -265,7 +264,8 @@ static int qcom_pdc_gpio_alloc(struct irq_domain *domain, unsigned int virq,
 {
 	struct irq_fwspec *fwspec = data;
 	struct irq_fwspec parent_fwspec;
-	irq_hw_number_t hwirq, parent_hwirq;
+	struct pdc_pin_region *region;
+	irq_hw_number_t hwirq;
 	unsigned int type;
 	int ret;
 
@@ -281,8 +281,8 @@ static int qcom_pdc_gpio_alloc(struct irq_domain *domain, unsigned int virq,
 	if (ret)
 		return ret;
 
-	parent_hwirq = get_parent_hwirq(hwirq);
-	if (parent_hwirq == PDC_NO_PARENT_IRQ)
+	region = get_pin_region(hwirq);
+	if (!region)
 		return irq_domain_disconnect_hierarchy(domain->parent, virq);
 
 	if (type & IRQ_TYPE_EDGE_BOTH)
@@ -294,7 +294,7 @@ static int qcom_pdc_gpio_alloc(struct irq_domain *domain, unsigned int virq,
 	parent_fwspec.fwnode      = domain->parent->fwnode;
 	parent_fwspec.param_count = 3;
 	parent_fwspec.param[0]    = 0;
-	parent_fwspec.param[1]    = parent_hwirq;
+	parent_fwspec.param[1]    = pin_to_hwirq(region, hwirq);
 	parent_fwspec.param[2]    = type;
 
 	return irq_domain_alloc_irqs_parent(domain, virq, nr_irqs,
-- 
GitLab


From 4dc70713dc24dceeea7f106828674744a6294860 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Thu, 24 Feb 2022 10:12:23 +0000
Subject: [PATCH 0862/1586] irqchip/qcom-pdc: Kill non-wakeup irqdomain

A careful look at the way the PDC driver works shows that:

- all interrupts are in the same space
- all interrupts are treated the same

And yet the driver creates two domains based on whether
the interrupt gets mapped directly or from the pinctrl code,
which is obviously a waste of resources.

Kill the non-wakeup domain and unify all the interrupt handling.

Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220224101226.88373-3-maz@kernel.org
---
 drivers/irqchip/qcom-pdc.c | 84 +++++---------------------------------
 1 file changed, 10 insertions(+), 74 deletions(-)

diff --git a/drivers/irqchip/qcom-pdc.c b/drivers/irqchip/qcom-pdc.c
index 3b214c4e6755a..5be531403f500 100644
--- a/drivers/irqchip/qcom-pdc.c
+++ b/drivers/irqchip/qcom-pdc.c
@@ -21,7 +21,6 @@
 #include <linux/slab.h>
 #include <linux/types.h>
 
-#define PDC_MAX_IRQS		168
 #define PDC_MAX_GPIO_IRQS	256
 
 #define CLEAR_INTR(reg, intr)	(reg & ~(1 << intr))
@@ -224,51 +223,6 @@ static int qcom_pdc_alloc(struct irq_domain *domain, unsigned int virq,
 	unsigned int type;
 	int ret;
 
-	ret = qcom_pdc_translate(domain, fwspec, &hwirq, &type);
-	if (ret)
-		return ret;
-
-	ret  = irq_domain_set_hwirq_and_chip(domain, virq, hwirq,
-					     &qcom_pdc_gic_chip, NULL);
-	if (ret)
-		return ret;
-
-	region = get_pin_region(hwirq);
-	if (!region)
-		return irq_domain_disconnect_hierarchy(domain->parent, virq);
-
-	if (type & IRQ_TYPE_EDGE_BOTH)
-		type = IRQ_TYPE_EDGE_RISING;
-
-	if (type & IRQ_TYPE_LEVEL_MASK)
-		type = IRQ_TYPE_LEVEL_HIGH;
-
-	parent_fwspec.fwnode      = domain->parent->fwnode;
-	parent_fwspec.param_count = 3;
-	parent_fwspec.param[0]    = 0;
-	parent_fwspec.param[1]    = pin_to_hwirq(region, hwirq);
-	parent_fwspec.param[2]    = type;
-
-	return irq_domain_alloc_irqs_parent(domain, virq, nr_irqs,
-					    &parent_fwspec);
-}
-
-static const struct irq_domain_ops qcom_pdc_ops = {
-	.translate	= qcom_pdc_translate,
-	.alloc		= qcom_pdc_alloc,
-	.free		= irq_domain_free_irqs_common,
-};
-
-static int qcom_pdc_gpio_alloc(struct irq_domain *domain, unsigned int virq,
-			       unsigned int nr_irqs, void *data)
-{
-	struct irq_fwspec *fwspec = data;
-	struct irq_fwspec parent_fwspec;
-	struct pdc_pin_region *region;
-	irq_hw_number_t hwirq;
-	unsigned int type;
-	int ret;
-
 	ret = qcom_pdc_translate(domain, fwspec, &hwirq, &type);
 	if (ret)
 		return ret;
@@ -301,16 +255,9 @@ static int qcom_pdc_gpio_alloc(struct irq_domain *domain, unsigned int virq,
 					    &parent_fwspec);
 }
 
-static int qcom_pdc_gpio_domain_select(struct irq_domain *d,
-				       struct irq_fwspec *fwspec,
-				       enum irq_domain_bus_token bus_token)
-{
-	return bus_token == DOMAIN_BUS_WAKEUP;
-}
-
-static const struct irq_domain_ops qcom_pdc_gpio_ops = {
-	.select		= qcom_pdc_gpio_domain_select,
-	.alloc		= qcom_pdc_gpio_alloc,
+static const struct irq_domain_ops qcom_pdc_ops = {
+	.translate	= qcom_pdc_translate,
+	.alloc		= qcom_pdc_alloc,
 	.free		= irq_domain_free_irqs_common,
 };
 
@@ -361,7 +308,7 @@ static int pdc_setup_pin_mapping(struct device_node *np)
 
 static int qcom_pdc_init(struct device_node *node, struct device_node *parent)
 {
-	struct irq_domain *parent_domain, *pdc_domain, *pdc_gpio_domain;
+	struct irq_domain *parent_domain, *pdc_domain;
 	int ret;
 
 	pdc_base = of_iomap(node, 0);
@@ -383,32 +330,21 @@ static int qcom_pdc_init(struct device_node *node, struct device_node *parent)
 		goto fail;
 	}
 
-	pdc_domain = irq_domain_create_hierarchy(parent_domain, 0, PDC_MAX_IRQS,
-						 of_fwnode_handle(node),
-						 &qcom_pdc_ops, NULL);
-	if (!pdc_domain) {
-		pr_err("%pOF: GIC domain add failed\n", node);
-		ret = -ENOMEM;
-		goto fail;
-	}
-
-	pdc_gpio_domain = irq_domain_create_hierarchy(parent_domain,
+	pdc_domain = irq_domain_create_hierarchy(parent_domain,
 					IRQ_DOMAIN_FLAG_QCOM_PDC_WAKEUP,
 					PDC_MAX_GPIO_IRQS,
 					of_fwnode_handle(node),
-					&qcom_pdc_gpio_ops, NULL);
-	if (!pdc_gpio_domain) {
-		pr_err("%pOF: PDC domain add failed for GPIO domain\n", node);
+					&qcom_pdc_ops, NULL);
+	if (!pdc_domain) {
+		pr_err("%pOF: PDC domain add failed\n", node);
 		ret = -ENOMEM;
-		goto remove;
+		goto fail;
 	}
 
-	irq_domain_update_bus_token(pdc_gpio_domain, DOMAIN_BUS_WAKEUP);
+	irq_domain_update_bus_token(pdc_domain, DOMAIN_BUS_WAKEUP);
 
 	return 0;
 
-remove:
-	irq_domain_remove(pdc_domain);
 fail:
 	kfree(pdc_region);
 	iounmap(pdc_base);
-- 
GitLab


From d494d088ac44b9cf561362a7856fa20b656be64f Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Thu, 24 Feb 2022 10:12:24 +0000
Subject: [PATCH 0863/1586] irqchip/qcom-pdc: Kill qcom_pdc_translate helper

qcom_pdc_translate() really is nothing but an open coded version
of irq_domain_translate_twocell(). Get rid of it and use the common
version instead.

Signed-off-by: Marc Zyngier <maz@kernel.org>
Reviewed-by: Maulik Shah <quic_mkshah@quicinc.com>
Link: https://lore.kernel.org/r/20220224101226.88373-4-maz@kernel.org
---
 drivers/irqchip/qcom-pdc.c | 19 ++-----------------
 1 file changed, 2 insertions(+), 17 deletions(-)

diff --git a/drivers/irqchip/qcom-pdc.c b/drivers/irqchip/qcom-pdc.c
index 5be531403f500..837ca6998f6af 100644
--- a/drivers/irqchip/qcom-pdc.c
+++ b/drivers/irqchip/qcom-pdc.c
@@ -198,21 +198,6 @@ static struct pdc_pin_region *get_pin_region(int pin)
 	return NULL;
 }
 
-static int qcom_pdc_translate(struct irq_domain *d, struct irq_fwspec *fwspec,
-			      unsigned long *hwirq, unsigned int *type)
-{
-	if (is_of_node(fwspec->fwnode)) {
-		if (fwspec->param_count != 2)
-			return -EINVAL;
-
-		*hwirq = fwspec->param[0];
-		*type = fwspec->param[1] & IRQ_TYPE_SENSE_MASK;
-		return 0;
-	}
-
-	return -EINVAL;
-}
-
 static int qcom_pdc_alloc(struct irq_domain *domain, unsigned int virq,
 			  unsigned int nr_irqs, void *data)
 {
@@ -223,7 +208,7 @@ static int qcom_pdc_alloc(struct irq_domain *domain, unsigned int virq,
 	unsigned int type;
 	int ret;
 
-	ret = qcom_pdc_translate(domain, fwspec, &hwirq, &type);
+	ret = irq_domain_translate_twocell(domain, fwspec, &hwirq, &type);
 	if (ret)
 		return ret;
 
@@ -256,7 +241,7 @@ static int qcom_pdc_alloc(struct irq_domain *domain, unsigned int virq,
 }
 
 static const struct irq_domain_ops qcom_pdc_ops = {
-	.translate	= qcom_pdc_translate,
+	.translate	= irq_domain_translate_twocell,
 	.alloc		= qcom_pdc_alloc,
 	.free		= irq_domain_free_irqs_common,
 };
-- 
GitLab


From a6aca2f460e203781dc41391913cc5b54f4bc0ce Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Thu, 24 Feb 2022 10:12:25 +0000
Subject: [PATCH 0864/1586] irqchip/qcom-pdc: Fix broken locking

pdc_enable_intr() serves as a primitive to qcom_pdc_gic_{en,dis}able,
and has a raw spinlock for mutual exclusion, which is uses with
interruptible primitives.

This means that this critical section can itself be interrupted.
Should the interrupt also be a PDC interrupt, and the endpoint driver
perform an irq_disable() on that interrupt, we end-up in a deadlock.

Fix this by using the irqsave/irqrestore variants of the locking
primitives.

Signed-off-by: Marc Zyngier <maz@kernel.org>
Reviewed-by: Maulik Shah <quic_mkshah@quicinc.com>
Link: https://lore.kernel.org/r/20220224101226.88373-5-maz@kernel.org
---
 drivers/irqchip/qcom-pdc.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/irqchip/qcom-pdc.c b/drivers/irqchip/qcom-pdc.c
index 837ca6998f6af..0cd20ddfae2a1 100644
--- a/drivers/irqchip/qcom-pdc.c
+++ b/drivers/irqchip/qcom-pdc.c
@@ -55,17 +55,18 @@ static u32 pdc_reg_read(int reg, u32 i)
 static void pdc_enable_intr(struct irq_data *d, bool on)
 {
 	int pin_out = d->hwirq;
+	unsigned long flags;
 	u32 index, mask;
 	u32 enable;
 
 	index = pin_out / 32;
 	mask = pin_out % 32;
 
-	raw_spin_lock(&pdc_lock);
+	raw_spin_lock_irqsave(&pdc_lock, flags);
 	enable = pdc_reg_read(IRQ_ENABLE_BANK, index);
 	enable = on ? ENABLE_INTR(enable, mask) : CLEAR_INTR(enable, mask);
 	pdc_reg_write(IRQ_ENABLE_BANK, index, enable);
-	raw_spin_unlock(&pdc_lock);
+	raw_spin_unlock_irqrestore(&pdc_lock, flags);
 }
 
 static void qcom_pdc_gic_disable(struct irq_data *d)
-- 
GitLab


From d2febf6bbec5466824432e3d8850fc49e4343572 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Thu, 24 Feb 2022 10:12:26 +0000
Subject: [PATCH 0865/1586] irqchip/qcom-pdc: Drop open coded version of
 __assign_bit()

The driver uses what looks like an open-coded version of __assign_bit().
Replace it with the real thing.

Signed-off-by: Marc Zyngier <maz@kernel.org>
Reviewed-by: Maulik Shah <quic_mkshah@quicinc.com>
Link: https://lore.kernel.org/r/20220224101226.88373-6-maz@kernel.org
---
 drivers/irqchip/qcom-pdc.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/irqchip/qcom-pdc.c b/drivers/irqchip/qcom-pdc.c
index 0cd20ddfae2a1..d96916cf6a414 100644
--- a/drivers/irqchip/qcom-pdc.c
+++ b/drivers/irqchip/qcom-pdc.c
@@ -23,9 +23,6 @@
 
 #define PDC_MAX_GPIO_IRQS	256
 
-#define CLEAR_INTR(reg, intr)	(reg & ~(1 << intr))
-#define ENABLE_INTR(reg, intr)	(reg | (1 << intr))
-
 #define IRQ_ENABLE_BANK		0x10
 #define IRQ_i_CFG		0x110
 
@@ -55,16 +52,16 @@ static u32 pdc_reg_read(int reg, u32 i)
 static void pdc_enable_intr(struct irq_data *d, bool on)
 {
 	int pin_out = d->hwirq;
+	unsigned long enable;
 	unsigned long flags;
 	u32 index, mask;
-	u32 enable;
 
 	index = pin_out / 32;
 	mask = pin_out % 32;
 
 	raw_spin_lock_irqsave(&pdc_lock, flags);
 	enable = pdc_reg_read(IRQ_ENABLE_BANK, index);
-	enable = on ? ENABLE_INTR(enable, mask) : CLEAR_INTR(enable, mask);
+	__assign_bit(mask, &enable, on);
 	pdc_reg_write(IRQ_ENABLE_BANK, index, enable);
 	raw_spin_unlock_irqrestore(&pdc_lock, flags);
 }
-- 
GitLab


From e414c25e3399b2b3d7337dc47abccab5c71b7c8f Mon Sep 17 00:00:00 2001
From: "Souptick Joarder (HPE)" <jrdr.linux@gmail.com>
Date: Fri, 18 Feb 2022 22:03:03 +0530
Subject: [PATCH 0866/1586] irqchip/nvic: Release nvic_base upon failure

smatch warning was reported as below ->

smatch warnings:
drivers/irqchip/irq-nvic.c:131 nvic_of_init()
warn: 'nvic_base' not released on lines: 97.

Release nvic_base upon failure.

Reported-by: kernel test robot <lkp@intel.com>
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Souptick Joarder (HPE) <jrdr.linux@gmail.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220218163303.33344-1-jrdr.linux@gmail.com
---
 drivers/irqchip/irq-nvic.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/irqchip/irq-nvic.c b/drivers/irqchip/irq-nvic.c
index ba4759b3e2693..94230306e0eee 100644
--- a/drivers/irqchip/irq-nvic.c
+++ b/drivers/irqchip/irq-nvic.c
@@ -107,6 +107,7 @@ static int __init nvic_of_init(struct device_node *node,
 
 	if (!nvic_irq_domain) {
 		pr_warn("Failed to allocate irq domain\n");
+		iounmap(nvic_base);
 		return -ENOMEM;
 	}
 
@@ -116,6 +117,7 @@ static int __init nvic_of_init(struct device_node *node,
 	if (ret) {
 		pr_warn("Failed to allocate irq chips\n");
 		irq_domain_remove(nvic_irq_domain);
+		iounmap(nvic_base);
 		return ret;
 	}
 
-- 
GitLab


From e7d90cfac5510f8c94baa18f9f3f7808859c8332 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Thu, 17 Feb 2022 13:49:50 +0100
Subject: [PATCH 0867/1586] PM: domains: Prevent power off for parent unless
 child is in deepest state

A PM domain managed by genpd may support multiple idlestates (power-off
states). During genpd_power_off() a genpd governor may be asked to select
one of the idlestates based upon the dev PM QoS constraints, for example.

However, there is a problem with the behaviour around this in genpd. More
precisely, a parent-domain is allowed to be powered off, no matter of what
idlestate that has been selected for the child-domain.

For the stm32mp1 platform from STMicro, this behaviour doesn't play well.
Instead, the parent-domain must not be powered off, unless the deepest
idlestate has been selected for the child-domain. As the current behaviour
in genpd is quite questionable anyway, let's simply change it into what is
needed by the stm32mp1 platform.

If it surprisingly turns out that other platforms may need a different
behaviour from genpd, then we will have to revisit this to find a way to
make it configurable.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/domain.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 5db704f02e712..c87588c217007 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -636,6 +636,18 @@ static int genpd_power_off(struct generic_pm_domain *genpd, bool one_dev_on,
 			atomic_read(&genpd->sd_count) > 0)
 		return -EBUSY;
 
+	/*
+	 * The children must be in their deepest (powered-off) states to allow
+	 * the parent to be powered off. Note that, there's no need for
+	 * additional locking, as powering on a child, requires the parent's
+	 * lock to be acquired first.
+	 */
+	list_for_each_entry(link, &genpd->parent_links, parent_node) {
+		struct generic_pm_domain *child = link->child;
+		if (child->state_idx < child->state_count - 1)
+			return -EBUSY;
+	}
+
 	list_for_each_entry(pdd, &genpd->dev_list, list_node) {
 		enum pm_qos_flags_status stat;
 
@@ -1073,6 +1085,13 @@ static void genpd_sync_power_off(struct generic_pm_domain *genpd, bool use_lock,
 	    || atomic_read(&genpd->sd_count) > 0)
 		return;
 
+	/* Check that the children are in their deepest (powered-off) state. */
+	list_for_each_entry(link, &genpd->parent_links, parent_node) {
+		struct generic_pm_domain *child = link->child;
+		if (child->state_idx < child->state_count - 1)
+			return;
+	}
+
 	/* Choose the deepest state when suspending */
 	genpd->state_idx = genpd->state_count - 1;
 	if (_genpd_power_off(genpd, false))
-- 
GitLab


From 9a6582b839281ee0e874621f1a2139d2aeb9489e Mon Sep 17 00:00:00 2001
From: Ahmad Fatoum <a.fatoum@pengutronix.de>
Date: Wed, 23 Feb 2022 09:03:23 +0100
Subject: [PATCH 0868/1586] PM: domains: use dev_err_probe() to simplify error
 handling

dev_err_probe() can reduce code size, makes the code easier to read
and has the added benefit of recording the defer reason for later
read out. Use it where appropriate.

This also fixes an issue, where an error message in __genpd_dev_pm_attach
was not terminated by a line break.

Signed-off-by: Ahmad Fatoum <a.fatoum@pengutronix.de>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Acked-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/domain.c | 21 ++++++---------------
 1 file changed, 6 insertions(+), 15 deletions(-)

diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index c87588c217007..c0d9ad01b32c8 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -2267,12 +2267,8 @@ int of_genpd_add_provider_simple(struct device_node *np,
 	/* Parse genpd OPP table */
 	if (genpd->set_performance_state) {
 		ret = dev_pm_opp_of_add_table(&genpd->dev);
-		if (ret) {
-			if (ret != -EPROBE_DEFER)
-				dev_err(&genpd->dev, "Failed to add OPP table: %d\n",
-					ret);
-			return ret;
-		}
+		if (ret)
+			return dev_err_probe(&genpd->dev, ret, "Failed to add OPP table\n");
 
 		/*
 		 * Save table for faster processing while setting performance
@@ -2331,9 +2327,8 @@ int of_genpd_add_provider_onecell(struct device_node *np,
 		if (genpd->set_performance_state) {
 			ret = dev_pm_opp_of_add_table_indexed(&genpd->dev, i);
 			if (ret) {
-				if (ret != -EPROBE_DEFER)
-					dev_err(&genpd->dev, "Failed to add OPP table for index %d: %d\n",
-						i, ret);
+				dev_err_probe(&genpd->dev, ret,
+					      "Failed to add OPP table for index %d\n", i);
 				goto error;
 			}
 
@@ -2691,12 +2686,8 @@ static int __genpd_dev_pm_attach(struct device *dev, struct device *base_dev,
 	ret = genpd_add_device(pd, dev, base_dev);
 	mutex_unlock(&gpd_list_lock);
 
-	if (ret < 0) {
-		if (ret != -EPROBE_DEFER)
-			dev_err(dev, "failed to add to PM domain %s: %d",
-				pd->name, ret);
-		return ret;
-	}
+	if (ret < 0)
+		return dev_err_probe(dev, ret, "failed to add to PM domain %s\n", pd->name);
 
 	dev->pm_domain->detach = genpd_dev_pm_detach;
 	dev->pm_domain->sync = genpd_dev_pm_sync;
-- 
GitLab


From f6bfe8b5b2c2a5ac8bd2fc7bca3706e6c3fc26d8 Mon Sep 17 00:00:00 2001
From: Shawn Guo <shawn.guo@linaro.org>
Date: Fri, 25 Feb 2022 14:48:15 +0800
Subject: [PATCH 0869/1586] PM: domains: Fix sleep-in-atomic bug caused by
 genpd_debug_remove()

When a genpd with GENPD_FLAG_IRQ_SAFE gets removed, the following
sleep-in-atomic bug will be seen, as genpd_debug_remove() will be called
with a spinlock being held.

[    0.029183] BUG: sleeping function called from invalid context at kernel/locking/rwsem.c:1460
[    0.029204] in_atomic(): 1, irqs_disabled(): 128, non_block: 0, pid: 1, name: swapper/0
[    0.029219] preempt_count: 1, expected: 0
[    0.029230] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 5.17.0-rc4+ #489
[    0.029245] Hardware name: Thundercomm TurboX CM2290 (DT)
[    0.029256] Call trace:
[    0.029265]  dump_backtrace.part.0+0xbc/0xd0
[    0.029285]  show_stack+0x3c/0xa0
[    0.029298]  dump_stack_lvl+0x7c/0xa0
[    0.029311]  dump_stack+0x18/0x34
[    0.029323]  __might_resched+0x10c/0x13c
[    0.029338]  __might_sleep+0x4c/0x80
[    0.029351]  down_read+0x24/0xd0
[    0.029363]  lookup_one_len_unlocked+0x9c/0xcc
[    0.029379]  lookup_positive_unlocked+0x10/0x50
[    0.029392]  debugfs_lookup+0x68/0xac
[    0.029406]  genpd_remove.part.0+0x12c/0x1b4
[    0.029419]  of_genpd_remove_last+0xa8/0xd4
[    0.029434]  psci_cpuidle_domain_probe+0x174/0x53c
[    0.029449]  platform_probe+0x68/0xe0
[    0.029462]  really_probe+0x190/0x430
[    0.029473]  __driver_probe_device+0x90/0x18c
[    0.029485]  driver_probe_device+0x40/0xe0
[    0.029497]  __driver_attach+0xf4/0x1d0
[    0.029508]  bus_for_each_dev+0x70/0xd0
[    0.029523]  driver_attach+0x24/0x30
[    0.029534]  bus_add_driver+0x164/0x22c
[    0.029545]  driver_register+0x78/0x130
[    0.029556]  __platform_driver_register+0x28/0x34
[    0.029569]  psci_idle_init_domains+0x1c/0x28
[    0.029583]  do_one_initcall+0x50/0x1b0
[    0.029595]  kernel_init_freeable+0x214/0x280
[    0.029609]  kernel_init+0x2c/0x13c
[    0.029622]  ret_from_fork+0x10/0x20

It doesn't seem necessary to call genpd_debug_remove() with the lock, so
move it out from locking to fix the problem.

Fixes: 718072ceb211 ("PM: domains: create debugfs nodes when adding power domains")
Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Cc: 5.11+ <stable@vger.kernel.org> # 5.11+
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/domain.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index c0d9ad01b32c8..1ee878d126fdf 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -2077,9 +2077,9 @@ static int genpd_remove(struct generic_pm_domain *genpd)
 		kfree(link);
 	}
 
-	genpd_debug_remove(genpd);
 	list_del(&genpd->gpd_list_node);
 	genpd_unlock(genpd);
+	genpd_debug_remove(genpd);
 	cancel_work_sync(&genpd->power_off_work);
 	if (genpd_is_cpu_domain(genpd))
 		free_cpumask_var(genpd->cpus);
-- 
GitLab


From 7dfe105dfc724c82ed3d79a4c47439c516a2410b Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Fri, 11 Feb 2022 08:10:27 -0800
Subject: [PATCH 0870/1586] PM: sleep: wakeup: Fix typos in comments

Remove the second 'the'.
Replace the second 'of' with 'the'.
Replace 'couter' with 'counter'.

Signed-off-by: Tom Rix <trix@redhat.com>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/wakeirq.c | 2 +-
 drivers/base/power/wakeup.c  | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/base/power/wakeirq.c b/drivers/base/power/wakeirq.c
index 0004db4a9d3b9..d487a6bac630f 100644
--- a/drivers/base/power/wakeirq.c
+++ b/drivers/base/power/wakeirq.c
@@ -289,7 +289,7 @@ EXPORT_SYMBOL_GPL(dev_pm_disable_wake_irq);
  *
  * Enables wakeirq conditionally. We need to enable wake-up interrupt
  * lazily on the first rpm_suspend(). This is needed as the consumer device
- * starts in RPM_SUSPENDED state, and the the first pm_runtime_get() would
+ * starts in RPM_SUSPENDED state, and the first pm_runtime_get() would
  * otherwise try to disable already disabled wakeirq. The wake-up interrupt
  * starts disabled with IRQ_NOAUTOEN set.
  *
diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c
index 8666590201c9a..a57d469676caa 100644
--- a/drivers/base/power/wakeup.c
+++ b/drivers/base/power/wakeup.c
@@ -587,7 +587,7 @@ static bool wakeup_source_not_registered(struct wakeup_source *ws)
  * @ws: Wakeup source to handle.
  *
  * Update the @ws' statistics and, if @ws has just been activated, notify the PM
- * core of the event by incrementing the counter of of wakeup events being
+ * core of the event by incrementing the counter of the wakeup events being
  * processed.
  */
 static void wakeup_source_activate(struct wakeup_source *ws)
@@ -733,7 +733,7 @@ static void wakeup_source_deactivate(struct wakeup_source *ws)
 
 	/*
 	 * Increment the counter of registered wakeup events and decrement the
-	 * couter of wakeup events in progress simultaneously.
+	 * counter of wakeup events in progress simultaneously.
 	 */
 	cec = atomic_add_return(MAX_IN_PROGRESS, &combined_event_count);
 	trace_wakeup_source_deactivate(ws->name, cec);
-- 
GitLab


From 444e1154b2bf0b881b65ba1bba5bc8e691fac04a Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Tue, 15 Feb 2022 11:37:50 +0800
Subject: [PATCH 0871/1586] PM: hibernate: Clean up non-kernel-doc comments

Address the following W=1 kernel build warning:

kernel/power/swap.c:120: warning: This comment starts with '/**', but
isn't a kernel-doc comment. Refer
Documentation/doc-guide/kernel-doc.rst.

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 kernel/power/swap.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index ad10359030a4c..c51f5507b34fc 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -89,7 +89,7 @@ struct swap_map_page_list {
 	struct swap_map_page_list *next;
 };
 
-/**
+/*
  *	The swap_map_handle structure is used for handling swap in
  *	a file-alike way
  */
@@ -117,7 +117,7 @@ struct swsusp_header {
 
 static struct swsusp_header *swsusp_header;
 
-/**
+/*
  *	The following functions are used for tracing the allocated
  *	swap pages, so that they can be freed in case of an error.
  */
@@ -171,7 +171,7 @@ static int swsusp_extents_insert(unsigned long swap_offset)
 	return 0;
 }
 
-/**
+/*
  *	alloc_swapdev_block - allocate a swap page and register that it has
  *	been allocated, so that it can be freed in case of an error.
  */
@@ -190,7 +190,7 @@ sector_t alloc_swapdev_block(int swap)
 	return 0;
 }
 
-/**
+/*
  *	free_all_swap_pages - free swap pages allocated for saving image data.
  *	It also frees the extents used to register which swap entries had been
  *	allocated.
-- 
GitLab


From a644161ba11df38e5582e718c99668e282ddbf36 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Mon, 28 Feb 2022 11:57:59 -0800
Subject: [PATCH 0872/1586] Documentation: admin-guide: pm: Document uncore
 frequency scaling

Added documentation to configure uncore frequency limits in Intel
Xeon processors.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
[ rjw: Clean up the document wording ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 .../pm/intel_uncore_frequency_scaling.rst     | 60 +++++++++++++++++++
 .../admin-guide/pm/working-state.rst          |  1 +
 2 files changed, 61 insertions(+)
 create mode 100644 Documentation/admin-guide/pm/intel_uncore_frequency_scaling.rst

diff --git a/Documentation/admin-guide/pm/intel_uncore_frequency_scaling.rst b/Documentation/admin-guide/pm/intel_uncore_frequency_scaling.rst
new file mode 100644
index 0000000000000..09169d9358356
--- /dev/null
+++ b/Documentation/admin-guide/pm/intel_uncore_frequency_scaling.rst
@@ -0,0 +1,60 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: <isonum.txt>
+
+==============================
+Intel Uncore Frequency Scaling
+==============================
+
+:Copyright: |copy| 2022 Intel Corporation
+
+:Author: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+
+Introduction
+------------
+
+The uncore can consume significant amount of power in Intel's Xeon servers based
+on the workload characteristics. To optimize the total power and improve overall
+performance, SoCs have internal algorithms for scaling uncore frequency. These
+algorithms monitor workload usage of uncore and set a desirable frequency.
+
+It is possible that users have different expectations of uncore performance and
+want to have control over it. The objective is similar to allowing users to set
+the scaling min/max frequencies via cpufreq sysfs to improve CPU performance.
+Users may have some latency sensitive workloads where they do not want any
+change to uncore frequency. Also, users may have workloads which require
+different core and uncore performance at distinct phases and they may want to
+use both cpufreq and the uncore scaling interface to distribute power and
+improve overall performance.
+
+Sysfs Interface
+---------------
+
+To control uncore frequency, a sysfs interface is provided in the directory:
+`/sys/devices/system/cpu/intel_uncore_frequency/`.
+
+There is one directory for each package and die combination as the scope of
+uncore scaling control is per die in multiple die/package SoCs or per
+package for single die per package SoCs. The name represents the
+scope of control. For example: 'package_00_die_00' is for package id 0 and
+die 0.
+
+Each package_*_die_* contains the following attributes:
+
+``initial_max_freq_khz``
+	Out of reset, this attribute represent the maximum possible frequency.
+	This is a read-only attribute. If users adjust max_freq_khz,
+	they can always go back to maximum using the value from this attribute.
+
+``initial_min_freq_khz``
+	Out of reset, this attribute represent the minimum possible frequency.
+	This is a read-only attribute. If users adjust min_freq_khz,
+	they can always go back to minimum using the value from this attribute.
+
+``max_freq_khz``
+	This attribute is used to set the maximum uncore frequency.
+
+``min_freq_khz``
+	This attribute is used to set the minimum uncore frequency.
+
+``current_freq_khz``
+	This attribute is used to get the current uncore frequency.
diff --git a/Documentation/admin-guide/pm/working-state.rst b/Documentation/admin-guide/pm/working-state.rst
index 5d2757e2de656..ee45887811ffa 100644
--- a/Documentation/admin-guide/pm/working-state.rst
+++ b/Documentation/admin-guide/pm/working-state.rst
@@ -15,3 +15,4 @@ Working-State Power Management
    cpufreq_drivers
    intel_epb
    intel-speed-select
+   intel_uncore_frequency_scaling
-- 
GitLab


From ba7ffcd4c4da374b0f64666354eeeda7d3827131 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Mon, 28 Feb 2022 14:05:32 -0800
Subject: [PATCH 0873/1586] PM: hibernate: fix __setup handler error handling

If an invalid value is used in "resumedelay=<seconds>", it is
silently ignored. Add a warning message and then let the __setup
handler return 1 to indicate that the kernel command line option
has been handled.

Fixes: 317cf7e5e85e3 ("PM / hibernate: convert simple_strtoul to kstrtoul")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Reported-by: Igor Zhbanov <i.zhbanov@omprussia.ru>
Link: lore.kernel.org/r/64644a2f-4a20-bab3-1e15-3b2cdd0defe3@omprussia.ru
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 kernel/power/hibernate.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 49d1df0218cb8..0ac805b753e58 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -1330,7 +1330,7 @@ static int __init resumedelay_setup(char *str)
 	int rc = kstrtouint(str, 0, &resume_delay);
 
 	if (rc)
-		return rc;
+		pr_warn("resumedelay: bad option string '%s'\n", str);
 	return 1;
 }
 
-- 
GitLab


From 7a64ca17e4dd50d5f910769167f3553902777844 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Mon, 28 Feb 2022 14:05:44 -0800
Subject: [PATCH 0874/1586] PM: suspend: fix return value of __setup handler

If an invalid option is given for "test_suspend=<option>", the entire
string is added to init's environment, so return 1 instead of 0 from
the __setup handler.

  Unknown kernel command line parameters "BOOT_IMAGE=/boot/bzImage-517rc5
    test_suspend=invalid"

and

 Run /sbin/init as init process
   with arguments:
     /sbin/init
   with environment:
     HOME=/
     TERM=linux
     BOOT_IMAGE=/boot/bzImage-517rc5
     test_suspend=invalid

Fixes: 2ce986892faf ("PM / sleep: Enhance test_suspend option with repeat capability")
Fixes: 27ddcc6596e5 ("PM / sleep: Add state field to pm_states[] entries")
Fixes: a9d7052363a6 ("PM: Separate suspend to RAM functionality from core")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Reported-by: Igor Zhbanov <i.zhbanov@omprussia.ru>
Link: lore.kernel.org/r/64644a2f-4a20-bab3-1e15-3b2cdd0defe3@omprussia.ru
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 kernel/power/suspend_test.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/kernel/power/suspend_test.c b/kernel/power/suspend_test.c
index d20526c5be15b..b663a97f5867a 100644
--- a/kernel/power/suspend_test.c
+++ b/kernel/power/suspend_test.c
@@ -157,22 +157,22 @@ static int __init setup_test_suspend(char *value)
 	value++;
 	suspend_type = strsep(&value, ",");
 	if (!suspend_type)
-		return 0;
+		return 1;
 
 	repeat = strsep(&value, ",");
 	if (repeat) {
 		if (kstrtou32(repeat, 0, &test_repeat_count_max))
-			return 0;
+			return 1;
 	}
 
 	for (i = PM_SUSPEND_MIN; i < PM_SUSPEND_MAX; i++)
 		if (!strcmp(pm_labels[i], suspend_type)) {
 			test_state_label = pm_labels[i];
-			return 0;
+			return 1;
 		}
 
 	printk(warn_bad_state, suspend_type);
-	return 0;
+	return 1;
 }
 __setup("test_suspend", setup_test_suspend);
 
-- 
GitLab


From 5bf19d0aa3d2f22d4c050c030f436ab97b7e6f1e Mon Sep 17 00:00:00 2001
From: kernel test robot <lkp@intel.com>
Date: Tue, 1 Mar 2022 11:28:54 +0800
Subject: [PATCH 0875/1586] powercap: DTPM: dtpm_node_callback[] can be static

drivers/powercap/dtpm.c:525:22: warning: symbol 'dtpm_node_callback' was not declared. Should it be static?

Fixes: 3759ec678e89 ("powercap/drivers/dtpm: Add hierarchy creation")
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: kernel test robot <lkp@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/powercap/dtpm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/powercap/dtpm.c b/drivers/powercap/dtpm.c
index ec931a06d90ae..41010dec069dc 100644
--- a/drivers/powercap/dtpm.c
+++ b/drivers/powercap/dtpm.c
@@ -470,7 +470,7 @@ static struct dtpm *dtpm_setup_dt(const struct dtpm_node *hierarchy,
 
 typedef struct dtpm * (*dtpm_node_callback_t)(const struct dtpm_node *, struct dtpm *);
 
-dtpm_node_callback_t dtpm_node_callback[] = {
+static dtpm_node_callback_t dtpm_node_callback[] = {
 	[DTPM_NODE_VIRTUAL] = dtpm_setup_virtual,
 	[DTPM_NODE_DT] = dtpm_setup_dt,
 };
-- 
GitLab


From 55ddcd9f3226b21c5b63d893712c85e85d73f4cb Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Tue, 1 Mar 2022 09:03:27 +0000
Subject: [PATCH 0876/1586] powercap: DTPM: Fix spelling mistake "initialze" ->
 "initialize"

There is a spelling mistake in a pr_info() message. Fix it.

Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/powercap/dtpm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/powercap/dtpm.c b/drivers/powercap/dtpm.c
index 41010dec069dc..ce920f17f45fc 100644
--- a/drivers/powercap/dtpm.c
+++ b/drivers/powercap/dtpm.c
@@ -596,7 +596,7 @@ int dtpm_create_hierarchy(struct of_device_id *dtpm_match_table)
 
 		ret = dtpm_subsys[i]->init();
 		if (ret)
-			pr_info("Failed to initialze '%s': %d",
+			pr_info("Failed to initialize '%s': %d",
 				dtpm_subsys[i]->name, ret);
 	}
 
-- 
GitLab


From 023bbde3db41078780f5d57e5354212f974c4bca Mon Sep 17 00:00:00 2001
From: Vincent Whitchurch <vincent.whitchurch@axis.com>
Date: Tue, 1 Mar 2022 15:49:32 +0100
Subject: [PATCH 0877/1586] pstore: Add prefix to ECC messages

The "No errors detected" message from the ECC code is shown at the end
of the pstore log and can be confusing or misleading, especially since
it usually appears just after a kernel crash log which normally means
quite the opposite of "no errors".  Prefix the message to clarify that
this message is only about ECC-detected errors.

Signed-off-by: Vincent Whitchurch <vincent.whitchurch@axis.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20220301144932.89549-1-vincent.whitchurch@axis.com
---
 fs/pstore/ram_core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index fe5305028c6e2..a89e33719fcf2 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -263,10 +263,10 @@ ssize_t persistent_ram_ecc_string(struct persistent_ram_zone *prz,
 
 	if (prz->corrected_bytes || prz->bad_blocks)
 		ret = snprintf(str, len, ""
-			"\n%d Corrected bytes, %d unrecoverable blocks\n",
+			"\nECC: %d Corrected bytes, %d unrecoverable blocks\n",
 			prz->corrected_bytes, prz->bad_blocks);
 	else
-		ret = snprintf(str, len, "\nNo errors detected\n");
+		ret = snprintf(str, len, "\nECC: No errors detected\n");
 
 	return ret;
 }
-- 
GitLab


From 2cbfae0f50f7a0f8fc9d552bd856f6cd7b7608b6 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 24 Feb 2022 01:56:20 +0200
Subject: [PATCH 0878/1586] ACPI: platform: Constify properties parameter in
 acpi_create_platform_device()

Properties are not and should not be changed in the callee, hence constify
properties parameter in acpi_create_platform_device().

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpi_platform.c | 2 +-
 include/linux/acpi.h         | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/acpi/acpi_platform.c b/drivers/acpi/acpi_platform.c
index 78d621290a351..de3cbf152deef 100644
--- a/drivers/acpi/acpi_platform.c
+++ b/drivers/acpi/acpi_platform.c
@@ -95,7 +95,7 @@ static void acpi_platform_fill_resource(struct acpi_device *adev,
  * Name of the platform device will be the same as @adev's.
  */
 struct platform_device *acpi_create_platform_device(struct acpi_device *adev,
-					struct property_entry *properties)
+						    const struct property_entry *properties)
 {
 	struct platform_device *pdev = NULL;
 	struct platform_device_info pdevinfo;
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 6274758648e37..9ac545379447e 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -691,7 +691,7 @@ int acpi_device_uevent_modalias(struct device *, struct kobj_uevent_env *);
 int acpi_device_modalias(struct device *, char *, int);
 
 struct platform_device *acpi_create_platform_device(struct acpi_device *,
-						    struct property_entry *);
+						    const struct property_entry *);
 #define ACPI_PTR(_ptr)	(_ptr)
 
 static inline void acpi_device_set_enumerated(struct acpi_device *adev)
@@ -930,7 +930,7 @@ static inline int acpi_device_modalias(struct device *dev,
 
 static inline struct platform_device *
 acpi_create_platform_device(struct acpi_device *adev,
-			    struct property_entry *properties)
+			    const struct property_entry *properties)
 {
 	return NULL;
 }
-- 
GitLab


From f167c1a13879a76efd622e8d857609108ef57162 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 24 Feb 2022 01:56:21 +0200
Subject: [PATCH 0879/1586] ACPI: LPSS: Constify properties member in struct
 lpss_device_desc

The properties are static and not supposed to be modified, constify them.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpi_lpss.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c
index bcae0f03572be..c28954411af9d 100644
--- a/drivers/acpi/acpi_lpss.c
+++ b/drivers/acpi/acpi_lpss.c
@@ -82,7 +82,7 @@ struct lpss_device_desc {
 	const char *clk_con_id;
 	unsigned int prv_offset;
 	size_t prv_size_override;
-	struct property_entry *properties;
+	const struct property_entry *properties;
 	void (*setup)(struct lpss_private_data *pdata);
 	bool resume_from_noirq;
 };
-- 
GitLab


From 620c803f42defec5c43d3f46cadc101de9aa37ea Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 24 Feb 2022 01:56:22 +0200
Subject: [PATCH 0880/1586] ACPI: LPSS: Provide an SSP type to the driver

The SPI driver wants to know the exact type of the controller. Provide this
information to it, hence allow to fix Intel Wildcat Point case in the future.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpi_lpss.c | 31 +++++++++++++++++++++++++------
 1 file changed, 25 insertions(+), 6 deletions(-)

diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c
index c28954411af9d..fbe0756259c5a 100644
--- a/drivers/acpi/acpi_lpss.c
+++ b/drivers/acpi/acpi_lpss.c
@@ -21,6 +21,7 @@
 #include <linux/pm_domain.h>
 #include <linux/pm_runtime.h>
 #include <linux/pwm.h>
+#include <linux/pxa2xx_ssp.h>
 #include <linux/suspend.h>
 #include <linux/delay.h>
 
@@ -219,10 +220,16 @@ static void bsw_pwm_setup(struct lpss_private_data *pdata)
 	pwm_add_table(bsw_pwm_lookup, ARRAY_SIZE(bsw_pwm_lookup));
 }
 
-static const struct lpss_device_desc lpt_dev_desc = {
+static const struct property_entry lpt_spi_properties[] = {
+	PROPERTY_ENTRY_U32("intel,spi-pxa2xx-type", LPSS_LPT_SSP),
+	{ }
+};
+
+static const struct lpss_device_desc lpt_spi_dev_desc = {
 	.flags = LPSS_CLK | LPSS_CLK_GATE | LPSS_CLK_DIVIDER | LPSS_LTR
 			| LPSS_SAVE_CTX,
 	.prv_offset = 0x800,
+	.properties = lpt_spi_properties,
 };
 
 static const struct lpss_device_desc lpt_i2c_dev_desc = {
@@ -282,9 +289,15 @@ static const struct lpss_device_desc bsw_uart_dev_desc = {
 	.properties = uart_properties,
 };
 
+static const struct property_entry byt_spi_properties[] = {
+	PROPERTY_ENTRY_U32("intel,spi-pxa2xx-type", LPSS_BYT_SSP),
+	{ }
+};
+
 static const struct lpss_device_desc byt_spi_dev_desc = {
 	.flags = LPSS_CLK | LPSS_CLK_GATE | LPSS_CLK_DIVIDER | LPSS_SAVE_CTX,
 	.prv_offset = 0x400,
+	.properties = byt_spi_properties,
 };
 
 static const struct lpss_device_desc byt_sdio_dev_desc = {
@@ -305,11 +318,17 @@ static const struct lpss_device_desc bsw_i2c_dev_desc = {
 	.resume_from_noirq = true,
 };
 
+static const struct property_entry bsw_spi_properties[] = {
+	PROPERTY_ENTRY_U32("intel,spi-pxa2xx-type", LPSS_BSW_SSP),
+	{ }
+};
+
 static const struct lpss_device_desc bsw_spi_dev_desc = {
 	.flags = LPSS_CLK | LPSS_CLK_GATE | LPSS_CLK_DIVIDER | LPSS_SAVE_CTX
 			| LPSS_NO_D3_DELAY,
 	.prv_offset = 0x400,
 	.setup = lpss_deassert_reset,
+	.properties = bsw_spi_properties,
 };
 
 static const struct x86_cpu_id lpss_cpu_ids[] = {
@@ -329,8 +348,8 @@ static const struct acpi_device_id acpi_lpss_device_ids[] = {
 	{ "INTL9C60", LPSS_ADDR(lpss_dma_desc) },
 
 	/* Lynxpoint LPSS devices */
-	{ "INT33C0", LPSS_ADDR(lpt_dev_desc) },
-	{ "INT33C1", LPSS_ADDR(lpt_dev_desc) },
+	{ "INT33C0", LPSS_ADDR(lpt_spi_dev_desc) },
+	{ "INT33C1", LPSS_ADDR(lpt_spi_dev_desc) },
 	{ "INT33C2", LPSS_ADDR(lpt_i2c_dev_desc) },
 	{ "INT33C3", LPSS_ADDR(lpt_i2c_dev_desc) },
 	{ "INT33C4", LPSS_ADDR(lpt_uart_dev_desc) },
@@ -356,8 +375,8 @@ static const struct acpi_device_id acpi_lpss_device_ids[] = {
 	{ "808622C1", LPSS_ADDR(bsw_i2c_dev_desc) },
 
 	/* Broadwell LPSS devices */
-	{ "INT3430", LPSS_ADDR(lpt_dev_desc) },
-	{ "INT3431", LPSS_ADDR(lpt_dev_desc) },
+	{ "INT3430", LPSS_ADDR(lpt_spi_dev_desc) },
+	{ "INT3431", LPSS_ADDR(lpt_spi_dev_desc) },
 	{ "INT3432", LPSS_ADDR(lpt_i2c_dev_desc) },
 	{ "INT3433", LPSS_ADDR(lpt_i2c_dev_desc) },
 	{ "INT3434", LPSS_ADDR(lpt_uart_dev_desc) },
@@ -366,7 +385,7 @@ static const struct acpi_device_id acpi_lpss_device_ids[] = {
 	{ "INT3437", },
 
 	/* Wildcat Point LPSS devices */
-	{ "INT3438", LPSS_ADDR(lpt_dev_desc) },
+	{ "INT3438", LPSS_ADDR(lpt_spi_dev_desc) },
 
 	{ }
 };
-- 
GitLab


From 10b19249192ae28ee9092ceca327a83d27d88bd0 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Sun, 3 Oct 2021 15:11:24 +0300
Subject: [PATCH 0881/1586] ELF: fix overflow in total mapping size calculation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Kernel assumes that ELF program headers are ordered by mapping address,
but doesn't enforce it. It is possible to make mapping size extremely huge
by simply shuffling first and last PT_LOAD segments.

As long as PT_LOAD segments do not overlap, it is silly to require
sorting by v_addr anyway because mmap() doesn't care.

Don't assume PT_LOAD segments are sorted and calculate min and max
addresses correctly.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Tested-by: "Magnus Groß" <magnus.gross@rwth-aachen.de>
Link: https://lore.kernel.org/all/Yfqm7HbucDjPbES+@fractal.localdomain/
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/lkml/YVmd7D0M6G%2FDcP4O@localhost.localdomain
---
 fs/binfmt_elf.c | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index c36b1ec357fb4..65ea5381c5c73 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -93,7 +93,7 @@ static int elf_core_dump(struct coredump_params *cprm);
 #define ELF_CORE_EFLAGS	0
 #endif
 
-#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
+#define ELF_PAGESTART(_v) ((_v) & ~(int)(ELF_MIN_ALIGN-1))
 #define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
 #define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
 
@@ -399,22 +399,21 @@ static unsigned long elf_map(struct file *filep, unsigned long addr,
 	return(map_addr);
 }
 
-static unsigned long total_mapping_size(const struct elf_phdr *cmds, int nr)
+static unsigned long total_mapping_size(const struct elf_phdr *phdr, int nr)
 {
-	int i, first_idx = -1, last_idx = -1;
+	elf_addr_t min_addr = -1;
+	elf_addr_t max_addr = 0;
+	bool pt_load = false;
+	int i;
 
 	for (i = 0; i < nr; i++) {
-		if (cmds[i].p_type == PT_LOAD) {
-			last_idx = i;
-			if (first_idx == -1)
-				first_idx = i;
+		if (phdr[i].p_type == PT_LOAD) {
+			min_addr = min(min_addr, ELF_PAGESTART(phdr[i].p_vaddr));
+			max_addr = max(max_addr, phdr[i].p_vaddr + phdr[i].p_memsz);
+			pt_load = true;
 		}
 	}
-	if (first_idx == -1)
-		return 0;
-
-	return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
-				ELF_PAGESTART(cmds[first_idx].p_vaddr);
+	return pt_load ? (max_addr - min_addr) : 0;
 }
 
 static int elf_read(struct file *file, void *buf, size_t len, loff_t pos)
-- 
GitLab


From 84158b7f6a0624b81800b4e7c90f7fb7fdecf66c Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh@google.com>
Date: Wed, 26 Jan 2022 03:57:39 +0100
Subject: [PATCH 0882/1586] coredump: Also dump first pages of non-executable
 ELF libraries

When I rewrote the VMA dumping logic for coredumps, I changed it to
recognize ELF library mappings based on the file being executable instead
of the mapping having an ELF header. But turns out, distros ship many ELF
libraries as non-executable, so the heuristic goes wrong...

Restore the old behavior where FILTER(ELF_HEADERS) dumps the first page of
any offset-0 readable mapping that starts with the ELF magic.

This fix is technically layer-breaking a bit, because it checks for
something ELF-specific in fs/coredump.c; but since we probably want to
share this between standard ELF and FDPIC ELF anyway, I guess it's fine?
And this also keeps the change small for backporting.

Cc: stable@vger.kernel.org
Fixes: 429a22e776a2 ("coredump: rework elf/elf_fdpic vma_dump_size() into common helper")
Reported-by: Bill Messmer <wmessmer@microsoft.com>
Signed-off-by: Jann Horn <jannh@google.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20220126025739.2014888-1-jannh@google.com
---
 fs/coredump.c | 39 ++++++++++++++++++++++++++++++++++-----
 1 file changed, 34 insertions(+), 5 deletions(-)

diff --git a/fs/coredump.c b/fs/coredump.c
index 1c060c0a2d72f..b73817712dd25 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -42,6 +42,7 @@
 #include <linux/path.h>
 #include <linux/timekeeping.h>
 #include <linux/sysctl.h>
+#include <linux/elf.h>
 
 #include <linux/uaccess.h>
 #include <asm/mmu_context.h>
@@ -980,6 +981,8 @@ static bool always_dump_vma(struct vm_area_struct *vma)
 	return false;
 }
 
+#define DUMP_SIZE_MAYBE_ELFHDR_PLACEHOLDER 1
+
 /*
  * Decide how much of @vma's contents should be included in a core dump.
  */
@@ -1039,9 +1042,20 @@ static unsigned long vma_dump_size(struct vm_area_struct *vma,
 	 * dump the first page to aid in determining what was mapped here.
 	 */
 	if (FILTER(ELF_HEADERS) &&
-	    vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ) &&
-	    (READ_ONCE(file_inode(vma->vm_file)->i_mode) & 0111) != 0)
-		return PAGE_SIZE;
+	    vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
+		if ((READ_ONCE(file_inode(vma->vm_file)->i_mode) & 0111) != 0)
+			return PAGE_SIZE;
+
+		/*
+		 * ELF libraries aren't always executable.
+		 * We'll want to check whether the mapping starts with the ELF
+		 * magic, but not now - we're holding the mmap lock,
+		 * so copy_from_user() doesn't work here.
+		 * Use a placeholder instead, and fix it up later in
+		 * dump_vma_snapshot().
+		 */
+		return DUMP_SIZE_MAYBE_ELFHDR_PLACEHOLDER;
+	}
 
 #undef	FILTER
 
@@ -1116,8 +1130,6 @@ int dump_vma_snapshot(struct coredump_params *cprm, int *vma_count,
 		m->end = vma->vm_end;
 		m->flags = vma->vm_flags;
 		m->dump_size = vma_dump_size(vma, cprm->mm_flags);
-
-		vma_data_size += m->dump_size;
 	}
 
 	mmap_write_unlock(mm);
@@ -1127,6 +1139,23 @@ int dump_vma_snapshot(struct coredump_params *cprm, int *vma_count,
 		return -EFAULT;
 	}
 
+	for (i = 0; i < *vma_count; i++) {
+		struct core_vma_metadata *m = (*vma_meta) + i;
+
+		if (m->dump_size == DUMP_SIZE_MAYBE_ELFHDR_PLACEHOLDER) {
+			char elfmag[SELFMAG];
+
+			if (copy_from_user(elfmag, (void __user *)m->start, SELFMAG) ||
+					memcmp(elfmag, ELFMAG, SELFMAG) != 0) {
+				m->dump_size = 0;
+			} else {
+				m->dump_size = PAGE_SIZE;
+			}
+		}
+
+		vma_data_size += m->dump_size;
+	}
+
 	*vma_data_size_ptr = vma_data_size;
 	return 0;
 }
-- 
GitLab


From dcd46d897adb70d63e025f175a00a89797d31a43 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Mon, 31 Jan 2022 16:09:47 -0800
Subject: [PATCH 0883/1586] exec: Force single empty string when argv is empty

Quoting[1] Ariadne Conill:

"In several other operating systems, it is a hard requirement that the
second argument to execve(2) be the name of a program, thus prohibiting
a scenario where argc < 1. POSIX 2017 also recommends this behaviour,
but it is not an explicit requirement[2]:

    The argument arg0 should point to a filename string that is
    associated with the process being started by one of the exec
    functions.
...
Interestingly, Michael Kerrisk opened an issue about this in 2008[3],
but there was no consensus to support fixing this issue then.
Hopefully now that CVE-2021-4034 shows practical exploitative use[4]
of this bug in a shellcode, we can reconsider.

This issue is being tracked in the KSPP issue tracker[5]."

While the initial code searches[6][7] turned up what appeared to be
mostly corner case tests, trying to that just reject argv == NULL
(or an immediately terminated pointer list) quickly started tripping[8]
existing userspace programs.

The next best approach is forcing a single empty string into argv and
adjusting argc to match. The number of programs depending on argc == 0
seems a smaller set than those calling execve with a NULL argv.

Account for the additional stack space in bprm_stack_limits(). Inject an
empty string when argc == 0 (and set argc = 1). Warn about the case so
userspace has some notice about the change:

    process './argc0' launched './argc0' with NULL argv: empty string added

Additionally WARN() and reject NULL argv usage for kernel threads.

[1] https://lore.kernel.org/lkml/20220127000724.15106-1-ariadne@dereferenced.org/
[2] https://pubs.opengroup.org/onlinepubs/9699919799/functions/exec.html
[3] https://bugzilla.kernel.org/show_bug.cgi?id=8408
[4] https://www.qualys.com/2022/01/25/cve-2021-4034/pwnkit.txt
[5] https://github.com/KSPP/linux/issues/176
[6] https://codesearch.debian.net/search?q=execve%5C+*%5C%28%5B%5E%2C%5D%2B%2C+*NULL&literal=0
[7] https://codesearch.debian.net/search?q=execlp%3F%5Cs*%5C%28%5B%5E%2C%5D%2B%2C%5Cs*NULL&literal=0
[8] https://lore.kernel.org/lkml/20220131144352.GE16385@xsang-OptiPlex-9020/

Reported-by: Ariadne Conill <ariadne@dereferenced.org>
Reported-by: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Rich Felker <dalias@libc.org>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: linux-fsdevel@vger.kernel.org
Cc: stable@vger.kernel.org
Signed-off-by: Kees Cook <keescook@chromium.org>
Acked-by: Christian Brauner <brauner@kernel.org>
Acked-by: Ariadne Conill <ariadne@dereferenced.org>
Acked-by: Andy Lutomirski <luto@kernel.org>
Link: https://lore.kernel.org/r/20220201000947.2453721-1-keescook@chromium.org
---
 fs/exec.c | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/fs/exec.c b/fs/exec.c
index 79f2c9483302d..40b1008fb0f79 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -495,8 +495,14 @@ static int bprm_stack_limits(struct linux_binprm *bprm)
 	 * the stack. They aren't stored until much later when we can't
 	 * signal to the parent that the child has run out of stack space.
 	 * Instead, calculate it here so it's possible to fail gracefully.
+	 *
+	 * In the case of argc = 0, make sure there is space for adding a
+	 * empty string (which will bump argc to 1), to ensure confused
+	 * userspace programs don't start processing from argv[1], thinking
+	 * argc can never be 0, to keep them from walking envp by accident.
+	 * See do_execveat_common().
 	 */
-	ptr_size = (bprm->argc + bprm->envc) * sizeof(void *);
+	ptr_size = (max(bprm->argc, 1) + bprm->envc) * sizeof(void *);
 	if (limit <= ptr_size)
 		return -E2BIG;
 	limit -= ptr_size;
@@ -1897,6 +1903,9 @@ static int do_execveat_common(int fd, struct filename *filename,
 	}
 
 	retval = count(argv, MAX_ARG_STRINGS);
+	if (retval == 0)
+		pr_warn_once("process '%s' launched '%s' with NULL argv: empty string added\n",
+			     current->comm, bprm->filename);
 	if (retval < 0)
 		goto out_free;
 	bprm->argc = retval;
@@ -1923,6 +1932,19 @@ static int do_execveat_common(int fd, struct filename *filename,
 	if (retval < 0)
 		goto out_free;
 
+	/*
+	 * When argv is empty, add an empty string ("") as argv[0] to
+	 * ensure confused userspace programs that start processing
+	 * from argv[1] won't end up walking envp. See also
+	 * bprm_stack_limits().
+	 */
+	if (bprm->argc == 0) {
+		retval = copy_string_kernel("", bprm);
+		if (retval < 0)
+			goto out_free;
+		bprm->argc = 1;
+	}
+
 	retval = bprm_execve(bprm, fd, filename, flags);
 out_free:
 	free_bprm(bprm);
@@ -1951,6 +1973,8 @@ int kernel_execve(const char *kernel_filename,
 	}
 
 	retval = count_strings_kernel(argv);
+	if (WARN_ON_ONCE(retval == 0))
+		retval = -EINVAL;
 	if (retval < 0)
 		goto out_free;
 	bprm->argc = retval;
-- 
GitLab


From 9132c3947b09a6c67372424ff69f867f2cee82f8 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Mon, 31 Jan 2022 17:16:37 -0800
Subject: [PATCH 0884/1586] selftests/exec: Test for empty string on NULL argv

Test for the NULL argv argument producing a single empty string on exec.

Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Yang Yingliang <yangyingliang@huawei.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: linux-kselftest@vger.kernel.org
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/lkml/20220201011637.2457646-1-keescook@chromium.org
---
 tools/testing/selftests/exec/Makefile    |  1 +
 tools/testing/selftests/exec/null-argv.c | 78 ++++++++++++++++++++++++
 2 files changed, 79 insertions(+)
 create mode 100644 tools/testing/selftests/exec/null-argv.c

diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile
index 12c5e27d32c16..551affb437fe1 100644
--- a/tools/testing/selftests/exec/Makefile
+++ b/tools/testing/selftests/exec/Makefile
@@ -10,6 +10,7 @@ TEST_GEN_FILES := execveat.symlink execveat.denatured script subdir
 TEST_FILES := Makefile
 
 TEST_GEN_PROGS += recursion-depth
+TEST_GEN_PROGS += null-argv
 
 EXTRA_CLEAN := $(OUTPUT)/subdir.moved $(OUTPUT)/execveat.moved $(OUTPUT)/xxxxx*	\
 	       $(OUTPUT)/S_I*.test
diff --git a/tools/testing/selftests/exec/null-argv.c b/tools/testing/selftests/exec/null-argv.c
new file mode 100644
index 0000000000000..c19726e710d19
--- /dev/null
+++ b/tools/testing/selftests/exec/null-argv.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Test that empty argvs are swapped out for a single empty string. */
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#include "../kselftest.h"
+
+#define FORK(exec)				\
+do {						\
+	pid = fork();				\
+	if (pid == 0) {				\
+		/* Child */			\
+		exec; /* Some kind of exec */	\
+		perror("# " #exec);		\
+		return 1;			\
+	}					\
+	check_result(pid, #exec);		\
+} while (0)
+
+void check_result(pid_t pid, const char *msg)
+{
+	int wstatus;
+
+	if (pid == (pid_t)-1) {
+		perror("# fork");
+		ksft_test_result_fail("fork failed: %s\n", msg);
+		return;
+	}
+	if (waitpid(pid, &wstatus, 0) < 0) {
+		perror("# waitpid");
+		ksft_test_result_fail("waitpid failed: %s\n", msg);
+		return;
+	}
+	if (!WIFEXITED(wstatus)) {
+		ksft_test_result_fail("child did not exit: %s\n", msg);
+		return;
+	}
+	if (WEXITSTATUS(wstatus) != 0) {
+		ksft_test_result_fail("non-zero exit: %s\n", msg);
+		return;
+	}
+	ksft_test_result_pass("%s\n", msg);
+}
+
+int main(int argc, char *argv[], char *envp[])
+{
+	pid_t pid;
+	static char * const args[] = { NULL };
+	static char * const str[] = { "", NULL };
+
+	/* argc counting checks */
+	if (argc < 1) {
+		fprintf(stderr, "# FAIL: saw argc == 0 (old kernel?)\n");
+		return 1;
+	}
+	if (argc != 1) {
+		fprintf(stderr, "# FAIL: unknown argc (%d)\n", argc);
+		return 1;
+	}
+	if (argv[0][0] == '\0') {
+		/* Good, we found a NULL terminated string at argv[0]! */
+		return 0;
+	}
+
+	/* Test runner. */
+	ksft_print_header();
+	ksft_set_plan(5);
+
+	FORK(execve(argv[0], str, NULL));
+	FORK(execve(argv[0], NULL, NULL));
+	FORK(execve(argv[0], NULL, envp));
+	FORK(execve(argv[0], args, NULL));
+	FORK(execve(argv[0], args, envp));
+
+	ksft_exit(ksft_cnt.ksft_pass == ksft_plan);
+}
-- 
GitLab


From d65bc29be0ae4ca2368df25dc6f6247aefb57f07 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Sun, 13 Feb 2022 22:25:20 +0300
Subject: [PATCH 0885/1586] binfmt: move more stuff undef CONFIG_COREDUMP

struct linux_binfmt::core_dump and struct min_coredump::min_coredump
are used under CONFIG_COREDUMP only. Shrink those embedded configs
a bit.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/YglbIFyN+OtwVyjW@localhost.localdomain
---
 fs/binfmt_elf.c         | 2 ++
 fs/binfmt_elf_fdpic.c   | 2 +-
 fs/binfmt_flat.c        | 2 ++
 include/linux/binfmts.h | 2 ++
 4 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 65ea5381c5c73..d0c1703b9576a 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -101,8 +101,10 @@ static struct linux_binfmt elf_format = {
 	.module		= THIS_MODULE,
 	.load_binary	= load_elf_binary,
 	.load_shlib	= load_elf_library,
+#ifdef CONFIG_COREDUMP
 	.core_dump	= elf_core_dump,
 	.min_coredump	= ELF_EXEC_PAGESIZE,
+#endif
 };
 
 #define BAD_ADDR(x) (unlikely((unsigned long)(x) >= TASK_SIZE))
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index c6f588dc4a9db..7fa6e6632d9df 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -83,8 +83,8 @@ static struct linux_binfmt elf_fdpic_format = {
 	.load_binary	= load_elf_fdpic_binary,
 #ifdef CONFIG_ELF_CORE
 	.core_dump	= elf_fdpic_core_dump,
-#endif
 	.min_coredump	= ELF_EXEC_PAGESIZE,
+#endif
 };
 
 static int __init init_elf_fdpic_binfmt(void)
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 5d776f80ee50c..5f0bf24bb3b85 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -102,8 +102,10 @@ static int flat_core_dump(struct coredump_params *cprm);
 static struct linux_binfmt flat_format = {
 	.module		= THIS_MODULE,
 	.load_binary	= load_flat_binary,
+#ifdef CONFIG_COREDUMP
 	.core_dump	= flat_core_dump,
 	.min_coredump	= PAGE_SIZE
+#endif
 };
 
 /****************************************************************************/
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index 049cf9421d831..5d651c219c99b 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -98,8 +98,10 @@ struct linux_binfmt {
 	struct module *module;
 	int (*load_binary)(struct linux_binprm *);
 	int (*load_shlib)(struct file *);
+#ifdef CONFIG_COREDUMP
 	int (*core_dump)(struct coredump_params *cprm);
 	unsigned long min_coredump;	/* minimal dump size */
+#endif
 } __randomize_layout;
 
 extern void __register_binfmt(struct linux_binfmt *fmt, int insert);
-- 
GitLab


From 0da1d5002745cdc721bc018b582a8a9704d56c42 Mon Sep 17 00:00:00 2001
From: Akira Kawata <akirakawata1@gmail.com>
Date: Thu, 27 Jan 2022 21:40:16 +0900
Subject: [PATCH 0886/1586] fs/binfmt_elf: Fix AT_PHDR for unusual ELF files

BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=197921

As pointed out in the discussion of buglink, we cannot calculate AT_PHDR
as the sum of load_addr and exec->e_phoff.

: The AT_PHDR of ELF auxiliary vectors should point to the memory address
: of program header. But binfmt_elf.c calculates this address as follows:
:
: NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
:
: which is wrong since e_phoff is the file offset of program header and
: load_addr is the memory base address from PT_LOAD entry.
:
: The ld.so uses AT_PHDR as the memory address of program header. In normal
: case, since the e_phoff is usually 64 and in the first PT_LOAD region, it
: is the correct program header address.
:
: But if the address of program header isn't equal to the first PT_LOAD
: address + e_phoff (e.g.  Put the program header in other non-consecutive
: PT_LOAD region), ld.so will try to read program header from wrong address
: then crash or use incorrect program header.

This is because exec->e_phoff
is the offset of PHDRs in the file and the address of PHDRs in the
memory may differ from it. This patch fixes the bug by calculating the
address of program headers from PT_LOADs directly.

Signed-off-by: Akira Kawata <akirakawata1@gmail.com>
Reported-by: kernel test robot <lkp@intel.com>
Acked-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20220127124014.338760-2-akirakawata1@gmail.com
---
 fs/binfmt_elf.c | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index d0c1703b9576a..600cec2173c89 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -172,8 +172,8 @@ static int padzero(unsigned long elf_bss)
 
 static int
 create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec,
-		unsigned long load_addr, unsigned long interp_load_addr,
-		unsigned long e_entry)
+		unsigned long interp_load_addr,
+		unsigned long e_entry, unsigned long phdr_addr)
 {
 	struct mm_struct *mm = current->mm;
 	unsigned long p = bprm->p;
@@ -259,7 +259,7 @@ create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec,
 	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
 	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
 	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
-	NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
+	NEW_AUX_ENT(AT_PHDR, phdr_addr);
 	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
 	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
 	NEW_AUX_ENT(AT_BASE, interp_load_addr);
@@ -824,7 +824,7 @@ static int parse_elf_properties(struct file *f, const struct elf_phdr *phdr,
 static int load_elf_binary(struct linux_binprm *bprm)
 {
 	struct file *interpreter = NULL; /* to shut gcc up */
- 	unsigned long load_addr = 0, load_bias = 0;
+	unsigned long load_addr, load_bias = 0, phdr_addr = 0;
 	int load_addr_set = 0;
 	unsigned long error;
 	struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
@@ -1181,6 +1181,17 @@ out_free_interp:
 				reloc_func_desc = load_bias;
 			}
 		}
+
+		/*
+		 * Figure out which segment in the file contains the Program
+		 * Header table, and map to the associated memory address.
+		 */
+		if (elf_ppnt->p_offset <= elf_ex->e_phoff &&
+		    elf_ex->e_phoff < elf_ppnt->p_offset + elf_ppnt->p_filesz) {
+			phdr_addr = elf_ex->e_phoff - elf_ppnt->p_offset +
+				    elf_ppnt->p_vaddr;
+		}
+
 		k = elf_ppnt->p_vaddr;
 		if ((elf_ppnt->p_flags & PF_X) && k < start_code)
 			start_code = k;
@@ -1216,6 +1227,7 @@ out_free_interp:
 	}
 
 	e_entry = elf_ex->e_entry + load_bias;
+	phdr_addr += load_bias;
 	elf_bss += load_bias;
 	elf_brk += load_bias;
 	start_code += load_bias;
@@ -1279,8 +1291,8 @@ out_free_interp:
 		goto out;
 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
 
-	retval = create_elf_tables(bprm, elf_ex,
-			  load_addr, interp_load_addr, e_entry);
+	retval = create_elf_tables(bprm, elf_ex, interp_load_addr,
+				   e_entry, phdr_addr);
 	if (retval < 0)
 		goto out;
 
-- 
GitLab


From 2b4bfbe0967697c9b5de83dbaf5b49db4b367ec0 Mon Sep 17 00:00:00 2001
From: Akira Kawata <akirakawata1@gmail.com>
Date: Thu, 27 Jan 2022 21:40:17 +0900
Subject: [PATCH 0887/1586] fs/binfmt_elf: Refactor load_elf_binary function

I delete load_addr because it is not used anymore. And I rename
load_addr_set to first_pt_load because it is used only to capture the
first iteration of the loop.

Signed-off-by: Akira Kawata <akirakawata1@gmail.com>
Acked-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20220127124014.338760-3-akirakawata1@gmail.com
---
 fs/binfmt_elf.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 600cec2173c89..4c5a2175f6053 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -824,8 +824,8 @@ static int parse_elf_properties(struct file *f, const struct elf_phdr *phdr,
 static int load_elf_binary(struct linux_binprm *bprm)
 {
 	struct file *interpreter = NULL; /* to shut gcc up */
-	unsigned long load_addr, load_bias = 0, phdr_addr = 0;
-	int load_addr_set = 0;
+	unsigned long load_bias = 0, phdr_addr = 0;
+	int first_pt_load = 1;
 	unsigned long error;
 	struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
 	struct elf_phdr *elf_property_phdata = NULL;
@@ -1075,12 +1075,12 @@ out_free_interp:
 
 		vaddr = elf_ppnt->p_vaddr;
 		/*
-		 * The first time through the loop, load_addr_set is false:
+		 * The first time through the loop, first_pt_load is true:
 		 * layout will be calculated. Once set, use MAP_FIXED since
 		 * we know we've already safely mapped the entire region with
 		 * MAP_FIXED_NOREPLACE in the once-per-binary logic following.
 		 */
-		if (load_addr_set) {
+		if (!first_pt_load) {
 			elf_flags |= MAP_FIXED;
 		} else if (elf_ex->e_type == ET_EXEC) {
 			/*
@@ -1171,13 +1171,11 @@ out_free_interp:
 			goto out_free_dentry;
 		}
 
-		if (!load_addr_set) {
-			load_addr_set = 1;
-			load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
+		if (first_pt_load) {
+			first_pt_load = 0;
 			if (elf_ex->e_type == ET_DYN) {
 				load_bias += error -
 				             ELF_PAGESTART(load_bias + vaddr);
-				load_addr += load_bias;
 				reloc_func_desc = load_bias;
 			}
 		}
-- 
GitLab


From b452722e6ff39a1b8111d5b36b8562aaead1726e Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Fri, 11 Feb 2022 08:09:40 -0800
Subject: [PATCH 0888/1586] exec: cleanup comments

Remove the second 'from'.
Replace 'backwords' with 'backwards'.
Replace 'visibile' with 'visible'.

Signed-off-by: Tom Rix <trix@redhat.com>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20220211160940.2516243-1-trix@redhat.com
---
 fs/exec.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index 40b1008fb0f79..8256e8bb9ad33 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -118,7 +118,7 @@ bool path_noexec(const struct path *path)
  * Note that a shared library must be both readable and executable due to
  * security reasons.
  *
- * Also note that we take the address to load from from the file itself.
+ * Also note that we take the address to load from the file itself.
  */
 SYSCALL_DEFINE1(uselib, const char __user *, library)
 {
@@ -542,7 +542,7 @@ static int copy_strings(int argc, struct user_arg_ptr argv,
 		if (!valid_arg_len(bprm, len))
 			goto out;
 
-		/* We're going to work our way backwords. */
+		/* We're going to work our way backwards. */
 		pos = bprm->p;
 		str += len;
 		bprm->p -= len;
@@ -1275,7 +1275,7 @@ int begin_new_exec(struct linux_binprm * bprm)
 
 	/*
 	 * Must be called _before_ exec_mmap() as bprm->mm is
-	 * not visibile until then. This also enables the update
+	 * not visible until then. This also enables the update
 	 * to be lockless.
 	 */
 	retval = set_mm_exe_file(bprm->mm, bprm->file);
-- 
GitLab


From 7dc6ea7c56bd76ab88d09504a90879479e3851ef Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 23 Feb 2022 20:38:53 -0800
Subject: [PATCH 0889/1586] MAINTAINERS: Update execve entry with more details

The UAPI elf.h header was missed in the original MAINTAINER entry. Add
it. Include linux-mm mailing list since that's where execve has
traditionally been discussed. Note that this area is Supported, and aim
at the git tree.

Cc: Eric Biederman <ebiederm@xmission.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 MAINTAINERS | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 69a2935daf6c4..2a4de0619c497 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7225,6 +7225,9 @@ F:	net/core/of_net.c
 EXEC & BINFMT API
 R:	Eric Biederman <ebiederm@xmission.com>
 R:	Kees Cook <keescook@chromium.org>
+L:	linux-mm@kvack.org
+S:	Supported
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/execve
 F:	arch/alpha/kernel/binfmt_loader.c
 F:	arch/x86/ia32/ia32_aout.c
 F:	fs/*binfmt_*.c
@@ -7232,6 +7235,7 @@ F:	fs/exec.c
 F:	include/linux/binfmts.h
 F:	include/linux/elf.h
 F:	include/uapi/linux/binfmts.h
+F:	include/uapi/linux/elf.h
 F:	tools/testing/selftests/exec/
 N:	asm/elf.h
 N:	binfmt
-- 
GitLab


From 4f0bfdfd8323e5b461fc1042143a1097dba9fced Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 23 Feb 2022 19:34:31 -0800
Subject: [PATCH 0890/1586] ELF: Properly redefine PT_GNU_* in terms of PT_LOOS

The PT_GNU_* program header types are actually offsets from PT_LOOS,
so redefine them as such, reorder them, and add the missing PT_GNU_RELRO.

Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Peter Collingbourne <pcc@google.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Dave Martin <Dave.Martin@arm.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/uapi/linux/elf.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h
index 61bf4774b8f2a..6438d55529bf8 100644
--- a/include/uapi/linux/elf.h
+++ b/include/uapi/linux/elf.h
@@ -35,10 +35,11 @@ typedef __s64	Elf64_Sxword;
 #define PT_HIOS    0x6fffffff      /* OS-specific */
 #define PT_LOPROC  0x70000000
 #define PT_HIPROC  0x7fffffff
-#define PT_GNU_EH_FRAME		0x6474e550
-#define PT_GNU_PROPERTY		0x6474e553
-
+#define PT_GNU_EH_FRAME	(PT_LOOS + 0x474e550)
 #define PT_GNU_STACK	(PT_LOOS + 0x474e551)
+#define PT_GNU_RELRO	(PT_LOOS + 0x474e552)
+#define PT_GNU_PROPERTY	(PT_LOOS + 0x474e553)
+
 
 /*
  * Extended Numbering
-- 
GitLab


From 0d3616bbd03cdfaa8a5fdf38e0fec2b1ef6ec0a0 Mon Sep 17 00:00:00 2001
From: Niklas Cassel <niklas.cassel@wdc.com>
Date: Wed, 2 Mar 2022 13:15:52 +0000
Subject: [PATCH 0891/1586] irqchip/sifive-plic: Improve naming scheme for per
 context offsets

The PLIC supports a fixed number of contexts (15872).
Each context has fixed register offsets in PLIC.

The number of contexts that we need to initialize depends on the privilege
modes supported by each hart. Therefore, this mapping between PLIC context
registers to hart privilege modes is platform specific, and is currently
supplied via device tree.

For example, canaan,k210 has the following mapping:
Context0: hart0 M-mode
Context1: hart0 S-mode
Context2: hart1 M-mode
Context3: hart1 S-mode

While sifive,fu540 has the following mapping:
Context0: hart0 M-mode
Context1: hart1 M-mode
Context2: hart1 S-mode

Because the number of contexts per hart is not fixed, the names
ENABLE_PER_HART and CONTEXT_PER_HART for the register offsets are quite
confusing and might mislead the reader to think that these are fixed
register offsets per hart.

Rename the offsets to more clearly highlight that these are per PLIC
context and not per hart.

Signed-off-by: Niklas Cassel <niklas.cassel@wdc.com>
Reviewed-by: Anup Patel <anup@brainfault.org>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220302131544.3166154-2-Niklas.Cassel@wdc.com
---
 drivers/irqchip/irq-sifive-plic.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c
index 259065d271ef0..28b86cd763671 100644
--- a/drivers/irqchip/irq-sifive-plic.c
+++ b/drivers/irqchip/irq-sifive-plic.c
@@ -44,8 +44,8 @@
  * Each hart context has a vector of interrupt enable bits associated with it.
  * There's one bit for each interrupt source.
  */
-#define ENABLE_BASE			0x2000
-#define     ENABLE_PER_HART		0x80
+#define CONTEXT_ENABLE_BASE		0x2000
+#define     CONTEXT_ENABLE_SIZE		0x80
 
 /*
  * Each hart context has a set of control registers associated with it.  Right
@@ -53,7 +53,7 @@
  * take an interrupt, and a register to claim interrupts.
  */
 #define CONTEXT_BASE			0x200000
-#define     CONTEXT_PER_HART		0x1000
+#define     CONTEXT_SIZE		0x1000
 #define     CONTEXT_THRESHOLD		0x00
 #define     CONTEXT_CLAIM		0x04
 
@@ -361,11 +361,11 @@ static int __init plic_init(struct device_node *node,
 
 		cpumask_set_cpu(cpu, &priv->lmask);
 		handler->present = true;
-		handler->hart_base =
-			priv->regs + CONTEXT_BASE + i * CONTEXT_PER_HART;
+		handler->hart_base = priv->regs + CONTEXT_BASE +
+			i * CONTEXT_SIZE;
 		raw_spin_lock_init(&handler->enable_lock);
-		handler->enable_base =
-			priv->regs + ENABLE_BASE + i * ENABLE_PER_HART;
+		handler->enable_base = priv->regs + CONTEXT_ENABLE_BASE +
+			i * CONTEXT_ENABLE_SIZE;
 		handler->priv = priv;
 done:
 		for (hwirq = 1; hwirq <= nr_irqs; hwirq++)
-- 
GitLab


From 098fdbc3531f06aae2426b3a6f9bd730e7691258 Mon Sep 17 00:00:00 2001
From: Niklas Cassel <niklas.cassel@wdc.com>
Date: Wed, 2 Mar 2022 13:15:53 +0000
Subject: [PATCH 0892/1586] irqchip/sifive-plic: Disable S-mode IRQs if running
 in M-mode

When detecting a context for a privilege mode different from the current
running privilege mode, we simply skip to the next context register.

This means that we never clear the S-mode enable bits when running in
M-mode.

On canaan k210, a bunch of S-mode interrupts are enabled by the bootrom.
These S-mode specific interrupts should never trigger, since we never set
the mie.SEIE bit in the parent interrupt controller (riscv-intc).

However, we will be able to see the mip.SEIE bit set as pending.

This isn't a good default when CONFIG_RISCV_M_MODE is set, since in that
case we will never enter a lower privilege mode (e.g. S-mode).

Let's clear the S-mode enable bits when running the kernel in M-mode, such
that we won't have a interrupt pending bit set, which we will never clear.

Signed-off-by: Niklas Cassel <niklas.cassel@wdc.com>
Reviewed-by: Anup Patel <anup@brainfault.org>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220302131544.3166154-3-Niklas.Cassel@wdc.com
---
 drivers/irqchip/irq-sifive-plic.c | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c
index 28b86cd763671..7ae12e51dc170 100644
--- a/drivers/irqchip/irq-sifive-plic.c
+++ b/drivers/irqchip/irq-sifive-plic.c
@@ -81,17 +81,21 @@ static int plic_parent_irq __ro_after_init;
 static bool plic_cpuhp_setup_done __ro_after_init;
 static DEFINE_PER_CPU(struct plic_handler, plic_handlers);
 
-static inline void plic_toggle(struct plic_handler *handler,
-				int hwirq, int enable)
+static void __plic_toggle(void __iomem *enable_base, int hwirq, int enable)
 {
-	u32 __iomem *reg = handler->enable_base + (hwirq / 32) * sizeof(u32);
+	u32 __iomem *reg = enable_base + (hwirq / 32) * sizeof(u32);
 	u32 hwirq_mask = 1 << (hwirq % 32);
 
-	raw_spin_lock(&handler->enable_lock);
 	if (enable)
 		writel(readl(reg) | hwirq_mask, reg);
 	else
 		writel(readl(reg) & ~hwirq_mask, reg);
+}
+
+static void plic_toggle(struct plic_handler *handler, int hwirq, int enable)
+{
+	raw_spin_lock(&handler->enable_lock);
+	__plic_toggle(handler->enable_base, hwirq, enable);
 	raw_spin_unlock(&handler->enable_lock);
 }
 
@@ -324,8 +328,18 @@ static int __init plic_init(struct device_node *node,
 		 * Skip contexts other than external interrupts for our
 		 * privilege level.
 		 */
-		if (parent.args[0] != RV_IRQ_EXT)
+		if (parent.args[0] != RV_IRQ_EXT) {
+			/* Disable S-mode enable bits if running in M-mode. */
+			if (IS_ENABLED(CONFIG_RISCV_M_MODE)) {
+				void __iomem *enable_base = priv->regs +
+					CONTEXT_ENABLE_BASE +
+					i * CONTEXT_ENABLE_SIZE;
+
+				for (hwirq = 1; hwirq <= nr_irqs; hwirq++)
+					__plic_toggle(enable_base, hwirq, 0);
+			}
 			continue;
+		}
 
 		hartid = riscv_of_parent_hartid(parent.np);
 		if (hartid < 0) {
-- 
GitLab


From 75c3543e39f0c94644eac5965b3efe50c2c5c39d Mon Sep 17 00:00:00 2001
From: Vincent Whitchurch <vincent.whitchurch@axis.com>
Date: Tue, 1 Mar 2022 12:18:29 +0100
Subject: [PATCH 0893/1586] regulator: virtual: use dev_err_probe()

Use dev_err_probe() to avoid printing spurious warnings on
probe deferral.

Signed-off-by: Vincent Whitchurch <vincent.whitchurch@axis.com>
Link: https://lore.kernel.org/r/20220301111831.3742383-2-vincent.whitchurch@axis.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/virtual.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/regulator/virtual.c b/drivers/regulator/virtual.c
index 52c5a0e0acd80..50d2e9caaa719 100644
--- a/drivers/regulator/virtual.c
+++ b/drivers/regulator/virtual.c
@@ -295,12 +295,10 @@ static int regulator_virtual_probe(struct platform_device *pdev)
 	mutex_init(&drvdata->lock);
 
 	drvdata->regulator = devm_regulator_get(&pdev->dev, reg_id);
-	if (IS_ERR(drvdata->regulator)) {
-		ret = PTR_ERR(drvdata->regulator);
-		dev_err(&pdev->dev, "Failed to obtain supply '%s': %d\n",
-			reg_id, ret);
-		return ret;
-	}
+	if (IS_ERR(drvdata->regulator))
+		return dev_err_probe(&pdev->dev, PTR_ERR(drvdata->regulator),
+				     "Failed to obtain supply '%s'\n",
+				     reg_id);
 
 	ret = sysfs_create_group(&pdev->dev.kobj,
 				 &regulator_virtual_attr_group);
-- 
GitLab


From d2fb5487ecb2a28b61ff261ae18488afc98d24a6 Mon Sep 17 00:00:00 2001
From: Vincent Whitchurch <vincent.whitchurch@axis.com>
Date: Tue, 1 Mar 2022 12:18:30 +0100
Subject: [PATCH 0894/1586] regulator: virtual: warn against production use

This driver is only meant for debugging and testing.  Currently, it's
not possible to use it without patching the kernel since it requires
platform data, but we'll be adding devicetree support, so add a loud
warning to make it clear that it's still only meant for debugging and
testing.

Signed-off-by: Vincent Whitchurch <vincent.whitchurch@axis.com>
Link: https://lore.kernel.org/r/20220301111831.3742383-3-vincent.whitchurch@axis.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/virtual.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/drivers/regulator/virtual.c b/drivers/regulator/virtual.c
index 50d2e9caaa719..9e0abbee1df56 100644
--- a/drivers/regulator/virtual.c
+++ b/drivers/regulator/virtual.c
@@ -285,8 +285,21 @@ static int regulator_virtual_probe(struct platform_device *pdev)
 {
 	char *reg_id = dev_get_platdata(&pdev->dev);
 	struct virtual_consumer_data *drvdata;
+	static bool warned;
 	int ret;
 
+	if (!warned) {
+		warned = true;
+		pr_warn("**********************************************************\n");
+		pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
+		pr_warn("**                                                      **\n");
+		pr_warn("** regulator-virtual-consumer is only for testing and   **\n");
+		pr_warn("** debugging.  Do not use it in a production kernel.    **\n");
+		pr_warn("**                                                      **\n");
+		pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
+		pr_warn("**********************************************************\n");
+	}
+
 	drvdata = devm_kzalloc(&pdev->dev, sizeof(struct virtual_consumer_data),
 			       GFP_KERNEL);
 	if (drvdata == NULL)
-- 
GitLab


From 80c056656d46ffbece6125dee3f25adbc36d1486 Mon Sep 17 00:00:00 2001
From: Vincent Whitchurch <vincent.whitchurch@axis.com>
Date: Tue, 1 Mar 2022 12:18:31 +0100
Subject: [PATCH 0895/1586] regulator: virtual: add devicetree support

The reg-virt-consumer is very useful for development and testing of
regulator drivers since it allows voltages and modes to be set from
userspace.  However, it currently requires platform data so it cannot be
used without patching the kernel.  Add support for probing it from the
devicetree to remedy this.

Since this driver is only meant for testing and is a purely software
construct, no binding documentation is added.

Signed-off-by: Vincent Whitchurch <vincent.whitchurch@axis.com>
Link: https://lore.kernel.org/r/20220301111831.3742383-4-vincent.whitchurch@axis.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/virtual.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/drivers/regulator/virtual.c b/drivers/regulator/virtual.c
index 9e0abbee1df56..5d32628a5011d 100644
--- a/drivers/regulator/virtual.c
+++ b/drivers/regulator/virtual.c
@@ -13,6 +13,7 @@
 #include <linux/regulator/consumer.h>
 #include <linux/slab.h>
 #include <linux/module.h>
+#include <linux/of.h>
 
 struct virtual_consumer_data {
 	struct mutex lock;
@@ -281,6 +282,14 @@ static const struct attribute_group regulator_virtual_attr_group = {
 	.attrs	= regulator_virtual_attributes,
 };
 
+#ifdef CONFIG_OF
+static const struct of_device_id regulator_virtual_consumer_of_match[] = {
+	{ .compatible = "regulator-virtual-consumer" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, regulator_virtual_consumer_of_match);
+#endif
+
 static int regulator_virtual_probe(struct platform_device *pdev)
 {
 	char *reg_id = dev_get_platdata(&pdev->dev);
@@ -305,6 +314,14 @@ static int regulator_virtual_probe(struct platform_device *pdev)
 	if (drvdata == NULL)
 		return -ENOMEM;
 
+	/*
+	 * This virtual consumer does not have any hardware-defined supply
+	 * name, so just allow the regulator to be specified in a property
+	 * named "default-supply" when we're being probed from devicetree.
+	 */
+	if (!reg_id && pdev->dev.of_node)
+		reg_id = "default";
+
 	mutex_init(&drvdata->lock);
 
 	drvdata->regulator = devm_regulator_get(&pdev->dev, reg_id);
@@ -345,6 +362,7 @@ static struct platform_driver regulator_virtual_consumer_driver = {
 	.remove		= regulator_virtual_remove,
 	.driver		= {
 		.name		= "reg-virt-consumer",
+		.of_match_table = of_match_ptr(regulator_virtual_consumer_of_match),
 	},
 };
 
-- 
GitLab


From ee8ad9440f18478796dbd7e0891efcc44376ab70 Mon Sep 17 00:00:00 2001
From: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
Date: Tue, 1 Mar 2022 13:42:43 +0000
Subject: [PATCH 0896/1586] spi: dt-bindings: renesas,rspi: Drop comment for
 generic compatible string

Drop comment "# RZ/A and RZ/G2{L,LC}" for generic compatible string
"renesas,rspi-rz" as this will avoid changing the line for every new SoC
addition.

Suggested-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
Link: https://lore.kernel.org/r/20220301134244.20174-1-prabhakar.mahadev-lad.rj@bp.renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 Documentation/devicetree/bindings/spi/renesas,rspi.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/spi/renesas,rspi.yaml b/Documentation/devicetree/bindings/spi/renesas,rspi.yaml
index a902f0ca21983..2c3c6bd6ec45b 100644
--- a/Documentation/devicetree/bindings/spi/renesas,rspi.yaml
+++ b/Documentation/devicetree/bindings/spi/renesas,rspi.yaml
@@ -23,7 +23,7 @@ properties:
               - renesas,rspi-r7s9210   # RZ/A2
               - renesas,r9a07g044-rspi # RZ/G2{L,LC}
               - renesas,r9a07g054-rspi # RZ/V2L
-          - const: renesas,rspi-rz     # RZ/A and RZ/G2{L,LC}
+          - const: renesas,rspi-rz
 
       - items:
           - enum:
-- 
GitLab


From d149dd2a806b9d11e570c3731eca8bda3c5f6238 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Tue, 1 Mar 2022 15:23:57 -0600
Subject: [PATCH 0897/1586] spi: dt-bindings: mediatek: Set min size for
 'mediatek,pad-select'

The minimum array length defaults to the same size as the maximum. For
'mediatek,pad-select', the example has a length of 2 and in-tree .dts
files have a length of 1, but the schema says the length must be 4.

There's currently no warning in the example because the schema fixups
are not handling this case correctly.

Signed-off-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20220301212358.1887668-1-robh@kernel.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 Documentation/devicetree/bindings/spi/mediatek,spi-mt65xx.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/spi/mediatek,spi-mt65xx.yaml b/Documentation/devicetree/bindings/spi/mediatek,spi-mt65xx.yaml
index bfa44acb1bdd0..ac9ff12ae27a2 100644
--- a/Documentation/devicetree/bindings/spi/mediatek,spi-mt65xx.yaml
+++ b/Documentation/devicetree/bindings/spi/mediatek,spi-mt65xx.yaml
@@ -62,6 +62,7 @@ properties:
 
   mediatek,pad-select:
     $ref: /schemas/types.yaml#/definitions/uint32-array
+    minItems: 1
     maxItems: 4
     items:
       enum: [0, 1, 2, 3]
-- 
GitLab


From 13262fc26c1837c51a5131dbbdd67a2387f8bfc7 Mon Sep 17 00:00:00 2001
From: Jiasheng Jiang <jiasheng@iscas.ac.cn>
Date: Wed, 2 Mar 2022 17:20:51 +0800
Subject: [PATCH 0898/1586] spi: spi-zynqmp-gqspi: Handle error for
 dma_set_mask

As the potential failure of the dma_set_mask(),
it should be better to check it and return error
if fails.

Fixes: 126bdb606fd2 ("spi: spi-zynqmp-gqspi: return -ENOMEM if dma_map_single fails")
Signed-off-by: Jiasheng Jiang <jiasheng@iscas.ac.cn>
Link: https://lore.kernel.org/r/20220302092051.121343-1-jiasheng@iscas.ac.cn
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-zynqmp-gqspi.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/spi/spi-zynqmp-gqspi.c b/drivers/spi/spi-zynqmp-gqspi.c
index 328b6559bb19a..2b5afae8ff7fc 100644
--- a/drivers/spi/spi-zynqmp-gqspi.c
+++ b/drivers/spi/spi-zynqmp-gqspi.c
@@ -1172,7 +1172,10 @@ static int zynqmp_qspi_probe(struct platform_device *pdev)
 		goto clk_dis_all;
 	}
 
-	dma_set_mask(&pdev->dev, DMA_BIT_MASK(44));
+	ret = dma_set_mask(&pdev->dev, DMA_BIT_MASK(44));
+	if (ret)
+		goto clk_dis_all;
+
 	ctlr->bits_per_word_mask = SPI_BPW_MASK(8);
 	ctlr->num_chipselect = GQSPI_DEFAULT_NUM_CS;
 	ctlr->mem_ops = &zynqmp_qspi_mem_ops;
-- 
GitLab


From cdbec3ede0b8cb318c36f5cc945b9360329cbd25 Mon Sep 17 00:00:00 2001
From: Paul Moore <paul@paul-moore.com>
Date: Tue, 1 Mar 2022 12:39:20 -0500
Subject: [PATCH 0899/1586] selinux: shorten the policy capability enum names

The SELinux policy capability enum names are rather long and follow
the "POLICYDB_CAPABILITY_XXX format".  While the "POLICYDB_" prefix
is helpful in tying the enums to other SELinux policy constants,
macros, etc. there is no reason why we need to spell out
"CAPABILITY" completely.  Shorten "CAPABILITY" to "CAP" in order to
make things a bit shorter and cleaner.

Moving forward, the SELinux policy capability enum names should
follow the "POLICYDB_CAP_XXX" format.

Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 security/selinux/ima.c                     |  4 ++--
 security/selinux/include/policycap.h       | 22 +++++++++++-----------
 security/selinux/include/policycap_names.h |  2 +-
 security/selinux/include/security.h        | 18 +++++++++---------
 security/selinux/selinuxfs.c               |  2 +-
 5 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/security/selinux/ima.c b/security/selinux/ima.c
index 727c4e43219d7..fded01981a393 100644
--- a/security/selinux/ima.c
+++ b/security/selinux/ima.c
@@ -29,7 +29,7 @@ static char *selinux_ima_collect_state(struct selinux_state *state)
 	buf_len = strlen("initialized=0;enforcing=0;checkreqprot=0;") + 1;
 
 	len = strlen(on);
-	for (i = 0; i < __POLICYDB_CAPABILITY_MAX; i++)
+	for (i = 0; i < __POLICYDB_CAP_MAX; i++)
 		buf_len += strlen(selinux_policycap_names[i]) + len;
 
 	buf = kzalloc(buf_len, GFP_KERNEL);
@@ -54,7 +54,7 @@ static char *selinux_ima_collect_state(struct selinux_state *state)
 	rc = strlcat(buf, checkreqprot_get(state) ? on : off, buf_len);
 	WARN_ON(rc >= buf_len);
 
-	for (i = 0; i < __POLICYDB_CAPABILITY_MAX; i++) {
+	for (i = 0; i < __POLICYDB_CAP_MAX; i++) {
 		rc = strlcat(buf, selinux_policycap_names[i], buf_len);
 		WARN_ON(rc >= buf_len);
 
diff --git a/security/selinux/include/policycap.h b/security/selinux/include/policycap.h
index a9e572ca4fd96..2680aa21205ce 100644
--- a/security/selinux/include/policycap.h
+++ b/security/selinux/include/policycap.h
@@ -4,18 +4,18 @@
 
 /* Policy capabilities */
 enum {
-	POLICYDB_CAPABILITY_NETPEER,
-	POLICYDB_CAPABILITY_OPENPERM,
-	POLICYDB_CAPABILITY_EXTSOCKCLASS,
-	POLICYDB_CAPABILITY_ALWAYSNETWORK,
-	POLICYDB_CAPABILITY_CGROUPSECLABEL,
-	POLICYDB_CAPABILITY_NNP_NOSUID_TRANSITION,
-	POLICYDB_CAPABILITY_GENFS_SECLABEL_SYMLINKS,
-	POLICYDB_CAPABILITY_IOCTL_SKIP_CLOEXEC,
-	__POLICYDB_CAPABILITY_MAX
+	POLICYDB_CAP_NETPEER,
+	POLICYDB_CAP_OPENPERM,
+	POLICYDB_CAP_EXTSOCKCLASS,
+	POLICYDB_CAP_ALWAYSNETWORK,
+	POLICYDB_CAP_CGROUPSECLABEL,
+	POLICYDB_CAP_NNP_NOSUID_TRANSITION,
+	POLICYDB_CAP_GENFS_SECLABEL_SYMLINKS,
+	POLICYDB_CAP_IOCTL_SKIP_CLOEXEC,
+	__POLICYDB_CAP_MAX
 };
-#define POLICYDB_CAPABILITY_MAX (__POLICYDB_CAPABILITY_MAX - 1)
+#define POLICYDB_CAP_MAX (__POLICYDB_CAP_MAX - 1)
 
-extern const char *selinux_policycap_names[__POLICYDB_CAPABILITY_MAX];
+extern const char *selinux_policycap_names[__POLICYDB_CAP_MAX];
 
 #endif /* _SELINUX_POLICYCAP_H_ */
diff --git a/security/selinux/include/policycap_names.h b/security/selinux/include/policycap_names.h
index ebd64afe1defd..100da7d043dbf 100644
--- a/security/selinux/include/policycap_names.h
+++ b/security/selinux/include/policycap_names.h
@@ -5,7 +5,7 @@
 #include "policycap.h"
 
 /* Policy capability names */
-const char *selinux_policycap_names[__POLICYDB_CAPABILITY_MAX] = {
+const char *selinux_policycap_names[__POLICYDB_CAP_MAX] = {
 	"network_peer_controls",
 	"open_perms",
 	"extended_socket_class",
diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h
index 84f42fa8012f1..ace4bd13e8084 100644
--- a/security/selinux/include/security.h
+++ b/security/selinux/include/security.h
@@ -96,7 +96,7 @@ struct selinux_state {
 #endif
 	bool checkreqprot;
 	bool initialized;
-	bool policycap[__POLICYDB_CAPABILITY_MAX];
+	bool policycap[__POLICYDB_CAP_MAX];
 
 	struct page *status_page;
 	struct mutex status_lock;
@@ -174,56 +174,56 @@ static inline bool selinux_policycap_netpeer(void)
 {
 	struct selinux_state *state = &selinux_state;
 
-	return READ_ONCE(state->policycap[POLICYDB_CAPABILITY_NETPEER]);
+	return READ_ONCE(state->policycap[POLICYDB_CAP_NETPEER]);
 }
 
 static inline bool selinux_policycap_openperm(void)
 {
 	struct selinux_state *state = &selinux_state;
 
-	return READ_ONCE(state->policycap[POLICYDB_CAPABILITY_OPENPERM]);
+	return READ_ONCE(state->policycap[POLICYDB_CAP_OPENPERM]);
 }
 
 static inline bool selinux_policycap_extsockclass(void)
 {
 	struct selinux_state *state = &selinux_state;
 
-	return READ_ONCE(state->policycap[POLICYDB_CAPABILITY_EXTSOCKCLASS]);
+	return READ_ONCE(state->policycap[POLICYDB_CAP_EXTSOCKCLASS]);
 }
 
 static inline bool selinux_policycap_alwaysnetwork(void)
 {
 	struct selinux_state *state = &selinux_state;
 
-	return READ_ONCE(state->policycap[POLICYDB_CAPABILITY_ALWAYSNETWORK]);
+	return READ_ONCE(state->policycap[POLICYDB_CAP_ALWAYSNETWORK]);
 }
 
 static inline bool selinux_policycap_cgroupseclabel(void)
 {
 	struct selinux_state *state = &selinux_state;
 
-	return READ_ONCE(state->policycap[POLICYDB_CAPABILITY_CGROUPSECLABEL]);
+	return READ_ONCE(state->policycap[POLICYDB_CAP_CGROUPSECLABEL]);
 }
 
 static inline bool selinux_policycap_nnp_nosuid_transition(void)
 {
 	struct selinux_state *state = &selinux_state;
 
-	return READ_ONCE(state->policycap[POLICYDB_CAPABILITY_NNP_NOSUID_TRANSITION]);
+	return READ_ONCE(state->policycap[POLICYDB_CAP_NNP_NOSUID_TRANSITION]);
 }
 
 static inline bool selinux_policycap_genfs_seclabel_symlinks(void)
 {
 	struct selinux_state *state = &selinux_state;
 
-	return READ_ONCE(state->policycap[POLICYDB_CAPABILITY_GENFS_SECLABEL_SYMLINKS]);
+	return READ_ONCE(state->policycap[POLICYDB_CAP_GENFS_SECLABEL_SYMLINKS]);
 }
 
 static inline bool selinux_policycap_ioctl_skip_cloexec(void)
 {
 	struct selinux_state *state = &selinux_state;
 
-	return READ_ONCE(state->policycap[POLICYDB_CAPABILITY_IOCTL_SKIP_CLOEXEC]);
+	return READ_ONCE(state->policycap[POLICYDB_CAP_IOCTL_SKIP_CLOEXEC]);
 }
 
 struct selinux_policy_convert_data;
diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c
index f2f6203e0fff5..097c6d866ec4d 100644
--- a/security/selinux/selinuxfs.c
+++ b/security/selinux/selinuxfs.c
@@ -1983,7 +1983,7 @@ static int sel_make_policycap(struct selinux_fs_info *fsi)
 	struct dentry *dentry = NULL;
 	struct inode *inode = NULL;
 
-	for (iter = 0; iter <= POLICYDB_CAPABILITY_MAX; iter++) {
+	for (iter = 0; iter <= POLICYDB_CAP_MAX; iter++) {
 		if (iter < ARRAY_SIZE(selinux_policycap_names))
 			dentry = d_alloc_name(fsi->policycap_dir,
 					      selinux_policycap_names[iter]);
-- 
GitLab


From ff4670ed6e8137642b0760aa198bc04188451b80 Mon Sep 17 00:00:00 2001
From: Marcello Sylvester Bauer <sylv@sylv.io>
Date: Wed, 2 Mar 2022 10:49:19 +0100
Subject: [PATCH 0900/1586] dt-bindings: trivial-devices: Add xdpe11280

Add Infineon Multi-phase Digital Controller XDPE11280.

Signed-off-by: Marcello Sylvester Bauer <sylv@sylv.io>
Acked-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/81c0523ba790965a371d267bb1944652d80f5f72.1646214248.git.sylv@sylv.io
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 Documentation/devicetree/bindings/trivial-devices.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Documentation/devicetree/bindings/trivial-devices.yaml b/Documentation/devicetree/bindings/trivial-devices.yaml
index 1c49bbc654eed..da929cb08463c 100644
--- a/Documentation/devicetree/bindings/trivial-devices.yaml
+++ b/Documentation/devicetree/bindings/trivial-devices.yaml
@@ -137,6 +137,8 @@ properties:
           - infineon,slb9645tt
             # Infineon TLV493D-A1B6 I2C 3D Magnetic Sensor
           - infineon,tlv493d-a1b6
+            # Infineon Multi-phase Digital VR Controller xdpe11280
+          - infineon,xdpe11280
             # Infineon Multi-phase Digital VR Controller xdpe12254
           - infineon,xdpe12254
             # Infineon Multi-phase Digital VR Controller xdpe12284
-- 
GitLab


From efdab64d88ab693bdcadc23780e347cc4185d4be Mon Sep 17 00:00:00 2001
From: Marcello Sylvester Bauer <sylv@sylv.io>
Date: Wed, 2 Mar 2022 10:49:20 +0100
Subject: [PATCH 0901/1586] hwmon: (xdpe12284) Add support for xdpe11280

Add support for another Infineon Multi-phase controller chip. The
xdpe11280 uses linear instead of vid data format for VOUT. Detect
VOUT_MODE format during identification and skip the xdpe122 related
adaptions in case it is linear.

Signed-off-by: Marcello Sylvester Bauer <sylv@sylv.io>
Link: https://lore.kernel.org/r/fa6a4b636a05ecb337d132824efca2545188a2a2.1646214248.git.sylv@sylv.io
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 Documentation/hwmon/xdpe12284.rst | 12 ++++++++----
 drivers/hwmon/pmbus/xdpe12284.c   | 21 ++++++++++++++++++---
 2 files changed, 26 insertions(+), 7 deletions(-)

diff --git a/Documentation/hwmon/xdpe12284.rst b/Documentation/hwmon/xdpe12284.rst
index 67d1f87808e57..a224dc74ad356 100644
--- a/Documentation/hwmon/xdpe12284.rst
+++ b/Documentation/hwmon/xdpe12284.rst
@@ -5,6 +5,10 @@ Kernel driver xdpe122
 
 Supported chips:
 
+  * Infineon XDPE11280
+
+    Prefix: 'xdpe11280'
+
   * Infineon XDPE12254
 
     Prefix: 'xdpe12254'
@@ -20,10 +24,10 @@ Authors:
 Description
 -----------
 
-This driver implements support for Infineon Multi-phase XDPE122 family
-dual loop voltage regulators.
-The family includes XDPE12284 and XDPE12254 devices.
-The devices from this family complaint with:
+This driver implements support for Infineon Multi-phase XDPE112 and XDPE122
+family dual loop voltage regulators.
+These families include XDPE11280, XDPE12284 and XDPE12254 devices.
+The devices from this family compliant with:
 
 - Intel VR13 and VR13HC rev 1.3, IMVP8 rev 1.2 and IMPVP9 rev 1.3 DC-DC
   converter specification.
diff --git a/drivers/hwmon/pmbus/xdpe12284.c b/drivers/hwmon/pmbus/xdpe12284.c
index b07da06a40c9d..b979c88d2e870 100644
--- a/drivers/hwmon/pmbus/xdpe12284.c
+++ b/drivers/hwmon/pmbus/xdpe12284.c
@@ -76,7 +76,22 @@ static int xdpe122_identify(struct i2c_client *client,
 			    struct pmbus_driver_info *info)
 {
 	u8 vout_params;
-	int i, ret;
+	int i, ret, vout_mode;
+
+	vout_mode = pmbus_read_byte_data(client, 0, PMBUS_VOUT_MODE);
+	if (vout_mode >= 0 && vout_mode != 0xff) {
+		switch (vout_mode >> 5) {
+		case 0:
+			info->format[PSC_VOLTAGE_OUT] = linear;
+			return 0;
+		case 1:
+			info->format[PSC_VOLTAGE_OUT] = vid;
+			info->read_word_data = xdpe122_read_word_data;
+			break;
+		default:
+			return -ENODEV;
+		}
+	}
 
 	for (i = 0; i < XDPE122_PAGE_NUM; i++) {
 		/* Read the register with VOUT scaling value.*/
@@ -110,7 +125,6 @@ static int xdpe122_identify(struct i2c_client *client,
 static struct pmbus_driver_info xdpe122_info = {
 	.pages = XDPE122_PAGE_NUM,
 	.format[PSC_VOLTAGE_IN] = linear,
-	.format[PSC_VOLTAGE_OUT] = vid,
 	.format[PSC_TEMPERATURE] = linear,
 	.format[PSC_CURRENT_IN] = linear,
 	.format[PSC_CURRENT_OUT] = linear,
@@ -124,7 +138,6 @@ static struct pmbus_driver_info xdpe122_info = {
 		PMBUS_HAVE_TEMP | PMBUS_HAVE_STATUS_TEMP |
 		PMBUS_HAVE_POUT | PMBUS_HAVE_PIN | PMBUS_HAVE_STATUS_INPUT,
 	.identify = xdpe122_identify,
-	.read_word_data = xdpe122_read_word_data,
 };
 
 static int xdpe122_probe(struct i2c_client *client)
@@ -140,6 +153,7 @@ static int xdpe122_probe(struct i2c_client *client)
 }
 
 static const struct i2c_device_id xdpe122_id[] = {
+	{"xdpe11280", 0},
 	{"xdpe12254", 0},
 	{"xdpe12284", 0},
 	{}
@@ -148,6 +162,7 @@ static const struct i2c_device_id xdpe122_id[] = {
 MODULE_DEVICE_TABLE(i2c, xdpe122_id);
 
 static const struct of_device_id __maybe_unused xdpe122_of_match[] = {
+	{.compatible = "infineon,xdpe11280"},
 	{.compatible = "infineon,xdpe12254"},
 	{.compatible = "infineon,xdpe12284"},
 	{}
-- 
GitLab


From f53bfe4d698430bd602c37042db4021dc2603b8d Mon Sep 17 00:00:00 2001
From: Marcello Sylvester Bauer <sylv@sylv.io>
Date: Wed, 2 Mar 2022 10:49:21 +0100
Subject: [PATCH 0902/1586] hwmon: (xdpe12284) Add regulator support

Add simple on/off regulator support for xdpe12284 and
other pmbus parts supported by the xdpe12284 driver.

Signed-off-by: Marcello Sylvester Bauer <sylv@sylv.io>
Link: https://lore.kernel.org/r/f69b8e7fa32cd2bad9516d8fa590abb87c7e4869.1646214248.git.sylv@sylv.io
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/pmbus/Kconfig     |  6 ++++++
 drivers/hwmon/pmbus/xdpe12284.c | 11 +++++++++++
 2 files changed, 17 insertions(+)

diff --git a/drivers/hwmon/pmbus/Kconfig b/drivers/hwmon/pmbus/Kconfig
index 6552467c588d1..a2ea1d5a87650 100644
--- a/drivers/hwmon/pmbus/Kconfig
+++ b/drivers/hwmon/pmbus/Kconfig
@@ -417,6 +417,12 @@ config SENSORS_XDPE122
 	  This driver can also be built as a module. If so, the module will
 	  be called xdpe12284.
 
+config SENSORS_XDPE122_REGULATOR
+	bool "Regulator support for XDPE122 and compatibles"
+	depends on SENSORS_XDPE122 && REGULATOR
+	help
+	  Uses the xdpe12284 or compatible as regulator.
+
 config SENSORS_ZL6100
 	tristate "Intersil ZL6100 and compatibles"
 	help
diff --git a/drivers/hwmon/pmbus/xdpe12284.c b/drivers/hwmon/pmbus/xdpe12284.c
index b979c88d2e870..18fffc5d749bd 100644
--- a/drivers/hwmon/pmbus/xdpe12284.c
+++ b/drivers/hwmon/pmbus/xdpe12284.c
@@ -10,6 +10,8 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/regulator/driver.h>
+
 #include "pmbus.h"
 
 #define XDPE122_PROT_VR12_5MV		0x01 /* VR12.0 mode, 5-mV DAC */
@@ -122,6 +124,11 @@ static int xdpe122_identify(struct i2c_client *client,
 	return 0;
 }
 
+static const struct regulator_desc xdpe122_reg_desc[] = {
+	PMBUS_REGULATOR("vout", 0),
+	PMBUS_REGULATOR("vout", 1),
+};
+
 static struct pmbus_driver_info xdpe122_info = {
 	.pages = XDPE122_PAGE_NUM,
 	.format[PSC_VOLTAGE_IN] = linear,
@@ -138,6 +145,10 @@ static struct pmbus_driver_info xdpe122_info = {
 		PMBUS_HAVE_TEMP | PMBUS_HAVE_STATUS_TEMP |
 		PMBUS_HAVE_POUT | PMBUS_HAVE_PIN | PMBUS_HAVE_STATUS_INPUT,
 	.identify = xdpe122_identify,
+#if IS_ENABLED(CONFIG_SENSORS_XDPE122_REGULATOR)
+	.num_regulators = 2,
+	.reg_desc = xdpe122_reg_desc,
+#endif
 };
 
 static int xdpe122_probe(struct i2c_client *client)
-- 
GitLab


From a3cd66d7cbadcc0c29884f25b754fd22699c719c Mon Sep 17 00:00:00 2001
From: Potin Lai <potin.lai@quantatw.com>
Date: Wed, 2 Mar 2022 20:38:16 +0800
Subject: [PATCH 0903/1586] hwmon: (adm1275) Allow setting sample averaging

Current driver assume PWR_AVG and VI_AVG as 1 by default, and user needs
to set sample averaging via sysfs manually.

This patch parses the properties "adi,power-sample-average" and
"adi,volt-curr-sample-average" from device tree, and setting sample
averaging during probe. Input value must be one of value in the
list [1, 2, 4, 8, 16, 32, 64, 128].

Signed-off-by: Potin Lai <potin.lai@quantatw.com>
Link: https://lore.kernel.org/r/20220302123817.27025-2-potin.lai@quantatw.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/pmbus/adm1275.c | 40 ++++++++++++++++++++++++++++++++++-
 1 file changed, 39 insertions(+), 1 deletion(-)

diff --git a/drivers/hwmon/pmbus/adm1275.c b/drivers/hwmon/pmbus/adm1275.c
index d311e0557401c..3b07bfb43e937 100644
--- a/drivers/hwmon/pmbus/adm1275.c
+++ b/drivers/hwmon/pmbus/adm1275.c
@@ -475,6 +475,7 @@ static int adm1275_probe(struct i2c_client *client)
 	int vindex = -1, voindex = -1, cindex = -1, pindex = -1;
 	int tindex = -1;
 	u32 shunt;
+	u32 avg;
 
 	if (!i2c_check_functionality(client->adapter,
 				     I2C_FUNC_SMBUS_READ_BYTE_DATA
@@ -687,7 +688,7 @@ static int adm1275_probe(struct i2c_client *client)
 		if ((config & (ADM1278_VOUT_EN | ADM1278_TEMP1_EN)) !=
 		    (ADM1278_VOUT_EN | ADM1278_TEMP1_EN)) {
 			config |= ADM1278_VOUT_EN | ADM1278_TEMP1_EN;
-			ret = i2c_smbus_write_byte_data(client,
+			ret = i2c_smbus_write_word_data(client,
 							ADM1275_PMON_CONFIG,
 							config);
 			if (ret < 0) {
@@ -756,6 +757,43 @@ static int adm1275_probe(struct i2c_client *client)
 		return -ENODEV;
 	}
 
+	if (data->have_power_sampling &&
+	    of_property_read_u32(client->dev.of_node,
+				 "adi,power-sample-average", &avg) == 0) {
+		if (!avg || avg > ADM1275_SAMPLES_AVG_MAX ||
+		    BIT(__fls(avg)) != avg) {
+			dev_err(&client->dev,
+				"Invalid number of power samples");
+			return -EINVAL;
+		}
+		ret = adm1275_write_pmon_config(data, client, true,
+						ilog2(avg));
+		if (ret < 0) {
+			dev_err(&client->dev,
+				"Setting power sample averaging failed with error %d",
+				ret);
+			return ret;
+		}
+	}
+
+	if (of_property_read_u32(client->dev.of_node,
+				"adi,volt-curr-sample-average", &avg) == 0) {
+		if (!avg || avg > ADM1275_SAMPLES_AVG_MAX ||
+		    BIT(__fls(avg)) != avg) {
+			dev_err(&client->dev,
+				"Invalid number of voltage/current samples");
+			return -EINVAL;
+		}
+		ret = adm1275_write_pmon_config(data, client, false,
+						ilog2(avg));
+		if (ret < 0) {
+			dev_err(&client->dev,
+				"Setting voltage and current sample averaging failed with error %d",
+				ret);
+			return ret;
+		}
+	}
+
 	if (voindex < 0)
 		voindex = vindex;
 	if (vindex >= 0) {
-- 
GitLab


From a113870165b862440a31a8614fa6905a85033486 Mon Sep 17 00:00:00 2001
From: Potin Lai <potin.lai@quantatw.com>
Date: Wed, 2 Mar 2022 20:38:17 +0800
Subject: [PATCH 0904/1586] dt-bindings: hwmon: Add sample averaging properties
 for ADM1275

Add documentation of new properties for sample averaging in PMON_CONFIG
register.

New properties:
- adi,volt-curr-sample-average
- adi,power-sample-average

Signed-off-by: Potin Lai <potin.lai@quantatw.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20220302123817.27025-3-potin.lai@quantatw.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 .../bindings/hwmon/adi,adm1275.yaml           | 68 +++++++++++++++++++
 1 file changed, 68 insertions(+)

diff --git a/Documentation/devicetree/bindings/hwmon/adi,adm1275.yaml b/Documentation/devicetree/bindings/hwmon/adi,adm1275.yaml
index 223393d7cafd8..ab87f51c5aefe 100644
--- a/Documentation/devicetree/bindings/hwmon/adi,adm1275.yaml
+++ b/Documentation/devicetree/bindings/hwmon/adi,adm1275.yaml
@@ -37,6 +37,72 @@ properties:
     description:
       Shunt resistor value in micro-Ohm.
 
+  adi,volt-curr-sample-average:
+    description: |
+      Number of samples to be used to report voltage and current values.
+    $ref: /schemas/types.yaml#/definitions/uint32
+    enum: [1, 2, 4, 8, 16, 32, 64, 128]
+
+  adi,power-sample-average:
+    description: |
+      Number of samples to be used to report power values.
+    $ref: /schemas/types.yaml#/definitions/uint32
+    enum: [1, 2, 4, 8, 16, 32, 64, 128]
+
+allOf:
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - adi,adm1075
+              - adi,adm1276
+    then:
+      properties:
+        adi,volt-curr-sample-average:
+          default: 128
+        adi,power-sample-average: false
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - adi,adm1275
+    then:
+      properties:
+        adi,volt-curr-sample-average:
+          default: 16
+        adi,power-sample-average: false
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - adi,adm1272
+    then:
+      properties:
+        adi,volt-curr-sample-average:
+          default: 128
+        adi,power-sample-average:
+          default: 128
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - adi,adm1278
+              - adi,adm1293
+              - adi,adm1294
+    then:
+      properties:
+        adi,volt-curr-sample-average:
+          default: 128
+        adi,power-sample-average:
+          default: 1
+
 required:
   - compatible
   - reg
@@ -53,5 +119,7 @@ examples:
             compatible = "adi,adm1272";
             reg = <0x10>;
             shunt-resistor-micro-ohms = <500>;
+            adi,volt-curr-sample-average = <128>;
+            adi,power-sample-average = <128>;
         };
     };
-- 
GitLab


From 3c36fe9302d153283a0cea9e4e25a9e2143ac232 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 22 Feb 2022 20:51:42 +0100
Subject: [PATCH 0905/1586] ACPI: bus: Introduce acpi_bus_for_each_dev()

In order to avoid exposing acpi_bus_type to modules, introduce an
acpi_bus_for_each_dev() helper for iterating over all ACPI device
objects and make typec_link_ports() use it instead of the raw
bus_for_each_dev() along with acpi_bus_type.

Having done that, drop the acpi_bus_type export.

No intentional functional impact.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
---
 drivers/acpi/bus.c              | 7 ++++++-
 drivers/usb/typec/port-mapper.c | 2 +-
 include/acpi/acpi_bus.h         | 2 ++
 3 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index 07f604832fd6b..647791d34c19f 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -1043,7 +1043,12 @@ struct bus_type acpi_bus_type = {
 	.remove		= acpi_device_remove,
 	.uevent		= acpi_device_uevent,
 };
-EXPORT_SYMBOL_GPL(acpi_bus_type);
+
+int acpi_bus_for_each_dev(int (*fn)(struct device *, void *), void *data)
+{
+	return bus_for_each_dev(&acpi_bus_type, NULL, data, fn);
+}
+EXPORT_SYMBOL_GPL(acpi_bus_for_each_dev);
 
 /* --------------------------------------------------------------------------
                              Initialization/Cleanup
diff --git a/drivers/usb/typec/port-mapper.c b/drivers/usb/typec/port-mapper.c
index a7d507802509f..a929e000d0e2c 100644
--- a/drivers/usb/typec/port-mapper.c
+++ b/drivers/usb/typec/port-mapper.c
@@ -59,7 +59,7 @@ int typec_link_ports(struct typec_port *con)
 	if (!has_acpi_companion(&con->dev))
 		return 0;
 
-	bus_for_each_dev(&acpi_bus_type, NULL, &arg, typec_port_match);
+	acpi_bus_for_each_dev(typec_port_match, &arg);
 	if (!arg.match)
 		return 0;
 
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index ca88c4706f2bb..3f7f01f038690 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -480,6 +480,8 @@ void acpi_initialize_hp_context(struct acpi_device *adev,
 /* acpi_device.dev.bus == &acpi_bus_type */
 extern struct bus_type acpi_bus_type;
 
+int acpi_bus_for_each_dev(int (*fn)(struct device *, void *), void *data);
+
 /*
  * Events
  * ------
-- 
GitLab


From 1b9855de1ef4a3f82119f9d8b054a588c7c315fb Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 11 Feb 2022 19:14:57 +0100
Subject: [PATCH 0906/1586] misc: hi6421-spmi-pmic: Use
 generic_handle_irq_safe().

generic_handle_irq() is invoked from a regular interrupt service
routine. This handler will become a forced-threaded handler on
PREEMPT_RT and will be invoked with enabled interrupts. The
generic_handle_irq() must be invoked with disabled interrupts in order
to avoid deadlocks.

Instead of manually disabling interrupts before invoking use
generic_handle_irq_safe() which can be invoked with enabled and disabled
interrupts.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Mauro Carvalho Chehab <mchehab@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://lore.kernel.org/r/20220211181500.1856198-5-bigeasy@linutronix.de
---
 drivers/misc/hi6421v600-irq.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/misc/hi6421v600-irq.c b/drivers/misc/hi6421v600-irq.c
index 1c763796cf1fa..caa3de37698b0 100644
--- a/drivers/misc/hi6421v600-irq.c
+++ b/drivers/misc/hi6421v600-irq.c
@@ -117,8 +117,8 @@ static irqreturn_t hi6421v600_irq_handler(int irq, void *__priv)
 			 * If both powerkey down and up IRQs are received,
 			 * handle them at the right order
 			 */
-			generic_handle_irq(priv->irqs[POWERKEY_DOWN]);
-			generic_handle_irq(priv->irqs[POWERKEY_UP]);
+			generic_handle_irq_safe(priv->irqs[POWERKEY_DOWN]);
+			generic_handle_irq_safe(priv->irqs[POWERKEY_UP]);
 			pending &= ~HISI_IRQ_POWERKEY_UP_DOWN;
 		}
 
@@ -126,7 +126,7 @@ static irqreturn_t hi6421v600_irq_handler(int irq, void *__priv)
 			continue;
 
 		for_each_set_bit(offset, &pending, BITS_PER_BYTE) {
-			generic_handle_irq(priv->irqs[offset + i * BITS_PER_BYTE]);
+			generic_handle_irq_safe(priv->irqs[offset + i * BITS_PER_BYTE]);
 		}
 	}
 
-- 
GitLab


From 83d9b7e3955d32d3e4e3321029f655716cb21c99 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 11 Feb 2022 19:14:58 +0100
Subject: [PATCH 0907/1586] mfd: ezx-pcap: Use generic_handle_irq_safe().

Instead of manually disabling interrupts before invoking use
generic_handle_irq_safe() which can be invoked with enabled and disabled
interrupts.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Lee Jones <lee.jones@linaro.org>
Link: https://lore.kernel.org/r/20220211181500.1856198-6-bigeasy@linutronix.de
---
 drivers/mfd/ezx-pcap.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/mfd/ezx-pcap.c b/drivers/mfd/ezx-pcap.c
index 70fa18b04ad2b..b14d3f98e1ebd 100644
--- a/drivers/mfd/ezx-pcap.c
+++ b/drivers/mfd/ezx-pcap.c
@@ -193,13 +193,11 @@ static void pcap_isr_work(struct work_struct *work)
 		ezx_pcap_write(pcap, PCAP_REG_MSR, isr | msr);
 		ezx_pcap_write(pcap, PCAP_REG_ISR, isr);
 
-		local_irq_disable();
 		service = isr & ~msr;
 		for (irq = pcap->irq_base; service; service >>= 1, irq++) {
 			if (service & 1)
-				generic_handle_irq(irq);
+				generic_handle_irq_safe(irq);
 		}
-		local_irq_enable();
 		ezx_pcap_write(pcap, PCAP_REG_MSR, pcap->msr);
 	} while (gpio_get_value(pdata->gpio));
 }
-- 
GitLab


From bfe6b967948c251955bcf175cb2d4e8d169102ca Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 11 Feb 2022 19:14:59 +0100
Subject: [PATCH 0908/1586] net: usb: lan78xx: Use generic_handle_irq_safe().

Instead of manually disabling interrupts before invoking use
generic_handle_irq_safe() which can be invoked with enabled and disabled
interrupts.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Woojung Huh <woojung.huh@microchip.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: UNGLinuxDriver@microchip.com
Link: https://lore.kernel.org/r/20220211181500.1856198-7-bigeasy@linutronix.de
---
 drivers/net/usb/lan78xx.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index b8e20a3f2b84e..415f16662f88e 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -1537,11 +1537,8 @@ static void lan78xx_status(struct lan78xx_net *dev, struct urb *urb)
 		netif_dbg(dev, link, dev->net, "PHY INTR: 0x%08x\n", intdata);
 		lan78xx_defer_kevent(dev, EVENT_LINK_RESET);
 
-		if (dev->domain_data.phyirq > 0) {
-			local_irq_disable();
-			generic_handle_irq(dev->domain_data.phyirq);
-			local_irq_enable();
-		}
+		if (dev->domain_data.phyirq > 0)
+			generic_handle_irq_safe(dev->domain_data.phyirq);
 	} else {
 		netdev_warn(dev->net,
 			    "unexpected interrupt: 0x%08x\n", intdata);
-- 
GitLab


From ff8dcfebe08dfb2524041116d4afce53ffe0b015 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 11 Feb 2022 19:15:00 +0100
Subject: [PATCH 0909/1586] staging: greybus: gpio: Use
 generic_handle_irq_safe().

Instead of manually disabling interrupts before invoking use
generic_handle_irq_safe() which can be invoked with enabled and disabled
interrupts.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Johan Hovold <johan@kernel.org>
Cc: Alex Elder <elder@kernel.org>
Cc: Rui Miguel Silva <rmfrfs@gmail.com>
Link: https://lore.kernel.org/r/20220211181500.1856198-8-bigeasy@linutronix.de
---
 drivers/staging/greybus/gpio.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/staging/greybus/gpio.c b/drivers/staging/greybus/gpio.c
index 7e6347fe93f99..8a7cf1d0e9688 100644
--- a/drivers/staging/greybus/gpio.c
+++ b/drivers/staging/greybus/gpio.c
@@ -391,10 +391,7 @@ static int gb_gpio_request_handler(struct gb_operation *op)
 		return -EINVAL;
 	}
 
-	local_irq_disable();
-	ret = generic_handle_irq(irq);
-	local_irq_enable();
-
+	ret = generic_handle_irq_safe(irq);
 	if (ret)
 		dev_err(dev, "failed to invoke irq handler\n");
 
-- 
GitLab


From 0934683dd1c7d307cac29902ee5e0ab50aa279f9 Mon Sep 17 00:00:00 2001
From: Claudiu Beznea <claudiu.beznea@microchip.com>
Date: Mon, 21 Feb 2022 09:59:22 +0200
Subject: [PATCH 0910/1586] hwrng: atmel - add wait for ready support on read

Add wait for ready support on read.

Signed-off-by: Claudiu Beznea <claudiu.beznea@microchip.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/atmel-rng.c | 43 +++++++++++++++++++++---------
 1 file changed, 30 insertions(+), 13 deletions(-)

diff --git a/drivers/char/hw_random/atmel-rng.c b/drivers/char/hw_random/atmel-rng.c
index ecb71c4317a50..1a4874668c04c 100644
--- a/drivers/char/hw_random/atmel-rng.c
+++ b/drivers/char/hw_random/atmel-rng.c
@@ -13,6 +13,7 @@
 #include <linux/err.h>
 #include <linux/clk.h>
 #include <linux/io.h>
+#include <linux/iopoll.h>
 #include <linux/hw_random.h>
 #include <linux/of_device.h>
 #include <linux/platform_device.h>
@@ -20,6 +21,7 @@
 #define TRNG_CR		0x00
 #define TRNG_MR		0x04
 #define TRNG_ISR	0x1c
+#define TRNG_ISR_DATRDY	BIT(0)
 #define TRNG_ODATA	0x50
 
 #define TRNG_KEY	0x524e4700 /* RNG */
@@ -36,25 +38,40 @@ struct atmel_trng {
 	struct hwrng rng;
 };
 
+static bool atmel_trng_wait_ready(struct atmel_trng *trng, bool wait)
+{
+	int ready;
+
+	ready = readl(trng->base + TRNG_ISR) & TRNG_ISR_DATRDY;
+	if (!ready && wait)
+		readl_poll_timeout(trng->base + TRNG_ISR, ready,
+				   ready & TRNG_ISR_DATRDY, 1000, 20000);
+
+	return !!ready;
+}
+
 static int atmel_trng_read(struct hwrng *rng, void *buf, size_t max,
 			   bool wait)
 {
 	struct atmel_trng *trng = container_of(rng, struct atmel_trng, rng);
 	u32 *data = buf;
+	int ret;
+
+	ret = atmel_trng_wait_ready(trng, wait);
+	if (!ret)
+		goto out;
 
-	/* data ready? */
-	if (readl(trng->base + TRNG_ISR) & 1) {
-		*data = readl(trng->base + TRNG_ODATA);
-		/*
-		  ensure data ready is only set again AFTER the next data
-		  word is ready in case it got set between checking ISR
-		  and reading ODATA, so we don't risk re-reading the
-		  same word
-		*/
-		readl(trng->base + TRNG_ISR);
-		return 4;
-	} else
-		return 0;
+	*data = readl(trng->base + TRNG_ODATA);
+	/*
+	 * ensure data ready is only set again AFTER the next data word is ready
+	 * in case it got set between checking ISR and reading ODATA, so we
+	 * don't risk re-reading the same word
+	 */
+	readl(trng->base + TRNG_ISR);
+	ret = 4;
+
+out:
+	return ret;
 }
 
 static void atmel_trng_enable(struct atmel_trng *trng)
-- 
GitLab


From a223ea9f89ab960eb254ba78429efd42eaf845eb Mon Sep 17 00:00:00 2001
From: Claudiu Beznea <claudiu.beznea@microchip.com>
Date: Mon, 21 Feb 2022 09:59:23 +0200
Subject: [PATCH 0911/1586] hwrng: atmel - disable trng on failure path

Call atmel_trng_disable() on failure path of probe.

Fixes: a1fa98d8116f ("hwrng: atmel - disable TRNG during suspend")
Signed-off-by: Claudiu Beznea <claudiu.beznea@microchip.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/atmel-rng.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/char/hw_random/atmel-rng.c b/drivers/char/hw_random/atmel-rng.c
index 1a4874668c04c..b7ef951927fb6 100644
--- a/drivers/char/hw_random/atmel-rng.c
+++ b/drivers/char/hw_random/atmel-rng.c
@@ -131,6 +131,7 @@ static int atmel_trng_probe(struct platform_device *pdev)
 
 err_register:
 	clk_disable_unprepare(trng->clk);
+	atmel_trng_disable(trng);
 	return ret;
 }
 
-- 
GitLab


From f14b02088fb8b6633bc32b052e0d66308f191cc8 Mon Sep 17 00:00:00 2001
From: Claudiu Beznea <claudiu.beznea@microchip.com>
Date: Mon, 21 Feb 2022 09:59:24 +0200
Subject: [PATCH 0912/1586] hwrng: atmel - rename enable/disable functions to
 init/cleanup

s/atmel_trng_disable/atmel_trng_cleanup/g and
s/atmel_trng_enable/atmel_trng_init/g to cope with
struct hwrng::{init, cleanup} members.

Signed-off-by: Claudiu Beznea <claudiu.beznea@microchip.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/atmel-rng.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/char/hw_random/atmel-rng.c b/drivers/char/hw_random/atmel-rng.c
index b7ef951927fb6..17f02049c112e 100644
--- a/drivers/char/hw_random/atmel-rng.c
+++ b/drivers/char/hw_random/atmel-rng.c
@@ -74,12 +74,12 @@ out:
 	return ret;
 }
 
-static void atmel_trng_enable(struct atmel_trng *trng)
+static void atmel_trng_init(struct atmel_trng *trng)
 {
 	writel(TRNG_KEY | 1, trng->base + TRNG_CR);
 }
 
-static void atmel_trng_disable(struct atmel_trng *trng)
+static void atmel_trng_cleanup(struct atmel_trng *trng)
 {
 	writel(TRNG_KEY, trng->base + TRNG_CR);
 }
@@ -117,7 +117,7 @@ static int atmel_trng_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
-	atmel_trng_enable(trng);
+	atmel_trng_init(trng);
 	trng->rng.name = pdev->name;
 	trng->rng.read = atmel_trng_read;
 
@@ -131,7 +131,7 @@ static int atmel_trng_probe(struct platform_device *pdev)
 
 err_register:
 	clk_disable_unprepare(trng->clk);
-	atmel_trng_disable(trng);
+	atmel_trng_cleanup(trng);
 	return ret;
 }
 
@@ -140,7 +140,7 @@ static int atmel_trng_remove(struct platform_device *pdev)
 	struct atmel_trng *trng = platform_get_drvdata(pdev);
 
 
-	atmel_trng_disable(trng);
+	atmel_trng_cleanup(trng);
 	clk_disable_unprepare(trng->clk);
 
 	return 0;
@@ -151,7 +151,7 @@ static int atmel_trng_suspend(struct device *dev)
 {
 	struct atmel_trng *trng = dev_get_drvdata(dev);
 
-	atmel_trng_disable(trng);
+	atmel_trng_cleanup(trng);
 	clk_disable_unprepare(trng->clk);
 
 	return 0;
@@ -166,7 +166,7 @@ static int atmel_trng_resume(struct device *dev)
 	if (ret)
 		return ret;
 
-	atmel_trng_enable(trng);
+	atmel_trng_init(trng);
 
 	return 0;
 }
-- 
GitLab


From 9fbd8b306fcb8d1f1294972f26a3e4d57134c4ad Mon Sep 17 00:00:00 2001
From: Claudiu Beznea <claudiu.beznea@microchip.com>
Date: Mon, 21 Feb 2022 09:59:25 +0200
Subject: [PATCH 0913/1586] hwrng: atmel - move set of TRNG_HALFR in
 atmel_trng_init()

Move set of TRNG_HALFR in atmel_trng_init() as this function is
also called on resume path. In case of SAMA7G5 where backup and
self-refresh PM mode is available most of the SoC parts are
powered of (including TRNG) when entering suspend. In that case
on resuming path TRNG_HALFR should also be re-configured.

Signed-off-by: Claudiu Beznea <claudiu.beznea@microchip.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/atmel-rng.c | 49 +++++++++++++++---------------
 1 file changed, 25 insertions(+), 24 deletions(-)

diff --git a/drivers/char/hw_random/atmel-rng.c b/drivers/char/hw_random/atmel-rng.c
index 17f02049c112e..ef49dbe681cf3 100644
--- a/drivers/char/hw_random/atmel-rng.c
+++ b/drivers/char/hw_random/atmel-rng.c
@@ -36,6 +36,7 @@ struct atmel_trng {
 	struct clk *clk;
 	void __iomem *base;
 	struct hwrng rng;
+	bool has_half_rate;
 };
 
 static bool atmel_trng_wait_ready(struct atmel_trng *trng, bool wait)
@@ -74,14 +75,32 @@ out:
 	return ret;
 }
 
-static void atmel_trng_init(struct atmel_trng *trng)
+static int atmel_trng_init(struct atmel_trng *trng)
 {
+	unsigned long rate;
+	int ret;
+
+	ret = clk_prepare_enable(trng->clk);
+	if (ret)
+		return ret;
+
+	if (trng->has_half_rate) {
+		rate = clk_get_rate(trng->clk);
+
+		/* if peripheral clk is above 100MHz, set HALFR */
+		if (rate > 100000000)
+			writel(TRNG_HALFR, trng->base + TRNG_MR);
+	}
+
 	writel(TRNG_KEY | 1, trng->base + TRNG_CR);
+
+	return 0;
 }
 
 static void atmel_trng_cleanup(struct atmel_trng *trng)
 {
 	writel(TRNG_KEY, trng->base + TRNG_CR);
+	clk_disable_unprepare(trng->clk);
 }
 
 static int atmel_trng_probe(struct platform_device *pdev)
@@ -105,22 +124,14 @@ static int atmel_trng_probe(struct platform_device *pdev)
 	if (!data)
 		return -ENODEV;
 
-	if (data->has_half_rate) {
-		unsigned long rate = clk_get_rate(trng->clk);
-
-		/* if peripheral clk is above 100MHz, set HALFR */
-		if (rate > 100000000)
-			writel(TRNG_HALFR, trng->base + TRNG_MR);
-	}
+	trng->has_half_rate = data->has_half_rate;
+	trng->rng.name = pdev->name;
+	trng->rng.read = atmel_trng_read;
 
-	ret = clk_prepare_enable(trng->clk);
+	ret = atmel_trng_init(trng);
 	if (ret)
 		return ret;
 
-	atmel_trng_init(trng);
-	trng->rng.name = pdev->name;
-	trng->rng.read = atmel_trng_read;
-
 	ret = devm_hwrng_register(&pdev->dev, &trng->rng);
 	if (ret)
 		goto err_register;
@@ -130,7 +141,6 @@ static int atmel_trng_probe(struct platform_device *pdev)
 	return 0;
 
 err_register:
-	clk_disable_unprepare(trng->clk);
 	atmel_trng_cleanup(trng);
 	return ret;
 }
@@ -141,7 +151,6 @@ static int atmel_trng_remove(struct platform_device *pdev)
 
 
 	atmel_trng_cleanup(trng);
-	clk_disable_unprepare(trng->clk);
 
 	return 0;
 }
@@ -152,7 +161,6 @@ static int atmel_trng_suspend(struct device *dev)
 	struct atmel_trng *trng = dev_get_drvdata(dev);
 
 	atmel_trng_cleanup(trng);
-	clk_disable_unprepare(trng->clk);
 
 	return 0;
 }
@@ -160,15 +168,8 @@ static int atmel_trng_suspend(struct device *dev)
 static int atmel_trng_resume(struct device *dev)
 {
 	struct atmel_trng *trng = dev_get_drvdata(dev);
-	int ret;
 
-	ret = clk_prepare_enable(trng->clk);
-	if (ret)
-		return ret;
-
-	atmel_trng_init(trng);
-
-	return 0;
+	return atmel_trng_init(trng);
 }
 
 static const struct dev_pm_ops atmel_trng_pm_ops = {
-- 
GitLab


From b953188525973314e32586e45bbd3d4768891c27 Mon Sep 17 00:00:00 2001
From: Claudiu Beznea <claudiu.beznea@microchip.com>
Date: Mon, 21 Feb 2022 09:59:26 +0200
Subject: [PATCH 0914/1586] hwrng: atmel - use __maybe_unused and pm_ptr() for
 pm ops

Use __maybe_unused and pm_ptr() for pm ops.

Signed-off-by: Claudiu Beznea <claudiu.beznea@microchip.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/atmel-rng.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/drivers/char/hw_random/atmel-rng.c b/drivers/char/hw_random/atmel-rng.c
index ef49dbe681cf3..0fff748084727 100644
--- a/drivers/char/hw_random/atmel-rng.c
+++ b/drivers/char/hw_random/atmel-rng.c
@@ -155,8 +155,7 @@ static int atmel_trng_remove(struct platform_device *pdev)
 	return 0;
 }
 
-#ifdef CONFIG_PM
-static int atmel_trng_suspend(struct device *dev)
+static int __maybe_unused atmel_trng_suspend(struct device *dev)
 {
 	struct atmel_trng *trng = dev_get_drvdata(dev);
 
@@ -165,18 +164,17 @@ static int atmel_trng_suspend(struct device *dev)
 	return 0;
 }
 
-static int atmel_trng_resume(struct device *dev)
+static int __maybe_unused atmel_trng_resume(struct device *dev)
 {
 	struct atmel_trng *trng = dev_get_drvdata(dev);
 
 	return atmel_trng_init(trng);
 }
 
-static const struct dev_pm_ops atmel_trng_pm_ops = {
+static const struct dev_pm_ops __maybe_unused atmel_trng_pm_ops = {
 	.suspend	= atmel_trng_suspend,
 	.resume		= atmel_trng_resume,
 };
-#endif /* CONFIG_PM */
 
 static const struct atmel_trng_data at91sam9g45_config = {
 	.has_half_rate = false,
@@ -204,9 +202,7 @@ static struct platform_driver atmel_trng_driver = {
 	.remove		= atmel_trng_remove,
 	.driver		= {
 		.name	= "atmel-trng",
-#ifdef CONFIG_PM
-		.pm	= &atmel_trng_pm_ops,
-#endif /* CONFIG_PM */
+		.pm	= pm_ptr(&atmel_trng_pm_ops),
 		.of_match_table = atmel_trng_dt_ids,
 	},
 };
-- 
GitLab


From c4f51eab6ce0b7138f375673dbb2789e49b6d028 Mon Sep 17 00:00:00 2001
From: Claudiu Beznea <claudiu.beznea@microchip.com>
Date: Mon, 21 Feb 2022 09:59:27 +0200
Subject: [PATCH 0915/1586] hwrng: atmel - add runtime pm support

Add runtime PM support.

Signed-off-by: Claudiu Beznea <claudiu.beznea@microchip.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/atmel-rng.c | 44 ++++++++++++++++++++++--------
 1 file changed, 32 insertions(+), 12 deletions(-)

diff --git a/drivers/char/hw_random/atmel-rng.c b/drivers/char/hw_random/atmel-rng.c
index 0fff748084727..b662d44a09a62 100644
--- a/drivers/char/hw_random/atmel-rng.c
+++ b/drivers/char/hw_random/atmel-rng.c
@@ -17,6 +17,7 @@
 #include <linux/hw_random.h>
 #include <linux/of_device.h>
 #include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
 
 #define TRNG_CR		0x00
 #define TRNG_MR		0x04
@@ -58,6 +59,12 @@ static int atmel_trng_read(struct hwrng *rng, void *buf, size_t max,
 	u32 *data = buf;
 	int ret;
 
+	ret = pm_runtime_get_sync((struct device *)trng->rng.priv);
+	if (ret < 0) {
+		pm_runtime_put_sync((struct device *)trng->rng.priv);
+		return ret;
+	}
+
 	ret = atmel_trng_wait_ready(trng, wait);
 	if (!ret)
 		goto out;
@@ -72,6 +79,8 @@ static int atmel_trng_read(struct hwrng *rng, void *buf, size_t max,
 	ret = 4;
 
 out:
+	pm_runtime_mark_last_busy((struct device *)trng->rng.priv);
+	pm_runtime_put_sync_autosuspend((struct device *)trng->rng.priv);
 	return ret;
 }
 
@@ -127,21 +136,28 @@ static int atmel_trng_probe(struct platform_device *pdev)
 	trng->has_half_rate = data->has_half_rate;
 	trng->rng.name = pdev->name;
 	trng->rng.read = atmel_trng_read;
+	trng->rng.priv = (unsigned long)&pdev->dev;
+	platform_set_drvdata(pdev, trng);
 
+#ifndef CONFIG_PM
 	ret = atmel_trng_init(trng);
 	if (ret)
 		return ret;
+#endif
 
-	ret = devm_hwrng_register(&pdev->dev, &trng->rng);
-	if (ret)
-		goto err_register;
-
-	platform_set_drvdata(pdev, trng);
+	pm_runtime_set_autosuspend_delay(&pdev->dev, 100);
+	pm_runtime_use_autosuspend(&pdev->dev);
+	pm_runtime_enable(&pdev->dev);
 
-	return 0;
+	ret = devm_hwrng_register(&pdev->dev, &trng->rng);
+	if (ret) {
+		pm_runtime_disable(&pdev->dev);
+		pm_runtime_set_suspended(&pdev->dev);
+#ifndef CONFIG_PM
+		atmel_trng_cleanup(trng);
+#endif
+	}
 
-err_register:
-	atmel_trng_cleanup(trng);
 	return ret;
 }
 
@@ -151,11 +167,13 @@ static int atmel_trng_remove(struct platform_device *pdev)
 
 
 	atmel_trng_cleanup(trng);
+	pm_runtime_disable(&pdev->dev);
+	pm_runtime_set_suspended(&pdev->dev);
 
 	return 0;
 }
 
-static int __maybe_unused atmel_trng_suspend(struct device *dev)
+static int __maybe_unused atmel_trng_runtime_suspend(struct device *dev)
 {
 	struct atmel_trng *trng = dev_get_drvdata(dev);
 
@@ -164,7 +182,7 @@ static int __maybe_unused atmel_trng_suspend(struct device *dev)
 	return 0;
 }
 
-static int __maybe_unused atmel_trng_resume(struct device *dev)
+static int __maybe_unused atmel_trng_runtime_resume(struct device *dev)
 {
 	struct atmel_trng *trng = dev_get_drvdata(dev);
 
@@ -172,8 +190,10 @@ static int __maybe_unused atmel_trng_resume(struct device *dev)
 }
 
 static const struct dev_pm_ops __maybe_unused atmel_trng_pm_ops = {
-	.suspend	= atmel_trng_suspend,
-	.resume		= atmel_trng_resume,
+	SET_RUNTIME_PM_OPS(atmel_trng_runtime_suspend,
+			   atmel_trng_runtime_resume, NULL)
+	SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
+				pm_runtime_force_resume)
 };
 
 static const struct atmel_trng_data at91sam9g45_config = {
-- 
GitLab


From 53e748c2758ca24854b6ce89fb08a93a36772dc0 Mon Sep 17 00:00:00 2001
From: Claudiu Beznea <claudiu.beznea@microchip.com>
Date: Mon, 21 Feb 2022 09:59:28 +0200
Subject: [PATCH 0916/1586] hwrng: atmel - remove extra line

Remove extra line.

Signed-off-by: Claudiu Beznea <claudiu.beznea@microchip.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/atmel-rng.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/char/hw_random/atmel-rng.c b/drivers/char/hw_random/atmel-rng.c
index b662d44a09a62..b8effe77d80ff 100644
--- a/drivers/char/hw_random/atmel-rng.c
+++ b/drivers/char/hw_random/atmel-rng.c
@@ -165,7 +165,6 @@ static int atmel_trng_remove(struct platform_device *pdev)
 {
 	struct atmel_trng *trng = platform_get_drvdata(pdev);
 
-
 	atmel_trng_cleanup(trng);
 	pm_runtime_disable(&pdev->dev);
 	pm_runtime_set_suspended(&pdev->dev);
-- 
GitLab


From 2f5ee72ee950a677069ee8ab75af40cebba3bb3c Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Mon, 21 Feb 2022 11:52:34 +0000
Subject: [PATCH 0917/1586] crypto: cavium/zip - register algorithm only if
 hardware is present

On my renesas salavator-X, I saw some cavium driver failing crypto
self-tests.
But salvator does not have such hardware.
This is due to cavium/zip driver registering algorithms even if hardware
is not present.
The solution is to move algorithm registration in the probe function.
This permits also to simplify module init/exit by using
module_pci_driver.

Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/cavium/zip/zip_main.c | 83 ++++++++++++----------------
 1 file changed, 35 insertions(+), 48 deletions(-)

diff --git a/drivers/crypto/cavium/zip/zip_main.c b/drivers/crypto/cavium/zip/zip_main.c
index 812b4ac9afd6a..dc5b7bf7e1fd9 100644
--- a/drivers/crypto/cavium/zip/zip_main.c
+++ b/drivers/crypto/cavium/zip/zip_main.c
@@ -55,6 +55,11 @@ static const struct pci_device_id zip_id_table[] = {
 	{ 0, }
 };
 
+static void zip_debugfs_init(void);
+static void zip_debugfs_exit(void);
+static int zip_register_compression_device(void);
+static void zip_unregister_compression_device(void);
+
 void zip_reg_write(u64 val, u64 __iomem *addr)
 {
 	writeq(val, addr);
@@ -235,6 +240,15 @@ static int zip_init_hw(struct zip_device *zip)
 	return 0;
 }
 
+static void zip_reset(struct zip_device *zip)
+{
+	union zip_cmd_ctl cmd_ctl;
+
+	cmd_ctl.u_reg64 = 0x0ull;
+	cmd_ctl.s.reset = 1;  /* Forces ZIP cores to do reset */
+	zip_reg_write(cmd_ctl.u_reg64, (zip->reg_base + ZIP_CMD_CTL));
+}
+
 static int zip_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
 	struct device *dev = &pdev->dev;
@@ -282,8 +296,21 @@ static int zip_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (err)
 		goto err_release_regions;
 
+	/* Register with the Kernel Crypto Interface */
+	err = zip_register_compression_device();
+	if (err < 0) {
+		zip_err("ZIP: Kernel Crypto Registration failed\n");
+		goto err_register;
+	}
+
+	/* comp-decomp statistics are handled with debugfs interface */
+	zip_debugfs_init();
+
 	return 0;
 
+err_register:
+	zip_reset(zip);
+
 err_release_regions:
 	if (zip->reg_base)
 		iounmap(zip->reg_base);
@@ -305,16 +332,17 @@ err_free_device:
 static void zip_remove(struct pci_dev *pdev)
 {
 	struct zip_device *zip = pci_get_drvdata(pdev);
-	union zip_cmd_ctl cmd_ctl;
 	int q = 0;
 
 	if (!zip)
 		return;
 
+	zip_debugfs_exit();
+
+	zip_unregister_compression_device();
+
 	if (zip->reg_base) {
-		cmd_ctl.u_reg64 = 0x0ull;
-		cmd_ctl.s.reset = 1;  /* Forces ZIP cores to do reset */
-		zip_reg_write(cmd_ctl.u_reg64, (zip->reg_base + ZIP_CMD_CTL));
+		zip_reset(zip);
 		iounmap(zip->reg_base);
 	}
 
@@ -585,7 +613,7 @@ DEFINE_SHOW_ATTRIBUTE(zip_regs);
 /* Root directory for thunderx_zip debugfs entry */
 static struct dentry *zip_debugfs_root;
 
-static void __init zip_debugfs_init(void)
+static void zip_debugfs_init(void)
 {
 	if (!debugfs_initialized())
 		return;
@@ -604,7 +632,7 @@ static void __init zip_debugfs_init(void)
 
 }
 
-static void __exit zip_debugfs_exit(void)
+static void zip_debugfs_exit(void)
 {
 	debugfs_remove_recursive(zip_debugfs_root);
 }
@@ -615,48 +643,7 @@ static void __exit zip_debugfs_exit(void) { }
 #endif
 /* debugfs - end */
 
-static int __init zip_init_module(void)
-{
-	int ret;
-
-	zip_msg("%s\n", DRV_NAME);
-
-	ret = pci_register_driver(&zip_driver);
-	if (ret < 0) {
-		zip_err("ZIP: pci_register_driver() failed\n");
-		return ret;
-	}
-
-	/* Register with the Kernel Crypto Interface */
-	ret = zip_register_compression_device();
-	if (ret < 0) {
-		zip_err("ZIP: Kernel Crypto Registration failed\n");
-		goto err_pci_unregister;
-	}
-
-	/* comp-decomp statistics are handled with debugfs interface */
-	zip_debugfs_init();
-
-	return ret;
-
-err_pci_unregister:
-	pci_unregister_driver(&zip_driver);
-	return ret;
-}
-
-static void __exit zip_cleanup_module(void)
-{
-	zip_debugfs_exit();
-
-	/* Unregister from the kernel crypto interface */
-	zip_unregister_compression_device();
-
-	/* Unregister this driver for pci zip devices */
-	pci_unregister_driver(&zip_driver);
-}
-
-module_init(zip_init_module);
-module_exit(zip_cleanup_module);
+module_pci_driver(zip_driver);
 
 MODULE_AUTHOR("Cavium Inc");
 MODULE_DESCRIPTION("Cavium Inc ThunderX ZIP Driver");
-- 
GitLab


From b169b3766242b6f3336e24a6c8ee1522978b57a7 Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Mon, 21 Feb 2022 12:08:29 +0000
Subject: [PATCH 0918/1586] crypto: sun8i-ss - call finalize with bh disabled

Doing ipsec produces a spinlock recursion warning.
This is due to not disabling BH during crypto completion function.

Fixes: f08fcced6d00 ("crypto: allwinner - Add sun8i-ss cryptographic offloader")
Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c | 3 +++
 drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c   | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c
index 9ef1c85c4aaa5..554e400d41cad 100644
--- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c
+++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c
@@ -11,6 +11,7 @@
  * You could find a link for the datasheet in Documentation/arm/sunxi.rst
  */
 
+#include <linux/bottom_half.h>
 #include <linux/crypto.h>
 #include <linux/dma-mapping.h>
 #include <linux/io.h>
@@ -274,7 +275,9 @@ static int sun8i_ss_handle_cipher_request(struct crypto_engine *engine, void *ar
 	struct skcipher_request *breq = container_of(areq, struct skcipher_request, base);
 
 	err = sun8i_ss_cipher(breq);
+	local_bh_disable();
 	crypto_finalize_skcipher_request(engine, breq, err);
+	local_bh_enable();
 
 	return 0;
 }
diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c
index 3c073eb3db038..1a71ed49d2333 100644
--- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c
+++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c
@@ -9,6 +9,7 @@
  *
  * You could find the datasheet in Documentation/arm/sunxi.rst
  */
+#include <linux/bottom_half.h>
 #include <linux/dma-mapping.h>
 #include <linux/pm_runtime.h>
 #include <linux/scatterlist.h>
@@ -442,6 +443,8 @@ int sun8i_ss_hash_run(struct crypto_engine *engine, void *breq)
 theend:
 	kfree(pad);
 	kfree(result);
+	local_bh_disable();
 	crypto_finalize_hash_request(engine, breq, err);
+	local_bh_enable();
 	return 0;
 }
-- 
GitLab


From f75a749b6d78aeae2ce90e14fcc4b7b3ba46126d Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Mon, 21 Feb 2022 12:08:30 +0000
Subject: [PATCH 0919/1586] crypto: sun8i-ce - call finalize with bh disabled

Doing ipsec produces a spinlock recursion warning.
This is due to not disabling BH during crypto completion function.

Fixes: 06f751b61329 ("crypto: allwinner - Add sun8i-ce Crypto Engine")
Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c | 3 +++
 drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c   | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c
index 54ae8d16e4931..35e3cadccac2b 100644
--- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c
+++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c
@@ -11,6 +11,7 @@
  * You could find a link for the datasheet in Documentation/arm/sunxi.rst
  */
 
+#include <linux/bottom_half.h>
 #include <linux/crypto.h>
 #include <linux/dma-mapping.h>
 #include <linux/io.h>
@@ -283,7 +284,9 @@ static int sun8i_ce_cipher_run(struct crypto_engine *engine, void *areq)
 
 	flow = rctx->flow;
 	err = sun8i_ce_run_task(ce, flow, crypto_tfm_alg_name(breq->base.tfm));
+	local_bh_disable();
 	crypto_finalize_skcipher_request(engine, breq, err);
+	local_bh_enable();
 	return 0;
 }
 
diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c
index 88194718a806c..859b7522faaac 100644
--- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c
+++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c
@@ -9,6 +9,7 @@
  *
  * You could find the datasheet in Documentation/arm/sunxi.rst
  */
+#include <linux/bottom_half.h>
 #include <linux/dma-mapping.h>
 #include <linux/pm_runtime.h>
 #include <linux/scatterlist.h>
@@ -414,6 +415,8 @@ int sun8i_ce_hash_run(struct crypto_engine *engine, void *breq)
 theend:
 	kfree(buf);
 	kfree(result);
+	local_bh_disable();
 	crypto_finalize_hash_request(engine, breq, err);
+	local_bh_enable();
 	return 0;
 }
-- 
GitLab


From dba633342994ce47d347bcf5522ba28301247b79 Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Mon, 21 Feb 2022 12:08:31 +0000
Subject: [PATCH 0920/1586] crypto: amlogic - call finalize with bh disabled

Doing ipsec produces a spinlock recursion warning.
This is due to not disabling BH during crypto completion function.

Fixes: 48fe583fe541 ("crypto: amlogic - Add crypto accelerator for amlogic GXL")
Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/amlogic/amlogic-gxl-cipher.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/crypto/amlogic/amlogic-gxl-cipher.c b/drivers/crypto/amlogic/amlogic-gxl-cipher.c
index c6865cbd334b2..e79514fce731f 100644
--- a/drivers/crypto/amlogic/amlogic-gxl-cipher.c
+++ b/drivers/crypto/amlogic/amlogic-gxl-cipher.c
@@ -265,7 +265,9 @@ static int meson_handle_cipher_request(struct crypto_engine *engine,
 	struct skcipher_request *breq = container_of(areq, struct skcipher_request, base);
 
 	err = meson_cipher(breq);
+	local_bh_disable();
 	crypto_finalize_skcipher_request(engine, breq, err);
+	local_bh_enable();
 
 	return 0;
 }
-- 
GitLab


From 7f22421103c5a7f9a1726f0ed125274c38174ddb Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Mon, 21 Feb 2022 12:08:32 +0000
Subject: [PATCH 0921/1586] crypto: gemini - call finalize with bh disabled

Doing ipsec produces a spinlock recursion warning.
This is due to not disabling BH during crypto completion function.

Fixes: 46c5338db7bd45b2 ("crypto: sl3516 - Add sl3516 crypto engine")
Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/gemini/sl3516-ce-cipher.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/crypto/gemini/sl3516-ce-cipher.c b/drivers/crypto/gemini/sl3516-ce-cipher.c
index 53e3fefb81de5..14d0d83d388d8 100644
--- a/drivers/crypto/gemini/sl3516-ce-cipher.c
+++ b/drivers/crypto/gemini/sl3516-ce-cipher.c
@@ -264,7 +264,9 @@ static int sl3516_ce_handle_cipher_request(struct crypto_engine *engine, void *a
 	struct skcipher_request *breq = container_of(areq, struct skcipher_request, base);
 
 	err = sl3516_ce_cipher(breq);
+	local_bh_disable();
 	crypto_finalize_skcipher_request(engine, breq, err);
+	local_bh_enable();
 
 	return 0;
 }
-- 
GitLab


From 4058cf08945c18a6de193f4118fd05d83d3d4285 Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Mon, 21 Feb 2022 12:08:33 +0000
Subject: [PATCH 0922/1586] crypto: engine - check if BH is disabled during
 completion

When doing iperf over ipsec with crypto hardware sun8i-ce, I hit some
spinlock recursion bug.

This is due to completion function called with enabled BH.

Add check a to detect this.

Fixes: 735d37b5424b ("crypto: engine - Introduce the block request crypto engine framework")
Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/crypto_engine.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/crypto/crypto_engine.c b/crypto/crypto_engine.c
index fb07da9920eea..6056a990c9f2c 100644
--- a/crypto/crypto_engine.c
+++ b/crypto/crypto_engine.c
@@ -53,6 +53,7 @@ static void crypto_finalize_request(struct crypto_engine *engine,
 				dev_err(engine->dev, "failed to unprepare request\n");
 		}
 	}
+	lockdep_assert_in_softirq();
 	req->complete(req, err);
 
 	kthread_queue_work(engine->kworker, &engine->pump_requests);
-- 
GitLab


From 1038fd78a1b8269b642ce09600242682186a78e2 Mon Sep 17 00:00:00 2001
From: Nicolai Stange <nstange@suse.de>
Date: Mon, 21 Feb 2022 13:10:47 +0100
Subject: [PATCH 0923/1586] crypto: kpp - provide support for KPP template
 instances

The upcoming support for the RFC 7919 ffdhe group parameters will be
made available in the form of templates like "ffdhe2048(dh)",
"ffdhe3072(dh)" and so on. Template instantiations thereof would wrap the
inner "dh" kpp_alg and also provide kpp_alg services to the outside again.
Furthermore, it might be perhaps be desirable to provide KDF templates in
the future, which would similarly wrap an inner kpp_alg and present
themselves to the outside as another kpp_alg, transforming the shared
secret on its way out.

Introduce the bits needed for supporting KPP template instances. Everything
related to inner kpp_alg spawns potentially being held by such template
instances will be deferred to a subsequent patch in order to facilitate
review.

Define struct struct kpp_instance in close analogy to the already existing
skcipher_instance, shash_instance and alike, but wrapping a struct kpp_alg.
Implement the new kpp_register_instance() template instance registration
primitive. Provide some helper functions for
- going back and forth between a generic struct crypto_instance and the new
  struct kpp_instance,
- obtaining the instantiating kpp_instance from a crypto_kpp transform and
- for accessing a given kpp_instance's implementation specific context
  data.

Annotate everything with proper kernel-doc comments, even though
include/crypto/internal/kpp.h is not considered for the generated docs.

Signed-off-by: Nicolai Stange <nstange@suse.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/kpp.c                  | 20 +++++++++
 include/crypto/internal/kpp.h | 83 +++++++++++++++++++++++++++++++++++
 2 files changed, 103 insertions(+)

diff --git a/crypto/kpp.c b/crypto/kpp.c
index 313b2c6999635..458195495a1dc 100644
--- a/crypto/kpp.c
+++ b/crypto/kpp.c
@@ -68,9 +68,17 @@ static int crypto_kpp_init_tfm(struct crypto_tfm *tfm)
 	return 0;
 }
 
+static void crypto_kpp_free_instance(struct crypto_instance *inst)
+{
+	struct kpp_instance *kpp = kpp_instance(inst);
+
+	kpp->free(kpp);
+}
+
 static const struct crypto_type crypto_kpp_type = {
 	.extsize = crypto_alg_extsize,
 	.init_tfm = crypto_kpp_init_tfm,
+	.free = crypto_kpp_free_instance,
 #ifdef CONFIG_PROC_FS
 	.show = crypto_kpp_show,
 #endif
@@ -111,5 +119,17 @@ void crypto_unregister_kpp(struct kpp_alg *alg)
 }
 EXPORT_SYMBOL_GPL(crypto_unregister_kpp);
 
+int kpp_register_instance(struct crypto_template *tmpl,
+			  struct kpp_instance *inst)
+{
+	if (WARN_ON(!inst->free))
+		return -EINVAL;
+
+	kpp_prepare_alg(&inst->alg);
+
+	return crypto_register_instance(tmpl, kpp_crypto_instance(inst));
+}
+EXPORT_SYMBOL_GPL(kpp_register_instance);
+
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("Key-agreement Protocol Primitives");
diff --git a/include/crypto/internal/kpp.h b/include/crypto/internal/kpp.h
index 659b642efada1..c4d5a970fe9d9 100644
--- a/include/crypto/internal/kpp.h
+++ b/include/crypto/internal/kpp.h
@@ -10,6 +10,24 @@
 #include <crypto/kpp.h>
 #include <crypto/algapi.h>
 
+/**
+ * struct kpp_instance - KPP template instance
+ * @free: Callback getting invoked upon instance destruction. Must be set.
+ * @s: Internal. Generic crypto core instance state properly layout
+ *     to alias with @alg as needed.
+ * @alg: The &struct kpp_alg implementation provided by the instance.
+ */
+struct kpp_instance {
+	void (*free)(struct kpp_instance *inst);
+	union {
+		struct {
+			char head[offsetof(struct kpp_alg, base)];
+			struct crypto_instance base;
+		} s;
+		struct kpp_alg alg;
+	};
+};
+
 /*
  * Transform internal helpers.
  */
@@ -33,6 +51,62 @@ static inline const char *kpp_alg_name(struct crypto_kpp *tfm)
 	return crypto_kpp_tfm(tfm)->__crt_alg->cra_name;
 }
 
+/*
+ * Template instance internal helpers.
+ */
+/**
+ * kpp_crypto_instance() - Cast a &struct kpp_instance to the corresponding
+ *                         generic &struct crypto_instance.
+ * @inst: Pointer to the &struct kpp_instance to be cast.
+ * Return: A pointer to the &struct crypto_instance embedded in @inst.
+ */
+static inline struct crypto_instance *kpp_crypto_instance(
+	struct kpp_instance *inst)
+{
+	return &inst->s.base;
+}
+
+/**
+ * kpp_instance() - Cast a generic &struct crypto_instance to the corresponding
+ *                  &struct kpp_instance.
+ * @inst: Pointer to the &struct crypto_instance to be cast.
+ * Return: A pointer to the &struct kpp_instance @inst is embedded in.
+ */
+static inline struct kpp_instance *kpp_instance(struct crypto_instance *inst)
+{
+	return container_of(inst, struct kpp_instance, s.base);
+}
+
+/**
+ * kpp_alg_instance() - Get the &struct kpp_instance a given KPP transform has
+ *                      been instantiated from.
+ * @kpp: The KPP transform instantiated from some &struct kpp_instance.
+ * Return: The &struct kpp_instance associated with @kpp.
+ */
+static inline struct kpp_instance *kpp_alg_instance(struct crypto_kpp *kpp)
+{
+	return kpp_instance(crypto_tfm_alg_instance(&kpp->base));
+}
+
+/**
+ * kpp_instance_ctx() - Get a pointer to a &struct kpp_instance's implementation
+ *                      specific context data.
+ * @inst: The &struct kpp_instance whose context data to access.
+ *
+ * A KPP template implementation may allocate extra memory beyond the
+ * end of a &struct kpp_instance instantiated from &crypto_template.create().
+ * This function provides a means to obtain a pointer to this area.
+ *
+ * Return: A pointer to the implementation specific context data.
+ */
+static inline void *kpp_instance_ctx(struct kpp_instance *inst)
+{
+	return crypto_instance_ctx(kpp_crypto_instance(inst));
+}
+
+/*
+ * KPP algorithm (un)registration functions.
+ */
 /**
  * crypto_register_kpp() -- Register key-agreement protocol primitives algorithm
  *
@@ -56,4 +130,13 @@ int crypto_register_kpp(struct kpp_alg *alg);
  */
 void crypto_unregister_kpp(struct kpp_alg *alg);
 
+/**
+ * kpp_register_instance() - Register a KPP template instance.
+ * @tmpl: The instantiating template.
+ * @inst: The KPP template instance to be registered.
+ * Return: %0 on success, negative error code otherwise.
+ */
+int kpp_register_instance(struct crypto_template *tmpl,
+			  struct kpp_instance *inst);
+
 #endif
-- 
GitLab


From 46ed5269bf7dac752f78fc5f8dc5efb52b483fe7 Mon Sep 17 00:00:00 2001
From: Nicolai Stange <nstange@suse.de>
Date: Mon, 21 Feb 2022 13:10:48 +0100
Subject: [PATCH 0924/1586] crypto: kpp - provide support for KPP spawns

The upcoming support for the RFC 7919 ffdhe group parameters will be
made available in the form of templates like "ffdhe2048(dh)",
"ffdhe3072(dh)" and so on. Template instantiations thereof would wrap the
inner "dh" kpp_alg and also provide kpp_alg services to the outside again.

The primitves needed for providing kpp_alg services from template instances
have been introduced with the previous patch. Continue this work now and
implement everything needed for enabling template instances to make use
of inner KPP algorithms like "dh".

More specifically, define a struct crypto_kpp_spawn in close analogy to
crypto_skcipher_spawn, crypto_shash_spawn and alike. Implement a
crypto_grab_kpp() and crypto_drop_kpp() pair for binding such a spawn to
some inner kpp_alg and for releasing it respectively. Template
implementations can instantiate transforms from the underlying kpp_alg by
means of the new crypto_spawn_kpp(). Finally, provide the
crypto_spawn_kpp_alg() helper for accessing a spawn's underlying kpp_alg
during template instantiation.

Annotate everything with proper kernel-doc comments, even though
include/crypto/internal/kpp.h is not considered for the generated docs.

Signed-off-by: Nicolai Stange <nstange@suse.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/kpp.c                  |  9 +++++
 include/crypto/internal/kpp.h | 75 +++++++++++++++++++++++++++++++++++
 2 files changed, 84 insertions(+)

diff --git a/crypto/kpp.c b/crypto/kpp.c
index 458195495a1dc..7aa6ba4b60a4d 100644
--- a/crypto/kpp.c
+++ b/crypto/kpp.c
@@ -95,6 +95,15 @@ struct crypto_kpp *crypto_alloc_kpp(const char *alg_name, u32 type, u32 mask)
 }
 EXPORT_SYMBOL_GPL(crypto_alloc_kpp);
 
+int crypto_grab_kpp(struct crypto_kpp_spawn *spawn,
+		    struct crypto_instance *inst,
+		    const char *name, u32 type, u32 mask)
+{
+	spawn->base.frontend = &crypto_kpp_type;
+	return crypto_grab_spawn(&spawn->base, inst, name, type, mask);
+}
+EXPORT_SYMBOL_GPL(crypto_grab_kpp);
+
 static void kpp_prepare_alg(struct kpp_alg *alg)
 {
 	struct crypto_alg *base = &alg->base;
diff --git a/include/crypto/internal/kpp.h b/include/crypto/internal/kpp.h
index c4d5a970fe9d9..9cb0662ebe871 100644
--- a/include/crypto/internal/kpp.h
+++ b/include/crypto/internal/kpp.h
@@ -28,6 +28,20 @@ struct kpp_instance {
 	};
 };
 
+/**
+ * struct crypto_kpp_spawn - KPP algorithm spawn
+ * @base: Internal. Generic crypto core spawn state.
+ *
+ * Template instances can get a hold on some inner KPP algorithm by
+ * binding a &struct crypto_kpp_spawn via
+ * crypto_grab_kpp(). Transforms may subsequently get instantiated
+ * from the referenced inner &struct kpp_alg by means of
+ * crypto_spawn_kpp().
+ */
+struct crypto_kpp_spawn {
+	struct crypto_spawn base;
+};
+
 /*
  * Transform internal helpers.
  */
@@ -139,4 +153,65 @@ void crypto_unregister_kpp(struct kpp_alg *alg);
 int kpp_register_instance(struct crypto_template *tmpl,
 			  struct kpp_instance *inst);
 
+/*
+ * KPP spawn related functions.
+ */
+/**
+ * crypto_grab_kpp() - Look up a KPP algorithm and bind a spawn to it.
+ * @spawn: The KPP spawn to bind.
+ * @inst: The template instance owning @spawn.
+ * @name: The KPP algorithm name to look up.
+ * @type: The type bitset to pass on to the lookup.
+ * @mask: The mask bismask to pass on to the lookup.
+ * Return: %0 on success, a negative error code otherwise.
+ */
+int crypto_grab_kpp(struct crypto_kpp_spawn *spawn,
+		    struct crypto_instance *inst,
+		    const char *name, u32 type, u32 mask);
+
+/**
+ * crypto_drop_kpp() - Release a spawn previously bound via crypto_grab_kpp().
+ * @spawn: The spawn to release.
+ */
+static inline void crypto_drop_kpp(struct crypto_kpp_spawn *spawn)
+{
+	crypto_drop_spawn(&spawn->base);
+}
+
+/**
+ * crypto_spawn_kpp_alg() - Get the algorithm a KPP spawn has been bound to.
+ * @spawn: The spawn to get the referenced &struct kpp_alg for.
+ *
+ * This function as well as the returned result are safe to use only
+ * after @spawn has been successfully bound via crypto_grab_kpp() and
+ * up to until the template instance owning @spawn has either been
+ * registered successfully or the spawn has been released again via
+ * crypto_drop_spawn().
+ *
+ * Return: A pointer to the &struct kpp_alg referenced from the spawn.
+ */
+static inline struct kpp_alg *crypto_spawn_kpp_alg(
+	struct crypto_kpp_spawn *spawn)
+{
+	return container_of(spawn->base.alg, struct kpp_alg, base);
+}
+
+/**
+ * crypto_spawn_kpp() - Create a transform from a KPP spawn.
+ * @spawn: The spawn previously bound to some &struct kpp_alg via
+ *         crypto_grab_kpp().
+ *
+ * Once a &struct crypto_kpp_spawn has been successfully bound to a
+ * &struct kpp_alg via crypto_grab_kpp(), transforms for the latter
+ * may get instantiated from the former by means of this function.
+ *
+ * Return: A pointer to the freshly created KPP transform on success
+ * or an ``ERR_PTR()`` otherwise.
+ */
+static inline struct crypto_kpp *crypto_spawn_kpp(
+	struct crypto_kpp_spawn *spawn)
+{
+	return crypto_spawn_tfm2(&spawn->base);
+}
+
 #endif
-- 
GitLab


From 48c6d8b878c1d811015cfba1a302d8474a9aace6 Mon Sep 17 00:00:00 2001
From: Nicolai Stange <nstange@suse.de>
Date: Mon, 21 Feb 2022 13:10:49 +0100
Subject: [PATCH 0925/1586] crypto: dh - remove struct dh's ->q member

The only current user of the DH KPP algorithm, the
keyctl(KEYCTL_DH_COMPUTE) syscall, doesn't set the domain parameter ->q
in struct dh. Remove it and any associated (de)serialization code in
crypto_dh_encode_key() and crypto_dh_decode_key. Adjust the encoded
->secret values in testmgr's DH test vectors accordingly.

Note that the dh-generic implementation would have initialized its
struct dh_ctx's ->q from the decoded struct dh's ->q, if present. If this
struct dh_ctx's ->q would ever have been non-NULL, it would have enabled a
full key validation as specified in NIST SP800-56A in dh_is_pubkey_valid().
However, as outlined above, ->q is always NULL in practice and the full key
validation code is effectively dead. A later patch will make
dh_is_pubkey_valid() to calculate Q from P on the fly, if possible, so
don't remove struct dh_ctx's ->q now, but leave it there until that has
happened.

Signed-off-by: Nicolai Stange <nstange@suse.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/dh.c         |  6 ------
 crypto/dh_helper.c  | 17 ++++-------------
 crypto/testmgr.h    | 16 ++++++----------
 include/crypto/dh.h |  4 ----
 4 files changed, 10 insertions(+), 33 deletions(-)

diff --git a/crypto/dh.c b/crypto/dh.c
index 27e62a2a8027d..abc2a72c82719 100644
--- a/crypto/dh.c
+++ b/crypto/dh.c
@@ -62,12 +62,6 @@ static int dh_set_params(struct dh_ctx *ctx, struct dh *params)
 	if (!ctx->p)
 		return -EINVAL;
 
-	if (params->q && params->q_size) {
-		ctx->q = mpi_read_raw_data(params->q, params->q_size);
-		if (!ctx->q)
-			return -EINVAL;
-	}
-
 	ctx->g = mpi_read_raw_data(params->g, params->g_size);
 	if (!ctx->g)
 		return -EINVAL;
diff --git a/crypto/dh_helper.c b/crypto/dh_helper.c
index 9fd5a42eea157..aabc91e4f63f5 100644
--- a/crypto/dh_helper.c
+++ b/crypto/dh_helper.c
@@ -10,7 +10,7 @@
 #include <crypto/dh.h>
 #include <crypto/kpp.h>
 
-#define DH_KPP_SECRET_MIN_SIZE (sizeof(struct kpp_secret) + 4 * sizeof(int))
+#define DH_KPP_SECRET_MIN_SIZE (sizeof(struct kpp_secret) + 3 * sizeof(int))
 
 static inline u8 *dh_pack_data(u8 *dst, u8 *end, const void *src, size_t size)
 {
@@ -28,7 +28,7 @@ static inline const u8 *dh_unpack_data(void *dst, const void *src, size_t size)
 
 static inline unsigned int dh_data_size(const struct dh *p)
 {
-	return p->key_size + p->p_size + p->q_size + p->g_size;
+	return p->key_size + p->p_size + p->g_size;
 }
 
 unsigned int crypto_dh_key_len(const struct dh *p)
@@ -53,11 +53,9 @@ int crypto_dh_encode_key(char *buf, unsigned int len, const struct dh *params)
 	ptr = dh_pack_data(ptr, end, &params->key_size,
 			   sizeof(params->key_size));
 	ptr = dh_pack_data(ptr, end, &params->p_size, sizeof(params->p_size));
-	ptr = dh_pack_data(ptr, end, &params->q_size, sizeof(params->q_size));
 	ptr = dh_pack_data(ptr, end, &params->g_size, sizeof(params->g_size));
 	ptr = dh_pack_data(ptr, end, params->key, params->key_size);
 	ptr = dh_pack_data(ptr, end, params->p, params->p_size);
-	ptr = dh_pack_data(ptr, end, params->q, params->q_size);
 	ptr = dh_pack_data(ptr, end, params->g, params->g_size);
 	if (ptr != end)
 		return -EINVAL;
@@ -79,7 +77,6 @@ int crypto_dh_decode_key(const char *buf, unsigned int len, struct dh *params)
 
 	ptr = dh_unpack_data(&params->key_size, ptr, sizeof(params->key_size));
 	ptr = dh_unpack_data(&params->p_size, ptr, sizeof(params->p_size));
-	ptr = dh_unpack_data(&params->q_size, ptr, sizeof(params->q_size));
 	ptr = dh_unpack_data(&params->g_size, ptr, sizeof(params->g_size));
 	if (secret.len != crypto_dh_key_len(params))
 		return -EINVAL;
@@ -89,7 +86,7 @@ int crypto_dh_decode_key(const char *buf, unsigned int len, struct dh *params)
 	 * some drivers assume otherwise.
 	 */
 	if (params->key_size > params->p_size ||
-	    params->g_size > params->p_size || params->q_size > params->p_size)
+	    params->g_size > params->p_size)
 		return -EINVAL;
 
 	/* Don't allocate memory. Set pointers to data within
@@ -97,9 +94,7 @@ int crypto_dh_decode_key(const char *buf, unsigned int len, struct dh *params)
 	 */
 	params->key = (void *)ptr;
 	params->p = (void *)(ptr + params->key_size);
-	params->q = (void *)(ptr + params->key_size + params->p_size);
-	params->g = (void *)(ptr + params->key_size + params->p_size +
-			     params->q_size);
+	params->g = (void *)(ptr + params->key_size + params->p_size);
 
 	/*
 	 * Don't permit 'p' to be 0.  It's not a prime number, and it's subject
@@ -109,10 +104,6 @@ int crypto_dh_decode_key(const char *buf, unsigned int len, struct dh *params)
 	if (memchr_inv(params->p, 0, params->p_size) == NULL)
 		return -EINVAL;
 
-	/* It is permissible to not provide Q. */
-	if (params->q_size == 0)
-		params->q = NULL;
-
 	return 0;
 }
 EXPORT_SYMBOL_GPL(crypto_dh_decode_key);
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index 85ccf811f5e79..31da6fd3cfe7f 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -1246,17 +1246,15 @@ static const struct kpp_testvec dh_tv_template[] = {
 	.secret =
 #ifdef __LITTLE_ENDIAN
 	"\x01\x00" /* type */
-	"\x15\x02" /* len */
+	"\x11\x02" /* len */
 	"\x00\x01\x00\x00" /* key_size */
 	"\x00\x01\x00\x00" /* p_size */
-	"\x00\x00\x00\x00" /* q_size */
 	"\x01\x00\x00\x00" /* g_size */
 #else
 	"\x00\x01" /* type */
-	"\x02\x15" /* len */
+	"\x02\x11" /* len */
 	"\x00\x00\x01\x00" /* key_size */
 	"\x00\x00\x01\x00" /* p_size */
-	"\x00\x00\x00\x00" /* q_size */
 	"\x00\x00\x00\x01" /* g_size */
 #endif
 	/* xa */
@@ -1346,7 +1344,7 @@ static const struct kpp_testvec dh_tv_template[] = {
 	"\xd3\x34\x49\xad\x64\xa6\xb1\xc0\x59\x28\x75\x60\xa7\x8a\xb0\x11"
 	"\x56\x89\x42\x74\x11\xf5\xf6\x5e\x6f\x16\x54\x6a\xb1\x76\x4d\x50"
 	"\x8a\x68\xc1\x5b\x82\xb9\x0d\x00\x32\x50\xed\x88\x87\x48\x92\x17",
-	.secret_size = 533,
+	.secret_size = 529,
 	.b_public_size = 256,
 	.expected_a_public_size = 256,
 	.expected_ss_size = 256,
@@ -1355,17 +1353,15 @@ static const struct kpp_testvec dh_tv_template[] = {
 	.secret =
 #ifdef __LITTLE_ENDIAN
 	"\x01\x00" /* type */
-	"\x15\x02" /* len */
+	"\x11\x02" /* len */
 	"\x00\x01\x00\x00" /* key_size */
 	"\x00\x01\x00\x00" /* p_size */
-	"\x00\x00\x00\x00" /* q_size */
 	"\x01\x00\x00\x00" /* g_size */
 #else
 	"\x00\x01" /* type */
-	"\x02\x15" /* len */
+	"\x02\x11" /* len */
 	"\x00\x00\x01\x00" /* key_size */
 	"\x00\x00\x01\x00" /* p_size */
-	"\x00\x00\x00\x00" /* q_size */
 	"\x00\x00\x00\x01" /* g_size */
 #endif
 	/* xa */
@@ -1455,7 +1451,7 @@ static const struct kpp_testvec dh_tv_template[] = {
 	"\x5e\x5a\x64\xbd\xf6\x85\x04\xe8\x28\x6a\xac\xef\xce\x19\x8e\x9a"
 	"\xfe\x75\xc0\x27\x69\xe3\xb3\x7b\x21\xa7\xb1\x16\xa4\x85\x23\xee"
 	"\xb0\x1b\x04\x6e\xbd\xab\x16\xde\xfd\x86\x6b\xa9\x95\xd7\x0b\xfd",
-	.secret_size = 533,
+	.secret_size = 529,
 	.b_public_size = 256,
 	.expected_a_public_size = 256,
 	.expected_ss_size = 256,
diff --git a/include/crypto/dh.h b/include/crypto/dh.h
index d71e9858ab86d..2585f0e6bb69d 100644
--- a/include/crypto/dh.h
+++ b/include/crypto/dh.h
@@ -24,21 +24,17 @@
  *
  * @key:	Private DH key
  * @p:		Diffie-Hellman parameter P
- * @q:		Diffie-Hellman parameter Q
  * @g:		Diffie-Hellman generator G
  * @key_size:	Size of the private DH key
  * @p_size:	Size of DH parameter P
- * @q_size:	Size of DH parameter Q
  * @g_size:	Size of DH generator G
  */
 struct dh {
 	void *key;
 	void *p;
-	void *q;
 	void *g;
 	unsigned int key_size;
 	unsigned int p_size;
-	unsigned int q_size;
 	unsigned int g_size;
 };
 
-- 
GitLab


From 215bebc8c6ac438c382a6a56bd2764a2d4e1da72 Mon Sep 17 00:00:00 2001
From: Nicolai Stange <nstange@suse.de>
Date: Mon, 21 Feb 2022 13:10:50 +0100
Subject: [PATCH 0926/1586] crypto: dh - constify struct dh's pointer members

struct dh contains several pointer members corresponding to DH parameters:
->key, ->p and ->g. A subsequent commit will introduce "dh" wrapping
templates of the form "ffdhe2048(dh)", "ffdhe3072(dh)" and so on in order
to provide built-in support for the well-known safe-prime ffdhe group
parameters specified in RFC 7919. These templates will need to set the
group parameter related members of the (serialized) struct dh instance
passed to the inner "dh" kpp_alg instance, i.e. ->p and ->g, to some
constant, static storage arrays.

Turn the struct dh pointer members' types into "pointer to const" in
preparation for this.

Signed-off-by: Nicolai Stange <nstange@suse.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/crypto/dh.h | 6 +++---
 security/keys/dh.c  | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/crypto/dh.h b/include/crypto/dh.h
index 2585f0e6bb69d..67f3f6bca527a 100644
--- a/include/crypto/dh.h
+++ b/include/crypto/dh.h
@@ -30,9 +30,9 @@
  * @g_size:	Size of DH generator G
  */
 struct dh {
-	void *key;
-	void *p;
-	void *g;
+	const void *key;
+	const void *p;
+	const void *g;
 	unsigned int key_size;
 	unsigned int p_size;
 	unsigned int g_size;
diff --git a/security/keys/dh.c b/security/keys/dh.c
index 4573fc15617dc..b339760a31dd3 100644
--- a/security/keys/dh.c
+++ b/security/keys/dh.c
@@ -15,7 +15,7 @@
 #include <keys/user-type.h>
 #include "internal.h"
 
-static ssize_t dh_data_from_key(key_serial_t keyid, void **data)
+static ssize_t dh_data_from_key(key_serial_t keyid, const void **data)
 {
 	struct key *key;
 	key_ref_t key_ref;
-- 
GitLab


From fae198935c442e09afa3ecca197e144f732068d7 Mon Sep 17 00:00:00 2001
From: Nicolai Stange <nstange@suse.de>
Date: Mon, 21 Feb 2022 13:10:51 +0100
Subject: [PATCH 0927/1586] crypto: dh - split out deserialization code from
 crypto_dh_decode()

A subsequent commit will introduce "dh" wrapping templates of the form
"ffdhe2048(dh)", "ffdhe3072(dh)" and so on in order to provide built-in
support for the well-known safe-prime ffdhe group parameters specified in
RFC 7919.

Those templates' ->set_secret() will wrap the inner "dh" implementation's
->set_secret() and set the ->p and ->g group parameters as appropriate on
the way inwards. More specifically,
- A ffdheXYZ(dh) user would call crypto_dh_encode() on a struct dh instance
  having ->p == ->g == NULL as well as ->p_size == ->g_size == 0 and pass
  the resulting buffer to the outer ->set_secret().
- This outer ->set_secret() would then decode the struct dh via
  crypto_dh_decode_key(), set ->p, ->g, ->p_size as well as ->g_size as
  appropriate for the group in question and encode the struct dh again
  before passing it further down to the inner "dh"'s ->set_secret().

The problem is that crypto_dh_decode_key() implements some basic checks
which would reject parameter sets with ->p_size == 0 and thus, the ffdheXYZ
templates' ->set_secret() cannot use it as-is for decoding the passed
buffer. As the inner "dh"'s ->set_secret() will eventually conduct said
checks on the final parameter set anyway, the outer ->set_secret() really
only needs the decoding functionality.

Split out the pure struct dh decoding part from crypto_dh_decode_key() into
the new __crypto_dh_decode_key().

__crypto_dh_decode_key() gets defined in crypto/dh_helper.c, but will have
to get called from crypto/dh.c and thus, its declaration must be somehow
made available to the latter. Strictly speaking, __crypto_dh_decode_key()
is internal to the dh_generic module, yet it would be a bit over the top
to introduce a new header like e.g. include/crypto/internal/dh.h
containing just a single prototype. Add the __crypto_dh_decode_key()
declaration to include/crypto/dh.h instead.

Provide a proper kernel-doc annotation, even though
__crypto_dh_decode_key() is purposedly not on the function list specified
in Documentation/crypto/api-kpp.rst.

Signed-off-by: Nicolai Stange <nstange@suse.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/dh_helper.c  | 27 +++++++++++++++++++--------
 include/crypto/dh.h | 16 ++++++++++++++++
 2 files changed, 35 insertions(+), 8 deletions(-)

diff --git a/crypto/dh_helper.c b/crypto/dh_helper.c
index aabc91e4f63f5..2d499879328b7 100644
--- a/crypto/dh_helper.c
+++ b/crypto/dh_helper.c
@@ -63,7 +63,7 @@ int crypto_dh_encode_key(char *buf, unsigned int len, const struct dh *params)
 }
 EXPORT_SYMBOL_GPL(crypto_dh_encode_key);
 
-int crypto_dh_decode_key(const char *buf, unsigned int len, struct dh *params)
+int __crypto_dh_decode_key(const char *buf, unsigned int len, struct dh *params)
 {
 	const u8 *ptr = buf;
 	struct kpp_secret secret;
@@ -81,6 +81,24 @@ int crypto_dh_decode_key(const char *buf, unsigned int len, struct dh *params)
 	if (secret.len != crypto_dh_key_len(params))
 		return -EINVAL;
 
+	/* Don't allocate memory. Set pointers to data within
+	 * the given buffer
+	 */
+	params->key = (void *)ptr;
+	params->p = (void *)(ptr + params->key_size);
+	params->g = (void *)(ptr + params->key_size + params->p_size);
+
+	return 0;
+}
+
+int crypto_dh_decode_key(const char *buf, unsigned int len, struct dh *params)
+{
+	int err;
+
+	err = __crypto_dh_decode_key(buf, len, params);
+	if (err)
+		return err;
+
 	/*
 	 * Don't permit the buffer for 'key' or 'g' to be larger than 'p', since
 	 * some drivers assume otherwise.
@@ -89,13 +107,6 @@ int crypto_dh_decode_key(const char *buf, unsigned int len, struct dh *params)
 	    params->g_size > params->p_size)
 		return -EINVAL;
 
-	/* Don't allocate memory. Set pointers to data within
-	 * the given buffer
-	 */
-	params->key = (void *)ptr;
-	params->p = (void *)(ptr + params->key_size);
-	params->g = (void *)(ptr + params->key_size + params->p_size);
-
 	/*
 	 * Don't permit 'p' to be 0.  It's not a prime number, and it's subject
 	 * to corner cases such as 'mod 0' being undefined or
diff --git a/include/crypto/dh.h b/include/crypto/dh.h
index 67f3f6bca527a..7b863e911cb44 100644
--- a/include/crypto/dh.h
+++ b/include/crypto/dh.h
@@ -79,4 +79,20 @@ int crypto_dh_encode_key(char *buf, unsigned int len, const struct dh *params);
  */
 int crypto_dh_decode_key(const char *buf, unsigned int len, struct dh *params);
 
+/**
+ * __crypto_dh_decode_key() - decode a private key without parameter checks
+ * @buf:	Buffer holding a packet key that should be decoded
+ * @len:	Length of the packet private key buffer
+ * @params:	Buffer allocated by the caller that is filled with the
+ *		unpacked DH private key.
+ *
+ * Internal function providing the same services as the exported
+ * crypto_dh_decode_key(), but without any of those basic parameter
+ * checks conducted by the latter.
+ *
+ * Return:	-EINVAL if buffer has insufficient size, 0 on success
+ */
+int __crypto_dh_decode_key(const char *buf, unsigned int len,
+			   struct dh *params);
+
 #endif
-- 
GitLab


From d902981f09bf935f257953d227a7721e8e541052 Mon Sep 17 00:00:00 2001
From: Nicolai Stange <nstange@suse.de>
Date: Mon, 21 Feb 2022 13:10:52 +0100
Subject: [PATCH 0928/1586] crypto: dh - introduce common code for built-in
 safe-prime group support

Recent work on NVME in-band authentication support ([1]) needs to invoke
the "dh" KPP with the FFDHE safe-prime group parameters as specified in
RFC 7919 and generate ephemeral keys suitable for the respective group. By
coincidence, the requirements from NIST SP800-56Arev3,
sec. 5.5.2 ("Assurance of Domain-Parameter Validity") basically boil down
to disallowing any group parameters not among the approved safe-prime
groups specified in either RFC 7919 or RFC 3526 in FIPS mode. Furthermore,
SP800-56Arev3 specifies the respective security strength for each of the
approved safe-prime groups, which has a direct impact on the minimum key
lengths.

In this light, it's desirable to introduce built-in support for the
RFC 7919 safe-prime groups to the kernel's DH implementation, provide a
SP800-56Arev3 conforming key generation primitive for those and render
non-approved group parameters unusable in FIPS mode on the way.

As suggested ([2]) in the course of discussion to previous iterations of
this patchset, the built-in support for ffdhe groups would be best made
available in the form of templates wrapping the existing "dh"
implementation, one for each group specified by RFC 7919: ffdhe2048(dh),
ffdhe3072(dh), ffdhe4096(dh), ffdhe6144(dh) and ffdhe8192(dh). As these
templates differ only in the safe-prime constants they'd configure the
inner "dh" transforms with, they can share almost all of their
"dh"-wrapping template implementation code.

Introduce this common code to dh_generic. The actual dump of the RFC 7919
safe-prime constants will be deferred to the next patch in order to
facilitate review. The ephemeral key generation primitive mentioned above
likewise deserves a patch on its own, as does the mechanism by which
unapproved groups are rendered unusable in FIPS mode.

Define a struct dh_safe_prime container for specifying the individual
templates' associated safe-prime group constants. All ffdheXYZ(dh) template
instances will store a pointer to such a dh_safe_prime in their context
areas each. Implement the common __dh_safe_prime_create() template
instantiation helper. The intention is that the individual ffdheXYZ(dh)
crypto_templates' ->create() implementations will simply forward any calls
to __dh_safe_prime_create(), passing a suitable dh_safe_prime in addition
to the received ->create() arguments. __dh_safe_prime_create() would then
create and register a kpp_instance as appropriate, storing the given
dh_safe_prime pointer alongside a crypto_kpp_spawn for the inner "dh"
kpp_alg in the context area.

As the ffdheXYZ(dh) kpp_instances are supposed to act as proxies to the
inner "dh" kpp_alg, make each of their associated crypto_kpp transforms to
in turn own an inner "dh" transform, a pointer to which gets stored in the
context area. Setup and teardown are getting handled from the outer
->init_tfm() and ->exit_tfm() respectively.

In order to achieve the overall goal and let the ffdheXYZ(dh) kpp_instances
configure the inner "dh" transforms with the respective group parameters,
make their common ->set_secret(), the new dh_safe_prime_set_secret(), fill
in the P and G values before forwarding the call to the inner "dh"'s
->set_secret(). Note that the outer ->set_secret() can obtain the P value
associated with the given ffdheXYZ(dh) kpp_instance by means of the
dh_safe_prime referenced from the latter's context. The value of G OTOH
always equals constant 2 for the safe-prime groups.

Finally, make the remaining two kpp_alg primitives both operating on
kpp_requests, i.e. ->generate_public_key() and ->compute_shared_secret(),
to merely forward any request to the inner "dh" implementation. However, a
kpp_request instance received from the outside cannot get simply passed
on as-is, because its associated transform (crypto_kpp_reqtfm()) will have
been set to the outer ffdheXYZ(dh) one. In order to handle this, reserve
some space in the outer ffdheXYZ(dh) kpp_requests' context areas for in
turn storing an inner kpp_request suitable for "dh" each. Make the outer
->generate_public_key() and ->compute_shared_secret() respectively to setup
this inner kpp_request by means of the new dh_safe_prime_prepare_dh_req()
helper before handing it over to the "dh" implementation for further
processing. dh_safe_prime_prepare_dh_req() basically copies the outer
kpp_request received from the outside over to the inner one, but installs
the inner transform and its own ->complete() proxy callback therein. This
completion callback, the new dh_safe_prime_complete_req(), doesn't do
anything beyond completing the outer request. Note that there exist some
examples in crypto/, which would simply install the completion handler
from the outer request at the inner one in similar setups, e.g. seqiv.
However, this would mean that the user-provided completion handler won't
get called with the address of the outer kpp_request initially submitted
and the handler might not be prepared for this. Users could certainly work
around this by setting the callback ->data properly, but IMO it's cleaner
this way. Furthermore, it might make sense to extend
dh_safe_prime_complete_req() in the future and move e.g. those
post-computation FIPS checks from the generic "dh" implementation to the
ffdheXYZ(dh) templates.

[1] https://lore.kernel.org/r/20211202152358.60116-1-hare@suse.de
[2] https://lore.kernel.org/r/20211217055227.GA20698@gondor.apana.org.au

Signed-off-by: Nicolai Stange <nstange@suse.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/dh.c | 208 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 208 insertions(+)

diff --git a/crypto/dh.c b/crypto/dh.c
index abc2a72c82719..08fb7610fe2a7 100644
--- a/crypto/dh.c
+++ b/crypto/dh.c
@@ -257,6 +257,214 @@ static struct kpp_alg dh = {
 	},
 };
 
+
+struct dh_safe_prime {
+	unsigned int max_strength;
+	unsigned int p_size;
+	const char *p;
+};
+
+static const char safe_prime_g[]  = { 2 };
+
+struct dh_safe_prime_instance_ctx {
+	struct crypto_kpp_spawn dh_spawn;
+	const struct dh_safe_prime *safe_prime;
+};
+
+struct dh_safe_prime_tfm_ctx {
+	struct crypto_kpp *dh_tfm;
+};
+
+static void dh_safe_prime_free_instance(struct kpp_instance *inst)
+{
+	struct dh_safe_prime_instance_ctx *ctx = kpp_instance_ctx(inst);
+
+	crypto_drop_kpp(&ctx->dh_spawn);
+	kfree(inst);
+}
+
+static inline struct dh_safe_prime_instance_ctx *dh_safe_prime_instance_ctx(
+	struct crypto_kpp *tfm)
+{
+	return kpp_instance_ctx(kpp_alg_instance(tfm));
+}
+
+static inline struct kpp_alg *dh_safe_prime_dh_alg(
+	struct dh_safe_prime_tfm_ctx *ctx)
+{
+	return crypto_kpp_alg(ctx->dh_tfm);
+}
+
+static int dh_safe_prime_init_tfm(struct crypto_kpp *tfm)
+{
+	struct dh_safe_prime_instance_ctx *inst_ctx =
+		dh_safe_prime_instance_ctx(tfm);
+	struct dh_safe_prime_tfm_ctx *tfm_ctx = kpp_tfm_ctx(tfm);
+
+	tfm_ctx->dh_tfm = crypto_spawn_kpp(&inst_ctx->dh_spawn);
+	if (IS_ERR(tfm_ctx->dh_tfm))
+		return PTR_ERR(tfm_ctx->dh_tfm);
+
+	return 0;
+}
+
+static void dh_safe_prime_exit_tfm(struct crypto_kpp *tfm)
+{
+	struct dh_safe_prime_tfm_ctx *tfm_ctx = kpp_tfm_ctx(tfm);
+
+	crypto_free_kpp(tfm_ctx->dh_tfm);
+}
+
+static int dh_safe_prime_set_secret(struct crypto_kpp *tfm, const void *buffer,
+				    unsigned int len)
+{
+	struct dh_safe_prime_instance_ctx *inst_ctx =
+		dh_safe_prime_instance_ctx(tfm);
+	struct dh_safe_prime_tfm_ctx *tfm_ctx = kpp_tfm_ctx(tfm);
+	struct dh params;
+	void *buf;
+	unsigned int buf_size;
+	int err;
+
+	err = __crypto_dh_decode_key(buffer, len, &params);
+	if (err)
+		return err;
+
+	if (params.p_size || params.g_size)
+		return -EINVAL;
+
+	params.p = inst_ctx->safe_prime->p;
+	params.p_size = inst_ctx->safe_prime->p_size;
+	params.g = safe_prime_g;
+	params.g_size = sizeof(safe_prime_g);
+
+	buf_size = crypto_dh_key_len(&params);
+	buf = kmalloc(buf_size, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	err = crypto_dh_encode_key(buf, buf_size, &params);
+	if (err)
+		goto out;
+
+	err = crypto_kpp_set_secret(tfm_ctx->dh_tfm, buf, buf_size);
+out:
+	kfree_sensitive(buf);
+	return err;
+}
+
+static void dh_safe_prime_complete_req(struct crypto_async_request *dh_req,
+				       int err)
+{
+	struct kpp_request *req = dh_req->data;
+
+	kpp_request_complete(req, err);
+}
+
+static struct kpp_request *dh_safe_prime_prepare_dh_req(struct kpp_request *req)
+{
+	struct dh_safe_prime_tfm_ctx *tfm_ctx =
+		kpp_tfm_ctx(crypto_kpp_reqtfm(req));
+	struct kpp_request *dh_req = kpp_request_ctx(req);
+
+	kpp_request_set_tfm(dh_req, tfm_ctx->dh_tfm);
+	kpp_request_set_callback(dh_req, req->base.flags,
+				 dh_safe_prime_complete_req, req);
+
+	kpp_request_set_input(dh_req, req->src, req->src_len);
+	kpp_request_set_output(dh_req, req->dst, req->dst_len);
+
+	return dh_req;
+}
+
+static int dh_safe_prime_generate_public_key(struct kpp_request *req)
+{
+	struct kpp_request *dh_req = dh_safe_prime_prepare_dh_req(req);
+
+	return crypto_kpp_generate_public_key(dh_req);
+}
+
+static int dh_safe_prime_compute_shared_secret(struct kpp_request *req)
+{
+	struct kpp_request *dh_req = dh_safe_prime_prepare_dh_req(req);
+
+	return crypto_kpp_compute_shared_secret(dh_req);
+}
+
+static unsigned int dh_safe_prime_max_size(struct crypto_kpp *tfm)
+{
+	struct dh_safe_prime_tfm_ctx *tfm_ctx = kpp_tfm_ctx(tfm);
+
+	return crypto_kpp_maxsize(tfm_ctx->dh_tfm);
+}
+
+static int __maybe_unused __dh_safe_prime_create(
+	struct crypto_template *tmpl, struct rtattr **tb,
+	const struct dh_safe_prime *safe_prime)
+{
+	struct kpp_instance *inst;
+	struct dh_safe_prime_instance_ctx *ctx;
+	const char *dh_name;
+	struct kpp_alg *dh_alg;
+	u32 mask;
+	int err;
+
+	err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_KPP, &mask);
+	if (err)
+		return err;
+
+	dh_name = crypto_attr_alg_name(tb[1]);
+	if (IS_ERR(dh_name))
+		return PTR_ERR(dh_name);
+
+	inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL);
+	if (!inst)
+		return -ENOMEM;
+
+	ctx = kpp_instance_ctx(inst);
+
+	err = crypto_grab_kpp(&ctx->dh_spawn, kpp_crypto_instance(inst),
+			      dh_name, 0, mask);
+	if (err)
+		goto err_free_inst;
+
+	err = -EINVAL;
+	dh_alg = crypto_spawn_kpp_alg(&ctx->dh_spawn);
+	if (strcmp(dh_alg->base.cra_name, "dh"))
+		goto err_free_inst;
+
+	ctx->safe_prime = safe_prime;
+
+	err = crypto_inst_setname(kpp_crypto_instance(inst),
+				  tmpl->name, &dh_alg->base);
+	if (err)
+		goto err_free_inst;
+
+	inst->alg.set_secret = dh_safe_prime_set_secret;
+	inst->alg.generate_public_key = dh_safe_prime_generate_public_key;
+	inst->alg.compute_shared_secret = dh_safe_prime_compute_shared_secret;
+	inst->alg.max_size = dh_safe_prime_max_size;
+	inst->alg.init = dh_safe_prime_init_tfm;
+	inst->alg.exit = dh_safe_prime_exit_tfm;
+	inst->alg.reqsize = sizeof(struct kpp_request) + dh_alg->reqsize;
+	inst->alg.base.cra_priority = dh_alg->base.cra_priority;
+	inst->alg.base.cra_module = THIS_MODULE;
+	inst->alg.base.cra_ctxsize = sizeof(struct dh_safe_prime_tfm_ctx);
+
+	inst->free = dh_safe_prime_free_instance;
+
+	err = kpp_register_instance(tmpl, inst);
+	if (err)
+		goto err_free_inst;
+
+	return 0;
+
+err_free_inst:
+	dh_safe_prime_free_instance(inst);
+
+	return err;
+}
+
 static int dh_init(void)
 {
 	return crypto_register_kpp(&dh);
-- 
GitLab


From 7dce59819750d78513c70bf4a9024e265af88b23 Mon Sep 17 00:00:00 2001
From: Nicolai Stange <nstange@suse.de>
Date: Mon, 21 Feb 2022 13:10:53 +0100
Subject: [PATCH 0929/1586] crypto: dh - implement ffdheXYZ(dh) templates

Current work on NVME in-band authentication support ([1]) needs to invoke
DH with the FFDHE safe-prime group parameters specified in RFC 7919.

Introduce a new CRYPTO_DH_RFC7919_GROUPS Kconfig option. If enabled, make
dh_generic register a couple of ffdheXYZ(dh) templates, one for each group:
ffdhe2048(dh), ffdhe3072(dh), ffdhe4096(dh), ffdhe6144(dh) and
ffdhe8192(dh). Their respective ->set_secret() expects a (serialized)
struct dh, just like the underlying "dh" implementation does, but with the
P and G values unset so that the safe-prime constants for the given group
can be filled in by the wrapping template.

Internally, a struct dh_safe_prime instance is being defined for each of
the ffdheXYZ(dh) templates as appropriate. In order to prepare for future
key generation, fill in the maximum security strength values as specified
by SP800-56Arev3 on the go, even though they're not needed at this point
yet.

Implement the respective ffdheXYZ(dh) crypto_template's ->create() by
simply forwarding any calls to the __dh_safe_prime_create() helper
introduced with the previous commit, passing the associated dh_safe_prime
in addition to the received ->create() arguments.

[1] https://lore.kernel.org/r/20211202152358.60116-1-hare@suse.de

Signed-off-by: Nicolai Stange <nstange@suse.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/Kconfig |   6 +
 crypto/dh.c    | 298 ++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 303 insertions(+), 1 deletion(-)

diff --git a/crypto/Kconfig b/crypto/Kconfig
index d9573b3f081ff..ba9434ad06efd 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -231,6 +231,12 @@ config CRYPTO_DH
 	help
 	  Generic implementation of the Diffie-Hellman algorithm.
 
+config CRYPTO_DH_RFC7919_GROUPS
+	bool "Support for RFC 7919 FFDHE group parameters"
+	depends on CRYPTO_DH
+	help
+	  Provide support for RFC 7919 FFDHE group parameters. If unsure, say N.
+
 config CRYPTO_ECC
 	tristate
 	select CRYPTO_RNG_DEFAULT
diff --git a/crypto/dh.c b/crypto/dh.c
index 08fb7610fe2a7..d0adb1705fe7d 100644
--- a/crypto/dh.c
+++ b/crypto/dh.c
@@ -465,13 +465,309 @@ err_free_inst:
 	return err;
 }
 
+#ifdef CONFIG_CRYPTO_DH_RFC7919_GROUPS
+
+static const struct dh_safe_prime ffdhe2048_prime = {
+	.max_strength = 112,
+	.p_size = 256,
+	.p =
+	"\xff\xff\xff\xff\xff\xff\xff\xff\xad\xf8\x54\x58\xa2\xbb\x4a\x9a"
+	"\xaf\xdc\x56\x20\x27\x3d\x3c\xf1\xd8\xb9\xc5\x83\xce\x2d\x36\x95"
+	"\xa9\xe1\x36\x41\x14\x64\x33\xfb\xcc\x93\x9d\xce\x24\x9b\x3e\xf9"
+	"\x7d\x2f\xe3\x63\x63\x0c\x75\xd8\xf6\x81\xb2\x02\xae\xc4\x61\x7a"
+	"\xd3\xdf\x1e\xd5\xd5\xfd\x65\x61\x24\x33\xf5\x1f\x5f\x06\x6e\xd0"
+	"\x85\x63\x65\x55\x3d\xed\x1a\xf3\xb5\x57\x13\x5e\x7f\x57\xc9\x35"
+	"\x98\x4f\x0c\x70\xe0\xe6\x8b\x77\xe2\xa6\x89\xda\xf3\xef\xe8\x72"
+	"\x1d\xf1\x58\xa1\x36\xad\xe7\x35\x30\xac\xca\x4f\x48\x3a\x79\x7a"
+	"\xbc\x0a\xb1\x82\xb3\x24\xfb\x61\xd1\x08\xa9\x4b\xb2\xc8\xe3\xfb"
+	"\xb9\x6a\xda\xb7\x60\xd7\xf4\x68\x1d\x4f\x42\xa3\xde\x39\x4d\xf4"
+	"\xae\x56\xed\xe7\x63\x72\xbb\x19\x0b\x07\xa7\xc8\xee\x0a\x6d\x70"
+	"\x9e\x02\xfc\xe1\xcd\xf7\xe2\xec\xc0\x34\x04\xcd\x28\x34\x2f\x61"
+	"\x91\x72\xfe\x9c\xe9\x85\x83\xff\x8e\x4f\x12\x32\xee\xf2\x81\x83"
+	"\xc3\xfe\x3b\x1b\x4c\x6f\xad\x73\x3b\xb5\xfc\xbc\x2e\xc2\x20\x05"
+	"\xc5\x8e\xf1\x83\x7d\x16\x83\xb2\xc6\xf3\x4a\x26\xc1\xb2\xef\xfa"
+	"\x88\x6b\x42\x38\x61\x28\x5c\x97\xff\xff\xff\xff\xff\xff\xff\xff",
+};
+
+static const struct dh_safe_prime ffdhe3072_prime = {
+	.max_strength = 128,
+	.p_size = 384,
+	.p =
+	"\xff\xff\xff\xff\xff\xff\xff\xff\xad\xf8\x54\x58\xa2\xbb\x4a\x9a"
+	"\xaf\xdc\x56\x20\x27\x3d\x3c\xf1\xd8\xb9\xc5\x83\xce\x2d\x36\x95"
+	"\xa9\xe1\x36\x41\x14\x64\x33\xfb\xcc\x93\x9d\xce\x24\x9b\x3e\xf9"
+	"\x7d\x2f\xe3\x63\x63\x0c\x75\xd8\xf6\x81\xb2\x02\xae\xc4\x61\x7a"
+	"\xd3\xdf\x1e\xd5\xd5\xfd\x65\x61\x24\x33\xf5\x1f\x5f\x06\x6e\xd0"
+	"\x85\x63\x65\x55\x3d\xed\x1a\xf3\xb5\x57\x13\x5e\x7f\x57\xc9\x35"
+	"\x98\x4f\x0c\x70\xe0\xe6\x8b\x77\xe2\xa6\x89\xda\xf3\xef\xe8\x72"
+	"\x1d\xf1\x58\xa1\x36\xad\xe7\x35\x30\xac\xca\x4f\x48\x3a\x79\x7a"
+	"\xbc\x0a\xb1\x82\xb3\x24\xfb\x61\xd1\x08\xa9\x4b\xb2\xc8\xe3\xfb"
+	"\xb9\x6a\xda\xb7\x60\xd7\xf4\x68\x1d\x4f\x42\xa3\xde\x39\x4d\xf4"
+	"\xae\x56\xed\xe7\x63\x72\xbb\x19\x0b\x07\xa7\xc8\xee\x0a\x6d\x70"
+	"\x9e\x02\xfc\xe1\xcd\xf7\xe2\xec\xc0\x34\x04\xcd\x28\x34\x2f\x61"
+	"\x91\x72\xfe\x9c\xe9\x85\x83\xff\x8e\x4f\x12\x32\xee\xf2\x81\x83"
+	"\xc3\xfe\x3b\x1b\x4c\x6f\xad\x73\x3b\xb5\xfc\xbc\x2e\xc2\x20\x05"
+	"\xc5\x8e\xf1\x83\x7d\x16\x83\xb2\xc6\xf3\x4a\x26\xc1\xb2\xef\xfa"
+	"\x88\x6b\x42\x38\x61\x1f\xcf\xdc\xde\x35\x5b\x3b\x65\x19\x03\x5b"
+	"\xbc\x34\xf4\xde\xf9\x9c\x02\x38\x61\xb4\x6f\xc9\xd6\xe6\xc9\x07"
+	"\x7a\xd9\x1d\x26\x91\xf7\xf7\xee\x59\x8c\xb0\xfa\xc1\x86\xd9\x1c"
+	"\xae\xfe\x13\x09\x85\x13\x92\x70\xb4\x13\x0c\x93\xbc\x43\x79\x44"
+	"\xf4\xfd\x44\x52\xe2\xd7\x4d\xd3\x64\xf2\xe2\x1e\x71\xf5\x4b\xff"
+	"\x5c\xae\x82\xab\x9c\x9d\xf6\x9e\xe8\x6d\x2b\xc5\x22\x36\x3a\x0d"
+	"\xab\xc5\x21\x97\x9b\x0d\xea\xda\x1d\xbf\x9a\x42\xd5\xc4\x48\x4e"
+	"\x0a\xbc\xd0\x6b\xfa\x53\xdd\xef\x3c\x1b\x20\xee\x3f\xd5\x9d\x7c"
+	"\x25\xe4\x1d\x2b\x66\xc6\x2e\x37\xff\xff\xff\xff\xff\xff\xff\xff",
+};
+
+static const struct dh_safe_prime ffdhe4096_prime = {
+	.max_strength = 152,
+	.p_size = 512,
+	.p =
+	"\xff\xff\xff\xff\xff\xff\xff\xff\xad\xf8\x54\x58\xa2\xbb\x4a\x9a"
+	"\xaf\xdc\x56\x20\x27\x3d\x3c\xf1\xd8\xb9\xc5\x83\xce\x2d\x36\x95"
+	"\xa9\xe1\x36\x41\x14\x64\x33\xfb\xcc\x93\x9d\xce\x24\x9b\x3e\xf9"
+	"\x7d\x2f\xe3\x63\x63\x0c\x75\xd8\xf6\x81\xb2\x02\xae\xc4\x61\x7a"
+	"\xd3\xdf\x1e\xd5\xd5\xfd\x65\x61\x24\x33\xf5\x1f\x5f\x06\x6e\xd0"
+	"\x85\x63\x65\x55\x3d\xed\x1a\xf3\xb5\x57\x13\x5e\x7f\x57\xc9\x35"
+	"\x98\x4f\x0c\x70\xe0\xe6\x8b\x77\xe2\xa6\x89\xda\xf3\xef\xe8\x72"
+	"\x1d\xf1\x58\xa1\x36\xad\xe7\x35\x30\xac\xca\x4f\x48\x3a\x79\x7a"
+	"\xbc\x0a\xb1\x82\xb3\x24\xfb\x61\xd1\x08\xa9\x4b\xb2\xc8\xe3\xfb"
+	"\xb9\x6a\xda\xb7\x60\xd7\xf4\x68\x1d\x4f\x42\xa3\xde\x39\x4d\xf4"
+	"\xae\x56\xed\xe7\x63\x72\xbb\x19\x0b\x07\xa7\xc8\xee\x0a\x6d\x70"
+	"\x9e\x02\xfc\xe1\xcd\xf7\xe2\xec\xc0\x34\x04\xcd\x28\x34\x2f\x61"
+	"\x91\x72\xfe\x9c\xe9\x85\x83\xff\x8e\x4f\x12\x32\xee\xf2\x81\x83"
+	"\xc3\xfe\x3b\x1b\x4c\x6f\xad\x73\x3b\xb5\xfc\xbc\x2e\xc2\x20\x05"
+	"\xc5\x8e\xf1\x83\x7d\x16\x83\xb2\xc6\xf3\x4a\x26\xc1\xb2\xef\xfa"
+	"\x88\x6b\x42\x38\x61\x1f\xcf\xdc\xde\x35\x5b\x3b\x65\x19\x03\x5b"
+	"\xbc\x34\xf4\xde\xf9\x9c\x02\x38\x61\xb4\x6f\xc9\xd6\xe6\xc9\x07"
+	"\x7a\xd9\x1d\x26\x91\xf7\xf7\xee\x59\x8c\xb0\xfa\xc1\x86\xd9\x1c"
+	"\xae\xfe\x13\x09\x85\x13\x92\x70\xb4\x13\x0c\x93\xbc\x43\x79\x44"
+	"\xf4\xfd\x44\x52\xe2\xd7\x4d\xd3\x64\xf2\xe2\x1e\x71\xf5\x4b\xff"
+	"\x5c\xae\x82\xab\x9c\x9d\xf6\x9e\xe8\x6d\x2b\xc5\x22\x36\x3a\x0d"
+	"\xab\xc5\x21\x97\x9b\x0d\xea\xda\x1d\xbf\x9a\x42\xd5\xc4\x48\x4e"
+	"\x0a\xbc\xd0\x6b\xfa\x53\xdd\xef\x3c\x1b\x20\xee\x3f\xd5\x9d\x7c"
+	"\x25\xe4\x1d\x2b\x66\x9e\x1e\xf1\x6e\x6f\x52\xc3\x16\x4d\xf4\xfb"
+	"\x79\x30\xe9\xe4\xe5\x88\x57\xb6\xac\x7d\x5f\x42\xd6\x9f\x6d\x18"
+	"\x77\x63\xcf\x1d\x55\x03\x40\x04\x87\xf5\x5b\xa5\x7e\x31\xcc\x7a"
+	"\x71\x35\xc8\x86\xef\xb4\x31\x8a\xed\x6a\x1e\x01\x2d\x9e\x68\x32"
+	"\xa9\x07\x60\x0a\x91\x81\x30\xc4\x6d\xc7\x78\xf9\x71\xad\x00\x38"
+	"\x09\x29\x99\xa3\x33\xcb\x8b\x7a\x1a\x1d\xb9\x3d\x71\x40\x00\x3c"
+	"\x2a\x4e\xce\xa9\xf9\x8d\x0a\xcc\x0a\x82\x91\xcd\xce\xc9\x7d\xcf"
+	"\x8e\xc9\xb5\x5a\x7f\x88\xa4\x6b\x4d\xb5\xa8\x51\xf4\x41\x82\xe1"
+	"\xc6\x8a\x00\x7e\x5e\x65\x5f\x6a\xff\xff\xff\xff\xff\xff\xff\xff",
+};
+
+static const struct dh_safe_prime ffdhe6144_prime = {
+	.max_strength = 176,
+	.p_size = 768,
+	.p =
+	"\xff\xff\xff\xff\xff\xff\xff\xff\xad\xf8\x54\x58\xa2\xbb\x4a\x9a"
+	"\xaf\xdc\x56\x20\x27\x3d\x3c\xf1\xd8\xb9\xc5\x83\xce\x2d\x36\x95"
+	"\xa9\xe1\x36\x41\x14\x64\x33\xfb\xcc\x93\x9d\xce\x24\x9b\x3e\xf9"
+	"\x7d\x2f\xe3\x63\x63\x0c\x75\xd8\xf6\x81\xb2\x02\xae\xc4\x61\x7a"
+	"\xd3\xdf\x1e\xd5\xd5\xfd\x65\x61\x24\x33\xf5\x1f\x5f\x06\x6e\xd0"
+	"\x85\x63\x65\x55\x3d\xed\x1a\xf3\xb5\x57\x13\x5e\x7f\x57\xc9\x35"
+	"\x98\x4f\x0c\x70\xe0\xe6\x8b\x77\xe2\xa6\x89\xda\xf3\xef\xe8\x72"
+	"\x1d\xf1\x58\xa1\x36\xad\xe7\x35\x30\xac\xca\x4f\x48\x3a\x79\x7a"
+	"\xbc\x0a\xb1\x82\xb3\x24\xfb\x61\xd1\x08\xa9\x4b\xb2\xc8\xe3\xfb"
+	"\xb9\x6a\xda\xb7\x60\xd7\xf4\x68\x1d\x4f\x42\xa3\xde\x39\x4d\xf4"
+	"\xae\x56\xed\xe7\x63\x72\xbb\x19\x0b\x07\xa7\xc8\xee\x0a\x6d\x70"
+	"\x9e\x02\xfc\xe1\xcd\xf7\xe2\xec\xc0\x34\x04\xcd\x28\x34\x2f\x61"
+	"\x91\x72\xfe\x9c\xe9\x85\x83\xff\x8e\x4f\x12\x32\xee\xf2\x81\x83"
+	"\xc3\xfe\x3b\x1b\x4c\x6f\xad\x73\x3b\xb5\xfc\xbc\x2e\xc2\x20\x05"
+	"\xc5\x8e\xf1\x83\x7d\x16\x83\xb2\xc6\xf3\x4a\x26\xc1\xb2\xef\xfa"
+	"\x88\x6b\x42\x38\x61\x1f\xcf\xdc\xde\x35\x5b\x3b\x65\x19\x03\x5b"
+	"\xbc\x34\xf4\xde\xf9\x9c\x02\x38\x61\xb4\x6f\xc9\xd6\xe6\xc9\x07"
+	"\x7a\xd9\x1d\x26\x91\xf7\xf7\xee\x59\x8c\xb0\xfa\xc1\x86\xd9\x1c"
+	"\xae\xfe\x13\x09\x85\x13\x92\x70\xb4\x13\x0c\x93\xbc\x43\x79\x44"
+	"\xf4\xfd\x44\x52\xe2\xd7\x4d\xd3\x64\xf2\xe2\x1e\x71\xf5\x4b\xff"
+	"\x5c\xae\x82\xab\x9c\x9d\xf6\x9e\xe8\x6d\x2b\xc5\x22\x36\x3a\x0d"
+	"\xab\xc5\x21\x97\x9b\x0d\xea\xda\x1d\xbf\x9a\x42\xd5\xc4\x48\x4e"
+	"\x0a\xbc\xd0\x6b\xfa\x53\xdd\xef\x3c\x1b\x20\xee\x3f\xd5\x9d\x7c"
+	"\x25\xe4\x1d\x2b\x66\x9e\x1e\xf1\x6e\x6f\x52\xc3\x16\x4d\xf4\xfb"
+	"\x79\x30\xe9\xe4\xe5\x88\x57\xb6\xac\x7d\x5f\x42\xd6\x9f\x6d\x18"
+	"\x77\x63\xcf\x1d\x55\x03\x40\x04\x87\xf5\x5b\xa5\x7e\x31\xcc\x7a"
+	"\x71\x35\xc8\x86\xef\xb4\x31\x8a\xed\x6a\x1e\x01\x2d\x9e\x68\x32"
+	"\xa9\x07\x60\x0a\x91\x81\x30\xc4\x6d\xc7\x78\xf9\x71\xad\x00\x38"
+	"\x09\x29\x99\xa3\x33\xcb\x8b\x7a\x1a\x1d\xb9\x3d\x71\x40\x00\x3c"
+	"\x2a\x4e\xce\xa9\xf9\x8d\x0a\xcc\x0a\x82\x91\xcd\xce\xc9\x7d\xcf"
+	"\x8e\xc9\xb5\x5a\x7f\x88\xa4\x6b\x4d\xb5\xa8\x51\xf4\x41\x82\xe1"
+	"\xc6\x8a\x00\x7e\x5e\x0d\xd9\x02\x0b\xfd\x64\xb6\x45\x03\x6c\x7a"
+	"\x4e\x67\x7d\x2c\x38\x53\x2a\x3a\x23\xba\x44\x42\xca\xf5\x3e\xa6"
+	"\x3b\xb4\x54\x32\x9b\x76\x24\xc8\x91\x7b\xdd\x64\xb1\xc0\xfd\x4c"
+	"\xb3\x8e\x8c\x33\x4c\x70\x1c\x3a\xcd\xad\x06\x57\xfc\xcf\xec\x71"
+	"\x9b\x1f\x5c\x3e\x4e\x46\x04\x1f\x38\x81\x47\xfb\x4c\xfd\xb4\x77"
+	"\xa5\x24\x71\xf7\xa9\xa9\x69\x10\xb8\x55\x32\x2e\xdb\x63\x40\xd8"
+	"\xa0\x0e\xf0\x92\x35\x05\x11\xe3\x0a\xbe\xc1\xff\xf9\xe3\xa2\x6e"
+	"\x7f\xb2\x9f\x8c\x18\x30\x23\xc3\x58\x7e\x38\xda\x00\x77\xd9\xb4"
+	"\x76\x3e\x4e\x4b\x94\xb2\xbb\xc1\x94\xc6\x65\x1e\x77\xca\xf9\x92"
+	"\xee\xaa\xc0\x23\x2a\x28\x1b\xf6\xb3\xa7\x39\xc1\x22\x61\x16\x82"
+	"\x0a\xe8\xdb\x58\x47\xa6\x7c\xbe\xf9\xc9\x09\x1b\x46\x2d\x53\x8c"
+	"\xd7\x2b\x03\x74\x6a\xe7\x7f\x5e\x62\x29\x2c\x31\x15\x62\xa8\x46"
+	"\x50\x5d\xc8\x2d\xb8\x54\x33\x8a\xe4\x9f\x52\x35\xc9\x5b\x91\x17"
+	"\x8c\xcf\x2d\xd5\xca\xce\xf4\x03\xec\x9d\x18\x10\xc6\x27\x2b\x04"
+	"\x5b\x3b\x71\xf9\xdc\x6b\x80\xd6\x3f\xdd\x4a\x8e\x9a\xdb\x1e\x69"
+	"\x62\xa6\x95\x26\xd4\x31\x61\xc1\xa4\x1d\x57\x0d\x79\x38\xda\xd4"
+	"\xa4\x0e\x32\x9c\xd0\xe4\x0e\x65\xff\xff\xff\xff\xff\xff\xff\xff",
+};
+
+static const struct dh_safe_prime ffdhe8192_prime = {
+	.max_strength = 200,
+	.p_size = 1024,
+	.p =
+	"\xff\xff\xff\xff\xff\xff\xff\xff\xad\xf8\x54\x58\xa2\xbb\x4a\x9a"
+	"\xaf\xdc\x56\x20\x27\x3d\x3c\xf1\xd8\xb9\xc5\x83\xce\x2d\x36\x95"
+	"\xa9\xe1\x36\x41\x14\x64\x33\xfb\xcc\x93\x9d\xce\x24\x9b\x3e\xf9"
+	"\x7d\x2f\xe3\x63\x63\x0c\x75\xd8\xf6\x81\xb2\x02\xae\xc4\x61\x7a"
+	"\xd3\xdf\x1e\xd5\xd5\xfd\x65\x61\x24\x33\xf5\x1f\x5f\x06\x6e\xd0"
+	"\x85\x63\x65\x55\x3d\xed\x1a\xf3\xb5\x57\x13\x5e\x7f\x57\xc9\x35"
+	"\x98\x4f\x0c\x70\xe0\xe6\x8b\x77\xe2\xa6\x89\xda\xf3\xef\xe8\x72"
+	"\x1d\xf1\x58\xa1\x36\xad\xe7\x35\x30\xac\xca\x4f\x48\x3a\x79\x7a"
+	"\xbc\x0a\xb1\x82\xb3\x24\xfb\x61\xd1\x08\xa9\x4b\xb2\xc8\xe3\xfb"
+	"\xb9\x6a\xda\xb7\x60\xd7\xf4\x68\x1d\x4f\x42\xa3\xde\x39\x4d\xf4"
+	"\xae\x56\xed\xe7\x63\x72\xbb\x19\x0b\x07\xa7\xc8\xee\x0a\x6d\x70"
+	"\x9e\x02\xfc\xe1\xcd\xf7\xe2\xec\xc0\x34\x04\xcd\x28\x34\x2f\x61"
+	"\x91\x72\xfe\x9c\xe9\x85\x83\xff\x8e\x4f\x12\x32\xee\xf2\x81\x83"
+	"\xc3\xfe\x3b\x1b\x4c\x6f\xad\x73\x3b\xb5\xfc\xbc\x2e\xc2\x20\x05"
+	"\xc5\x8e\xf1\x83\x7d\x16\x83\xb2\xc6\xf3\x4a\x26\xc1\xb2\xef\xfa"
+	"\x88\x6b\x42\x38\x61\x1f\xcf\xdc\xde\x35\x5b\x3b\x65\x19\x03\x5b"
+	"\xbc\x34\xf4\xde\xf9\x9c\x02\x38\x61\xb4\x6f\xc9\xd6\xe6\xc9\x07"
+	"\x7a\xd9\x1d\x26\x91\xf7\xf7\xee\x59\x8c\xb0\xfa\xc1\x86\xd9\x1c"
+	"\xae\xfe\x13\x09\x85\x13\x92\x70\xb4\x13\x0c\x93\xbc\x43\x79\x44"
+	"\xf4\xfd\x44\x52\xe2\xd7\x4d\xd3\x64\xf2\xe2\x1e\x71\xf5\x4b\xff"
+	"\x5c\xae\x82\xab\x9c\x9d\xf6\x9e\xe8\x6d\x2b\xc5\x22\x36\x3a\x0d"
+	"\xab\xc5\x21\x97\x9b\x0d\xea\xda\x1d\xbf\x9a\x42\xd5\xc4\x48\x4e"
+	"\x0a\xbc\xd0\x6b\xfa\x53\xdd\xef\x3c\x1b\x20\xee\x3f\xd5\x9d\x7c"
+	"\x25\xe4\x1d\x2b\x66\x9e\x1e\xf1\x6e\x6f\x52\xc3\x16\x4d\xf4\xfb"
+	"\x79\x30\xe9\xe4\xe5\x88\x57\xb6\xac\x7d\x5f\x42\xd6\x9f\x6d\x18"
+	"\x77\x63\xcf\x1d\x55\x03\x40\x04\x87\xf5\x5b\xa5\x7e\x31\xcc\x7a"
+	"\x71\x35\xc8\x86\xef\xb4\x31\x8a\xed\x6a\x1e\x01\x2d\x9e\x68\x32"
+	"\xa9\x07\x60\x0a\x91\x81\x30\xc4\x6d\xc7\x78\xf9\x71\xad\x00\x38"
+	"\x09\x29\x99\xa3\x33\xcb\x8b\x7a\x1a\x1d\xb9\x3d\x71\x40\x00\x3c"
+	"\x2a\x4e\xce\xa9\xf9\x8d\x0a\xcc\x0a\x82\x91\xcd\xce\xc9\x7d\xcf"
+	"\x8e\xc9\xb5\x5a\x7f\x88\xa4\x6b\x4d\xb5\xa8\x51\xf4\x41\x82\xe1"
+	"\xc6\x8a\x00\x7e\x5e\x0d\xd9\x02\x0b\xfd\x64\xb6\x45\x03\x6c\x7a"
+	"\x4e\x67\x7d\x2c\x38\x53\x2a\x3a\x23\xba\x44\x42\xca\xf5\x3e\xa6"
+	"\x3b\xb4\x54\x32\x9b\x76\x24\xc8\x91\x7b\xdd\x64\xb1\xc0\xfd\x4c"
+	"\xb3\x8e\x8c\x33\x4c\x70\x1c\x3a\xcd\xad\x06\x57\xfc\xcf\xec\x71"
+	"\x9b\x1f\x5c\x3e\x4e\x46\x04\x1f\x38\x81\x47\xfb\x4c\xfd\xb4\x77"
+	"\xa5\x24\x71\xf7\xa9\xa9\x69\x10\xb8\x55\x32\x2e\xdb\x63\x40\xd8"
+	"\xa0\x0e\xf0\x92\x35\x05\x11\xe3\x0a\xbe\xc1\xff\xf9\xe3\xa2\x6e"
+	"\x7f\xb2\x9f\x8c\x18\x30\x23\xc3\x58\x7e\x38\xda\x00\x77\xd9\xb4"
+	"\x76\x3e\x4e\x4b\x94\xb2\xbb\xc1\x94\xc6\x65\x1e\x77\xca\xf9\x92"
+	"\xee\xaa\xc0\x23\x2a\x28\x1b\xf6\xb3\xa7\x39\xc1\x22\x61\x16\x82"
+	"\x0a\xe8\xdb\x58\x47\xa6\x7c\xbe\xf9\xc9\x09\x1b\x46\x2d\x53\x8c"
+	"\xd7\x2b\x03\x74\x6a\xe7\x7f\x5e\x62\x29\x2c\x31\x15\x62\xa8\x46"
+	"\x50\x5d\xc8\x2d\xb8\x54\x33\x8a\xe4\x9f\x52\x35\xc9\x5b\x91\x17"
+	"\x8c\xcf\x2d\xd5\xca\xce\xf4\x03\xec\x9d\x18\x10\xc6\x27\x2b\x04"
+	"\x5b\x3b\x71\xf9\xdc\x6b\x80\xd6\x3f\xdd\x4a\x8e\x9a\xdb\x1e\x69"
+	"\x62\xa6\x95\x26\xd4\x31\x61\xc1\xa4\x1d\x57\x0d\x79\x38\xda\xd4"
+	"\xa4\x0e\x32\x9c\xcf\xf4\x6a\xaa\x36\xad\x00\x4c\xf6\x00\xc8\x38"
+	"\x1e\x42\x5a\x31\xd9\x51\xae\x64\xfd\xb2\x3f\xce\xc9\x50\x9d\x43"
+	"\x68\x7f\xeb\x69\xed\xd1\xcc\x5e\x0b\x8c\xc3\xbd\xf6\x4b\x10\xef"
+	"\x86\xb6\x31\x42\xa3\xab\x88\x29\x55\x5b\x2f\x74\x7c\x93\x26\x65"
+	"\xcb\x2c\x0f\x1c\xc0\x1b\xd7\x02\x29\x38\x88\x39\xd2\xaf\x05\xe4"
+	"\x54\x50\x4a\xc7\x8b\x75\x82\x82\x28\x46\xc0\xba\x35\xc3\x5f\x5c"
+	"\x59\x16\x0c\xc0\x46\xfd\x82\x51\x54\x1f\xc6\x8c\x9c\x86\xb0\x22"
+	"\xbb\x70\x99\x87\x6a\x46\x0e\x74\x51\xa8\xa9\x31\x09\x70\x3f\xee"
+	"\x1c\x21\x7e\x6c\x38\x26\xe5\x2c\x51\xaa\x69\x1e\x0e\x42\x3c\xfc"
+	"\x99\xe9\xe3\x16\x50\xc1\x21\x7b\x62\x48\x16\xcd\xad\x9a\x95\xf9"
+	"\xd5\xb8\x01\x94\x88\xd9\xc0\xa0\xa1\xfe\x30\x75\xa5\x77\xe2\x31"
+	"\x83\xf8\x1d\x4a\x3f\x2f\xa4\x57\x1e\xfc\x8c\xe0\xba\x8a\x4f\xe8"
+	"\xb6\x85\x5d\xfe\x72\xb0\xa6\x6e\xde\xd2\xfb\xab\xfb\xe5\x8a\x30"
+	"\xfa\xfa\xbe\x1c\x5d\x71\xa8\x7e\x2f\x74\x1e\xf8\xc1\xfe\x86\xfe"
+	"\xa6\xbb\xfd\xe5\x30\x67\x7f\x0d\x97\xd1\x1d\x49\xf7\xa8\x44\x3d"
+	"\x08\x22\xe5\x06\xa9\xf4\x61\x4e\x01\x1e\x2a\x94\x83\x8f\xf8\x8c"
+	"\xd6\x8c\x8b\xb7\xc5\xc6\x42\x4c\xff\xff\xff\xff\xff\xff\xff\xff",
+};
+
+static int dh_ffdhe2048_create(struct crypto_template *tmpl,
+			       struct rtattr **tb)
+{
+	return  __dh_safe_prime_create(tmpl, tb, &ffdhe2048_prime);
+}
+
+static int dh_ffdhe3072_create(struct crypto_template *tmpl,
+			       struct rtattr **tb)
+{
+	return  __dh_safe_prime_create(tmpl, tb, &ffdhe3072_prime);
+}
+
+static int dh_ffdhe4096_create(struct crypto_template *tmpl,
+			       struct rtattr **tb)
+{
+	return  __dh_safe_prime_create(tmpl, tb, &ffdhe4096_prime);
+}
+
+static int dh_ffdhe6144_create(struct crypto_template *tmpl,
+			       struct rtattr **tb)
+{
+	return  __dh_safe_prime_create(tmpl, tb, &ffdhe6144_prime);
+}
+
+static int dh_ffdhe8192_create(struct crypto_template *tmpl,
+			       struct rtattr **tb)
+{
+	return  __dh_safe_prime_create(tmpl, tb, &ffdhe8192_prime);
+}
+
+static struct crypto_template crypto_ffdhe_templates[] = {
+	{
+		.name = "ffdhe2048",
+		.create = dh_ffdhe2048_create,
+		.module = THIS_MODULE,
+	},
+	{
+		.name = "ffdhe3072",
+		.create = dh_ffdhe3072_create,
+		.module = THIS_MODULE,
+	},
+	{
+		.name = "ffdhe4096",
+		.create = dh_ffdhe4096_create,
+		.module = THIS_MODULE,
+	},
+	{
+		.name = "ffdhe6144",
+		.create = dh_ffdhe6144_create,
+		.module = THIS_MODULE,
+	},
+	{
+		.name = "ffdhe8192",
+		.create = dh_ffdhe8192_create,
+		.module = THIS_MODULE,
+	},
+};
+
+#else /* ! CONFIG_CRYPTO_DH_RFC7919_GROUPS */
+
+static struct crypto_template crypto_ffdhe_templates[] = {};
+
+#endif /* CONFIG_CRYPTO_DH_RFC7919_GROUPS */
+
+
 static int dh_init(void)
 {
-	return crypto_register_kpp(&dh);
+	int err;
+
+	err = crypto_register_kpp(&dh);
+	if (err)
+		return err;
+
+	err = crypto_register_templates(crypto_ffdhe_templates,
+					ARRAY_SIZE(crypto_ffdhe_templates));
+	if (err) {
+		crypto_unregister_kpp(&dh);
+		return err;
+	}
+
+	return 0;
 }
 
 static void dh_exit(void)
 {
+	crypto_unregister_templates(crypto_ffdhe_templates,
+				    ARRAY_SIZE(crypto_ffdhe_templates));
 	crypto_unregister_kpp(&dh);
 }
 
-- 
GitLab


From 60a273e9aecd8ee8a7d84f78f366795a67607829 Mon Sep 17 00:00:00 2001
From: Nicolai Stange <nstange@suse.de>
Date: Mon, 21 Feb 2022 13:10:54 +0100
Subject: [PATCH 0930/1586] crypto: testmgr - add known answer tests for
 ffdheXYZ(dh) templates

Add known answer tests for the ffdhe2048(dh), ffdhe3072(dh), ffdhe4096(dh),
ffdhe6144(dh) and ffdhe8192(dh) templates introduced with the previous
patch to the testmgr. All TVs have been generated with OpenSSL.

Signed-off-by: Nicolai Stange <nstange@suse.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/testmgr.c |  37 ++
 crypto/testmgr.h | 871 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 908 insertions(+)

diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index 0c2efde2f6c65..394d1952b2c59 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -4973,6 +4973,43 @@ static const struct alg_test_desc alg_test_descs[] = {
 			.cipher = __VECS(essiv_aes_cbc_tv_template)
 		}
 	}, {
+#if IS_ENABLED(CONFIG_CRYPTO_DH_RFC7919_GROUPS)
+		.alg = "ffdhe2048(dh)",
+		.test = alg_test_kpp,
+		.fips_allowed = 1,
+		.suite = {
+			.kpp = __VECS(ffdhe2048_dh_tv_template)
+		}
+	}, {
+		.alg = "ffdhe3072(dh)",
+		.test = alg_test_kpp,
+		.fips_allowed = 1,
+		.suite = {
+			.kpp = __VECS(ffdhe3072_dh_tv_template)
+		}
+	}, {
+		.alg = "ffdhe4096(dh)",
+		.test = alg_test_kpp,
+		.fips_allowed = 1,
+		.suite = {
+			.kpp = __VECS(ffdhe4096_dh_tv_template)
+		}
+	}, {
+		.alg = "ffdhe6144(dh)",
+		.test = alg_test_kpp,
+		.fips_allowed = 1,
+		.suite = {
+			.kpp = __VECS(ffdhe6144_dh_tv_template)
+		}
+	}, {
+		.alg = "ffdhe8192(dh)",
+		.test = alg_test_kpp,
+		.fips_allowed = 1,
+		.suite = {
+			.kpp = __VECS(ffdhe8192_dh_tv_template)
+		}
+	}, {
+#endif /* CONFIG_CRYPTO_DH_RFC7919_GROUPS */
 		.alg = "gcm(aes)",
 		.generic_driver = "gcm_base(ctr(aes-generic),ghash-generic)",
 		.test = alg_test_aead,
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index 31da6fd3cfe7f..4b880c63686a8 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -1458,6 +1458,877 @@ static const struct kpp_testvec dh_tv_template[] = {
 	}
 };
 
+static const struct kpp_testvec ffdhe2048_dh_tv_template[] __maybe_unused = {
+	{
+	.secret =
+#ifdef __LITTLE_ENDIAN
+	"\x01\x00" /* type */
+	"\x10\x01" /* len */
+	"\x00\x01\x00\x00" /* key_size */
+	"\x00\x00\x00\x00" /* p_size */
+	"\x00\x00\x00\x00" /* g_size */
+#else
+	"\x00\x01" /* type */
+	"\x01\x10" /* len */
+	"\x00\x00\x01\x00" /* key_size */
+	"\x00\x00\x00\x00" /* p_size */
+	"\x00\x00\x00\x00" /* g_size */
+#endif
+	/* xa */
+	"\x23\x7d\xd0\x06\xfd\x7a\xe5\x7a\x08\xda\x98\x31\xc0\xb3\xd5\x85"
+	"\xe2\x0d\x2a\x91\x5f\x78\x4b\xa6\x62\xd0\xa6\x35\xd4\xef\x86\x39"
+	"\xf1\xdb\x71\x5e\xb0\x11\x2e\xee\x91\x3a\xaa\xf9\xe3\xdf\x8d\x8b"
+	"\x48\x41\xde\xe8\x78\x53\xc5\x5f\x93\xd2\x79\x0d\xbe\x8d\x83\xe8"
+	"\x8f\x00\xd2\xde\x13\x18\x04\x05\x20\x6d\xda\xfa\x1d\x0b\x24\x52"
+	"\x3a\x18\x2b\xe1\x1e\xae\x15\x3b\x0f\xaa\x09\x09\xf6\x01\x98\xe9"
+	"\x81\x5d\x6b\x83\x6e\x55\xf1\x5d\x6f\x6f\x0d\x9d\xa8\x72\x32\x63"
+	"\x60\xe6\x0b\xc5\x22\xe2\xf9\x46\x58\xa2\x1c\x2a\xb0\xd5\xaf\xe3"
+	"\x5b\x03\xb7\x36\xb7\xba\x55\x20\x08\x7c\x51\xd4\x89\x42\x9c\x14"
+	"\x23\xe2\x71\x3e\x15\x2a\x0d\x34\x8a\xde\xad\x84\x11\x15\x72\x18"
+	"\x42\x43\x0a\xe2\x58\x29\xb3\x90\x0f\x56\xd8\x8a\x0f\x0e\xbc\x0e"
+	"\x9c\xe7\xd5\xe6\x5b\xbf\x06\x64\x38\x12\xa5\x8d\x5b\x68\x34\xdd"
+	"\x75\x48\xc9\xa7\xa3\x58\x5a\x1c\xe1\xb2\xc5\xe3\x39\x03\xcf\xab"
+	"\xc2\x14\x07\xaf\x55\x80\xc7\x63\xe4\x03\xeb\xe9\x0a\x25\x61\x85"
+	"\x1d\x0e\x81\x52\x7b\xbc\x4a\x0c\xc8\x59\x6a\xac\x18\xfb\x8c\x0c"
+	"\xb4\x79\xbd\xa1\x4c\xbb\x02\xc9\xd5\x13\x88\x3d\x25\xaa\x77\x49",
+	.b_public =
+	"\x5c\x00\x6f\xda\xfe\x4c\x0c\xc2\x18\xff\xa9\xec\x7a\xbe\x8a\x51"
+	"\x64\x6b\x57\xf8\xed\xe2\x36\x77\xc1\x23\xbf\x56\xa6\x48\x76\x34"
+	"\x0e\xf3\x68\x05\x45\x6a\x98\x5b\x9e\x8b\xc0\x11\x29\xcb\x5b\x66"
+	"\x2d\xc2\xeb\x4c\xf1\x7d\x85\x30\xaa\xd5\xf5\xb8\xd3\x62\x1e\x97"
+	"\x1e\x34\x18\xf8\x76\x8c\x10\xca\x1f\xe4\x5d\x62\xe1\xbe\x61\xef"
+	"\xaf\x2c\x8d\x97\x15\xa5\x86\xd5\xd3\x12\x6f\xec\xe2\xa4\xb2\x5a"
+	"\x35\x1d\xd4\x91\xa6\xef\x13\x09\x65\x9c\x45\xc0\x12\xad\x7f\xee"
+	"\x93\x5d\xfa\x89\x26\x7d\xae\xee\xea\x8c\xa3\xcf\x04\x2d\xa0\xc7"
+	"\xd9\x14\x62\xaf\xdf\xa0\x33\xd7\x5e\x83\xa2\xe6\x0e\x0e\x5d\x77"
+	"\xce\xe6\x72\xe4\xec\x9d\xff\x72\x9f\x38\x95\x19\x96\xba\x4c\xe3"
+	"\x5f\xb8\x46\x4a\x1d\xe9\x62\x7b\xa8\xdc\xe7\x61\x90\x6b\xb9\xd4"
+	"\xad\x0b\xa3\x06\xb3\x70\xfa\xea\x2b\xc4\x2c\xde\x43\x37\xf6\x8d"
+	"\x72\xf0\x86\x9a\xbb\x3b\x8e\x7a\x71\x03\x30\x30\x2a\x5d\xcd\x1e"
+	"\xe4\xd3\x08\x07\x75\x17\x17\x72\x1e\x77\x6c\x98\x0d\x29\x7f\xac"
+	"\xe7\xb2\xee\xa9\x1c\x33\x9d\x08\x39\xe1\xd8\x5b\xe5\xbc\x48\xb2"
+	"\xb6\xdf\xcd\xa0\x42\x06\xcc\xfb\xed\x60\x6f\xbc\x57\xac\x09\x45",
+	.expected_a_public =
+	"\x8b\xdb\xc1\xf7\xc6\xba\xa1\x38\x95\x6a\xa1\xb6\x04\x5e\xae\x52"
+	"\x72\xfc\xef\x2d\x9d\x71\x05\x9c\xd3\x02\xa9\xfb\x55\x0f\xfa\xc9"
+	"\xb4\x34\x51\xa3\x28\x89\x8d\x93\x92\xcb\xd9\xb5\xb9\x66\xfc\x67"
+	"\x15\x92\x6f\x73\x85\x15\xe2\xfc\x11\x6b\x97\x8c\x4b\x0f\x12\xfa"
+	"\x8d\x72\x76\x9b\x8f\x3b\xfe\x31\xbe\x42\x88\x4c\xd2\xb2\x70\xa6"
+	"\xa5\xe3\x7e\x73\x07\x12\x36\xaa\xc9\x5c\x83\xe1\xf1\x46\x41\x4f"
+	"\x7c\x52\xaf\xdc\xa4\xe6\x82\xa3\x86\x83\x47\x5a\x12\x3a\x0c\xe3"
+	"\xdd\xdb\x94\x03\x2a\x59\x91\xa0\x19\xe5\xf8\x07\xdd\x54\x6a\x22"
+	"\x43\xb7\xf3\x74\xd7\xb9\x30\xfe\x9c\xe8\xd1\xcf\x06\x43\x68\xb9"
+	"\x54\x8f\x54\xa2\xe5\x3c\xf2\xc3\x4c\xee\xd4\x7c\x5d\x0e\xb1\x7b"
+	"\x16\x68\xb5\xb3\x7d\xd4\x11\x83\x5c\x77\x17\xc4\xf0\x59\x76\x7a"
+	"\x83\x40\xe5\xd9\x4c\x76\x23\x5b\x17\x6d\xee\x4a\x92\x68\x4b\x89"
+	"\xa0\x6d\x23\x8c\x80\x31\x33\x3a\x12\xf4\x50\xa6\xcb\x13\x97\x01"
+	"\xb8\x2c\xe6\xd2\x38\xdf\xd0\x7f\xc6\x27\x19\x0e\xb2\x07\xfd\x1f"
+	"\x1b\x9c\x1b\x87\xf9\x73\x6a\x3f\x7f\xb0\xf9\x2f\x3c\x19\x9f\xc9"
+	"\x8f\x97\x21\x0e\x8e\xbb\x1a\x17\x20\x15\xdd\xc6\x42\x60\xae\x4d",
+	.expected_ss =
+	"\xf3\x0e\x64\x7b\x66\xd7\x82\x7e\xab\x7e\x4a\xbe\x13\x6f\x43\x3d"
+	"\xea\x4f\x1f\x8b\x9d\x41\x56\x71\xe1\x06\x96\x02\x68\xfa\x44\x6e"
+	"\xe7\xf2\x26\xd4\x01\x4a\xf0\x28\x25\x76\xad\xd7\xe0\x17\x74\xfe"
+	"\xf9\xe1\x6d\xd3\xf7\xc7\xdf\xc0\x62\xa5\xf3\x4e\x1b\x5c\x77\x2a"
+	"\xfb\x0b\x87\xc3\xde\x1e\xc1\xe0\xd3\x7a\xb8\x02\x02\xec\x9c\x97"
+	"\xfb\x34\xa0\x20\x10\x23\x87\xb2\x9a\x72\xe3\x3d\xb2\x18\x50\xf3"
+	"\x6a\xd3\xd3\x19\xc4\x36\xd5\x59\xd6\xd6\xa7\x5c\xc3\xf9\x09\x33"
+	"\xa1\xf5\xb9\x4b\xf3\x0b\xe1\x4f\x79\x6b\x45\xf2\xec\x8b\xe5\x69"
+	"\x9f\xc6\x05\x01\xfe\x3a\x13\xfd\x6d\xea\x03\x83\x29\x7c\x7f\xf5"
+	"\x41\x55\x95\xde\x7e\x62\xae\xaf\x28\xdb\x7c\xa9\x90\x1e\xb2\xb1"
+	"\x1b\xef\xf1\x2e\xde\x47\xaa\xa8\x92\x9a\x49\x3d\xc0\xe0\x8d\xbb"
+	"\x0c\x42\x86\xaf\x00\xce\xb0\xab\x22\x7c\xe9\xbe\xb9\x72\x2f\xcf"
+	"\x5e\x5d\x62\x52\x2a\xd1\xfe\xcc\xa2\xf3\x40\xfd\x01\xa7\x54\x0a"
+	"\xa1\xfb\x1c\xf2\x44\xa6\x47\x30\x5a\xba\x2a\x05\xff\xd0\x6c\xab"
+	"\xeb\xe6\x8f\xf6\xd7\x73\xa3\x0e\x6c\x0e\xcf\xfd\x8e\x16\x5d\xe0"
+	"\x2c\x11\x05\x82\x3c\x22\x16\x6c\x52\x61\xcf\xbb\xff\xf8\x06\xd0",
+	.secret_size = 272,
+	.b_public_size = 256,
+	.expected_a_public_size = 256,
+	.expected_ss_size = 256,
+	},
+};
+
+static const struct kpp_testvec ffdhe3072_dh_tv_template[] __maybe_unused = {
+	{
+	.secret =
+#ifdef __LITTLE_ENDIAN
+	"\x01\x00" /* type */
+	"\x90\x01" /* len */
+	"\x80\x01\x00\x00" /* key_size */
+	"\x00\x00\x00\x00" /* p_size */
+	"\x00\x00\x00\x00" /* g_size */
+#else
+	"\x00\x01" /* type */
+	"\x01\x90" /* len */
+	"\x00\x00\x01\x80" /* key_size */
+	"\x00\x00\x00\x00" /* p_size */
+	"\x00\x00\x00\x00" /* g_size */
+#endif
+	/* xa */
+	"\x6b\xb4\x97\x23\xfa\xc8\x5e\xa9\x7b\x63\xe7\x3e\x0e\x99\xc3\xb9"
+	"\xda\xb7\x48\x0d\xc3\xb1\xbf\x4f\x17\xc7\xa9\x51\xf6\x64\xff\xc4"
+	"\x31\x58\x87\x25\x83\x2c\x00\xf0\x41\x29\xf7\xee\xf9\xe6\x36\x76"
+	"\xd6\x3a\x24\xbe\xa7\x07\x0b\x93\xc7\x9f\x6c\x75\x0a\x26\x75\x76"
+	"\xe3\x0c\x42\xe0\x00\x04\x69\xd9\xec\x0b\x59\x54\x28\x8f\xd7\x9a"
+	"\x63\xf4\x5b\xdf\x85\x65\xc4\xe1\x95\x27\x4a\x42\xad\x36\x47\xa9"
+	"\x0a\xf8\x14\x1c\xf3\x94\x3b\x7e\x47\x99\x35\xa8\x18\xec\x70\x10"
+	"\xdf\xcb\xd2\x78\x88\xc1\x2d\x59\x93\xc1\xa4\x6d\xd7\x1d\xb9\xd5"
+	"\xf8\x30\x06\x7f\x98\x90\x0c\x74\x5e\x89\x2f\x64\x5a\xad\x5f\x53"
+	"\xb2\xa3\xa8\x83\xbf\xfc\x37\xef\xb8\x36\x0a\x5c\x62\x81\x64\x74"
+	"\x16\x2f\x45\x39\x2a\x91\x26\x87\xc0\x12\xcc\x75\x11\xa3\xa1\xc5"
+	"\xae\x20\xcf\xcb\x20\x25\x6b\x7a\x31\x93\x9d\x38\xb9\x57\x72\x46"
+	"\xd4\x84\x65\x87\xf1\xb5\xd3\xab\xfc\xc3\x4d\x40\x92\x94\x1e\xcd"
+	"\x1c\x87\xec\x3f\xcd\xbe\xd0\x95\x6b\x40\x02\xdd\x62\xeb\x0a\xda"
+	"\x4f\xbe\x8e\x32\x48\x8b\x6d\x83\xa0\x96\x62\x23\xec\x83\x91\x44"
+	"\xf9\x72\x01\xac\xa0\xe4\x72\x1d\x5a\x75\x05\x57\x90\xae\x7e\xb4"
+	"\x71\x39\x01\x05\xdc\xe9\xee\xcb\xf0\x61\x28\x91\x69\x8c\x31\x03"
+	"\x7a\x92\x15\xa1\x58\x67\x3d\x70\x82\xa6\x2c\xfe\x10\x56\x58\xd3"
+	"\x94\x67\xe1\xbe\xee\xc1\x64\x5c\x4b\xc8\x28\x3d\xc5\x66\x3a\xab"
+	"\x22\xc1\x7e\xa1\xbb\xf3\x19\x3b\xda\x46\x82\x45\xd4\x3c\x7c\xc6"
+	"\xce\x1f\x7f\x95\xa2\x17\xff\x88\xba\xd6\x4d\xdb\xd2\xea\xde\x39"
+	"\xd6\xa5\x18\x73\xbb\x64\x6e\x79\xe9\xdc\x3f\x92\x7f\xda\x1f\x49"
+	"\x33\x70\x65\x73\xa2\xd9\x06\xb8\x1b\x29\x29\x1a\xe0\xa3\xe6\x05"
+	"\x9a\xa8\xc2\x4e\x7a\x78\x1d\x22\x57\x21\xc8\xa3\x8d\x66\x3e\x23",
+	.b_public =
+	"\x73\x40\x8b\xce\xe8\x6a\x1c\x03\x50\x54\x42\x36\x22\xc6\x1d\xe8"
+	"\xe1\xef\x5c\x89\xa5\x55\xc1\xc4\x1c\xd7\x4f\xee\x5d\xba\x62\x60"
+	"\xfe\x93\x2f\xfd\x93\x2c\x8f\x70\xc6\x47\x17\x25\xb2\x95\xd7\x7d"
+	"\x41\x81\x4d\x52\x1c\xbe\x4d\x57\x3e\x26\x51\x28\x03\x8f\x67\xf5"
+	"\x22\x16\x1c\x67\xf7\x62\xcb\xfd\xa3\xee\x8d\xe0\xfa\x15\x9a\x53"
+	"\xbe\x7b\x9f\xc0\x12\x7a\xfc\x5e\x77\x2d\x60\x06\xba\x71\xc5\xca"
+	"\xd7\x26\xaf\x3b\xba\x6f\xd3\xc4\x82\x57\x19\x26\xb0\x16\x7b\xbd"
+	"\x83\xf2\x21\x03\x79\xff\x0a\x6f\xc5\x7b\x00\x15\xad\x5b\xf4\x42"
+	"\x1f\xcb\x7f\x3d\x34\x77\x3c\xc3\xe0\x38\xa5\x40\x51\xbe\x6f\xd9"
+	"\xc9\x77\x9c\xfc\x0d\xc1\x8e\xef\x0f\xaa\x5e\xa8\xbb\x16\x4a\x3e"
+	"\x26\x55\xae\xc1\xb6\x3e\xfd\x73\xf7\x59\xd2\xe5\x4b\x91\x8e\x28"
+	"\x77\x1e\x5a\xe2\xcd\xce\x92\x35\xbb\x1e\xbb\xcf\x79\x94\xdf\x31"
+	"\xde\x31\xa8\x75\xf6\xe0\xaa\x2e\xe9\x4f\x44\xc8\xba\xb9\xab\x80"
+	"\x29\xa1\xea\x58\x2e\x40\x96\xa0\x1a\xf5\x2c\x38\x47\x43\x5d\x26"
+	"\x2c\xd8\xad\xea\xd3\xad\xe8\x51\x49\xad\x45\x2b\x25\x7c\xde\xe4"
+	"\xaf\x03\x2a\x39\x26\x86\x66\x10\xbc\xa8\x71\xda\xe0\xe8\xf1\xdd"
+	"\x50\xff\x44\xb2\xd3\xc7\xff\x66\x63\xf6\x42\xe3\x97\x9d\x9e\xf4"
+	"\xa6\x89\xb9\xab\x12\x17\xf2\x85\x56\x9c\x6b\x24\x71\x83\x57\x7d"
+	"\x3c\x7b\x2b\x88\x92\x19\xd7\x1a\x00\xd5\x38\x94\x43\x60\x4d\xa7"
+	"\x12\x9e\x0d\xf6\x5c\x9a\xd3\xe2\x9e\xb1\x21\xe8\xe2\x9e\xe9\x1e"
+	"\x9d\xa5\x94\x95\xa6\x3d\x12\x15\xd8\x8b\xac\xe0\x8c\xde\xe6\x40"
+	"\x98\xaa\x5e\x55\x4f\x3d\x86\x87\x0d\xe3\xc6\x68\x15\xe6\xde\x17"
+	"\x78\x21\xc8\x6c\x06\xc7\x94\x56\xb4\xaf\xa2\x35\x0b\x0c\x97\xd7"
+	"\xa4\x12\xee\xf4\xd2\xef\x80\x28\xb3\xee\xe9\x15\x8b\x01\x32\x79",
+	.expected_a_public =
+	"\x1b\x6a\xba\xea\xa3\xcc\x50\x69\xa9\x41\x89\xaf\x04\xe1\x44\x22"
+	"\x97\x20\xd1\xf6\x1e\xcb\x64\x36\x6f\xee\x0b\x16\xc1\xd9\x91\xbe"
+	"\x57\xc8\xd9\xf2\xa1\x96\x91\xec\x41\xc7\x79\x00\x1a\x48\x25\x55"
+	"\xbe\xf3\x20\x8c\x38\xc6\x7b\xf2\x8b\x5a\xc3\xb5\x87\x0a\x86\x3d"
+	"\xb7\xd6\xce\xb0\x96\x2e\x5d\xc4\x00\x5e\x42\xe4\xe5\x50\x4f\xb8"
+	"\x6f\x18\xa4\xe1\xd3\x20\xfc\x3c\xf5\x0a\xff\x23\xa6\x5b\xb4\x17"
+	"\x3e\x7b\xdf\xb9\xb5\x3c\x1b\x76\x29\xcd\xb4\x46\x4f\x27\x8f\xd2"
+	"\xe8\x27\x66\xdb\xe8\xb3\xf5\xe1\xd0\x04\xcd\x89\xff\xba\x76\x67"
+	"\xe8\x4d\xcf\x86\x1c\x8a\xd1\xcf\x99\x27\xfb\xa9\x78\xcc\x94\xaf"
+	"\x3d\x04\xfd\x25\xc0\x47\xfa\x29\x80\x05\xf4\xde\xad\xdb\xab\x12"
+	"\xb0\x2b\x8e\xca\x02\x06\x6d\xad\x3e\x09\xb1\x22\xa3\xf5\x4c\x6d"
+	"\x69\x99\x58\x8b\xd8\x45\x2e\xe0\xc9\x3c\xf7\x92\xce\x21\x90\x6b"
+	"\x3b\x65\x9f\x64\x79\x8d\x67\x22\x1a\x37\xd3\xee\x51\xe2\xe7\x5a"
+	"\x93\x51\xaa\x3c\x4b\x04\x16\x32\xef\xe3\x66\xbe\x18\x94\x88\x64"
+	"\x79\xce\x06\x3f\xb8\xd6\xee\xdc\x13\x79\x6f\x20\x14\xc2\x6b\xce"
+	"\xc8\xda\x42\xa5\x93\x5b\xe4\x7f\x1a\xe6\xda\x0f\xb3\xc1\x5f\x30"
+	"\x50\x76\xe8\x37\x3d\xca\x77\x2c\xa8\xe4\x3b\xf9\x6f\xe0\x17\xed"
+	"\x0e\xef\xb7\x31\x14\xb5\xea\xd9\x39\x22\x89\xb6\x40\x57\xcc\x84"
+	"\xef\x73\xa7\xe9\x27\x21\x85\x89\xfa\xaf\x03\xda\x9c\x8b\xfd\x52"
+	"\x7d\xb0\xa4\xe4\xf9\xd8\x90\x55\xc4\x39\xd6\x9d\xaf\x3b\xce\xac"
+	"\xaa\x36\x14\x7a\x9b\x8b\x12\x43\xe1\xca\x61\xae\x46\x5b\xe7\xe5"
+	"\x88\x32\x80\xa0\x2d\x51\xbb\x2f\xea\xeb\x3c\x71\xb2\xae\xce\xca"
+	"\x61\xd2\x76\xe0\x45\x46\x78\x4e\x09\x2d\xc2\x54\xc2\xa9\xc7\xa8"
+	"\x55\x8e\x72\xa4\x8b\x8a\xc9\x01\xdb\xe9\x58\x11\xa1\xc4\xe7\x12",
+	.expected_ss =
+	"\x47\x8e\xb2\x19\x09\xf0\x46\x99\x6b\x41\x86\xf7\x34\xad\xbf\x2a"
+	"\x18\x1b\x7d\xec\xa9\xb2\x47\x2f\x40\xfb\x9a\x64\x30\x44\xf3\x4c"
+	"\x01\x67\xad\x57\x5a\xbc\xd4\xc8\xef\x7e\x8a\x14\x74\x1d\x6d\x8c"
+	"\x7b\xce\xc5\x57\x5f\x95\xe8\x72\xba\xdf\xa3\xcd\x00\xbe\x09\x4c"
+	"\x06\x72\xe7\x17\xb0\xe5\xe5\xb7\x20\xa5\xcb\xd9\x68\x99\xad\x3f"
+	"\xde\xf3\xde\x1d\x1c\x00\x74\xd2\xd1\x57\x55\x5d\xce\x76\x0c\xc4"
+	"\x7a\xc4\x65\x7c\x19\x17\x0a\x09\x66\x7d\x3a\xab\xf7\x61\x3a\xe3"
+	"\x5b\xac\xcf\x69\xb0\x8b\xee\x5d\x28\x36\xbb\x3f\x74\xce\x6e\x38"
+	"\x1e\x39\xab\x26\xca\x89\xdc\x58\x59\xcb\x95\xe4\xbc\xd6\x19\x48"
+	"\xd0\x55\x68\x7b\xb4\x27\x95\x3c\xd9\x58\x10\x4f\x8f\x55\x1c\x3f"
+	"\x04\xce\x89\x1f\x82\x28\xe9\x48\x17\x47\x8f\xee\xb7\x8f\xeb\xb1"
+	"\x29\xa8\x23\x18\x73\x33\x9f\x83\x08\xca\xcd\x54\x6e\xca\xec\x78"
+	"\x7b\x16\x83\x3f\xdb\x0a\xef\xfd\x87\x94\x19\x08\x6e\x6e\x22\x57"
+	"\xd7\xd2\x79\xf9\xf6\xeb\xe0\x6c\x93\x9d\x95\xfa\x41\x7a\xa9\xd6"
+	"\x2a\xa3\x26\x9b\x24\x1b\x8b\xa0\xed\x04\xb2\xe4\x6c\x4e\xc4\x3f"
+	"\x61\xe5\xe0\x4d\x09\x28\xaf\x58\x35\x25\x0b\xd5\x38\x18\x69\x51"
+	"\x18\x51\x73\x7b\x28\x19\x9f\xe4\x69\xfc\x2c\x25\x08\x99\x8f\x62"
+	"\x65\x62\xa5\x28\xf1\xf4\xfb\x02\x29\x27\xb0\x5e\xbb\x4f\xf9\x1a"
+	"\xa7\xc4\x38\x63\x5b\x01\xfe\x00\x66\xe3\x47\x77\x21\x85\x17\xd5"
+	"\x34\x19\xd3\x87\xab\x44\x62\x08\x59\xb2\x6b\x1f\x21\x0c\x23\x84"
+	"\xf7\xba\x92\x67\xf9\x16\x85\x6a\xe0\xeb\xe7\x4f\x06\x80\x81\x81"
+	"\x28\x9c\xe8\x2e\x71\x97\x48\xe0\xd1\xbc\xce\xe9\x42\x2c\x89\xdf"
+	"\x0b\xa9\xa1\x07\x84\x33\x78\x7f\x49\x2f\x1c\x55\xc3\x7f\xc3\x37"
+	"\x40\xdf\x13\xf4\xa0\x21\x79\x6e\x3a\xe3\xb8\x23\x9e\x8a\x6e\x9c",
+	.secret_size = 400,
+	.b_public_size = 384,
+	.expected_a_public_size = 384,
+	.expected_ss_size = 384,
+	},
+};
+
+static const struct kpp_testvec ffdhe4096_dh_tv_template[] __maybe_unused = {
+	{
+	.secret =
+#ifdef __LITTLE_ENDIAN
+	"\x01\x00" /* type */
+	"\x10\x02" /* len */
+	"\x00\x02\x00\x00" /* key_size */
+	"\x00\x00\x00\x00" /* p_size */
+	"\x00\x00\x00\x00" /* g_size */
+#else
+	"\x00\x01" /* type */
+	"\x02\x10" /* len */
+	"\x00\x00\x02\x00" /* key_size */
+	"\x00\x00\x00\x00" /* p_size */
+	"\x00\x00\x00\x00" /* g_size */
+#endif
+	/* xa */
+	"\x1a\x48\xf3\x6c\x61\x03\x42\x43\xd7\x42\x3b\xfa\xdb\x55\x6f\xa2"
+	"\xe1\x79\x52\x0b\x47\xc5\x03\x60\x2f\x26\xb9\x1a\x14\x15\x1a\xd9"
+	"\xe0\xbb\xa7\x82\x63\x41\xec\x26\x55\x00\xab\xe5\x21\x9d\x31\x14"
+	"\x0e\xe2\xc2\xb2\xb8\x37\xe6\xc3\x5a\xab\xae\x25\xdb\x71\x1e\xed"
+	"\xe8\x75\x9a\x04\xa7\x92\x2a\x99\x7e\xc0\x5b\x64\x75\x7f\xe5\xb5"
+	"\xdb\x6c\x95\x4f\xe9\xdc\x39\x76\x79\xb0\xf7\x00\x30\x8e\x86\xe7"
+	"\x36\xd1\xd2\x0c\x68\x7b\x94\xe9\x91\x85\x08\x86\xbc\x64\x87\xd2"
+	"\xf5\x5b\xaf\x03\xf6\x5f\x28\x25\xf1\xa3\x20\x5c\x1b\xb5\x26\x45"
+	"\x9a\x47\xab\xd6\xad\x49\xab\x92\x8e\x62\x6f\x48\x31\xea\xf6\x76"
+	"\xff\xa2\xb6\x28\x78\xef\x59\xc3\x71\x5d\xa8\xd9\x70\x89\xcc\xe2"
+	"\x63\x58\x5e\x3a\xa2\xa2\x88\xbf\x77\x20\x84\x33\x65\x64\x4e\x73"
+	"\xe5\x08\xd5\x89\x23\xd6\x07\xac\x29\x65\x2e\x02\xa8\x35\x96\x48"
+	"\xe7\x5d\x43\x6a\x42\xcc\xda\x98\xc4\x75\x90\x2e\xf6\xc4\xbf\xd4"
+	"\xbc\x31\x14\x0d\x54\x30\x11\xb2\xc9\xcf\xbb\xba\xbc\xc6\xf2\xcf"
+	"\xfe\x4a\x9d\xf3\xec\x78\x5d\x5d\xb4\x99\xd0\x67\x0f\x5a\x21\x1c"
+	"\x7b\x95\x2b\xcf\x49\x44\x94\x05\x1a\x21\x81\x25\x7f\xe3\x8a\x2a"
+	"\xdd\x88\xac\x44\x94\x23\x20\x3b\x75\xf6\x2a\x8a\x45\xf8\xb5\x1f"
+	"\xb9\x8b\xeb\xab\x9b\x38\x23\x26\xf1\x0f\x34\x47\x4f\x7f\xe1\x9e"
+	"\x84\x84\x78\xe5\xe3\x49\xeb\xcc\x2f\x02\x85\xa4\x18\x91\xde\x1a"
+	"\x60\x54\x33\x81\xd5\xae\xdb\x23\x9c\x4d\xa4\xdb\x22\x5b\xdf\xf4"
+	"\x8e\x05\x2b\x60\xba\xe8\x75\xfc\x34\x99\xcf\x35\xe1\x06\xba\xdc"
+	"\x79\x2a\x5e\xec\x1c\xbe\x79\x33\x63\x1c\xe7\x5f\x1e\x30\xd6\x1b"
+	"\xdb\x11\xb8\xea\x63\xff\xfe\x1a\x3c\x24\xf4\x78\x9c\xcc\x5d\x9a"
+	"\xc9\x2d\xc4\x9a\xd4\xa7\x65\x84\x98\xdb\x66\x76\xf0\x34\x31\x9f"
+	"\xce\xb5\xfb\x28\x07\xde\x1e\x0d\x9b\x01\x64\xeb\x2a\x37\x2f\x20"
+	"\xa5\x95\x72\x2b\x54\x51\x59\x91\xea\x50\x54\x0f\x2e\xb0\x1d\xf6"
+	"\xb9\x46\x43\xf9\xd0\x13\x21\x20\x47\x61\x1a\x1c\x30\xc6\x9e\x75"
+	"\x22\xe4\xf2\xb1\xab\x01\xdc\x5b\x3c\x1e\xa2\x6d\xc0\xb9\x9a\x2a"
+	"\x84\x61\xea\x85\x63\xa0\x77\xd0\xeb\x20\x68\xd5\x95\x6a\x1b\x8f"
+	"\x1f\x9a\xba\x44\x49\x8c\x77\xa6\xd9\xa0\x14\xf8\x7d\x9b\x4e\xfa"
+	"\xdc\x4f\x1c\x4d\x60\x50\x26\x7f\xd6\xc1\x91\x2b\xa6\x37\x5d\x94"
+	"\x69\xb2\x47\x59\xd6\xc3\x59\xbb\xd6\x9b\x71\x52\x85\x7a\xcb\x2d",
+	.b_public =
+	"\x24\x38\x02\x02\x2f\xeb\x54\xdd\x73\x21\x91\x4a\xd8\xa4\x0a\xbf"
+	"\xf4\xf5\x9a\x45\xb5\xcd\x42\xa3\x57\xcc\x65\x4a\x23\x2e\xee\x59"
+	"\xba\x6f\x14\x89\xae\x2e\x14\x0a\x72\x77\x23\x7f\x6c\x2e\xba\x52"
+	"\x3f\x71\xbf\xe4\x60\x03\x16\xaa\x61\xf5\x80\x1d\x8a\x45\x9e\x53"
+	"\x7b\x07\xd9\x7e\xfe\xaf\xcb\xda\xff\x20\x71\xba\x89\x39\x75\xc3"
+	"\xb3\x65\x0c\xb1\xa7\xfa\x4a\xe7\xe0\x85\xc5\x4e\x91\x47\x41\xf4"
+	"\xdd\xcd\xc5\x3d\x17\x12\xed\xee\xc0\x31\xb1\xaf\xc1\xd5\x3c\x07"
+	"\xa1\x5a\xc4\x05\x45\xe3\x10\x0c\xc3\x14\xae\x65\xca\x40\xae\x31"
+	"\x5c\x13\x0d\x32\x85\xa7\x6e\xf4\x5e\x29\x3d\x4e\xd3\xd7\x49\x58"
+	"\xe1\x73\xbb\x0a\x7b\xd6\x13\xea\x49\xd7\x20\x3d\x31\xaa\x77\xab"
+	"\x21\x74\xe9\x2f\xe9\x5e\xbe\x2f\xb4\xa2\x79\xf2\xbc\xcc\x51\x94"
+	"\xd2\x1d\xb2\xe6\xc5\x39\x66\xd7\xe5\x46\x75\x53\x76\xed\x49\xea"
+	"\x3b\xdd\x01\x27\xdb\x83\xa5\x9f\xd2\xee\xc8\xde\x9e\xde\xd2\xe7"
+	"\x99\xad\x9c\xe0\x71\x66\x29\xd8\x0d\xfe\xdc\xd1\xbc\xc7\x9a\xbe"
+	"\x8b\x26\x46\x57\xb6\x79\xfa\xad\x8b\x45\x2e\xb5\xe5\x89\x34\x01"
+	"\x93\x00\x9d\xe9\x58\x74\x8b\xda\x07\x92\xb5\x01\x4a\xe1\x44\x36"
+	"\xc7\x6c\xde\xc8\x7a\x17\xd0\xde\xee\x68\x92\xb5\xde\x21\x2b\x1c"
+	"\xbc\x65\x30\x1e\xae\x15\x3d\x9a\xaf\x20\xa3\xc4\x21\x70\xfb\x2f"
+	"\x36\x72\x31\xc0\xe8\x85\xdf\xc5\x50\x4c\x90\x10\x32\xa4\xc7\xee"
+	"\x59\x5a\x21\xf4\xf1\x33\xcf\xbe\xac\x67\xb1\x40\x7c\x0b\x3f\x64"
+	"\xe5\xd2\x2d\xb7\x7d\x0f\xce\xf7\x9b\x05\xee\x37\x61\xd2\x61\x9e"
+	"\x1a\x80\x2e\x79\xe6\x1b\x25\xb3\x61\x3d\x53\xe7\xe5\x97\x9a\xc2"
+	"\x39\xb1\xe3\x91\xc6\xee\x96\x2e\xa9\xb4\xb8\xad\xd8\x04\x3e\x11"
+	"\x31\x67\xb8\x6a\xcb\x6e\x1a\x4c\x7f\x74\xc7\x1f\x09\xd1\xd0\x6b"
+	"\x17\xde\xea\xe8\x0b\xe6\x6a\xee\x2f\xe3\x5b\x9c\x59\x5d\x00\x57"
+	"\xbf\x24\x25\xba\x22\x34\xb9\xc5\x3c\xc4\x57\x26\xd0\x6d\x89\xee"
+	"\x67\x79\x3c\x70\xf9\xc3\xb4\x30\xf0\x2e\xca\xfa\x74\x00\xd1\x00"
+	"\x6d\x03\x97\xd5\x08\x3f\x0b\x8e\xb8\x1d\xa3\x91\x7f\xa9\x3a\xf0"
+	"\x37\x57\x46\x87\x82\xa3\xb5\x8f\x51\xaa\xc7\x7b\xfe\x86\x26\xb9"
+	"\xfa\xe6\x1e\xee\x92\x9d\x3a\xed\x5b\x5e\x3f\xe5\xca\x5e\x13\x01"
+	"\xdd\x4c\x8d\x85\xf0\x60\x61\xb7\x60\x24\x83\x9f\xbe\x72\x21\x81"
+	"\x55\x7e\x7e\x6d\xf3\x28\xc8\x77\x5a\xae\x5a\x32\x86\xd5\x61\xad",
+	.expected_a_public =
+	"\x1f\xff\xd6\xc4\x59\xf3\x4a\x9e\x81\x74\x4d\x27\xa7\xc6\x6b\x35"
+	"\xd8\xf5\xb3\x24\x97\x82\xe7\x2e\xf3\x21\x91\x23\x2f\x3d\x57\x7f"
+	"\x15\x8c\x84\x71\xe7\x25\x35\xe8\x07\x14\x06\x4c\x83\xdc\x55\x4a"
+	"\xf8\x45\xc5\xe9\xfa\x6e\xae\x6e\xcf\x4d\x11\x91\x26\x16\x6f\x86"
+	"\x89\x78\xaa\xb4\x25\x54\xb2\x74\x07\xe5\x26\x26\x0c\xad\xa4\x57"
+	"\x59\x61\x66\x71\x43\x22\xff\x49\x51\xa4\x76\x0e\x55\x7b\x60\x45"
+	"\x4f\xaf\xbd\x9c\xec\x64\x3f\x80\x0b\x0c\x31\x41\xf0\xfe\x2c\xb7"
+	"\x0a\xbe\xa5\x71\x08\x0d\x8d\x1e\x8a\x77\x9a\xd2\x90\x31\x96\xd0"
+	"\x3b\x31\xdc\xc6\x18\x59\x43\xa1\x19\x5a\x84\x68\x29\xad\x5e\x58"
+	"\xa2\x50\x3e\x83\xf5\x7a\xbd\x88\x17\x60\x89\x98\x9c\x19\x89\x27"
+	"\x89\xfc\x33\x87\x42\xd5\xde\x19\x14\xf2\x95\x82\x10\x87\xad\x82"
+	"\xdd\x6b\x51\x2d\x8d\x0e\x81\x4b\xde\xb3\x35\x6c\x0f\x4b\x56\x45"
+	"\x48\x87\xe9\x5a\xf9\x70\x10\x30\x8e\xa1\xbb\xa4\x70\xbf\xa0\xab"
+	"\x10\x31\x3c\x2c\xdc\xc4\xed\xe3\x51\xdc\xee\xd2\xa5\x5c\x4e\x6e"
+	"\xf6\xed\x60\x5a\xeb\xf3\x02\x19\x2a\x95\xe9\x46\xff\x37\x1b\xf0"
+	"\x1d\x10\x4a\x8f\x4f\x3a\x6e\xf5\xfc\x02\x6d\x09\x7d\xea\x69\x7b"
+	"\x13\xb0\xb6\x80\x5c\x15\x20\xa8\x4d\x15\x56\x11\x72\x49\xdb\x48"
+	"\x54\x40\x66\xd5\xcd\x17\x3a\x26\x95\xf6\xd7\xf2\x59\xa3\xda\xbb"
+	"\x26\xd0\xe5\x46\xbf\xee\x0e\x7d\xf1\xe0\x11\x02\x4d\xd3\xdc\xe2"
+	"\x3f\xc2\x51\x7e\xc7\x90\x33\x3c\x1c\xa0\x4c\x69\xcc\x1e\xc7\xac"
+	"\x17\xe0\xe5\xf4\x8c\x05\x64\x34\xfe\x84\x70\xd7\x6b\xed\xab\xf5"
+	"\x88\x9d\x3e\x4c\x5a\x9e\xd4\x74\xfd\xdd\x91\xd5\xd4\xcb\xbf\xf8"
+	"\xb7\x56\xb5\xe9\x22\xa6\x6d\x7a\x44\x05\x41\xbf\xdb\x61\x28\xc6"
+	"\x99\x49\x87\x3d\x28\x77\xf8\x83\x23\x7e\xa9\xa7\xee\x20\xdb\x6d"
+	"\x21\x50\xb7\xc9\x52\x57\x53\xa3\xcf\xdf\xd0\xf9\xb9\x62\x96\x89"
+	"\xf5\x5c\xa9\x8a\x11\x95\x01\x25\xc9\x81\x15\x76\xae\xf0\xc7\xc5"
+	"\x50\xae\x6f\xb5\xd2\x8a\x8e\x9a\xd4\x30\x55\xc6\xe9\x2c\x81\x6e"
+	"\x95\xf6\x45\x89\x55\x28\x34\x7b\xe5\x72\x9a\x2a\xe2\x98\x09\x35"
+	"\xe0\xe9\x75\x94\xe9\x34\x95\xb9\x13\x6e\xd5\xa1\x62\x5a\x1c\x94"
+	"\x28\xed\x84\x46\x76\x6d\x10\x37\x71\xa3\x31\x46\x64\xe4\x59\x44"
+	"\x17\x70\x1c\x23\xc9\x7e\xf6\xab\x8a\x24\xae\x25\xe2\xb2\x5f\x33"
+	"\xe4\xd7\xd3\x34\x2a\x49\x22\x16\x15\x9b\x90\x40\xda\x99\xd5\xaf",
+	.expected_ss =
+	"\xe2\xce\x0e\x4b\x64\xf3\x84\x62\x38\xfd\xe3\x6f\x69\x40\x22\xb0"
+	"\x73\x27\x03\x12\x82\xa4\x6e\x03\x57\xec\x3d\xa0\xc1\x4f\x4b\x09"
+	"\xa1\xd4\xe0\x1a\x5d\x91\x2e\x08\xad\x57\xfa\xcc\x55\x90\x5f\xa0"
+	"\x52\x27\x62\x8d\xe5\x2d\xa1\x5f\xf0\x30\x43\x77\x4e\x3f\x02\x58"
+	"\xcb\xa0\x51\xae\x1d\x24\xf9\x0a\xd1\x36\x0b\x95\x0f\x07\xd9\xf7"
+	"\xe2\x36\x14\x2f\xf0\x11\xc2\xc9\xaf\x66\x4e\x0d\xb4\x60\x01\x4e"
+	"\xa8\x49\xc6\xec\x5f\xb2\xbc\x05\x48\x91\x4e\xe1\xc3\x99\x9f\xeb"
+	"\x4a\xc1\xde\x05\x9a\x65\x39\x7d\x2f\x89\x85\xb2\xcf\xec\x25\x27"
+	"\x5f\x1c\x11\x63\xcf\x7b\x86\x98\x39\xae\xc2\x16\x8f\x79\xd1\x20"
+	"\xd0\xb4\xa0\xba\x44\xd8\xf5\x3a\x0a\x08\x4c\xd1\xb9\xdd\x0a\x5b"
+	"\x9e\x62\xf3\x52\x0c\x84\x12\x43\x9b\xd7\xdf\x86\x71\x03\xdd\x04"
+	"\x98\x55\x0c\x7b\xe2\xe8\x03\x17\x25\x84\xd9\xbd\xe1\xce\x64\xbe"
+	"\xca\x55\xd4\x5b\xef\x61\x5b\x68\x4b\x80\x37\x40\xae\x28\x87\x81"
+	"\x55\x34\x96\x50\x21\x47\x49\xc0\xda\x26\x46\xb8\xe8\xcc\x5a\x27"
+	"\x9c\x9d\x0a\x3d\xcc\x4c\x63\x27\x81\x82\x2e\xf4\xa8\x91\x37\x3e"
+	"\xa7\x34\x6a\x0f\x60\x44\xdd\x2e\xdc\xf9\x19\xf2\x2e\x81\x05\x51"
+	"\x16\xbc\xc0\x85\xa5\xd5\x08\x09\x1f\xcd\xed\xa4\xc5\xdb\x16\x43"
+	"\xb5\x7a\x71\x66\x19\x2e\xef\x13\xbc\x40\x39\x0a\x00\x45\x7e\x61"
+	"\xe9\x68\x60\x83\x00\x70\xd1\x71\xd3\xa2\x61\x3e\x00\x46\x93\x0d"
+	"\xbf\xe6\xa2\x07\xe6\x40\x1a\xf4\x57\xc6\x67\x39\xd8\xd7\x6b\xc5"
+	"\xa5\xd8\x38\x78\x12\xb4\x97\x12\xbe\x97\x13\xef\xe4\x74\x0c\xe0"
+	"\x75\x89\x64\xf4\xe8\x85\xda\x84\x7b\x1d\xfe\xdd\x21\xba\xda\x01"
+	"\x52\xdc\x59\xe5\x47\x50\x7e\x15\x20\xd0\x43\x37\x6e\x48\x39\x00"
+	"\xee\xd9\x54\x6d\x00\x65\xc9\x4b\x85\xa2\x8a\x40\x55\xd0\x63\x0c"
+	"\xb5\x7a\x0d\x37\x67\x27\x73\x18\x7f\x5a\xf5\x0e\x22\xb9\xb0\x3f"
+	"\xda\xf1\xec\x7c\x24\x01\x49\xa9\x09\x0e\x0f\xc4\xa9\xef\xc8\x2b"
+	"\x13\xd1\x0a\x6f\xf8\x92\x4b\x1d\xdd\x6c\x9c\x35\xde\x75\x46\x32"
+	"\xe6\xfb\xda\x58\xba\x81\x08\xca\xa9\xb6\x69\x71\x96\x2a\x1f\x2e"
+	"\x25\xe0\x37\xfe\xee\x4d\x27\xaa\x04\xda\x95\xbb\x93\xcf\x8f\xa2"
+	"\x1d\x67\x35\xe3\x51\x8f\x87\x3b\xa9\x62\x05\xee\x44\xb7\x2e\xd0"
+	"\x07\x63\x32\xf5\xcd\x64\x18\x20\xcf\x22\x42\x28\x22\x1a\xa8\xbb"
+	"\x74\x8a\x6f\x2a\xea\x8a\x48\x0a\xad\xd7\xed\xba\xa3\x89\x37\x01",
+	.secret_size = 528,
+	.b_public_size = 512,
+	.expected_a_public_size = 512,
+	.expected_ss_size = 512,
+	},
+};
+
+static const struct kpp_testvec ffdhe6144_dh_tv_template[] __maybe_unused = {
+	{
+	.secret =
+#ifdef __LITTLE_ENDIAN
+	"\x01\x00" /* type */
+	"\x10\x03" /* len */
+	"\x00\x03\x00\x00" /* key_size */
+	"\x00\x00\x00\x00" /* p_size */
+	"\x00\x00\x00\x00" /* g_size */
+#else
+	"\x00\x01" /* type */
+	"\x03\x10" /* len */
+	"\x00\x00\x03\x00" /* key_size */
+	"\x00\x00\x00\x00" /* p_size */
+	"\x00\x00\x00\x00" /* g_size */
+#endif
+	/* xa */
+	"\x63\x3e\x6f\xe0\xfe\x9f\x4a\x01\x62\x77\xce\xf1\xc7\xcc\x49\x4d"
+	"\x92\x53\x56\xe3\x39\x15\x81\xb2\xcd\xdc\xaf\x5e\xbf\x31\x1f\x69"
+	"\xce\x41\x35\x24\xaa\x46\x53\xb5\xb7\x3f\x2b\xad\x95\x14\xfb\xe4"
+	"\x9a\x61\xcd\x0f\x1f\x02\xee\xa4\x79\x2c\x9d\x1a\x7c\x62\x82\x39"
+	"\xdd\x43\xcc\x58\x9f\x62\x47\x56\x1d\x0f\xc2\x67\xbc\x24\xd0\xf9"
+	"\x0a\x50\x1b\x10\xe7\xbb\xd1\xc2\x01\xbb\xc4\x4c\xda\x12\x60\x0e"
+	"\x95\x2b\xde\x09\xd6\x67\xe1\xbc\x4c\xb9\x67\xdf\xd0\x1f\x97\xb4"
+	"\xde\xcb\x6b\x78\x83\x51\x74\x33\x01\x7f\xf6\x0a\x95\x69\x93\x00"
+	"\x2a\xc3\x75\x8e\xef\xbe\x53\x11\x6d\xc4\xd0\x9f\x6d\x63\x48\xc1"
+	"\x91\x1f\x7d\x88\xa7\x90\x78\xd1\x7e\x52\x42\x10\x01\xb4\x27\x95"
+	"\x91\x43\xcc\x82\x91\x86\x62\xa0\x9d\xef\x65\x6e\x67\xcf\x19\x11"
+	"\x35\x37\x5e\x94\x97\x83\xa6\x83\x1c\x7e\x8a\x3e\x32\xb0\xce\xff"
+	"\x20\xdc\x7b\x6e\x18\xd9\x6b\x27\x31\xfc\xc3\xef\x47\x8d\xbe\x34"
+	"\x2b\xc7\x60\x74\x3c\x93\xb3\x8e\x54\x77\x4e\x73\xe6\x40\x72\x35"
+	"\xb0\xf0\x06\x53\x43\xbe\xd0\xc3\x87\xcc\x38\x96\xa9\x10\xa0\xd6"
+	"\x17\xed\xa5\x6a\xf4\xf6\xaa\x77\x40\xed\x7d\x2e\x58\x0f\x5b\x04"
+	"\x5a\x41\x12\x95\x22\xcb\xa3\xce\x8b\x6d\x6d\x89\xec\x7c\x1d\x25"
+	"\x27\x52\x50\xa0\x5b\x93\x8c\x5d\x3f\x56\xb9\xa6\x5e\xe5\xf7\x9b"
+	"\xc7\x9a\x4a\x2e\x79\xb5\xca\x29\x58\x52\xa0\x63\xe4\x9d\xeb\x4c"
+	"\x4c\xa8\x37\x0b\xe9\xa0\x18\xf1\x86\xf6\x4d\x32\xfb\x9e\x4f\xb3"
+	"\x7b\x5d\x58\x78\x70\xbd\x56\xac\x99\x75\x25\x71\x66\x76\x4e\x5e"
+	"\x67\x4f\xb1\x17\xa7\x8b\x55\x12\x87\x01\x4e\xd1\x66\xef\xd0\x70"
+	"\xaf\x14\x34\xee\x2a\x76\x49\x25\xa6\x2e\x43\x37\x75\x7d\x1a\xad"
+	"\x08\xd5\x01\x85\x9c\xe1\x20\xd8\x38\x5c\x57\xa5\xed\x9d\x46\x3a"
+	"\xb7\x46\x60\x29\x8b\xc4\x21\x50\x0a\x30\x9c\x57\x42\xe4\x35\xf8"
+	"\x12\x5c\x4f\xa2\x20\xc2\xc9\x43\xe3\x6d\x20\xbc\xdf\xb8\x37\x33"
+	"\x45\x43\x06\x4e\x08\x6f\x8a\xcd\x61\xc3\x1b\x05\x28\x82\xbe\xf0"
+	"\x48\x33\xe5\x93\xc9\x1a\x61\x16\x67\x03\x9d\x47\x9d\x74\xeb\xae"
+	"\x13\xf2\xb4\x1b\x09\x11\xf5\x15\xcb\x28\xfd\x50\xe0\xbc\x58\x36"
+	"\x38\x91\x2c\x07\x27\x1f\x49\x68\xf4\xce\xad\xf7\xba\xec\x5d\x3d"
+	"\xfd\x27\xe2\xcf\xf4\x56\xfe\x08\xa6\x11\x61\xcb\x6c\x9f\xf9\x3c"
+	"\x57\x0b\x8b\xaa\x00\x16\x18\xba\x1f\xe8\x4f\x01\xe2\x79\x2a\x0b"
+	"\xc1\xbd\x52\xef\xe6\xf7\x5a\x66\xfe\x07\x3b\x50\x6b\xbb\xcb\x39"
+	"\x3c\x94\xf6\x21\x0d\x68\x69\xa4\xed\x2e\xb5\x85\x03\x11\x38\x79"
+	"\xec\xb5\x22\x23\xdf\x9e\xad\xb4\xbe\xd7\xc7\xdf\xea\x30\x23\x8a"
+	"\xb7\x21\x0a\x9d\xbd\x99\x13\x7d\x5f\x7e\xaf\x28\x54\x3f\xca\x5e"
+	"\xf4\xfc\x05\x0d\x65\x67\xd8\xf6\x8e\x90\x9d\x0d\xcf\x62\x82\xd6"
+	"\x9f\x02\xf8\xca\xfa\x42\x24\x7f\x4d\xb7\xfc\x92\xa6\x4a\x51\xc4"
+	"\xd8\xae\x19\x87\xc6\xa3\x83\xbe\x7b\x6d\xc3\xf5\xb8\xad\x4a\x05"
+	"\x78\x84\x3a\x15\x2e\x40\xbe\x79\xa9\xc0\x12\xa1\x48\x39\xc3\xdb"
+	"\x47\x4f\x7d\xea\x6d\xc7\xfa\x2c\x4e\xe9\xa5\x85\x81\xea\x6c\xcd"
+	"\x8a\xe5\x74\x17\x76\x31\x31\x75\x96\x83\xca\x81\xbb\x5c\xa9\x79"
+	"\x2c\xbd\x09\xfe\xe4\x86\x0d\x8c\x76\x9c\xbc\xe8\x93\xe4\xd0\xe4"
+	"\x0f\xf8\xff\x24\x7e\x66\x61\x69\xfb\xe4\x46\x08\x94\x99\xa5\x53"
+	"\xd7\xe4\x29\x72\x86\x86\xe8\x1d\x37\xfa\xcb\xd0\x8d\x51\xd0\xbf"
+	"\x81\xcf\x55\xb9\xc5\x78\x8c\x74\xa0\x16\x3a\xd2\x19\x94\x29\x6a"
+	"\x5e\xec\xd3\x20\xa0\xb2\xfd\xce\xd4\x14\xa3\x39\x10\xa9\xf4\x4e"
+	"\xba\x21\x09\x5c\xe6\x61\x43\x51\xae\xc4\x71\xd7\x21\xef\x98\x39",
+	.b_public =
+	"\x30\x31\xbe\x43\xd0\x14\x22\x6b\x4b\x8c\x9a\xca\xc6\xdd\xe5\x99"
+	"\xce\xb8\x30\x23\xb6\xa8\x8c\x4d\xfa\xef\xad\xa6\x6a\x21\x50\xa6"
+	"\x45\x2d\x19\x2a\x29\x81\xc5\xac\xb4\xa8\x5f\x6d\x5b\xc8\x5f\x12"
+	"\x35\x21\xfb\x37\xaa\x0c\x79\xeb\xd4\x83\x01\xda\xa3\xf3\x51\x6e"
+	"\x17\xf9\xef\x3f\xbd\x2f\xd2\x43\x82\x12\x48\xeb\x61\x4c\x8e\xf2"
+	"\x6c\x76\xf9\x6d\x42\x2a\xcb\x10\x13\x3b\xf6\x9b\xcd\x46\x1e\xa2"
+	"\xa7\x2c\x08\x56\xd2\x42\xf5\x03\xf0\x3e\xef\xa2\xa2\xf2\x4c\xf2"
+	"\xdb\x4f\xeb\x40\x15\x53\x27\xf7\xd4\x8e\x58\x23\xf5\x2c\x88\x04"
+	"\x1e\xb1\xb6\xe3\xd6\x9c\x49\x08\xa1\x4b\xb8\x33\xe4\x75\x85\xa1"
+	"\x86\x97\xce\x1d\xe9\x9f\xe2\xd8\xf2\x7e\xad\xdc\x8a\x4d\xbd\x06"
+	"\x52\x00\x9a\x2c\x69\xdd\x02\x0c\x69\x5a\xf9\x1d\xfd\xdc\xfb\x82"
+	"\xb2\xe5\xf3\x24\xba\xd1\x09\x76\x90\xb5\x7a\x92\xa6\x6b\x97\xc0"
+	"\xce\x13\x9b\x4b\xbc\x30\x91\xb2\x13\x8b\x57\x6c\x8b\x66\x6e\x58"
+	"\x3e\x91\x50\xc7\x6c\xe1\x18\xec\xbf\x69\xcd\xcb\xa0\xbc\x0d\x05"
+	"\xc4\xf8\x45\x92\xe0\x05\xd3\x08\xb3\x30\x19\xc8\x80\xf8\x17\x9f"
+	"\x1e\x6a\x49\x8e\x43\xef\x7a\x49\xa5\x93\xd9\xed\xd1\x07\x03\xe4"
+	"\xa3\x55\xeb\x1e\x2f\x69\xd7\x40\x8f\x6e\x1c\xb6\x94\xfb\xba\x4e"
+	"\x46\xd0\x38\x71\x00\x88\x93\x6a\x55\xfc\x16\x95\x1f\xb1\xf6\x2f"
+	"\x26\x45\x50\x54\x30\x62\x62\xe8\x80\xe5\x24\x0b\xe4\x15\x6b\x32"
+	"\x16\xc2\x30\x9b\x56\xb4\xc9\x5e\x50\xb4\x27\x82\x86\x01\xda\x68"
+	"\x44\x4b\x15\x81\x31\x13\x52\xd8\x08\xbc\xae\xf3\xa5\x94\x1c\x81"
+	"\xe8\x42\xd6\x42\xd6\xff\x99\x58\x0f\x61\x3e\x82\x9e\x2d\x13\x03"
+	"\x54\x02\x74\xf4\x6b\x43\x43\xce\x54\x44\x36\x3f\x55\xfa\xb2\x56"
+	"\xdc\xac\xb5\x65\x89\xbe\x36\xd2\x58\x65\x79\x4c\xf3\xe2\x01\xf1"
+	"\x69\x96\x29\x20\x5d\xee\xf5\x8a\x8b\x9f\x72\xf7\x27\x02\xde\x3b"
+	"\xc7\x52\x19\xdc\x8e\x22\x36\x09\x14\x59\x07\xbb\x1e\x49\x69\x4f"
+	"\x00\x7b\x9a\x5d\x23\xe9\xbe\x0d\x52\x90\xa3\x0d\xde\xe7\x80\x57"
+	"\x53\x69\x39\xe6\xf8\x33\xeb\x92\x0d\x9e\x04\x8b\x16\x16\x16\x1c"
+	"\xa9\xe6\xe3\x0e\x0a\xc6\xf6\x61\xd1\x44\x2b\x3e\x5e\x02\xfe\xaa"
+	"\xe3\xf3\x8f\xf9\xc8\x20\x37\xad\xbc\x95\xb8\xc5\xe7\x95\xda\xfb"
+	"\x80\x5b\xf6\x40\x28\xae\xc1\x4c\x09\xde\xff\x1e\xbf\x51\xd2\xfe"
+	"\x08\xdc\xb0\x48\x21\xf5\x4c\x43\xdc\x7b\x69\x83\xc8\x69\x5c\xc4"
+	"\xa9\x98\x76\x4b\xc4\x4a\xac\x1d\xa5\x52\xe3\x35\x43\xdd\x30\xd4"
+	"\xa0\x51\x9c\xc2\x62\x4c\x7e\xa5\xfb\xd3\x2c\x8a\x09\x7f\x53\xa3"
+	"\xcd\xca\x58\x1b\x4c\xaf\xba\x21\x8b\x88\x1d\xc0\xe9\x0a\x17\x30"
+	"\x33\xd6\xa2\xa5\x49\x50\x61\x3b\xff\x37\x71\x66\xef\x61\xbc\xb2"
+	"\x53\x82\xe5\x70\xef\x32\xff\x9d\x97\xe0\x82\xe0\xbb\x49\xc2\x29"
+	"\x58\x89\xdd\xe9\x62\x52\xfb\xba\x22\xa6\xd9\x16\xfa\x55\xb3\x06"
+	"\xed\x6d\x70\x6e\xdc\x47\x7c\x67\x1a\xcc\x27\x98\xd4\xd7\xe6\xf0"
+	"\xf8\x9f\x51\x3e\xf0\xee\xad\xb6\x78\x69\x71\xb5\xcb\x09\xa3\xa6"
+	"\x3f\x29\x24\x46\xe0\x65\xbc\x9f\x6c\xe9\xf9\x49\x49\x96\x75\xe5"
+	"\xe1\xff\x82\x70\xf4\x7e\xff\x8f\xec\x47\x98\x6d\x5b\x88\x60\xee"
+	"\x43\xb1\xe2\x14\xc1\x49\x95\x74\x46\xd3\x3f\x73\xb2\xe9\x88\xe0"
+	"\xd3\xb1\xc4\x2c\xef\xee\xdd\x6c\xc5\xa1\x29\xef\x86\xd2\x36\x8a"
+	"\x2f\x7c\x9d\x28\x0a\x6d\xc9\x5a\xdb\xd4\x04\x06\x36\x96\x09\x03"
+	"\x71\x5d\x38\x67\xa2\x08\x2a\x04\xe7\xd6\x51\x5a\x19\x9d\xe7\xf1"
+	"\x5d\x6f\xe2\xff\x48\x37\xb7\x8b\xb1\x14\xb4\x96\xcd\xf0\xa7\xbd"
+	"\xef\x20\xff\x0a\x8d\x08\xb7\x15\x98\x5a\x13\xd2\xda\x2a\x27\x75",
+	.expected_a_public =
+	"\x45\x96\x5a\xb7\x78\x5c\xa4\x4d\x39\xb2\x5f\xc8\xc2\xaa\x1a\xf4"
+	"\xa6\x68\xf6\x6f\x7e\xa8\x4a\x5b\x0e\xba\x0a\x99\x85\xf9\x63\xd4"
+	"\x58\x21\x6d\xa8\x3c\xf4\x05\x10\xb0\x0d\x6f\x1c\xa0\x17\x85\xae"
+	"\x68\xbf\xcc\x00\xc8\x86\x1b\x24\x31\xc9\x49\x23\x91\xe0\x71\x29"
+	"\x06\x39\x39\x93\x49\x9c\x75\x18\x1a\x8b\x61\x73\x1c\x7f\x37\xd5"
+	"\xf1\xab\x20\x5e\x62\x25\xeb\x58\xd5\xfa\xc9\x7f\xad\x57\xd5\xcc"
+	"\x0d\xc1\x7a\x2b\x33\x2a\x76\x84\x33\x26\x97\xcf\x47\x9d\x72\x2a"
+	"\xc9\x39\xde\xa8\x42\x27\x2d\xdc\xee\x00\x60\xd2\x4f\x13\xe0\xde"
+	"\xd5\xc7\xf6\x7d\x8b\x2a\x43\x49\x40\x99\xc2\x61\x84\x8e\x57\x09"
+	"\x7c\xcc\x19\x46\xbd\x4c\xd2\x7c\x7d\x02\x4d\x88\xdf\x58\x24\x80"
+	"\xeb\x19\x3b\x2a\x13\x2b\x19\x85\x3c\xd8\x31\x03\x00\xa4\xd4\x57"
+	"\x23\x2c\x24\x37\xb3\x62\xea\x35\x29\xd0\x2c\xac\xfd\xbd\xdf\x3d"
+	"\xa6\xce\xfa\x0d\x5b\xb6\x15\x8b\xe3\x58\xe9\xad\x99\x87\x29\x51"
+	"\x8d\x97\xd7\xa9\x55\xf0\x72\x6e\x4e\x58\xcb\x2b\x4d\xbd\xd0\x48"
+	"\x7d\x14\x86\xdb\x3f\xa2\x5f\x6e\x35\x4a\xe1\x70\xb1\x53\x72\xb7"
+	"\xbc\xe9\x3d\x1b\x33\xc0\x54\x6f\x43\x55\x76\x85\x7f\x9b\xa5\xb3"
+	"\xc1\x1d\xd3\xfe\xe2\xd5\x96\x3d\xdd\x92\x04\xb1\xad\x75\xdb\x13"
+	"\x4e\x49\xfc\x35\x34\xc5\xda\x13\x98\xb8\x12\xbe\xda\x90\x55\x7c"
+	"\x11\x6c\xbe\x2b\x8c\x51\x29\x23\xc1\x51\xbc\x0c\x1c\xe2\x20\xfc"
+	"\xfe\xf2\xaa\x71\x9b\x21\xdf\x25\x1f\x68\x21\x7e\xe1\xc9\x87\xa0"
+	"\x20\xf6\x8d\x4f\x27\x8c\x3c\x0f\x9d\xf4\x69\x25\xaa\x49\xab\x94"
+	"\x22\x5a\x92\x3a\xba\xb4\xc2\x8c\x5a\xaa\x04\xbf\x46\xc5\xaa\x93"
+	"\xab\x0d\xe9\x54\x6c\x3a\x64\xa6\xa2\x21\x66\xee\x1c\x10\x21\x84"
+	"\xf2\x9e\xcc\x57\xac\xc2\x25\x62\xad\xbb\x59\xef\x25\x61\x6c\x81"
+	"\x38\x8a\xdc\x8c\xeb\x7b\x18\x1d\xaf\xa9\xc5\x9a\xf4\x49\x26\x8a"
+	"\x25\xc4\x3e\x31\x95\x28\xef\xf7\x72\xe9\xc5\xaa\x59\x72\x2b\x67"
+	"\x47\xe8\x6b\x51\x05\x24\xb8\x18\xb3\x34\x0f\x8c\x2b\x80\xba\x61"
+	"\x1c\xbe\x9e\x9a\x7c\xe3\x60\x5e\x49\x02\xff\x50\x8a\x64\x28\x64"
+	"\x46\x7b\x83\x14\x72\x6e\x59\x9b\x56\x09\xb4\xf0\xde\x52\xc3\xf3"
+	"\x58\x17\x6a\xae\xb1\x0f\xf4\x39\xcc\xd8\xce\x4d\xe1\x51\x17\x88"
+	"\xe4\x98\xd9\xd1\xa9\x55\xbc\xbf\x7e\xc4\x51\x96\xdb\x44\x1d\xcd"
+	"\x8d\x74\xad\xa7\x8f\x87\x83\x75\xfc\x36\xb7\xd2\xd4\x89\x16\x97"
+	"\xe4\xc6\x2a\xe9\x65\xc8\xca\x1c\xbd\x86\xaf\x57\x80\xf7\xdd\x42"
+	"\xc0\x3b\x3f\x87\x51\x02\x2f\xf8\xd8\x68\x0f\x3d\x95\x2d\xf1\x67"
+	"\x09\xa6\x5d\x0b\x7e\x01\xb4\xb2\x32\x01\xa8\xd0\x58\x0d\xe6\xa2"
+	"\xd8\x4b\x22\x10\x7d\x11\xf3\xc2\x4e\xb8\x43\x8e\x31\x79\x59\xe2"
+	"\xc4\x96\x29\x17\x40\x06\x0d\xdf\xdf\xc3\x02\x30\x2a\xd1\x8e\xf2"
+	"\xee\x2d\xd2\x12\x63\x5a\x1d\x3c\xba\x4a\xc4\x56\x90\xc6\x12\x0b"
+	"\xe0\x04\x3f\x35\x59\x8e\x40\x75\xf4\x4c\x10\x61\xb9\x30\x89\x7c"
+	"\x8d\x0e\x25\xb7\x5a\x6b\x97\x05\xc6\x37\x80\x6e\x94\x56\xa8\x5f"
+	"\x03\x94\x59\xc8\xc5\x3e\xdc\x23\xe5\x68\x4f\xd7\xbb\x6d\x7e\xc1"
+	"\x8d\xf9\xcc\x3f\x38\xad\x77\xb3\x18\x61\xed\x04\xc0\x71\xa7\x96"
+	"\xb1\xaf\x1d\x69\x78\xda\x6d\x89\x8b\x50\x75\x99\x44\xb3\xb2\x75"
+	"\xd1\xc8\x14\x40\xa1\x0a\xbf\xc4\x45\xc4\xee\x12\x90\x76\x26\x64"
+	"\xb7\x73\x2e\x0b\x0c\xfa\xc3\x55\x29\x24\x1b\x7a\x00\x27\x07\x26"
+	"\x36\xf0\x38\x1a\xe3\xb7\xc4\x8d\x1c\x9c\xa9\xc0\xc1\x45\x91\x9e"
+	"\x86\xdd\x82\x94\x45\xfa\xcd\x5a\x19\x12\x7d\xef\xda\x17\xad\x21"
+	"\x17\x89\x8b\x45\xa7\xf5\xed\x51\x9e\x58\x13\xdc\x84\xa4\xe6\x37",
+	.expected_ss =
+	"\x9a\x9c\x1c\xb7\x73\x2f\xf2\x12\xed\x59\x01\xbb\x75\xf7\xf5\xe4"
+	"\xa0\xa8\xbc\x3f\x3f\xb6\xf7\x74\x6e\xc4\xba\x6d\x6c\x4d\x93\x31"
+	"\x2b\xa7\xa4\xb3\x47\x8f\x77\x04\xb5\xa5\xab\xca\x6b\x5a\xe2\x86"
+	"\x02\x60\xca\xb4\xd7\x5e\xe0\x0f\x73\xdd\xa2\x38\x7c\xae\x0f\x5a"
+	"\x1a\xd7\xfd\xb6\xc8\x6f\xdd\xe0\x98\xd5\x07\xea\x1f\x2a\xbb\x9e"
+	"\xef\x01\x24\x04\xee\xf5\x89\xb1\x12\x26\x54\x95\xef\xcb\x84\xe9"
+	"\xae\x05\xef\x63\x25\x15\x65\x79\x79\x79\x91\xc3\x76\x72\xb4\x85"
+	"\x86\xd9\xd3\x03\xb0\xff\x04\x96\x05\x3c\xde\xbf\x47\x34\x76\x70"
+	"\x17\xd2\x24\x83\xb9\xbb\xcf\x70\x7c\xb8\xc6\x7b\x4e\x01\x86\x36"
+	"\xc7\xc5\xe5\x8b\x7c\x69\x74\x9a\xfe\x1f\x58\x85\x0f\x00\xf8\x4e"
+	"\xf1\x56\xdc\xd1\x11\x28\x2c\xcf\x6c\xb9\xc9\x57\x17\x2e\x19\x19"
+	"\x55\xb3\x4c\xd8\xfb\xe7\x6f\x70\x63\xf9\x53\x45\xdd\xd5\x62\x95"
+	"\xd3\x7d\x7e\xa0\x00\x1a\x62\x9f\x96\x0a\x5d\x0a\x25\x02\xbb\xff"
+	"\x5a\xe8\x9e\x5a\x66\x08\x93\xbc\x92\xaf\xd2\x28\x04\x97\xc1\x54"
+	"\xfe\xcc\x0a\x25\xa2\xf4\x1d\x5a\x9a\xb1\x3e\x9c\xba\x78\xe2\xcf"
+	"\x71\x70\xe3\x40\xea\xba\x69\x9b\x03\xdd\x99\x26\x09\x84\x9d\x69"
+	"\x4d\x3d\x0b\xe9\x3f\x51\xcd\x05\xe5\x00\xaf\x2c\xd3\xf6\xc0\x68"
+	"\xb5\x23\x53\x33\x14\xbd\x39\x1c\xbd\x1b\xe6\x72\x90\xcc\xc2\x86"
+	"\x1a\x42\x83\x55\xb3\xed\x0b\x62\x6d\x0e\xbb\x9e\x2a\x42\x32\x05"
+	"\x3f\xf2\x2c\xc8\x9f\x3c\xd2\xb1\x0b\xb6\x4c\xa0\x22\x36\xee\xb9"
+	"\x55\x23\x3e\x80\xc7\x28\x7c\x39\x11\xd3\x4a\x96\x2e\xef\x52\x34"
+	"\xf2\xda\xb1\xc6\xf5\x02\x10\xbf\x56\x6b\x50\x56\xcd\x2c\xfe\xe1"
+	"\x94\x14\x19\x24\x6e\x9a\xdf\x0c\xb8\xe2\xb8\xd5\xa3\xc1\x22\x8e"
+	"\x84\x92\x00\x16\xf1\x3f\x83\xf6\x36\x31\xa5\x38\xc6\xcf\xf8\x9b"
+	"\x03\xc7\x6f\xb9\xa1\x04\xdf\x20\x0f\x0b\x0f\x70\xff\x57\x36\x7f"
+	"\xb3\x6b\xcb\x8f\x48\xf7\xb2\xdb\x85\x05\xd1\xfe\x34\x05\xf6\x57"
+	"\xb4\x5b\xcc\x3f\x0e\xba\x36\x59\xb0\xfd\x4d\xf6\xf4\x5e\xd2\x65"
+	"\x1d\x98\x87\xb4\x5e\xff\x29\xaa\x84\x9b\x44\x0f\x06\x36\x61\xbd"
+	"\xdb\x51\xda\x56\xc2\xd6\x19\xe2\x57\x4f\xd0\x29\x71\xc8\xe4\xd6"
+	"\xfb\x8c\xd0\xfc\x4f\x25\x09\xa6\xfc\x67\xe2\xb8\xac\xd3\x88\x8f"
+	"\x1f\xf6\xa1\xe3\x45\xa6\x34\xe3\xb1\x6b\xb7\x37\x0e\x06\xc7\x63"
+	"\xde\xac\x3b\xac\x07\x91\x64\xcc\x12\x10\x46\x85\x14\x0b\x6b\x03"
+	"\xba\x4a\x85\xae\xc5\x8c\xa5\x9d\x36\x38\x33\xca\x42\x9c\x4b\x0c"
+	"\x46\xe1\x77\xe9\x1f\x80\xfe\xb7\x1d\x5a\xf4\xc6\x11\x26\x78\xea"
+	"\x81\x25\x77\x47\xed\x8b\x59\xc2\x6b\x49\xff\x83\x56\xec\xa5\xf0"
+	"\xe0\x8b\x15\xd4\x99\x40\x2a\x65\x2a\x98\xf4\x71\x35\x63\x84\x08"
+	"\x4d\xcd\x71\x85\x55\xbc\xa4\x1c\x90\x93\x03\x41\xde\xed\x78\x62"
+	"\x07\x30\x50\xac\x60\x21\x06\xc3\xab\xa4\x04\xc0\xc2\x32\x07\xc4"
+	"\x1f\x2f\xec\xe2\x32\xbf\xbe\x5e\x50\x5b\x2a\x19\x71\x44\x37\x76"
+	"\x8b\xbc\xdb\x73\x98\x65\x78\xc9\x33\x97\x7e\xdc\x60\xa8\x87\xf2"
+	"\xb5\x96\x55\x7f\x44\x07\xcb\x3b\xf3\xd7\x82\xfd\x77\x21\x82\x21"
+	"\x1a\x8b\xa2\xf5\x1f\x66\xd0\x57\x00\x4f\xa9\xa5\x33\xb8\x69\x91"
+	"\xe8\x2e\xf7\x73\x47\x89\x30\x9b\xb1\xfd\xe1\x5d\x11\xfd\x84\xd9"
+	"\xa2\x91\x1f\x8a\xa7\x7a\x77\x8e\x3b\x10\x1d\x0a\x59\x50\x34\xb0"
+	"\xc3\x90\x9f\x56\xb7\x43\xeb\x51\x99\x2b\x8e\x6d\x7b\x58\xe7\xc0"
+	"\x7f\x3d\xa0\x27\x50\xf2\x6e\xc8\x1e\x7f\x84\xb3\xe1\xf7\x09\x85"
+	"\xd2\x9b\x56\x6b\xba\xa5\x19\x2e\xec\xd8\x5c\xf5\x4e\x43\x36\x2e"
+	"\x89\x85\x41\x7f\x9c\x91\x2e\x62\xc3\x41\xcf\x0e\xa1\x7f\xeb\x50",
+	.secret_size = 784,
+	.b_public_size = 768,
+	.expected_a_public_size = 768,
+	.expected_ss_size = 768,
+	},
+};
+
+static const struct kpp_testvec ffdhe8192_dh_tv_template[] __maybe_unused = {
+	{
+	.secret =
+#ifdef __LITTLE_ENDIAN
+	"\x01\x00" /* type */
+	"\x10\x04" /* len */
+	"\x00\x04\x00\x00" /* key_size */
+	"\x00\x00\x00\x00" /* p_size */
+	"\x00\x00\x00\x00" /* g_size */
+#else
+	"\x00\x01" /* type */
+	"\x04\x10" /* len */
+	"\x00\x00\x04\x00" /* key_size */
+	"\x00\x00\x00\x00" /* p_size */
+	"\x00\x00\x00\x00" /* g_size */
+#endif
+	/* xa */
+	"\x76\x6e\xeb\xf9\xeb\x76\xae\x37\xcb\x19\x49\x8b\xeb\xaf\xb0\x4b"
+	"\x6d\xe9\x15\xad\xda\xf2\xef\x58\xe9\xd6\xdd\x4c\xb3\x56\xd0\x3b"
+	"\x00\xb0\x65\xed\xae\xe0\x2e\xdf\x8f\x45\x3f\x3c\x5d\x2f\xfa\x96"
+	"\x36\x33\xb2\x01\x8b\x0f\xe8\x46\x15\x6d\x60\x5b\xec\x32\xc3\x3b"
+	"\x06\xf3\xb4\x1b\x9a\xef\x3c\x03\x0e\xcc\xce\x1d\x24\xa0\xc9\x08"
+	"\x65\xf9\x45\xe5\xd2\x43\x08\x88\x58\xd6\x46\xe7\xbb\x25\xac\xed"
+	"\x3b\xac\x6f\x5e\xfb\xd6\x19\xa6\x20\x3a\x1d\x0c\xe8\x00\x72\x54"
+	"\xd7\xd9\xc9\x26\x49\x18\xc6\xb8\xbc\xdd\xf3\xce\xf3\x7b\x69\x04"
+	"\x5c\x6f\x11\xdb\x44\x42\x72\xb6\xb7\x84\x17\x86\x47\x3f\xc5\xa1"
+	"\xd8\x86\xef\xe2\x27\x49\x2b\x8f\x3e\x91\x12\xd9\x45\x96\xf7\xe6"
+	"\x77\x76\x36\x58\x71\x9a\xb1\xdb\xcf\x24\x9e\x7e\xad\xce\x45\xba"
+	"\xb5\xec\x8e\xb9\xd6\x7b\x3d\x76\xa4\x85\xad\xd8\x49\x9b\x80\x9d"
+	"\x7f\x9f\x85\x09\x9e\x86\x5b\x6b\xf3\x8d\x39\x5e\x6f\xe4\x30\xc8"
+	"\xa5\xf3\xdf\x68\x73\x6b\x2e\x9a\xcb\xac\x0a\x0d\x44\xc1\xaf\xb2"
+	"\x11\x1b\x7c\x43\x08\x44\x43\xe2\x4e\xfd\x93\x30\x99\x09\x12\xbb"
+	"\xf6\x31\x34\xa5\x3d\x45\x98\xee\xd7\x2a\x1a\x89\xf5\x37\x92\x33"
+	"\xa0\xdd\xf5\xfb\x1f\x90\x42\x55\x5a\x0b\x82\xff\xf0\x96\x92\x15"
+	"\x65\x5a\x55\x96\xca\x1b\xd5\xe5\xb5\x94\xde\x2e\xa6\x03\x57\x9e"
+	"\x15\xe4\x32\x2b\x1f\xb2\x22\x21\xe9\xa0\x05\xd3\x65\x6c\x11\x66"
+	"\x25\x38\xbb\xa3\x6c\xc2\x0b\x2b\xd0\x7a\x20\x26\x29\x37\x5d\x5f"
+	"\xd8\xff\x2a\xcd\x46\x6c\xd6\x6e\xe5\x77\x1a\xe6\x33\xf1\x8e\xc8"
+	"\x10\x30\x11\x00\x27\xf9\x7d\x0e\x28\x43\xa7\x67\x38\x7f\x16\xda"
+	"\xd0\x01\x8e\xa4\xe8\x6f\xcd\x23\xaf\x77\x52\x34\xad\x7e\xc3\xed"
+	"\x2d\x10\x0a\x33\xdc\xcf\x1b\x88\x0f\xcc\x48\x7f\x42\xf0\x9e\x13"
+	"\x1f\xf5\xd1\xe9\x90\x87\xbd\xfa\x5f\x1d\x77\x55\xcb\xc3\x05\xaf"
+	"\x71\xd0\xe0\xab\x46\x31\xd7\xea\x89\x54\x2d\x39\xaf\xf6\x4f\x74"
+	"\xaf\x46\x58\x89\x78\x95\x2e\xe6\x90\xb7\xaa\x00\x73\x9f\xed\xb9"
+	"\x00\xd6\xf6\x6d\x26\x59\xcd\x56\xdb\xf7\x3d\x5f\xeb\x6e\x46\x33"
+	"\xb1\x23\xed\x9f\x8d\x58\xdc\xb4\x28\x3b\x90\x09\xc4\x61\x02\x1f"
+	"\xf8\x62\xf2\x6e\xc1\x94\x71\x66\x93\x11\xdf\xaa\x3e\xd7\xb5\xe5"
+	"\xc1\x78\xe9\x14\xcd\x55\x16\x51\xdf\x8d\xd0\x94\x8c\x43\xe9\xb8"
+	"\x1d\x42\x7f\x76\xbc\x6f\x87\x42\x88\xde\xd7\x52\x78\x00\x4f\x18"
+	"\x02\xe7\x7b\xe2\x8a\xc3\xd1\x43\xa5\xac\xda\xb0\x8d\x19\x96\xd4"
+	"\x81\xe0\x75\xe9\xca\x41\x7e\x1f\x93\x0b\x26\x24\xb3\xaa\xdd\x10"
+	"\x20\xd3\xf2\x9f\x3f\xdf\x65\xde\x67\x79\xdc\x76\x9f\x3c\x72\x75"
+	"\x65\x8a\x30\xcc\xd2\xcc\x06\xb1\xab\x62\x86\x78\x5d\xb8\xce\x72"
+	"\xb3\x12\xc7\x9f\x07\xd0\x6b\x98\x82\x9b\x6c\xbb\x15\xe5\xcc\xf4"
+	"\xc8\xf4\x60\x81\xdc\xd3\x09\x1b\x5e\xd4\xf3\x55\xcf\x1c\x16\x83"
+	"\x61\xb4\x2e\xcc\x08\x67\x58\xfd\x46\x64\xbc\x29\x4b\xdd\xda\xec"
+	"\xdc\xc6\xa9\xa5\x73\xfb\xf8\xf3\xaf\x89\xa8\x9e\x25\x14\xfa\xac"
+	"\xeb\x1c\x7c\x80\x96\x66\x4d\x41\x67\x9b\x07\x4f\x0a\x97\x17\x1c"
+	"\x4d\x61\xc7\x2e\x6f\x36\x98\x29\x50\x39\x6d\xe7\x70\xda\xf0\xc8"
+	"\x05\x80\x7b\x32\xff\xfd\x12\xde\x61\x0d\xf9\x4c\x21\xf1\x56\x72"
+	"\x3d\x61\x46\xc0\x2d\x07\xd1\x6c\xd3\xbe\x9a\x21\x83\x85\xf7\xed"
+	"\x53\x95\x44\x40\x8f\x75\x12\x18\xc2\x9a\xfd\x5e\xce\x66\xa6\x7f"
+	"\x57\xc0\xd7\x73\x76\xb3\x13\xda\x2e\x58\xc6\x27\x40\xb2\x2d\xef"
+	"\x7d\x72\xb4\xa8\x75\x6f\xcc\x5f\x42\x3e\x2c\x90\x36\x59\xa0\x34"
+	"\xaa\xce\xbc\x04\x4c\xe6\x56\xc2\xcd\xa6\x1c\x59\x04\x56\x53\xcf"
+	"\x6d\xd7\xf0\xb1\x4f\x91\xfa\x84\xcf\x4b\x8d\x50\x4c\xf8\x2a\x31"
+	"\x5f\xe3\xba\x79\xb4\xcc\x59\x64\xe3\x7a\xfa\xf6\x06\x9d\x04\xbb"
+	"\xce\x61\xbf\x9e\x59\x0a\x09\x51\x6a\xbb\x0b\x80\xe0\x91\xc1\x51"
+	"\x04\x58\x67\x67\x4b\x42\x4f\x95\x68\x75\xe2\x1f\x9c\x14\x70\xfd"
+	"\x3a\x8a\xce\x8b\x04\xa1\x89\xe7\xb4\xbf\x70\xfe\xf3\x0c\x48\x04"
+	"\x3a\xd2\x85\x68\x03\xe7\xfa\xec\x5b\x55\xb7\x95\xfd\x5b\x19\x35"
+	"\xad\xcb\x4a\x63\x03\x44\x64\x2a\x48\x59\x9a\x26\x43\x96\x8c\xe6"
+	"\xbd\xb7\x90\xd4\x5f\x8d\x08\x28\xa8\xc5\x89\x70\xb9\x6e\xd3\x3b"
+	"\x76\x0e\x37\x98\x15\x27\xca\xc9\xb0\xe0\xfd\xf3\xc6\xdf\x69\xce"
+	"\xe1\x5f\x6a\x3e\x5c\x86\xe2\x58\x41\x11\xf0\x7e\x56\xec\xe4\xc9"
+	"\x0d\x87\x91\xfb\xb9\xc8\x0d\x34\xab\xb0\xc6\xf2\xa6\x00\x7b\x18"
+	"\x92\xf4\x43\x7f\x01\x85\x2e\xef\x8c\x72\x50\x10\xdb\xf1\x37\x62"
+	"\x16\x85\x71\x01\xa8\x2b\xf0\x13\xd3\x7c\x0b\xaf\xf1\xf3\xd1\xee"
+	"\x90\x41\x5f\x7d\x5b\xa9\x83\x4b\xfa\x80\x59\x50\x73\xe1\xc4\xf9"
+	"\x5e\x4b\xde\xd9\xf5\x22\x68\x5e\x65\xd9\x37\xe4\x1a\x08\x0e\xb1"
+	"\x28\x2f\x40\x9e\x37\xa8\x12\x56\xb7\xb8\x64\x94\x68\x94\xff\x9f",
+	.b_public =
+	"\x26\xa8\x3a\x97\xe0\x52\x76\x07\x26\xa7\xbb\x21\xfd\xe5\x69\xde"
+	"\xe6\xe0\xb5\xa0\xf1\xaa\x51\x2b\x56\x1c\x3c\x6c\xe5\x9f\x8f\x75"
+	"\x71\x04\x86\xf6\x43\x2f\x20\x7f\x45\x4f\x5c\xb9\xf3\x90\xbe\xa9"
+	"\xa0\xd7\xe8\x03\x0e\xfe\x99\x9b\x8a\x1c\xbe\xa7\x63\xe8\x2b\x45"
+	"\xd4\x2c\x65\x25\x4c\x33\xda\xc5\x85\x77\x5d\x62\xea\x93\xe4\x45"
+	"\x59\xff\xa1\xd2\xf1\x73\x11\xed\x02\x64\x8a\x1a\xfb\xe1\x88\xa6"
+	"\x50\x6f\xff\x87\x12\xbb\xfc\x10\xcf\x19\x41\xb0\x35\x44\x7d\x51"
+	"\xe9\xc0\x77\xf2\x73\x21\x2e\x62\xbf\x65\xa5\xd1\x3b\xb1\x3e\x19"
+	"\x75\x4b\xb7\x8e\x03\xc3\xdf\xc8\xb2\xe6\xec\x2d\x7d\xa5\x6a\xba"
+	"\x93\x47\x50\xeb\x6e\xdb\x88\x05\x45\xad\x03\x8c\xf7\x9a\xe1\xc9"
+	"\x1e\x16\x96\x37\xa5\x3e\xe9\xb9\xa8\xdc\xb9\xa9\xf6\xa1\x3d\xed"
+	"\xbe\x12\x29\x8a\x3d\x3d\x90\xfc\x94\xfe\x66\x28\x1c\x1b\xa4\x89"
+	"\x47\x66\x4f\xac\x14\x00\x22\x2d\x5c\x03\xea\x71\x4d\x19\x7d\xd6"
+	"\x58\x39\x4c\x3d\x06\x2b\x30\xa6\xdc\x2c\x8d\xd1\xde\x79\x77\xfa"
+	"\x9c\x6b\x72\x11\x8a\x7f\x7d\x37\x28\x2a\x88\xbf\x0a\xdb\xac\x3b"
+	"\xc5\xa5\xd5\x7e\x25\xec\xa6\x7f\x5b\x53\x75\x83\x49\xd4\x77\xcc"
+	"\x7d\x7e\xd3\x3d\x30\x2c\x98\x3f\x18\x9a\x11\x8a\x37\xda\x99\x0f"
+	"\x3b\x06\xe1\x87\xd5\xe9\x4e\xe0\x9c\x0e\x39\x34\xe2\xdd\xf6\x58"
+	"\x60\x63\xa6\xea\xe8\xc0\xb4\xde\xdf\xa0\xbc\x21\xc3\x2d\xf4\xa4"
+	"\xc8\x6f\x62\x6c\x0f\x71\x88\xf9\xda\x2d\x30\xd5\x95\xe1\xfc\x6d"
+	"\x88\xc5\xc3\x95\x51\x83\xde\x41\x46\x6f\x7e\x1b\x10\x48\xad\x2b"
+	"\x82\x88\xa2\x6f\x57\x4d\x4a\xbd\x90\xc8\x06\x8f\x52\x5d\x6e\xee"
+	"\x09\xe6\xa3\xcb\x30\x9c\x14\xf6\xac\x66\x9b\x81\x0a\x75\x42\x6b"
+	"\xab\x27\xec\x76\xfb\x8d\xc5\xbf\x0e\x93\x81\x7b\x81\xd4\x85\xa6"
+	"\x90\x5a\xa6\xa2\x8b\xa9\xb7\x34\xe6\x15\x36\x93\x8b\xe2\x99\xc7"
+	"\xad\x66\x7e\xd6\x89\xa9\xc8\x15\xcb\xc5\xeb\x06\x85\xd4\x2f\x6e"
+	"\x9b\x95\x7a\x06\x6c\xfa\x31\x1d\xc4\xe5\x7d\xfb\x10\x35\x88\xc2"
+	"\xbe\x1c\x16\x5d\xc2\xf4\x0d\xf3\xc9\x94\xb2\x7e\xa7\xbd\x9c\x03"
+	"\x32\xaf\x8b\x1a\xc8\xcc\x82\xd8\x87\x96\x6e\x3d\xcc\x93\xd2\x43"
+	"\x73\xf9\xde\xec\x49\x49\xf4\x56\x2a\xc8\x6e\x32\x70\x48\xf8\x70"
+	"\xa3\x96\x31\xf4\xf2\x08\xc5\x12\xd2\xeb\xb6\xea\xa3\x07\x05\x61"
+	"\x74\xa3\x04\x2f\x17\x82\x40\x5e\x4c\xd1\x51\xb8\x10\x5b\xc8\x9f"
+	"\x87\x73\x80\x0d\x6f\xc6\xb9\xf6\x7c\x31\x0a\xcc\xd9\x03\x0f\x7a"
+	"\x47\x69\xb1\x55\xab\xe9\xb5\x75\x62\x9e\x95\xbe\x7b\xa9\x53\x6e"
+	"\x28\x73\xdc\xb3\xa4\x8a\x1c\x91\xf5\x8a\xf9\x32\x2b\xbd\xa5\xdc"
+	"\x07\xb5\xaf\x49\xdb\x9c\x35\xc9\x69\xde\xac\xb1\xd0\x86\xcb\x31"
+	"\x0b\xc4\x4f\x63\x4e\x70\xa7\x80\xe3\xbc\x0b\x73\x0e\xf2\x8c\x87"
+	"\x88\x7b\xa9\x6d\xde\x8a\x73\x14\xb9\x80\x55\x03\x2b\x29\x64\x6a"
+	"\xda\x48\x0e\x78\x07\x40\x48\x46\x58\xa9\x4e\x68\x1d\xd1\xc1\xc8"
+	"\x3b\x35\x53\x61\xd5\xe3\x0d\x4c\x42\x74\x10\x67\x85\x9f\x66\x2a"
+	"\xf7\x2b\x7b\x77\x8b\x6e\xda\x2c\xc1\x5a\x20\x34\x3f\xf5\x8b\x6f"
+	"\xe4\x61\xf5\x58\xab\x72\x1a\xf1\x8d\x28\xcc\xa5\x30\x68\xb5\x50"
+	"\x7b\x81\x43\x89\x8e\xa9\xac\x63\x3a\x4a\x78\x7b\xd2\x45\xe6\xe0"
+	"\xdc\x5d\xf2\x1a\x2b\x54\x50\xa5\x9d\xf6\xe7\x9f\x25\xaf\x56\x6a"
+	"\x84\x2a\x75\xa3\x9a\xc7\xfa\x94\xec\x83\xab\xa5\xaa\xe1\xf9\x89"
+	"\x29\xa9\xf6\x53\x24\x24\xae\x4a\xe8\xbc\xe8\x9e\x5c\xd7\x54\x7c"
+	"\x65\x20\x97\x28\x94\x76\xf9\x9e\x81\xcf\x98\x6a\x3a\x7b\xec\xf3"
+	"\x09\x60\x2e\x43\x18\xb5\xf6\x8c\x44\x0f\xf2\x0a\x17\x5b\xac\x98"
+	"\x30\xab\x6e\xd5\xb3\xef\x25\x68\x50\xb6\xe1\xc0\xe4\x5a\x63\x43"
+	"\xea\xca\xda\x23\xc1\xc2\xe9\x30\xec\xb3\x9f\xbf\x1f\x09\x76\xaf"
+	"\x65\xbc\xb5\xab\x30\xac\x0b\x05\xef\x5c\xa3\x65\x77\x33\x1c\xc5"
+	"\xdf\xc9\x39\xab\xca\xf4\x3b\x88\x25\x6d\x50\x87\xb1\x79\xc2\x23"
+	"\x9d\xb5\x21\x01\xaa\xa3\xb7\x61\xa3\x48\x91\x72\x3d\x54\x85\x86"
+	"\x91\x81\x35\x78\xbf\x8f\x27\x57\xcb\x9b\x34\xab\x63\x40\xf1\xbc"
+	"\x23\x5a\x26\x6a\xba\x57\xe2\x8f\x2a\xdc\x82\xe0\x3b\x7f\xec\xd3"
+	"\xd8\x9d\xd3\x13\x54\x70\x64\xc3\xfd\xbf\xa3\x46\xa7\x53\x42\x7f"
+	"\xc1\xbd\x7b\xb3\x13\x47\x2a\x45\x1e\x76\x2c\x0d\x6d\x46\x26\x24"
+	"\xa8\xc7\x00\x2b\x10\x7f\x2a\x6c\xfc\x68\x4e\x6e\x85\x53\x00\xaf"
+	"\xd5\xfb\x59\x64\xc7\x9b\x24\xd1\x05\xdc\x34\x53\x6d\x27\xa9\x79"
+	"\xff\xd7\x5e\x7a\x40\x81\x8e\xc3\xf2\x38\xc9\x8d\x87\xb5\x38\xda"
+	"\x43\x64\x1b\x59\x62\x88\xc1\x6e\x85\x84\x33\xcd\x6d\x7b\x62\x1d"
+	"\x60\xf9\x98\xf7\xd1\xb1\xd4\xbe\x56\x6e\xa8\x6f\xff\xe7\x8b\x60"
+	"\x53\x80\xc7\x7c\xe0\x78\x89\xa9\xab\x42\x8f\x8e\x4d\x92\xac\xa7"
+	"\xfd\x47\x11\xc7\xdb\x7c\x77\xfb\xa4\x1d\x70\xaf\x56\x14\x52\xb0",
+	.expected_a_public =
+	"\xa1\x6c\x9e\xda\x45\x4d\xf6\x59\x04\x00\xc1\xc6\x8b\x12\x3b\xcd"
+	"\x07\xe4\x3e\xec\xac\x9b\xfc\xf7\x6d\x73\x39\x9e\x52\xf8\xbe\x33"
+	"\xe2\xca\xea\x99\x76\xc7\xc9\x94\x5c\xf3\x1b\xea\x6b\x66\x4b\x51"
+	"\x90\xf6\x4f\x75\xd5\x85\xf4\x28\xfd\x74\xa5\x57\xb1\x71\x0c\xb6"
+	"\xb6\x95\x70\x2d\xfa\x4b\x56\xe0\x56\x10\x21\xe5\x60\xa6\x18\xa4"
+	"\x78\x8c\x07\xc0\x2b\x59\x9c\x84\x5b\xe9\xb9\x74\xbf\xbc\x65\x48"
+	"\x27\x82\x40\x53\x46\x32\xa2\x92\x91\x9d\xf6\xd1\x07\x0e\x1d\x07"
+	"\x1b\x41\x04\xb1\xd4\xce\xae\x6e\x46\xf1\x72\x50\x7f\xff\xa8\xa2"
+	"\xbc\x3a\xc1\xbb\x28\xd7\x7d\xcd\x7a\x22\x01\xaf\x57\xb0\xa9\x02"
+	"\xd4\x8a\x92\xd5\xe6\x8e\x6f\x11\x39\xfe\x36\x87\x89\x42\x25\x42"
+	"\xd9\xbe\x67\x15\xe1\x82\x8a\x5e\x98\xc2\xd5\xde\x9e\x13\x1a\xe7"
+	"\xf9\x9f\x8e\x2d\x49\xdc\x4d\x98\x8c\xdd\xfd\x24\x7c\x46\xa9\x69"
+	"\x3b\x31\xb3\x12\xce\x54\xf6\x65\x75\x40\xc2\xf1\x04\x92\xe3\x83"
+	"\xeb\x02\x3d\x79\xc0\xf9\x7c\x28\xb3\x97\x03\xf7\x61\x1c\xce\x95"
+	"\x1a\xa0\xb3\x77\x1b\xc1\x9f\xf8\xf6\x3f\x4d\x0a\xfb\xfa\x64\x1c"
+	"\xcb\x37\x5b\xc3\x28\x60\x9f\xd1\xf2\xc4\xee\x77\xaa\x1f\xe9\xa2"
+	"\x89\x4c\xc6\xb7\xb3\xe4\xa5\xed\xa7\xe8\xac\x90\xdc\xc3\xfb\x56"
+	"\x9c\xda\x2c\x1d\x1a\x9a\x8c\x82\x92\xee\xdc\xa0\xa4\x01\x6e\x7f"
+	"\xc7\x0e\xc2\x73\x7d\xa6\xac\x12\x01\xc0\xc0\xc8\x7c\x84\x86\xc7"
+	"\xa5\x94\xe5\x33\x84\x71\x6e\x36\xe3\x3b\x81\x30\xe0\xc8\x51\x52"
+	"\x2b\x9e\x68\xa2\x6e\x09\x95\x8c\x7f\x78\x82\xbd\x53\x26\xe7\x95"
+	"\xe0\x03\xda\xc0\xc3\x6e\xcf\xdc\xb3\x14\xfc\xe9\x5b\x9b\x70\x6c"
+	"\x93\x04\xab\x13\xf7\x17\x6d\xee\xad\x32\x48\xe9\xa0\x94\x1b\x14"
+	"\x64\x4f\xa1\xb3\x8d\x6a\xca\x28\xfe\x4a\xf4\xf0\xc5\xb7\xf9\x8a"
+	"\x8e\xff\xfe\x57\x6f\x20\xdb\x04\xab\x02\x31\x22\x42\xfd\xbd\x77"
+	"\xea\xce\xe8\xc7\x5d\xe0\x8e\xd6\x66\xd0\xe4\x04\x2f\x5f\x71\xc7"
+	"\x61\x2d\xa5\x3f\x2f\x46\xf2\xd8\x5b\x25\x82\xf0\x52\x88\xc0\x59"
+	"\xd3\xa3\x90\x17\xc2\x04\x13\xc3\x13\x69\x4f\x17\xb1\xb3\x46\x4f"
+	"\xa7\xe6\x8b\x5e\x3e\x95\x0e\xf5\x42\x17\x7f\x4d\x1f\x1b\x7d\x65"
+	"\x86\xc5\xc8\xae\xae\xd8\x4f\xe7\x89\x41\x69\xfd\x06\xce\x5d\xed"
+	"\x44\x55\xad\x51\x98\x15\x78\x8d\x68\xfc\x93\x72\x9d\x22\xe5\x1d"
+	"\x21\xc3\xbe\x3a\x44\x34\xc0\xa3\x1f\xca\xdf\x45\xd0\x5c\xcd\xb7"
+	"\x72\xeb\xae\x7a\xad\x3f\x05\xa0\xe3\x6e\x5a\xd8\x52\xa7\xf1\x1e"
+	"\xb4\xf2\xcf\xe7\xdf\xa7\xf2\x22\x00\xb2\xc4\x17\x3d\x2c\x15\x04"
+	"\x71\x28\x69\x5c\x69\x21\xc8\xf1\x9b\xd8\xc7\xbc\x27\xa3\x85\xe9"
+	"\x53\x77\xd3\x65\xc3\x86\xdd\xb3\x76\x13\xfb\xa1\xd4\xee\x9d\xe4"
+	"\x51\x3f\x83\x59\xe4\x47\xa8\xa6\x0d\x68\xd5\xf6\xf4\xca\x31\xcd"
+	"\x30\x48\x34\x90\x11\x8e\x87\xe9\xea\xc9\xd0\xc3\xba\x28\xf9\xc0"
+	"\xc9\x8e\x23\xe5\xc2\xee\xf2\x47\x9c\x41\x1c\x10\x33\x27\x23\x49"
+	"\xe5\x0d\x18\xbe\x19\xc1\xba\x6c\xdc\xb7\xa1\xe7\xc5\x0d\x6f\xf0"
+	"\x8c\x62\x6e\x0d\x14\xef\xef\xf2\x8e\x01\xd2\x76\xf5\xc1\xe1\x92"
+	"\x3c\xb3\x76\xcd\xd8\xdd\x9b\xe0\x8e\xdc\x24\x34\x13\x65\x0f\x11"
+	"\xaf\x99\x7a\x2f\xe6\x1f\x7d\x17\x3e\x8a\x68\x9a\x37\xc8\x8d\x3e"
+	"\xa3\xfe\xfe\x57\x22\xe6\x0e\x50\xb5\x98\x0b\x71\xd8\x01\xa2\x8d"
+	"\x51\x96\x50\xc2\x41\x31\xd8\x23\x98\xfc\xd1\x9d\x7e\x27\xbb\x69"
+	"\x78\xe0\x87\xf7\xe4\xdd\x58\x13\x9d\xec\x00\xe4\xb9\x70\xa2\x94"
+	"\x5d\x52\x4e\xf2\x5c\xd1\xbc\xfd\xee\x9b\xb9\xe5\xc4\xc0\xa8\x77"
+	"\x67\xa4\xd1\x95\x34\xe4\x6d\x5f\x25\x02\x8d\x65\xdd\x11\x63\x55"
+	"\x04\x01\x21\x60\xc1\x5c\xef\x77\x33\x01\x1c\xa2\x11\x2b\xdd\x2b"
+	"\x74\x99\x23\x38\x05\x1b\x7e\x2e\x01\x52\xfe\x9c\x23\xde\x3e\x1a"
+	"\x72\xf4\xff\x7b\x02\xaa\x08\xcf\xe0\x5b\x83\xbe\x85\x5a\xe8\x9d"
+	"\x11\x3e\xff\x2f\xc6\x97\x67\x36\x6c\x0f\x81\x9c\x26\x29\xb1\x0f"
+	"\xbb\x53\xbd\xf4\xec\x2a\x84\x41\x28\x3b\x86\x40\x95\x69\x55\x5f"
+	"\x30\xee\xda\x1e\x6c\x4b\x25\xd6\x2f\x2c\x0e\x3c\x1a\x26\xa0\x3e"
+	"\xef\x09\xc6\x2b\xe5\xa1\x0c\x03\xa8\xf5\x39\x70\x31\xc4\x32\x79"
+	"\xd1\xd9\xc2\xcc\x32\x4a\xf1\x2f\x57\x5a\xcc\xe5\xc3\xc5\xd5\x4e"
+	"\x86\x56\xca\x64\xdb\xab\x61\x85\x8f\xf9\x20\x02\x40\x66\x76\x9e"
+	"\x5e\xd4\xac\xf0\x47\xa6\x50\x5f\xc2\xaf\x55\x9b\xa3\xc9\x8b\xf8"
+	"\x42\xd5\xcf\x1a\x95\x22\xd9\xd1\x0b\x92\x51\xca\xde\x46\x02\x0d"
+	"\x8b\xee\xd9\xa0\x04\x74\xf5\x0e\xb0\x3a\x62\xec\x3c\x91\x29\x33"
+	"\xa7\x78\x22\x92\xac\x27\xe6\x2d\x6f\x56\x8a\x5d\x72\xc2\xf1\x5c"
+	"\x54\x11\x97\x24\x61\xcb\x0c\x52\xd4\x57\x56\x22\x86\xf0\x19\x27"
+	"\x76\x30\x04\xf4\x39\x7b\x1a\x5a\x04\x0d\xec\x59\x9a\x31\x4c\x40"
+	"\x19\x6d\x3c\x41\x1b\x0c\xca\xeb\x25\x39\x6c\x96\xf8\x55\xd0\xec",
+	.expected_ss =
+	"\xf9\x55\x4f\x48\x38\x74\xb7\x46\xa3\xc4\x2e\x88\xf0\x34\xab\x1d"
+	"\xcd\xa5\x58\xa7\x95\x88\x36\x62\x6f\x8a\xbd\xf2\xfb\x6f\x3e\xb9"
+	"\x91\x65\x58\xef\x70\x2f\xd5\xc2\x97\x70\xcb\xce\x8b\x78\x1c\xe0"
+	"\xb9\xfa\x77\x34\xd2\x4a\x19\x58\x11\xfd\x93\x84\x40\xc0\x8c\x19"
+	"\x8b\x98\x50\x83\xba\xfb\xe2\xad\x8b\x81\x84\x63\x90\x41\x4b\xf8"
+	"\xe8\x78\x86\x04\x09\x8d\x84\xd1\x43\xfd\xa3\x58\x21\x2a\x3b\xb1"
+	"\xa2\x5b\x48\x74\x3c\xa9\x16\x34\x28\xf0\x8e\xde\xe2\xcf\x8e\x68"
+	"\x53\xab\x65\x06\xb7\x86\xb1\x08\x4f\x73\x97\x00\x10\x95\xd1\x84"
+	"\x72\xcf\x14\xdb\xff\xa7\x80\xd8\xe5\xf2\x2c\x89\x37\xb0\x81\x2c"
+	"\xf5\xd6\x7d\x1b\xb0\xe2\x8e\x87\x32\x3d\x37\x6a\x79\xaa\xe7\x08"
+	"\xc9\x67\x55\x5f\x1c\xae\xa6\xf5\xef\x79\x3a\xaf\x3f\x82\x14\xe2"
+	"\xf3\x69\x91\xed\xb7\x9e\xc9\xde\xd0\x29\x70\xd9\xeb\x0f\xf5\xc7"
+	"\xf6\x7c\xa7\x7f\xec\xed\xe1\xbd\x13\xe1\x43\xe4\x42\x30\xe3\x5f"
+	"\xe0\xf3\x15\x55\x2f\x7a\x42\x17\x67\xcb\xc2\x4f\xd0\x85\xfc\x6c"
+	"\xec\xe8\xfc\x25\x78\x4b\xe4\x0f\xd4\x3d\x78\x28\xd3\x53\x79\xcb"
+	"\x2c\x82\x67\x9a\xdc\x32\x55\xd2\xda\xae\xd8\x61\xce\xd6\x59\x0b"
+	"\xc5\x44\xeb\x08\x81\x8c\x65\xb2\xb7\xa6\xff\xf7\xbf\x99\xc6\x8a"
+	"\xbe\xde\xc2\x17\x56\x05\x6e\xd2\xf1\x1e\xa2\x04\xeb\x02\x74\xaa"
+	"\x04\xfc\xf0\x6b\xd4\xfc\xf0\x7a\x5f\xfe\xe2\x74\x7f\xeb\x9b\x6a"
+	"\x8a\x09\x96\x5d\xe1\x91\xb6\x9e\x37\xd7\x63\xd7\xb3\x5c\xb5\xa3"
+	"\x5f\x62\x00\xdf\xc5\xbf\x85\xba\xa7\xa9\xb6\x1f\x76\x78\x65\x01"
+	"\xfe\x1d\x6c\xfe\x15\x9e\xf4\xb1\xbc\x8d\xad\x3c\xec\x69\x27\x57"
+	"\xa4\x89\x77\x46\xe1\x49\xc7\x22\xde\x79\xe0\xf7\x3a\xa1\x59\x8b"
+	"\x59\x71\xcc\xd6\x18\x24\xc1\x8a\x2f\xe3\xdf\xdd\x6c\xf7\x62\xaa"
+	"\x15\xaa\x39\x37\x3b\xaf\x7d\x6e\x88\xeb\x19\xa8\xa0\x26\xd3\xaa"
+	"\x2d\xcc\x5f\x56\x99\x86\xa9\xed\x4d\x02\x31\x40\x97\x70\x83\xa7"
+	"\x08\x98\x7e\x49\x46\xd9\x75\xb5\x7a\x6a\x40\x69\xa0\x6d\xb2\x18"
+	"\xc0\xad\x88\x05\x02\x95\x6f\xf7\x8f\xcb\xa2\xe4\x7b\xab\x4a\x0f"
+	"\x9a\x1b\xef\xcc\xd1\x6a\x5d\x1e\x6a\x2a\x8b\x5b\x80\xbc\x5f\x38"
+	"\xdd\xaf\xad\x44\x15\xb4\xaf\x26\x1c\x1a\x4d\xa7\x4b\xec\x88\x33"
+	"\x24\x42\xb5\x0c\x9c\x56\xd4\xba\xa7\xb9\x65\xd5\x76\xb2\xbc\x16"
+	"\x8e\xfa\x0c\x7a\xc0\xa2\x2c\x5a\x39\x56\x7d\xe6\xf8\xa9\xf4\x49"
+	"\xd0\x50\xf2\x5e\x4b\x0a\x43\xe4\x9a\xbb\xea\x35\x28\x99\x84\x83"
+	"\xec\xc1\xa0\x68\x15\x9a\x2b\x01\x04\x48\x09\x11\x1b\xb6\xa4\xd8"
+	"\x03\xad\xb6\x4c\x9e\x1d\x90\xae\x88\x0f\x75\x95\x25\xa0\x27\x13"
+	"\xb7\x4f\xe2\x3e\xd5\x59\x1a\x7c\xde\x95\x14\x28\xd1\xde\x84\xe4"
+	"\x07\x7c\x5b\x06\xd6\xe6\x9c\x8a\xbe\xd2\xb4\x62\xd1\x67\x8a\x9c"
+	"\xac\x4f\xfa\x70\xd6\xc8\xc0\xeb\x5e\xf6\x3e\xdc\x48\x8e\xce\x3f"
+	"\x92\x3e\x60\x77\x63\x60\x6b\x76\x04\xa5\xba\xc9\xab\x92\x4e\x0d"
+	"\xdc\xca\x82\x44\x5f\x3a\x42\xeb\x01\xe7\xe0\x33\xb3\x32\xaf\x4b"
+	"\x81\x35\x2d\xb6\x57\x15\xfe\x52\xc7\x54\x2e\x41\x3b\x22\x6b\x12"
+	"\x72\xdb\x5c\x66\xd0\xb6\xb4\xfe\x90\xc0\x20\x34\x95\xf9\xe4\xc7"
+	"\x7e\x71\x89\x4f\x6f\xfb\x2a\xf3\xdf\x3f\xe3\xcf\x0e\x1a\xd9\xf2"
+	"\xc1\x02\x67\x5d\xdc\xf1\x7d\xe8\xcf\x64\x77\x4d\x12\x03\x77\x2c"
+	"\xfb\xe1\x59\xf7\x2c\x96\x9c\xaf\x46\x9c\xc7\x67\xcf\xee\x94\x50"
+	"\xc7\xa1\x23\xe6\x9f\x4d\x73\x92\xad\xf9\x4a\xce\xdb\x44\xd5\xe3"
+	"\x17\x05\x37\xdb\x9c\x6c\xc5\x7e\xb7\xd4\x11\x4a\x8c\x51\x03\xaa"
+	"\x73\x4b\x16\xd9\x79\xf5\xf1\x67\x20\x9b\x25\xe5\x41\x52\x59\x06"
+	"\x8b\xf2\x23\x2f\x6e\xea\xf3\x24\x0a\x94\xbb\xb8\x7e\xd9\x23\x4a"
+	"\x9f\x1f\xe1\x13\xb5\xfe\x85\x2f\x4c\xbe\x6a\x66\x02\x1d\x90\xd2"
+	"\x01\x25\x8a\xfd\x78\x3a\x28\xb8\x18\xc1\x38\x16\x21\x6b\xb4\xf9"
+	"\x64\x0f\xf1\x73\xc4\x5c\xd1\x41\xf2\xfe\xe7\x26\xad\x79\x12\x75"
+	"\x49\x48\xdb\x21\x71\x35\xf7\xb7\x46\x5a\xa1\x81\x25\x47\x31\xea"
+	"\x1d\x76\xbb\x32\x5a\x90\xb0\x42\x1a\x47\xe8\x0c\x82\x92\x43\x1c"
+	"\x0b\xdd\xe5\x25\xce\xd3\x06\xcc\x59\x5a\xc9\xa0\x01\xac\x29\x12"
+	"\x31\x2e\x3d\x1a\xed\x3b\xf3\xa7\xef\x52\xc2\x0d\x18\x1f\x03\x28"
+	"\xc9\x2b\x38\x61\xa4\x01\xc9\x3c\x11\x08\x14\xd4\xe5\x31\xe9\x3c"
+	"\x1d\xad\xf8\x76\xc4\x84\x9f\xea\x16\x61\x3d\x6d\xa3\x32\x31\xcd"
+	"\x1c\xca\xb8\x74\xc2\x45\xf3\x01\x9c\x7a\xaf\xfd\xe7\x1e\x5a\x18"
+	"\xb1\x9d\xbb\x7a\x2d\x34\x40\x17\x49\xad\x1f\xeb\x2d\xa2\x26\xb8"
+	"\x16\x28\x4b\x72\xdd\xd0\x8d\x85\x4c\xdd\xf8\x57\x48\xd5\x1d\xfb"
+	"\xbd\xec\x11\x5d\x1e\x9c\x26\x81\xbf\xf1\x16\x12\x32\xc3\xf3\x07"
+	"\x0e\x6e\x7f\x17\xec\xfb\xf4\x5d\xe2\xb1\xca\x97\xca\x46\x20\x2d"
+	"\x09\x85\x19\x25\x89\xa8\x9b\x51\x74\xae\xc9\x1b\x4c\xb6\x80\x62",
+	.secret_size = 1040,
+	.b_public_size = 1024,
+	.expected_a_public_size = 1024,
+	.expected_ss_size = 1024,
+	},
+};
+
 static const struct kpp_testvec curve25519_tv_template[] = {
 {
 	.secret = (u8[32]){ 0x77, 0x07, 0x6d, 0x0a, 0x73, 0x18, 0xa5, 0x7d,
-- 
GitLab


From 1e207964566738b49b003e80063fd712af75b82c Mon Sep 17 00:00:00 2001
From: Nicolai Stange <nstange@suse.de>
Date: Mon, 21 Feb 2022 13:10:55 +0100
Subject: [PATCH 0931/1586] crypto: dh - implement private key generation
 primitive for ffdheXYZ(dh)

The support for NVME in-band authentication currently in the works ([1])
needs to generate ephemeral DH keys for use with the RFC 7919 safe-prime
FFDHE groups.

In analogy to ECDH and its ecc_gen_privkey(), implement a
dh_safe_prime_gen_privkey() and invoke it from the ffdheXYZ(dh) templates'
common ->set_secret(), i.e. dh_safe_prime_set_secret(), in case the input
->key_size is zero.

As the RFC 7919 FFDHE groups are classified as approved safe-prime groups
by SP800-56Arev3, it's worthwhile to make the new
dh_safe_prime_gen_privkey() to follow the approach specified in
SP800-56Arev3, sec. 5.6.1.1.3 ("Key-Pair Generation Using Extra Random
Bits") in order to achieve conformance.

SP800-56Arev3 specifies a lower as well as an upper bound on the generated
key's length:
- it must be >= two times the maximum supported security strength of
  the group in question and
- it must be <= the length of the domain parameter Q.

For any safe-prime group Q = (P - 1)/2 by definition and the individual
maximum supported security strengths as specified by SP800-56Arev3 have
been made available as part of the FFDHE dh_safe_prime definitions
introduced with a previous patch. Make dh_safe_prime_gen_privkey() pick
twice the maximum supported strength rounded up to the next power of two
for the output key size. This choice respects both, the lower and upper
bounds given by SP800-90Arev3 for any of the approved safe-prime groups and
is also in line with the NVME base spec 2.0, which requires the key size to
be >= 256bits.

[1] https://lore.kernel.org/r/20211202152358.60116-1-hare@suse.de

Signed-off-by: Nicolai Stange <nstange@suse.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/Kconfig |   1 +
 crypto/dh.c    | 140 +++++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 138 insertions(+), 3 deletions(-)

diff --git a/crypto/Kconfig b/crypto/Kconfig
index ba9434ad06efd..d6d7e84bb7f84 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -234,6 +234,7 @@ config CRYPTO_DH
 config CRYPTO_DH_RFC7919_GROUPS
 	bool "Support for RFC 7919 FFDHE group parameters"
 	depends on CRYPTO_DH
+	select CRYPTO_RNG_DEFAULT
 	help
 	  Provide support for RFC 7919 FFDHE group parameters. If unsure, say N.
 
diff --git a/crypto/dh.c b/crypto/dh.c
index d0adb1705fe7d..869a0476e5e2b 100644
--- a/crypto/dh.c
+++ b/crypto/dh.c
@@ -10,6 +10,7 @@
 #include <crypto/internal/kpp.h>
 #include <crypto/kpp.h>
 #include <crypto/dh.h>
+#include <crypto/rng.h>
 #include <linux/mpi.h>
 
 struct dh_ctx {
@@ -315,6 +316,128 @@ static void dh_safe_prime_exit_tfm(struct crypto_kpp *tfm)
 	crypto_free_kpp(tfm_ctx->dh_tfm);
 }
 
+static u64 __add_u64_to_be(__be64 *dst, unsigned int n, u64 val)
+{
+	unsigned int i;
+
+	for (i = n; val && i > 0; --i) {
+		u64 tmp = be64_to_cpu(dst[i - 1]);
+
+		tmp += val;
+		val = tmp >= val ? 0 : 1;
+		dst[i - 1] = cpu_to_be64(tmp);
+	}
+
+	return val;
+}
+
+static void *dh_safe_prime_gen_privkey(const struct dh_safe_prime *safe_prime,
+				       unsigned int *key_size)
+{
+	unsigned int n, oversampling_size;
+	__be64 *key;
+	int err;
+	u64 h, o;
+
+	/*
+	 * Generate a private key following NIST SP800-56Ar3,
+	 * sec. 5.6.1.1.1 and 5.6.1.1.3 resp..
+	 *
+	 * 5.6.1.1.1: choose key length N such that
+	 * 2 * ->max_strength <= N <= log2(q) + 1 = ->p_size * 8 - 1
+	 * with q = (p - 1) / 2 for the safe-prime groups.
+	 * Choose the lower bound's next power of two for N in order to
+	 * avoid excessively large private keys while still
+	 * maintaining some extra reserve beyond the bare minimum in
+	 * most cases. Note that for each entry in safe_prime_groups[],
+	 * the following holds for such N:
+	 * - N >= 256, in particular it is a multiple of 2^6 = 64
+	 *   bits and
+	 * - N < log2(q) + 1, i.e. N respects the upper bound.
+	 */
+	n = roundup_pow_of_two(2 * safe_prime->max_strength);
+	WARN_ON_ONCE(n & ((1u << 6) - 1));
+	n >>= 6; /* Convert N into units of u64. */
+
+	/*
+	 * Reserve one extra u64 to hold the extra random bits
+	 * required as per 5.6.1.1.3.
+	 */
+	oversampling_size = (n + 1) * sizeof(__be64);
+	key = kmalloc(oversampling_size, GFP_KERNEL);
+	if (!key)
+		return ERR_PTR(-ENOMEM);
+
+	/*
+	 * 5.6.1.1.3, step 3 (and implicitly step 4): obtain N + 64
+	 * random bits and interpret them as a big endian integer.
+	 */
+	err = -EFAULT;
+	if (crypto_get_default_rng())
+		goto out_err;
+
+	err = crypto_rng_get_bytes(crypto_default_rng, (u8 *)key,
+				   oversampling_size);
+	crypto_put_default_rng();
+	if (err)
+		goto out_err;
+
+	/*
+	 * 5.6.1.1.3, step 5 is implicit: 2^N < q and thus,
+	 * M = min(2^N, q) = 2^N.
+	 *
+	 * For step 6, calculate
+	 * key = (key[] mod (M - 1)) + 1 = (key[] mod (2^N - 1)) + 1.
+	 *
+	 * In order to avoid expensive divisions, note that
+	 * 2^N mod (2^N - 1) = 1 and thus, for any integer h,
+	 * 2^N * h mod (2^N - 1) = h mod (2^N - 1) always holds.
+	 * The big endian integer key[] composed of n + 1 64bit words
+	 * may be written as key[] = h * 2^N + l, with h = key[0]
+	 * representing the 64 most significant bits and l
+	 * corresponding to the remaining 2^N bits. With the remark
+	 * from above,
+	 * h * 2^N + l mod (2^N - 1) = l + h mod (2^N - 1).
+	 * As both, l and h are less than 2^N, their sum after
+	 * this first reduction is guaranteed to be <= 2^(N + 1) - 2.
+	 * Or equivalently, that their sum can again be written as
+	 * h' * 2^N + l' with h' now either zero or one and if one,
+	 * then l' <= 2^N - 2. Thus, all bits at positions >= N will
+	 * be zero after a second reduction:
+	 * h' * 2^N + l' mod (2^N - 1) = l' + h' mod (2^N - 1).
+	 * At this point, it is still possible that
+	 * l' + h' = 2^N - 1, i.e. that l' + h' mod (2^N - 1)
+	 * is zero. This condition will be detected below by means of
+	 * the final increment overflowing in this case.
+	 */
+	h = be64_to_cpu(key[0]);
+	h = __add_u64_to_be(key + 1, n, h);
+	h = __add_u64_to_be(key + 1, n, h);
+	WARN_ON_ONCE(h);
+
+	/* Increment to obtain the final result. */
+	o = __add_u64_to_be(key + 1, n, 1);
+	/*
+	 * The overflow bit o from the increment is either zero or
+	 * one. If zero, key[1:n] holds the final result in big-endian
+	 * order. If one, key[1:n] is zero now, but needs to be set to
+	 * one, c.f. above.
+	 */
+	if (o)
+		key[n] = cpu_to_be64(1);
+
+	/* n is in units of u64, convert to bytes. */
+	*key_size = n << 3;
+	/* Strip the leading extra __be64, which is (virtually) zero by now. */
+	memmove(key, &key[1], *key_size);
+
+	return key;
+
+out_err:
+	kfree_sensitive(key);
+	return ERR_PTR(err);
+}
+
 static int dh_safe_prime_set_secret(struct crypto_kpp *tfm, const void *buffer,
 				    unsigned int len)
 {
@@ -322,7 +445,7 @@ static int dh_safe_prime_set_secret(struct crypto_kpp *tfm, const void *buffer,
 		dh_safe_prime_instance_ctx(tfm);
 	struct dh_safe_prime_tfm_ctx *tfm_ctx = kpp_tfm_ctx(tfm);
 	struct dh params;
-	void *buf;
+	void *buf = NULL, *key = NULL;
 	unsigned int buf_size;
 	int err;
 
@@ -338,10 +461,20 @@ static int dh_safe_prime_set_secret(struct crypto_kpp *tfm, const void *buffer,
 	params.g = safe_prime_g;
 	params.g_size = sizeof(safe_prime_g);
 
+	if (!params.key_size) {
+		key = dh_safe_prime_gen_privkey(inst_ctx->safe_prime,
+						&params.key_size);
+		if (IS_ERR(key))
+			return PTR_ERR(key);
+		params.key = key;
+	}
+
 	buf_size = crypto_dh_key_len(&params);
 	buf = kmalloc(buf_size, GFP_KERNEL);
-	if (!buf)
-		return -ENOMEM;
+	if (!buf) {
+		err = -ENOMEM;
+		goto out;
+	}
 
 	err = crypto_dh_encode_key(buf, buf_size, &params);
 	if (err)
@@ -350,6 +483,7 @@ static int dh_safe_prime_set_secret(struct crypto_kpp *tfm, const void *buffer,
 	err = crypto_kpp_set_secret(tfm_ctx->dh_tfm, buf, buf_size);
 out:
 	kfree_sensitive(buf);
+	kfree_sensitive(key);
 	return err;
 }
 
-- 
GitLab


From 209b7fc9c9249c400610cec1cbfba848f293f2e9 Mon Sep 17 00:00:00 2001
From: Nicolai Stange <nstange@suse.de>
Date: Mon, 21 Feb 2022 13:10:56 +0100
Subject: [PATCH 0932/1586] crypto: testmgr - add keygen tests for ffdheXYZ(dh)
 templates

Now that the ffdheXYZ(dh) templates support ephemeral key generation, add
->keygen = 1 TVs for each of them to the testmgr.c.

In order to facilitate string merging by the compiler, set party B's secret
and public keys to the ones specified for party A in the respective
existing known answer test. With GCC 7.5 on x86_64, this leads to an
increase of testmgr.o size by less than half a kB.

Signed-off-by: Nicolai Stange <nstange@suse.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/testmgr.h | 578 ++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 568 insertions(+), 10 deletions(-)

diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index 4b880c63686a8..d1aa90993bbdd 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -1547,6 +1547,76 @@ static const struct kpp_testvec ffdhe2048_dh_tv_template[] __maybe_unused = {
 	.expected_a_public_size = 256,
 	.expected_ss_size = 256,
 	},
+	{
+	.secret =
+#ifdef __LITTLE_ENDIAN
+	"\x01\x00" /* type */
+	"\x10\x00" /* len */
+	"\x00\x00\x00\x00" /* key_size */
+	"\x00\x00\x00\x00" /* p_size */
+	"\x00\x00\x00\x00", /* g_size */
+#else
+	"\x00\x01" /* type */
+	"\x00\x10" /* len */
+	"\x00\x00\x00\x00" /* key_size */
+	"\x00\x00\x00\x00" /* p_size */
+	"\x00\x00\x00\x00", /* g_size */
+#endif
+	.b_secret =
+#ifdef __LITTLE_ENDIAN
+	"\x01\x00" /* type */
+	"\x10\x01" /* len */
+	"\x00\x01\x00\x00" /* key_size */
+	"\x00\x00\x00\x00" /* p_size */
+	"\x00\x00\x00\x00" /* g_size */
+#else
+	"\x00\x01" /* type */
+	"\x01\x10" /* len */
+	"\x00\x00\x01\x00" /* key_size */
+	"\x00\x00\x00\x00" /* p_size */
+	"\x00\x00\x00\x00" /* g_size */
+#endif
+	/* xa */
+	"\x23\x7d\xd0\x06\xfd\x7a\xe5\x7a\x08\xda\x98\x31\xc0\xb3\xd5\x85"
+	"\xe2\x0d\x2a\x91\x5f\x78\x4b\xa6\x62\xd0\xa6\x35\xd4\xef\x86\x39"
+	"\xf1\xdb\x71\x5e\xb0\x11\x2e\xee\x91\x3a\xaa\xf9\xe3\xdf\x8d\x8b"
+	"\x48\x41\xde\xe8\x78\x53\xc5\x5f\x93\xd2\x79\x0d\xbe\x8d\x83\xe8"
+	"\x8f\x00\xd2\xde\x13\x18\x04\x05\x20\x6d\xda\xfa\x1d\x0b\x24\x52"
+	"\x3a\x18\x2b\xe1\x1e\xae\x15\x3b\x0f\xaa\x09\x09\xf6\x01\x98\xe9"
+	"\x81\x5d\x6b\x83\x6e\x55\xf1\x5d\x6f\x6f\x0d\x9d\xa8\x72\x32\x63"
+	"\x60\xe6\x0b\xc5\x22\xe2\xf9\x46\x58\xa2\x1c\x2a\xb0\xd5\xaf\xe3"
+	"\x5b\x03\xb7\x36\xb7\xba\x55\x20\x08\x7c\x51\xd4\x89\x42\x9c\x14"
+	"\x23\xe2\x71\x3e\x15\x2a\x0d\x34\x8a\xde\xad\x84\x11\x15\x72\x18"
+	"\x42\x43\x0a\xe2\x58\x29\xb3\x90\x0f\x56\xd8\x8a\x0f\x0e\xbc\x0e"
+	"\x9c\xe7\xd5\xe6\x5b\xbf\x06\x64\x38\x12\xa5\x8d\x5b\x68\x34\xdd"
+	"\x75\x48\xc9\xa7\xa3\x58\x5a\x1c\xe1\xb2\xc5\xe3\x39\x03\xcf\xab"
+	"\xc2\x14\x07\xaf\x55\x80\xc7\x63\xe4\x03\xeb\xe9\x0a\x25\x61\x85"
+	"\x1d\x0e\x81\x52\x7b\xbc\x4a\x0c\xc8\x59\x6a\xac\x18\xfb\x8c\x0c"
+	"\xb4\x79\xbd\xa1\x4c\xbb\x02\xc9\xd5\x13\x88\x3d\x25\xaa\x77\x49",
+	.b_public =
+	"\x8b\xdb\xc1\xf7\xc6\xba\xa1\x38\x95\x6a\xa1\xb6\x04\x5e\xae\x52"
+	"\x72\xfc\xef\x2d\x9d\x71\x05\x9c\xd3\x02\xa9\xfb\x55\x0f\xfa\xc9"
+	"\xb4\x34\x51\xa3\x28\x89\x8d\x93\x92\xcb\xd9\xb5\xb9\x66\xfc\x67"
+	"\x15\x92\x6f\x73\x85\x15\xe2\xfc\x11\x6b\x97\x8c\x4b\x0f\x12\xfa"
+	"\x8d\x72\x76\x9b\x8f\x3b\xfe\x31\xbe\x42\x88\x4c\xd2\xb2\x70\xa6"
+	"\xa5\xe3\x7e\x73\x07\x12\x36\xaa\xc9\x5c\x83\xe1\xf1\x46\x41\x4f"
+	"\x7c\x52\xaf\xdc\xa4\xe6\x82\xa3\x86\x83\x47\x5a\x12\x3a\x0c\xe3"
+	"\xdd\xdb\x94\x03\x2a\x59\x91\xa0\x19\xe5\xf8\x07\xdd\x54\x6a\x22"
+	"\x43\xb7\xf3\x74\xd7\xb9\x30\xfe\x9c\xe8\xd1\xcf\x06\x43\x68\xb9"
+	"\x54\x8f\x54\xa2\xe5\x3c\xf2\xc3\x4c\xee\xd4\x7c\x5d\x0e\xb1\x7b"
+	"\x16\x68\xb5\xb3\x7d\xd4\x11\x83\x5c\x77\x17\xc4\xf0\x59\x76\x7a"
+	"\x83\x40\xe5\xd9\x4c\x76\x23\x5b\x17\x6d\xee\x4a\x92\x68\x4b\x89"
+	"\xa0\x6d\x23\x8c\x80\x31\x33\x3a\x12\xf4\x50\xa6\xcb\x13\x97\x01"
+	"\xb8\x2c\xe6\xd2\x38\xdf\xd0\x7f\xc6\x27\x19\x0e\xb2\x07\xfd\x1f"
+	"\x1b\x9c\x1b\x87\xf9\x73\x6a\x3f\x7f\xb0\xf9\x2f\x3c\x19\x9f\xc9"
+	"\x8f\x97\x21\x0e\x8e\xbb\x1a\x17\x20\x15\xdd\xc6\x42\x60\xae\x4d",
+	.secret_size = 16,
+	.b_secret_size = 272,
+	.b_public_size = 256,
+	.expected_a_public_size = 256,
+	.expected_ss_size = 256,
+	.genkey = true,
+	},
 };
 
 static const struct kpp_testvec ffdhe3072_dh_tv_template[] __maybe_unused = {
@@ -1670,6 +1740,92 @@ static const struct kpp_testvec ffdhe3072_dh_tv_template[] __maybe_unused = {
 	.expected_a_public_size = 384,
 	.expected_ss_size = 384,
 	},
+	{
+	.secret =
+#ifdef __LITTLE_ENDIAN
+	"\x01\x00" /* type */
+	"\x10\x00" /* len */
+	"\x00\x00\x00\x00" /* key_size */
+	"\x00\x00\x00\x00" /* p_size */
+	"\x00\x00\x00\x00", /* g_size */
+#else
+	"\x00\x01" /* type */
+	"\x00\x10" /* len */
+	"\x00\x00\x00\x00" /* key_size */
+	"\x00\x00\x00\x00" /* p_size */
+	"\x00\x00\x00\x00", /* g_size */
+#endif
+	.b_secret =
+#ifdef __LITTLE_ENDIAN
+	"\x01\x00" /* type */
+	"\x90\x01" /* len */
+	"\x80\x01\x00\x00" /* key_size */
+	"\x00\x00\x00\x00" /* p_size */
+	"\x00\x00\x00\x00" /* g_size */
+#else
+	"\x00\x01" /* type */
+	"\x01\x90" /* len */
+	"\x00\x00\x01\x80" /* key_size */
+	"\x00\x00\x00\x00" /* p_size */
+	"\x00\x00\x00\x00" /* g_size */
+#endif
+	/* xa */
+	"\x6b\xb4\x97\x23\xfa\xc8\x5e\xa9\x7b\x63\xe7\x3e\x0e\x99\xc3\xb9"
+	"\xda\xb7\x48\x0d\xc3\xb1\xbf\x4f\x17\xc7\xa9\x51\xf6\x64\xff\xc4"
+	"\x31\x58\x87\x25\x83\x2c\x00\xf0\x41\x29\xf7\xee\xf9\xe6\x36\x76"
+	"\xd6\x3a\x24\xbe\xa7\x07\x0b\x93\xc7\x9f\x6c\x75\x0a\x26\x75\x76"
+	"\xe3\x0c\x42\xe0\x00\x04\x69\xd9\xec\x0b\x59\x54\x28\x8f\xd7\x9a"
+	"\x63\xf4\x5b\xdf\x85\x65\xc4\xe1\x95\x27\x4a\x42\xad\x36\x47\xa9"
+	"\x0a\xf8\x14\x1c\xf3\x94\x3b\x7e\x47\x99\x35\xa8\x18\xec\x70\x10"
+	"\xdf\xcb\xd2\x78\x88\xc1\x2d\x59\x93\xc1\xa4\x6d\xd7\x1d\xb9\xd5"
+	"\xf8\x30\x06\x7f\x98\x90\x0c\x74\x5e\x89\x2f\x64\x5a\xad\x5f\x53"
+	"\xb2\xa3\xa8\x83\xbf\xfc\x37\xef\xb8\x36\x0a\x5c\x62\x81\x64\x74"
+	"\x16\x2f\x45\x39\x2a\x91\x26\x87\xc0\x12\xcc\x75\x11\xa3\xa1\xc5"
+	"\xae\x20\xcf\xcb\x20\x25\x6b\x7a\x31\x93\x9d\x38\xb9\x57\x72\x46"
+	"\xd4\x84\x65\x87\xf1\xb5\xd3\xab\xfc\xc3\x4d\x40\x92\x94\x1e\xcd"
+	"\x1c\x87\xec\x3f\xcd\xbe\xd0\x95\x6b\x40\x02\xdd\x62\xeb\x0a\xda"
+	"\x4f\xbe\x8e\x32\x48\x8b\x6d\x83\xa0\x96\x62\x23\xec\x83\x91\x44"
+	"\xf9\x72\x01\xac\xa0\xe4\x72\x1d\x5a\x75\x05\x57\x90\xae\x7e\xb4"
+	"\x71\x39\x01\x05\xdc\xe9\xee\xcb\xf0\x61\x28\x91\x69\x8c\x31\x03"
+	"\x7a\x92\x15\xa1\x58\x67\x3d\x70\x82\xa6\x2c\xfe\x10\x56\x58\xd3"
+	"\x94\x67\xe1\xbe\xee\xc1\x64\x5c\x4b\xc8\x28\x3d\xc5\x66\x3a\xab"
+	"\x22\xc1\x7e\xa1\xbb\xf3\x19\x3b\xda\x46\x82\x45\xd4\x3c\x7c\xc6"
+	"\xce\x1f\x7f\x95\xa2\x17\xff\x88\xba\xd6\x4d\xdb\xd2\xea\xde\x39"
+	"\xd6\xa5\x18\x73\xbb\x64\x6e\x79\xe9\xdc\x3f\x92\x7f\xda\x1f\x49"
+	"\x33\x70\x65\x73\xa2\xd9\x06\xb8\x1b\x29\x29\x1a\xe0\xa3\xe6\x05"
+	"\x9a\xa8\xc2\x4e\x7a\x78\x1d\x22\x57\x21\xc8\xa3\x8d\x66\x3e\x23",
+	.b_public =
+	"\x1b\x6a\xba\xea\xa3\xcc\x50\x69\xa9\x41\x89\xaf\x04\xe1\x44\x22"
+	"\x97\x20\xd1\xf6\x1e\xcb\x64\x36\x6f\xee\x0b\x16\xc1\xd9\x91\xbe"
+	"\x57\xc8\xd9\xf2\xa1\x96\x91\xec\x41\xc7\x79\x00\x1a\x48\x25\x55"
+	"\xbe\xf3\x20\x8c\x38\xc6\x7b\xf2\x8b\x5a\xc3\xb5\x87\x0a\x86\x3d"
+	"\xb7\xd6\xce\xb0\x96\x2e\x5d\xc4\x00\x5e\x42\xe4\xe5\x50\x4f\xb8"
+	"\x6f\x18\xa4\xe1\xd3\x20\xfc\x3c\xf5\x0a\xff\x23\xa6\x5b\xb4\x17"
+	"\x3e\x7b\xdf\xb9\xb5\x3c\x1b\x76\x29\xcd\xb4\x46\x4f\x27\x8f\xd2"
+	"\xe8\x27\x66\xdb\xe8\xb3\xf5\xe1\xd0\x04\xcd\x89\xff\xba\x76\x67"
+	"\xe8\x4d\xcf\x86\x1c\x8a\xd1\xcf\x99\x27\xfb\xa9\x78\xcc\x94\xaf"
+	"\x3d\x04\xfd\x25\xc0\x47\xfa\x29\x80\x05\xf4\xde\xad\xdb\xab\x12"
+	"\xb0\x2b\x8e\xca\x02\x06\x6d\xad\x3e\x09\xb1\x22\xa3\xf5\x4c\x6d"
+	"\x69\x99\x58\x8b\xd8\x45\x2e\xe0\xc9\x3c\xf7\x92\xce\x21\x90\x6b"
+	"\x3b\x65\x9f\x64\x79\x8d\x67\x22\x1a\x37\xd3\xee\x51\xe2\xe7\x5a"
+	"\x93\x51\xaa\x3c\x4b\x04\x16\x32\xef\xe3\x66\xbe\x18\x94\x88\x64"
+	"\x79\xce\x06\x3f\xb8\xd6\xee\xdc\x13\x79\x6f\x20\x14\xc2\x6b\xce"
+	"\xc8\xda\x42\xa5\x93\x5b\xe4\x7f\x1a\xe6\xda\x0f\xb3\xc1\x5f\x30"
+	"\x50\x76\xe8\x37\x3d\xca\x77\x2c\xa8\xe4\x3b\xf9\x6f\xe0\x17\xed"
+	"\x0e\xef\xb7\x31\x14\xb5\xea\xd9\x39\x22\x89\xb6\x40\x57\xcc\x84"
+	"\xef\x73\xa7\xe9\x27\x21\x85\x89\xfa\xaf\x03\xda\x9c\x8b\xfd\x52"
+	"\x7d\xb0\xa4\xe4\xf9\xd8\x90\x55\xc4\x39\xd6\x9d\xaf\x3b\xce\xac"
+	"\xaa\x36\x14\x7a\x9b\x8b\x12\x43\xe1\xca\x61\xae\x46\x5b\xe7\xe5"
+	"\x88\x32\x80\xa0\x2d\x51\xbb\x2f\xea\xeb\x3c\x71\xb2\xae\xce\xca"
+	"\x61\xd2\x76\xe0\x45\x46\x78\x4e\x09\x2d\xc2\x54\xc2\xa9\xc7\xa8"
+	"\x55\x8e\x72\xa4\x8b\x8a\xc9\x01\xdb\xe9\x58\x11\xa1\xc4\xe7\x12",
+	.secret_size = 16,
+	.b_secret_size = 400,
+	.b_public_size = 384,
+	.expected_a_public_size = 384,
+	.expected_ss_size = 384,
+	.genkey = true,
+	},
 };
 
 static const struct kpp_testvec ffdhe4096_dh_tv_template[] __maybe_unused = {
@@ -1825,6 +1981,108 @@ static const struct kpp_testvec ffdhe4096_dh_tv_template[] __maybe_unused = {
 	.expected_a_public_size = 512,
 	.expected_ss_size = 512,
 	},
+	{
+	.secret =
+#ifdef __LITTLE_ENDIAN
+	"\x01\x00" /* type */
+	"\x10\x00" /* len */
+	"\x00\x00\x00\x00" /* key_size */
+	"\x00\x00\x00\x00" /* p_size */
+	"\x00\x00\x00\x00", /* g_size */
+#else
+	"\x00\x01" /* type */
+	"\x00\x10" /* len */
+	"\x00\x00\x00\x00" /* key_size */
+	"\x00\x00\x00\x00" /* p_size */
+	"\x00\x00\x00\x00", /* g_size */
+#endif
+	.b_secret =
+#ifdef __LITTLE_ENDIAN
+	"\x01\x00" /* type */
+	"\x10\x02" /* len */
+	"\x00\x02\x00\x00" /* key_size */
+	"\x00\x00\x00\x00" /* p_size */
+	"\x00\x00\x00\x00" /* g_size */
+#else
+	"\x00\x01" /* type */
+	"\x02\x10" /* len */
+	"\x00\x00\x02\x00" /* key_size */
+	"\x00\x00\x00\x00" /* p_size */
+	"\x00\x00\x00\x00" /* g_size */
+#endif
+	/* xa */
+	"\x1a\x48\xf3\x6c\x61\x03\x42\x43\xd7\x42\x3b\xfa\xdb\x55\x6f\xa2"
+	"\xe1\x79\x52\x0b\x47\xc5\x03\x60\x2f\x26\xb9\x1a\x14\x15\x1a\xd9"
+	"\xe0\xbb\xa7\x82\x63\x41\xec\x26\x55\x00\xab\xe5\x21\x9d\x31\x14"
+	"\x0e\xe2\xc2\xb2\xb8\x37\xe6\xc3\x5a\xab\xae\x25\xdb\x71\x1e\xed"
+	"\xe8\x75\x9a\x04\xa7\x92\x2a\x99\x7e\xc0\x5b\x64\x75\x7f\xe5\xb5"
+	"\xdb\x6c\x95\x4f\xe9\xdc\x39\x76\x79\xb0\xf7\x00\x30\x8e\x86\xe7"
+	"\x36\xd1\xd2\x0c\x68\x7b\x94\xe9\x91\x85\x08\x86\xbc\x64\x87\xd2"
+	"\xf5\x5b\xaf\x03\xf6\x5f\x28\x25\xf1\xa3\x20\x5c\x1b\xb5\x26\x45"
+	"\x9a\x47\xab\xd6\xad\x49\xab\x92\x8e\x62\x6f\x48\x31\xea\xf6\x76"
+	"\xff\xa2\xb6\x28\x78\xef\x59\xc3\x71\x5d\xa8\xd9\x70\x89\xcc\xe2"
+	"\x63\x58\x5e\x3a\xa2\xa2\x88\xbf\x77\x20\x84\x33\x65\x64\x4e\x73"
+	"\xe5\x08\xd5\x89\x23\xd6\x07\xac\x29\x65\x2e\x02\xa8\x35\x96\x48"
+	"\xe7\x5d\x43\x6a\x42\xcc\xda\x98\xc4\x75\x90\x2e\xf6\xc4\xbf\xd4"
+	"\xbc\x31\x14\x0d\x54\x30\x11\xb2\xc9\xcf\xbb\xba\xbc\xc6\xf2\xcf"
+	"\xfe\x4a\x9d\xf3\xec\x78\x5d\x5d\xb4\x99\xd0\x67\x0f\x5a\x21\x1c"
+	"\x7b\x95\x2b\xcf\x49\x44\x94\x05\x1a\x21\x81\x25\x7f\xe3\x8a\x2a"
+	"\xdd\x88\xac\x44\x94\x23\x20\x3b\x75\xf6\x2a\x8a\x45\xf8\xb5\x1f"
+	"\xb9\x8b\xeb\xab\x9b\x38\x23\x26\xf1\x0f\x34\x47\x4f\x7f\xe1\x9e"
+	"\x84\x84\x78\xe5\xe3\x49\xeb\xcc\x2f\x02\x85\xa4\x18\x91\xde\x1a"
+	"\x60\x54\x33\x81\xd5\xae\xdb\x23\x9c\x4d\xa4\xdb\x22\x5b\xdf\xf4"
+	"\x8e\x05\x2b\x60\xba\xe8\x75\xfc\x34\x99\xcf\x35\xe1\x06\xba\xdc"
+	"\x79\x2a\x5e\xec\x1c\xbe\x79\x33\x63\x1c\xe7\x5f\x1e\x30\xd6\x1b"
+	"\xdb\x11\xb8\xea\x63\xff\xfe\x1a\x3c\x24\xf4\x78\x9c\xcc\x5d\x9a"
+	"\xc9\x2d\xc4\x9a\xd4\xa7\x65\x84\x98\xdb\x66\x76\xf0\x34\x31\x9f"
+	"\xce\xb5\xfb\x28\x07\xde\x1e\x0d\x9b\x01\x64\xeb\x2a\x37\x2f\x20"
+	"\xa5\x95\x72\x2b\x54\x51\x59\x91\xea\x50\x54\x0f\x2e\xb0\x1d\xf6"
+	"\xb9\x46\x43\xf9\xd0\x13\x21\x20\x47\x61\x1a\x1c\x30\xc6\x9e\x75"
+	"\x22\xe4\xf2\xb1\xab\x01\xdc\x5b\x3c\x1e\xa2\x6d\xc0\xb9\x9a\x2a"
+	"\x84\x61\xea\x85\x63\xa0\x77\xd0\xeb\x20\x68\xd5\x95\x6a\x1b\x8f"
+	"\x1f\x9a\xba\x44\x49\x8c\x77\xa6\xd9\xa0\x14\xf8\x7d\x9b\x4e\xfa"
+	"\xdc\x4f\x1c\x4d\x60\x50\x26\x7f\xd6\xc1\x91\x2b\xa6\x37\x5d\x94"
+	"\x69\xb2\x47\x59\xd6\xc3\x59\xbb\xd6\x9b\x71\x52\x85\x7a\xcb\x2d",
+	.b_public =
+	"\x1f\xff\xd6\xc4\x59\xf3\x4a\x9e\x81\x74\x4d\x27\xa7\xc6\x6b\x35"
+	"\xd8\xf5\xb3\x24\x97\x82\xe7\x2e\xf3\x21\x91\x23\x2f\x3d\x57\x7f"
+	"\x15\x8c\x84\x71\xe7\x25\x35\xe8\x07\x14\x06\x4c\x83\xdc\x55\x4a"
+	"\xf8\x45\xc5\xe9\xfa\x6e\xae\x6e\xcf\x4d\x11\x91\x26\x16\x6f\x86"
+	"\x89\x78\xaa\xb4\x25\x54\xb2\x74\x07\xe5\x26\x26\x0c\xad\xa4\x57"
+	"\x59\x61\x66\x71\x43\x22\xff\x49\x51\xa4\x76\x0e\x55\x7b\x60\x45"
+	"\x4f\xaf\xbd\x9c\xec\x64\x3f\x80\x0b\x0c\x31\x41\xf0\xfe\x2c\xb7"
+	"\x0a\xbe\xa5\x71\x08\x0d\x8d\x1e\x8a\x77\x9a\xd2\x90\x31\x96\xd0"
+	"\x3b\x31\xdc\xc6\x18\x59\x43\xa1\x19\x5a\x84\x68\x29\xad\x5e\x58"
+	"\xa2\x50\x3e\x83\xf5\x7a\xbd\x88\x17\x60\x89\x98\x9c\x19\x89\x27"
+	"\x89\xfc\x33\x87\x42\xd5\xde\x19\x14\xf2\x95\x82\x10\x87\xad\x82"
+	"\xdd\x6b\x51\x2d\x8d\x0e\x81\x4b\xde\xb3\x35\x6c\x0f\x4b\x56\x45"
+	"\x48\x87\xe9\x5a\xf9\x70\x10\x30\x8e\xa1\xbb\xa4\x70\xbf\xa0\xab"
+	"\x10\x31\x3c\x2c\xdc\xc4\xed\xe3\x51\xdc\xee\xd2\xa5\x5c\x4e\x6e"
+	"\xf6\xed\x60\x5a\xeb\xf3\x02\x19\x2a\x95\xe9\x46\xff\x37\x1b\xf0"
+	"\x1d\x10\x4a\x8f\x4f\x3a\x6e\xf5\xfc\x02\x6d\x09\x7d\xea\x69\x7b"
+	"\x13\xb0\xb6\x80\x5c\x15\x20\xa8\x4d\x15\x56\x11\x72\x49\xdb\x48"
+	"\x54\x40\x66\xd5\xcd\x17\x3a\x26\x95\xf6\xd7\xf2\x59\xa3\xda\xbb"
+	"\x26\xd0\xe5\x46\xbf\xee\x0e\x7d\xf1\xe0\x11\x02\x4d\xd3\xdc\xe2"
+	"\x3f\xc2\x51\x7e\xc7\x90\x33\x3c\x1c\xa0\x4c\x69\xcc\x1e\xc7\xac"
+	"\x17\xe0\xe5\xf4\x8c\x05\x64\x34\xfe\x84\x70\xd7\x6b\xed\xab\xf5"
+	"\x88\x9d\x3e\x4c\x5a\x9e\xd4\x74\xfd\xdd\x91\xd5\xd4\xcb\xbf\xf8"
+	"\xb7\x56\xb5\xe9\x22\xa6\x6d\x7a\x44\x05\x41\xbf\xdb\x61\x28\xc6"
+	"\x99\x49\x87\x3d\x28\x77\xf8\x83\x23\x7e\xa9\xa7\xee\x20\xdb\x6d"
+	"\x21\x50\xb7\xc9\x52\x57\x53\xa3\xcf\xdf\xd0\xf9\xb9\x62\x96\x89"
+	"\xf5\x5c\xa9\x8a\x11\x95\x01\x25\xc9\x81\x15\x76\xae\xf0\xc7\xc5"
+	"\x50\xae\x6f\xb5\xd2\x8a\x8e\x9a\xd4\x30\x55\xc6\xe9\x2c\x81\x6e"
+	"\x95\xf6\x45\x89\x55\x28\x34\x7b\xe5\x72\x9a\x2a\xe2\x98\x09\x35"
+	"\xe0\xe9\x75\x94\xe9\x34\x95\xb9\x13\x6e\xd5\xa1\x62\x5a\x1c\x94"
+	"\x28\xed\x84\x46\x76\x6d\x10\x37\x71\xa3\x31\x46\x64\xe4\x59\x44"
+	"\x17\x70\x1c\x23\xc9\x7e\xf6\xab\x8a\x24\xae\x25\xe2\xb2\x5f\x33"
+	"\xe4\xd7\xd3\x34\x2a\x49\x22\x16\x15\x9b\x90\x40\xda\x99\xd5\xaf",
+	.secret_size = 16,
+	.b_secret_size = 528,
+	.b_public_size = 512,
+	.expected_a_public_size = 512,
+	.expected_ss_size = 512,
+	.genkey = true,
+	},
 };
 
 static const struct kpp_testvec ffdhe6144_dh_tv_template[] __maybe_unused = {
@@ -2044,28 +2302,162 @@ static const struct kpp_testvec ffdhe6144_dh_tv_template[] __maybe_unused = {
 	.expected_a_public_size = 768,
 	.expected_ss_size = 768,
 	},
-};
-
-static const struct kpp_testvec ffdhe8192_dh_tv_template[] __maybe_unused = {
 	{
 	.secret =
 #ifdef __LITTLE_ENDIAN
 	"\x01\x00" /* type */
-	"\x10\x04" /* len */
-	"\x00\x04\x00\x00" /* key_size */
+	"\x10\x00" /* len */
+	"\x00\x00\x00\x00" /* key_size */
+	"\x00\x00\x00\x00" /* p_size */
+	"\x00\x00\x00\x00", /* g_size */
+#else
+	"\x00\x01" /* type */
+	"\x00\x10" /* len */
+	"\x00\x00\x00\x00" /* key_size */
+	"\x00\x00\x00\x00" /* p_size */
+	"\x00\x00\x00\x00", /* g_size */
+#endif
+	.b_secret =
+#ifdef __LITTLE_ENDIAN
+	"\x01\x00" /* type */
+	"\x10\x03" /* len */
+	"\x00\x03\x00\x00" /* key_size */
 	"\x00\x00\x00\x00" /* p_size */
 	"\x00\x00\x00\x00" /* g_size */
 #else
 	"\x00\x01" /* type */
-	"\x04\x10" /* len */
-	"\x00\x00\x04\x00" /* key_size */
+	"\x03\x10" /* len */
+	"\x00\x00\x03\x00" /* key_size */
 	"\x00\x00\x00\x00" /* p_size */
 	"\x00\x00\x00\x00" /* g_size */
 #endif
 	/* xa */
-	"\x76\x6e\xeb\xf9\xeb\x76\xae\x37\xcb\x19\x49\x8b\xeb\xaf\xb0\x4b"
-	"\x6d\xe9\x15\xad\xda\xf2\xef\x58\xe9\xd6\xdd\x4c\xb3\x56\xd0\x3b"
-	"\x00\xb0\x65\xed\xae\xe0\x2e\xdf\x8f\x45\x3f\x3c\x5d\x2f\xfa\x96"
+	"\x63\x3e\x6f\xe0\xfe\x9f\x4a\x01\x62\x77\xce\xf1\xc7\xcc\x49\x4d"
+	"\x92\x53\x56\xe3\x39\x15\x81\xb2\xcd\xdc\xaf\x5e\xbf\x31\x1f\x69"
+	"\xce\x41\x35\x24\xaa\x46\x53\xb5\xb7\x3f\x2b\xad\x95\x14\xfb\xe4"
+	"\x9a\x61\xcd\x0f\x1f\x02\xee\xa4\x79\x2c\x9d\x1a\x7c\x62\x82\x39"
+	"\xdd\x43\xcc\x58\x9f\x62\x47\x56\x1d\x0f\xc2\x67\xbc\x24\xd0\xf9"
+	"\x0a\x50\x1b\x10\xe7\xbb\xd1\xc2\x01\xbb\xc4\x4c\xda\x12\x60\x0e"
+	"\x95\x2b\xde\x09\xd6\x67\xe1\xbc\x4c\xb9\x67\xdf\xd0\x1f\x97\xb4"
+	"\xde\xcb\x6b\x78\x83\x51\x74\x33\x01\x7f\xf6\x0a\x95\x69\x93\x00"
+	"\x2a\xc3\x75\x8e\xef\xbe\x53\x11\x6d\xc4\xd0\x9f\x6d\x63\x48\xc1"
+	"\x91\x1f\x7d\x88\xa7\x90\x78\xd1\x7e\x52\x42\x10\x01\xb4\x27\x95"
+	"\x91\x43\xcc\x82\x91\x86\x62\xa0\x9d\xef\x65\x6e\x67\xcf\x19\x11"
+	"\x35\x37\x5e\x94\x97\x83\xa6\x83\x1c\x7e\x8a\x3e\x32\xb0\xce\xff"
+	"\x20\xdc\x7b\x6e\x18\xd9\x6b\x27\x31\xfc\xc3\xef\x47\x8d\xbe\x34"
+	"\x2b\xc7\x60\x74\x3c\x93\xb3\x8e\x54\x77\x4e\x73\xe6\x40\x72\x35"
+	"\xb0\xf0\x06\x53\x43\xbe\xd0\xc3\x87\xcc\x38\x96\xa9\x10\xa0\xd6"
+	"\x17\xed\xa5\x6a\xf4\xf6\xaa\x77\x40\xed\x7d\x2e\x58\x0f\x5b\x04"
+	"\x5a\x41\x12\x95\x22\xcb\xa3\xce\x8b\x6d\x6d\x89\xec\x7c\x1d\x25"
+	"\x27\x52\x50\xa0\x5b\x93\x8c\x5d\x3f\x56\xb9\xa6\x5e\xe5\xf7\x9b"
+	"\xc7\x9a\x4a\x2e\x79\xb5\xca\x29\x58\x52\xa0\x63\xe4\x9d\xeb\x4c"
+	"\x4c\xa8\x37\x0b\xe9\xa0\x18\xf1\x86\xf6\x4d\x32\xfb\x9e\x4f\xb3"
+	"\x7b\x5d\x58\x78\x70\xbd\x56\xac\x99\x75\x25\x71\x66\x76\x4e\x5e"
+	"\x67\x4f\xb1\x17\xa7\x8b\x55\x12\x87\x01\x4e\xd1\x66\xef\xd0\x70"
+	"\xaf\x14\x34\xee\x2a\x76\x49\x25\xa6\x2e\x43\x37\x75\x7d\x1a\xad"
+	"\x08\xd5\x01\x85\x9c\xe1\x20\xd8\x38\x5c\x57\xa5\xed\x9d\x46\x3a"
+	"\xb7\x46\x60\x29\x8b\xc4\x21\x50\x0a\x30\x9c\x57\x42\xe4\x35\xf8"
+	"\x12\x5c\x4f\xa2\x20\xc2\xc9\x43\xe3\x6d\x20\xbc\xdf\xb8\x37\x33"
+	"\x45\x43\x06\x4e\x08\x6f\x8a\xcd\x61\xc3\x1b\x05\x28\x82\xbe\xf0"
+	"\x48\x33\xe5\x93\xc9\x1a\x61\x16\x67\x03\x9d\x47\x9d\x74\xeb\xae"
+	"\x13\xf2\xb4\x1b\x09\x11\xf5\x15\xcb\x28\xfd\x50\xe0\xbc\x58\x36"
+	"\x38\x91\x2c\x07\x27\x1f\x49\x68\xf4\xce\xad\xf7\xba\xec\x5d\x3d"
+	"\xfd\x27\xe2\xcf\xf4\x56\xfe\x08\xa6\x11\x61\xcb\x6c\x9f\xf9\x3c"
+	"\x57\x0b\x8b\xaa\x00\x16\x18\xba\x1f\xe8\x4f\x01\xe2\x79\x2a\x0b"
+	"\xc1\xbd\x52\xef\xe6\xf7\x5a\x66\xfe\x07\x3b\x50\x6b\xbb\xcb\x39"
+	"\x3c\x94\xf6\x21\x0d\x68\x69\xa4\xed\x2e\xb5\x85\x03\x11\x38\x79"
+	"\xec\xb5\x22\x23\xdf\x9e\xad\xb4\xbe\xd7\xc7\xdf\xea\x30\x23\x8a"
+	"\xb7\x21\x0a\x9d\xbd\x99\x13\x7d\x5f\x7e\xaf\x28\x54\x3f\xca\x5e"
+	"\xf4\xfc\x05\x0d\x65\x67\xd8\xf6\x8e\x90\x9d\x0d\xcf\x62\x82\xd6"
+	"\x9f\x02\xf8\xca\xfa\x42\x24\x7f\x4d\xb7\xfc\x92\xa6\x4a\x51\xc4"
+	"\xd8\xae\x19\x87\xc6\xa3\x83\xbe\x7b\x6d\xc3\xf5\xb8\xad\x4a\x05"
+	"\x78\x84\x3a\x15\x2e\x40\xbe\x79\xa9\xc0\x12\xa1\x48\x39\xc3\xdb"
+	"\x47\x4f\x7d\xea\x6d\xc7\xfa\x2c\x4e\xe9\xa5\x85\x81\xea\x6c\xcd"
+	"\x8a\xe5\x74\x17\x76\x31\x31\x75\x96\x83\xca\x81\xbb\x5c\xa9\x79"
+	"\x2c\xbd\x09\xfe\xe4\x86\x0d\x8c\x76\x9c\xbc\xe8\x93\xe4\xd0\xe4"
+	"\x0f\xf8\xff\x24\x7e\x66\x61\x69\xfb\xe4\x46\x08\x94\x99\xa5\x53"
+	"\xd7\xe4\x29\x72\x86\x86\xe8\x1d\x37\xfa\xcb\xd0\x8d\x51\xd0\xbf"
+	"\x81\xcf\x55\xb9\xc5\x78\x8c\x74\xa0\x16\x3a\xd2\x19\x94\x29\x6a"
+	"\x5e\xec\xd3\x20\xa0\xb2\xfd\xce\xd4\x14\xa3\x39\x10\xa9\xf4\x4e"
+	"\xba\x21\x09\x5c\xe6\x61\x43\x51\xae\xc4\x71\xd7\x21\xef\x98\x39",
+	.b_public =
+	"\x45\x96\x5a\xb7\x78\x5c\xa4\x4d\x39\xb2\x5f\xc8\xc2\xaa\x1a\xf4"
+	"\xa6\x68\xf6\x6f\x7e\xa8\x4a\x5b\x0e\xba\x0a\x99\x85\xf9\x63\xd4"
+	"\x58\x21\x6d\xa8\x3c\xf4\x05\x10\xb0\x0d\x6f\x1c\xa0\x17\x85\xae"
+	"\x68\xbf\xcc\x00\xc8\x86\x1b\x24\x31\xc9\x49\x23\x91\xe0\x71\x29"
+	"\x06\x39\x39\x93\x49\x9c\x75\x18\x1a\x8b\x61\x73\x1c\x7f\x37\xd5"
+	"\xf1\xab\x20\x5e\x62\x25\xeb\x58\xd5\xfa\xc9\x7f\xad\x57\xd5\xcc"
+	"\x0d\xc1\x7a\x2b\x33\x2a\x76\x84\x33\x26\x97\xcf\x47\x9d\x72\x2a"
+	"\xc9\x39\xde\xa8\x42\x27\x2d\xdc\xee\x00\x60\xd2\x4f\x13\xe0\xde"
+	"\xd5\xc7\xf6\x7d\x8b\x2a\x43\x49\x40\x99\xc2\x61\x84\x8e\x57\x09"
+	"\x7c\xcc\x19\x46\xbd\x4c\xd2\x7c\x7d\x02\x4d\x88\xdf\x58\x24\x80"
+	"\xeb\x19\x3b\x2a\x13\x2b\x19\x85\x3c\xd8\x31\x03\x00\xa4\xd4\x57"
+	"\x23\x2c\x24\x37\xb3\x62\xea\x35\x29\xd0\x2c\xac\xfd\xbd\xdf\x3d"
+	"\xa6\xce\xfa\x0d\x5b\xb6\x15\x8b\xe3\x58\xe9\xad\x99\x87\x29\x51"
+	"\x8d\x97\xd7\xa9\x55\xf0\x72\x6e\x4e\x58\xcb\x2b\x4d\xbd\xd0\x48"
+	"\x7d\x14\x86\xdb\x3f\xa2\x5f\x6e\x35\x4a\xe1\x70\xb1\x53\x72\xb7"
+	"\xbc\xe9\x3d\x1b\x33\xc0\x54\x6f\x43\x55\x76\x85\x7f\x9b\xa5\xb3"
+	"\xc1\x1d\xd3\xfe\xe2\xd5\x96\x3d\xdd\x92\x04\xb1\xad\x75\xdb\x13"
+	"\x4e\x49\xfc\x35\x34\xc5\xda\x13\x98\xb8\x12\xbe\xda\x90\x55\x7c"
+	"\x11\x6c\xbe\x2b\x8c\x51\x29\x23\xc1\x51\xbc\x0c\x1c\xe2\x20\xfc"
+	"\xfe\xf2\xaa\x71\x9b\x21\xdf\x25\x1f\x68\x21\x7e\xe1\xc9\x87\xa0"
+	"\x20\xf6\x8d\x4f\x27\x8c\x3c\x0f\x9d\xf4\x69\x25\xaa\x49\xab\x94"
+	"\x22\x5a\x92\x3a\xba\xb4\xc2\x8c\x5a\xaa\x04\xbf\x46\xc5\xaa\x93"
+	"\xab\x0d\xe9\x54\x6c\x3a\x64\xa6\xa2\x21\x66\xee\x1c\x10\x21\x84"
+	"\xf2\x9e\xcc\x57\xac\xc2\x25\x62\xad\xbb\x59\xef\x25\x61\x6c\x81"
+	"\x38\x8a\xdc\x8c\xeb\x7b\x18\x1d\xaf\xa9\xc5\x9a\xf4\x49\x26\x8a"
+	"\x25\xc4\x3e\x31\x95\x28\xef\xf7\x72\xe9\xc5\xaa\x59\x72\x2b\x67"
+	"\x47\xe8\x6b\x51\x05\x24\xb8\x18\xb3\x34\x0f\x8c\x2b\x80\xba\x61"
+	"\x1c\xbe\x9e\x9a\x7c\xe3\x60\x5e\x49\x02\xff\x50\x8a\x64\x28\x64"
+	"\x46\x7b\x83\x14\x72\x6e\x59\x9b\x56\x09\xb4\xf0\xde\x52\xc3\xf3"
+	"\x58\x17\x6a\xae\xb1\x0f\xf4\x39\xcc\xd8\xce\x4d\xe1\x51\x17\x88"
+	"\xe4\x98\xd9\xd1\xa9\x55\xbc\xbf\x7e\xc4\x51\x96\xdb\x44\x1d\xcd"
+	"\x8d\x74\xad\xa7\x8f\x87\x83\x75\xfc\x36\xb7\xd2\xd4\x89\x16\x97"
+	"\xe4\xc6\x2a\xe9\x65\xc8\xca\x1c\xbd\x86\xaf\x57\x80\xf7\xdd\x42"
+	"\xc0\x3b\x3f\x87\x51\x02\x2f\xf8\xd8\x68\x0f\x3d\x95\x2d\xf1\x67"
+	"\x09\xa6\x5d\x0b\x7e\x01\xb4\xb2\x32\x01\xa8\xd0\x58\x0d\xe6\xa2"
+	"\xd8\x4b\x22\x10\x7d\x11\xf3\xc2\x4e\xb8\x43\x8e\x31\x79\x59\xe2"
+	"\xc4\x96\x29\x17\x40\x06\x0d\xdf\xdf\xc3\x02\x30\x2a\xd1\x8e\xf2"
+	"\xee\x2d\xd2\x12\x63\x5a\x1d\x3c\xba\x4a\xc4\x56\x90\xc6\x12\x0b"
+	"\xe0\x04\x3f\x35\x59\x8e\x40\x75\xf4\x4c\x10\x61\xb9\x30\x89\x7c"
+	"\x8d\x0e\x25\xb7\x5a\x6b\x97\x05\xc6\x37\x80\x6e\x94\x56\xa8\x5f"
+	"\x03\x94\x59\xc8\xc5\x3e\xdc\x23\xe5\x68\x4f\xd7\xbb\x6d\x7e\xc1"
+	"\x8d\xf9\xcc\x3f\x38\xad\x77\xb3\x18\x61\xed\x04\xc0\x71\xa7\x96"
+	"\xb1\xaf\x1d\x69\x78\xda\x6d\x89\x8b\x50\x75\x99\x44\xb3\xb2\x75"
+	"\xd1\xc8\x14\x40\xa1\x0a\xbf\xc4\x45\xc4\xee\x12\x90\x76\x26\x64"
+	"\xb7\x73\x2e\x0b\x0c\xfa\xc3\x55\x29\x24\x1b\x7a\x00\x27\x07\x26"
+	"\x36\xf0\x38\x1a\xe3\xb7\xc4\x8d\x1c\x9c\xa9\xc0\xc1\x45\x91\x9e"
+	"\x86\xdd\x82\x94\x45\xfa\xcd\x5a\x19\x12\x7d\xef\xda\x17\xad\x21"
+	"\x17\x89\x8b\x45\xa7\xf5\xed\x51\x9e\x58\x13\xdc\x84\xa4\xe6\x37",
+	.secret_size = 16,
+	.b_secret_size = 784,
+	.b_public_size = 768,
+	.expected_a_public_size = 768,
+	.expected_ss_size = 768,
+	.genkey = true,
+	},
+};
+
+static const struct kpp_testvec ffdhe8192_dh_tv_template[] __maybe_unused = {
+	{
+	.secret =
+#ifdef __LITTLE_ENDIAN
+	"\x01\x00" /* type */
+	"\x10\x04" /* len */
+	"\x00\x04\x00\x00" /* key_size */
+	"\x00\x00\x00\x00" /* p_size */
+	"\x00\x00\x00\x00" /* g_size */
+#else
+	"\x00\x01" /* type */
+	"\x04\x10" /* len */
+	"\x00\x00\x04\x00" /* key_size */
+	"\x00\x00\x00\x00" /* p_size */
+	"\x00\x00\x00\x00" /* g_size */
+#endif
+	/* xa */
+	"\x76\x6e\xeb\xf9\xeb\x76\xae\x37\xcb\x19\x49\x8b\xeb\xaf\xb0\x4b"
+	"\x6d\xe9\x15\xad\xda\xf2\xef\x58\xe9\xd6\xdd\x4c\xb3\x56\xd0\x3b"
+	"\x00\xb0\x65\xed\xae\xe0\x2e\xdf\x8f\x45\x3f\x3c\x5d\x2f\xfa\x96"
 	"\x36\x33\xb2\x01\x8b\x0f\xe8\x46\x15\x6d\x60\x5b\xec\x32\xc3\x3b"
 	"\x06\xf3\xb4\x1b\x9a\xef\x3c\x03\x0e\xcc\xce\x1d\x24\xa0\xc9\x08"
 	"\x65\xf9\x45\xe5\xd2\x43\x08\x88\x58\xd6\x46\xe7\xbb\x25\xac\xed"
@@ -2327,6 +2719,172 @@ static const struct kpp_testvec ffdhe8192_dh_tv_template[] __maybe_unused = {
 	.expected_a_public_size = 1024,
 	.expected_ss_size = 1024,
 	},
+	{
+	.secret =
+#ifdef __LITTLE_ENDIAN
+	"\x01\x00" /* type */
+	"\x10\x00" /* len */
+	"\x00\x00\x00\x00" /* key_size */
+	"\x00\x00\x00\x00" /* p_size */
+	"\x00\x00\x00\x00", /* g_size */
+#else
+	"\x00\x01" /* type */
+	"\x00\x10" /* len */
+	"\x00\x00\x00\x00" /* key_size */
+	"\x00\x00\x00\x00" /* p_size */
+	"\x00\x00\x00\x00", /* g_size */
+#endif
+	.b_secret =
+#ifdef __LITTLE_ENDIAN
+	"\x01\x00" /* type */
+	"\x10\x04" /* len */
+	"\x00\x04\x00\x00" /* key_size */
+	"\x00\x00\x00\x00" /* p_size */
+	"\x00\x00\x00\x00" /* g_size */
+#else
+	"\x00\x01" /* type */
+	"\x04\x10" /* len */
+	"\x00\x00\x04\x00" /* key_size */
+	"\x00\x00\x00\x00" /* p_size */
+	"\x00\x00\x00\x00" /* g_size */
+#endif
+	/* xa */
+	"\x76\x6e\xeb\xf9\xeb\x76\xae\x37\xcb\x19\x49\x8b\xeb\xaf\xb0\x4b"
+	"\x6d\xe9\x15\xad\xda\xf2\xef\x58\xe9\xd6\xdd\x4c\xb3\x56\xd0\x3b"
+	"\x00\xb0\x65\xed\xae\xe0\x2e\xdf\x8f\x45\x3f\x3c\x5d\x2f\xfa\x96"
+	"\x36\x33\xb2\x01\x8b\x0f\xe8\x46\x15\x6d\x60\x5b\xec\x32\xc3\x3b"
+	"\x06\xf3\xb4\x1b\x9a\xef\x3c\x03\x0e\xcc\xce\x1d\x24\xa0\xc9\x08"
+	"\x65\xf9\x45\xe5\xd2\x43\x08\x88\x58\xd6\x46\xe7\xbb\x25\xac\xed"
+	"\x3b\xac\x6f\x5e\xfb\xd6\x19\xa6\x20\x3a\x1d\x0c\xe8\x00\x72\x54"
+	"\xd7\xd9\xc9\x26\x49\x18\xc6\xb8\xbc\xdd\xf3\xce\xf3\x7b\x69\x04"
+	"\x5c\x6f\x11\xdb\x44\x42\x72\xb6\xb7\x84\x17\x86\x47\x3f\xc5\xa1"
+	"\xd8\x86\xef\xe2\x27\x49\x2b\x8f\x3e\x91\x12\xd9\x45\x96\xf7\xe6"
+	"\x77\x76\x36\x58\x71\x9a\xb1\xdb\xcf\x24\x9e\x7e\xad\xce\x45\xba"
+	"\xb5\xec\x8e\xb9\xd6\x7b\x3d\x76\xa4\x85\xad\xd8\x49\x9b\x80\x9d"
+	"\x7f\x9f\x85\x09\x9e\x86\x5b\x6b\xf3\x8d\x39\x5e\x6f\xe4\x30\xc8"
+	"\xa5\xf3\xdf\x68\x73\x6b\x2e\x9a\xcb\xac\x0a\x0d\x44\xc1\xaf\xb2"
+	"\x11\x1b\x7c\x43\x08\x44\x43\xe2\x4e\xfd\x93\x30\x99\x09\x12\xbb"
+	"\xf6\x31\x34\xa5\x3d\x45\x98\xee\xd7\x2a\x1a\x89\xf5\x37\x92\x33"
+	"\xa0\xdd\xf5\xfb\x1f\x90\x42\x55\x5a\x0b\x82\xff\xf0\x96\x92\x15"
+	"\x65\x5a\x55\x96\xca\x1b\xd5\xe5\xb5\x94\xde\x2e\xa6\x03\x57\x9e"
+	"\x15\xe4\x32\x2b\x1f\xb2\x22\x21\xe9\xa0\x05\xd3\x65\x6c\x11\x66"
+	"\x25\x38\xbb\xa3\x6c\xc2\x0b\x2b\xd0\x7a\x20\x26\x29\x37\x5d\x5f"
+	"\xd8\xff\x2a\xcd\x46\x6c\xd6\x6e\xe5\x77\x1a\xe6\x33\xf1\x8e\xc8"
+	"\x10\x30\x11\x00\x27\xf9\x7d\x0e\x28\x43\xa7\x67\x38\x7f\x16\xda"
+	"\xd0\x01\x8e\xa4\xe8\x6f\xcd\x23\xaf\x77\x52\x34\xad\x7e\xc3\xed"
+	"\x2d\x10\x0a\x33\xdc\xcf\x1b\x88\x0f\xcc\x48\x7f\x42\xf0\x9e\x13"
+	"\x1f\xf5\xd1\xe9\x90\x87\xbd\xfa\x5f\x1d\x77\x55\xcb\xc3\x05\xaf"
+	"\x71\xd0\xe0\xab\x46\x31\xd7\xea\x89\x54\x2d\x39\xaf\xf6\x4f\x74"
+	"\xaf\x46\x58\x89\x78\x95\x2e\xe6\x90\xb7\xaa\x00\x73\x9f\xed\xb9"
+	"\x00\xd6\xf6\x6d\x26\x59\xcd\x56\xdb\xf7\x3d\x5f\xeb\x6e\x46\x33"
+	"\xb1\x23\xed\x9f\x8d\x58\xdc\xb4\x28\x3b\x90\x09\xc4\x61\x02\x1f"
+	"\xf8\x62\xf2\x6e\xc1\x94\x71\x66\x93\x11\xdf\xaa\x3e\xd7\xb5\xe5"
+	"\xc1\x78\xe9\x14\xcd\x55\x16\x51\xdf\x8d\xd0\x94\x8c\x43\xe9\xb8"
+	"\x1d\x42\x7f\x76\xbc\x6f\x87\x42\x88\xde\xd7\x52\x78\x00\x4f\x18"
+	"\x02\xe7\x7b\xe2\x8a\xc3\xd1\x43\xa5\xac\xda\xb0\x8d\x19\x96\xd4"
+	"\x81\xe0\x75\xe9\xca\x41\x7e\x1f\x93\x0b\x26\x24\xb3\xaa\xdd\x10"
+	"\x20\xd3\xf2\x9f\x3f\xdf\x65\xde\x67\x79\xdc\x76\x9f\x3c\x72\x75"
+	"\x65\x8a\x30\xcc\xd2\xcc\x06\xb1\xab\x62\x86\x78\x5d\xb8\xce\x72"
+	"\xb3\x12\xc7\x9f\x07\xd0\x6b\x98\x82\x9b\x6c\xbb\x15\xe5\xcc\xf4"
+	"\xc8\xf4\x60\x81\xdc\xd3\x09\x1b\x5e\xd4\xf3\x55\xcf\x1c\x16\x83"
+	"\x61\xb4\x2e\xcc\x08\x67\x58\xfd\x46\x64\xbc\x29\x4b\xdd\xda\xec"
+	"\xdc\xc6\xa9\xa5\x73\xfb\xf8\xf3\xaf\x89\xa8\x9e\x25\x14\xfa\xac"
+	"\xeb\x1c\x7c\x80\x96\x66\x4d\x41\x67\x9b\x07\x4f\x0a\x97\x17\x1c"
+	"\x4d\x61\xc7\x2e\x6f\x36\x98\x29\x50\x39\x6d\xe7\x70\xda\xf0\xc8"
+	"\x05\x80\x7b\x32\xff\xfd\x12\xde\x61\x0d\xf9\x4c\x21\xf1\x56\x72"
+	"\x3d\x61\x46\xc0\x2d\x07\xd1\x6c\xd3\xbe\x9a\x21\x83\x85\xf7\xed"
+	"\x53\x95\x44\x40\x8f\x75\x12\x18\xc2\x9a\xfd\x5e\xce\x66\xa6\x7f"
+	"\x57\xc0\xd7\x73\x76\xb3\x13\xda\x2e\x58\xc6\x27\x40\xb2\x2d\xef"
+	"\x7d\x72\xb4\xa8\x75\x6f\xcc\x5f\x42\x3e\x2c\x90\x36\x59\xa0\x34"
+	"\xaa\xce\xbc\x04\x4c\xe6\x56\xc2\xcd\xa6\x1c\x59\x04\x56\x53\xcf"
+	"\x6d\xd7\xf0\xb1\x4f\x91\xfa\x84\xcf\x4b\x8d\x50\x4c\xf8\x2a\x31"
+	"\x5f\xe3\xba\x79\xb4\xcc\x59\x64\xe3\x7a\xfa\xf6\x06\x9d\x04\xbb"
+	"\xce\x61\xbf\x9e\x59\x0a\x09\x51\x6a\xbb\x0b\x80\xe0\x91\xc1\x51"
+	"\x04\x58\x67\x67\x4b\x42\x4f\x95\x68\x75\xe2\x1f\x9c\x14\x70\xfd"
+	"\x3a\x8a\xce\x8b\x04\xa1\x89\xe7\xb4\xbf\x70\xfe\xf3\x0c\x48\x04"
+	"\x3a\xd2\x85\x68\x03\xe7\xfa\xec\x5b\x55\xb7\x95\xfd\x5b\x19\x35"
+	"\xad\xcb\x4a\x63\x03\x44\x64\x2a\x48\x59\x9a\x26\x43\x96\x8c\xe6"
+	"\xbd\xb7\x90\xd4\x5f\x8d\x08\x28\xa8\xc5\x89\x70\xb9\x6e\xd3\x3b"
+	"\x76\x0e\x37\x98\x15\x27\xca\xc9\xb0\xe0\xfd\xf3\xc6\xdf\x69\xce"
+	"\xe1\x5f\x6a\x3e\x5c\x86\xe2\x58\x41\x11\xf0\x7e\x56\xec\xe4\xc9"
+	"\x0d\x87\x91\xfb\xb9\xc8\x0d\x34\xab\xb0\xc6\xf2\xa6\x00\x7b\x18"
+	"\x92\xf4\x43\x7f\x01\x85\x2e\xef\x8c\x72\x50\x10\xdb\xf1\x37\x62"
+	"\x16\x85\x71\x01\xa8\x2b\xf0\x13\xd3\x7c\x0b\xaf\xf1\xf3\xd1\xee"
+	"\x90\x41\x5f\x7d\x5b\xa9\x83\x4b\xfa\x80\x59\x50\x73\xe1\xc4\xf9"
+	"\x5e\x4b\xde\xd9\xf5\x22\x68\x5e\x65\xd9\x37\xe4\x1a\x08\x0e\xb1"
+	"\x28\x2f\x40\x9e\x37\xa8\x12\x56\xb7\xb8\x64\x94\x68\x94\xff\x9f",
+	.b_public =
+	"\xa1\x6c\x9e\xda\x45\x4d\xf6\x59\x04\x00\xc1\xc6\x8b\x12\x3b\xcd"
+	"\x07\xe4\x3e\xec\xac\x9b\xfc\xf7\x6d\x73\x39\x9e\x52\xf8\xbe\x33"
+	"\xe2\xca\xea\x99\x76\xc7\xc9\x94\x5c\xf3\x1b\xea\x6b\x66\x4b\x51"
+	"\x90\xf6\x4f\x75\xd5\x85\xf4\x28\xfd\x74\xa5\x57\xb1\x71\x0c\xb6"
+	"\xb6\x95\x70\x2d\xfa\x4b\x56\xe0\x56\x10\x21\xe5\x60\xa6\x18\xa4"
+	"\x78\x8c\x07\xc0\x2b\x59\x9c\x84\x5b\xe9\xb9\x74\xbf\xbc\x65\x48"
+	"\x27\x82\x40\x53\x46\x32\xa2\x92\x91\x9d\xf6\xd1\x07\x0e\x1d\x07"
+	"\x1b\x41\x04\xb1\xd4\xce\xae\x6e\x46\xf1\x72\x50\x7f\xff\xa8\xa2"
+	"\xbc\x3a\xc1\xbb\x28\xd7\x7d\xcd\x7a\x22\x01\xaf\x57\xb0\xa9\x02"
+	"\xd4\x8a\x92\xd5\xe6\x8e\x6f\x11\x39\xfe\x36\x87\x89\x42\x25\x42"
+	"\xd9\xbe\x67\x15\xe1\x82\x8a\x5e\x98\xc2\xd5\xde\x9e\x13\x1a\xe7"
+	"\xf9\x9f\x8e\x2d\x49\xdc\x4d\x98\x8c\xdd\xfd\x24\x7c\x46\xa9\x69"
+	"\x3b\x31\xb3\x12\xce\x54\xf6\x65\x75\x40\xc2\xf1\x04\x92\xe3\x83"
+	"\xeb\x02\x3d\x79\xc0\xf9\x7c\x28\xb3\x97\x03\xf7\x61\x1c\xce\x95"
+	"\x1a\xa0\xb3\x77\x1b\xc1\x9f\xf8\xf6\x3f\x4d\x0a\xfb\xfa\x64\x1c"
+	"\xcb\x37\x5b\xc3\x28\x60\x9f\xd1\xf2\xc4\xee\x77\xaa\x1f\xe9\xa2"
+	"\x89\x4c\xc6\xb7\xb3\xe4\xa5\xed\xa7\xe8\xac\x90\xdc\xc3\xfb\x56"
+	"\x9c\xda\x2c\x1d\x1a\x9a\x8c\x82\x92\xee\xdc\xa0\xa4\x01\x6e\x7f"
+	"\xc7\x0e\xc2\x73\x7d\xa6\xac\x12\x01\xc0\xc0\xc8\x7c\x84\x86\xc7"
+	"\xa5\x94\xe5\x33\x84\x71\x6e\x36\xe3\x3b\x81\x30\xe0\xc8\x51\x52"
+	"\x2b\x9e\x68\xa2\x6e\x09\x95\x8c\x7f\x78\x82\xbd\x53\x26\xe7\x95"
+	"\xe0\x03\xda\xc0\xc3\x6e\xcf\xdc\xb3\x14\xfc\xe9\x5b\x9b\x70\x6c"
+	"\x93\x04\xab\x13\xf7\x17\x6d\xee\xad\x32\x48\xe9\xa0\x94\x1b\x14"
+	"\x64\x4f\xa1\xb3\x8d\x6a\xca\x28\xfe\x4a\xf4\xf0\xc5\xb7\xf9\x8a"
+	"\x8e\xff\xfe\x57\x6f\x20\xdb\x04\xab\x02\x31\x22\x42\xfd\xbd\x77"
+	"\xea\xce\xe8\xc7\x5d\xe0\x8e\xd6\x66\xd0\xe4\x04\x2f\x5f\x71\xc7"
+	"\x61\x2d\xa5\x3f\x2f\x46\xf2\xd8\x5b\x25\x82\xf0\x52\x88\xc0\x59"
+	"\xd3\xa3\x90\x17\xc2\x04\x13\xc3\x13\x69\x4f\x17\xb1\xb3\x46\x4f"
+	"\xa7\xe6\x8b\x5e\x3e\x95\x0e\xf5\x42\x17\x7f\x4d\x1f\x1b\x7d\x65"
+	"\x86\xc5\xc8\xae\xae\xd8\x4f\xe7\x89\x41\x69\xfd\x06\xce\x5d\xed"
+	"\x44\x55\xad\x51\x98\x15\x78\x8d\x68\xfc\x93\x72\x9d\x22\xe5\x1d"
+	"\x21\xc3\xbe\x3a\x44\x34\xc0\xa3\x1f\xca\xdf\x45\xd0\x5c\xcd\xb7"
+	"\x72\xeb\xae\x7a\xad\x3f\x05\xa0\xe3\x6e\x5a\xd8\x52\xa7\xf1\x1e"
+	"\xb4\xf2\xcf\xe7\xdf\xa7\xf2\x22\x00\xb2\xc4\x17\x3d\x2c\x15\x04"
+	"\x71\x28\x69\x5c\x69\x21\xc8\xf1\x9b\xd8\xc7\xbc\x27\xa3\x85\xe9"
+	"\x53\x77\xd3\x65\xc3\x86\xdd\xb3\x76\x13\xfb\xa1\xd4\xee\x9d\xe4"
+	"\x51\x3f\x83\x59\xe4\x47\xa8\xa6\x0d\x68\xd5\xf6\xf4\xca\x31\xcd"
+	"\x30\x48\x34\x90\x11\x8e\x87\xe9\xea\xc9\xd0\xc3\xba\x28\xf9\xc0"
+	"\xc9\x8e\x23\xe5\xc2\xee\xf2\x47\x9c\x41\x1c\x10\x33\x27\x23\x49"
+	"\xe5\x0d\x18\xbe\x19\xc1\xba\x6c\xdc\xb7\xa1\xe7\xc5\x0d\x6f\xf0"
+	"\x8c\x62\x6e\x0d\x14\xef\xef\xf2\x8e\x01\xd2\x76\xf5\xc1\xe1\x92"
+	"\x3c\xb3\x76\xcd\xd8\xdd\x9b\xe0\x8e\xdc\x24\x34\x13\x65\x0f\x11"
+	"\xaf\x99\x7a\x2f\xe6\x1f\x7d\x17\x3e\x8a\x68\x9a\x37\xc8\x8d\x3e"
+	"\xa3\xfe\xfe\x57\x22\xe6\x0e\x50\xb5\x98\x0b\x71\xd8\x01\xa2\x8d"
+	"\x51\x96\x50\xc2\x41\x31\xd8\x23\x98\xfc\xd1\x9d\x7e\x27\xbb\x69"
+	"\x78\xe0\x87\xf7\xe4\xdd\x58\x13\x9d\xec\x00\xe4\xb9\x70\xa2\x94"
+	"\x5d\x52\x4e\xf2\x5c\xd1\xbc\xfd\xee\x9b\xb9\xe5\xc4\xc0\xa8\x77"
+	"\x67\xa4\xd1\x95\x34\xe4\x6d\x5f\x25\x02\x8d\x65\xdd\x11\x63\x55"
+	"\x04\x01\x21\x60\xc1\x5c\xef\x77\x33\x01\x1c\xa2\x11\x2b\xdd\x2b"
+	"\x74\x99\x23\x38\x05\x1b\x7e\x2e\x01\x52\xfe\x9c\x23\xde\x3e\x1a"
+	"\x72\xf4\xff\x7b\x02\xaa\x08\xcf\xe0\x5b\x83\xbe\x85\x5a\xe8\x9d"
+	"\x11\x3e\xff\x2f\xc6\x97\x67\x36\x6c\x0f\x81\x9c\x26\x29\xb1\x0f"
+	"\xbb\x53\xbd\xf4\xec\x2a\x84\x41\x28\x3b\x86\x40\x95\x69\x55\x5f"
+	"\x30\xee\xda\x1e\x6c\x4b\x25\xd6\x2f\x2c\x0e\x3c\x1a\x26\xa0\x3e"
+	"\xef\x09\xc6\x2b\xe5\xa1\x0c\x03\xa8\xf5\x39\x70\x31\xc4\x32\x79"
+	"\xd1\xd9\xc2\xcc\x32\x4a\xf1\x2f\x57\x5a\xcc\xe5\xc3\xc5\xd5\x4e"
+	"\x86\x56\xca\x64\xdb\xab\x61\x85\x8f\xf9\x20\x02\x40\x66\x76\x9e"
+	"\x5e\xd4\xac\xf0\x47\xa6\x50\x5f\xc2\xaf\x55\x9b\xa3\xc9\x8b\xf8"
+	"\x42\xd5\xcf\x1a\x95\x22\xd9\xd1\x0b\x92\x51\xca\xde\x46\x02\x0d"
+	"\x8b\xee\xd9\xa0\x04\x74\xf5\x0e\xb0\x3a\x62\xec\x3c\x91\x29\x33"
+	"\xa7\x78\x22\x92\xac\x27\xe6\x2d\x6f\x56\x8a\x5d\x72\xc2\xf1\x5c"
+	"\x54\x11\x97\x24\x61\xcb\x0c\x52\xd4\x57\x56\x22\x86\xf0\x19\x27"
+	"\x76\x30\x04\xf4\x39\x7b\x1a\x5a\x04\x0d\xec\x59\x9a\x31\x4c\x40"
+	"\x19\x6d\x3c\x41\x1b\x0c\xca\xeb\x25\x39\x6c\x96\xf8\x55\xd0\xec",
+	.secret_size = 16,
+	.b_secret_size = 1040,
+	.b_public_size = 1024,
+	.expected_a_public_size = 1024,
+	.expected_ss_size = 1024,
+	.genkey = true,
+	},
 };
 
 static const struct kpp_testvec curve25519_tv_template[] = {
-- 
GitLab


From c8e8236cf71f1e808120804339d2c23dc304f9cd Mon Sep 17 00:00:00 2001
From: Nicolai Stange <nstange@suse.de>
Date: Mon, 21 Feb 2022 13:10:57 +0100
Subject: [PATCH 0933/1586] crypto: dh - allow for passing NULL to the
 ffdheXYZ(dh)s' ->set_secret()

Ephemeral key generation can be requested from any of the ffdheXYZ(dh)
variants' common ->set_secret() by passing it an (encoded) struct dh
with the key parameter being unset, i.e. with ->key_size == 0. As the
whole purpose of the ffdheXYZ(dh) templates is to fill in the group
parameters as appropriate, they expect ->p and ->g to be unset in any
input struct dh as well. This means that a user would have to encode an
all-zeroes struct dh instance via crypto_dh_encode_key() when requesting
ephemeral key generation from a ffdheXYZ(dh) instance, which is kind of
pointless.

Make dh_safe_prime_set_secret() to decode a struct dh from the supplied
buffer only if the latter is non-NULL and initialize it with all zeroes
otherwise.

That is, it is now possible to call

  crypto_kpp_set_secret(tfm, NULL, 0);

on any ffdheXYZ(dh) tfm for requesting ephemeral key generation.

Signed-off-by: Nicolai Stange <nstange@suse.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/dh.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/crypto/dh.c b/crypto/dh.c
index 869a0476e5e2b..d0d24f615b2db 100644
--- a/crypto/dh.c
+++ b/crypto/dh.c
@@ -444,17 +444,18 @@ static int dh_safe_prime_set_secret(struct crypto_kpp *tfm, const void *buffer,
 	struct dh_safe_prime_instance_ctx *inst_ctx =
 		dh_safe_prime_instance_ctx(tfm);
 	struct dh_safe_prime_tfm_ctx *tfm_ctx = kpp_tfm_ctx(tfm);
-	struct dh params;
+	struct dh params = {};
 	void *buf = NULL, *key = NULL;
 	unsigned int buf_size;
 	int err;
 
-	err = __crypto_dh_decode_key(buffer, len, &params);
-	if (err)
-		return err;
-
-	if (params.p_size || params.g_size)
-		return -EINVAL;
+	if (buffer) {
+		err = __crypto_dh_decode_key(buffer, len, &params);
+		if (err)
+			return err;
+		if (params.p_size || params.g_size)
+			return -EINVAL;
+	}
 
 	params.p = inst_ctx->safe_prime->p;
 	params.p_size = inst_ctx->safe_prime->p_size;
-- 
GitLab


From d6097b8d5d55f26cd2244e7e7f00a5a077772a91 Mon Sep 17 00:00:00 2001
From: Nicolai Stange <nstange@suse.de>
Date: Mon, 21 Feb 2022 13:10:58 +0100
Subject: [PATCH 0934/1586] crypto: api - allow algs only in specific
 constructions in FIPS mode

Currently we do not distinguish between algorithms that fail on
the self-test vs. those which are disabled in FIPS mode (not allowed).
Both are marked as having failed the self-test.

Recently the need arose to allow the usage of certain algorithms only
as arguments to specific template instantiations in FIPS mode. For
example, standalone "dh" must be blocked, but e.g. "ffdhe2048(dh)" is
allowed. Other potential use cases include "cbcmac(aes)", which must
only be used with ccm(), or "ghash", which must be used only for
gcm().

This patch allows this scenario by adding a new flag FIPS_INTERNAL to
indicate those algorithms that are not FIPS-allowed. They can then be
used as template arguments only, i.e. when looked up via
crypto_grab_spawn() to be more specific. The FIPS_INTERNAL bit gets
propagated upwards recursively into the surrounding template
instances, until the construction eventually matches an explicit
testmgr entry with ->fips_allowed being set, if any.

The behaviour to skip !->fips_allowed self-test executions in FIPS
mode will be retained. Note that this effectively means that
FIPS_INTERNAL algorithms are handled very similarly to the INTERNAL
ones in this regard. It is expected that the FIPS_INTERNAL algorithms
will receive sufficient testing when the larger constructions they're
a part of, if any, get exercised by testmgr.

Note that as a side-effect of this patch algorithms which are not
FIPS-allowed will now return ENOENT instead of ELIBBAD. Hopefully
this is not an issue as some people were relying on this already.

Link: https://lore.kernel.org/r/YeEVSaMEVJb3cQkq@gondor.apana.org.au
Originally-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Nicolai Stange <nstange@suse.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/algapi.c        | 18 ++++++++++++++++--
 crypto/api.c           | 19 +++++++++++++++++--
 crypto/tcrypt.c        |  4 ++--
 crypto/testmgr.c       | 23 +++++++++++++++++++----
 include/linux/crypto.h |  9 +++++++++
 5 files changed, 63 insertions(+), 10 deletions(-)

diff --git a/crypto/algapi.c b/crypto/algapi.c
index 9f15e11f5d73b..53c5149e6abf9 100644
--- a/crypto/algapi.c
+++ b/crypto/algapi.c
@@ -328,8 +328,16 @@ void crypto_alg_tested(const char *name, int err)
 found:
 	q->cra_flags |= CRYPTO_ALG_DEAD;
 	alg = test->adult;
-	if (err || list_empty(&alg->cra_list))
+
+	if (list_empty(&alg->cra_list))
+		goto complete;
+
+	if (err == -ECANCELED)
+		alg->cra_flags |= CRYPTO_ALG_FIPS_INTERNAL;
+	else if (err)
 		goto complete;
+	else
+		alg->cra_flags &= ~CRYPTO_ALG_FIPS_INTERNAL;
 
 	alg->cra_flags |= CRYPTO_ALG_TESTED;
 
@@ -610,6 +618,7 @@ int crypto_register_instance(struct crypto_template *tmpl,
 {
 	struct crypto_larval *larval;
 	struct crypto_spawn *spawn;
+	u32 fips_internal = 0;
 	int err;
 
 	err = crypto_check_alg(&inst->alg);
@@ -632,11 +641,15 @@ int crypto_register_instance(struct crypto_template *tmpl,
 		spawn->inst = inst;
 		spawn->registered = true;
 
+		fips_internal |= spawn->alg->cra_flags;
+
 		crypto_mod_put(spawn->alg);
 
 		spawn = next;
 	}
 
+	inst->alg.cra_flags |= (fips_internal & CRYPTO_ALG_FIPS_INTERNAL);
+
 	larval = __crypto_register_alg(&inst->alg);
 	if (IS_ERR(larval))
 		goto unlock;
@@ -689,7 +702,8 @@ int crypto_grab_spawn(struct crypto_spawn *spawn, struct crypto_instance *inst,
 	if (IS_ERR(name))
 		return PTR_ERR(name);
 
-	alg = crypto_find_alg(name, spawn->frontend, type, mask);
+	alg = crypto_find_alg(name, spawn->frontend,
+			      type | CRYPTO_ALG_FIPS_INTERNAL, mask);
 	if (IS_ERR(alg))
 		return PTR_ERR(alg);
 
diff --git a/crypto/api.c b/crypto/api.c
index cf0869dd130b3..549f9aced1da2 100644
--- a/crypto/api.c
+++ b/crypto/api.c
@@ -223,6 +223,8 @@ static struct crypto_alg *crypto_larval_wait(struct crypto_alg *alg)
 	else if (crypto_is_test_larval(larval) &&
 		 !(alg->cra_flags & CRYPTO_ALG_TESTED))
 		alg = ERR_PTR(-EAGAIN);
+	else if (alg->cra_flags & CRYPTO_ALG_FIPS_INTERNAL)
+		alg = ERR_PTR(-EAGAIN);
 	else if (!crypto_mod_get(alg))
 		alg = ERR_PTR(-EAGAIN);
 	crypto_mod_put(&larval->alg);
@@ -233,6 +235,7 @@ static struct crypto_alg *crypto_larval_wait(struct crypto_alg *alg)
 static struct crypto_alg *crypto_alg_lookup(const char *name, u32 type,
 					    u32 mask)
 {
+	const u32 fips = CRYPTO_ALG_FIPS_INTERNAL;
 	struct crypto_alg *alg;
 	u32 test = 0;
 
@@ -240,8 +243,20 @@ static struct crypto_alg *crypto_alg_lookup(const char *name, u32 type,
 		test |= CRYPTO_ALG_TESTED;
 
 	down_read(&crypto_alg_sem);
-	alg = __crypto_alg_lookup(name, type | test, mask | test);
-	if (!alg && test) {
+	alg = __crypto_alg_lookup(name, (type | test) & ~fips,
+				  (mask | test) & ~fips);
+	if (alg) {
+		if (((type | mask) ^ fips) & fips)
+			mask |= fips;
+		mask &= fips;
+
+		if (!crypto_is_larval(alg) &&
+		    ((type ^ alg->cra_flags) & mask)) {
+			/* Algorithm is disallowed in FIPS mode. */
+			crypto_mod_put(alg);
+			alg = ERR_PTR(-ENOENT);
+		}
+	} else if (test) {
 		alg = __crypto_alg_lookup(name, type, mask);
 		if (alg && !crypto_is_larval(alg)) {
 			/* Test failed */
diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index 2a808e843de52..2bacf8384f59f 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -1473,8 +1473,8 @@ static inline int tcrypt_test(const char *alg)
 	pr_debug("testing %s\n", alg);
 
 	ret = alg_test(alg, alg, 0, 0);
-	/* non-fips algs return -EINVAL in fips mode */
-	if (fips_enabled && ret == -EINVAL)
+	/* non-fips algs return -EINVAL or -ECANCELED in fips mode */
+	if (fips_enabled && (ret == -EINVAL || ret == -ECANCELED))
 		ret = 0;
 	return ret;
 }
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index 394d1952b2c59..5fabd7bbfba56 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -5650,6 +5650,13 @@ static int alg_find_test(const char *alg)
 	return -1;
 }
 
+static int alg_fips_disabled(const char *driver, const char *alg)
+{
+	pr_info("alg: %s (%s) is disabled due to FIPS\n", alg, driver);
+
+	return -ECANCELED;
+}
+
 int alg_test(const char *driver, const char *alg, u32 type, u32 mask)
 {
 	int i;
@@ -5686,9 +5693,13 @@ int alg_test(const char *driver, const char *alg, u32 type, u32 mask)
 	if (i < 0 && j < 0)
 		goto notest;
 
-	if (fips_enabled && ((i >= 0 && !alg_test_descs[i].fips_allowed) ||
-			     (j >= 0 && !alg_test_descs[j].fips_allowed)))
-		goto non_fips_alg;
+	if (fips_enabled) {
+		if (j >= 0 && !alg_test_descs[j].fips_allowed)
+			return -EINVAL;
+
+		if (i >= 0 && !alg_test_descs[i].fips_allowed)
+			goto non_fips_alg;
+	}
 
 	rc = 0;
 	if (i >= 0)
@@ -5718,9 +5729,13 @@ test_done:
 
 notest:
 	printk(KERN_INFO "alg: No test for %s (%s)\n", alg, driver);
+
+	if (type & CRYPTO_ALG_FIPS_INTERNAL)
+		return alg_fips_disabled(driver, alg);
+
 	return 0;
 non_fips_alg:
-	return -EINVAL;
+	return alg_fips_disabled(driver, alg);
 }
 
 #endif /* CONFIG_CRYPTO_MANAGER_DISABLE_TESTS */
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index 855869e1fd327..2324ab6f1846b 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -132,6 +132,15 @@
  */
 #define CRYPTO_ALG_ALLOCATES_MEMORY	0x00010000
 
+/*
+ * Mark an algorithm as a service implementation only usable by a
+ * template and never by a normal user of the kernel crypto API.
+ * This is intended to be used by algorithms that are themselves
+ * not FIPS-approved but may instead be used to implement parts of
+ * a FIPS-approved algorithm (e.g., dh vs. ffdhe2048(dh)).
+ */
+#define CRYPTO_ALG_FIPS_INTERNAL	0x00020000
+
 /*
  * Transform masks and values (for crt_flags).
  */
-- 
GitLab


From 32f07cc40c9bb41452dc9d6c514a2012d9682b39 Mon Sep 17 00:00:00 2001
From: Nicolai Stange <nstange@suse.de>
Date: Mon, 21 Feb 2022 13:10:59 +0100
Subject: [PATCH 0935/1586] crypto: dh - disallow plain "dh" usage in FIPS mode

SP800-56Arev3, sec. 5.5.2 ("Assurance of Domain-Parameter Validity")
asserts that an implementation needs to verify domain paramtere validity,
which boils down to either
- the domain parameters corresponding to some known safe-prime group
  explicitly listed to be approved in the document or
- for parameters conforming to a "FIPS 186-type parameter-size set",
  that the implementation needs to perform an explicit domain parameter
  verification, which would require access to the "seed" and "counter"
  values used in their generation.

The latter is not easily feasible and moreover, SP800-56Arev3 states that
safe-prime groups are preferred and that FIPS 186-type parameter sets
should only be supported for backward compatibility, if it all.

Mark "dh" as not fips_allowed in testmgr. Note that the safe-prime
ffdheXYZ(dh) wrappers are not affected by this change: as these enforce
some approved safe-prime group each, their usage is still allowed in FIPS
mode.

This change will effectively render the keyctl(KEYCTL_DH_COMPUTE) syscall
unusable in FIPS mode, but it has been brought up that this might even be
a good thing ([1]).

[1] https://lore.kernel.org/r/20211217055227.GA20698@gondor.apana.org.au

Signed-off-by: Nicolai Stange <nstange@suse.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/testmgr.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index 5fabd7bbfba56..2d632a285869f 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -4650,7 +4650,6 @@ static const struct alg_test_desc alg_test_descs[] = {
 	}, {
 		.alg = "dh",
 		.test = alg_test_kpp,
-		.fips_allowed = 1,
 		.suite = {
 			.kpp = __VECS(dh_tv_template)
 		}
-- 
GitLab


From 81771ff2411a4cd1224c4b16e0b4247e71bba0de Mon Sep 17 00:00:00 2001
From: Nicolai Stange <nstange@suse.de>
Date: Mon, 21 Feb 2022 13:11:00 +0100
Subject: [PATCH 0936/1586] lib/mpi: export mpi_rshift

A subsequent patch will make the crypto/dh's dh_is_pubkey_valid() to
calculate a safe-prime groups Q parameter from P: Q = (P - 1) / 2. For
implementing this, mpi_rshift() will be needed. Export it so that it's
accessible from crypto/dh.

Signed-off-by: Nicolai Stange <nstange@suse.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 lib/mpi/mpi-bit.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lib/mpi/mpi-bit.c b/lib/mpi/mpi-bit.c
index 142b680835df3..070ba784c9f15 100644
--- a/lib/mpi/mpi-bit.c
+++ b/lib/mpi/mpi-bit.c
@@ -242,6 +242,7 @@ void mpi_rshift(MPI x, MPI a, unsigned int n)
 	}
 	MPN_NORMALIZE(x->d, x->nlimbs);
 }
+EXPORT_SYMBOL_GPL(mpi_rshift);
 
 /****************
  * Shift A by COUNT limbs to the left
-- 
GitLab


From 35d2bf20683f60288441844ab19ce671075643e6 Mon Sep 17 00:00:00 2001
From: Nicolai Stange <nstange@suse.de>
Date: Mon, 21 Feb 2022 13:11:01 +0100
Subject: [PATCH 0937/1586] crypto: dh - calculate Q from P for the full public
 key verification

As the ->q in struct dh_ctx gets never set anywhere, the code in
dh_is_pubkey_valid() for doing the full public key validation in accordance
to SP800-56Arev3 is effectively dead.

However, for safe-prime groups Q = (P - 1)/2 by definition and
as the safe-prime groups are the only possible groups in FIPS mode (via
those ffdheXYZ() templates), this enables dh_is_pubkey_valid() to calculate
Q on the fly for these.
Implement this.

With this change, the last code accessing struct dh_ctx's ->q is now gone.
Remove this member from struct dh_ctx.

Signed-off-by: Nicolai Stange <nstange@suse.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/dh.c | 40 +++++++++++++++++++++++++++++-----------
 1 file changed, 29 insertions(+), 11 deletions(-)

diff --git a/crypto/dh.c b/crypto/dh.c
index d0d24f615b2db..cca289477485e 100644
--- a/crypto/dh.c
+++ b/crypto/dh.c
@@ -15,7 +15,6 @@
 
 struct dh_ctx {
 	MPI p;	/* Value is guaranteed to be set. */
-	MPI q;	/* Value is optional. */
 	MPI g;	/* Value is guaranteed to be set. */
 	MPI xa;	/* Value is guaranteed to be set. */
 };
@@ -23,7 +22,6 @@ struct dh_ctx {
 static void dh_clear_ctx(struct dh_ctx *ctx)
 {
 	mpi_free(ctx->p);
-	mpi_free(ctx->q);
 	mpi_free(ctx->g);
 	mpi_free(ctx->xa);
 	memset(ctx, 0, sizeof(*ctx));
@@ -99,11 +97,12 @@ err_clear_ctx:
 /*
  * SP800-56A public key verification:
  *
- * * If Q is provided as part of the domain paramenters, a full validation
- *   according to SP800-56A section 5.6.2.3.1 is performed.
+ * * For the safe-prime groups in FIPS mode, Q can be computed
+ *   trivially from P and a full validation according to SP800-56A
+ *   section 5.6.2.3.1 is performed.
  *
- * * If Q is not provided, a partial validation according to SP800-56A section
- *   5.6.2.3.2 is performed.
+ * * For all other sets of group parameters, only a partial validation
+ *   according to SP800-56A section 5.6.2.3.2 is performed.
  */
 static int dh_is_pubkey_valid(struct dh_ctx *ctx, MPI y)
 {
@@ -114,21 +113,40 @@ static int dh_is_pubkey_valid(struct dh_ctx *ctx, MPI y)
 	 * Step 1: Verify that 2 <= y <= p - 2.
 	 *
 	 * The upper limit check is actually y < p instead of y < p - 1
-	 * as the mpi_sub_ui function is yet missing.
+	 * in order to save one mpi_sub_ui() invocation here. Note that
+	 * p - 1 is the non-trivial element of the subgroup of order 2 and
+	 * thus, the check on y^q below would fail if y == p - 1.
 	 */
 	if (mpi_cmp_ui(y, 1) < 1 || mpi_cmp(y, ctx->p) >= 0)
 		return -EINVAL;
 
-	/* Step 2: Verify that 1 = y^q mod p */
-	if (ctx->q) {
-		MPI val = mpi_alloc(0);
+	/*
+	 * Step 2: Verify that 1 = y^q mod p
+	 *
+	 * For the safe-prime groups q = (p - 1)/2.
+	 */
+	if (fips_enabled) {
+		MPI val, q;
 		int ret;
 
+		val = mpi_alloc(0);
 		if (!val)
 			return -ENOMEM;
 
-		ret = mpi_powm(val, y, ctx->q, ctx->p);
+		q = mpi_alloc(mpi_get_nlimbs(ctx->p));
+		if (!q) {
+			mpi_free(val);
+			return -ENOMEM;
+		}
+
+		/*
+		 * ->p is odd, so no need to explicitly subtract one
+		 * from it before shifting to the right.
+		 */
+		mpi_rshift(q, ctx->p, 1);
 
+		ret = mpi_powm(val, y, q, ctx->p);
+		mpi_free(q);
 		if (ret) {
 			mpi_free(val);
 			return ret;
-- 
GitLab


From 4920a4a7262dd1c647b515c05358ccbee0ab828c Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Mon, 21 Feb 2022 14:31:18 -0800
Subject: [PATCH 0938/1586] crypto: cleanup comments

For spdx
/* */ for *.h, // for *.c
Space before spdx tag

Replacements
paramenters to parameters
aymmetric to asymmetric
sigature to signature
boudary to boundary
compliled to compiled
eninges to engines
explicity to explicitly

Signed-off-by: Tom Rix <trix@redhat.com>
Acked-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/asymmetric_keys/signature.c   | 2 +-
 crypto/asymmetric_keys/x509_parser.h | 2 +-
 crypto/async_tx/async_xor.c          | 8 ++++----
 crypto/async_tx/raid6test.c          | 4 ++--
 crypto/cfb.c                         | 2 +-
 crypto/sm2.c                         | 2 +-
 6 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/crypto/asymmetric_keys/signature.c b/crypto/asymmetric_keys/signature.c
index 4aff3eebec177..2deff81f8af50 100644
--- a/crypto/asymmetric_keys/signature.c
+++ b/crypto/asymmetric_keys/signature.c
@@ -35,7 +35,7 @@ void public_key_signature_free(struct public_key_signature *sig)
 EXPORT_SYMBOL_GPL(public_key_signature_free);
 
 /**
- * query_asymmetric_key - Get information about an aymmetric key.
+ * query_asymmetric_key - Get information about an asymmetric key.
  * @params: Various parameters.
  * @info: Where to put the information.
  */
diff --git a/crypto/asymmetric_keys/x509_parser.h b/crypto/asymmetric_keys/x509_parser.h
index c233f136fb354..668f5c9f0b54a 100644
--- a/crypto/asymmetric_keys/x509_parser.h
+++ b/crypto/asymmetric_keys/x509_parser.h
@@ -22,7 +22,7 @@ struct x509_certificate {
 	time64_t	valid_to;
 	const void	*tbs;			/* Signed data */
 	unsigned	tbs_size;		/* Size of signed data */
-	unsigned	raw_sig_size;		/* Size of sigature */
+	unsigned	raw_sig_size;		/* Size of signature */
 	const void	*raw_sig;		/* Signature data */
 	const void	*raw_serial;		/* Raw serial number in ASN.1 */
 	unsigned	raw_serial_size;
diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c
index d8a91521144e0..1a3855284091d 100644
--- a/crypto/async_tx/async_xor.c
+++ b/crypto/async_tx/async_xor.c
@@ -170,8 +170,8 @@ dma_xor_aligned_offsets(struct dma_device *device, unsigned int offset,
  *
  * xor_blocks always uses the dest as a source so the
  * ASYNC_TX_XOR_ZERO_DST flag must be set to not include dest data in
- * the calculation.  The assumption with dma eninges is that they only
- * use the destination buffer as a source when it is explicity specified
+ * the calculation.  The assumption with dma engines is that they only
+ * use the destination buffer as a source when it is explicitly specified
  * in the source list.
  *
  * src_list note: if the dest is also a source it must be at index zero.
@@ -261,8 +261,8 @@ EXPORT_SYMBOL_GPL(async_xor_offs);
  *
  * xor_blocks always uses the dest as a source so the
  * ASYNC_TX_XOR_ZERO_DST flag must be set to not include dest data in
- * the calculation.  The assumption with dma eninges is that they only
- * use the destination buffer as a source when it is explicity specified
+ * the calculation.  The assumption with dma engines is that they only
+ * use the destination buffer as a source when it is explicitly specified
  * in the source list.
  *
  * src_list note: if the dest is also a source it must be at index zero.
diff --git a/crypto/async_tx/raid6test.c b/crypto/async_tx/raid6test.c
index 66db82e5a3b13..c9d218e53bcb0 100644
--- a/crypto/async_tx/raid6test.c
+++ b/crypto/async_tx/raid6test.c
@@ -217,7 +217,7 @@ static int raid6_test(void)
 		err += test(12, &tests);
 	}
 
-	/* the 24 disk case is special for ioatdma as it is the boudary point
+	/* the 24 disk case is special for ioatdma as it is the boundary point
 	 * at which it needs to switch from 8-source ops to 16-source
 	 * ops for continuation (assumes DMA_HAS_PQ_CONTINUE is not set)
 	 */
@@ -241,7 +241,7 @@ static void raid6_test_exit(void)
 }
 
 /* when compiled-in wait for drivers to load first (assumes dma drivers
- * are also compliled-in)
+ * are also compiled-in)
  */
 late_initcall(raid6_test);
 module_exit(raid6_test_exit);
diff --git a/crypto/cfb.c b/crypto/cfb.c
index 0d664dfb47bcd..5c36b7b65e2aa 100644
--- a/crypto/cfb.c
+++ b/crypto/cfb.c
@@ -1,4 +1,4 @@
-//SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0
 /*
  * CFB: Cipher FeedBack mode
  *
diff --git a/crypto/sm2.c b/crypto/sm2.c
index ae3f77a660703..f3e1592965c01 100644
--- a/crypto/sm2.c
+++ b/crypto/sm2.c
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * SM2 asymmetric public-key algorithm
  * as specified by OSCCA GM/T 0003.1-2012 -- 0003.5-2012 SM2 and
-- 
GitLab


From 7976c1492571a5fb234c416559a0d9790855c635 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Wed, 23 Feb 2022 08:07:01 +0100
Subject: [PATCH 0939/1586] crypto: crypto_xor - use helpers for unaligned
 accesses

Dereferencing a misaligned pointer is undefined behavior in C, and may
result in codegen on architectures such as ARM that trigger alignments
traps and expensive fixups in software.

Instead, use the get_aligned()/put_aligned() accessors, which are cheap
or even completely free when CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y.

In the converse case, the prior alignment checks ensure that the casts
are safe, and so no unaligned accessors are necessary.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/algapi.c         | 24 +++++++++++++++++++++---
 include/crypto/algapi.h | 10 ++++++++--
 2 files changed, 29 insertions(+), 5 deletions(-)

diff --git a/crypto/algapi.c b/crypto/algapi.c
index 53c5149e6abf9..b078e7a6e64c0 100644
--- a/crypto/algapi.c
+++ b/crypto/algapi.c
@@ -1022,7 +1022,13 @@ void __crypto_xor(u8 *dst, const u8 *src1, const u8 *src2, unsigned int len)
 	}
 
 	while (IS_ENABLED(CONFIG_64BIT) && len >= 8 && !(relalign & 7)) {
-		*(u64 *)dst = *(u64 *)src1 ^  *(u64 *)src2;
+		if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) {
+			u64 l = get_unaligned((u64 *)src1) ^
+				get_unaligned((u64 *)src2);
+			put_unaligned(l, (u64 *)dst);
+		} else {
+			*(u64 *)dst = *(u64 *)src1 ^ *(u64 *)src2;
+		}
 		dst += 8;
 		src1 += 8;
 		src2 += 8;
@@ -1030,7 +1036,13 @@ void __crypto_xor(u8 *dst, const u8 *src1, const u8 *src2, unsigned int len)
 	}
 
 	while (len >= 4 && !(relalign & 3)) {
-		*(u32 *)dst = *(u32 *)src1 ^ *(u32 *)src2;
+		if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) {
+			u32 l = get_unaligned((u32 *)src1) ^
+				get_unaligned((u32 *)src2);
+			put_unaligned(l, (u32 *)dst);
+		} else {
+			*(u32 *)dst = *(u32 *)src1 ^ *(u32 *)src2;
+		}
 		dst += 4;
 		src1 += 4;
 		src2 += 4;
@@ -1038,7 +1050,13 @@ void __crypto_xor(u8 *dst, const u8 *src1, const u8 *src2, unsigned int len)
 	}
 
 	while (len >= 2 && !(relalign & 1)) {
-		*(u16 *)dst = *(u16 *)src1 ^ *(u16 *)src2;
+		if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) {
+			u16 l = get_unaligned((u16 *)src1) ^
+				get_unaligned((u16 *)src2);
+			put_unaligned(l, (u16 *)dst);
+		} else {
+			*(u16 *)dst = *(u16 *)src1 ^ *(u16 *)src2;
+		}
 		dst += 2;
 		src1 += 2;
 		src2 += 2;
diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h
index f76ec723ceaee..f50c5d1725da5 100644
--- a/include/crypto/algapi.h
+++ b/include/crypto/algapi.h
@@ -13,6 +13,8 @@
 #include <linux/list.h>
 #include <linux/types.h>
 
+#include <asm/unaligned.h>
+
 /*
  * Maximum values for blocksize and alignmask, used to allocate
  * static buffers that are big enough for any combination of
@@ -154,9 +156,11 @@ static inline void crypto_xor(u8 *dst, const u8 *src, unsigned int size)
 	    (size % sizeof(unsigned long)) == 0) {
 		unsigned long *d = (unsigned long *)dst;
 		unsigned long *s = (unsigned long *)src;
+		unsigned long l;
 
 		while (size > 0) {
-			*d++ ^= *s++;
+			l = get_unaligned(d) ^ get_unaligned(s++);
+			put_unaligned(l, d++);
 			size -= sizeof(unsigned long);
 		}
 	} else {
@@ -173,9 +177,11 @@ static inline void crypto_xor_cpy(u8 *dst, const u8 *src1, const u8 *src2,
 		unsigned long *d = (unsigned long *)dst;
 		unsigned long *s1 = (unsigned long *)src1;
 		unsigned long *s2 = (unsigned long *)src2;
+		unsigned long l;
 
 		while (size > 0) {
-			*d++ = *s1++ ^ *s2++;
+			l = get_unaligned(s1++) ^ get_unaligned(s2++);
+			put_unaligned(l, d++);
 			size -= sizeof(unsigned long);
 		}
 	} else {
-- 
GitLab


From 52af29abffca9f091bb6e8f615d693b5380cbe7a Mon Sep 17 00:00:00 2001
From: Harsha <harsha.harsha@xilinx.com>
Date: Wed, 23 Feb 2022 16:05:01 +0530
Subject: [PATCH 0940/1586] crypto: xilinx - Updated Makefile for xilinx
 subdirectory

This patch updates the Makefile for xilinx subdirectory.
CONFIG_CRYPTO_DEV_ZYNQMP_AES protects zynqmp-aes-gcm.o and it is used
twice (in drivers/crypto/Makefile and drivers/crypto/xilinx/Makefile)
and it is enough to use it once.

Signed-off-by: Harsha <harsha.harsha@xilinx.com>
Reviewed-by: Michal Simek <michal.simek@xilinx.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile
index 1fe5120eb9663..0a4fff23d272e 100644
--- a/drivers/crypto/Makefile
+++ b/drivers/crypto/Makefile
@@ -47,7 +47,7 @@ obj-$(CONFIG_CRYPTO_DEV_VMX) += vmx/
 obj-$(CONFIG_CRYPTO_DEV_BCM_SPU) += bcm/
 obj-$(CONFIG_CRYPTO_DEV_SAFEXCEL) += inside-secure/
 obj-$(CONFIG_CRYPTO_DEV_ARTPEC6) += axis/
-obj-$(CONFIG_CRYPTO_DEV_ZYNQMP_AES) += xilinx/
+obj-y += xilinx/
 obj-y += hisilicon/
 obj-$(CONFIG_CRYPTO_DEV_AMLOGIC_GXL) += amlogic/
 obj-y += keembay/
-- 
GitLab


From 80f940ef527efdd8e36c69f7b5ee8e07ac8891d9 Mon Sep 17 00:00:00 2001
From: Harsha <harsha.harsha@xilinx.com>
Date: Wed, 23 Feb 2022 16:05:02 +0530
Subject: [PATCH 0941/1586] firmware: xilinx: Add ZynqMP SHA API for SHA3
 functionality

This patch adds zynqmp_pm_sha_hash API in the ZynqMP firmware to compute
SHA3 hash of given data.

Signed-off-by: Harsha <harsha.harsha@xilinx.com>
Signed-off-by: Kalyani Akula <kalyani.akula@xilinx.com>
Acked-by: Michal Simek <michal.simek@xilinx.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/firmware/xilinx/zynqmp.c     | 26 ++++++++++++++++++++++++++
 include/linux/firmware/xlnx-zynqmp.h |  8 ++++++++
 2 files changed, 34 insertions(+)

diff --git a/drivers/firmware/xilinx/zynqmp.c b/drivers/firmware/xilinx/zynqmp.c
index 450c5f6a1cbf1..5e5b0bb2e4e02 100644
--- a/drivers/firmware/xilinx/zynqmp.c
+++ b/drivers/firmware/xilinx/zynqmp.c
@@ -1120,6 +1120,32 @@ int zynqmp_pm_aes_engine(const u64 address, u32 *out)
 }
 EXPORT_SYMBOL_GPL(zynqmp_pm_aes_engine);
 
+/**
+ * zynqmp_pm_sha_hash - Access the SHA engine to calculate the hash
+ * @address:	Address of the data/ Address of output buffer where
+ *		hash should be stored.
+ * @size:	Size of the data.
+ * @flags:
+ *	BIT(0) - for initializing csudma driver and SHA3(Here address
+ *		 and size inputs can be NULL).
+ *	BIT(1) - to call Sha3_Update API which can be called multiple
+ *		 times when data is not contiguous.
+ *	BIT(2) - to get final hash of the whole updated data.
+ *		 Hash will be overwritten at provided address with
+ *		 48 bytes.
+ *
+ * Return:	Returns status, either success or error code.
+ */
+int zynqmp_pm_sha_hash(const u64 address, const u32 size, const u32 flags)
+{
+	u32 lower_addr = lower_32_bits(address);
+	u32 upper_addr = upper_32_bits(address);
+
+	return zynqmp_pm_invoke_fn(PM_SECURE_SHA, upper_addr, lower_addr,
+				   size, flags, NULL);
+}
+EXPORT_SYMBOL_GPL(zynqmp_pm_sha_hash);
+
 /**
  * zynqmp_pm_register_notifier() - PM API for register a subsystem
  *                                to be notified about specific
diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h
index 907cb01890cf7..f6783f58c64a9 100644
--- a/include/linux/firmware/xlnx-zynqmp.h
+++ b/include/linux/firmware/xlnx-zynqmp.h
@@ -93,6 +93,7 @@ enum pm_api_id {
 	PM_FPGA_LOAD = 22,
 	PM_FPGA_GET_STATUS = 23,
 	PM_GET_CHIPID = 24,
+	PM_SECURE_SHA = 26,
 	PM_PINCTRL_REQUEST = 28,
 	PM_PINCTRL_RELEASE = 29,
 	PM_PINCTRL_GET_FUNCTION = 30,
@@ -427,6 +428,7 @@ int zynqmp_pm_set_requirement(const u32 node, const u32 capabilities,
 			      const u32 qos,
 			      const enum zynqmp_pm_request_ack ack);
 int zynqmp_pm_aes_engine(const u64 address, u32 *out);
+int zynqmp_pm_sha_hash(const u64 address, const u32 size, const u32 flags);
 int zynqmp_pm_fpga_load(const u64 address, const u32 size, const u32 flags);
 int zynqmp_pm_fpga_get_status(u32 *value);
 int zynqmp_pm_write_ggs(u32 index, u32 value);
@@ -601,6 +603,12 @@ static inline int zynqmp_pm_aes_engine(const u64 address, u32 *out)
 	return -ENODEV;
 }
 
+static inline int zynqmp_pm_sha_hash(const u64 address, const u32 size,
+				     const u32 flags)
+{
+	return -ENODEV;
+}
+
 static inline int zynqmp_pm_fpga_load(const u64 address, const u32 size,
 				      const u32 flags)
 {
-- 
GitLab


From 7ecc3e34474b7055994314ab6cff75eac7d03b71 Mon Sep 17 00:00:00 2001
From: Harsha <harsha.harsha@xilinx.com>
Date: Wed, 23 Feb 2022 16:05:03 +0530
Subject: [PATCH 0942/1586] crypto: xilinx - Add Xilinx SHA3 driver

This patch adds SHA3 driver support for the Xilinx ZynqMP SoC.
Xilinx ZynqMP SoC has SHA3 engine used for secure hash calculation.
The flow is
SHA3 request from Userspace -> SHA3 driver-> ZynqMp driver-> Firmware ->
SHA3 HW Engine

SHA3 HW engine in Xilinx ZynqMP SoC, does not support parallel processing
of 2 hash requests.
Therefore, software fallback is being used for init, update, final,
export and import in the ZynqMP SHA driver
For digest, the calculation of SHA3 hash is done by the hardened
SHA3 accelerator in Xilinx ZynqMP SoC.

Signed-off-by: Harsha <harsha.harsha@xilinx.com>
Signed-off-by: Kalyani Akula <kalyani.akula@xilinx.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/Kconfig             |  10 +
 drivers/crypto/xilinx/Makefile     |   1 +
 drivers/crypto/xilinx/zynqmp-sha.c | 283 +++++++++++++++++++++++++++++
 3 files changed, 294 insertions(+)
 create mode 100644 drivers/crypto/xilinx/zynqmp-sha.c

diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 4f705674f94fa..bf4e55e730aa0 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -808,6 +808,16 @@ config CRYPTO_DEV_ZYNQMP_AES
 	  accelerator. Select this if you want to use the ZynqMP module
 	  for AES algorithms.
 
+config CRYPTO_DEV_ZYNQMP_SHA3
+	bool "Support for Xilinx ZynqMP SHA3 hardware accelerator"
+	depends on ARCH_ZYNQMP
+	select CRYPTO_SHA3
+	help
+	  Xilinx ZynqMP has SHA3 engine used for secure hash calculation.
+	  This driver interfaces with SHA3 hardware engine.
+	  Select this if you want to use the ZynqMP module
+	  for SHA3 hash computation.
+
 source "drivers/crypto/chelsio/Kconfig"
 
 source "drivers/crypto/virtio/Kconfig"
diff --git a/drivers/crypto/xilinx/Makefile b/drivers/crypto/xilinx/Makefile
index 534e32daf76ab..730feff5b5f2f 100644
--- a/drivers/crypto/xilinx/Makefile
+++ b/drivers/crypto/xilinx/Makefile
@@ -1,2 +1,3 @@
 # SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_CRYPTO_DEV_ZYNQMP_AES) += zynqmp-aes-gcm.o
+obj-$(CONFIG_CRYPTO_DEV_ZYNQMP_SHA3) += zynqmp-sha.o
diff --git a/drivers/crypto/xilinx/zynqmp-sha.c b/drivers/crypto/xilinx/zynqmp-sha.c
new file mode 100644
index 0000000000000..89549f4788bae
--- /dev/null
+++ b/drivers/crypto/xilinx/zynqmp-sha.c
@@ -0,0 +1,283 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Xilinx ZynqMP SHA Driver.
+ * Copyright (c) 2022 Xilinx Inc.
+ */
+#include <linux/cacheflush.h>
+#include <crypto/hash.h>
+#include <crypto/internal/hash.h>
+#include <crypto/sha3.h>
+#include <linux/crypto.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/firmware/xlnx-zynqmp.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+
+#define ZYNQMP_DMA_BIT_MASK		32U
+#define ZYNQMP_DMA_ALLOC_FIXED_SIZE	0x1000U
+
+enum zynqmp_sha_op {
+	ZYNQMP_SHA3_INIT = 1,
+	ZYNQMP_SHA3_UPDATE = 2,
+	ZYNQMP_SHA3_FINAL = 4,
+};
+
+struct zynqmp_sha_drv_ctx {
+	struct shash_alg sha3_384;
+	struct device *dev;
+};
+
+struct zynqmp_sha_tfm_ctx {
+	struct device *dev;
+	struct crypto_shash *fbk_tfm;
+};
+
+struct zynqmp_sha_desc_ctx {
+	struct shash_desc fbk_req;
+};
+
+static dma_addr_t update_dma_addr, final_dma_addr;
+static char *ubuf, *fbuf;
+
+static int zynqmp_sha_init_tfm(struct crypto_shash *hash)
+{
+	const char *fallback_driver_name = crypto_shash_alg_name(hash);
+	struct zynqmp_sha_tfm_ctx *tfm_ctx = crypto_shash_ctx(hash);
+	struct shash_alg *alg = crypto_shash_alg(hash);
+	struct crypto_shash *fallback_tfm;
+	struct zynqmp_sha_drv_ctx *drv_ctx;
+
+	drv_ctx = container_of(alg, struct zynqmp_sha_drv_ctx, sha3_384);
+	tfm_ctx->dev = drv_ctx->dev;
+
+	/* Allocate a fallback and abort if it failed. */
+	fallback_tfm = crypto_alloc_shash(fallback_driver_name, 0,
+					  CRYPTO_ALG_NEED_FALLBACK);
+	if (IS_ERR(fallback_tfm))
+		return PTR_ERR(fallback_tfm);
+
+	tfm_ctx->fbk_tfm = fallback_tfm;
+	hash->descsize += crypto_shash_descsize(tfm_ctx->fbk_tfm);
+
+	return 0;
+}
+
+static void zynqmp_sha_exit_tfm(struct crypto_shash *hash)
+{
+	struct zynqmp_sha_tfm_ctx *tfm_ctx = crypto_shash_ctx(hash);
+
+	if (tfm_ctx->fbk_tfm) {
+		crypto_free_shash(tfm_ctx->fbk_tfm);
+		tfm_ctx->fbk_tfm = NULL;
+	}
+
+	memzero_explicit(tfm_ctx, sizeof(struct zynqmp_sha_tfm_ctx));
+}
+
+static int zynqmp_sha_init(struct shash_desc *desc)
+{
+	struct zynqmp_sha_desc_ctx *dctx = shash_desc_ctx(desc);
+	struct zynqmp_sha_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
+
+	dctx->fbk_req.tfm = tctx->fbk_tfm;
+	return crypto_shash_init(&dctx->fbk_req);
+}
+
+static int zynqmp_sha_update(struct shash_desc *desc, const u8 *data, unsigned int length)
+{
+	struct zynqmp_sha_desc_ctx *dctx = shash_desc_ctx(desc);
+
+	return crypto_shash_update(&dctx->fbk_req, data, length);
+}
+
+static int zynqmp_sha_final(struct shash_desc *desc, u8 *out)
+{
+	struct zynqmp_sha_desc_ctx *dctx = shash_desc_ctx(desc);
+
+	return crypto_shash_final(&dctx->fbk_req, out);
+}
+
+static int zynqmp_sha_finup(struct shash_desc *desc, const u8 *data, unsigned int length, u8 *out)
+{
+	struct zynqmp_sha_desc_ctx *dctx = shash_desc_ctx(desc);
+
+	return crypto_shash_finup(&dctx->fbk_req, data, length, out);
+}
+
+static int zynqmp_sha_import(struct shash_desc *desc, const void *in)
+{
+	struct zynqmp_sha_desc_ctx *dctx = shash_desc_ctx(desc);
+	struct zynqmp_sha_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
+
+	dctx->fbk_req.tfm = tctx->fbk_tfm;
+	return crypto_shash_import(&dctx->fbk_req, in);
+}
+
+static int zynqmp_sha_export(struct shash_desc *desc, void *out)
+{
+	struct zynqmp_sha_desc_ctx *dctx = shash_desc_ctx(desc);
+
+	return crypto_shash_export(&dctx->fbk_req, out);
+}
+
+static int zynqmp_sha_digest(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out)
+{
+	unsigned int remaining_len = len;
+	int update_size;
+	int ret;
+
+	ret = zynqmp_pm_sha_hash(0, 0, ZYNQMP_SHA3_INIT);
+	if (ret)
+		return ret;
+
+	while (remaining_len != 0) {
+		memzero_explicit(ubuf, ZYNQMP_DMA_ALLOC_FIXED_SIZE);
+		if (remaining_len >= ZYNQMP_DMA_ALLOC_FIXED_SIZE) {
+			update_size = ZYNQMP_DMA_ALLOC_FIXED_SIZE;
+			remaining_len -= ZYNQMP_DMA_ALLOC_FIXED_SIZE;
+		} else {
+			update_size = remaining_len;
+			remaining_len = 0;
+		}
+		memcpy(ubuf, data, update_size);
+		flush_icache_range((unsigned long)ubuf, (unsigned long)ubuf + update_size);
+		ret = zynqmp_pm_sha_hash(update_dma_addr, update_size, ZYNQMP_SHA3_UPDATE);
+		if (ret)
+			return ret;
+
+		data += update_size;
+	}
+
+	ret = zynqmp_pm_sha_hash(final_dma_addr, SHA3_384_DIGEST_SIZE, ZYNQMP_SHA3_FINAL);
+	memcpy(out, fbuf, SHA3_384_DIGEST_SIZE);
+	memzero_explicit(fbuf, SHA3_384_DIGEST_SIZE);
+
+	return ret;
+}
+
+static struct zynqmp_sha_drv_ctx sha3_drv_ctx = {
+	.sha3_384 = {
+		.init = zynqmp_sha_init,
+		.update = zynqmp_sha_update,
+		.final = zynqmp_sha_final,
+		.finup = zynqmp_sha_finup,
+		.digest = zynqmp_sha_digest,
+		.export = zynqmp_sha_export,
+		.import = zynqmp_sha_import,
+		.init_tfm = zynqmp_sha_init_tfm,
+		.exit_tfm = zynqmp_sha_exit_tfm,
+		.descsize = sizeof(struct zynqmp_sha_desc_ctx),
+		.statesize = sizeof(struct sha3_state),
+		.digestsize = SHA3_384_DIGEST_SIZE,
+		.base = {
+			.cra_name = "sha3-384",
+			.cra_driver_name = "zynqmp-sha3-384",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_KERN_DRIVER_ONLY |
+				     CRYPTO_ALG_ALLOCATES_MEMORY |
+				     CRYPTO_ALG_NEED_FALLBACK,
+			.cra_blocksize = SHA3_384_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct zynqmp_sha_tfm_ctx),
+			.cra_alignmask = 3,
+			.cra_module = THIS_MODULE,
+		}
+	}
+};
+
+static int zynqmp_sha_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	int err;
+
+	err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(ZYNQMP_DMA_BIT_MASK));
+	if (err < 0) {
+		dev_err(dev, "No usable DMA configuration\n");
+		return err;
+	}
+
+	err = crypto_register_shash(&sha3_drv_ctx.sha3_384);
+	if (err < 0) {
+		dev_err(dev, "Failed to register shash alg.\n");
+		return err;
+	}
+
+	sha3_drv_ctx.dev = dev;
+	platform_set_drvdata(pdev, &sha3_drv_ctx);
+
+	ubuf = dma_alloc_coherent(dev, ZYNQMP_DMA_ALLOC_FIXED_SIZE, &update_dma_addr, GFP_KERNEL);
+	if (!ubuf) {
+		err = -ENOMEM;
+		goto err_shash;
+	}
+
+	fbuf = dma_alloc_coherent(dev, SHA3_384_DIGEST_SIZE, &final_dma_addr, GFP_KERNEL);
+	if (!fbuf) {
+		err = -ENOMEM;
+		goto err_mem;
+	}
+
+	return 0;
+
+err_mem:
+	dma_free_coherent(sha3_drv_ctx.dev, ZYNQMP_DMA_ALLOC_FIXED_SIZE, ubuf, update_dma_addr);
+
+err_shash:
+	crypto_unregister_shash(&sha3_drv_ctx.sha3_384);
+
+	return err;
+}
+
+static int zynqmp_sha_remove(struct platform_device *pdev)
+{
+	sha3_drv_ctx.dev = platform_get_drvdata(pdev);
+
+	dma_free_coherent(sha3_drv_ctx.dev, ZYNQMP_DMA_ALLOC_FIXED_SIZE, ubuf, update_dma_addr);
+	dma_free_coherent(sha3_drv_ctx.dev, SHA3_384_DIGEST_SIZE, fbuf, final_dma_addr);
+	crypto_unregister_shash(&sha3_drv_ctx.sha3_384);
+
+	return 0;
+}
+
+static struct platform_driver zynqmp_sha_driver = {
+	.probe = zynqmp_sha_probe,
+	.remove = zynqmp_sha_remove,
+	.driver = {
+		.name = "zynqmp-sha3-384",
+	},
+};
+
+static int __init sha_driver_init(void)
+{
+	struct platform_device *pdev;
+	int ret;
+
+	ret = platform_driver_register(&zynqmp_sha_driver);
+	if (ret)
+		return ret;
+
+	pdev = platform_device_register_simple(zynqmp_sha_driver.driver.name,
+					       0, NULL, 0);
+	if (IS_ERR(pdev)) {
+		ret = PTR_ERR(pdev);
+		platform_driver_unregister(&zynqmp_sha_driver);
+		pr_info("Failed to register ZynqMP SHA3 dvixe %d\n", ret);
+	}
+
+	return ret;
+}
+
+device_initcall(sha_driver_init);
+
+static void __exit sha_driver_exit(void)
+{
+	platform_driver_unregister(&zynqmp_sha_driver);
+}
+
+MODULE_DESCRIPTION("ZynqMP SHA3 hardware acceleration support.");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Harsha <harsha.harsha@xilinx.com>");
-- 
GitLab


From 9578de385c783e525bfe06bd6e775f95a8759ccb Mon Sep 17 00:00:00 2001
From: Harsha <harsha.harsha@xilinx.com>
Date: Wed, 23 Feb 2022 16:05:04 +0530
Subject: [PATCH 0943/1586] MAINTAINERS: Add maintainer for Xilinx ZynqMP SHA3
 driver

This patch adds an entry for ZynqMP SHA3 driver in the list of
Maintainers.

Signed-off-by: Harsha <harsha.harsha@xilinx.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 MAINTAINERS | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index ea3e6c9143848..6ee244105f5fc 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -21229,6 +21229,11 @@ T:	git https://github.com/Xilinx/linux-xlnx.git
 F:	Documentation/devicetree/bindings/phy/xlnx,zynqmp-psgtr.yaml
 F:	drivers/phy/xilinx/phy-zynqmp.c
 
+XILINX ZYNQMP SHA3 DRIVER
+M:	Harsha <harsha.harsha@xilinx.com>
+S:	Maintained
+F:	drivers/crypto/xilinx/zynqmp-sha.c
+
 XILINX EVENT MANAGEMENT DRIVER
 M:	Abhyuday Godhasara <abhyuday.godhasara@xilinx.com>
 S:	Maintained
-- 
GitLab


From 647d41d3952d726d4ae49e853a9eff68ebad3b3f Mon Sep 17 00:00:00 2001
From: Petr Vorel <pvorel@suse.cz>
Date: Wed, 23 Feb 2022 16:11:15 +0100
Subject: [PATCH 0944/1586] crypto: vmx - add missing dependencies

vmx-crypto module depends on CRYPTO_AES, CRYPTO_CBC, CRYPTO_CTR or
CRYPTO_XTS, thus add them.

These dependencies are likely to be enabled, but if
CRYPTO_DEV_VMX=y && !CRYPTO_MANAGER_DISABLE_TESTS
and either of CRYPTO_AES, CRYPTO_CBC, CRYPTO_CTR or CRYPTO_XTS is built
as module or disabled, alg_test() from crypto/testmgr.c complains during
boot about failing to allocate the generic fallback implementations
(2 == ENOENT):

[    0.540953] Failed to allocate xts(aes) fallback: -2
[    0.541014] alg: skcipher: failed to allocate transform for p8_aes_xts: -2
[    0.541120] alg: self-tests for p8_aes_xts (xts(aes)) failed (rc=-2)
[    0.544440] Failed to allocate ctr(aes) fallback: -2
[    0.544497] alg: skcipher: failed to allocate transform for p8_aes_ctr: -2
[    0.544603] alg: self-tests for p8_aes_ctr (ctr(aes)) failed (rc=-2)
[    0.547992] Failed to allocate cbc(aes) fallback: -2
[    0.548052] alg: skcipher: failed to allocate transform for p8_aes_cbc: -2
[    0.548156] alg: self-tests for p8_aes_cbc (cbc(aes)) failed (rc=-2)
[    0.550745] Failed to allocate transformation for 'aes': -2
[    0.550801] alg: cipher: Failed to load transform for p8_aes: -2
[    0.550892] alg: self-tests for p8_aes (aes) failed (rc=-2)

Fixes: c07f5d3da643 ("crypto: vmx - Adding support for XTS")
Fixes: d2e3ae6f3aba ("crypto: vmx - Enabling VMX module for PPC64")

Suggested-by: Nicolai Stange <nstange@suse.de>
Signed-off-by: Petr Vorel <pvorel@suse.cz>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/vmx/Kconfig | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/crypto/vmx/Kconfig b/drivers/crypto/vmx/Kconfig
index c85fab7ef0bdd..b2c28b87f14b3 100644
--- a/drivers/crypto/vmx/Kconfig
+++ b/drivers/crypto/vmx/Kconfig
@@ -2,7 +2,11 @@
 config CRYPTO_DEV_VMX_ENCRYPT
 	tristate "Encryption acceleration support on P8 CPU"
 	depends on CRYPTO_DEV_VMX
+	select CRYPTO_AES
+	select CRYPTO_CBC
+	select CRYPTO_CTR
 	select CRYPTO_GHASH
+	select CRYPTO_XTS
 	default m
 	help
 	  Support for VMX cryptographic acceleration instructions on Power8 CPU.
-- 
GitLab


From 959e375464912a32b9d971642d933ae71b9f0267 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 23 Feb 2022 18:26:20 +0200
Subject: [PATCH 0945/1586] crypto: cavium/nitrox - don't cast parameter in bit
 operations

While in this particular case it would not be a (critical) issue,
the pattern itself is bad and error prone in case the location
of the parameter is changed.

Don't cast parameter to unsigned long pointer in the bit operations.
Instead copy to a local variable on stack of a proper type and use.

Fixes: cf718eaa8f9b ("crypto: cavium/nitrox - Enabled Mailbox support")
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/cavium/nitrox/nitrox_mbx.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/cavium/nitrox/nitrox_mbx.c b/drivers/crypto/cavium/nitrox/nitrox_mbx.c
index 2e9c0d2143632..9e7308e39b304 100644
--- a/drivers/crypto/cavium/nitrox/nitrox_mbx.c
+++ b/drivers/crypto/cavium/nitrox/nitrox_mbx.c
@@ -1,4 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
+#include <linux/bitmap.h>
 #include <linux/workqueue.h>
 
 #include "nitrox_csr.h"
@@ -120,6 +121,7 @@ static void pf2vf_resp_handler(struct work_struct *work)
 
 void nitrox_pf2vf_mbox_handler(struct nitrox_device *ndev)
 {
+	DECLARE_BITMAP(csr, BITS_PER_TYPE(u64));
 	struct nitrox_vfdev *vfdev;
 	struct pf2vf_work *pfwork;
 	u64 value, reg_addr;
@@ -129,7 +131,8 @@ void nitrox_pf2vf_mbox_handler(struct nitrox_device *ndev)
 	/* loop for VF(0..63) */
 	reg_addr = NPS_PKT_MBOX_INT_LO;
 	value = nitrox_read_csr(ndev, reg_addr);
-	for_each_set_bit(i, (const unsigned long *)&value, BITS_PER_LONG) {
+	bitmap_from_u64(csr, value);
+	for_each_set_bit(i, csr, BITS_PER_TYPE(csr)) {
 		/* get the vfno from ring */
 		vfno = RING_TO_VFNO(i, ndev->iov.max_vf_queues);
 		vfdev = ndev->iov.vfdev + vfno;
@@ -151,7 +154,8 @@ void nitrox_pf2vf_mbox_handler(struct nitrox_device *ndev)
 	/* loop for VF(64..127) */
 	reg_addr = NPS_PKT_MBOX_INT_HI;
 	value = nitrox_read_csr(ndev, reg_addr);
-	for_each_set_bit(i, (const unsigned long *)&value, BITS_PER_LONG) {
+	bitmap_from_u64(csr, value);
+	for_each_set_bit(i, csr, BITS_PER_TYPE(csr)) {
 		/* get the vfno from ring */
 		vfno = RING_TO_VFNO(i + 64, ndev->iov.max_vf_queues);
 		vfdev = ndev->iov.vfdev + vfno;
-- 
GitLab


From e6205ad58a7ac194abfb33897585b38687d797fa Mon Sep 17 00:00:00 2001
From: Wan Jiabing <wanjiabing@vivo.com>
Date: Fri, 25 Feb 2022 14:38:59 +0800
Subject: [PATCH 0946/1586] hwrng: cavium - fix NULL but dereferenced
 coccicheck error

Fix following coccicheck warning:
./drivers/char/hw_random/cavium-rng-vf.c:182:17-20: ERROR:
pdev is NULL but dereferenced.

Signed-off-by: Wan Jiabing <wanjiabing@vivo.com>
Reviewed-by: Sunil Goutham <sgoutham@marvell.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/cavium-rng-vf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/char/hw_random/cavium-rng-vf.c b/drivers/char/hw_random/cavium-rng-vf.c
index 6f66919652bf5..7c55f4cf4a8ba 100644
--- a/drivers/char/hw_random/cavium-rng-vf.c
+++ b/drivers/char/hw_random/cavium-rng-vf.c
@@ -179,7 +179,7 @@ static int cavium_map_pf_regs(struct cavium_rng *rng)
 	pdev = pci_get_device(PCI_VENDOR_ID_CAVIUM,
 			      PCI_DEVID_CAVIUM_RNG_PF, NULL);
 	if (!pdev) {
-		dev_err(&pdev->dev, "Cannot find RNG PF device\n");
+		pr_err("Cannot find RNG PF device\n");
 		return -EIO;
 	}
 
-- 
GitLab


From 280ee3c3aaa8158e69db5453d8ffd11fce49979c Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 25 Feb 2022 15:27:24 +0800
Subject: [PATCH 0947/1586] crypto: octeontx2 - fix missing unlock

Add the missing unlock before return from error path.

Fixes: 4363f3d3ce8f ("crypto: octeontx2 - add synchronization between mailbox accesses")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c | 4 +++-
 drivers/crypto/marvell/octeontx2/otx2_cptpf_mbox.c | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c
index 17a9dd20c8c35..a402ccfac5577 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c
@@ -143,8 +143,10 @@ static void cptpf_flr_wq_handler(struct work_struct *work)
 	mutex_lock(&pf->lock);
 	req = otx2_mbox_alloc_msg_rsp(mbox, 0, sizeof(*req),
 				      sizeof(struct msg_rsp));
-	if (!req)
+	if (!req) {
+		mutex_unlock(&pf->lock);
 		return;
+	}
 
 	req->sig = OTX2_MBOX_REQ_SIG;
 	req->id = MBOX_MSG_VF_FLR;
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_mbox.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_mbox.c
index fee758b86d29f..dee0aa60b6985 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_mbox.c
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_mbox.c
@@ -20,8 +20,10 @@ static int forward_to_af(struct otx2_cptpf_dev *cptpf,
 
 	mutex_lock(&cptpf->lock);
 	msg = otx2_mbox_alloc_msg(&cptpf->afpf_mbox, 0, size);
-	if (msg == NULL)
+	if (msg == NULL) {
+		mutex_unlock(&cptpf->lock);
 		return -ENOMEM;
+	}
 
 	memcpy((uint8_t *)msg + sizeof(struct mbox_msghdr),
 	       (uint8_t *)req + sizeof(struct mbox_msghdr), size);
-- 
GitLab


From b3998b3bc658017dc36c69a8224fb11a3d1b1382 Mon Sep 17 00:00:00 2001
From: Ritesh Harjani <riteshh@linux.ibm.com>
Date: Mon, 21 Feb 2022 13:26:15 +0530
Subject: [PATCH 0948/1586] ext4: improve fast_commit performance and
 scalability

Currently ext4_fc_commit_dentry_updates() is of quadratic time
complexity, which is causing performance bottlenecks with high
threads/file/dir count with fs_mark.

This patch makes commit dentry updates (and hence ext4_fc_commit()) path
to linear time complexity. Hence improves the performance of workloads
which does fsync on multiple threads/open files one-by-one.

Absolute numbers in avg file creates per sec (from fs_mark in 1K order)
=======================================================================
no.     Order   without-patch(K)   with-patch(K)   Diff(%)
1       1        16.90              17.51           +3.60
2       2,2      32.08              31.80           -0.87
3       3,3      53.97              55.01           +1.92
4       4,4      78.94              76.90           -2.58
5       5,5      95.82              95.37           -0.46
6       6,6      87.92              103.38          +17.58
7       6,10      0.73              126.13          +17178.08
8       6,14      2.33              143.19          +6045.49

workload type
==============
For e.g. 7th row order of 6,10 (2^6 == 64 && 2^10 == 1024)
echo /run/riteshh/mnt/{1..64} |sed -E 's/[[:space:]]+/ -d /g' \
  | xargs -I {} bash -c "sudo fs_mark -L 100 -D 1024 -n 1024 -s0 -S5 -d {}"

Perf profile
(w/o patches)
=============================
87.15%  [kernel]  [k] ext4_fc_commit           --> Heavy contention/bottleneck
 1.98%  [kernel]  [k] perf_event_interrupt
 0.96%  [kernel]  [k] power_pmu_enable
 0.91%  [kernel]  [k] update_sd_lb_stats.constprop.0
 0.67%  [kernel]  [k] ktime_get

Signed-off-by: Ritesh Harjani <riteshh@linux.ibm.com>
Reviewed-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
Link: https://lore.kernel.org/r/930f35d4fd5f83e2673c868781d9ebf15e91bf4e.1645426817.git.riteshh@linux.ibm.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/ext4.h        |  2 ++
 fs/ext4/fast_commit.c | 74 ++++++++++++++++++++++++++++++++-----------
 fs/ext4/fast_commit.h |  1 +
 3 files changed, 59 insertions(+), 18 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 0d4f284c0514e..3f87cca49f0ce 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1046,6 +1046,8 @@ struct ext4_inode_info {
 
 	/* Fast commit related info */
 
+	/* For tracking dentry create updates */
+	struct list_head i_fc_dilist;
 	struct list_head i_fc_list;	/*
 					 * inodes that need fast commit
 					 * protected by sbi->s_fc_lock.
diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
index 3c5baca38767b..5ac594e034024 100644
--- a/fs/ext4/fast_commit.c
+++ b/fs/ext4/fast_commit.c
@@ -199,6 +199,7 @@ void ext4_fc_init_inode(struct inode *inode)
 	ext4_fc_reset_inode(inode);
 	ext4_clear_inode_state(inode, EXT4_STATE_FC_COMMITTING);
 	INIT_LIST_HEAD(&ei->i_fc_list);
+	INIT_LIST_HEAD(&ei->i_fc_dilist);
 	init_waitqueue_head(&ei->i_fc_wait);
 	atomic_set(&ei->i_fc_updates, 0);
 }
@@ -279,6 +280,8 @@ void ext4_fc_stop_update(struct inode *inode)
 void ext4_fc_del(struct inode *inode)
 {
 	struct ext4_inode_info *ei = EXT4_I(inode);
+	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+	struct ext4_fc_dentry_update *fc_dentry;
 
 	if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
 	    (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY))
@@ -286,7 +289,7 @@ void ext4_fc_del(struct inode *inode)
 
 restart:
 	spin_lock(&EXT4_SB(inode->i_sb)->s_fc_lock);
-	if (list_empty(&ei->i_fc_list)) {
+	if (list_empty(&ei->i_fc_list) && list_empty(&ei->i_fc_dilist)) {
 		spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
 		return;
 	}
@@ -295,8 +298,33 @@ restart:
 		ext4_fc_wait_committing_inode(inode);
 		goto restart;
 	}
-	list_del_init(&ei->i_fc_list);
-	spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
+
+	if (!list_empty(&ei->i_fc_list))
+		list_del_init(&ei->i_fc_list);
+
+	/*
+	 * Since this inode is getting removed, let's also remove all FC
+	 * dentry create references, since it is not needed to log it anyways.
+	 */
+	if (list_empty(&ei->i_fc_dilist)) {
+		spin_unlock(&sbi->s_fc_lock);
+		return;
+	}
+
+	fc_dentry = list_first_entry(&ei->i_fc_dilist, struct ext4_fc_dentry_update, fcd_dilist);
+	WARN_ON(fc_dentry->fcd_op != EXT4_FC_TAG_CREAT);
+	list_del_init(&fc_dentry->fcd_list);
+	list_del_init(&fc_dentry->fcd_dilist);
+
+	WARN_ON(!list_empty(&ei->i_fc_dilist));
+	spin_unlock(&sbi->s_fc_lock);
+
+	if (fc_dentry->fcd_name.name &&
+		fc_dentry->fcd_name.len > DNAME_INLINE_LEN)
+		kfree(fc_dentry->fcd_name.name);
+	kmem_cache_free(ext4_fc_dentry_cachep, fc_dentry);
+
+	return;
 }
 
 /*
@@ -427,7 +455,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
 		node->fcd_name.name = node->fcd_iname;
 	}
 	node->fcd_name.len = dentry->d_name.len;
-
+	INIT_LIST_HEAD(&node->fcd_dilist);
 	spin_lock(&sbi->s_fc_lock);
 	if (sbi->s_journal->j_flags & JBD2_FULL_COMMIT_ONGOING ||
 		sbi->s_journal->j_flags & JBD2_FAST_COMMIT_ONGOING)
@@ -435,6 +463,20 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
 				&sbi->s_fc_dentry_q[FC_Q_STAGING]);
 	else
 		list_add_tail(&node->fcd_list, &sbi->s_fc_dentry_q[FC_Q_MAIN]);
+
+	/*
+	 * This helps us keep a track of all fc_dentry updates which is part of
+	 * this ext4 inode. So in case the inode is getting unlinked, before
+	 * even we get a chance to fsync, we could remove all fc_dentry
+	 * references while evicting the inode in ext4_fc_del().
+	 * Also with this, we don't need to loop over all the inodes in
+	 * sbi->s_fc_q to get the corresponding inode in
+	 * ext4_fc_commit_dentry_updates().
+	 */
+	if (dentry_update->op == EXT4_FC_TAG_CREAT) {
+		WARN_ON(!list_empty(&ei->i_fc_dilist));
+		list_add_tail(&node->fcd_dilist, &ei->i_fc_dilist);
+	}
 	spin_unlock(&sbi->s_fc_lock);
 	mutex_lock(&ei->i_fc_lock);
 
@@ -954,7 +996,7 @@ __releases(&sbi->s_fc_lock)
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	struct ext4_fc_dentry_update *fc_dentry, *fc_dentry_n;
 	struct inode *inode;
-	struct ext4_inode_info *ei, *ei_n;
+	struct ext4_inode_info *ei;
 	int ret;
 
 	if (list_empty(&sbi->s_fc_dentry_q[FC_Q_MAIN]))
@@ -970,21 +1012,16 @@ __releases(&sbi->s_fc_lock)
 			spin_lock(&sbi->s_fc_lock);
 			continue;
 		}
-
-		inode = NULL;
-		list_for_each_entry_safe(ei, ei_n, &sbi->s_fc_q[FC_Q_MAIN],
-					 i_fc_list) {
-			if (ei->vfs_inode.i_ino == fc_dentry->fcd_ino) {
-				inode = &ei->vfs_inode;
-				break;
-			}
-		}
 		/*
-		 * If we don't find inode in our list, then it was deleted,
-		 * in which case, we don't need to record it's create tag.
+		 * With fcd_dilist we need not loop in sbi->s_fc_q to get the
+		 * corresponding inode pointer
 		 */
-		if (!inode)
-			continue;
+		WARN_ON(list_empty(&fc_dentry->fcd_dilist));
+		ei = list_first_entry(&fc_dentry->fcd_dilist,
+				struct ext4_inode_info, i_fc_dilist);
+		inode = &ei->vfs_inode;
+		WARN_ON(inode->i_ino != fc_dentry->fcd_ino);
+
 		spin_unlock(&sbi->s_fc_lock);
 
 		/*
@@ -1228,6 +1265,7 @@ static void ext4_fc_cleanup(journal_t *journal, int full, tid_t tid)
 					     struct ext4_fc_dentry_update,
 					     fcd_list);
 		list_del_init(&fc_dentry->fcd_list);
+		list_del_init(&fc_dentry->fcd_dilist);
 		spin_unlock(&sbi->s_fc_lock);
 
 		if (fc_dentry->fcd_name.name &&
diff --git a/fs/ext4/fast_commit.h b/fs/ext4/fast_commit.h
index 083ad1cb705a7..02afa52e8e41b 100644
--- a/fs/ext4/fast_commit.h
+++ b/fs/ext4/fast_commit.h
@@ -109,6 +109,7 @@ struct ext4_fc_dentry_update {
 	struct qstr fcd_name;	/* Dirent name */
 	unsigned char fcd_iname[DNAME_INLINE_LEN];	/* Dirent name string */
 	struct list_head fcd_list;
+	struct list_head fcd_dilist;
 };
 
 struct ext4_fc_stats {
-- 
GitLab


From a861fb9fa51da7b1957f612b742ce62a95591628 Mon Sep 17 00:00:00 2001
From: Wang Qing <wangqing@vivo.com>
Date: Sun, 27 Feb 2022 19:15:20 -0800
Subject: [PATCH 0949/1586] ext4: use time_is_before_jiffies() instead of open
 coding it

Use the helper function time_is_{before,after}_jiffies() to improve
code readability.

Signed-off-by: Wang Qing <wangqing@vivo.com>
Link: https://lore.kernel.org/r/1646018120-61462-1-git-send-email-wangqing@vivo.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/resize.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index f507f34be602f..90a941d20dfff 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -14,6 +14,7 @@
 
 #include <linux/errno.h>
 #include <linux/slab.h>
+#include <linux/jiffies.h>
 
 #include "ext4_jbd2.h"
 
@@ -2100,7 +2101,7 @@ retry:
 	 */
 	while (ext4_setup_next_flex_gd(sb, flex_gd, n_blocks_count,
 					      flexbg_size)) {
-		if (jiffies - last_update_time > HZ * 10) {
+		if (time_is_before_jiffies(last_update_time + HZ * 10)) {
 			if (last_update_time)
 				ext4_msg(sb, KERN_INFO,
 					 "resized to %llu blocks",
-- 
GitLab


From 7aab5c84a0f6ec2290e2ba4a6b245178b1bf949a Mon Sep 17 00:00:00 2001
From: Ye Bin <yebin10@huawei.com>
Date: Mon, 28 Feb 2022 10:48:15 +0800
Subject: [PATCH 0950/1586] ext4: fix fs corruption when tring to remove a
 non-empty directory with IO error

We inject IO error when rmdir non empty direcory, then got issue as follows:
step1: mkfs.ext4 -F /dev/sda
step2: mount /dev/sda  test
step3: cd test
step4: mkdir -p 1/2
step5: rmdir 1
	[  110.920551] ext4_empty_dir: inject fault
	[  110.921926] EXT4-fs warning (device sda): ext4_rmdir:3113: inode #12:
	comm rmdir: empty directory '1' has too many links (3)
step6: cd ..
step7: umount test
step8: fsck.ext4 -f /dev/sda
	e2fsck 1.42.9 (28-Dec-2013)
	Pass 1: Checking inodes, blocks, and sizes
	Pass 2: Checking directory structure
	Entry '..' in .../??? (13) has deleted/unused inode 12.  Clear<y>? yes
	Pass 3: Checking directory connectivity
	Unconnected directory inode 13 (...)
	Connect to /lost+found<y>? yes
	Pass 4: Checking reference counts
	Inode 13 ref count is 3, should be 2.  Fix<y>? yes
	Pass 5: Checking group summary information

	/dev/sda: ***** FILE SYSTEM WAS MODIFIED *****
	/dev/sda: 12/131072 files (0.0% non-contiguous), 26157/524288 blocks

ext4_rmdir
	if (!ext4_empty_dir(inode))
		goto end_rmdir;
ext4_empty_dir
	bh = ext4_read_dirblock(inode, 0, DIRENT_HTREE);
	if (IS_ERR(bh))
		return true;
Now if read directory block failed, 'ext4_empty_dir' will return true, assume
directory is empty. Obviously, it will lead to above issue.
To solve this issue, if read directory block failed 'ext4_empty_dir' just
return false. To avoid making things worse when file system is already
corrupted, 'ext4_empty_dir' also return false.

Signed-off-by: Ye Bin <yebin10@huawei.com>
Cc: stable@kernel.org
Link: https://lore.kernel.org/r/20220228024815.3952506-1-yebin10@huawei.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/inline.c |  9 ++++-----
 fs/ext4/namei.c  | 10 +++++-----
 2 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index e429418036050..9c076262770d9 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -1783,19 +1783,20 @@ bool empty_inline_dir(struct inode *dir, int *has_inline_data)
 	void *inline_pos;
 	unsigned int offset;
 	struct ext4_dir_entry_2 *de;
-	bool ret = true;
+	bool ret = false;
 
 	err = ext4_get_inode_loc(dir, &iloc);
 	if (err) {
 		EXT4_ERROR_INODE_ERR(dir, -err,
 				     "error %d getting inode %lu block",
 				     err, dir->i_ino);
-		return true;
+		return false;
 	}
 
 	down_read(&EXT4_I(dir)->xattr_sem);
 	if (!ext4_has_inline_data(dir)) {
 		*has_inline_data = 0;
+		ret = true;
 		goto out;
 	}
 
@@ -1804,7 +1805,6 @@ bool empty_inline_dir(struct inode *dir, int *has_inline_data)
 		ext4_warning(dir->i_sb,
 			     "bad inline directory (dir #%lu) - no `..'",
 			     dir->i_ino);
-		ret = true;
 		goto out;
 	}
 
@@ -1823,16 +1823,15 @@ bool empty_inline_dir(struct inode *dir, int *has_inline_data)
 				     dir->i_ino, le32_to_cpu(de->inode),
 				     le16_to_cpu(de->rec_len), de->name_len,
 				     inline_size);
-			ret = true;
 			goto out;
 		}
 		if (le32_to_cpu(de->inode)) {
-			ret = false;
 			goto out;
 		}
 		offset += ext4_rec_len_from_disk(de->rec_len, inline_size);
 	}
 
+	ret = true;
 out:
 	up_read(&EXT4_I(dir)->xattr_sem);
 	brelse(iloc.bh);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 8cf0a924a49bf..39e223f7bf64d 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2997,14 +2997,14 @@ bool ext4_empty_dir(struct inode *inode)
 	if (inode->i_size < ext4_dir_rec_len(1, NULL) +
 					ext4_dir_rec_len(2, NULL)) {
 		EXT4_ERROR_INODE(inode, "invalid size");
-		return true;
+		return false;
 	}
 	/* The first directory block must not be a hole,
 	 * so treat it as DIRENT_HTREE
 	 */
 	bh = ext4_read_dirblock(inode, 0, DIRENT_HTREE);
 	if (IS_ERR(bh))
-		return true;
+		return false;
 
 	de = (struct ext4_dir_entry_2 *) bh->b_data;
 	if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data, bh->b_size,
@@ -3012,7 +3012,7 @@ bool ext4_empty_dir(struct inode *inode)
 	    le32_to_cpu(de->inode) != inode->i_ino || strcmp(".", de->name)) {
 		ext4_warning_inode(inode, "directory missing '.'");
 		brelse(bh);
-		return true;
+		return false;
 	}
 	offset = ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize);
 	de = ext4_next_entry(de, sb->s_blocksize);
@@ -3021,7 +3021,7 @@ bool ext4_empty_dir(struct inode *inode)
 	    le32_to_cpu(de->inode) == 0 || strcmp("..", de->name)) {
 		ext4_warning_inode(inode, "directory missing '..'");
 		brelse(bh);
-		return true;
+		return false;
 	}
 	offset += ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize);
 	while (offset < inode->i_size) {
@@ -3035,7 +3035,7 @@ bool ext4_empty_dir(struct inode *inode)
 				continue;
 			}
 			if (IS_ERR(bh))
-				return true;
+				return false;
 		}
 		de = (struct ext4_dir_entry_2 *) (bh->b_data +
 					(offset & (sb->s_blocksize - 1)));
-- 
GitLab


From fd9b6fad667c7516a130cf0f3caddcb965710c12 Mon Sep 17 00:00:00 2001
From: Yang Li <yang.lee@linux.alibaba.com>
Date: Tue, 1 Mar 2022 17:21:36 +0800
Subject: [PATCH 0951/1586] ext4: fix ext4_mb_clear_bb() kernel-doc comment

Remove the excess description of @bh in ext4_mb_clear_bb() kernel-doc
comment to remove warnings found by running scripts/kernel-doc, which
is caused by using 'make W=1'.

fs/ext4/mballoc.c:5895: warning: Excess function parameter 'bh'
description in 'ext4_mb_clear_bb'

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
Link: https://lore.kernel.org/r/20220301092136.34764-1-yang.lee@linux.alibaba.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/mballoc.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 5f0bc6d0aabe0..3546fa27d0034 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -5884,7 +5884,6 @@ static void ext4_free_blocks_simple(struct inode *inode, ext4_fsblk_t block,
  *			Used by ext4_free_blocks()
  * @handle:		handle for this transaction
  * @inode:		inode
- * @bh:			optional buffer of the block to be freed
  * @block:		starting physical block to be freed
  * @count:		number of blocks to be freed
  * @flags:		flags used by ext4_free_blocks
-- 
GitLab


From 5c93e8ecd5bd3bfdee013b6da0850357eb6ca4d8 Mon Sep 17 00:00:00 2001
From: Zhang Yi <yi.zhang@huawei.com>
Date: Tue, 1 Mar 2022 19:17:04 +0800
Subject: [PATCH 0952/1586] ext4: fix underflow in ext4_max_bitmap_size()

when ext4 filesystem is created with 64k block size, ^extent and
^huge_file features. the upper_limit would underflow during the
computations in ext4_max_bitmap_size(). The problem is the size of block
index tree for such large block size is more than i_blocks can carry.
So fix the computation to count with this possibility. After this fix,
the 'res' cannot overflow loff_t on the extreme case of filesystem with
huge_files and 64K block size, so this patch also revert commit
75ca6ad408f4 ("ext4: fix loff_t overflow in ext4_max_bitmap_size()").

Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://lore.kernel.org/r/20220301111704.2153829-1-yi.zhang@huawei.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/super.c | 46 +++++++++++++++++++++++++++++++---------------
 1 file changed, 31 insertions(+), 15 deletions(-)

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 1fe91a26b88c7..1e5f4994fe576 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3481,8 +3481,9 @@ static loff_t ext4_max_size(int blkbits, int has_huge_files)
  */
 static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
 {
-	unsigned long long upper_limit, res = EXT4_NDIR_BLOCKS;
+	loff_t upper_limit, res = EXT4_NDIR_BLOCKS;
 	int meta_blocks;
+	unsigned int ppb = 1 << (bits - 2);
 
 	/*
 	 * This is calculated to be the largest file size for a dense, block
@@ -3514,27 +3515,42 @@ static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
 
 	}
 
+	/* Compute how many blocks we can address by block tree */
+	res += ppb;
+	res += ppb * ppb;
+	res += ((loff_t)ppb) * ppb * ppb;
+	/* Compute how many metadata blocks are needed */
+	meta_blocks = 1;
+	meta_blocks += 1 + ppb;
+	meta_blocks += 1 + ppb + ppb * ppb;
+	/* Does block tree limit file size? */
+	if (res + meta_blocks <= upper_limit)
+		goto check_lfs;
+
+	res = upper_limit;
+	/* How many metadata blocks are needed for addressing upper_limit? */
+	upper_limit -= EXT4_NDIR_BLOCKS;
 	/* indirect blocks */
 	meta_blocks = 1;
+	upper_limit -= ppb;
 	/* double indirect blocks */
-	meta_blocks += 1 + (1LL << (bits-2));
-	/* tripple indirect blocks */
-	meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2)));
-
-	upper_limit -= meta_blocks;
-	upper_limit <<= bits;
-
-	res += 1LL << (bits-2);
-	res += 1LL << (2*(bits-2));
-	res += 1LL << (3*(bits-2));
+	if (upper_limit < ppb * ppb) {
+		meta_blocks += 1 + DIV_ROUND_UP_ULL(upper_limit, ppb);
+		res -= meta_blocks;
+		goto check_lfs;
+	}
+	meta_blocks += 1 + ppb;
+	upper_limit -= ppb * ppb;
+	/* tripple indirect blocks for the rest */
+	meta_blocks += 1 + DIV_ROUND_UP_ULL(upper_limit, ppb) +
+		DIV_ROUND_UP_ULL(upper_limit, ppb*ppb);
+	res -= meta_blocks;
+check_lfs:
 	res <<= bits;
-	if (res > upper_limit)
-		res = upper_limit;
-
 	if (res > MAX_LFS_FILESIZE)
 		res = MAX_LFS_FILESIZE;
 
-	return (loff_t)res;
+	return res;
 }
 
 static ext4_fsblk_t descriptor_loc(struct super_block *sb,
-- 
GitLab


From 6b71b69dd98b197deea0806417e88308e0876a10 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Tue, 1 Mar 2022 12:16:44 +0000
Subject: [PATCH 0953/1586] ext4: remove redundant assignment to variable
 split_flag1

Variable split_flag1 is being assigned a value that is never read,
it is being re-assigned a new value in the following code block.
The assignment is redundant and can be removed.

Cleans up clang scan build warning:
fs/ext4/extents.c:3371:2: warning: Value stored to 'split_flag1' is
never read [deadcode.DeadStores]

Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Link: https://lore.kernel.org/r/20220301121644.997833-1-colin.i.king@gmail.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/extents.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index c0f3f83e0c1b1..0d98cf402282c 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3368,7 +3368,6 @@ static int ext4_split_extent(handle_t *handle,
 		return -EFSCORRUPTED;
 	}
 	unwritten = ext4_ext_is_unwritten(ex);
-	split_flag1 = 0;
 
 	if (map->m_lblk >= ee_block) {
 		split_flag1 = split_flag & EXT4_EXT_DATA_VALID2;
-- 
GitLab


From dc8fea13f98ace0ae8815dd44d1e60c184f3f930 Mon Sep 17 00:00:00 2001
From: "Minghao Chi (CGEL ZTE)" <chi.minghao@zte.com.cn>
Date: Thu, 3 Mar 2022 09:21:31 +0000
Subject: [PATCH 0954/1586] spi: Use of_device_get_match_data()

Use of_device_get_match_data() to simplify the code.

Reported-by: Zeal Robot <zealci@zte.com.cn>
Signed-off-by: Minghao Chi (CGEL ZTE) <chi.minghao@zte.com.cn>
Link: https://lore.kernel.org/r/20220303092131.2060044-1-chi.minghao@zte.com.cn
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-npcm-fiu.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/spi/spi-npcm-fiu.c b/drivers/spi/spi-npcm-fiu.c
index b62471ab6d7f2..71ed75bf0a20b 100644
--- a/drivers/spi/spi-npcm-fiu.c
+++ b/drivers/spi/spi-npcm-fiu.c
@@ -671,7 +671,6 @@ static const struct of_device_id npcm_fiu_dt_ids[] = {
 static int npcm_fiu_probe(struct platform_device *pdev)
 {
 	const struct fiu_data *fiu_data_match;
-	const struct of_device_id *match;
 	struct device *dev = &pdev->dev;
 	struct spi_controller *ctrl;
 	struct npcm_fiu_spi *fiu;
@@ -685,13 +684,12 @@ static int npcm_fiu_probe(struct platform_device *pdev)
 
 	fiu = spi_controller_get_devdata(ctrl);
 
-	match = of_match_device(npcm_fiu_dt_ids, dev);
-	if (!match || !match->data) {
+	fiu_data_match = of_device_get_match_data(dev);
+	if (!fiu_data_match) {
 		dev_err(dev, "No compatible OF match\n");
 		return -ENODEV;
 	}
 
-	fiu_data_match = match->data;
 	id = of_alias_get_id(dev->of_node, "fiu");
 	if (id < 0 || id >= fiu_data_match->fiu_max) {
 		dev_err(dev, "Invalid platform device id: %d\n", id);
-- 
GitLab


From cc5095747edfb054ca2068d01af20be3fcc3634f Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Thu, 3 Mar 2022 09:38:47 -0500
Subject: [PATCH 0955/1586] ext4: don't BUG if someone dirty pages without
 asking ext4 first

[un]pin_user_pages_remote is dirtying pages without properly warning
the file system in advance.  A related race was noted by Jan Kara in
2018[1]; however, more recently instead of it being a very hard-to-hit
race, it could be reliably triggered by process_vm_writev(2) which was
discovered by Syzbot[2].

This is technically a bug in mm/gup.c, but arguably ext4 is fragile in
that if some other kernel subsystem dirty pages without properly
notifying the file system using page_mkwrite(), ext4 will BUG, while
other file systems will not BUG (although data will still be lost).

So instead of crashing with a BUG, issue a warning (since there may be
potential data loss) and just mark the page as clean to avoid
unprivileged denial of service attacks until the problem can be
properly fixed.  More discussion and background can be found in the
thread starting at [2].

[1] https://lore.kernel.org/linux-mm/20180103100430.GE4911@quack2.suse.cz
[2] https://lore.kernel.org/r/Yg0m6IjcNmfaSokM@google.com

Reported-by: syzbot+d59332e2db681cf18f0318a06e994ebbb529a8db@syzkaller.appspotmail.com
Reported-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Link: https://lore.kernel.org/r/YiDS9wVfq4mM2jGK@mit.edu
---
 fs/ext4/inode.c | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 01c9e4f743ba9..531a94f48637c 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1993,6 +1993,15 @@ static int ext4_writepage(struct page *page,
 	else
 		len = PAGE_SIZE;
 
+	/* Should never happen but for bugs in other kernel subsystems */
+	if (!page_has_buffers(page)) {
+		ext4_warning_inode(inode,
+		   "page %lu does not have buffers attached", page->index);
+		ClearPageDirty(page);
+		unlock_page(page);
+		return 0;
+	}
+
 	page_bufs = page_buffers(page);
 	/*
 	 * We cannot do block allocation or other extent handling in this
@@ -2594,6 +2603,22 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
 			wait_on_page_writeback(page);
 			BUG_ON(PageWriteback(page));
 
+			/*
+			 * Should never happen but for buggy code in
+			 * other subsystems that call
+			 * set_page_dirty() without properly warning
+			 * the file system first.  See [1] for more
+			 * information.
+			 *
+			 * [1] https://lore.kernel.org/linux-mm/20180103100430.GE4911@quack2.suse.cz
+			 */
+			if (!page_has_buffers(page)) {
+				ext4_warning_inode(mpd->inode, "page %lu does not have buffers attached", page->index);
+				ClearPageDirty(page);
+				unlock_page(page);
+				continue;
+			}
+
 			if (mpd->map.m_len == 0)
 				mpd->first_page = page->index;
 			mpd->next_page = page->index + 1;
-- 
GitLab


From b4fa042e92e17f243bdfa2c53e3cd4c8b3dfb56c Mon Sep 17 00:00:00 2001
From: Agathe Porte <agathe.porte@nokia.com>
Date: Tue, 22 Feb 2022 14:36:09 -0800
Subject: [PATCH 0956/1586] dt-bindings: hwmon: add tmp464.yaml

Add basic description of the tmp464 driver DT bindings.

Signed-off-by: Agathe Porte <agathe.porte@nokia.com>
Cc: Krzysztof Adamski <krzysztof.adamski@nokia.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20220222223610.23098-1-linux@roeck-us.net
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 .../devicetree/bindings/hwmon/ti,tmp464.yaml  | 114 ++++++++++++++++++
 MAINTAINERS                                   |   7 ++
 2 files changed, 121 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/hwmon/ti,tmp464.yaml

diff --git a/Documentation/devicetree/bindings/hwmon/ti,tmp464.yaml b/Documentation/devicetree/bindings/hwmon/ti,tmp464.yaml
new file mode 100644
index 0000000000000..801ca9ba7d344
--- /dev/null
+++ b/Documentation/devicetree/bindings/hwmon/ti,tmp464.yaml
@@ -0,0 +1,114 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/hwmon/ti,tmp464.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: TMP464 and TMP468 temperature sensors
+
+maintainers:
+  - Agathe Porte <agathe.porte@nokia.com>
+
+description: |
+  ±0.0625°C Remote and Local temperature sensor
+  https://www.ti.com/lit/ds/symlink/tmp464.pdf
+  https://www.ti.com/lit/ds/symlink/tmp468.pdf
+
+properties:
+  compatible:
+    enum:
+      - ti,tmp464
+      - ti,tmp468
+
+  reg:
+    maxItems: 1
+
+  '#address-cells':
+    const: 1
+
+  '#size-cells':
+    const: 0
+
+required:
+  - compatible
+  - reg
+
+additionalProperties: false
+
+patternProperties:
+  "^channel@([0-8])$":
+    type: object
+    description: |
+      Represents channels of the device and their specific configuration.
+
+    properties:
+      reg:
+        description: |
+          The channel number. 0 is local channel, 1-8 are remote channels.
+        items:
+          minimum: 0
+          maximum: 8
+
+      label:
+        description: |
+          A descriptive name for this channel, like "ambient" or "psu".
+
+      ti,n-factor:
+        description: |
+          The value (two's complement) to be programmed in the channel specific N correction register.
+          For remote channels only.
+        $ref: /schemas/types.yaml#/definitions/int32
+        items:
+          minimum: -128
+          maximum: 127
+
+    required:
+      - reg
+
+    additionalProperties: false
+
+examples:
+  - |
+    i2c {
+      #address-cells = <1>;
+      #size-cells = <0>;
+
+      sensor@4b {
+        compatible = "ti,tmp464";
+        reg = <0x4b>;
+      };
+    };
+  - |
+    i2c {
+      #address-cells = <1>;
+      #size-cells = <0>;
+
+      sensor@4b {
+        compatible = "ti,tmp464";
+        reg = <0x4b>;
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        channel@0 {
+          reg = <0x0>;
+          label = "local";
+        };
+
+        channel@1 {
+          reg = <0x1>;
+          ti,n-factor = <(-10)>;
+          label = "external";
+        };
+
+        channel@2 {
+          reg = <0x2>;
+          ti,n-factor = <0x10>;
+          label = "somelabel";
+        };
+
+        channel@3 {
+          reg = <0x3>;
+          status = "disabled";
+        };
+      };
+    };
diff --git a/MAINTAINERS b/MAINTAINERS
index 27a87a6646357..3766875f4f2ae 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -19496,6 +19496,13 @@ S:	Maintained
 F:	Documentation/hwmon/tmp401.rst
 F:	drivers/hwmon/tmp401.c
 
+TMP464 HARDWARE MONITOR DRIVER
+M:	Agathe Porte <agathe.porte@nokia.com>
+M:	Guenter Roeck <linux@roeck-us.net>
+L:	linux-hwmon@vger.kernel.org
+S:	Maintained
+F:	Documentation/devicetree/bindings/hwmon/ti,tmp464.yaml
+
 TMP513 HARDWARE MONITOR DRIVER
 M:	Eric Tremblay <etremblay@distech-controls.com>
 L:	linux-hwmon@vger.kernel.org
-- 
GitLab


From 007e433cf0373334a2bef1b0c9831647184906ba Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Tue, 22 Feb 2022 14:36:10 -0800
Subject: [PATCH 0957/1586] hwmon: Add driver for Texas Instruments TMP464 and
 TMP468

Add support for Texas Instruments TMP464 and TMP468 temperature sensor
ICs.

TI's TMP464 is an I2C temperature sensor chip. This chip is similar
to TI's TMP421 chip, but with 16bit-wide registers (instead of
8bit-wide registers). The chip has one local sensor and four remote
sensors. TMP468 is similar to TMP464 but has one local and eight
remote sensors.

Originally-from: Agathe Porte <agathe.porte@nokia.com>
Cc: Agathe Porte <agathe.porte@nokia.com>
Cc: Krzysztof Adamski <krzysztof.adamski@nokia.com>
Tested-by: Agathe Porte <agathe.porte@nokia.com>
Link: https://lore.kernel.org/r/20220222223610.23098-2-linux@roeck-us.net
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 Documentation/hwmon/index.rst  |   1 +
 Documentation/hwmon/tmp464.rst |  73 ++++
 MAINTAINERS                    |   2 +
 drivers/hwmon/Kconfig          |  11 +
 drivers/hwmon/Makefile         |   1 +
 drivers/hwmon/tmp464.c         | 712 +++++++++++++++++++++++++++++++++
 6 files changed, 800 insertions(+)
 create mode 100644 Documentation/hwmon/tmp464.rst
 create mode 100644 drivers/hwmon/tmp464.c

diff --git a/Documentation/hwmon/index.rst b/Documentation/hwmon/index.rst
index 075d1defcd204..9d2787a12a693 100644
--- a/Documentation/hwmon/index.rst
+++ b/Documentation/hwmon/index.rst
@@ -195,6 +195,7 @@ Hardware Monitoring Kernel Drivers
    tmp108
    tmp401
    tmp421
+   tmp464
    tmp513
    tps23861
    tps40422
diff --git a/Documentation/hwmon/tmp464.rst b/Documentation/hwmon/tmp464.rst
new file mode 100644
index 0000000000000..7596e7623d060
--- /dev/null
+++ b/Documentation/hwmon/tmp464.rst
@@ -0,0 +1,73 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Kernel driver tmp464
+====================
+
+Supported chips:
+
+  * Texas Instruments TMP464
+
+    Prefix: 'tmp464'
+
+    Addresses scanned: I2C 0x48, 0x49, 0x4a and 0x4b
+
+    Datasheet: http://focus.ti.com/docs/prod/folders/print/tmp464.html
+
+  * Texas Instruments TMP468
+
+    Prefix: 'tmp468'
+
+    Addresses scanned: I2C 0x48, 0x49, 0x4a and 0x4b
+
+    Datasheet: http://focus.ti.com/docs/prod/folders/print/tmp468.html
+
+Authors:
+
+	Agathe Porte <agathe.porte@nokia.com>
+	Guenter Roeck <linux@roeck-us.net>
+
+Description
+-----------
+
+This driver implements support for Texas Instruments TMP464 and TMP468
+temperature sensor chips. TMP464 provides one local and four remote
+sensors. TMP468 provides one local and eight remote sensors.
+Temperature is measured in degrees Celsius. The chips are wired over
+I2C/SMBus and specified over a temperature range of -40 to +125 degrees
+Celsius. Resolution for both the local and remote channels is 0.0625
+degree C.
+
+The chips support only temperature measurements. The driver exports
+temperature values, limits, and alarms via the following sysfs files:
+
+**temp[1-9]_input**
+
+**temp[1-9]_max**
+
+**temp[1-9]_max_hyst**
+
+**temp[1-9]_max_alarm**
+
+**temp[1-9]_crit**
+
+**temp[1-9]_crit_alarm**
+
+**temp[1-9]_crit_hyst**
+
+**temp[2-9]_offset**
+
+**temp[2-9]_fault**
+
+Each sensor can be individually disabled via Devicetree or from sysfs
+via:
+
+**temp[1-9]_enable**
+
+If labels were specified in Devicetree, additional sysfs files will
+be present:
+
+**temp[1-9]_label**
+
+The update interval is configurable with the following sysfs attribute.
+
+**update_interval**
diff --git a/MAINTAINERS b/MAINTAINERS
index 3766875f4f2ae..a9c9ee378f93e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -19502,6 +19502,8 @@ M:	Guenter Roeck <linux@roeck-us.net>
 L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/devicetree/bindings/hwmon/ti,tmp464.yaml
+F:	Documentation/hwmon/tmp464.rst
+F:	drivers/hwmon/tmp464.c
 
 TMP513 HARDWARE MONITOR DRIVER
 M:	Eric Tremblay <etremblay@distech-controls.com>
diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index 463d5b110a85f..9ab4e9b3d27b6 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -1996,6 +1996,17 @@ config SENSORS_TMP421
 	  This driver can also be built as a module. If so, the module
 	  will be called tmp421.
 
+config SENSORS_TMP464
+	tristate "Texas Instruments TMP464 and compatible"
+	depends on I2C
+	select REGMAP_I2C
+	help
+	  If you say yes here you get support for Texas Instruments TMP464
+	  and TMP468 temperature sensor chips.
+
+	  This driver can also be built as a module. If so, the module
+	  will be called tmp464.
+
 config SENSORS_TMP513
 	tristate "Texas Instruments TMP513 and compatibles"
 	depends on I2C
diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile
index 7bba3415ca3df..4ed138d0621f9 100644
--- a/drivers/hwmon/Makefile
+++ b/drivers/hwmon/Makefile
@@ -195,6 +195,7 @@ obj-$(CONFIG_SENSORS_TMP103)	+= tmp103.o
 obj-$(CONFIG_SENSORS_TMP108)	+= tmp108.o
 obj-$(CONFIG_SENSORS_TMP401)	+= tmp401.o
 obj-$(CONFIG_SENSORS_TMP421)	+= tmp421.o
+obj-$(CONFIG_SENSORS_TMP464)	+= tmp464.o
 obj-$(CONFIG_SENSORS_TMP513)	+= tmp513.o
 obj-$(CONFIG_SENSORS_VEXPRESS)	+= vexpress-hwmon.o
 obj-$(CONFIG_SENSORS_VIA_CPUTEMP)+= via-cputemp.o
diff --git a/drivers/hwmon/tmp464.c b/drivers/hwmon/tmp464.c
new file mode 100644
index 0000000000000..7814f39bd1a37
--- /dev/null
+++ b/drivers/hwmon/tmp464.c
@@ -0,0 +1,712 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+/* Driver for the Texas Instruments TMP464 SMBus temperature sensor IC.
+ * Supported models: TMP464, TMP468
+
+ * Copyright (C) 2022 Agathe Porte <agathe.porte@nokia.com>
+ * Preliminary support by:
+ * Lionel Pouliquen <lionel.lp.pouliquen@nokia.com>
+ */
+
+#include <linux/err.h>
+#include <linux/hwmon.h>
+#include <linux/i2c.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/of_device.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+
+/* Addresses to scan */
+static const unsigned short normal_i2c[] = { 0x48, 0x49, 0x4a, 0x4b, I2C_CLIENT_END };
+
+#define TMP464_NUM_CHANNELS		5	/* chan 0 is internal, 1-4 are remote */
+#define TMP468_NUM_CHANNELS		9	/* chan 0 is internal, 1-8 are remote */
+
+#define MAX_CHANNELS			9
+
+#define TMP464_TEMP_REG(channel)	(channel)
+#define TMP464_TEMP_OFFSET_REG(channel)	(0x40 + ((channel) - 1) * 8)
+#define TMP464_N_FACTOR_REG(channel)	(0x41 + ((channel) - 1) * 8)
+
+static const u8 TMP464_THERM_LIMIT[MAX_CHANNELS] = {
+	0x39, 0x42, 0x4A, 0x52, 0x5A, 0x62, 0x6a, 0x72, 0x7a };
+static const u8 TMP464_THERM2_LIMIT[MAX_CHANNELS] = {
+	0x3A, 0x43, 0x4B, 0x53, 0x5B, 0x63, 0x6b, 0x73, 0x7b };
+
+#define TMP464_THERM_STATUS_REG			0x21
+#define TMP464_THERM2_STATUS_REG		0x22
+#define TMP464_REMOTE_OPEN_REG			0x23
+#define TMP464_CONFIG_REG			0x30
+#define TMP464_TEMP_HYST_REG			0x38
+#define TMP464_LOCK_REG				0xc4
+
+/* Identification */
+#define TMP464_MANUFACTURER_ID_REG		0xFE
+#define TMP464_DEVICE_ID_REG			0xFF
+
+/* Flags */
+#define TMP464_CONFIG_SHUTDOWN			BIT(5)
+#define TMP464_CONFIG_RANGE			0x04
+#define TMP464_CONFIG_REG_REN(x)		(BIT(7 + (x)))
+#define TMP464_CONFIG_REG_REN_MASK		GENMASK(15, 7)
+#define TMP464_CONFIG_CONVERSION_RATE_B0	2
+#define TMP464_CONFIG_CONVERSION_RATE_B2	4
+#define TMP464_CONFIG_CONVERSION_RATE_MASK      GENMASK(TMP464_CONFIG_CONVERSION_RATE_B2, \
+							TMP464_CONFIG_CONVERSION_RATE_B0)
+
+#define TMP464_UNLOCK_VAL			0xeb19
+#define TMP464_LOCK_VAL				0x5ca6
+#define TMP464_LOCKED				0x8000
+
+/* Manufacturer / Device ID's */
+#define TMP464_MANUFACTURER_ID			0x5449
+#define TMP464_DEVICE_ID			0x1468
+#define TMP468_DEVICE_ID			0x0468
+
+static const struct i2c_device_id tmp464_id[] = {
+	{ "tmp464", TMP464_NUM_CHANNELS },
+	{ "tmp468", TMP468_NUM_CHANNELS },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, tmp464_id);
+
+static const struct of_device_id __maybe_unused tmp464_of_match[] = {
+	{
+		.compatible = "ti,tmp464",
+		.data = (void *)TMP464_NUM_CHANNELS
+	},
+	{
+		.compatible = "ti,tmp468",
+		.data = (void *)TMP468_NUM_CHANNELS
+	},
+	{},
+};
+MODULE_DEVICE_TABLE(of, tmp464_of_match);
+
+struct tmp464_channel {
+	const char *label;
+	bool enabled;
+};
+
+struct tmp464_data {
+	struct regmap *regmap;
+	struct mutex update_lock;
+	int channels;
+	s16 config_orig;
+	u16 open_reg;
+	unsigned long last_updated;
+	bool valid;
+	int update_interval;
+	struct tmp464_channel channel[MAX_CHANNELS];
+};
+
+static int temp_from_reg(s16 reg)
+{
+	return DIV_ROUND_CLOSEST((reg >> 3) * 625, 10);
+}
+
+static s16 temp_to_limit_reg(long temp)
+{
+	return DIV_ROUND_CLOSEST(temp, 500) << 6;
+}
+
+static s16 temp_to_offset_reg(long temp)
+{
+	return DIV_ROUND_CLOSEST(temp * 10, 625) << 3;
+}
+
+static int tmp464_enable_channels(struct tmp464_data *data)
+{
+	struct regmap *regmap = data->regmap;
+	u16 enable = 0;
+	int i;
+
+	for (i = 0; i < data->channels; i++)
+		if (data->channel[i].enabled)
+			enable |= TMP464_CONFIG_REG_REN(i);
+
+	return regmap_update_bits(regmap, TMP464_CONFIG_REG, TMP464_CONFIG_REG_REN_MASK, enable);
+}
+
+static int tmp464_chip_read(struct device *dev, u32 attr, int channel, long *val)
+{
+	struct tmp464_data *data = dev_get_drvdata(dev);
+
+	switch (attr) {
+	case hwmon_chip_update_interval:
+		*val = data->update_interval;
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int tmp464_temp_read(struct device *dev, u32 attr, int channel, long *val)
+{
+	struct tmp464_data *data = dev_get_drvdata(dev);
+	struct regmap *regmap = data->regmap;
+	unsigned int regval, regval2;
+	int err = 0;
+
+	mutex_lock(&data->update_lock);
+
+	switch (attr) {
+	case hwmon_temp_max_alarm:
+		err = regmap_read(regmap, TMP464_THERM_STATUS_REG, &regval);
+		if (err < 0)
+			break;
+		*val = !!(regval & BIT(channel + 7));
+		break;
+	case hwmon_temp_crit_alarm:
+		err = regmap_read(regmap, TMP464_THERM2_STATUS_REG, &regval);
+		if (err < 0)
+			break;
+		*val = !!(regval & BIT(channel + 7));
+		break;
+	case hwmon_temp_fault:
+		/*
+		 * The chip clears TMP464_REMOTE_OPEN_REG after it is read
+		 * and only updates it after the next measurement cycle is
+		 * complete. That means we have to cache the value internally
+		 * for one measurement cycle and report the cached value.
+		 */
+		if (!data->valid || time_after(jiffies, data->last_updated +
+					       msecs_to_jiffies(data->update_interval))) {
+			err = regmap_read(regmap, TMP464_REMOTE_OPEN_REG, &regval);
+			if (err < 0)
+				break;
+			data->open_reg = regval;
+			data->last_updated = jiffies;
+			data->valid = true;
+		}
+		*val = !!(data->open_reg & BIT(channel + 7));
+		break;
+	case hwmon_temp_max_hyst:
+		err = regmap_read(regmap, TMP464_THERM_LIMIT[channel], &regval);
+		if (err < 0)
+			break;
+		err = regmap_read(regmap, TMP464_TEMP_HYST_REG, &regval2);
+		if (err < 0)
+			break;
+		regval -= regval2;
+		*val = temp_from_reg(regval);
+		break;
+	case hwmon_temp_max:
+		err = regmap_read(regmap, TMP464_THERM_LIMIT[channel], &regval);
+		if (err < 0)
+			break;
+		*val = temp_from_reg(regval);
+		break;
+	case hwmon_temp_crit_hyst:
+		err = regmap_read(regmap, TMP464_THERM2_LIMIT[channel], &regval);
+		if (err < 0)
+			break;
+		err = regmap_read(regmap, TMP464_TEMP_HYST_REG, &regval2);
+		if (err < 0)
+			break;
+		regval -= regval2;
+		*val = temp_from_reg(regval);
+		break;
+	case hwmon_temp_crit:
+		err = regmap_read(regmap, TMP464_THERM2_LIMIT[channel], &regval);
+		if (err < 0)
+			break;
+		*val = temp_from_reg(regval);
+		break;
+	case hwmon_temp_offset:
+		err = regmap_read(regmap, TMP464_TEMP_OFFSET_REG(channel), &regval);
+		if (err < 0)
+			break;
+		*val = temp_from_reg(regval);
+		break;
+	case hwmon_temp_input:
+		if (!data->channel[channel].enabled) {
+			err = -ENODATA;
+			break;
+		}
+		err = regmap_read(regmap, TMP464_TEMP_REG(channel), &regval);
+		if (err < 0)
+			break;
+		*val = temp_from_reg(regval);
+		break;
+	case hwmon_temp_enable:
+		*val = data->channel[channel].enabled;
+		break;
+	default:
+		err = -EOPNOTSUPP;
+		break;
+	}
+
+	mutex_unlock(&data->update_lock);
+
+	return err;
+}
+
+static int tmp464_read(struct device *dev, enum hwmon_sensor_types type,
+		       u32 attr, int channel, long *val)
+{
+	switch (type) {
+	case hwmon_chip:
+		return tmp464_chip_read(dev, attr, channel, val);
+	case hwmon_temp:
+		return tmp464_temp_read(dev, attr, channel, val);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int tmp464_read_string(struct device *dev, enum hwmon_sensor_types type,
+			      u32 attr, int channel, const char **str)
+{
+	struct tmp464_data *data = dev_get_drvdata(dev);
+
+	*str = data->channel[channel].label;
+
+	return 0;
+}
+
+static int tmp464_set_convrate(struct tmp464_data *data, long interval)
+{
+	int rate;
+
+	/*
+	 * For valid rates, interval in milli-seconds can be calculated as
+	 *      interval = 125 << (7 - rate);
+	 * or
+	 *      interval = (1 << (7 - rate)) * 125;
+	 * The rate is therefore
+	 *      rate = 7 - __fls(interval / 125);
+	 * and the rounded rate is
+	 *      rate = 7 - __fls(interval * 4 / (125 * 3));
+	 * Use clamp_val() to avoid overflows, and to ensure valid input
+	 * for __fls.
+	 */
+	interval = clamp_val(interval, 125, 16000);
+	rate = 7 - __fls(interval * 4 / (125 * 3));
+	data->update_interval = 125 << (7 - rate);
+
+	return regmap_update_bits(data->regmap, TMP464_CONFIG_REG,
+				  TMP464_CONFIG_CONVERSION_RATE_MASK,
+				  rate << TMP464_CONFIG_CONVERSION_RATE_B0);
+}
+
+static int tmp464_chip_write(struct tmp464_data *data, u32 attr, int channel, long val)
+{
+	switch (attr) {
+	case hwmon_chip_update_interval:
+		return tmp464_set_convrate(data, val);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int tmp464_temp_write(struct tmp464_data *data, u32 attr, int channel, long val)
+{
+	struct regmap *regmap = data->regmap;
+	unsigned int regval;
+	int err = 0;
+
+	switch (attr) {
+	case hwmon_temp_max_hyst:
+		err = regmap_read(regmap, TMP464_THERM_LIMIT[0], &regval);
+		if (err < 0)
+			break;
+		val = clamp_val(val, -256000, 256000);	/* prevent overflow/underflow */
+		val = clamp_val(temp_from_reg(regval) - val, 0, 255000);
+		err = regmap_write(regmap, TMP464_TEMP_HYST_REG,
+				   DIV_ROUND_CLOSEST(val, 1000) << 7);
+		break;
+	case hwmon_temp_max:
+		val = temp_to_limit_reg(clamp_val(val, -255000, 255500));
+		err = regmap_write(regmap, TMP464_THERM_LIMIT[channel], val);
+		break;
+	case hwmon_temp_crit:
+		val = temp_to_limit_reg(clamp_val(val, -255000, 255500));
+		err = regmap_write(regmap, TMP464_THERM2_LIMIT[channel], val);
+		break;
+	case hwmon_temp_offset:
+		val = temp_to_offset_reg(clamp_val(val, -128000, 127937));
+		err = regmap_write(regmap, TMP464_TEMP_OFFSET_REG(channel), val);
+		break;
+	case hwmon_temp_enable:
+		data->channel[channel].enabled = !!val;
+		err = tmp464_enable_channels(data);
+		break;
+	default:
+		err = -EOPNOTSUPP;
+		break;
+	}
+
+	return err;
+}
+
+static int tmp464_write(struct device *dev, enum hwmon_sensor_types type,
+			u32 attr, int channel, long val)
+{
+	struct tmp464_data *data = dev_get_drvdata(dev);
+	int err;
+
+	mutex_lock(&data->update_lock);
+
+	switch (type) {
+	case hwmon_chip:
+		err = tmp464_chip_write(data, attr, channel, val);
+		break;
+	case hwmon_temp:
+		err = tmp464_temp_write(data, attr, channel, val);
+		break;
+	default:
+		err = -EOPNOTSUPP;
+		break;
+	}
+
+	mutex_unlock(&data->update_lock);
+
+	return err;
+}
+
+static umode_t tmp464_is_visible(const void *_data, enum hwmon_sensor_types type,
+				 u32 attr, int channel)
+{
+	const struct tmp464_data *data = _data;
+
+	if (channel >= data->channels)
+		return 0;
+
+	if (type == hwmon_chip) {
+		if (attr == hwmon_chip_update_interval)
+			return 0644;
+		return 0;
+	}
+
+	switch (attr) {
+	case hwmon_temp_input:
+	case hwmon_temp_max_alarm:
+	case hwmon_temp_crit_alarm:
+	case hwmon_temp_crit_hyst:
+		return 0444;
+	case hwmon_temp_enable:
+	case hwmon_temp_max:
+	case hwmon_temp_crit:
+		return 0644;
+	case hwmon_temp_max_hyst:
+		if (!channel)
+			return 0644;
+		return 0444;
+	case hwmon_temp_label:
+		if (data->channel[channel].label)
+			return 0444;
+		return 0;
+	case hwmon_temp_fault:
+		if (channel)
+			return 0444;
+		return 0;
+	case hwmon_temp_offset:
+		if (channel)
+			return 0644;
+		return 0;
+	default:
+		return 0;
+	}
+}
+
+static void tmp464_restore_lock(void *regmap)
+{
+	regmap_write(regmap, TMP464_LOCK_REG, TMP464_LOCK_VAL);
+}
+
+static void tmp464_restore_config(void *_data)
+{
+	struct tmp464_data *data = _data;
+
+	regmap_write(data->regmap, TMP464_CONFIG_REG, data->config_orig);
+}
+
+static int tmp464_init_client(struct device *dev, struct tmp464_data *data)
+{
+	struct regmap *regmap = data->regmap;
+	unsigned int regval;
+	int err;
+
+	err = regmap_read(regmap, TMP464_LOCK_REG, &regval);
+	if (err)
+		return err;
+	if (regval == TMP464_LOCKED) {
+		/* Explicitly unlock chip if it is locked */
+		err = regmap_write(regmap, TMP464_LOCK_REG, TMP464_UNLOCK_VAL);
+		if (err)
+			return err;
+		/* and lock it again when unloading the driver */
+		err = devm_add_action_or_reset(dev, tmp464_restore_lock, regmap);
+		if (err)
+			return err;
+	}
+
+	err = regmap_read(regmap, TMP464_CONFIG_REG, &regval);
+	if (err)
+		return err;
+	data->config_orig = regval;
+	err = devm_add_action_or_reset(dev, tmp464_restore_config, data);
+	if (err)
+		return err;
+
+	/* Default to 500 ms update interval */
+	err = regmap_update_bits(regmap, TMP464_CONFIG_REG,
+				 TMP464_CONFIG_CONVERSION_RATE_MASK | TMP464_CONFIG_SHUTDOWN,
+				 BIT(TMP464_CONFIG_CONVERSION_RATE_B0) |
+				 BIT(TMP464_CONFIG_CONVERSION_RATE_B2));
+	if (err)
+		return err;
+
+	data->update_interval = 500;
+
+	return tmp464_enable_channels(data);
+}
+
+static int tmp464_detect(struct i2c_client *client,
+			 struct i2c_board_info *info)
+{
+	struct i2c_adapter *adapter = client->adapter;
+	char *name, *chip;
+	int reg;
+
+	if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_WORD_DATA))
+		return -ENODEV;
+
+	reg = i2c_smbus_read_word_swapped(client, TMP464_MANUFACTURER_ID_REG);
+	if (reg < 0)
+		return reg;
+	if (reg != TMP464_MANUFACTURER_ID)
+		return -ENODEV;
+
+	/* Check for "always return zero" bits */
+	reg = i2c_smbus_read_word_swapped(client, TMP464_THERM_STATUS_REG);
+	if (reg < 0)
+		return reg;
+	if (reg & 0x1f)
+		return -ENODEV;
+	reg = i2c_smbus_read_word_swapped(client, TMP464_THERM2_STATUS_REG);
+	if (reg < 0)
+		return reg;
+	if (reg & 0x1f)
+		return -ENODEV;
+
+	reg = i2c_smbus_read_word_swapped(client, TMP464_DEVICE_ID_REG);
+	if (reg < 0)
+		return reg;
+	switch (reg) {
+	case TMP464_DEVICE_ID:
+		name = "tmp464";
+		chip = "TMP464";
+		break;
+	case TMP468_DEVICE_ID:
+		name = "tmp468";
+		chip = "TMP468";
+		break;
+	default:
+		return -ENODEV;
+	}
+
+	strscpy(info->type, name, I2C_NAME_SIZE);
+	dev_info(&adapter->dev, "Detected TI %s chip at 0x%02x\n", chip, client->addr);
+
+	return 0;
+}
+
+static int tmp464_probe_child_from_dt(struct device *dev,
+				      struct device_node *child,
+				      struct tmp464_data *data)
+
+{
+	struct regmap *regmap = data->regmap;
+	u32 channel;
+	s32 nfactor;
+	int err;
+
+	err = of_property_read_u32(child, "reg", &channel);
+	if (err) {
+		dev_err(dev, "missing reg property of %pOFn\n", child);
+		return err;
+	}
+
+	if (channel >= data->channels) {
+		dev_err(dev, "invalid reg %d of %pOFn\n", channel, child);
+		return -EINVAL;
+	}
+
+	of_property_read_string(child, "label", &data->channel[channel].label);
+
+	data->channel[channel].enabled = of_device_is_available(child);
+
+	err = of_property_read_s32(child, "ti,n-factor", &nfactor);
+	if (err && err != -EINVAL)
+		return err;
+	if (!err) {
+		if (channel == 0) {
+			dev_err(dev, "n-factor can't be set for internal channel\n");
+			return -EINVAL;
+		}
+		if (nfactor > 127 || nfactor < -128) {
+			dev_err(dev, "n-factor for channel %d invalid (%d)\n",
+				channel, nfactor);
+			return -EINVAL;
+		}
+		err = regmap_write(regmap, TMP464_N_FACTOR_REG(channel),
+				   (nfactor << 8) & 0xff00);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static int tmp464_probe_from_dt(struct device *dev, struct tmp464_data *data)
+{
+	const struct device_node *np = dev->of_node;
+	struct device_node *child;
+	int err;
+
+	for_each_child_of_node(np, child) {
+		if (strcmp(child->name, "channel"))
+			continue;
+
+		err = tmp464_probe_child_from_dt(dev, child, data);
+		if (err) {
+			of_node_put(child);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+static const struct hwmon_ops tmp464_ops = {
+	.is_visible = tmp464_is_visible,
+	.read = tmp464_read,
+	.read_string = tmp464_read_string,
+	.write = tmp464_write,
+};
+
+static const struct hwmon_channel_info *tmp464_info[] = {
+	HWMON_CHANNEL_INFO(chip,
+			   HWMON_C_UPDATE_INTERVAL),
+	HWMON_CHANNEL_INFO(temp,
+			   HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MAX_HYST | HWMON_T_CRIT |
+			   HWMON_T_CRIT_HYST | HWMON_T_MAX_ALARM | HWMON_T_CRIT_ALARM |
+			   HWMON_T_LABEL | HWMON_T_ENABLE,
+			   HWMON_T_INPUT | HWMON_T_OFFSET | HWMON_T_MAX | HWMON_T_MAX_HYST |
+			   HWMON_T_CRIT | HWMON_T_CRIT_HYST | HWMON_T_MAX_ALARM |
+			   HWMON_T_CRIT_ALARM | HWMON_T_FAULT | HWMON_T_LABEL | HWMON_T_ENABLE,
+			   HWMON_T_INPUT | HWMON_T_OFFSET | HWMON_T_MAX | HWMON_T_MAX_HYST |
+			   HWMON_T_CRIT | HWMON_T_CRIT_HYST | HWMON_T_MAX_ALARM |
+			   HWMON_T_CRIT_ALARM | HWMON_T_FAULT | HWMON_T_LABEL | HWMON_T_ENABLE,
+			   HWMON_T_INPUT | HWMON_T_OFFSET | HWMON_T_MAX | HWMON_T_MAX_HYST |
+			   HWMON_T_CRIT | HWMON_T_CRIT_HYST | HWMON_T_MAX_ALARM |
+			   HWMON_T_CRIT_ALARM | HWMON_T_FAULT | HWMON_T_LABEL | HWMON_T_ENABLE,
+			   HWMON_T_INPUT | HWMON_T_OFFSET | HWMON_T_MAX | HWMON_T_MAX_HYST |
+			   HWMON_T_CRIT | HWMON_T_CRIT_HYST | HWMON_T_MAX_ALARM |
+			   HWMON_T_CRIT_ALARM | HWMON_T_FAULT | HWMON_T_LABEL | HWMON_T_ENABLE,
+			   HWMON_T_INPUT | HWMON_T_OFFSET | HWMON_T_MAX | HWMON_T_MAX_HYST |
+			   HWMON_T_CRIT | HWMON_T_CRIT_HYST | HWMON_T_MAX_ALARM |
+			   HWMON_T_CRIT_ALARM | HWMON_T_FAULT | HWMON_T_LABEL | HWMON_T_ENABLE,
+			   HWMON_T_INPUT | HWMON_T_OFFSET | HWMON_T_MAX | HWMON_T_MAX_HYST |
+			   HWMON_T_CRIT | HWMON_T_CRIT_HYST | HWMON_T_MAX_ALARM |
+			   HWMON_T_CRIT_ALARM | HWMON_T_FAULT | HWMON_T_LABEL | HWMON_T_ENABLE,
+			   HWMON_T_INPUT | HWMON_T_OFFSET | HWMON_T_MAX | HWMON_T_MAX_HYST |
+			   HWMON_T_CRIT | HWMON_T_CRIT_HYST | HWMON_T_MAX_ALARM |
+			   HWMON_T_CRIT_ALARM | HWMON_T_FAULT | HWMON_T_LABEL | HWMON_T_ENABLE,
+			   HWMON_T_INPUT | HWMON_T_OFFSET | HWMON_T_MAX | HWMON_T_MAX_HYST |
+			   HWMON_T_CRIT | HWMON_T_CRIT_HYST | HWMON_T_MAX_ALARM |
+			   HWMON_T_CRIT_ALARM | HWMON_T_FAULT | HWMON_T_LABEL | HWMON_T_ENABLE),
+	NULL
+};
+
+static const struct hwmon_chip_info tmp464_chip_info = {
+	.ops = &tmp464_ops,
+	.info = tmp464_info,
+};
+
+/* regmap */
+
+static bool tmp464_is_volatile_reg(struct device *dev, unsigned int reg)
+{
+	return (reg < TMP464_TEMP_REG(TMP468_NUM_CHANNELS) ||
+		reg == TMP464_THERM_STATUS_REG ||
+		reg == TMP464_THERM2_STATUS_REG ||
+		reg == TMP464_REMOTE_OPEN_REG);
+}
+
+static const struct regmap_config tmp464_regmap_config = {
+	.reg_bits = 8,
+	.val_bits = 16,
+	.max_register = TMP464_DEVICE_ID_REG,
+	.volatile_reg = tmp464_is_volatile_reg,
+	.val_format_endian = REGMAP_ENDIAN_BIG,
+	.cache_type = REGCACHE_RBTREE,
+	.use_single_read = true,
+	.use_single_write = true,
+};
+
+static int tmp464_probe(struct i2c_client *client)
+{
+	struct device *dev = &client->dev;
+	struct device *hwmon_dev;
+	struct tmp464_data *data;
+	int i, err;
+
+	if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_WORD_DATA)) {
+		dev_err(&client->dev, "i2c functionality check failed\n");
+		return -ENODEV;
+	}
+	data = devm_kzalloc(dev, sizeof(struct tmp464_data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	mutex_init(&data->update_lock);
+
+	if (dev->of_node)
+		data->channels = (int)(unsigned long)of_device_get_match_data(&client->dev);
+	else
+		data->channels = i2c_match_id(tmp464_id, client)->driver_data;
+
+	data->regmap = devm_regmap_init_i2c(client, &tmp464_regmap_config);
+	if (IS_ERR(data->regmap))
+		return PTR_ERR(data->regmap);
+
+	for (i = 0; i < data->channels; i++)
+		data->channel[i].enabled = true;
+
+	err = tmp464_init_client(dev, data);
+	if (err)
+		return err;
+
+	if (dev->of_node) {
+		err = tmp464_probe_from_dt(dev, data);
+		if (err)
+			return err;
+	}
+
+	hwmon_dev = devm_hwmon_device_register_with_info(dev, client->name,
+							 data, &tmp464_chip_info, NULL);
+	return PTR_ERR_OR_ZERO(hwmon_dev);
+}
+
+static struct i2c_driver tmp464_driver = {
+	.class = I2C_CLASS_HWMON,
+	.driver = {
+		.name	= "tmp464",
+		.of_match_table = of_match_ptr(tmp464_of_match),
+	},
+	.probe_new = tmp464_probe,
+	.id_table = tmp464_id,
+	.detect = tmp464_detect,
+	.address_list = normal_i2c,
+};
+
+module_i2c_driver(tmp464_driver);
+
+MODULE_AUTHOR("Agathe Porte <agathe.porte@nokia.com>");
+MODULE_DESCRIPTION("Texas Instruments TMP464 temperature sensor driver");
+MODULE_LICENSE("GPL");
-- 
GitLab


From 01f6c7338ce267959975da65d86ba34f44d54220 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Fri, 25 Feb 2022 13:06:45 -0600
Subject: [PATCH 0958/1586] cpuidle: PSCI: Move the `has_lpi` check to the
 beginning of the function

Currently the first thing checked is whether the PCSI cpu_suspend function
has been initialized.

Another change will be overloading `acpi_processor_ffh_lpi_probe` and
calling it sooner.  So make the `has_lpi` check the first thing checked
to prepare for that change.

Reviewed-by: Sudeep Holla <sudeep.holla@arm.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 arch/arm64/kernel/cpuidle.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c
index 03991eeff6430..3006f43248084 100644
--- a/arch/arm64/kernel/cpuidle.c
+++ b/arch/arm64/kernel/cpuidle.c
@@ -54,6 +54,9 @@ static int psci_acpi_cpu_init_idle(unsigned int cpu)
 	struct acpi_lpi_state *lpi;
 	struct acpi_processor *pr = per_cpu(processors, cpu);
 
+	if (unlikely(!pr || !pr->flags.has_lpi))
+		return -EINVAL;
+
 	/*
 	 * If the PSCI cpu_suspend function hook has not been initialized
 	 * idle states must not be enabled, so bail out
@@ -61,9 +64,6 @@ static int psci_acpi_cpu_init_idle(unsigned int cpu)
 	if (!psci_ops.cpu_suspend)
 		return -EOPNOTSUPP;
 
-	if (unlikely(!pr || !pr->flags.has_lpi))
-		return -EINVAL;
-
 	count = pr->power.count - 1;
 	if (count <= 0)
 		return -ENODEV;
-- 
GitLab


From eb087f305919ee8169ad65665610313e74260463 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Fri, 25 Feb 2022 13:06:46 -0600
Subject: [PATCH 0959/1586] ACPI: processor idle: Check for architectural
 support for LPI

When `osc_pc_lpi_support_confirmed` is set through `_OSC` and `_LPI` is
populated then the cpuidle driver assumes that LPI is fully functional.

However currently the kernel only provides architectural support for LPI
on ARM.  This leads to high power consumption on X86 platforms that
otherwise try to enable LPI.

So probe whether or not LPI support is implemented before enabling LPI in
the kernel.  This is done by overloading `acpi_processor_ffh_lpi_probe` to
check whether it returns `-EOPNOTSUPP`. It also means that all future
implementations of `acpi_processor_ffh_lpi_probe` will need to follow
these semantics as well.

Reviewed-by: Sudeep Holla <sudeep.holla@arm.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/processor_idle.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index f8e9fa82cb9b1..32b20efff5f83 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -1080,6 +1080,11 @@ static int flatten_lpi_states(struct acpi_processor *pr,
 	return 0;
 }
 
+int __weak acpi_processor_ffh_lpi_probe(unsigned int cpu)
+{
+	return -EOPNOTSUPP;
+}
+
 static int acpi_processor_get_lpi_info(struct acpi_processor *pr)
 {
 	int ret, i;
@@ -1088,6 +1093,11 @@ static int acpi_processor_get_lpi_info(struct acpi_processor *pr)
 	struct acpi_device *d = NULL;
 	struct acpi_lpi_states_array info[2], *tmp, *prev, *curr;
 
+	/* make sure our architecture has support */
+	ret = acpi_processor_ffh_lpi_probe(pr->id);
+	if (ret == -EOPNOTSUPP)
+		return ret;
+
 	if (!osc_pc_lpi_support_confirmed)
 		return -EOPNOTSUPP;
 
@@ -1139,11 +1149,6 @@ static int acpi_processor_get_lpi_info(struct acpi_processor *pr)
 	return 0;
 }
 
-int __weak acpi_processor_ffh_lpi_probe(unsigned int cpu)
-{
-	return -ENODEV;
-}
-
 int __weak acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi)
 {
 	return -ENODEV;
-- 
GitLab


From dc4e8c07e9e2f69387579c49caca26ba239f7270 Mon Sep 17 00:00:00 2001
From: Shuai Xue <xueshuai@linux.alibaba.com>
Date: Sun, 27 Feb 2022 20:25:45 +0800
Subject: [PATCH 0960/1586] ACPI: APEI: explicit init of HEST and GHES in
 apci_init()

From commit e147133a42cb ("ACPI / APEI: Make hest.c manage the estatus
memory pool") was merged, ghes_init() relies on acpi_hest_init() to manage
the estatus memory pool. On the other hand, ghes_init() relies on
sdei_init() to detect the SDEI version and (un)register events. The
dependencies are as follows:

    ghes_init() => acpi_hest_init() => acpi_bus_init() => acpi_init()
    ghes_init() => sdei_init()

HEST is not PCI-specific and initcall ordering is implicit and not
well-defined within a level.

Based on above, remove acpi_hest_init() from acpi_pci_root_init() and
convert ghes_init() and sdei_init() from initcalls to explicit calls in the
following order:

    acpi_hest_init()
    ghes_init()
        sdei_init()

Signed-off-by: Shuai Xue <xueshuai@linux.alibaba.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/apei/ghes.c    | 19 ++++++++-----------
 drivers/acpi/bus.c          |  2 ++
 drivers/acpi/pci_root.c     |  3 ---
 drivers/firmware/Kconfig    |  1 +
 drivers/firmware/arm_sdei.c | 13 ++-----------
 include/acpi/apei.h         |  4 +++-
 include/linux/arm_sdei.h    |  2 ++
 7 files changed, 18 insertions(+), 26 deletions(-)

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 0c5c9acc6254e..aadc0a972f18a 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -1457,33 +1457,35 @@ static struct platform_driver ghes_platform_driver = {
 	.remove		= ghes_remove,
 };
 
-static int __init ghes_init(void)
+void __init ghes_init(void)
 {
 	int rc;
 
+	sdei_init();
+
 	if (acpi_disabled)
-		return -ENODEV;
+		return;
 
 	switch (hest_disable) {
 	case HEST_NOT_FOUND:
-		return -ENODEV;
+		return;
 	case HEST_DISABLED:
 		pr_info(GHES_PFX "HEST is not enabled!\n");
-		return -EINVAL;
+		return;
 	default:
 		break;
 	}
 
 	if (ghes_disable) {
 		pr_info(GHES_PFX "GHES is not enabled!\n");
-		return -EINVAL;
+		return;
 	}
 
 	ghes_nmi_init_cxt();
 
 	rc = platform_driver_register(&ghes_platform_driver);
 	if (rc)
-		goto err;
+		return;
 
 	rc = apei_osc_setup();
 	if (rc == 0 && osc_sb_apei_support_acked)
@@ -1494,9 +1496,4 @@ static int __init ghes_init(void)
 		pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit.\n");
 	else
 		pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n");
-
-	return 0;
-err:
-	return rc;
 }
-device_initcall(ghes_init);
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index 07f604832fd6b..3f403db20f69f 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -1331,6 +1331,8 @@ static int __init acpi_init(void)
 
 	pci_mmcfg_late_init();
 	acpi_iort_init();
+	acpi_hest_init();
+	ghes_init();
 	acpi_scan_init();
 	acpi_ec_init();
 	acpi_debugfs_init();
diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index b76db99cced3b..6f9e75d148084 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -22,8 +22,6 @@
 #include <linux/slab.h>
 #include <linux/dmi.h>
 #include <linux/platform_data/x86/apple.h>
-#include <acpi/apei.h>	/* for acpi_hest_init() */
-
 #include "internal.h"
 
 #define ACPI_PCI_ROOT_CLASS		"pci_bridge"
@@ -943,7 +941,6 @@ out_release_info:
 
 void __init acpi_pci_root_init(void)
 {
-	acpi_hest_init();
 	if (acpi_pci_disabled)
 		return;
 
diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig
index 75cb91055c174..e5cfb01353d88 100644
--- a/drivers/firmware/Kconfig
+++ b/drivers/firmware/Kconfig
@@ -40,6 +40,7 @@ config ARM_SCPI_POWER_DOMAIN
 config ARM_SDE_INTERFACE
 	bool "ARM Software Delegated Exception Interface (SDEI)"
 	depends on ARM64
+	depends on ACPI_APEI_GHES
 	help
 	  The Software Delegated Exception Interface (SDEI) is an ARM
 	  standard for registering callbacks from the platform firmware
diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c
index a7e762c352f95..1e1a51510e83b 100644
--- a/drivers/firmware/arm_sdei.c
+++ b/drivers/firmware/arm_sdei.c
@@ -1059,14 +1059,14 @@ static bool __init sdei_present_acpi(void)
 	return true;
 }
 
-static int __init sdei_init(void)
+void __init sdei_init(void)
 {
 	struct platform_device *pdev;
 	int ret;
 
 	ret = platform_driver_register(&sdei_driver);
 	if (ret || !sdei_present_acpi())
-		return ret;
+		return;
 
 	pdev = platform_device_register_simple(sdei_driver.driver.name,
 					       0, NULL, 0);
@@ -1076,17 +1076,8 @@ static int __init sdei_init(void)
 		pr_info("Failed to register ACPI:SDEI platform device %d\n",
 			ret);
 	}
-
-	return ret;
 }
 
-/*
- * On an ACPI system SDEI needs to be ready before HEST:GHES tries to register
- * its events. ACPI is initialised from a subsys_initcall(), GHES is initialised
- * by device_initcall(). We want to be called in the middle.
- */
-subsys_initcall_sync(sdei_init);
-
 int sdei_event_handler(struct pt_regs *regs,
 		       struct sdei_registered_event *arg)
 {
diff --git a/include/acpi/apei.h b/include/acpi/apei.h
index ece0a8af2bae7..4e60dd73c3bb9 100644
--- a/include/acpi/apei.h
+++ b/include/acpi/apei.h
@@ -27,14 +27,16 @@ extern int hest_disable;
 extern int erst_disable;
 #ifdef CONFIG_ACPI_APEI_GHES
 extern bool ghes_disable;
+void __init ghes_init(void);
 #else
 #define ghes_disable 1
+static inline void ghes_init(void) { }
 #endif
 
 #ifdef CONFIG_ACPI_APEI
 void __init acpi_hest_init(void);
 #else
-static inline void acpi_hest_init(void) { return; }
+static inline void acpi_hest_init(void) { }
 #endif
 
 int erst_write(const struct cper_record_header *record);
diff --git a/include/linux/arm_sdei.h b/include/linux/arm_sdei.h
index 0a241c5c911d8..14dc461b0e829 100644
--- a/include/linux/arm_sdei.h
+++ b/include/linux/arm_sdei.h
@@ -46,9 +46,11 @@ int sdei_unregister_ghes(struct ghes *ghes);
 /* For use by arch code when CPU hotplug notifiers are not appropriate. */
 int sdei_mask_local_cpu(void);
 int sdei_unmask_local_cpu(void);
+void __init sdei_init(void);
 #else
 static inline int sdei_mask_local_cpu(void) { return 0; }
 static inline int sdei_unmask_local_cpu(void) { return 0; }
+static inline void sdei_init(void) { }
 #endif /* CONFIG_ARM_SDE_INTERFACE */
 
 
-- 
GitLab


From 27e932a31496f75b78ea41fd5ccadd0f75d8e8be Mon Sep 17 00:00:00 2001
From: Shuai Xue <xueshuai@linux.alibaba.com>
Date: Sun, 27 Feb 2022 20:25:46 +0800
Subject: [PATCH 0961/1586] ACPI: APEI: rename ghes_init() with an "acpi_"
 prefix

ghes_init() sticks out in acpi_init() because it is the only functions
without an "acpi_" prefix.

Rename ghes_init with an "acpi_" prefix, then all looks fine.

Signed-off-by: Shuai Xue <xueshuai@linux.alibaba.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/apei/ghes.c | 2 +-
 drivers/acpi/bus.c       | 2 +-
 include/acpi/apei.h      | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index aadc0a972f18a..d91ad378c00d6 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -1457,7 +1457,7 @@ static struct platform_driver ghes_platform_driver = {
 	.remove		= ghes_remove,
 };
 
-void __init ghes_init(void)
+void __init acpi_ghes_init(void)
 {
 	int rc;
 
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index 3f403db20f69f..cd374210fb9f7 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -1332,7 +1332,7 @@ static int __init acpi_init(void)
 	pci_mmcfg_late_init();
 	acpi_iort_init();
 	acpi_hest_init();
-	ghes_init();
+	acpi_ghes_init();
 	acpi_scan_init();
 	acpi_ec_init();
 	acpi_debugfs_init();
diff --git a/include/acpi/apei.h b/include/acpi/apei.h
index 4e60dd73c3bb9..afaca3a075e86 100644
--- a/include/acpi/apei.h
+++ b/include/acpi/apei.h
@@ -27,10 +27,10 @@ extern int hest_disable;
 extern int erst_disable;
 #ifdef CONFIG_ACPI_APEI_GHES
 extern bool ghes_disable;
-void __init ghes_init(void);
+void __init acpi_ghes_init(void);
 #else
 #define ghes_disable 1
-static inline void ghes_init(void) { }
+static inline void acpi_ghes_init(void) { }
 #endif
 
 #ifdef CONFIG_ACPI_APEI
-- 
GitLab


From b625fe694626992c1d640b37c691cfcfc58d6006 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 1 Mar 2022 00:39:30 +0200
Subject: [PATCH 0962/1586] ACPI: docs: enumeration: Discourage to use custom
 _DSM methods

Since we have _DSD established and specified (ACPI v5.1+) there is no
need to use custom _DSM methods. Rewrite documentation to use _DSD.

Fixes: f60e7074902a ("misc: at25: Make use of device property API")
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 .../firmware-guide/acpi/enumeration.rst       | 48 ++++++++-----------
 1 file changed, 21 insertions(+), 27 deletions(-)

diff --git a/Documentation/firmware-guide/acpi/enumeration.rst b/Documentation/firmware-guide/acpi/enumeration.rst
index 74b830b2fd59e..cda41e4255ebf 100644
--- a/Documentation/firmware-guide/acpi/enumeration.rst
+++ b/Documentation/firmware-guide/acpi/enumeration.rst
@@ -188,43 +188,37 @@ to at25 SPI eeprom driver (this is meant for the above ACPI snippet)::
 	};
 
 Note that this driver actually needs more information like page size of the
-eeprom etc. but at the time writing this there is no standard way of
-passing those. One idea is to return this in _DSM method like::
+eeprom, etc. This information can be passed via _DSD method like::
 
 	Device (EEP0)
 	{
 		...
-		Method (_DSM, 4, NotSerialized)
+		Name (_DSD, Package ()
 		{
-			Store (Package (6)
+			ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
+			Package ()
 			{
-				"byte-len", 1024,
-				"addr-mode", 2,
-				"page-size, 32
-			}, Local0)
-
-			// Check UUIDs etc.
-
-			Return (Local0)
-		}
-
-Then the at25 SPI driver can get this configuration by calling _DSM on its
-ACPI handle like::
-
-	struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL };
-	struct acpi_object_list input;
-	acpi_status status;
+				Package () { "size", 1024 },
+				Package () { "pagesize", 32 },
+				Package () { "address-width", 16 },
+			}
+		})
+	}
 
-	/* Fill in the input buffer */
+Then the at25 SPI driver can get this configuration by calling device property
+APIs during ->probe() phase like::
 
-	status = acpi_evaluate_object(ACPI_HANDLE(&spi->dev), "_DSM",
-				      &input, &output);
-	if (ACPI_FAILURE(status))
-		/* Handle the error */
+	err = device_property_read_u32(dev, "size", &size);
+	if (err)
+		...error handling...
 
-	/* Extract the data here */
+	err = device_property_read_u32(dev, "pagesize", &page_size);
+	if (err)
+		...error handling...
 
-	kfree(output.pointer);
+	err = device_property_read_u32(dev, "address-width", &addr_width);
+	if (err)
+		...error handling...
 
 I2C serial bus support
 ======================
-- 
GitLab


From d72f06cee0d5cad0056967f0edc2c23b984238ba Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 1 Mar 2022 00:39:31 +0200
Subject: [PATCH 0963/1586] ACPI: docs: enumeration: Update UART serial bus
 resource documentation

In some cases UART serial bus resource may be represented by struct
serdev_device.

Fixes: 53c7626356c7 ("serdev: Add ACPI support")
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/firmware-guide/acpi/enumeration.rst | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/Documentation/firmware-guide/acpi/enumeration.rst b/Documentation/firmware-guide/acpi/enumeration.rst
index cda41e4255ebf..799ca240e6126 100644
--- a/Documentation/firmware-guide/acpi/enumeration.rst
+++ b/Documentation/firmware-guide/acpi/enumeration.rst
@@ -19,16 +19,17 @@ possible we decided to do following:
     platform devices.
 
   - Devices behind real busses where there is a connector resource
-    are represented as struct spi_device or struct i2c_device
-    (standard UARTs are not busses so there is no struct uart_device).
+    are represented as struct spi_device or struct i2c_device. Note
+    that standard UARTs are not busses so there is no struct uart_device,
+    although some of them may be represented by sturct serdev_device.
 
 As both ACPI and Device Tree represent a tree of devices (and their
 resources) this implementation follows the Device Tree way as much as
 possible.
 
-The ACPI implementation enumerates devices behind busses (platform, SPI and
-I2C), creates the physical devices and binds them to their ACPI handle in
-the ACPI namespace.
+The ACPI implementation enumerates devices behind busses (platform, SPI,
+I2C, and in some cases UART), creates the physical devices and binds them
+to their ACPI handle in the ACPI namespace.
 
 This means that when ACPI_HANDLE(dev) returns non-NULL the device was
 enumerated from ACPI namespace. This handle can be used to extract other
-- 
GitLab


From e8a62f363661d824495737427d59ba3fce91ae34 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 1 Mar 2022 00:39:32 +0200
Subject: [PATCH 0964/1586] ACPI: docs: enumeration: Remove redundant .owner
 assignment

The owner member of the struct i2c_driver is assigned by a corresponding macro.
No need to assign it explicitly.

Fixes: 59c3987805a9 ("ACPI: add documentation about ACPI 5 enumeration")
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/firmware-guide/acpi/enumeration.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/firmware-guide/acpi/enumeration.rst b/Documentation/firmware-guide/acpi/enumeration.rst
index 799ca240e6126..900d193ffa5e7 100644
--- a/Documentation/firmware-guide/acpi/enumeration.rst
+++ b/Documentation/firmware-guide/acpi/enumeration.rst
@@ -243,7 +243,6 @@ input driver::
 	static struct i2c_driver mpu3050_i2c_driver = {
 		.driver	= {
 			.name	= "mpu3050",
-			.owner	= THIS_MODULE,
 			.pm	= &mpu3050_pm,
 			.of_match_table = mpu3050_of_match,
 			.acpi_match_table = ACPI_PTR(mpu3050_acpi_match),
@@ -252,6 +251,7 @@ input driver::
 		.remove		= mpu3050_remove,
 		.id_table	= mpu3050_ids,
 	};
+	module_i2c_driver(mpu3050_i2c_driver);
 
 Reference to PWM device
 =======================
-- 
GitLab


From e92e19747c1e0eb805c7c72fa991da7a292b43a6 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 1 Mar 2022 00:39:33 +0200
Subject: [PATCH 0965/1586] ACPI: docs: enumeration: Amend PWM enumeration ASL
 example

Drop blank line and add a closing curly brace to make ASL example closer
to the reality.

Fixes: ef3d13b86763 ("docs: firmware-guide: ACPI: Add a PWM example")
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/firmware-guide/acpi/enumeration.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/firmware-guide/acpi/enumeration.rst b/Documentation/firmware-guide/acpi/enumeration.rst
index 900d193ffa5e7..7a5095901153d 100644
--- a/Documentation/firmware-guide/acpi/enumeration.rst
+++ b/Documentation/firmware-guide/acpi/enumeration.rst
@@ -277,9 +277,9 @@ introduced, i.e.::
                     }
                 }
             }
-
         })
         ...
+    }
 
 In the above example the PWM-based LED driver references to the PWM channel 0
 of \_SB.PCI0.PWM device with initial period setting equal to 600 ms (note that
-- 
GitLab


From 6bf87c4de91ca31f829587f69cd8ea13576a483d Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 1 Mar 2022 00:39:34 +0200
Subject: [PATCH 0966/1586] ACPI: docs: enumeration: Drop ugly ifdeffery from
 the examples

The ifdeffery around ACPI ID tables are ugly and in some cases
even less valuable than plain definitions. Drop them for good
to avoid spreading rather bad pattern.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/firmware-guide/acpi/enumeration.rst | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/Documentation/firmware-guide/acpi/enumeration.rst b/Documentation/firmware-guide/acpi/enumeration.rst
index 7a5095901153d..caa9a5e508b2e 100644
--- a/Documentation/firmware-guide/acpi/enumeration.rst
+++ b/Documentation/firmware-guide/acpi/enumeration.rst
@@ -47,18 +47,16 @@ some minor changes.
 Adding ACPI support for an existing driver should be pretty
 straightforward. Here is the simplest example::
 
-	#ifdef CONFIG_ACPI
 	static const struct acpi_device_id mydrv_acpi_match[] = {
 		/* ACPI IDs here */
 		{ }
 	};
 	MODULE_DEVICE_TABLE(acpi, mydrv_acpi_match);
-	#endif
 
 	static struct platform_driver my_driver = {
 		...
 		.driver = {
-			.acpi_match_table = ACPI_PTR(mydrv_acpi_match),
+			.acpi_match_table = mydrv_acpi_match,
 		},
 	};
 
@@ -173,18 +171,16 @@ The SPI device drivers only need to add ACPI IDs in a similar way than with
 the platform device drivers. Below is an example where we add ACPI support
 to at25 SPI eeprom driver (this is meant for the above ACPI snippet)::
 
-	#ifdef CONFIG_ACPI
 	static const struct acpi_device_id at25_acpi_match[] = {
 		{ "AT25", 0 },
 		{ },
 	};
 	MODULE_DEVICE_TABLE(acpi, at25_acpi_match);
-	#endif
 
 	static struct spi_driver at25_driver = {
 		.driver = {
 			...
-			.acpi_match_table = ACPI_PTR(at25_acpi_match),
+			.acpi_match_table = at25_acpi_match,
 		},
 	};
 
@@ -232,20 +228,18 @@ registered.
 Below is an example of how to add ACPI support to the existing mpu3050
 input driver::
 
-	#ifdef CONFIG_ACPI
 	static const struct acpi_device_id mpu3050_acpi_match[] = {
 		{ "MPU3050", 0 },
 		{ },
 	};
 	MODULE_DEVICE_TABLE(acpi, mpu3050_acpi_match);
-	#endif
 
 	static struct i2c_driver mpu3050_i2c_driver = {
 		.driver	= {
 			.name	= "mpu3050",
 			.pm	= &mpu3050_pm,
 			.of_match_table = mpu3050_of_match,
-			.acpi_match_table = ACPI_PTR(mpu3050_acpi_match),
+			.acpi_match_table = mpu3050_acpi_match,
 		},
 		.probe		= mpu3050_probe,
 		.remove		= mpu3050_remove,
-- 
GitLab


From a889e50ea088a525338e7fb9adc887d407e4a5c4 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 1 Mar 2022 00:39:36 +0200
Subject: [PATCH 0967/1586] ACPI: docs: enumeration: Drop comma for terminator
 entry

Drop comma for terminator entry to avoid copy'n'paste of this pattern.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/firmware-guide/acpi/enumeration.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/firmware-guide/acpi/enumeration.rst b/Documentation/firmware-guide/acpi/enumeration.rst
index caa9a5e508b2e..621ae95bc4722 100644
--- a/Documentation/firmware-guide/acpi/enumeration.rst
+++ b/Documentation/firmware-guide/acpi/enumeration.rst
@@ -173,7 +173,7 @@ to at25 SPI eeprom driver (this is meant for the above ACPI snippet)::
 
 	static const struct acpi_device_id at25_acpi_match[] = {
 		{ "AT25", 0 },
-		{ },
+		{ }
 	};
 	MODULE_DEVICE_TABLE(acpi, at25_acpi_match);
 
@@ -230,7 +230,7 @@ input driver::
 
 	static const struct acpi_device_id mpu3050_acpi_match[] = {
 		{ "MPU3050", 0 },
-		{ },
+		{ }
 	};
 	MODULE_DEVICE_TABLE(acpi, mpu3050_acpi_match);
 
-- 
GitLab


From 01399a994bb477bb7d0a32665ce77407da849faf Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 1 Mar 2022 00:39:35 +0200
Subject: [PATCH 0968/1586] ACPI: docs: enumeration: Unify Package () for
 properties

Unify Package () representation for properties:
 - make them one line where it's possible
 - add spaces between parentheses and curly braces
 - drop the explicit size of package

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/firmware-guide/acpi/enumeration.rst | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/Documentation/firmware-guide/acpi/enumeration.rst b/Documentation/firmware-guide/acpi/enumeration.rst
index 621ae95bc4722..a946643244f73 100644
--- a/Documentation/firmware-guide/acpi/enumeration.rst
+++ b/Documentation/firmware-guide/acpi/enumeration.rst
@@ -154,7 +154,7 @@ Here is what the ACPI namespace for a SPI slave might look like::
 	Device (EEP0)
 	{
 		Name (_ADR, 1)
-		Name (_CID, Package() {
+		Name (_CID, Package () {
 			"ATML0025",
 			"AT25",
 		})
@@ -326,8 +326,8 @@ For example::
 			ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
 			Package ()
 			{
-				Package () {"power-gpios", Package() {^DEV, 0, 0, 0 }},
-				Package () {"irq-gpios", Package() {^DEV, 1, 0, 0 }},
+				Package () { "power-gpios", Package () { ^DEV, 0, 0, 0 } },
+				Package () { "irq-gpios", Package () { ^DEV, 1, 0, 0 } },
 			}
 		})
 		...
@@ -449,10 +449,10 @@ namespace link::
 	Device (TMP0)
 	{
 		Name (_HID, "PRP0001")
-		Name (_DSD, Package() {
+		Name (_DSD, Package () {
 			ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
 			Package () {
-				Package (2) { "compatible", "ti,tmp75" },
+				Package () { "compatible", "ti,tmp75" },
 			}
 		})
 		Method (_CRS, 0, Serialized)
-- 
GitLab


From 344150999b7fc88502a65bbb147a47503eca2033 Mon Sep 17 00:00:00 2001
From: Chao Yu <chao@kernel.org>
Date: Thu, 27 Jan 2022 13:44:49 +0800
Subject: [PATCH 0969/1586] f2fs: fix to avoid potential deadlock

Quoted from Jing Xia's report, there is a potential deadlock may happen
between kworker and checkpoint as below:

[T:writeback]				[T:checkpoint]
- wb_writeback
 - blk_start_plug
bio contains NodeA was plugged in writeback threads
					- do_writepages  -- sync write inodeB, inc wb_sync_req[DATA]
					 - f2fs_write_data_pages
					  - f2fs_write_single_data_page -- write last dirty page
					   - f2fs_do_write_data_page
					    - set_page_writeback  -- clear page dirty flag and
					    PAGECACHE_TAG_DIRTY tag in radix tree
					    - f2fs_outplace_write_data
					     - f2fs_update_data_blkaddr
					      - f2fs_wait_on_page_writeback -- wait NodeA to writeback here
					   - inode_dec_dirty_pages
 - writeback_sb_inodes
  - writeback_single_inode
   - do_writepages
    - f2fs_write_data_pages -- skip writepages due to wb_sync_req[DATA]
     - wbc->pages_skipped += get_dirty_pages() -- PAGECACHE_TAG_DIRTY is not set but get_dirty_pages() returns one
  - requeue_inode -- requeue inode to wb->b_dirty queue due to non-zero.pages_skipped
 - blk_finish_plug

Let's try to avoid deadlock condition by forcing unplugging previous bio via
blk_finish_plug(current->plug) once we'v skipped writeback in writepages()
due to valid sbi->wb_sync_req[DATA/NODE].

Fixes: 687de7f1010c ("f2fs: avoid IO split due to mixed WB_SYNC_ALL and WB_SYNC_NONE")
Signed-off-by: Zhiguo Niu <zhiguo.niu@unisoc.com>
Signed-off-by: Jing Xia <jing.xia@unisoc.com>
Signed-off-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/data.c | 6 +++++-
 fs/f2fs/node.c | 6 +++++-
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 6b5f389ba998b..b09f401f89606 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -3169,8 +3169,12 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
 	/* to avoid spliting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */
 	if (wbc->sync_mode == WB_SYNC_ALL)
 		atomic_inc(&sbi->wb_sync_req[DATA]);
-	else if (atomic_read(&sbi->wb_sync_req[DATA]))
+	else if (atomic_read(&sbi->wb_sync_req[DATA])) {
+		/* to avoid potential deadlock */
+		if (current->plug)
+			blk_finish_plug(current->plug);
 		goto skip_write;
+	}
 
 	if (__should_serialize_io(inode, wbc)) {
 		mutex_lock(&sbi->writepages);
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 0d98834575797..fe8c38c9ac77c 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -2112,8 +2112,12 @@ static int f2fs_write_node_pages(struct address_space *mapping,
 
 	if (wbc->sync_mode == WB_SYNC_ALL)
 		atomic_inc(&sbi->wb_sync_req[NODE]);
-	else if (atomic_read(&sbi->wb_sync_req[NODE]))
+	else if (atomic_read(&sbi->wb_sync_req[NODE])) {
+		/* to avoid potential deadlock */
+		if (current->plug)
+			blk_finish_plug(current->plug);
 		goto skip_write;
+	}
 
 	trace_f2fs_writepages(mapping->host, wbc, NODE);
 
-- 
GitLab


From f41ee8b91c00770d718be2ff4852a80017ae9ab3 Mon Sep 17 00:00:00 2001
From: Chao Yu <chao@kernel.org>
Date: Fri, 4 Mar 2022 09:49:13 +0800
Subject: [PATCH 0970/1586] f2fs: fix to do sanity check on curseg->alloc_type

As Wenqing Liu reported in bugzilla:

https://bugzilla.kernel.org/show_bug.cgi?id=215657

- Overview
UBSAN: array-index-out-of-bounds in fs/f2fs/segment.c:3460:2 when mount and operate a corrupted image

- Reproduce
tested on kernel 5.17-rc4, 5.17-rc6

1. mkdir test_crash
2. cd test_crash
3. unzip tmp2.zip
4. mkdir mnt
5. ./single_test.sh f2fs 2

- Kernel dump
[   46.434454] loop0: detected capacity change from 0 to 131072
[   46.529839] F2FS-fs (loop0): Mounted with checkpoint version = 7548c2d9
[   46.738319] ================================================================================
[   46.738412] UBSAN: array-index-out-of-bounds in fs/f2fs/segment.c:3460:2
[   46.738475] index 231 is out of range for type 'unsigned int [2]'
[   46.738539] CPU: 2 PID: 939 Comm: umount Not tainted 5.17.0-rc6 #1
[   46.738547] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.13.0-1ubuntu1.1 04/01/2014
[   46.738551] Call Trace:
[   46.738556]  <TASK>
[   46.738563]  dump_stack_lvl+0x47/0x5c
[   46.738581]  ubsan_epilogue+0x5/0x50
[   46.738592]  __ubsan_handle_out_of_bounds+0x68/0x80
[   46.738604]  f2fs_allocate_data_block+0xdff/0xe60 [f2fs]
[   46.738819]  do_write_page+0xef/0x210 [f2fs]
[   46.738934]  f2fs_do_write_node_page+0x3f/0x80 [f2fs]
[   46.739038]  __write_node_page+0x2b7/0x920 [f2fs]
[   46.739162]  f2fs_sync_node_pages+0x943/0xb00 [f2fs]
[   46.739293]  f2fs_write_checkpoint+0x7bb/0x1030 [f2fs]
[   46.739405]  kill_f2fs_super+0x125/0x150 [f2fs]
[   46.739507]  deactivate_locked_super+0x60/0xc0
[   46.739517]  deactivate_super+0x70/0xb0
[   46.739524]  cleanup_mnt+0x11a/0x200
[   46.739532]  __cleanup_mnt+0x16/0x20
[   46.739538]  task_work_run+0x67/0xa0
[   46.739547]  exit_to_user_mode_prepare+0x18c/0x1a0
[   46.739559]  syscall_exit_to_user_mode+0x26/0x40
[   46.739568]  do_syscall_64+0x46/0xb0
[   46.739584]  entry_SYSCALL_64_after_hwframe+0x44/0xae

The root cause is we missed to do sanity check on curseg->alloc_type,
result in out-of-bound accessing on sbi->block_count[] array, fix it.

Signed-off-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/segment.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 56211e201d51b..012524db74374 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -4793,6 +4793,13 @@ static int sanity_check_curseg(struct f2fs_sb_info *sbi)
 
 		sanity_check_seg_type(sbi, curseg->seg_type);
 
+		if (curseg->alloc_type != LFS && curseg->alloc_type != SSR) {
+			f2fs_err(sbi,
+				 "Current segment has invalid alloc_type:%d",
+				 curseg->alloc_type);
+			return -EFSCORRUPTED;
+		}
+
 		if (f2fs_test_bit(blkofs, se->cur_valid_map))
 			goto out;
 
-- 
GitLab


From 9e1a3ce0a952450a1163cc93ab1df6d4fa8c8155 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 23 Feb 2022 19:32:10 -0800
Subject: [PATCH 0971/1586] binfmt_elf: Introduce KUnit test
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds simple KUnit test for some binfmt_elf internals: specifically a
regression test for the problem fixed by commit 8904d9cd90ee ("ELF:
fix overflow in total mapping size calculation").

$ ./tools/testing/kunit/kunit.py run --arch x86_64 \
    --kconfig_add CONFIG_IA32_EMULATION=y '*binfmt_elf'
...
[19:41:08] ================== binfmt_elf (1 subtest) ==================
[19:41:08] [PASSED] total_mapping_size_test
[19:41:08] =================== [PASSED] binfmt_elf ====================
[19:41:08] ============== compat_binfmt_elf (1 subtest) ===============
[19:41:08] [PASSED] total_mapping_size_test
[19:41:08] ================ [PASSED] compat_binfmt_elf ================
[19:41:08] ============================================================
[19:41:08] Testing complete. Passed: 2, Failed: 0, Crashed: 0, Skipped: 0, Errors: 0

Cc: Eric Biederman <ebiederm@xmission.com>
Cc: David Gow <davidgow@google.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: "Magnus Groß" <magnus.gross@rwth-aachen.de>
Cc: kunit-dev@googlegroups.com
Cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Kees Cook <keescook@chromium.org>
---
v1: https://lore.kernel.org/lkml/20220224054332.1852813-1-keescook@chromium.org
v2:
 - improve commit log
 - fix comment URL (Daniel)
 - drop redundant KUnit Kconfig help info (Daniel)
 - note in Kconfig help that COMPAT builds add a compat test (David)
---
 fs/Kconfig.binfmt      | 10 +++++++
 fs/binfmt_elf.c        |  4 +++
 fs/binfmt_elf_test.c   | 64 ++++++++++++++++++++++++++++++++++++++++++
 fs/compat_binfmt_elf.c |  2 ++
 4 files changed, 80 insertions(+)
 create mode 100644 fs/binfmt_elf_test.c

diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index 4d5ae61580aae..f7065e825b7f0 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -28,6 +28,16 @@ config BINFMT_ELF
 	  ld.so (check the file <file:Documentation/Changes> for location and
 	  latest version).
 
+config BINFMT_ELF_KUNIT_TEST
+	bool "Build KUnit tests for ELF binary support" if !KUNIT_ALL_TESTS
+	depends on KUNIT=y && BINFMT_ELF=y
+	default KUNIT_ALL_TESTS
+	help
+	  This builds the ELF loader KUnit tests, which try to gather
+	  prior bug fixes into a regression test collection. This is really
+	  only needed for debugging. Note that with CONFIG_COMPAT=y, the
+	  compat_binfmt_elf KUnit test is also created.
+
 config COMPAT_BINFMT_ELF
 	def_bool y
 	depends on COMPAT && BINFMT_ELF
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 4c5a2175f6053..eaf39b1bdbbb8 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -2346,3 +2346,7 @@ static void __exit exit_elf_binfmt(void)
 core_initcall(init_elf_binfmt);
 module_exit(exit_elf_binfmt);
 MODULE_LICENSE("GPL");
+
+#ifdef CONFIG_BINFMT_ELF_KUNIT_TEST
+#include "binfmt_elf_test.c"
+#endif
diff --git a/fs/binfmt_elf_test.c b/fs/binfmt_elf_test.c
new file mode 100644
index 0000000000000..11d734fec366e
--- /dev/null
+++ b/fs/binfmt_elf_test.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <kunit/test.h>
+
+static void total_mapping_size_test(struct kunit *test)
+{
+	struct elf_phdr empty[] = {
+		{ .p_type = PT_LOAD, .p_vaddr = 0, .p_memsz = 0, },
+		{ .p_type = PT_INTERP, .p_vaddr = 10, .p_memsz = 999999, },
+	};
+	/*
+	 * readelf -lW /bin/mount | grep '^  .*0x0' | awk '{print "\t\t{ .p_type = PT_" \
+	 *				$1 ", .p_vaddr = " $3 ", .p_memsz = " $6 ", },"}'
+	 */
+	struct elf_phdr mount[] = {
+		{ .p_type = PT_PHDR, .p_vaddr = 0x00000040, .p_memsz = 0x0002d8, },
+		{ .p_type = PT_INTERP, .p_vaddr = 0x00000318, .p_memsz = 0x00001c, },
+		{ .p_type = PT_LOAD, .p_vaddr = 0x00000000, .p_memsz = 0x0033a8, },
+		{ .p_type = PT_LOAD, .p_vaddr = 0x00004000, .p_memsz = 0x005c91, },
+		{ .p_type = PT_LOAD, .p_vaddr = 0x0000a000, .p_memsz = 0x0022f8, },
+		{ .p_type = PT_LOAD, .p_vaddr = 0x0000d330, .p_memsz = 0x000d40, },
+		{ .p_type = PT_DYNAMIC, .p_vaddr = 0x0000d928, .p_memsz = 0x000200, },
+		{ .p_type = PT_NOTE, .p_vaddr = 0x00000338, .p_memsz = 0x000030, },
+		{ .p_type = PT_NOTE, .p_vaddr = 0x00000368, .p_memsz = 0x000044, },
+		{ .p_type = PT_GNU_PROPERTY, .p_vaddr = 0x00000338, .p_memsz = 0x000030, },
+		{ .p_type = PT_GNU_EH_FRAME, .p_vaddr = 0x0000b490, .p_memsz = 0x0001ec, },
+		{ .p_type = PT_GNU_STACK, .p_vaddr = 0x00000000, .p_memsz = 0x000000, },
+		{ .p_type = PT_GNU_RELRO, .p_vaddr = 0x0000d330, .p_memsz = 0x000cd0, },
+	};
+	size_t mount_size = 0xE070;
+	/* https://lore.kernel.org/linux-fsdevel/YfF18Dy85mCntXrx@fractal.localdomain */
+	struct elf_phdr unordered[] = {
+		{ .p_type = PT_LOAD, .p_vaddr = 0x00000000, .p_memsz = 0x0033a8, },
+		{ .p_type = PT_LOAD, .p_vaddr = 0x0000d330, .p_memsz = 0x000d40, },
+		{ .p_type = PT_LOAD, .p_vaddr = 0x00004000, .p_memsz = 0x005c91, },
+		{ .p_type = PT_LOAD, .p_vaddr = 0x0000a000, .p_memsz = 0x0022f8, },
+	};
+
+	/* No headers, no size. */
+	KUNIT_EXPECT_EQ(test, total_mapping_size(NULL, 0), 0);
+	KUNIT_EXPECT_EQ(test, total_mapping_size(empty, 0), 0);
+	/* Empty headers, no size. */
+	KUNIT_EXPECT_EQ(test, total_mapping_size(empty, 1), 0);
+	/* No PT_LOAD headers, no size. */
+	KUNIT_EXPECT_EQ(test, total_mapping_size(&empty[1], 1), 0);
+	/* Empty PT_LOAD and non-PT_LOAD headers, no size. */
+	KUNIT_EXPECT_EQ(test, total_mapping_size(empty, 2), 0);
+
+	/* Normal set of PT_LOADS, and expected size. */
+	KUNIT_EXPECT_EQ(test, total_mapping_size(mount, ARRAY_SIZE(mount)), mount_size);
+	/* Unordered PT_LOADs result in same size. */
+	KUNIT_EXPECT_EQ(test, total_mapping_size(unordered, ARRAY_SIZE(unordered)), mount_size);
+}
+
+static struct kunit_case binfmt_elf_test_cases[] = {
+	KUNIT_CASE(total_mapping_size_test),
+	{},
+};
+
+static struct kunit_suite binfmt_elf_test_suite = {
+	.name = KBUILD_MODNAME,
+	.test_cases = binfmt_elf_test_cases,
+};
+
+kunit_test_suite(binfmt_elf_test_suite);
diff --git a/fs/compat_binfmt_elf.c b/fs/compat_binfmt_elf.c
index 95e72d271b95b..8f0af4f626316 100644
--- a/fs/compat_binfmt_elf.c
+++ b/fs/compat_binfmt_elf.c
@@ -135,6 +135,8 @@
 #define elf_format		compat_elf_format
 #define init_elf_binfmt		init_compat_elf_binfmt
 #define exit_elf_binfmt		exit_compat_elf_binfmt
+#define binfmt_elf_test_cases	compat_binfmt_elf_test_cases
+#define binfmt_elf_test_suite	compat_binfmt_elf_test_suite
 
 /*
  * We share all the actual code with the native (64-bit) version.
-- 
GitLab


From bf9ad37dc8a30cce22ae95d6c2ca6abf8731d305 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Tue, 14 Jul 2015 14:26:34 +0200
Subject: [PATCH 0972/1586] signal, x86: Delay calling signals in atomic on RT
 enabled kernels

On x86_64 we must disable preemption before we enable interrupts
for stack faults, int3 and debugging, because the current task is using
a per CPU debug stack defined by the IST. If we schedule out, another task
can come in and use the same stack and cause the stack to be corrupted
and crash the kernel on return.

When CONFIG_PREEMPT_RT is enabled, spinlock_t locks become sleeping, and
one of these is the spin lock used in signal handling.

Some of the debug code (int3) causes do_trap() to send a signal.
This function calls a spinlock_t lock that has been converted to a
sleeping lock. If this happens, the above issues with the corrupted
stack is possible.

Instead of calling the signal right away, for PREEMPT_RT and x86,
the signal information is stored on the stacks task_struct and
TIF_NOTIFY_RESUME is set. Then on exit of the trap, the signal resume
code will send the signal when preemption is enabled.

[ rostedt: Switched from #ifdef CONFIG_PREEMPT_RT to
  ARCH_RT_DELAYS_SIGNAL_SEND and added comments to the code. ]
[bigeasy: Add on 32bit as per Yang Shi, minor rewording. ]
[ tglx: Use a config option ]

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/Ygq5aBB/qMQw6aP5@linutronix.de
---
 arch/x86/Kconfig       |  1 +
 include/linux/sched.h  |  3 +++
 kernel/Kconfig.preempt | 12 +++++++++++-
 kernel/entry/common.c  | 14 ++++++++++++++
 kernel/signal.c        | 40 ++++++++++++++++++++++++++++++++++++++++
 5 files changed, 69 insertions(+), 1 deletion(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 9f5bd41bf660c..d557ac29b6cd9 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -120,6 +120,7 @@ config X86
 	select ARCH_WANTS_NO_INSTR
 	select ARCH_WANT_HUGE_PMD_SHARE
 	select ARCH_WANT_LD_ORPHAN_WARN
+	select ARCH_WANTS_RT_DELAYED_SIGNALS
 	select ARCH_WANTS_THP_SWAP		if X86_64
 	select ARCH_HAS_PARANOID_L1D_FLUSH
 	select BUILDTIME_TABLE_SORT
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 75ba8aa60248b..098e37fd770ad 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1087,6 +1087,9 @@ struct task_struct {
 	/* Restored if set_restore_sigmask() was used: */
 	sigset_t			saved_sigmask;
 	struct sigpending		pending;
+#ifdef CONFIG_RT_DELAYED_SIGNALS
+	struct kernel_siginfo		forced_info;
+#endif
 	unsigned long			sas_ss_sp;
 	size_t				sas_ss_size;
 	unsigned int			sas_ss_flags;
diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
index ce77f02656603..5644abd5f8a8d 100644
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -132,4 +132,14 @@ config SCHED_CORE
 	  which is the likely usage by Linux distributions, there should
 	  be no measurable impact on performance.
 
-
+config ARCH_WANTS_RT_DELAYED_SIGNALS
+	bool
+	help
+	  This option is selected by architectures where raising signals
+	  can happen in atomic contexts on PREEMPT_RT enabled kernels. This
+	  option delays raising the signal until the return to user space
+	  loop where it is also delivered. X86 requires this to deliver
+	  signals from trap handlers which run on IST stacks.
+
+config RT_DELAYED_SIGNALS
+	def_bool PREEMPT_RT && ARCH_WANTS_RT_DELAYED_SIGNALS
diff --git a/kernel/entry/common.c b/kernel/entry/common.c
index bad713684c2e3..0543a2c92f201 100644
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -148,6 +148,18 @@ static void handle_signal_work(struct pt_regs *regs, unsigned long ti_work)
 	arch_do_signal_or_restart(regs, ti_work & _TIF_SIGPENDING);
 }
 
+#ifdef CONFIG_RT_DELAYED_SIGNALS
+static inline void raise_delayed_signal(void)
+{
+	if (unlikely(current->forced_info.si_signo)) {
+		force_sig_info(&current->forced_info);
+		current->forced_info.si_signo = 0;
+	}
+}
+#else
+static inline void raise_delayed_signal(void) { }
+#endif
+
 static unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
 					    unsigned long ti_work)
 {
@@ -162,6 +174,8 @@ static unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
 		if (ti_work & _TIF_NEED_RESCHED)
 			schedule();
 
+		raise_delayed_signal();
+
 		if (ti_work & _TIF_UPROBE)
 			uprobe_notify_resume(regs);
 
diff --git a/kernel/signal.c b/kernel/signal.c
index 9b04631acde8f..e93de6daa1889 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1307,6 +1307,43 @@ enum sig_handler {
 	HANDLER_EXIT,	 /* Only visible as the process exit code */
 };
 
+/*
+ * On some archictectures, PREEMPT_RT has to delay sending a signal from a
+ * trap since it cannot enable preemption, and the signal code's
+ * spin_locks turn into mutexes. Instead, it must set TIF_NOTIFY_RESUME
+ * which will send the signal on exit of the trap.
+ */
+#ifdef CONFIG_RT_DELAYED_SIGNALS
+static inline bool force_sig_delayed(struct kernel_siginfo *info,
+				     struct task_struct *t)
+{
+	if (!in_atomic())
+		return false;
+
+	if (WARN_ON_ONCE(t->forced_info.si_signo))
+		return true;
+
+	if (is_si_special(info)) {
+		WARN_ON_ONCE(info != SEND_SIG_PRIV);
+		t->forced_info.si_signo = info->si_signo;
+		t->forced_info.si_errno = 0;
+		t->forced_info.si_code = SI_KERNEL;
+		t->forced_info.si_pid = 0;
+		t->forced_info.si_uid = 0;
+	} else {
+		t->forced_info = *info;
+	}
+	set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
+	return true;
+}
+#else
+static inline bool force_sig_delayed(struct kernel_siginfo *info,
+				     struct task_struct *t)
+{
+	return false;
+}
+#endif
+
 /*
  * Force a signal that the process can't ignore: if necessary
  * we unblock the signal and change any SIG_IGN to SIG_DFL.
@@ -1327,6 +1364,9 @@ force_sig_info_to_task(struct kernel_siginfo *info, struct task_struct *t,
 	struct k_sigaction *action;
 	int sig = info->si_signo;
 
+	if (force_sig_delayed(info, t))
+		return 0;
+
 	spin_lock_irqsave(&t->sighand->siglock, flags);
 	action = &t->sighand->action[sig-1];
 	ignored = action->sa.sa_handler == SIG_IGN;
-- 
GitLab


From 1e364921b0085780153b1d03640ca41e1be83e8b Mon Sep 17 00:00:00 2001
From: Michal Simek <michal.simek@xilinx.com>
Date: Fri, 4 Mar 2022 08:51:29 +0100
Subject: [PATCH 0973/1586] irqchip/xilinx: Switch to GENERIC_IRQ_MULTI_HANDLER

Register the Xilinx driver as the root interrupt controller using
the GENERIC_IRQ_MULTI_HANDLER API, instead of the arch-specific hack.

Signed-off-by: Michal Simek <michal.simek@xilinx.com>
Reviewed-by: Stefan Asserhall <stefan.asserhall@xilinx.com>
[maz: repainted commit message]
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/e6c6595a81f662bf839cee3109d0fa58a596ea47.1646380284.git.michal.simek@xilinx.com
---
 arch/microblaze/Kconfig           |  2 ++
 arch/microblaze/include/asm/irq.h |  3 ---
 arch/microblaze/kernel/irq.c      | 16 +---------------
 drivers/irqchip/irq-xilinx-intc.c | 30 ++++++++++++++++--------------
 4 files changed, 19 insertions(+), 32 deletions(-)

diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index 59798e43cdb03..da568e9816041 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -45,6 +45,8 @@ config MICROBLAZE
 	select SET_FS
 	select ZONE_DMA
 	select TRACE_IRQFLAGS_SUPPORT
+	select GENERIC_IRQ_MULTI_HANDLER
+	select HANDLE_DOMAIN_IRQ
 
 # Endianness selection
 choice
diff --git a/arch/microblaze/include/asm/irq.h b/arch/microblaze/include/asm/irq.h
index 0a28e80bbab0b..cb6ab55d1d014 100644
--- a/arch/microblaze/include/asm/irq.h
+++ b/arch/microblaze/include/asm/irq.h
@@ -11,7 +11,4 @@
 struct pt_regs;
 extern void do_IRQ(struct pt_regs *regs);
 
-/* should be defined in each interrupt controller driver */
-extern unsigned int xintc_get_irq(void);
-
 #endif /* _ASM_MICROBLAZE_IRQ_H */
diff --git a/arch/microblaze/kernel/irq.c b/arch/microblaze/kernel/irq.c
index 903dad822fad9..1f8cb4c4f74fd 100644
--- a/arch/microblaze/kernel/irq.c
+++ b/arch/microblaze/kernel/irq.c
@@ -20,27 +20,13 @@
 #include <linux/irqchip.h>
 #include <linux/of_irq.h>
 
-static u32 concurrent_irq;
-
 void __irq_entry do_IRQ(struct pt_regs *regs)
 {
-	unsigned int irq;
 	struct pt_regs *old_regs = set_irq_regs(regs);
 	trace_hardirqs_off();
 
 	irq_enter();
-	irq = xintc_get_irq();
-next_irq:
-	BUG_ON(!irq);
-	generic_handle_irq(irq);
-
-	irq = xintc_get_irq();
-	if (irq != -1U) {
-		pr_debug("next irq: %d\n", irq);
-		++concurrent_irq;
-		goto next_irq;
-	}
-
+	handle_arch_irq(regs);
 	irq_exit();
 	set_irq_regs(old_regs);
 	trace_hardirqs_on();
diff --git a/drivers/irqchip/irq-xilinx-intc.c b/drivers/irqchip/irq-xilinx-intc.c
index 356a59755d637..238d3d3449496 100644
--- a/drivers/irqchip/irq-xilinx-intc.c
+++ b/drivers/irqchip/irq-xilinx-intc.c
@@ -32,6 +32,8 @@
 #define MER_ME (1<<0)
 #define MER_HIE (1<<1)
 
+#define SPURIOUS_IRQ	(-1U)
+
 static DEFINE_STATIC_KEY_FALSE(xintc_is_be);
 
 struct xintc_irq_chip {
@@ -110,20 +112,6 @@ static struct irq_chip intc_dev = {
 	.irq_mask_ack = intc_mask_ack,
 };
 
-unsigned int xintc_get_irq(void)
-{
-	unsigned int irq = -1;
-	u32 hwirq;
-
-	hwirq = xintc_read(primary_intc, IVR);
-	if (hwirq != -1U)
-		irq = irq_find_mapping(primary_intc->root_domain, hwirq);
-
-	pr_debug("irq-xilinx: hwirq=%d, irq=%d\n", hwirq, irq);
-
-	return irq;
-}
-
 static int xintc_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw)
 {
 	struct xintc_irq_chip *irqc = d->host_data;
@@ -164,6 +152,19 @@ static void xil_intc_irq_handler(struct irq_desc *desc)
 	chained_irq_exit(chip, desc);
 }
 
+static void xil_intc_handle_irq(struct pt_regs *regs)
+{
+	u32 hwirq;
+
+	do {
+		hwirq = xintc_read(primary_intc, IVR);
+		if (unlikely(hwirq == SPURIOUS_IRQ))
+			break;
+
+		generic_handle_domain_irq(primary_intc->root_domain, hwirq);
+	} while (true);
+}
+
 static int __init xilinx_intc_of_init(struct device_node *intc,
 					     struct device_node *parent)
 {
@@ -233,6 +234,7 @@ static int __init xilinx_intc_of_init(struct device_node *intc,
 	} else {
 		primary_intc = irqc;
 		irq_set_default_host(primary_intc->root_domain);
+		set_handle_irq(xil_intc_handle_irq);
 	}
 
 	return 0;
-- 
GitLab


From d6a3be863dcf8c51ba2d18d8fd47a1fadb1336aa Mon Sep 17 00:00:00 2001
From: Qianggui Song <qianggui.song@amlogic.com>
Date: Fri, 25 Feb 2022 13:52:03 +0800
Subject: [PATCH 0974/1586] dt-bindings: interrupt-controller: New binding for
 Meson-S4 SoCs

Update dt-binding document for GPIO interrupt controller of Meson-S4 SoCs

Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Qianggui Song <qianggui.song@amlogic.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220225055207.1048-2-qianggui.song@amlogic.com
---
 .../bindings/interrupt-controller/amlogic,meson-gpio-intc.txt    | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/interrupt-controller/amlogic,meson-gpio-intc.txt b/Documentation/devicetree/bindings/interrupt-controller/amlogic,meson-gpio-intc.txt
index 23b18b92c5581..bde63f8f090e9 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/amlogic,meson-gpio-intc.txt
+++ b/Documentation/devicetree/bindings/interrupt-controller/amlogic,meson-gpio-intc.txt
@@ -18,6 +18,7 @@ Required properties:
     "amlogic,meson-g12a-gpio-intc" for G12A SoCs (S905D2, S905X2, S905Y2)
     "amlogic,meson-sm1-gpio-intc" for SM1 SoCs (S905D3, S905X3, S905Y3)
     "amlogic,meson-a1-gpio-intc" for A1 SoCs (A113L)
+    "amlogic,meson-s4-gpio-intc" for S4 SoCs (S802X2, S905Y4, S805X2G, S905W2)
 - reg : Specifies base physical address and size of the registers.
 - interrupt-controller : Identifies the node as an interrupt controller.
 - #interrupt-cells : Specifies the number of cells needed to encode an
-- 
GitLab


From cc311074f681443266ed9f5969a5b5a0e833c5bc Mon Sep 17 00:00:00 2001
From: Qianggui Song <qianggui.song@amlogic.com>
Date: Fri, 25 Feb 2022 13:52:04 +0800
Subject: [PATCH 0975/1586] irqchip/meson-gpio: support more than 8 channels
 gpio irq

Current meson gpio irqchip driver only support 8 channels for gpio irq
line, later chips may have more then 8 channels, so need to modify code
to support more.

Signed-off-by: Qianggui Song <qianggui.song@amlogic.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220225055207.1048-3-qianggui.song@amlogic.com
---
 drivers/irqchip/irq-meson-gpio.c | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/drivers/irqchip/irq-meson-gpio.c b/drivers/irqchip/irq-meson-gpio.c
index d90ff0b924807..63841e1578f5a 100644
--- a/drivers/irqchip/irq-meson-gpio.c
+++ b/drivers/irqchip/irq-meson-gpio.c
@@ -16,7 +16,7 @@
 #include <linux/of.h>
 #include <linux/of_address.h>
 
-#define NUM_CHANNEL 8
+#define MAX_NUM_CHANNEL 64
 #define MAX_INPUT_MUX 256
 
 #define REG_EDGE_POL	0x00
@@ -60,6 +60,7 @@ struct irq_ctl_ops {
 
 struct meson_gpio_irq_params {
 	unsigned int nr_hwirq;
+	unsigned int nr_channels;
 	bool support_edge_both;
 	unsigned int edge_both_offset;
 	unsigned int edge_single_offset;
@@ -81,6 +82,7 @@ struct meson_gpio_irq_params {
 	.edge_single_offset = 0,				\
 	.pol_low_offset = 16,					\
 	.pin_sel_mask = 0xff,					\
+	.nr_channels = 8,					\
 
 #define INIT_MESON_A1_COMMON_DATA(irqs)				\
 	INIT_MESON_COMMON(irqs, meson_a1_gpio_irq_init,		\
@@ -90,6 +92,7 @@ struct meson_gpio_irq_params {
 	.edge_single_offset = 8,				\
 	.pol_low_offset = 0,					\
 	.pin_sel_mask = 0x7f,					\
+	.nr_channels = 8,					\
 
 static const struct meson_gpio_irq_params meson8_params = {
 	INIT_MESON8_COMMON_DATA(134)
@@ -136,8 +139,8 @@ static const struct of_device_id meson_irq_gpio_matches[] = {
 struct meson_gpio_irq_controller {
 	const struct meson_gpio_irq_params *params;
 	void __iomem *base;
-	u32 channel_irqs[NUM_CHANNEL];
-	DECLARE_BITMAP(channel_map, NUM_CHANNEL);
+	u32 channel_irqs[MAX_NUM_CHANNEL];
+	DECLARE_BITMAP(channel_map, MAX_NUM_CHANNEL);
 	spinlock_t lock;
 };
 
@@ -207,8 +210,8 @@ meson_gpio_irq_request_channel(struct meson_gpio_irq_controller *ctl,
 	spin_lock_irqsave(&ctl->lock, flags);
 
 	/* Find a free channel */
-	idx = find_first_zero_bit(ctl->channel_map, NUM_CHANNEL);
-	if (idx >= NUM_CHANNEL) {
+	idx = find_first_zero_bit(ctl->channel_map, ctl->params->nr_channels);
+	if (idx >= ctl->params->nr_channels) {
 		spin_unlock_irqrestore(&ctl->lock, flags);
 		pr_err("No channel available\n");
 		return -ENOSPC;
@@ -450,10 +453,10 @@ static int meson_gpio_irq_parse_dt(struct device_node *node, struct meson_gpio_i
 	ret = of_property_read_variable_u32_array(node,
 						  "amlogic,channel-interrupts",
 						  ctl->channel_irqs,
-						  NUM_CHANNEL,
-						  NUM_CHANNEL);
+						  ctl->params->nr_channels,
+						  ctl->params->nr_channels);
 	if (ret < 0) {
-		pr_err("can't get %d channel interrupts\n", NUM_CHANNEL);
+		pr_err("can't get %d channel interrupts\n", ctl->params->nr_channels);
 		return ret;
 	}
 
@@ -507,7 +510,7 @@ static int meson_gpio_irq_of_init(struct device_node *node, struct device_node *
 	}
 
 	pr_info("%d to %d gpio interrupt mux initialized\n",
-		ctl->params->nr_hwirq, NUM_CHANNEL);
+		ctl->params->nr_hwirq, ctl->params->nr_channels);
 
 	return 0;
 
-- 
GitLab


From be6692b923355206ab8cd3705d3ef0e0768adb90 Mon Sep 17 00:00:00 2001
From: Qianggui Song <qianggui.song@amlogic.com>
Date: Fri, 25 Feb 2022 13:52:05 +0800
Subject: [PATCH 0976/1586] irqchip/meson-gpio: add select trigger type
 callback

Due to some chips may use different registers and offset, provide
a set trigger type call back and add one for old controller.

Signed-off-by: Qianggui Song <qianggui.song@amlogic.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220225055207.1048-4-qianggui.song@amlogic.com
---
 drivers/irqchip/irq-meson-gpio.c | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/drivers/irqchip/irq-meson-gpio.c b/drivers/irqchip/irq-meson-gpio.c
index 63841e1578f5a..7b5863e368167 100644
--- a/drivers/irqchip/irq-meson-gpio.c
+++ b/drivers/irqchip/irq-meson-gpio.c
@@ -51,11 +51,15 @@ static void meson_a1_gpio_irq_sel_pin(struct meson_gpio_irq_controller *ctl,
 				      unsigned int channel,
 				      unsigned long hwirq);
 static void meson_a1_gpio_irq_init(struct meson_gpio_irq_controller *ctl);
+static int meson8_gpio_irq_set_type(struct meson_gpio_irq_controller *ctl,
+				    unsigned int type, u32 *channel_hwirq);
 
 struct irq_ctl_ops {
 	void (*gpio_irq_sel_pin)(struct meson_gpio_irq_controller *ctl,
 				 unsigned int channel, unsigned long hwirq);
 	void (*gpio_irq_init)(struct meson_gpio_irq_controller *ctl);
+	int (*gpio_irq_set_type)(struct meson_gpio_irq_controller *ctl,
+				 unsigned int type, u32 *channel_hwirq);
 };
 
 struct meson_gpio_irq_params {
@@ -69,16 +73,18 @@ struct meson_gpio_irq_params {
 	struct irq_ctl_ops ops;
 };
 
-#define INIT_MESON_COMMON(irqs, init, sel)			\
+#define INIT_MESON_COMMON(irqs, init, sel, type)		\
 	.nr_hwirq = irqs,					\
 	.ops = {						\
 		.gpio_irq_init = init,				\
 		.gpio_irq_sel_pin = sel,			\
+		.gpio_irq_set_type = type,			\
 	},
 
 #define INIT_MESON8_COMMON_DATA(irqs)				\
 	INIT_MESON_COMMON(irqs, meson_gpio_irq_init_dummy,	\
-			  meson8_gpio_irq_sel_pin)		\
+			  meson8_gpio_irq_sel_pin,		\
+			  meson8_gpio_irq_set_type)		\
 	.edge_single_offset = 0,				\
 	.pol_low_offset = 16,					\
 	.pin_sel_mask = 0xff,					\
@@ -86,7 +92,8 @@ struct meson_gpio_irq_params {
 
 #define INIT_MESON_A1_COMMON_DATA(irqs)				\
 	INIT_MESON_COMMON(irqs, meson_a1_gpio_irq_init,		\
-			  meson_a1_gpio_irq_sel_pin)		\
+			  meson_a1_gpio_irq_sel_pin,		\
+			  meson8_gpio_irq_set_type)		\
 	.support_edge_both = true,				\
 	.edge_both_offset = 16,					\
 	.edge_single_offset = 8,				\
@@ -259,9 +266,8 @@ meson_gpio_irq_release_channel(struct meson_gpio_irq_controller *ctl,
 	clear_bit(idx, ctl->channel_map);
 }
 
-static int meson_gpio_irq_type_setup(struct meson_gpio_irq_controller *ctl,
-				     unsigned int type,
-				     u32 *channel_hwirq)
+static int meson8_gpio_irq_set_type(struct meson_gpio_irq_controller *ctl,
+				    unsigned int type, u32 *channel_hwirq)
 {
 	u32 val = 0;
 	unsigned int idx;
@@ -326,7 +332,7 @@ static int meson_gpio_irq_set_type(struct irq_data *data, unsigned int type)
 	u32 *channel_hwirq = irq_data_get_irq_chip_data(data);
 	int ret;
 
-	ret = meson_gpio_irq_type_setup(ctl, type, channel_hwirq);
+	ret = ctl->params->ops.gpio_irq_set_type(ctl, type, channel_hwirq);
 	if (ret)
 		return ret;
 
-- 
GitLab


From 0d2be10b3037c12d68003856b8d6c83d1b80e0c9 Mon Sep 17 00:00:00 2001
From: Andrew Halaney <ahalaney@redhat.com>
Date: Thu, 24 Feb 2022 15:23:12 -0600
Subject: [PATCH 0977/1586] Documentation/locking/locktypes: Fix PREEMPT_RT
 _bh() description

With PREEMPT_RT the _bh() version of a spinlock leaves preemption
enabled, align the doc to say that instead of the opposite.

Reported-by: Leah Leshchinsky <lleshchi@redhat.com>
Signed-off-by: Andrew Halaney <ahalaney@redhat.com>
Reviewed-by: Boqun Feng <boqun.feng@gmail.com>
Link: https://lore.kernel.org/r/20220224212312.2601153-1-ahalaney@redhat.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/locking/locktypes.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/locking/locktypes.rst b/Documentation/locking/locktypes.rst
index 4fd7b70fcde19..bfa75ea1b66a3 100644
--- a/Documentation/locking/locktypes.rst
+++ b/Documentation/locking/locktypes.rst
@@ -247,7 +247,7 @@ based on rt_mutex which changes the semantics:
    Non-PREEMPT_RT kernels disable preemption to get this effect.
 
    PREEMPT_RT kernels use a per-CPU lock for serialization which keeps
-   preemption disabled. The lock disables softirq handlers and also
+   preemption enabled. The lock disables softirq handlers and also
    prevents reentrancy due to task preemption.
 
 PREEMPT_RT kernels preserve all other spinlock_t semantics:
-- 
GitLab


From 50c63009f6ab37eb78d8b4f9bc606a12b2b46505 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Wed, 2 Mar 2022 11:39:08 -0800
Subject: [PATCH 0978/1586] f2fs: avoid an infinite loop in
 f2fs_sync_dirty_inodes

If one read IO is always failing, we can fall into an infinite loop in
f2fs_sync_dirty_inodes. This happens during xfstests/generic/475.

[  142.803335] Buffer I/O error on dev dm-1, logical block 8388592, async page read
...
[  382.887210]  submit_bio_noacct+0xdd/0x2a0
[  382.887213]  submit_bio+0x80/0x110
[  382.887223]  __submit_bio+0x4d/0x300 [f2fs]
[  382.887282]  f2fs_submit_page_bio+0x125/0x200 [f2fs]
[  382.887299]  __get_meta_page+0xc9/0x280 [f2fs]
[  382.887315]  f2fs_get_meta_page+0x13/0x20 [f2fs]
[  382.887331]  f2fs_get_node_info+0x317/0x3c0 [f2fs]
[  382.887350]  f2fs_do_write_data_page+0x327/0x6f0 [f2fs]
[  382.887367]  f2fs_write_single_data_page+0x5b7/0x960 [f2fs]
[  382.887386]  f2fs_write_cache_pages+0x302/0x890 [f2fs]
[  382.887405]  ? preempt_count_add+0x7a/0xc0
[  382.887408]  f2fs_write_data_pages+0xfd/0x320 [f2fs]
[  382.887425]  ? _raw_spin_unlock+0x1a/0x30
[  382.887428]  do_writepages+0xd3/0x1d0
[  382.887432]  filemap_fdatawrite_wbc+0x69/0x90
[  382.887434]  filemap_fdatawrite+0x50/0x70
[  382.887437]  f2fs_sync_dirty_inodes+0xa4/0x270 [f2fs]
[  382.887453]  f2fs_write_checkpoint+0x189/0x1640 [f2fs]
[  382.887469]  ? schedule_timeout+0x114/0x150
[  382.887471]  ? ttwu_do_activate+0x6d/0xb0
[  382.887473]  ? preempt_count_add+0x7a/0xc0
[  382.887476]  kill_f2fs_super+0xca/0x100 [f2fs]
[  382.887491]  deactivate_locked_super+0x35/0xa0
[  382.887494]  deactivate_super+0x40/0x50
[  382.887497]  cleanup_mnt+0x139/0x190
[  382.887499]  __cleanup_mnt+0x12/0x20
[  382.887501]  task_work_run+0x64/0xa0
[  382.887505]  exit_to_user_mode_prepare+0x1b7/0x1c0
[  382.887508]  syscall_exit_to_user_mode+0x27/0x50
[  382.887510]  do_syscall_64+0x48/0xc0
[  382.887513]  entry_SYSCALL_64_after_hwframe+0x44/0xae

Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/checkpoint.c | 7 +++++++
 fs/f2fs/f2fs.h       | 5 +++++
 2 files changed, 12 insertions(+)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 203a1577942d3..871eee35a32fc 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -98,6 +98,13 @@ repeat:
 	}
 
 	if (unlikely(!PageUptodate(page))) {
+		if (page->index == sbi->metapage_eio_ofs &&
+			sbi->metapage_eio_cnt++ == MAX_RETRY_META_PAGE_EIO) {
+			set_ckpt_flags(sbi, CP_ERROR_FLAG);
+		} else {
+			sbi->metapage_eio_ofs = page->index;
+			sbi->metapage_eio_cnt = 0;
+		}
 		f2fs_put_page(page, 1);
 		return ERR_PTR(-EIO);
 	}
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 47bf9e30913f9..efc4f1fe2ffd4 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -577,6 +577,9 @@ enum {
 /* maximum retry quota flush count */
 #define DEFAULT_RETRY_QUOTA_FLUSH_COUNT		8
 
+/* maximum retry of EIO'ed meta page */
+#define MAX_RETRY_META_PAGE_EIO			100
+
 #define F2FS_LINK_MAX	0xffffffff	/* maximum link count per file */
 
 #define MAX_DIR_RA_PAGES	4	/* maximum ra pages of dir */
@@ -1614,6 +1617,8 @@ struct f2fs_sb_info {
 	/* keep migration IO order for LFS mode */
 	struct f2fs_rwsem io_order_lock;
 	mempool_t *write_io_dummy;		/* Dummy pages */
+	pgoff_t metapage_eio_ofs;		/* EIO page offset */
+	int metapage_eio_cnt;			/* EIO count */
 
 	/* for checkpoint */
 	struct f2fs_checkpoint *ckpt;		/* raw checkpoint pointer */
-- 
GitLab


From 7f8e249dccc4c530cf26c2c091f5bb64e701c262 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Thu, 3 Mar 2022 13:21:27 -0800
Subject: [PATCH 0979/1586] f2fs: introduce F2FS_UNFAIR_RWSEM to support unfair
 rwsem

Unfair rwsem should be used when blk-cg is on. Otherwise, there is regression.

FYI, we noticed a -26.7% regression of aim7.jobs-per-min due to commit:

commit: e4544b63a7ee49e7fbebf35ece0a6acd3b9617ae ("f2fs: move f2fs to use reader-unfair rwsems")
https://git.kernel.org/cgit/linux/kernel/git/next/linux-next.git master

in testcase: aim7
on test machine: 88 threads 2 sockets Intel(R) Xeon(R) Gold 6238M CPU @ 2.10GHz with 128G memory
with following parameters:

	disk: 4BRD_12G
	md: RAID0
	fs: f2fs
	test: sync_disk_rw
	load: 100
	cpufreq_governor: performance
	ucode: 0x500320a

test-description: AIM7 is a traditional UNIX system level benchmark suite which is used to test and measure the performance of multiuser system.
test-url: https://sourceforge.net/projects/aimbench/files/aim-suite7/

Reported-by: kernel test robot <oliver.sang@intel.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/Kconfig |  7 +++++++
 fs/f2fs/f2fs.h  | 10 ++++++++++
 2 files changed, 17 insertions(+)

diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig
index f46a7339d6cf7..03ef087537c7c 100644
--- a/fs/f2fs/Kconfig
+++ b/fs/f2fs/Kconfig
@@ -143,3 +143,10 @@ config F2FS_IOSTAT
 	  Support getting IO statistics through sysfs and printing out periodic
 	  IO statistics tracepoint events. You have to turn on "iostat_enable"
 	  sysfs node to enable this feature.
+
+config F2FS_UNFAIR_RWSEM
+	bool "F2FS unfair rw_semaphore"
+	depends on F2FS_FS && BLK_CGROUP
+	help
+	  Use unfair rw_semaphore, if system configured IO priority by block
+	  cgroup.
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index efc4f1fe2ffd4..68d791ec8b278 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -132,7 +132,9 @@ typedef u32 nid_t;
 
 struct f2fs_rwsem {
         struct rw_semaphore internal_rwsem;
+#ifdef CONFIG_F2FS_UNFAIR_RWSEM
         wait_queue_head_t read_waiters;
+#endif
 };
 
 struct f2fs_mount_info {
@@ -2131,7 +2133,9 @@ static inline void __init_f2fs_rwsem(struct f2fs_rwsem *sem,
 		const char *sem_name, struct lock_class_key *key)
 {
 	__init_rwsem(&sem->internal_rwsem, sem_name, key);
+#ifdef CONFIG_F2FS_UNFAIR_RWSEM
 	init_waitqueue_head(&sem->read_waiters);
+#endif
 }
 
 static inline int f2fs_rwsem_is_locked(struct f2fs_rwsem *sem)
@@ -2146,7 +2150,11 @@ static inline int f2fs_rwsem_is_contended(struct f2fs_rwsem *sem)
 
 static inline void f2fs_down_read(struct f2fs_rwsem *sem)
 {
+#ifdef CONFIG_F2FS_UNFAIR_RWSEM
 	wait_event(sem->read_waiters, down_read_trylock(&sem->internal_rwsem));
+#else
+	down_read(&sem->internal_rwsem);
+#endif
 }
 
 static inline int f2fs_down_read_trylock(struct f2fs_rwsem *sem)
@@ -2181,7 +2189,9 @@ static inline int f2fs_down_write_trylock(struct f2fs_rwsem *sem)
 static inline void f2fs_up_write(struct f2fs_rwsem *sem)
 {
 	up_write(&sem->internal_rwsem);
+#ifdef CONFIG_F2FS_UNFAIR_RWSEM
 	wake_up_all(&sem->read_waiters);
+#endif
 }
 
 static inline void f2fs_lock_op(struct f2fs_sb_info *sbi)
-- 
GitLab


From 0d6356d6cdd0afeb546f4e8ca653748a8ec3ba26 Mon Sep 17 00:00:00 2001
From: Wan Jiabing <wanjiabing@vivo.com>
Date: Mon, 28 Feb 2022 11:16:56 +0800
Subject: [PATCH 0980/1586] docs: fix 'make htmldocs' warning in perf

Fix following 'make htmldocs' warnings:
./Documentation/admin-guide/perf/hisi-pcie-pmu.rst: WARNING:
document isn't included in any toctree

Fixes: c8602008e247 ("docs: perf: Add description for HiSilicon PCIe PMU driver")
Signed-off-by: Wan Jiabing <wanjiabing@vivo.com>
Reviewed-by: John Garry <john.garry@huawei.com>
Link: https://lore.kernel.org/r/20220228031700.1669086-1-wanjiabing@vivo.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/admin-guide/perf/index.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/admin-guide/perf/index.rst b/Documentation/admin-guide/perf/index.rst
index 5a8f2529a0331..69b23f087c050 100644
--- a/Documentation/admin-guide/perf/index.rst
+++ b/Documentation/admin-guide/perf/index.rst
@@ -8,6 +8,7 @@ Performance monitor support
    :maxdepth: 1
 
    hisi-pmu
+   hisi-pcie-pmu
    imx-ddr
    qcom_l2_pmu
    qcom_l3_pmu
-- 
GitLab


From b4060db9251f919506e4d672737c6b8ab9a84701 Mon Sep 17 00:00:00 2001
From: Douglas Anderson <dianders@chromium.org>
Date: Wed, 23 Feb 2022 08:34:48 -0800
Subject: [PATCH 0981/1586] PM: runtime: Have devm_pm_runtime_enable() handle
 pm_runtime_dont_use_autosuspend()

The PM Runtime docs say:

  Drivers in ->remove() callback should undo the runtime PM changes done
  in ->probe(). Usually this means calling pm_runtime_disable(),
  pm_runtime_dont_use_autosuspend() etc.

From grepping code, it's clear that many people aren't aware of the
need to call pm_runtime_dont_use_autosuspend().

When brainstorming solutions, one idea that came up was to leverage
the new-ish devm_pm_runtime_enable() function. The idea here is that:

 * When the devm action is called we know that the driver is being
   removed. It's the perfect time to undo the use_autosuspend.

 * The code of pm_runtime_dont_use_autosuspend() already handles the
   case of being called when autosuspend wasn't enabled.

Suggested-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/runtime.c | 5 +++++
 include/linux/pm_runtime.h   | 4 ++++
 2 files changed, 9 insertions(+)

diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index 2f3cce17219bd..d4059e6ffeaec 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -1476,11 +1476,16 @@ EXPORT_SYMBOL_GPL(pm_runtime_enable);
 
 static void pm_runtime_disable_action(void *data)
 {
+	pm_runtime_dont_use_autosuspend(data);
 	pm_runtime_disable(data);
 }
 
 /**
  * devm_pm_runtime_enable - devres-enabled version of pm_runtime_enable.
+ *
+ * NOTE: this will also handle calling pm_runtime_dont_use_autosuspend() for
+ * you at driver exit time if needed.
+ *
  * @dev: Device to handle.
  */
 int devm_pm_runtime_enable(struct device *dev)
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index 9f09601c465aa..2bff6a10095d1 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -567,6 +567,10 @@ static inline void pm_runtime_disable(struct device *dev)
  * Allow the runtime PM autosuspend mechanism to be used for @dev whenever
  * requested (or "autosuspend" will be handled as direct runtime-suspend for
  * it).
+ *
+ * NOTE: It's important to undo this with pm_runtime_dont_use_autosuspend()
+ * at driver exit time unless your driver initially enabled pm_runtime
+ * with devm_pm_runtime_enable() (which handles it for you).
  */
 static inline void pm_runtime_use_autosuspend(struct device *dev)
 {
-- 
GitLab


From 9edf3c0ffef0ec1bed8300315852b5c6a0997130 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Wed, 2 Mar 2022 10:15:58 +0200
Subject: [PATCH 0982/1586] intel_idle: add SPR support

Add Sapphire Rapids Xeon support.

Up until very recently, the C1 and C1E C-states were independent, but this
has changed in some new chips, including Sapphire Rapids Xeon (SPR). In these
chips the C1 and C1E states cannot be enabled at the same time. The "C1E
promotion" bit in 'MSR_IA32_POWER_CTL' also has its semantics changed a bit.

Here are the C1, C1E, and "C1E promotion" bit rules on Xeons before SPR.

1. If C1E promotion bit is disabled.
   a. C1  requests end up with C1  C-state.
   b. C1E requests end up with C1E C-state.
2. If C1E promotion bit is enabled.
   a. C1  requests end up with C1E C-state.
   b. C1E requests end up with C1E C-state.

Here are the C1, C1E, and "C1E promotion" bit rules on Sapphire Rapids Xeon.
1. If C1E promotion bit is disabled.
   a. C1  requests end up with C1 C-state.
   b. C1E requests end up with C1 C-state.
2. If C1E promotion bit is enabled.
   a. C1  requests end up with C1E C-state.
   b. C1E requests end up with C1E C-state.

Before SPR Xeon, the 'intel_idle' driver was disabling C1E promotion and was
exposing C1 and C1E as independent C-states. But on SPR, C1 and C1E cannot be
enabled at the same time.

This patch adds both C1 and C1E states. However, C1E is marked as with the
"CPUIDLE_FLAG_UNUSABLE" flag, which means that in won't be registered by
default. The C1E promotion bit will be cleared, which means that by default
only C1 and C6 will be registered on SPR.

The next patch will add an option for enabling C1E and disabling C1 on SPR.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/idle/intel_idle.c | 47 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)

diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index 0b66e25c0e2d5..1c7c25909e54a 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -761,6 +761,46 @@ static struct cpuidle_state icx_cstates[] __initdata = {
 		.enter = NULL }
 };
 
+/*
+ * On Sapphire Rapids Xeon C1 has to be disabled if C1E is enabled, and vice
+ * versa. On SPR C1E is enabled only if "C1E promotion" bit is set in
+ * MSR_IA32_POWER_CTL. But in this case there effectively no C1, because C1
+ * requests are promoted to C1E. If the "C1E promotion" bit is cleared, then
+ * both C1 and C1E requests end up with C1, so there is effectively no C1E.
+ *
+ * By default we enable C1 and disable C1E by marking it with
+ * 'CPUIDLE_FLAG_UNUSABLE'.
+ */
+static struct cpuidle_state spr_cstates[] __initdata = {
+	{
+		.name = "C1",
+		.desc = "MWAIT 0x00",
+		.flags = MWAIT2flg(0x00),
+		.exit_latency = 1,
+		.target_residency = 1,
+		.enter = &intel_idle,
+		.enter_s2idle = intel_idle_s2idle, },
+	{
+		.name = "C1E",
+		.desc = "MWAIT 0x01",
+		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE | \
+					   CPUIDLE_FLAG_UNUSABLE,
+		.exit_latency = 2,
+		.target_residency = 4,
+		.enter = &intel_idle,
+		.enter_s2idle = intel_idle_s2idle, },
+	{
+		.name = "C6",
+		.desc = "MWAIT 0x20",
+		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
+		.exit_latency = 290,
+		.target_residency = 800,
+		.enter = &intel_idle,
+		.enter_s2idle = intel_idle_s2idle, },
+	{
+		.enter = NULL }
+};
+
 static struct cpuidle_state atom_cstates[] __initdata = {
 	{
 		.name = "C1E",
@@ -1104,6 +1144,12 @@ static const struct idle_cpu idle_cpu_icx __initconst = {
 	.use_acpi = true,
 };
 
+static const struct idle_cpu idle_cpu_spr __initconst = {
+	.state_table = spr_cstates,
+	.disable_promotion_to_c1e = true,
+	.use_acpi = true,
+};
+
 static const struct idle_cpu idle_cpu_avn __initconst = {
 	.state_table = avn_cstates,
 	.disable_promotion_to_c1e = true,
@@ -1166,6 +1212,7 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = {
 	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X,		&idle_cpu_skx),
 	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X,		&idle_cpu_icx),
 	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D,		&idle_cpu_icx),
+	X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X,	&idle_cpu_spr),
 	X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL,	&idle_cpu_knl),
 	X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM,	&idle_cpu_knl),
 	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT,	&idle_cpu_bxt),
-- 
GitLab


From da0e58c038e60e7e65d30813ebdfe91687aa8a24 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Wed, 2 Mar 2022 10:15:59 +0200
Subject: [PATCH 0983/1586] intel_idle: add 'preferred_cstates' module argument

On Sapphire Rapids Xeon (SPR) the C1 and C1E states are basically mutually
exclusive - only one of them can be enabled. By default, 'intel_idle' driver
enables C1 and disables C1E. However, some users prefer to use C1E instead of
C1, because it saves more energy.

This patch adds a new module parameter ('preferred_cstates') for enabling C1E
and disabling C1. Here is the idea behind it.

1. This option has effect only for "mutually exclusive" C-states like C1 and
   C1E on SPR.
2. It does not have any effect on independent C-states, which do not require
   other C-states to be disabled (most states on most platforms as of today).
3. For mutually exclusive C-states, the 'intel_idle' driver always has a
   reasonable default, such as enabling C1 on SPR by default. On other
   platforms, the default may be different.
4. Users can override the default using the 'preferred_cstates' parameter.
5. The parameter accepts the preferred C-states bit-mask, similarly to the
   existing 'states_off' parameter.
6. This parameter is not limited to C1/C1E, and leaves room for supporting
   other mutually exclusive C-states, if they come in the future.

Today 'intel_idle' can only be compiled-in, which means that on SPR, in order
to disable C1 and enable C1E, users should boot with the following kernel
argument: intel_idle.preferred_cstates=4

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/idle/intel_idle.c | 46 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)

diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index 1c7c25909e54a..b2688c326522c 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -64,6 +64,7 @@ static struct cpuidle_driver intel_idle_driver = {
 /* intel_idle.max_cstate=0 disables driver */
 static int max_cstate = CPUIDLE_STATE_MAX - 1;
 static unsigned int disabled_states_mask;
+static unsigned int preferred_states_mask;
 
 static struct cpuidle_device __percpu *intel_idle_cpuidle_devices;
 
@@ -1400,6 +1401,8 @@ static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { }
 static inline bool intel_idle_off_by_default(u32 mwait_hint) { return false; }
 #endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */
 
+static void c1e_promotion_enable(void);
+
 /**
  * ivt_idle_state_table_update - Tune the idle states table for Ivy Town.
  *
@@ -1570,6 +1573,26 @@ static void __init skx_idle_state_table_update(void)
 	}
 }
 
+/**
+ * spr_idle_state_table_update - Adjust Sapphire Rapids idle states table.
+ */
+static void __init spr_idle_state_table_update(void)
+{
+	/* Check if user prefers C1E over C1. */
+	if (preferred_states_mask & BIT(2)) {
+		if (preferred_states_mask & BIT(1))
+			/* Both can't be enabled, stick to the defaults. */
+			return;
+
+		spr_cstates[0].flags |= CPUIDLE_FLAG_UNUSABLE;
+		spr_cstates[1].flags &= ~CPUIDLE_FLAG_UNUSABLE;
+
+		/* Enable C1E using the "C1E promotion" bit. */
+		c1e_promotion_enable();
+		disable_promotion_to_c1e = false;
+	}
+}
+
 static bool __init intel_idle_verify_cstate(unsigned int mwait_hint)
 {
 	unsigned int mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint) + 1;
@@ -1604,6 +1627,9 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
 	case INTEL_FAM6_SKYLAKE_X:
 		skx_idle_state_table_update();
 		break;
+	case INTEL_FAM6_SAPPHIRERAPIDS_X:
+		spr_idle_state_table_update();
+		break;
 	}
 
 	for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
@@ -1676,6 +1702,15 @@ static void auto_demotion_disable(void)
 	wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
 }
 
+static void c1e_promotion_enable(void)
+{
+	unsigned long long msr_bits;
+
+	rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
+	msr_bits |= 0x2;
+	wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
+}
+
 static void c1e_promotion_disable(void)
 {
 	unsigned long long msr_bits;
@@ -1845,3 +1880,14 @@ module_param(max_cstate, int, 0444);
  */
 module_param_named(states_off, disabled_states_mask, uint, 0444);
 MODULE_PARM_DESC(states_off, "Mask of disabled idle states");
+/*
+ * Some platforms come with mutually exclusive C-states, so that if one is
+ * enabled, the other C-states must not be used. Example: C1 and C1E on
+ * Sapphire Rapids platform. This parameter allows for selecting the
+ * preferred C-states among the groups of mutually exclusive C-states - the
+ * selected C-states will be registered, the other C-states from the mutually
+ * exclusive group won't be registered. If the platform has no mutually
+ * exclusive C-states, this parameter has no effect.
+ */
+module_param_named(preferred_cstates, preferred_states_mask, uint, 0444);
+MODULE_PARM_DESC(preferred_cstates, "Mask of preferred idle states");
-- 
GitLab


From 3a9cf77b60dc9839b6674943bb7c9dcd524b6294 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Wed, 2 Mar 2022 10:16:00 +0200
Subject: [PATCH 0984/1586] intel_idle: add core C6 optimization for SPR

Add a Sapphire Rapids Xeon C6 optimization, similar to what we have for Sky Lake
Xeon: if package C6 is disabled, adjust C6 exit latency and target residency to
match core C6 values, instead of using the default package C6 values.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/idle/intel_idle.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index b2688c326522c..e385ddf15b323 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -1578,6 +1578,8 @@ static void __init skx_idle_state_table_update(void)
  */
 static void __init spr_idle_state_table_update(void)
 {
+	unsigned long long msr;
+
 	/* Check if user prefers C1E over C1. */
 	if (preferred_states_mask & BIT(2)) {
 		if (preferred_states_mask & BIT(1))
@@ -1591,6 +1593,19 @@ static void __init spr_idle_state_table_update(void)
 		c1e_promotion_enable();
 		disable_promotion_to_c1e = false;
 	}
+
+	/*
+	 * By default, the C6 state assumes the worst-case scenario of package
+	 * C6. However, if PC6 is disabled, we update the numbers to match
+	 * core C6.
+	 */
+	rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);
+
+	/* Limit value 2 and above allow for PC6. */
+	if ((msr & 0x7) < 2) {
+		spr_cstates[2].exit_latency = 190;
+		spr_cstates[2].target_residency = 600;
+	}
 }
 
 static bool __init intel_idle_verify_cstate(unsigned int mwait_hint)
-- 
GitLab


From 143a70b8b4300faa92ad82468f65dccd440e7957 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 3 Mar 2022 14:18:56 +0300
Subject: [PATCH 0985/1586] iss-simdisk: use bvec_kmap_local in
 simdisk_submit_bio

Using local kmaps slightly reduces the chances to stray writes, and
the bvec interface cleans up the code a little bit.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Acked-by: Max Filippov <jcmvbkbc@gmail.com>
Link: https://lore.kernel.org/r/20220303111905.321089-2-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 arch/xtensa/platforms/iss/simdisk.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/xtensa/platforms/iss/simdisk.c b/arch/xtensa/platforms/iss/simdisk.c
index 8eb6ad1a3a1de..0f0e0724397f4 100644
--- a/arch/xtensa/platforms/iss/simdisk.c
+++ b/arch/xtensa/platforms/iss/simdisk.c
@@ -108,13 +108,13 @@ static void simdisk_submit_bio(struct bio *bio)
 	sector_t sector = bio->bi_iter.bi_sector;
 
 	bio_for_each_segment(bvec, bio, iter) {
-		char *buffer = kmap_atomic(bvec.bv_page) + bvec.bv_offset;
+		char *buffer = bvec_kmap_local(&bvec);
 		unsigned len = bvec.bv_len >> SECTOR_SHIFT;
 
 		simdisk_transfer(dev, sector, len, buffer,
 				bio_data_dir(bio) == WRITE);
 		sector += len;
-		kunmap_atomic(buffer);
+		kunmap_local(buffer);
 	}
 
 	bio_endio(bio);
-- 
GitLab


From b7ab4611b6c793100197abc93e069d6f9aab7960 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 3 Mar 2022 14:18:57 +0300
Subject: [PATCH 0986/1586] aoe: use bvec_kmap_local in bvcpy

Using local kmaps slightly reduces the chances to stray writes, and
the bvec interface cleans up the code a little bit.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220303111905.321089-3-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/aoe/aoecmd.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index cc11f89a0928f..384073ef2323c 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -1018,9 +1018,9 @@ bvcpy(struct sk_buff *skb, struct bio *bio, struct bvec_iter iter, long cnt)
 	iter.bi_size = cnt;
 
 	__bio_for_each_segment(bv, bio, iter, iter) {
-		char *p = kmap_atomic(bv.bv_page) + bv.bv_offset;
+		char *p = bvec_kmap_local(&bv);
 		skb_copy_bits(skb, soff, p, bv.bv_len);
-		kunmap_atomic(p);
+		kunmap_local(p);
 		soff += bv.bv_len;
 	}
 }
-- 
GitLab


From b3bd0a8a74ab970cc1cf0849e66bd0906741105b Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 3 Mar 2022 14:18:58 +0300
Subject: [PATCH 0987/1586] zram: use memcpy_to_bvec in zram_bvec_read

Use the proper helper instead of open coding the copy.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Link: https://lore.kernel.org/r/20220303111905.321089-4-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/zram/zram_drv.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index a3a5e1e713268..14becdf2815df 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1331,12 +1331,10 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
 		goto out;
 
 	if (is_partial_io(bvec)) {
-		void *dst = kmap_atomic(bvec->bv_page);
 		void *src = kmap_atomic(page);
 
-		memcpy(dst + bvec->bv_offset, src + offset, bvec->bv_len);
+		memcpy_to_bvec(bvec, src + offset);
 		kunmap_atomic(src);
-		kunmap_atomic(dst);
 	}
 out:
 	if (is_partial_io(bvec))
-- 
GitLab


From bd3d3203eb84d08a6daef805efe9316b79d3bf3c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 3 Mar 2022 14:18:59 +0300
Subject: [PATCH 0988/1586] zram: use memcpy_from_bvec in zram_bvec_write

Use memcpy_from_bvec instead of open coding the logic.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Link: https://lore.kernel.org/r/20220303111905.321089-5-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/zram/zram_drv.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 14becdf2815df..e9474b02012de 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1465,7 +1465,6 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
 {
 	int ret;
 	struct page *page = NULL;
-	void *src;
 	struct bio_vec vec;
 
 	vec = *bvec;
@@ -1483,11 +1482,9 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
 		if (ret)
 			goto out;
 
-		src = kmap_atomic(bvec->bv_page);
 		dst = kmap_atomic(page);
-		memcpy(dst + offset, src + bvec->bv_offset, bvec->bv_len);
+		memcpy_from_bvec(dst + offset, bvec);
 		kunmap_atomic(dst);
-		kunmap_atomic(src);
 
 		vec.bv_page = page;
 		vec.bv_len = PAGE_SIZE;
-- 
GitLab


From 20072ec828640b7d23a0cfdbccf0dea48e77ba3e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 3 Mar 2022 14:19:00 +0300
Subject: [PATCH 0989/1586] nvdimm-blk: use bvec_kmap_local in
 nd_blk_rw_integrity

Using local kmaps slightly reduces the chances to stray writes, and
the bvec interface cleans up the code a little bit.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Link: https://lore.kernel.org/r/20220303111905.321089-6-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvdimm/blk.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c
index c1db43524d755..0a38738335941 100644
--- a/drivers/nvdimm/blk.c
+++ b/drivers/nvdimm/blk.c
@@ -88,10 +88,9 @@ static int nd_blk_rw_integrity(struct nd_namespace_blk *nsblk,
 		 */
 
 		cur_len = min(len, bv.bv_len);
-		iobuf = kmap_atomic(bv.bv_page);
-		err = ndbr->do_io(ndbr, dev_offset, iobuf + bv.bv_offset,
-				cur_len, rw);
-		kunmap_atomic(iobuf);
+		iobuf = bvec_kmap_local(&bv);
+		err = ndbr->do_io(ndbr, dev_offset, iobuf, cur_len, rw);
+		kunmap_local(iobuf);
 		if (err)
 			return err;
 
-- 
GitLab


From 3205190655ea56ea5e00815eeff4dab2bde0af80 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 3 Mar 2022 14:19:01 +0300
Subject: [PATCH 0990/1586] nvdimm-btt: use bvec_kmap_local in btt_rw_integrity

Using local kmaps slightly reduces the chances to stray writes, and
the bvec interface cleans up the code a little bit.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Link: https://lore.kernel.org/r/20220303111905.321089-7-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvdimm/btt.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index cbd994f7f1fe6..9613e54c7a675 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -1163,17 +1163,15 @@ static int btt_rw_integrity(struct btt *btt, struct bio_integrity_payload *bip,
 		 */
 
 		cur_len = min(len, bv.bv_len);
-		mem = kmap_atomic(bv.bv_page);
+		mem = bvec_kmap_local(&bv);
 		if (rw)
-			ret = arena_write_bytes(arena, meta_nsoff,
-					mem + bv.bv_offset, cur_len,
+			ret = arena_write_bytes(arena, meta_nsoff, mem, cur_len,
 					NVDIMM_IO_ATOMIC);
 		else
-			ret = arena_read_bytes(arena, meta_nsoff,
-					mem + bv.bv_offset, cur_len,
+			ret = arena_read_bytes(arena, meta_nsoff, mem, cur_len,
 					NVDIMM_IO_ATOMIC);
 
-		kunmap_atomic(mem);
+		kunmap_local(mem);
 		if (ret)
 			return ret;
 
-- 
GitLab


From 07fee7aba5472d0e65345146a68b4bd1a8b656c3 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 3 Mar 2022 14:19:02 +0300
Subject: [PATCH 0991/1586] bcache: use bvec_kmap_local in bio_csum

Using local kmaps slightly reduces the chances to stray writes, and
the bvec interface cleans up the code a little bit.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Link: https://lore.kernel.org/r/20220303111905.321089-8-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/md/bcache/request.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 6869e010475a3..fdd0194f84dd0 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -44,10 +44,10 @@ static void bio_csum(struct bio *bio, struct bkey *k)
 	uint64_t csum = 0;
 
 	bio_for_each_segment(bv, bio, iter) {
-		void *d = kmap(bv.bv_page) + bv.bv_offset;
+		void *d = bvec_kmap_local(&bv);
 
 		csum = crc64_be(csum, d, bv.bv_len);
-		kunmap(bv.bv_page);
+		kunmap_local(d);
 	}
 
 	k->ptr[KEY_PTRS(k)] = csum & (~0ULL >> 1);
-- 
GitLab


From 472278508dce25316e806e45778658c3e4b353b3 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 3 Mar 2022 14:19:03 +0300
Subject: [PATCH 0992/1586] drbd: use bvec_kmap_local in drbd_csum_bio

Using local kmaps slightly reduces the chances to stray writes, and
the bvec interface cleans up the code a little bit.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Link: https://lore.kernel.org/r/20220303111905.321089-9-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/drbd/drbd_worker.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index a5e04b38006b6..1b48c8172a077 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -326,9 +326,9 @@ void drbd_csum_bio(struct crypto_shash *tfm, struct bio *bio, void *digest)
 	bio_for_each_segment(bvec, bio, iter) {
 		u8 *src;
 
-		src = kmap_atomic(bvec.bv_page);
-		crypto_shash_update(desc, src + bvec.bv_offset, bvec.bv_len);
-		kunmap_atomic(src);
+		src = bvec_kmap_local(&bvec);
+		crypto_shash_update(desc, src, bvec.bv_len);
+		kunmap_local(src);
 
 		/* REQ_OP_WRITE_SAME has only one segment,
 		 * checksum the payload only once. */
-- 
GitLab


From 3eddaa60b8411c135d1c71090dea9b59ff3f2e26 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 3 Mar 2022 14:19:04 +0300
Subject: [PATCH 0993/1586] drbd: use bvec_kmap_local in recv_dless_read

Using local kmaps slightly reduces the chances to stray writes, and
the bvec interface cleans up the code a little bit.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Link: https://lore.kernel.org/r/20220303111905.321089-10-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/drbd/drbd_receiver.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 04e3ec12d8b49..fa00cf2ea9529 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -2017,10 +2017,10 @@ static int recv_dless_read(struct drbd_peer_device *peer_device, struct drbd_req
 	D_ASSERT(peer_device->device, sector == bio->bi_iter.bi_sector);
 
 	bio_for_each_segment(bvec, bio, iter) {
-		void *mapped = kmap(bvec.bv_page) + bvec.bv_offset;
+		void *mapped = bvec_kmap_local(&bvec);
 		expect = min_t(int, data_size, bvec.bv_len);
 		err = drbd_recv_all_warn(peer_device->connection, mapped, expect);
-		kunmap(bvec.bv_page);
+		kunmap_local(mapped);
 		if (err)
 			return err;
 		data_size -= expect;
-- 
GitLab


From 13d4ef0f66b7ee9415e101e213acaf94a0cb28ee Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 3 Mar 2022 14:19:05 +0300
Subject: [PATCH 0994/1586] floppy: use memcpy_{to,from}_bvec

Use the helpers instead of open coding them.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Link: https://lore.kernel.org/r/20220303111905.321089-11-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/floppy.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 19c2d0327e157..8c647532e3ce9 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -2485,11 +2485,9 @@ static void copy_buffer(int ssize, int max_sector, int max_sector_2)
 		}
 
 		if (CT(raw_cmd->cmd[COMMAND]) == FD_READ)
-			memcpy_to_page(bv.bv_page, bv.bv_offset, dma_buffer,
-				       size);
+			memcpy_to_bvec(&bv, dma_buffer);
 		else
-			memcpy_from_page(dma_buffer, bv.bv_page, bv.bv_offset,
-					 size);
+			memcpy_from_bvec(dma_buffer, &bv);
 
 		remaining -= size;
 		dma_buffer += size;
-- 
GitLab


From ab552fcb17cc9e4afe0e4ac4df95fc7b30e8490a Mon Sep 17 00:00:00 2001
From: Zhang Wensheng <zhangwensheng5@huawei.com>
Date: Thu, 3 Mar 2022 15:03:34 +0800
Subject: [PATCH 0995/1586] bfq: fix use-after-free in bfq_dispatch_request

KASAN reports a use-after-free report when doing normal scsi-mq test

[69832.239032] ==================================================================
[69832.241810] BUG: KASAN: use-after-free in bfq_dispatch_request+0x1045/0x44b0
[69832.243267] Read of size 8 at addr ffff88802622ba88 by task kworker/3:1H/155
[69832.244656]
[69832.245007] CPU: 3 PID: 155 Comm: kworker/3:1H Not tainted 5.10.0-10295-g576c6382529e #8
[69832.246626] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014
[69832.249069] Workqueue: kblockd blk_mq_run_work_fn
[69832.250022] Call Trace:
[69832.250541]  dump_stack+0x9b/0xce
[69832.251232]  ? bfq_dispatch_request+0x1045/0x44b0
[69832.252243]  print_address_description.constprop.6+0x3e/0x60
[69832.253381]  ? __cpuidle_text_end+0x5/0x5
[69832.254211]  ? vprintk_func+0x6b/0x120
[69832.254994]  ? bfq_dispatch_request+0x1045/0x44b0
[69832.255952]  ? bfq_dispatch_request+0x1045/0x44b0
[69832.256914]  kasan_report.cold.9+0x22/0x3a
[69832.257753]  ? bfq_dispatch_request+0x1045/0x44b0
[69832.258755]  check_memory_region+0x1c1/0x1e0
[69832.260248]  bfq_dispatch_request+0x1045/0x44b0
[69832.261181]  ? bfq_bfqq_expire+0x2440/0x2440
[69832.262032]  ? blk_mq_delay_run_hw_queues+0xf9/0x170
[69832.263022]  __blk_mq_do_dispatch_sched+0x52f/0x830
[69832.264011]  ? blk_mq_sched_request_inserted+0x100/0x100
[69832.265101]  __blk_mq_sched_dispatch_requests+0x398/0x4f0
[69832.266206]  ? blk_mq_do_dispatch_ctx+0x570/0x570
[69832.267147]  ? __switch_to+0x5f4/0xee0
[69832.267898]  blk_mq_sched_dispatch_requests+0xdf/0x140
[69832.268946]  __blk_mq_run_hw_queue+0xc0/0x270
[69832.269840]  blk_mq_run_work_fn+0x51/0x60
[69832.278170]  process_one_work+0x6d4/0xfe0
[69832.278984]  worker_thread+0x91/0xc80
[69832.279726]  ? __kthread_parkme+0xb0/0x110
[69832.280554]  ? process_one_work+0xfe0/0xfe0
[69832.281414]  kthread+0x32d/0x3f0
[69832.282082]  ? kthread_park+0x170/0x170
[69832.282849]  ret_from_fork+0x1f/0x30
[69832.283573]
[69832.283886] Allocated by task 7725:
[69832.284599]  kasan_save_stack+0x19/0x40
[69832.285385]  __kasan_kmalloc.constprop.2+0xc1/0xd0
[69832.286350]  kmem_cache_alloc_node+0x13f/0x460
[69832.287237]  bfq_get_queue+0x3d4/0x1140
[69832.287993]  bfq_get_bfqq_handle_split+0x103/0x510
[69832.289015]  bfq_init_rq+0x337/0x2d50
[69832.289749]  bfq_insert_requests+0x304/0x4e10
[69832.290634]  blk_mq_sched_insert_requests+0x13e/0x390
[69832.291629]  blk_mq_flush_plug_list+0x4b4/0x760
[69832.292538]  blk_flush_plug_list+0x2c5/0x480
[69832.293392]  io_schedule_prepare+0xb2/0xd0
[69832.294209]  io_schedule_timeout+0x13/0x80
[69832.295014]  wait_for_common_io.constprop.1+0x13c/0x270
[69832.296137]  submit_bio_wait+0x103/0x1a0
[69832.296932]  blkdev_issue_discard+0xe6/0x160
[69832.297794]  blk_ioctl_discard+0x219/0x290
[69832.298614]  blkdev_common_ioctl+0x50a/0x1750
[69832.304715]  blkdev_ioctl+0x470/0x600
[69832.305474]  block_ioctl+0xde/0x120
[69832.306232]  vfs_ioctl+0x6c/0xc0
[69832.306877]  __se_sys_ioctl+0x90/0xa0
[69832.307629]  do_syscall_64+0x2d/0x40
[69832.308362]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
[69832.309382]
[69832.309701] Freed by task 155:
[69832.310328]  kasan_save_stack+0x19/0x40
[69832.311121]  kasan_set_track+0x1c/0x30
[69832.311868]  kasan_set_free_info+0x1b/0x30
[69832.312699]  __kasan_slab_free+0x111/0x160
[69832.313524]  kmem_cache_free+0x94/0x460
[69832.314367]  bfq_put_queue+0x582/0x940
[69832.315112]  __bfq_bfqd_reset_in_service+0x166/0x1d0
[69832.317275]  bfq_bfqq_expire+0xb27/0x2440
[69832.318084]  bfq_dispatch_request+0x697/0x44b0
[69832.318991]  __blk_mq_do_dispatch_sched+0x52f/0x830
[69832.319984]  __blk_mq_sched_dispatch_requests+0x398/0x4f0
[69832.321087]  blk_mq_sched_dispatch_requests+0xdf/0x140
[69832.322225]  __blk_mq_run_hw_queue+0xc0/0x270
[69832.323114]  blk_mq_run_work_fn+0x51/0x60
[69832.323942]  process_one_work+0x6d4/0xfe0
[69832.324772]  worker_thread+0x91/0xc80
[69832.325518]  kthread+0x32d/0x3f0
[69832.326205]  ret_from_fork+0x1f/0x30
[69832.326932]
[69832.338297] The buggy address belongs to the object at ffff88802622b968
[69832.338297]  which belongs to the cache bfq_queue of size 512
[69832.340766] The buggy address is located 288 bytes inside of
[69832.340766]  512-byte region [ffff88802622b968, ffff88802622bb68)
[69832.343091] The buggy address belongs to the page:
[69832.344097] page:ffffea0000988a00 refcount:1 mapcount:0 mapping:0000000000000000 index:0xffff88802622a528 pfn:0x26228
[69832.346214] head:ffffea0000988a00 order:2 compound_mapcount:0 compound_pincount:0
[69832.347719] flags: 0x1fffff80010200(slab|head)
[69832.348625] raw: 001fffff80010200 ffffea0000dbac08 ffff888017a57650 ffff8880179fe840
[69832.354972] raw: ffff88802622a528 0000000000120008 00000001ffffffff 0000000000000000
[69832.356547] page dumped because: kasan: bad access detected
[69832.357652]
[69832.357970] Memory state around the buggy address:
[69832.358926]  ffff88802622b980: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[69832.360358]  ffff88802622ba00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[69832.361810] >ffff88802622ba80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[69832.363273]                       ^
[69832.363975]  ffff88802622bb00: fb fb fb fb fb fb fb fb fb fb fb fb fb fc fc fc
[69832.375960]  ffff88802622bb80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[69832.377405] ==================================================================

In bfq_dispatch_requestfunction, it may have function call:

bfq_dispatch_request
	__bfq_dispatch_request
		bfq_select_queue
			bfq_bfqq_expire
				__bfq_bfqd_reset_in_service
					bfq_put_queue
						kmem_cache_free
In this function call, in_serv_queue has beed expired and meet the
conditions to free. In the function bfq_dispatch_request, the address
of in_serv_queue pointing to has been released. For getting the value
of idle_timer_disabled, it will get flags value from the address which
in_serv_queue pointing to, then the problem of use-after-free happens;

Fix the problem by check in_serv_queue == bfqd->in_service_queue, to
get the value of idle_timer_disabled if in_serve_queue is equel to
bfqd->in_service_queue. If the space of in_serv_queue pointing has
been released, this judge will aviod use-after-free problem.
And if in_serv_queue may be expired or finished, the idle_timer_disabled
will be false which would not give effects to bfq_update_dispatch_stats.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zhang Wensheng <zhangwensheng5@huawei.com>
Link: https://lore.kernel.org/r/20220303070334.3020168-1-zhangwensheng5@huawei.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bfq-iosched.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 2f2b97cad9802..e234eb408ed0a 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -5181,7 +5181,7 @@ static struct request *bfq_dispatch_request(struct blk_mq_hw_ctx *hctx)
 	struct bfq_data *bfqd = hctx->queue->elevator->elevator_data;
 	struct request *rq;
 	struct bfq_queue *in_serv_queue;
-	bool waiting_rq, idle_timer_disabled;
+	bool waiting_rq, idle_timer_disabled = false;
 
 	spin_lock_irq(&bfqd->lock);
 
@@ -5189,14 +5189,15 @@ static struct request *bfq_dispatch_request(struct blk_mq_hw_ctx *hctx)
 	waiting_rq = in_serv_queue && bfq_bfqq_wait_request(in_serv_queue);
 
 	rq = __bfq_dispatch_request(hctx);
-
-	idle_timer_disabled =
-		waiting_rq && !bfq_bfqq_wait_request(in_serv_queue);
+	if (in_serv_queue == bfqd->in_service_queue) {
+		idle_timer_disabled =
+			waiting_rq && !bfq_bfqq_wait_request(in_serv_queue);
+	}
 
 	spin_unlock_irq(&bfqd->lock);
-
-	bfq_update_dispatch_stats(hctx->queue, rq, in_serv_queue,
-				  idle_timer_disabled);
+	bfq_update_dispatch_stats(hctx->queue, rq,
+			idle_timer_disabled ? in_serv_queue : NULL,
+				idle_timer_disabled);
 
 	return rq;
 }
-- 
GitLab


From 7b1002f7cfe581930f63787a0b3de0144e61ed55 Mon Sep 17 00:00:00 2001
From: Mingzhe Zou <mingzhe.zou@easystack.cn>
Date: Fri, 7 Jan 2022 16:21:13 +0800
Subject: [PATCH 0996/1586] bcache: fixup bcache_dev_sectors_dirty_add()
 multithreaded CPU false sharing

When attaching a cached device (a.k.a backing device) to a cache
device, bch_sectors_dirty_init() is called to count dirty sectors
and stripes (see what bcache_dev_sectors_dirty_add() does) on the
cache device.

When bcache_dev_sectors_dirty_add() is called, set_bit(stripe,
d->full_dirty_stripes) or clear_bit(stripe, d->full_dirty_stripes)
operation will always be performed. In full_dirty_stripes, each 1bit
represents stripe_size (8192) sectors (512B), so 1bit=4MB (8192*512),
and each CPU cache line=64B=512bit=2048MB. When 20 threads process
a cached disk with 100G dirty data, a single thread processes about
23M at a time, and 20 threads total 460M. These full_dirty_stripes
bits corresponding to the 460M data is likely to fall in the same CPU
cache line. When one of these threads performs a set_bit or clear_bit
operation, the same CPU cache line of other threads will become invalid
and must read the full_dirty_stripes from the main memory again. Compared
with single thread, the time of a bcache_dev_sectors_dirty_add()
call is increased by about 50 times in our test (100G dirty data,
20 threads, bcache_dev_sectors_dirty_add() is called more than
20 million times).

This patch tries to test_bit before set_bit or clear_bit operation.
Therefore, a lot of force set and clear operations will be avoided,
and most of bcache_dev_sectors_dirty_add() calls will only read CPU
cache line.

Signed-off-by: Mingzhe Zou <mingzhe.zou@easystack.cn>
Signed-off-by: Coly Li <colyli@suse.de>
---
 drivers/md/bcache/writeback.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index d42301e6309d4..176461f89f46f 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -585,10 +585,13 @@ void bcache_dev_sectors_dirty_add(struct cache_set *c, unsigned int inode,
 
 		sectors_dirty = atomic_add_return(s,
 					d->stripe_sectors_dirty + stripe);
-		if (sectors_dirty == d->stripe_size)
-			set_bit(stripe, d->full_dirty_stripes);
-		else
-			clear_bit(stripe, d->full_dirty_stripes);
+		if (sectors_dirty == d->stripe_size) {
+			if (!test_bit(stripe, d->full_dirty_stripes))
+				set_bit(stripe, d->full_dirty_stripes);
+		} else {
+			if (test_bit(stripe, d->full_dirty_stripes))
+				clear_bit(stripe, d->full_dirty_stripes);
+		}
 
 		nr_sectors -= s;
 		stripe_offset = 0;
-- 
GitLab


From 887554ab96588de2917b6c8c73e552da082e5368 Mon Sep 17 00:00:00 2001
From: Mingzhe Zou <mingzhe.zou@easystack.cn>
Date: Fri, 11 Feb 2022 14:39:15 +0800
Subject: [PATCH 0997/1586] bcache: fixup multiple threads crash

When multiple threads to check btree nodes in parallel, the main
thread wait for all threads to stop or CACHE_SET_IO_DISABLE flag:

wait_event_interruptible(check_state->wait,
                         atomic_read(&check_state->started) == 0 ||
                         test_bit(CACHE_SET_IO_DISABLE, &c->flags));

However, the bch_btree_node_read and bch_btree_node_read_done
maybe call bch_cache_set_error, then the CACHE_SET_IO_DISABLE
will be set. If the flag already set, the main thread return
error. At the same time, maybe some threads still running and
read NULL pointer, the kernel will crash.

This patch change the event wait condition, the main thread must
wait for all threads to stop.

Fixes: 8e7102273f597 ("bcache: make bch_btree_check() to be multithreaded")
Signed-off-by: Mingzhe Zou <mingzhe.zou@easystack.cn>
Cc: stable@vger.kernel.org # v5.7+
Signed-off-by: Coly Li <colyli@suse.de>
---
 drivers/md/bcache/btree.c     | 6 ++++--
 drivers/md/bcache/writeback.c | 6 ++++--
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 88c573eeb5982..ad9f16689419d 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -2060,9 +2060,11 @@ int bch_btree_check(struct cache_set *c)
 		}
 	}
 
+	/*
+	 * Must wait for all threads to stop.
+	 */
 	wait_event_interruptible(check_state->wait,
-				 atomic_read(&check_state->started) == 0 ||
-				  test_bit(CACHE_SET_IO_DISABLE, &c->flags));
+				 atomic_read(&check_state->started) == 0);
 
 	for (i = 0; i < check_state->total_threads; i++) {
 		if (check_state->infos[i].result) {
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 176461f89f46f..9ee0005874cda 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -1001,9 +1001,11 @@ void bch_sectors_dirty_init(struct bcache_device *d)
 		}
 	}
 
+	/*
+	 * Must wait for all threads to stop.
+	 */
 	wait_event_interruptible(state->wait,
-		 atomic_read(&state->started) == 0 ||
-		 test_bit(CACHE_SET_IO_DISABLE, &c->flags));
+		 atomic_read(&state->started) == 0);
 
 out:
 	kfree(state);
-- 
GitLab


From 9729cad0278b4fdde172bb7b5da65bd1a0e0fb8b Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Tue, 11 Jan 2022 18:48:02 +0100
Subject: [PATCH 0998/1586] dt-bindings: extcon: maxim,max77843: Add MAX77843
 bindings

Document the bindings for MAX77843 MUIC/extcon driver, based on
Exynos5433 TM2 devicetree.  These are neither accurate nor finished
bindings but at least allow parsing existing DTS files.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Link: https://lore.kernel.org/r/20220111174805.223732-3-krzysztof.kozlowski@canonical.com
---
 .../bindings/extcon/maxim,max77843.yaml       | 40 +++++++++++++++++++
 1 file changed, 40 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/extcon/maxim,max77843.yaml

diff --git a/Documentation/devicetree/bindings/extcon/maxim,max77843.yaml b/Documentation/devicetree/bindings/extcon/maxim,max77843.yaml
new file mode 100644
index 0000000000000..f9ffe3d6f9575
--- /dev/null
+++ b/Documentation/devicetree/bindings/extcon/maxim,max77843.yaml
@@ -0,0 +1,40 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/extcon/maxim,max77843.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Maxim MAX77843 MicroUSB and Companion Power Management IC Extcon
+
+maintainers:
+  - Chanwoo Choi <cw00.choi@samsung.com>
+  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+
+description: |
+  This is a part of device tree bindings for Maxim MAX77843 MicroUSB
+  Integrated Circuit (MUIC).
+
+  See also Documentation/devicetree/bindings/mfd/maxim,max77843.yaml for
+  additional information and example.
+
+properties:
+  compatible:
+    const: maxim,max77843-muic
+
+  connector:
+    $ref: /schemas/connector/usb-connector.yaml#
+
+  ports:
+    $ref: /schemas/graph.yaml#/properties/port
+    description:
+      Any connector to the data bus of this controller should be modelled using
+      the OF graph bindings specified
+    properties:
+      port:
+        $ref: /schemas/graph.yaml#/properties/port
+
+required:
+  - compatible
+  - connector
+
+additionalProperties: false
-- 
GitLab


From 77357c50e4003f38e3e058fa64ae5e84dec414a3 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Tue, 11 Jan 2022 18:48:03 +0100
Subject: [PATCH 0999/1586] regulator: dt-bindings: maxim,max77843: Add
 MAX77843 bindings

Document the bindings for MAX77843 regulator driver.  The bindings are
almost identical to MAX77693 bindings, except the actual names of
regulators.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Reviewed-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Link: https://lore.kernel.org/r/20220111174805.223732-4-krzysztof.kozlowski@canonical.com
---
 .../bindings/regulator/maxim,max77843.yaml    | 65 +++++++++++++++++++
 1 file changed, 65 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/regulator/maxim,max77843.yaml

diff --git a/Documentation/devicetree/bindings/regulator/maxim,max77843.yaml b/Documentation/devicetree/bindings/regulator/maxim,max77843.yaml
new file mode 100644
index 0000000000000..a963025e96c12
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/maxim,max77843.yaml
@@ -0,0 +1,65 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/regulator/maxim,max77843.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Maxim MAX77843 MicroUSB and Companion Power Management IC regulators
+
+maintainers:
+  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+
+description: |
+  This is a part of device tree bindings for Maxim MAX77843 MicroUSB Integrated
+  Circuit (MUIC).
+
+  See also Documentation/devicetree/bindings/mfd/maxim,max77843.yaml for
+  additional information and example.
+
+properties:
+  compatible:
+    const: maxim,max77843-regulator
+
+  CHARGER:
+    type: object
+    $ref: regulator.yaml#
+    additionalProperties: false
+    description: |
+      Current regulator.
+
+    properties:
+      regulator-name: true
+      regulator-always-on: true
+      regulator-boot-on: true
+      regulator-min-microamp:
+        minimum: 100000
+      regulator-max-microamp:
+        maximum: 3150000
+
+    required:
+      - regulator-name
+
+patternProperties:
+  "^SAFEOUT[12]$":
+    type: object
+    $ref: regulator.yaml#
+    additionalProperties: false
+    description: |
+      Safeout LDO regulator.
+
+    properties:
+      regulator-name: true
+      regulator-always-on: true
+      regulator-boot-on: true
+      regulator-min-microvolt:
+        minimum: 3300000
+      regulator-max-microvolt:
+        maximum: 4950000
+
+    required:
+      - regulator-name
+
+required:
+  - compatible
+
+additionalProperties: false
-- 
GitLab


From 4fddb6211da1749a5fb683a33d7d4890d1290b1b Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Tue, 11 Jan 2022 18:48:04 +0100
Subject: [PATCH 1000/1586] dt-bindings: mfd: maxim,max77843: Add MAX77843
 bindings

Document the bindings for MAX77843 MFD driver, based on Exynos5433 TM2
devicetree.  These are neither accurate nor finished bindings but at
least allow parsing existing DTS files.

The example DTS was copied from existing DTS
(exynos5433-tm2-common.dtsi), so keep the license as GPL-2.0-only.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Link: https://lore.kernel.org/r/20220111174805.223732-5-krzysztof.kozlowski@canonical.com
---
 .../bindings/mfd/maxim,max77843.yaml          | 144 ++++++++++++++++++
 1 file changed, 144 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/mfd/maxim,max77843.yaml

diff --git a/Documentation/devicetree/bindings/mfd/maxim,max77843.yaml b/Documentation/devicetree/bindings/mfd/maxim,max77843.yaml
new file mode 100644
index 0000000000000..61a0f9dcb9837
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/maxim,max77843.yaml
@@ -0,0 +1,144 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/mfd/maxim,max77843.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Maxim MAX77843 MicroUSB and Companion Power Management IC
+
+maintainers:
+  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+
+description: |
+  This is a part of device tree bindings for Maxim MAX77843 MicroUSB
+  Integrated Circuit (MUIC).
+
+  The Maxim MAX77843 is a MicroUSB and Companion Power Management IC which
+  includes voltage current regulators, charger, fuel-gauge, haptic motor driver
+  and MicroUSB management IC.
+
+properties:
+  compatible:
+    const: maxim,max77843
+
+  interrupts:
+    maxItems: 1
+
+  reg:
+    maxItems: 1
+
+  extcon:
+    $ref: /schemas/extcon/maxim,max77843.yaml
+
+  motor-driver:
+    type: object
+    properties:
+      compatible:
+        const: maxim,max77843-haptic
+
+      haptic-supply:
+        description: Power supply to the haptic motor
+
+      pwms:
+        maxItems: 1
+
+    required:
+      - compatible
+      - haptic-supply
+      - pwms
+
+  regulators:
+    $ref: /schemas/regulator/maxim,max77843.yaml
+
+required:
+  - compatible
+  - interrupts
+  - reg
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/irq.h>
+
+    i2c {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        pmic@66 {
+            compatible = "maxim,max77843";
+            interrupt-parent = <&gpa1>;
+            interrupts = <5 IRQ_TYPE_EDGE_FALLING>;
+            reg = <0x66>;
+
+            extcon {
+                compatible = "maxim,max77843-muic";
+
+                connector {
+                    compatible = "samsung,usb-connector-11pin",
+                                 "usb-b-connector";
+                    label = "micro-USB";
+                    type = "micro";
+
+                    ports {
+                        #address-cells = <1>;
+                        #size-cells = <0>;
+
+                        port@0 {
+                            /*
+                             * TODO: The DTS this is based on does not have
+                             * port@0 which is a required property. The ports
+                             * look incomplete and need fixing.
+                             * Add a disabled port just to satisfy dtschema.
+                             */
+                            reg = <0>;
+                            status = "disabled";
+                        };
+
+                        port@3 {
+                            reg = <3>;
+                            endpoint {
+                                remote-endpoint = <&mhl_to_musb_con>;
+                            };
+                        };
+                    };
+                };
+
+                ports {
+                    port {
+                        endpoint {
+                            remote-endpoint = <&usb_to_muic>;
+                        };
+                    };
+                };
+            };
+
+            regulators {
+                compatible = "maxim,max77843-regulator";
+
+                SAFEOUT1 {
+                    regulator-name = "SAFEOUT1";
+                    regulator-min-microvolt = <3300000>;
+                    regulator-max-microvolt = <4950000>;
+                };
+
+                SAFEOUT2 {
+                    regulator-name = "SAFEOUT2";
+                    regulator-min-microvolt = <3300000>;
+                    regulator-max-microvolt = <4950000>;
+                };
+
+                CHARGER {
+                    regulator-name = "CHARGER";
+                    regulator-min-microamp = <100000>;
+                    regulator-max-microamp = <3150000>;
+                };
+            };
+
+            motor-driver {
+                compatible = "maxim,max77843-haptic";
+                haptic-supply = <&ldo38_reg>;
+                pwms = <&pwm 0 33670 0>;
+            };
+        };
+    };
-- 
GitLab


From 04732ff2a1f0f4c37164a788af2a69a01d6c6191 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Tue, 11 Jan 2022 18:48:05 +0100
Subject: [PATCH 1001/1586] MAINTAINERS: mfd: Cover MAX77843 by Maxim PMIC/MUIC
 for Exynos boards entry

The MAX77843 is used in Exynos5433-based TM2 boards and shares some
parts of code with MAX77693 (regulator and haptic motor drivers).
Include all MAX77843 drivers in the entry for Maxim PMIC/MUIC drivers
for Exynos boards, so they will receive some dedicated review coverage.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Link: https://lore.kernel.org/r/20220111174805.223732-6-krzysztof.kozlowski@canonical.com
---
 MAINTAINERS | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index ea3e6c9143848..ecb875856ca18 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11720,9 +11720,11 @@ M:	Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
 L:	linux-kernel@vger.kernel.org
 S:	Supported
 F:	Documentation/devicetree/bindings/*/maxim,max77686.yaml
+F:	Documentation/devicetree/bindings/*/maxim,max77843.yaml
 F:	Documentation/devicetree/bindings/clock/maxim,max77686.txt
 F:	Documentation/devicetree/bindings/mfd/max14577.txt
 F:	Documentation/devicetree/bindings/mfd/max77693.txt
+F:	drivers/*/*max77843.c
 F:	drivers/*/max14577*.c
 F:	drivers/*/max77686*.c
 F:	drivers/*/max77693*.c
-- 
GitLab


From 0ff4827ed66f3cdb9d4f5cfad4a3c211215ff4f0 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Mon, 24 Jan 2022 09:23:44 +0100
Subject: [PATCH 1002/1586] spi: dt-bindings: samsung: Convert to dtschema

Convert the Samsung SoC (S3C24xx, S3C64xx, S5Pv210, Exynos) SPI
controller bindings to DT schema format.

The conversion also drops requirement from providing controller-data and
its data for each of SPI peripheral device nodes.  The dtschema cannot
express this and the requirement is being relaxed in the driver now.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Reviewed-by: Sam Protsenko <semen.protsenko@linaro.org>
Acked-by: Pratyush Yadav <p.yadav@ti.com>
Reviewed-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Link: https://lore.kernel.org/r/20220124082347.32747-2-krzysztof.kozlowski@canonical.com
---
 .../bindings/soc/samsung/exynos-usi.yaml      |   2 +-
 .../spi/samsung,spi-peripheral-props.yaml     |  33 ++++
 .../devicetree/bindings/spi/samsung,spi.yaml  | 187 ++++++++++++++++++
 .../bindings/spi/spi-peripheral-props.yaml    |   1 +
 .../devicetree/bindings/spi/spi-samsung.txt   | 122 ------------
 MAINTAINERS                                   |   2 +-
 6 files changed, 223 insertions(+), 124 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/spi/samsung,spi-peripheral-props.yaml
 create mode 100644 Documentation/devicetree/bindings/spi/samsung,spi.yaml
 delete mode 100644 Documentation/devicetree/bindings/spi/spi-samsung.txt

diff --git a/Documentation/devicetree/bindings/soc/samsung/exynos-usi.yaml b/Documentation/devicetree/bindings/soc/samsung/exynos-usi.yaml
index 273f2d95a0437..e72b6a3fae999 100644
--- a/Documentation/devicetree/bindings/soc/samsung/exynos-usi.yaml
+++ b/Documentation/devicetree/bindings/soc/samsung/exynos-usi.yaml
@@ -22,7 +22,7 @@ description: |
 
   [1] Documentation/devicetree/bindings/serial/samsung_uart.yaml
   [2] Documentation/devicetree/bindings/i2c/i2c-exynos5.txt
-  [3] Documentation/devicetree/bindings/spi/spi-samsung.txt
+  [3] Documentation/devicetree/bindings/spi/samsung,spi.yaml
 
 properties:
   $nodename:
diff --git a/Documentation/devicetree/bindings/spi/samsung,spi-peripheral-props.yaml b/Documentation/devicetree/bindings/spi/samsung,spi-peripheral-props.yaml
new file mode 100644
index 0000000000000..f0db3fb3d6880
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/samsung,spi-peripheral-props.yaml
@@ -0,0 +1,33 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/spi/samsung,spi-peripheral-props.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Peripheral-specific properties for Samsung S3C/S5P/Exynos SoC SPI controller
+
+maintainers:
+  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+
+description:
+  See spi-peripheral-props.yaml for more info.
+
+properties:
+  controller-data:
+    type: object
+    additionalProperties: false
+
+    properties:
+      samsung,spi-feedback-delay:
+        description: |
+          The sampling phase shift to be applied on the miso line (to account
+          for any lag in the miso line). Valid values:
+           - 0: No phase shift.
+           - 1: 90 degree phase shift sampling.
+           - 2: 180 degree phase shift sampling.
+           - 3: 270 degree phase shift sampling.
+        $ref: /schemas/types.yaml#/definitions/uint32
+        enum: [0, 1, 2, 3]
+        default: 0
+
+additionalProperties: true
diff --git a/Documentation/devicetree/bindings/spi/samsung,spi.yaml b/Documentation/devicetree/bindings/spi/samsung,spi.yaml
new file mode 100644
index 0000000000000..61c77088e8ee6
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/samsung,spi.yaml
@@ -0,0 +1,187 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/spi/samsung,spi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung S3C/S5P/Exynos SoC SPI controller
+
+maintainers:
+  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+
+description:
+  All the SPI controller nodes should be represented in the aliases node using
+  the following format 'spi{n}' where n is a unique number for the alias.
+
+properties:
+  compatible:
+    oneOf:
+      - enum:
+          - samsung,s3c2443-spi # for S3C2443, S3C2416 and S3C2450
+          - samsung,s3c6410-spi
+          - samsung,s5pv210-spi # for S5PV210 and S5PC110
+          - samsung,exynos5433-spi
+      - const: samsung,exynos7-spi
+        deprecated: true
+
+  clocks:
+    minItems: 2
+    maxItems: 3
+
+  clock-names:
+    minItems: 2
+    maxItems: 3
+
+  cs-gpios: true
+
+  dmas:
+    minItems: 2
+    maxItems: 2
+
+  dma-names:
+    items:
+      - const: tx
+      - const: rx
+
+  interrupts:
+    maxItems: 1
+
+  no-cs-readback:
+    description:
+      The CS line is disconnected, therefore the device should not operate
+      based on CS signalling.
+    type: boolean
+
+  num-cs:
+    minimum: 1
+    maximum: 4
+    default: 1
+
+  samsung,spi-src-clk:
+    description:
+      If the spi controller includes a internal clock mux to select the clock
+      source for the spi bus clock, this property can be used to indicate the
+      clock to be used for driving the spi bus clock. If not specified, the
+      clock number 0 is used as default.
+    $ref: /schemas/types.yaml#/definitions/uint32
+    default: 0
+
+  reg:
+    maxItems: 1
+
+required:
+  - compatible
+  - clocks
+  - clock-names
+  - dmas
+  - dma-names
+  - interrupts
+  - reg
+
+allOf:
+  - $ref: spi-controller.yaml#
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: samsung,exynos5433-spi
+    then:
+      properties:
+        clocks:
+          minItems: 3
+          maxItems: 3
+        clock-names:
+          items:
+            - const: spi
+            - enum:
+                - spi_busclk0
+                - spi_busclk1
+                - spi_busclk2
+                - spi_busclk3
+            - const: spi_ioclk
+    else:
+      properties:
+        clocks:
+          minItems: 2
+          maxItems: 2
+        clock-names:
+          items:
+            - const: spi
+            - enum:
+                - spi_busclk0
+                - spi_busclk1
+                - spi_busclk2
+                - spi_busclk3
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/exynos5433.h>
+    #include <dt-bindings/clock/samsung,s2mps11.h>
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/gpio/gpio.h>
+
+    spi@14d30000 {
+        compatible = "samsung,exynos5433-spi";
+        reg = <0x14d30000 0x100>;
+        interrupts = <GIC_SPI 433 IRQ_TYPE_LEVEL_HIGH>;
+        dmas = <&pdma0 11>, <&pdma0 10>;
+        dma-names = "tx", "rx";
+        #address-cells = <1>;
+        #size-cells = <0>;
+        clocks = <&cmu_peric CLK_PCLK_SPI1>,
+                 <&cmu_peric CLK_SCLK_SPI1>,
+                 <&cmu_peric CLK_SCLK_IOCLK_SPI1>;
+        clock-names = "spi",
+                      "spi_busclk0",
+                      "spi_ioclk";
+        samsung,spi-src-clk = <0>;
+        pinctrl-names = "default";
+        pinctrl-0 = <&spi1_bus>;
+        num-cs = <1>;
+
+        cs-gpios = <&gpd6 3 GPIO_ACTIVE_HIGH>;
+
+        audio-codec@0 {
+            compatible = "wlf,wm5110";
+            reg = <0x0>;
+            spi-max-frequency = <20000000>;
+            interrupt-parent = <&gpa0>;
+            interrupts = <4 IRQ_TYPE_NONE>;
+            clocks = <&pmu_system_controller 0>,
+                     <&s2mps13_osc S2MPS11_CLK_BT>;
+            clock-names = "mclk1", "mclk2";
+
+            gpio-controller;
+            #gpio-cells = <2>;
+            interrupt-controller;
+            #interrupt-cells = <2>;
+
+            wlf,micd-detect-debounce = <300>;
+            wlf,micd-bias-start-time = <0x1>;
+            wlf,micd-rate = <0x7>;
+            wlf,micd-dbtime = <0x2>;
+            wlf,micd-force-micbias;
+            wlf,micd-configs = <0x0 1 0>;
+            wlf,hpdet-channel = <1>;
+            wlf,gpsw = <0x1>;
+            wlf,inmode = <2 0 2 0>;
+
+            wlf,reset = <&gpc0 7 GPIO_ACTIVE_HIGH>;
+            wlf,ldoena = <&gpf0 0 GPIO_ACTIVE_HIGH>;
+
+            /* core supplies */
+            AVDD-supply = <&ldo18_reg>;
+            DBVDD1-supply = <&ldo18_reg>;
+            CPVDD-supply = <&ldo18_reg>;
+            DBVDD2-supply = <&ldo18_reg>;
+            DBVDD3-supply = <&ldo18_reg>;
+            SPKVDDL-supply = <&ldo18_reg>;
+            SPKVDDR-supply = <&ldo18_reg>;
+
+            controller-data {
+                samsung,spi-feedback-delay = <0>;
+            };
+        };
+    };
diff --git a/Documentation/devicetree/bindings/spi/spi-peripheral-props.yaml b/Documentation/devicetree/bindings/spi/spi-peripheral-props.yaml
index 5dd209206e880..df885eeb144f8 100644
--- a/Documentation/devicetree/bindings/spi/spi-peripheral-props.yaml
+++ b/Documentation/devicetree/bindings/spi/spi-peripheral-props.yaml
@@ -85,5 +85,6 @@ properties:
 # The controller specific properties go here.
 allOf:
   - $ref: cdns,qspi-nor-peripheral-props.yaml#
+  - $ref: samsung,spi-peripheral-props.yaml#
 
 additionalProperties: true
diff --git a/Documentation/devicetree/bindings/spi/spi-samsung.txt b/Documentation/devicetree/bindings/spi/spi-samsung.txt
deleted file mode 100644
index 49028a4f5df18..0000000000000
--- a/Documentation/devicetree/bindings/spi/spi-samsung.txt
+++ /dev/null
@@ -1,122 +0,0 @@
-* Samsung SPI Controller
-
-The Samsung SPI controller is used to interface with various devices such as flash
-and display controllers using the SPI communication interface.
-
-Required SoC Specific Properties:
-
-- compatible: should be one of the following.
-    - samsung,s3c2443-spi: for s3c2443, s3c2416 and s3c2450 platforms
-    - samsung,s3c6410-spi: for s3c6410 platforms
-    - samsung,s5pv210-spi: for s5pv210 and s5pc110 platforms
-    - samsung,exynos5433-spi: for exynos5433 compatible controllers
-    - samsung,exynos7-spi: for exynos7 platforms <DEPRECATED>
-
-- reg: physical base address of the controller and length of memory mapped
-  region.
-
-- interrupts: The interrupt number to the cpu. The interrupt specifier format
-  depends on the interrupt controller.
-
-- dmas : Two or more DMA channel specifiers following the convention outlined
-  in bindings/dma/dma.txt
-
-- dma-names: Names for the dma channels. There must be at least one channel
-  named "tx" for transmit and named "rx" for receive.
-
-- clocks: specifies the clock IDs provided to the SPI controller; they are
-  required for interacting with the controller itself, for synchronizing the bus
-  and as I/O clock (the latter is required by exynos5433 and exynos7).
-
-- clock-names: string names of the clocks in the 'clocks' property; for all the
-  the devices the names must be "spi", "spi_busclkN" (where N is determined by
-  "samsung,spi-src-clk"), while Exynos5433 should specify a third clock
-  "spi_ioclk" for the I/O clock.
-
-Required Board Specific Properties:
-
-- #address-cells: should be 1.
-- #size-cells: should be 0.
-
-Optional Board Specific Properties:
-
-- samsung,spi-src-clk: If the spi controller includes a internal clock mux to
-  select the clock source for the spi bus clock, this property can be used to
-  indicate the clock to be used for driving the spi bus clock. If not specified,
-  the clock number 0 is used as default.
-
-- num-cs: Specifies the number of chip select lines supported. If
-  not specified, the default number of chip select lines is set to 1.
-
-- cs-gpios: should specify GPIOs used for chipselects (see spi-bus.txt)
-
-- no-cs-readback: the CS line is disconnected, therefore the device should not
-  operate based on CS signalling.
-
-SPI Controller specific data in SPI slave nodes:
-
-- The spi slave nodes should provide the following information which is required
-  by the spi controller.
-
-  - samsung,spi-feedback-delay: The sampling phase shift to be applied on the
-    miso line (to account for any lag in the miso line). The following are the
-    valid values.
-
-      - 0: No phase shift.
-      - 1: 90 degree phase shift sampling.
-      - 2: 180 degree phase shift sampling.
-      - 3: 270 degree phase shift sampling.
-
-Aliases:
-
-- All the SPI controller nodes should be represented in the aliases node using
-  the following format 'spi{n}' where n is a unique number for the alias.
-
-
-Example:
-
-- SoC Specific Portion:
-
-	spi_0: spi@12d20000 {
-		compatible = "samsung,exynos4210-spi";
-		reg = <0x12d20000 0x100>;
-		interrupts = <0 66 0>;
-		dmas = <&pdma0 5
-			&pdma0 4>;
-		dma-names = "tx", "rx";
-		#address-cells = <1>;
-		#size-cells = <0>;
-	};
-
-- Board Specific Portion:
-
-	spi_0: spi@12d20000 {
-		#address-cells = <1>;
-		#size-cells = <0>;
-		pinctrl-names = "default";
-		pinctrl-0 = <&spi0_bus>;
-		cs-gpios = <&gpa2 5 0>;
-
-		w25q80bw@0 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			compatible = "w25x80";
-			reg = <0>;
-			spi-max-frequency = <10000>;
-
-			controller-data {
-				samsung,spi-feedback-delay = <0>;
-			};
-
-			partition@0 {
-				label = "U-Boot";
-				reg = <0x0 0x40000>;
-				read-only;
-			};
-
-			partition@40000 {
-				label = "Kernel";
-				reg = <0x40000 0xc0000>;
-			};
-		};
-	};
diff --git a/MAINTAINERS b/MAINTAINERS
index ecb875856ca18..23202ad1f5f76 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -17091,7 +17091,7 @@ M:	Andi Shyti <andi@etezian.org>
 L:	linux-spi@vger.kernel.org
 L:	linux-samsung-soc@vger.kernel.org
 S:	Maintained
-F:	Documentation/devicetree/bindings/spi/spi-samsung.txt
+F:	Documentation/devicetree/bindings/spi/samsung,spi*.yaml
 F:	drivers/spi/spi-s3c*
 F:	include/linux/platform_data/spi-s3c64xx.h
 F:	include/linux/spi/s3c24xx-fiq.h
-- 
GitLab


From f412fe11c1a9d815565f3918c56f3fd02167c734 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Mon, 24 Jan 2022 09:23:45 +0100
Subject: [PATCH 1003/1586] mfd: dt-bindings: google,cros-ec: Reference Samsung
 SPI bindings

The ChromeOS Embedded Controller appears on boards with Samsung Exynos
SoC, where Exynos SPI bindings expect controller-data node.  Reference
SPI peripheral bindings which include now Samsung SPI peripheral parts.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Link: https://lore.kernel.org/r/20220124082347.32747-3-krzysztof.kozlowski@canonical.com
---
 .../bindings/mfd/google,cros-ec.yaml          | 29 ++++++++++---------
 1 file changed, 16 insertions(+), 13 deletions(-)

diff --git a/Documentation/devicetree/bindings/mfd/google,cros-ec.yaml b/Documentation/devicetree/bindings/mfd/google,cros-ec.yaml
index d1f53bd449f7b..525ab18005b39 100644
--- a/Documentation/devicetree/bindings/mfd/google,cros-ec.yaml
+++ b/Documentation/devicetree/bindings/mfd/google,cros-ec.yaml
@@ -31,7 +31,7 @@ properties:
 
   controller-data:
     description:
-      SPI controller data, see bindings/spi/spi-samsung.txt
+      SPI controller data, see bindings/spi/samsung,spi-peripheral-props.yaml
     type: object
 
   google,cros-ec-spi-pre-delay:
@@ -148,18 +148,21 @@ patternProperties:
 required:
   - compatible
 
-if:
-  properties:
-    compatible:
-      contains:
-        enum:
-          - google,cros-ec-i2c
-          - google,cros-ec-rpmsg
-then:
-  properties:
-    google,cros-ec-spi-pre-delay: false
-    google,cros-ec-spi-msg-delay: false
-    spi-max-frequency: false
+allOf:
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - google,cros-ec-i2c
+              - google,cros-ec-rpmsg
+    then:
+      properties:
+        google,cros-ec-spi-pre-delay: false
+        google,cros-ec-spi-msg-delay: false
+        spi-max-frequency: false
+    else:
+      $ref: /schemas/spi/spi-peripheral-props.yaml
 
 additionalProperties: false
 
-- 
GitLab


From 96f06006a3505287b37ba19d4e50e504ac5b6a50 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Mon, 24 Jan 2022 09:23:46 +0100
Subject: [PATCH 1004/1586] mfd: dt-bindings: google,cros-ec: Fix indentation
 in example

Correct level of indentation in the example.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Link: https://lore.kernel.org/r/20220124082347.32747-4-krzysztof.kozlowski@canonical.com
---
 Documentation/devicetree/bindings/mfd/google,cros-ec.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/mfd/google,cros-ec.yaml b/Documentation/devicetree/bindings/mfd/google,cros-ec.yaml
index 525ab18005b39..4caadf73fc4aa 100644
--- a/Documentation/devicetree/bindings/mfd/google,cros-ec.yaml
+++ b/Documentation/devicetree/bindings/mfd/google,cros-ec.yaml
@@ -203,7 +203,7 @@ examples:
             spi-max-frequency = <5000000>;
 
             proximity {
-                    compatible = "google,cros-ec-mkbp-proximity";
+                compatible = "google,cros-ec-mkbp-proximity";
             };
 
             cbas {
-- 
GitLab


From 7db7a24657c969291921d6580d62748b1320dd2f Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Mon, 24 Jan 2022 09:23:47 +0100
Subject: [PATCH 1005/1586] spi: s3c64xx: Allow controller-data to be optional

The Samsung SoC SPI driver requires to provide controller-data node
for each of SPI peripheral device nodes.  Make this controller-data node
optional, so DTS could be simpler.

Suggested-by: Rob Herring <robh@kernel.org>
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Reviewed-by: Sam Protsenko <semen.protsenko@linaro.org>
Reviewed-by: Andi Shyti <andi@etezian.org>
Reviwed-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Link: https://lore.kernel.org/r/20220124082347.32747-5-krzysztof.kozlowski@canonical.com
---
 drivers/spi/spi-s3c64xx.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c
index 8755cd85e83ce..386550fca81cc 100644
--- a/drivers/spi/spi-s3c64xx.c
+++ b/drivers/spi/spi-s3c64xx.c
@@ -796,16 +796,14 @@ static struct s3c64xx_spi_csinfo *s3c64xx_get_slave_ctrldata(
 		return ERR_PTR(-EINVAL);
 	}
 
-	data_np = of_get_child_by_name(slave_np, "controller-data");
-	if (!data_np) {
-		dev_err(&spi->dev, "child node 'controller-data' not found\n");
-		return ERR_PTR(-EINVAL);
-	}
-
 	cs = kzalloc(sizeof(*cs), GFP_KERNEL);
-	if (!cs) {
-		of_node_put(data_np);
+	if (!cs)
 		return ERR_PTR(-ENOMEM);
+
+	data_np = of_get_child_by_name(slave_np, "controller-data");
+	if (!data_np) {
+		dev_info(&spi->dev, "feedback delay set to default (0)\n");
+		return cs;
 	}
 
 	of_property_read_u32(data_np, "samsung,spi-feedback-delay", &fb_delay);
-- 
GitLab


From ce44e3aeec43a887a7999f4e0e50df1e4557af78 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Tue, 15 Feb 2022 08:47:57 +0100
Subject: [PATCH 1006/1586] dt-bindings: power: supply: maxim,max14577: Convert
 to dtschema

Convert the Charger bindings of Maxim MAX14577/MAX77836 MUIC to DT
schema format.  The existing bindings were defined in
../bindings/mfd/max14577.txt.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Acked-by: Sebastian Reichel <sebastian.reichel@collabora.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Link: https://lore.kernel.org/r/20220215074759.29402-2-krzysztof.kozlowski@canonical.com
---
 .../bindings/power/supply/maxim,max14577.yaml | 84 +++++++++++++++++++
 MAINTAINERS                                   |  1 +
 2 files changed, 85 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/power/supply/maxim,max14577.yaml

diff --git a/Documentation/devicetree/bindings/power/supply/maxim,max14577.yaml b/Documentation/devicetree/bindings/power/supply/maxim,max14577.yaml
new file mode 100644
index 0000000000000..3978b48299de1
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/supply/maxim,max14577.yaml
@@ -0,0 +1,84 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/power/supply/maxim,max14577.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Maxim MAX14577/MAX77836 MicroUSB and Companion Power Management IC Charger
+
+maintainers:
+  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+
+description: |
+  This is a part of device tree bindings for Maxim MAX14577/MAX77836 MicroUSB
+  Integrated Circuit (MUIC).
+
+  See also Documentation/devicetree/bindings/mfd/maxim,max14577.yaml for
+  additional information and example.
+
+properties:
+  compatible:
+    enum:
+      - maxim,max14577-charger
+      - maxim,max77836-charger
+
+  maxim,constant-uvolt:
+    description:
+      Battery Constant Voltage in uV
+    $ref: /schemas/types.yaml#/definitions/uint32
+    minimum: 4000000
+    maximum: 4350000
+
+  maxim,eoc-uamp:
+    description: |
+      Current in uA for End-Of-Charge mode.
+      MAX14577: 50000-20000
+      MAX77836: 5000-100000
+    $ref: /schemas/types.yaml#/definitions/uint32
+
+  maxim,fast-charge-uamp:
+    description: |
+      Current in uA for Fast Charge
+      MAX14577: 90000-950000
+      MAX77836: 45000-475000
+    $ref: /schemas/types.yaml#/definitions/uint32
+
+  maxim,ovp-uvolt:
+    description:
+      OverVoltage Protection Threshold in uV; In an overvoltage condition, INT
+      asserts and charging stops.
+    $ref: /schemas/types.yaml#/definitions/uint32
+    enum: [6000000, 6500000, 7000000, 7500000]
+
+required:
+  - compatible
+  - maxim,constant-uvolt
+  - maxim,eoc-uamp
+  - maxim,fast-charge-uamp
+  - maxim,ovp-uvolt
+
+allOf:
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: maxim,max14577-charger
+    then:
+      properties:
+        maxim,eoc-uamp:
+          minimum: 50000
+          maximum: 200000
+        maxim,fast-charge-uamp:
+          minimum: 90000
+          maximum: 950000
+    else:
+      # max77836
+      properties:
+        maxim,eoc-uamp:
+          minimum: 5000
+          maximum: 100000
+        maxim,fast-charge-uamp:
+          minimum: 45000
+          maximum: 475000
+
+additionalProperties: false
diff --git a/MAINTAINERS b/MAINTAINERS
index 23202ad1f5f76..636bd6da988c9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11710,6 +11710,7 @@ M:	Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
 M:	Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
 L:	linux-pm@vger.kernel.org
 S:	Supported
+F:	Documentation/devicetree/bindings/power/supply/maxim,max14577.yaml
 F:	drivers/power/supply/max14577_charger.c
 F:	drivers/power/supply/max77693_charger.c
 
-- 
GitLab


From e1cd4265e547e02efc2a2547e8f390a7a9041787 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Tue, 15 Feb 2022 08:47:58 +0100
Subject: [PATCH 1007/1586] regulator: dt-bindings: maxim,max14577: Convert to
 dtschema

Convert the regulator bindings of Maxim MAX14577/MAX77835 MUIC to DT
schema format.  The existing bindings were defined in
../bindings/mfd/max14577.txt.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Link: https://lore.kernel.org/r/20220215074759.29402-3-krzysztof.kozlowski@canonical.com
---
 .../bindings/regulator/maxim,max14577.yaml    | 78 +++++++++++++++++++
 1 file changed, 78 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/regulator/maxim,max14577.yaml

diff --git a/Documentation/devicetree/bindings/regulator/maxim,max14577.yaml b/Documentation/devicetree/bindings/regulator/maxim,max14577.yaml
new file mode 100644
index 0000000000000..16f01886a6014
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/maxim,max14577.yaml
@@ -0,0 +1,78 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/regulator/maxim,max14577.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Maxim MAX14577/MAX77836 MicroUSB and Companion Power Management IC regulators
+
+maintainers:
+  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+
+description: |
+  This is a part of device tree bindings for Maxim MAX14577/MAX77836 MicroUSB
+  Integrated Circuit (MUIC).
+
+  See also Documentation/devicetree/bindings/mfd/maxim,max14577.yaml for
+  additional information and example.
+
+properties:
+  compatible:
+    enum:
+      - maxim,max14577-regulator
+      - maxim,max77836-regulator
+
+  CHARGER:
+    type: object
+    $ref: regulator.yaml#
+    unevaluatedProperties: false
+    description: |
+      Current regulator.
+
+    properties:
+      regulator-min-microvolt: false
+      regulator-max-microvolt: false
+
+  SAFEOUT:
+    type: object
+    $ref: regulator.yaml#
+    unevaluatedProperties: false
+    description: |
+      Safeout LDO regulator (fixed voltage).
+
+    properties:
+      regulator-min-microamp: false
+      regulator-max-microamp: false
+      regulator-min-microvolt:
+        const: 4900000
+      regulator-max-microvolt:
+        const: 4900000
+
+patternProperties:
+  "^LDO[12]$":
+    type: object
+    $ref: regulator.yaml#
+    unevaluatedProperties: false
+    description: |
+      Current regulator.
+
+    properties:
+      regulator-min-microamp: false
+      regulator-max-microamp: false
+      regulator-min-microvolt:
+        minimum: 800000
+      regulator-max-microvolt:
+        maximum: 3950000
+
+allOf:
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: maxim,max14577-regulator
+    then:
+      properties:
+        LDO1: false
+        LDO2: false
+
+additionalProperties: false
-- 
GitLab


From ed6e9aa8f52f88975b4692eb00bdeaf35cd3d529 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Tue, 15 Feb 2022 08:47:59 +0100
Subject: [PATCH 1008/1586] dt-bindings: mfd: maxim,max14577: Convert to
 dtschema

Convert the MFD part of Maxim MAX14577/MAX77836 MUIC to DT schema
format.  The example DTS was copied from existing DTS
(exynos3250-rinato.dts), so keep the license as GPL-2.0-only.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Link: https://lore.kernel.org/r/20220215074759.29402-4-krzysztof.kozlowski@canonical.com
---
 .../devicetree/bindings/mfd/max14577.txt      | 147 -------------
 .../bindings/mfd/maxim,max14577.yaml          | 195 ++++++++++++++++++
 MAINTAINERS                                   |   2 +-
 3 files changed, 196 insertions(+), 148 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/mfd/max14577.txt
 create mode 100644 Documentation/devicetree/bindings/mfd/maxim,max14577.yaml

diff --git a/Documentation/devicetree/bindings/mfd/max14577.txt b/Documentation/devicetree/bindings/mfd/max14577.txt
deleted file mode 100644
index be11943a0560f..0000000000000
--- a/Documentation/devicetree/bindings/mfd/max14577.txt
+++ /dev/null
@@ -1,147 +0,0 @@
-Maxim MAX14577/77836 Multi-Function Device
-
-MAX14577 is a Multi-Function Device with Micro-USB Interface Circuit, Li+
-Battery Charger and SFOUT LDO output for powering USB devices. It is
-interfaced to host controller using I2C.
-
-MAX77836 additionally contains PMIC (with two LDO regulators) and Fuel Gauge.
-For the description of Fuel Gauge low SOC alert interrupt see:
-../power/supply/max17040_battery.txt
-
-
-Required properties:
-- compatible : Must be "maxim,max14577" or "maxim,max77836".
-- reg : I2C slave address for the max14577 chip (0x25 for max14577/max77836)
-- interrupts : IRQ line for the chip.
-
-
-Required nodes:
- - charger :
-	Node for configuring the charger driver.
-	Required properties:
-		- compatible : "maxim,max14577-charger"
-			or "maxim,max77836-charger"
-		- maxim,fast-charge-uamp : Current in uA for Fast Charge;
-			Valid values:
-			- for max14577: 90000 - 950000;
-			- for max77836: 45000 - 475000;
-		- maxim,eoc-uamp : Current in uA for End-Of-Charge mode;
-			Valid values:
-			- for max14577: 50000 - 200000;
-			- for max77836: 5000 - 100000;
-		- maxim,ovp-uvolt : OverVoltage Protection Threshold in uV;
-			In an overvoltage condition, INT asserts and charging
-			stops. Valid values:
-			- 6000000, 6500000, 7000000, 7500000;
-		- maxim,constant-uvolt : Battery Constant Voltage in uV;
-			Valid values:
-			- 4000000 - 4280000 (step by 20000);
-			- 4350000;
-
-
-Optional nodes:
-- max14577-muic/max77836-muic :
-	Node used only by extcon consumers.
-	Required properties:
-		- compatible : "maxim,max14577-muic" or "maxim,max77836-muic"
-
-- regulators :
-	Required properties:
-		- compatible : "maxim,max14577-regulator"
-			or "maxim,max77836-regulator"
-
-	May contain a sub-node per regulator from the list below. Each
-	sub-node should contain the constraints and initialization information
-	for that regulator. See regulator.txt for a description of standard
-	properties for these sub-nodes.
-
-	List of valid regulator names:
-	- for max14577: CHARGER, SAFEOUT.
-	- for max77836: CHARGER, SAFEOUT, LDO1, LDO2.
-
-	The SAFEOUT is a fixed voltage regulator so there is no need to specify
-	voltages for it.
-
-
-Example:
-
-#include <dt-bindings/interrupt-controller/irq.h>
-
-max14577@25 {
-	compatible = "maxim,max14577";
-	reg = <0x25>;
-	interrupt-parent = <&gpx1>;
-	interrupts = <5 IRQ_TYPE_LEVEL_LOW>;
-
-	muic: max14577-muic {
-		compatible = "maxim,max14577-muic";
-	};
-
-	regulators {
-		compatible = "maxim,max14577-regulator";
-
-		SAFEOUT {
-			regulator-name = "SAFEOUT";
-		};
-		CHARGER {
-			regulator-name = "CHARGER";
-			regulator-min-microamp = <90000>;
-			regulator-max-microamp = <950000>;
-			regulator-boot-on;
-		};
-	};
-
-	charger {
-		compatible = "maxim,max14577-charger";
-
-		maxim,constant-uvolt = <4350000>;
-		maxim,fast-charge-uamp = <450000>;
-		maxim,eoc-uamp = <50000>;
-		maxim,ovp-uvolt = <6500000>;
-	};
-};
-
-
-max77836@25 {
-	compatible = "maxim,max77836";
-	reg = <0x25>;
-	interrupt-parent = <&gpx1>;
-	interrupts = <5 IRQ_TYPE_LEVEL_LOW>;
-
-	muic: max77836-muic {
-		compatible = "maxim,max77836-muic";
-	};
-
-	regulators {
-		compatible = "maxim,max77836-regulator";
-
-		SAFEOUT {
-			regulator-name = "SAFEOUT";
-		};
-		CHARGER {
-			regulator-name = "CHARGER";
-			regulator-min-microamp = <90000>;
-			regulator-max-microamp = <950000>;
-			regulator-boot-on;
-		};
-		LDO1 {
-			regulator-name = "LDO1";
-			regulator-min-microvolt = <2700000>;
-			regulator-max-microvolt = <2700000>;
-		};
-		LDO2 {
-			regulator-name = "LDO2";
-			regulator-min-microvolt = <800000>;
-			regulator-max-microvolt = <3950000>;
-		};
-	};
-
-	charger {
-		compatible = "maxim,max77836-charger";
-
-		maxim,constant-uvolt = <4350000>;
-		maxim,fast-charge-uamp = <225000>;
-		maxim,eoc-uamp = <7500>;
-		maxim,ovp-uvolt = <6500000>;
-	};
-};
diff --git a/Documentation/devicetree/bindings/mfd/maxim,max14577.yaml b/Documentation/devicetree/bindings/mfd/maxim,max14577.yaml
new file mode 100644
index 0000000000000..27870b8760a6d
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/maxim,max14577.yaml
@@ -0,0 +1,195 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/mfd/maxim,max14577.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Maxim MAX14577/MAX77836 MicroUSB and Companion Power Management IC
+
+maintainers:
+  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+
+description: |
+  This is a part of device tree bindings for Maxim MAX14577/MAX77836 MicroUSB
+  Integrated Circuit (MUIC).
+
+  The Maxim MAX14577 is a MicroUSB and Companion Power Management IC which
+  includes voltage safeout regulators, charger and MicroUSB management IC.
+
+  The Maxim MAX77836 is a MicroUSB and Companion Power Management IC which
+  includes voltage safeout and LDO regulators, charger, fuel-gauge and MicroUSB
+  management IC.
+
+properties:
+  compatible:
+    enum:
+      - maxim,max14577
+      - maxim,max77836
+
+  interrupts:
+    maxItems: 1
+
+  reg:
+    maxItems: 1
+
+  wakeup-source: true
+
+  charger:
+    $ref: /schemas/power/supply/maxim,max14577.yaml
+
+  extcon:
+    type: object
+    properties:
+      compatible:
+        enum:
+          - maxim,max14577-muic
+          - maxim,max77836-muic
+
+    required:
+      - compatible
+
+  regulators:
+    $ref: /schemas/regulator/maxim,max14577.yaml
+
+required:
+  - compatible
+  - interrupts
+  - reg
+  - charger
+
+allOf:
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: maxim,max14577
+    then:
+      properties:
+        charger:
+          properties:
+            compatible:
+              const: maxim,max14577-charger
+        extcon:
+          properties:
+            compatible:
+              const: maxim,max14577-muic
+        regulator:
+          properties:
+            compatible:
+              const: maxim,max14577-regulator
+    else:
+      properties:
+        charger:
+          properties:
+            compatible:
+              const: maxim,max77836-charger
+        extcon:
+          properties:
+            compatible:
+              const: maxim,max77836-muic
+        regulator:
+          properties:
+            compatible:
+              const: maxim,max77836-regulator
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/irq.h>
+
+    i2c {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        pmic@25 {
+            compatible = "maxim,max14577";
+            reg = <0x25>;
+            interrupt-parent = <&gpx1>;
+            interrupts = <5 IRQ_TYPE_LEVEL_LOW>;
+
+            extcon {
+                compatible = "maxim,max14577-muic";
+            };
+
+            regulators {
+                compatible = "maxim,max14577-regulator";
+
+                SAFEOUT {
+                    regulator-name = "SAFEOUT";
+                };
+
+                CHARGER {
+                    regulator-name = "CHARGER";
+                    regulator-min-microamp = <90000>;
+                    regulator-max-microamp = <950000>;
+                    regulator-boot-on;
+                };
+            };
+
+            charger {
+                compatible = "maxim,max14577-charger";
+
+                maxim,constant-uvolt = <4350000>;
+                maxim,fast-charge-uamp = <450000>;
+                maxim,eoc-uamp = <50000>;
+                maxim,ovp-uvolt = <6500000>;
+            };
+        };
+    };
+
+  - |
+    #include <dt-bindings/interrupt-controller/irq.h>
+
+    i2c {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        pmic@25 {
+            compatible = "maxim,max77836";
+            interrupt-parent = <&gpx1>;
+            interrupts = <5 IRQ_TYPE_NONE>;
+            reg = <0x25>;
+            wakeup-source;
+
+            extcon {
+                compatible = "maxim,max77836-muic";
+            };
+
+            regulators {
+                compatible = "maxim,max77836-regulator";
+
+                SAFEOUT {
+                    regulator-name = "SAFEOUT";
+                };
+
+                CHARGER {
+                    regulator-name = "CHARGER";
+                    regulator-min-microamp = <45000>;
+                    regulator-max-microamp = <475000>;
+                    regulator-boot-on;
+                };
+
+                LDO1 {
+                    regulator-name = "MOT_2.7V";
+                    regulator-min-microvolt = <1100000>;
+                    regulator-max-microvolt = <2700000>;
+                };
+
+                LDO2 {
+                    regulator-name = "UNUSED_LDO2";
+                    regulator-min-microvolt = <800000>;
+                    regulator-max-microvolt = <3950000>;
+                };
+            };
+
+            charger {
+                compatible = "maxim,max77836-charger";
+
+                maxim,constant-uvolt = <4350000>;
+                maxim,fast-charge-uamp = <225000>;
+                maxim,eoc-uamp = <7500>;
+                maxim,ovp-uvolt = <6500000>;
+            };
+        };
+    };
diff --git a/MAINTAINERS b/MAINTAINERS
index 636bd6da988c9..31d1c0926697a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11720,10 +11720,10 @@ M:	Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
 M:	Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
 L:	linux-kernel@vger.kernel.org
 S:	Supported
+F:	Documentation/devicetree/bindings/*/maxim,max14577.yaml
 F:	Documentation/devicetree/bindings/*/maxim,max77686.yaml
 F:	Documentation/devicetree/bindings/*/maxim,max77843.yaml
 F:	Documentation/devicetree/bindings/clock/maxim,max77686.txt
-F:	Documentation/devicetree/bindings/mfd/max14577.txt
 F:	Documentation/devicetree/bindings/mfd/max77693.txt
 F:	drivers/*/*max77843.c
 F:	drivers/*/max14577*.c
-- 
GitLab


From 6b9d48b418fbc91ae8dbcba2cfe078c4736a77f2 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Tue, 15 Feb 2022 08:53:43 +0100
Subject: [PATCH 1009/1586] regulator: dt-bindings: maxim,max77802: Convert to
 dtschema

Convert the regulators of Maxim MAX77802 PMIC to DT schema format.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Acked-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Link: https://lore.kernel.org/r/20220215075344.31421-2-krzysztof.kozlowski@canonical.com
---
 .../bindings/regulator/max77802.txt           | 111 ------------------
 .../bindings/regulator/maxim,max77802.yaml    |  85 ++++++++++++++
 2 files changed, 85 insertions(+), 111 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/regulator/max77802.txt
 create mode 100644 Documentation/devicetree/bindings/regulator/maxim,max77802.yaml

diff --git a/Documentation/devicetree/bindings/regulator/max77802.txt b/Documentation/devicetree/bindings/regulator/max77802.txt
deleted file mode 100644
index b82943d836770..0000000000000
--- a/Documentation/devicetree/bindings/regulator/max77802.txt
+++ /dev/null
@@ -1,111 +0,0 @@
-Binding for Maxim MAX77802 regulators
-
-This is a part of device tree bindings of MAX77802 multi-function device.
-More information can be found in bindings/mfd/max77802.txt file.
-
-The MAX77802 PMIC has 10 high-efficiency Buck and 32 Low-dropout (LDO)
-regulators that can be controlled over I2C.
-
-Following properties should be present in main device node of the MFD chip.
-
-Optional properties:
-- inb1-supply:  The input supply for BUCK1
-- inb2-supply:  The input supply for BUCK2
-- inb3-supply:  The input supply for BUCK3
-- inb4-supply:  The input supply for BUCK4
-- inb5-supply:  The input supply for BUCK5
-- inb6-supply:  The input supply for BUCK6
-- inb7-supply:  The input supply for BUCK7
-- inb8-supply:  The input supply for BUCK8
-- inb9-supply:  The input supply for BUCK9
-- inb10-supply: The input supply for BUCK10
-- inl1-supply:  The input supply for LDO8 and LDO15
-- inl2-supply:  The input supply for LDO17, LDO27, LDO30 and LDO35
-- inl3-supply:  The input supply for LDO3, LDO5, LDO6 and LDO7
-- inl4-supply:  The input supply for LDO10, LDO11, LDO13 and LDO14
-- inl5-supply:  The input supply for LDO9 and LDO19
-- inl6-supply:  The input supply for LDO4, LDO21, LDO24 and LDO33
-- inl7-supply:  The input supply for LDO18, LDO20, LDO28 and LDO29
-- inl9-supply:  The input supply for LDO12, LDO23, LDO25, LDO26, LDO32 and LDO34
-- inl10-supply: The input supply for LDO1 and LDO2
-
-Optional nodes:
-- regulators : The regulators of max77802 have to be instantiated
-  under subnode named "regulators" using the following format.
-
-	regulator-name {
-		standard regulator constraints....
-	};
-	refer Documentation/devicetree/bindings/regulator/regulator.txt
-
-The regulator node name should be initialized with a string to get matched
-with their hardware counterparts as follow. The valid names are:
-
-	-LDOn 	:	for LDOs, where n can lie in ranges 1-15, 17-21, 23-30
-			and 32-35.
-			example: LDO1, LDO2, LDO35.
-	-BUCKn 	:	for BUCKs, where n can lie in range 1 to 10.
-			example: BUCK1, BUCK5, BUCK10.
-
-The max77802 regulator supports two different operating modes: Normal and Low
-Power Mode. Some regulators support the modes to be changed at startup or by
-the consumers during normal operation while others only support to change the
-mode during system suspend. The standard regulator suspend states binding can
-be used to configure the regulator operating mode.
-
-The regulators that support the standard "regulator-initial-mode" property,
-changing their mode during normal operation are: LDOs 1, 3, 20 and 21.
-
-The possible values for "regulator-initial-mode" and "regulator-mode" are:
-	1: Normal regulator voltage output mode.
-	3: Low Power which reduces the quiescent current down to only 1uA
-
-The valid modes list is defined in the dt-bindings/regulator/maxim,max77802.h
-header and can be included by device tree source files.
-
-The standard "regulator-mode" property can only be used for regulators that
-support changing their mode to Low Power Mode during suspend. These regulators
-are: BUCKs 2-4 and LDOs 1-35. Also, it only takes effect if the regulator has
-been enabled for the given suspend state using "regulator-on-in-suspend" and
-has not been disabled for that state using "regulator-off-in-suspend".
-
-Example:
-
-	max77802@9 {
-		compatible = "maxim,max77802";
-		interrupt-parent = <&wakeup_eint>;
-		interrupts = <26 0>;
-		reg = <0x09>;
-		#address-cells = <1>;
-		#size-cells = <0>;
-
-		inb1-supply = <&parent_reg>;
-
-		regulators {
-			ldo1_reg: LDO1 {
-				regulator-name = "vdd_1v0";
-				regulator-min-microvolt = <1000000>;
-				regulator-max-microvolt = <1000000>;
-				regulator-always-on;
-				regulator-initial-mode = <MAX77802_OPMODE_LP>;
-			};
-
-			ldo11_reg: LDO11 {
-				regulator-name = "vdd_ldo11";
-				regulator-min-microvolt = <1900000>;
-				regulator-max-microvolt = <1900000>;
-				regulator-always-on;
-				regulator-state-mem {
-					regulator-on-in-suspend;
-					regulator-mode = <MAX77802_OPMODE_LP>;
-				};
-			};
-
-			buck1_reg: BUCK1 {
-				regulator-name = "vdd_mif";
-				regulator-min-microvolt = <950000>;
-				regulator-max-microvolt = <1300000>;
-				regulator-always-on;
-				regulator-boot-on;
-			};
-	};
diff --git a/Documentation/devicetree/bindings/regulator/maxim,max77802.yaml b/Documentation/devicetree/bindings/regulator/maxim,max77802.yaml
new file mode 100644
index 0000000000000..f2b4dd15a0f36
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/maxim,max77802.yaml
@@ -0,0 +1,85 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/regulator/maxim,max77802.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Maxim MAX77802 Power Management IC regulators
+
+maintainers:
+  - Javier Martinez Canillas <javier@dowhile0.org>
+  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+
+description: |
+  This is a part of device tree bindings for Maxim MAX77802 Power Management
+  Integrated Circuit (PMIC).
+
+  The Maxim MAX77686 provides 10 high-efficiency Buck and 32 Low-DropOut (LDO)
+  regulators.
+
+  See also Documentation/devicetree/bindings/mfd/maxim,max77802.yaml for
+  additional information and example.
+
+  Certain regulators support "regulator-initial-mode" and "regulator-mode".
+  The valid modes list is defined in the dt-bindings/regulator/maxim,max77802.h
+  and their meaning is::
+    1 - Normal regulator voltage output mode.
+    3 - Low Power which reduces the quiescent current down to only 1uA
+
+  The standard "regulator-mode" property can only be used for regulators that
+  support changing their mode to Low Power Mode during suspend. These
+  regulators are:: bucks 2-4 and LDOs 1-35. Also, it only takes effect if the
+  regulator has been enabled for the given suspend state using
+  "regulator-on-in-suspend" and has not been disabled for that state using
+  "regulator-off-in-suspend".
+
+patternProperties:
+  # LDO1, LDO3, LDO20, LDO21
+  "^LDO([13]|2[01])$":
+    type: object
+    $ref: regulator.yaml#
+    unevaluatedProperties: false
+    description:
+      LDOs supporting the regulator-initial-mode property and changing their
+      mode during normal operation.
+
+  # LDO2, LDO4-15, LDO17-19, LDO23-30, LDO32-35
+  "^LDO([24-9]|1[0-5789]|2[3-9]|3[02345])$":
+    type: object
+    $ref: regulator.yaml#
+    unevaluatedProperties: false
+    description:
+      LDOs supporting the regulator-mode property (changing mode to Low Power
+      Mode during suspend).
+
+    properties:
+      regulator-initial-mode: false
+
+  # buck2-4
+  "^BUCK[2-4]$":
+    type: object
+    $ref: regulator.yaml#
+    unevaluatedProperties: false
+    description:
+      bucks supporting the regulator-mode property (changing mode to Low Power
+      Mode during suspend).
+
+    properties:
+      regulator-initial-mode: false
+
+  # buck1, buck5-10
+  "^BUCK([15-9]|10)$":
+    type: object
+    $ref: regulator.yaml#
+    unevaluatedProperties: false
+
+    properties:
+      regulator-initial-mode: false
+
+    patternProperties:
+      regulator-state-(standby|mem|disk):
+        type: object
+        properties:
+          regulator-mode: false
+
+additionalProperties: false
-- 
GitLab


From 172e611b54e813c49a35b6b74bccaa99f27bf566 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Tue, 15 Feb 2022 08:53:44 +0100
Subject: [PATCH 1010/1586] dt-bindings: mfd: maxim,max77802: Convert to
 dtschema

Convert the MFD part of Maxim MAX77802 PMIC to DT schema format.  The
example DTS was copied from existing DTS (exynos5800-peach-pi.dts), so
keep the license as GPL-2.0-only.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Link: https://lore.kernel.org/r/20220215075344.31421-3-krzysztof.kozlowski@canonical.com
---
 .../devicetree/bindings/mfd/max77802.txt      |  25 ---
 .../bindings/mfd/maxim,max77802.yaml          | 194 ++++++++++++++++++
 MAINTAINERS                                   |   2 +-
 3 files changed, 195 insertions(+), 26 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/mfd/max77802.txt
 create mode 100644 Documentation/devicetree/bindings/mfd/maxim,max77802.yaml

diff --git a/Documentation/devicetree/bindings/mfd/max77802.txt b/Documentation/devicetree/bindings/mfd/max77802.txt
deleted file mode 100644
index 09decac20d919..0000000000000
--- a/Documentation/devicetree/bindings/mfd/max77802.txt
+++ /dev/null
@@ -1,25 +0,0 @@
-Maxim MAX77802 multi-function device
-
-The Maxim MAX77802 is a Power Management IC (PMIC) that contains 10 high
-efficiency Buck regulators, 32 Low-DropOut (LDO) regulators used to power
-up application processors and peripherals, a 2-channel 32kHz clock outputs,
-a Real-Time-Clock (RTC) and a I2C interface to program the individual
-regulators, clocks outputs and the RTC.
-
-Bindings for the built-in 32k clock generator block and
-regulators are defined in ../clk/maxim,max77802.txt and
-../regulator/max77802.txt respectively.
-
-Required properties:
-- compatible		: Must be "maxim,max77802"
-- reg			: Specifies the I2C slave address of PMIC block.
-- interrupts		: I2C device IRQ line connected to the main SoC.
-
-Example:
-
-	max77802: pmic@9 {
-		compatible = "maxim,max77802";
-		interrupt-parent = <&intc>;
-		interrupts = <26 IRQ_TYPE_NONE>;
-		reg = <0x09>;
-	};
diff --git a/Documentation/devicetree/bindings/mfd/maxim,max77802.yaml b/Documentation/devicetree/bindings/mfd/maxim,max77802.yaml
new file mode 100644
index 0000000000000..baa1346ac5d5a
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/maxim,max77802.yaml
@@ -0,0 +1,194 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/mfd/maxim,max77802.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Maxim MAX77802 Power Management IC
+
+maintainers:
+  - Javier Martinez Canillas <javier@dowhile0.org>
+  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+
+description: |
+  This is a part of device tree bindings for Maxim MAX77802 Power Management
+  Integrated Circuit (PMIC).
+
+  The Maxim MAX77802 is a Power Management IC which includes voltage and
+  current regulators (10 high efficiency Buck regulators and 32 Low-DropOut
+  (LDO)), RTC and clock outputs.
+
+  The MAX77802 provides two 32.768khz clock outputs that can be controlled
+  (gated/ungated) over I2C.  The clock IDs are defined as preprocessor macros
+  in dt-bindings/clock/maxim,max77802.h.
+
+properties:
+  compatible:
+    const: maxim,max77802
+
+  '#clock-cells':
+    const: 1
+
+  interrupts:
+    maxItems: 1
+
+  reg:
+    maxItems: 1
+
+  regulators:
+    $ref: /schemas/regulator/maxim,max77802.yaml
+    description:
+      List of child nodes that specify the regulators.
+
+  inb1-supply:
+    description: Power supply for buck1
+  inb2-supply:
+    description: Power supply for buck2
+  inb3-supply:
+    description: Power supply for buck3
+  inb4-supply:
+    description: Power supply for buck4
+  inb5-supply:
+    description: Power supply for buck5
+  inb6-supply:
+    description: Power supply for buck6
+  inb7-supply:
+    description: Power supply for buck7
+  inb8-supply:
+    description: Power supply for buck8
+  inb9-supply:
+    description: Power supply for buck9
+  inb10-supply:
+    description: Power supply for buck10
+
+  inl1-supply:
+    description: Power supply for LDO8, LDO15
+  inl2-supply:
+    description: Power supply for LDO17, LDO27, LDO30, LDO35
+  inl3-supply:
+    description: Power supply for LDO3, LDO5, LDO7, LDO7
+  inl4-supply:
+    description: Power supply for LDO10, LDO11, LDO13, LDO14
+  inl5-supply:
+    description: Power supply for LDO9, LDO19
+  inl6-supply:
+    description: Power supply for LDO4, LDO21, LDO24, LDO33
+  inl7-supply:
+    description: Power supply for LDO18, LDO20, LDO28, LDO29
+  inl9-supply:
+    description: Power supply for LDO12, LDO23, LDO25, LDO26, LDO32, LDO34
+  inl10-supply:
+    description: Power supply for LDO1, LDO2
+
+  wakeup-source: true
+
+required:
+  - compatible
+  - '#clock-cells'
+  - reg
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/irq.h>
+    #include <dt-bindings/regulator/maxim,max77802.h>
+
+    i2c {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        pmic@9 {
+            compatible = "maxim,max77802";
+            interrupt-parent = <&gpx3>;
+            interrupts = <1 IRQ_TYPE_NONE>;
+            pinctrl-names = "default";
+            pinctrl-0 = <&max77802_irq>, <&pmic_selb>,
+                        <&pmic_dvs_1>, <&pmic_dvs_2>, <&pmic_dvs_3>;
+            wakeup-source;
+            reg = <0x9>;
+            #clock-cells = <1>;
+
+            inb1-supply = <&tps65090_dcdc2>;
+            inb2-supply = <&tps65090_dcdc1>;
+            inb3-supply = <&tps65090_dcdc2>;
+            inb4-supply = <&tps65090_dcdc2>;
+            inb5-supply = <&tps65090_dcdc1>;
+            inb6-supply = <&tps65090_dcdc2>;
+            inb7-supply = <&tps65090_dcdc1>;
+            inb8-supply = <&tps65090_dcdc1>;
+            inb9-supply = <&tps65090_dcdc1>;
+            inb10-supply = <&tps65090_dcdc1>;
+
+            inl1-supply = <&buck5_reg>;
+            inl2-supply = <&buck7_reg>;
+            inl3-supply = <&buck9_reg>;
+            inl4-supply = <&buck9_reg>;
+            inl5-supply = <&buck9_reg>;
+            inl6-supply = <&tps65090_dcdc2>;
+            inl7-supply = <&buck9_reg>;
+            inl9-supply = <&tps65090_dcdc2>;
+            inl10-supply = <&buck7_reg>;
+
+            regulators {
+                BUCK1 {
+                    regulator-name = "vdd_mif";
+                    regulator-min-microvolt = <800000>;
+                    regulator-max-microvolt = <1300000>;
+                    regulator-always-on;
+                    regulator-boot-on;
+                    regulator-ramp-delay = <12500>;
+                    regulator-state-mem {
+                        regulator-off-in-suspend;
+                    };
+                };
+
+                BUCK2 {
+                    regulator-name = "vdd_arm";
+                    regulator-min-microvolt = <800000>;
+                    regulator-max-microvolt = <1500000>;
+                    regulator-always-on;
+                    regulator-boot-on;
+                    regulator-ramp-delay = <12500>;
+                    regulator-coupled-with = <&buck3_reg>;
+                    regulator-coupled-max-spread = <300000>;
+                    regulator-state-mem {
+                        regulator-off-in-suspend;
+                    };
+                };
+
+                // ...
+
+                BUCK10 {
+                    regulator-name = "vdd_1v8";
+                    regulator-min-microvolt = <1800000>;
+                    regulator-max-microvolt = <1800000>;
+                    regulator-always-on;
+                    regulator-boot-on;
+                    regulator-state-mem {
+                        regulator-on-in-suspend;
+                    };
+                };
+
+                LDO1 {
+                    regulator-name = "vdd_1v0";
+                    regulator-min-microvolt = <1000000>;
+                    regulator-max-microvolt = <1000000>;
+                    regulator-always-on;
+                    regulator-initial-mode = <MAX77802_OPMODE_NORMAL>;
+                    regulator-state-mem {
+                        regulator-on-in-suspend;
+                        regulator-mode = <MAX77802_OPMODE_LP>;
+                    };
+                };
+
+                // ...
+
+                LDO35 {
+                    regulator-name = "ldo_35";
+                    regulator-min-microvolt = <1200000>;
+                    regulator-max-microvolt = <1200000>;
+                };
+            };
+        };
+    };
diff --git a/MAINTAINERS b/MAINTAINERS
index 31d1c0926697a..823cdb3fe41b1 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11695,7 +11695,7 @@ MAXIM MAX77802 PMIC REGULATOR DEVICE DRIVER
 M:	Javier Martinez Canillas <javier@dowhile0.org>
 L:	linux-kernel@vger.kernel.org
 S:	Supported
-F:	Documentation/devicetree/bindings/*/*max77802.txt
+F:	Documentation/devicetree/bindings/*/*max77802.yaml
 F:	drivers/regulator/max77802-regulator.c
 F:	include/dt-bindings/*/*max77802.h
 
-- 
GitLab


From bbc7ba0fa06ab4aee26969a9454ca32e4a8fcb1c Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Sat, 5 Mar 2022 08:24:38 -0800
Subject: [PATCH 1011/1586] regulator: cleanup comments

For spdx
Remove leading space, add space after //

Replacements
overriden to overridden
Calulate to Calculate
addional to additional
regulatior to regulator
devive to device

Signed-off-by: Tom Rix <trix@redhat.com>
Acked-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Link: https://lore.kernel.org/r/20220305162438.689442-1-trix@redhat.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/max8973-regulator.c | 2 +-
 drivers/regulator/sc2731-regulator.c  | 2 +-
 drivers/regulator/ti-abb-regulator.c  | 6 +++---
 drivers/regulator/wm8350-regulator.c  | 2 +-
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/regulator/max8973-regulator.c b/drivers/regulator/max8973-regulator.c
index 80b65cb87cefd..cb7e50003f702 100644
--- a/drivers/regulator/max8973-regulator.c
+++ b/drivers/regulator/max8973-regulator.c
@@ -459,7 +459,7 @@ static int max8973_thermal_read_temp(void *data, int *temp)
 		return ret;
 	}
 
-	/* +1 degC to trigger cool devive */
+	/* +1 degC to trigger cool device */
 	if (val & MAX77621_CHIPID_TJINT_S)
 		*temp = mchip->junction_temp_warning + 1000;
 	else
diff --git a/drivers/regulator/sc2731-regulator.c b/drivers/regulator/sc2731-regulator.c
index 0f21f95c8981c..71e5ceb679f4a 100644
--- a/drivers/regulator/sc2731-regulator.c
+++ b/drivers/regulator/sc2731-regulator.c
@@ -1,4 +1,4 @@
- //SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2017 Spreadtrum Communications Inc.
  */
diff --git a/drivers/regulator/ti-abb-regulator.c b/drivers/regulator/ti-abb-regulator.c
index 2931a0b89bffb..bd7b2f287250e 100644
--- a/drivers/regulator/ti-abb-regulator.c
+++ b/drivers/regulator/ti-abb-regulator.c
@@ -42,7 +42,7 @@
 /**
  * struct ti_abb_info - ABB information per voltage setting
  * @opp_sel:	one of TI_ABB macro
- * @vset:	(optional) vset value that LDOVBB needs to be overriden with.
+ * @vset:	(optional) vset value that LDOVBB needs to be overridden with.
  *
  * Array of per voltage entries organized in the same order as regulator_desc's
  * volt_table list. (selector is used to index from this array)
@@ -484,7 +484,7 @@ static int ti_abb_init_timings(struct device *dev, struct ti_abb *abb)
 	/* Calculate cycle rate */
 	cycle_rate = DIV_ROUND_CLOSEST(clock_cycles * 10, clk_rate);
 
-	/* Calulate SR2_WTCNT_VALUE */
+	/* Calculate SR2_WTCNT_VALUE */
 	sr2_wt_cnt_val = DIV_ROUND_CLOSEST(abb->settling_time * 10, cycle_rate);
 
 	dev_dbg(dev, "%s: Clk_rate=%ld, sr2_cnt=0x%08x\n", __func__,
@@ -688,7 +688,7 @@ MODULE_DEVICE_TABLE(of, ti_abb_of_match);
  * @pdev: ABB platform device
  *
  * Initializes an individual ABB LDO for required Body-Bias. ABB is used to
- * addional bias supply to SoC modules for power savings or mandatory stability
+ * additional bias supply to SoC modules for power savings or mandatory stability
  * configuration at certain Operating Performance Points(OPPs).
  *
  * Return: 0 on success or appropriate error value when fails
diff --git a/drivers/regulator/wm8350-regulator.c b/drivers/regulator/wm8350-regulator.c
index 6579bfdb0c262..b1d5aac8917de 100644
--- a/drivers/regulator/wm8350-regulator.c
+++ b/drivers/regulator/wm8350-regulator.c
@@ -1112,7 +1112,7 @@ static int wm8350_regulator_probe(struct platform_device *pdev)
 	if (pdev->id < WM8350_DCDC_1 || pdev->id > WM8350_ISINK_B)
 		return -ENODEV;
 
-	/* do any regulatior specific init */
+	/* do any regulator specific init */
 	switch (pdev->id) {
 	case WM8350_DCDC_1:
 		val = wm8350_reg_read(wm8350, WM8350_DCDC1_LOW_POWER);
-- 
GitLab


From af524ae5ad13a9c28acf0b2ec4489d5903c4fbed Mon Sep 17 00:00:00 2001
From: Kuldeep Singh <singh.kuldeep87k@gmail.com>
Date: Thu, 3 Mar 2022 00:59:15 +0530
Subject: [PATCH 1012/1586] spi: Update NXP Flexspi maintainer details

Add Han Xu as flexspi maintainer.
Also, update my email address as previous one is not working anymore.

Signed-off-by: Kuldeep Singh <singh.kuldeep87k@gmail.com>
Acked-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20220302192915.6193-1-singh.kuldeep87k@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 Documentation/devicetree/bindings/spi/spi-nxp-fspi.yaml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/spi/spi-nxp-fspi.yaml b/Documentation/devicetree/bindings/spi/spi-nxp-fspi.yaml
index 283815d59e85f..1b552c298277b 100644
--- a/Documentation/devicetree/bindings/spi/spi-nxp-fspi.yaml
+++ b/Documentation/devicetree/bindings/spi/spi-nxp-fspi.yaml
@@ -7,7 +7,8 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: NXP Flex Serial Peripheral Interface (FSPI)
 
 maintainers:
-  - Kuldeep Singh <kuldeep.singh@nxp.com>
+  - Han Xu <han.xu@nxp.com>
+  - Kuldeep Singh <singh.kuldeep87k@gmail.com>
 
 allOf:
   - $ref: "spi-controller.yaml#"
-- 
GitLab


From c59dbc642d4e76187516960780b6cd26e7f2c943 Mon Sep 17 00:00:00 2001
From: Yihao Han <hanyihao@vivo.com>
Date: Thu, 3 Mar 2022 04:50:54 -0800
Subject: [PATCH 1013/1586] spi: cadence: fix platform_get_irq.cocci warning

Remove dev_err() messages after platform_get_irq*() failures.
platform_get_irq() already prints an error.

Generated by: scripts/coccinelle/api/platform_get_irq.cocci

Signed-off-by: Yihao Han <hanyihao@vivo.com>
Link: https://lore.kernel.org/r/20220303125054.3574-1-hanyihao@vivo.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-cadence-xspi.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/spi/spi-cadence-xspi.c b/drivers/spi/spi-cadence-xspi.c
index 4bc1b93fc276b..3ab19be830951 100644
--- a/drivers/spi/spi-cadence-xspi.c
+++ b/drivers/spi/spi-cadence-xspi.c
@@ -578,10 +578,8 @@ static int cdns_xspi_probe(struct platform_device *pdev)
 	}
 
 	cdns_xspi->irq = platform_get_irq(pdev, 0);
-	if (cdns_xspi->irq < 0) {
-		dev_err(dev, "Failed to get IRQ\n");
+	if (cdns_xspi->irq < 0)
 		return -ENXIO;
-	}
 
 	ret = devm_request_irq(dev, cdns_xspi->irq, cdns_xspi_irq_handler,
 			       IRQF_SHARED, pdev->name, cdns_xspi);
-- 
GitLab


From fa0f3db49e10ac61774e1c90167ec79429d6fd56 Mon Sep 17 00:00:00 2001
From: Xingbang Liu <liu.airalert@gmail.com>
Date: Wed, 2 Mar 2022 15:15:21 +0800
Subject: [PATCH 1014/1586] spi: qup: replace spin_lock_irqsave by spin_lock in
 hard IRQ

The code has been in a irq-disabled context since it is hard IRQ. There
is no necessity to do it again.

Signed-off-by: Xingbang Liu <liu.airalert@gmail.com>
Link: https://lore.kernel.org/r/20220302071521.6638-1-liu.airalert@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-qup.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/spi/spi-qup.c b/drivers/spi/spi-qup.c
index d39dec6d1c91e..00d6084306b4a 100644
--- a/drivers/spi/spi-qup.c
+++ b/drivers/spi/spi-qup.c
@@ -593,7 +593,6 @@ static irqreturn_t spi_qup_qup_irq(int irq, void *dev_id)
 {
 	struct spi_qup *controller = dev_id;
 	u32 opflags, qup_err, spi_err;
-	unsigned long flags;
 	int error = 0;
 
 	qup_err = readl_relaxed(controller->base + QUP_ERROR_FLAGS);
@@ -625,10 +624,10 @@ static irqreturn_t spi_qup_qup_irq(int irq, void *dev_id)
 		error = -EIO;
 	}
 
-	spin_lock_irqsave(&controller->lock, flags);
+	spin_lock(&controller->lock);
 	if (!controller->error)
 		controller->error = error;
-	spin_unlock_irqrestore(&controller->lock, flags);
+	spin_unlock(&controller->lock);
 
 	if (spi_qup_is_dma_xfer(controller->mode)) {
 		writel_relaxed(opflags, controller->base + QUP_OPERATIONAL);
-- 
GitLab


From 320689a1b543ca1396b3ed43bb18045e4a7ffd79 Mon Sep 17 00:00:00 2001
From: Jon Hunter <jonathanh@nvidia.com>
Date: Mon, 7 Mar 2022 11:35:29 +0000
Subject: [PATCH 1015/1586] spi: Fix Tegra QSPI example

When running dt_binding_check on the nvidia,tegra210-quad.yaml binding
document the following error is reported ...

 nvidia,tegra210-quad.example.dt.yaml:0:0: /example-0/spi@70410000/flash@0:
 	failed to match any schema with compatible: ['spi-nor']

Update the example in the binding document to fix the above error.

Signed-off-by: Jon Hunter <jonathanh@nvidia.com>
Fixes: 9684752e5fe3 ("dt-bindings: spi: Add Tegra Quad SPI device tree  binding")
Link: https://lore.kernel.org/r/20220307113529.315685-1-jonathanh@nvidia.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 Documentation/devicetree/bindings/spi/nvidia,tegra210-quad.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/spi/nvidia,tegra210-quad.yaml b/Documentation/devicetree/bindings/spi/nvidia,tegra210-quad.yaml
index 6efea8970e623..0296edd1de22e 100644
--- a/Documentation/devicetree/bindings/spi/nvidia,tegra210-quad.yaml
+++ b/Documentation/devicetree/bindings/spi/nvidia,tegra210-quad.yaml
@@ -107,7 +107,7 @@ examples:
             dma-names = "rx", "tx";
 
             flash@0 {
-                    compatible = "spi-nor";
+                    compatible = "jedec,spi-nor";
                     reg = <0>;
                     spi-max-frequency = <104000000>;
                     spi-tx-bus-width = <2>;
-- 
GitLab


From b15e3bc76925eb1366348483fca89f115c8cde31 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= <j.neuschaefer@gmx.net>
Date: Sun, 6 Mar 2022 15:23:12 +0100
Subject: [PATCH 1016/1586] spi: npcm-fiu: Fix typo ("npxm")
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The platform is called NPCM, not NPXM.

Signed-off-by: Jonathan Neuschäfer <j.neuschaefer@gmx.net>
Link: https://lore.kernel.org/r/20220306142312.109017-1-j.neuschaefer@gmx.net
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-npcm-fiu.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/spi/spi-npcm-fiu.c b/drivers/spi/spi-npcm-fiu.c
index 71ed75bf0a20b..ba67dbed9fb8f 100644
--- a/drivers/spi/spi-npcm-fiu.c
+++ b/drivers/spi/spi-npcm-fiu.c
@@ -201,7 +201,7 @@ struct fiu_data {
 	int fiu_max;
 };
 
-static const struct npcm_fiu_info npxm7xx_fiu_info[] = {
+static const struct npcm_fiu_info npcm7xx_fiu_info[] = {
 	{.name = "FIU0", .fiu_id = FIU0,
 		.max_map_size = MAP_SIZE_128MB, .max_cs = 2},
 	{.name = "FIU3", .fiu_id = FIU3,
@@ -209,8 +209,8 @@ static const struct npcm_fiu_info npxm7xx_fiu_info[] = {
 	{.name = "FIUX", .fiu_id = FIUX,
 		.max_map_size = MAP_SIZE_16MB, .max_cs = 2} };
 
-static const struct fiu_data npxm7xx_fiu_data = {
-	.npcm_fiu_data_info = npxm7xx_fiu_info,
+static const struct fiu_data npcm7xx_fiu_data = {
+	.npcm_fiu_data_info = npcm7xx_fiu_info,
 	.fiu_max = 3,
 };
 
@@ -664,7 +664,7 @@ static const struct spi_controller_mem_ops npcm_fiu_mem_ops = {
 };
 
 static const struct of_device_id npcm_fiu_dt_ids[] = {
-	{ .compatible = "nuvoton,npcm750-fiu", .data = &npxm7xx_fiu_data  },
+	{ .compatible = "nuvoton,npcm750-fiu", .data = &npcm7xx_fiu_data  },
 	{ /* sentinel */ }
 };
 
-- 
GitLab


From 57e95e4670d1126c103305bcf34a9442f49f6d6a Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 4 Mar 2022 19:00:56 +0100
Subject: [PATCH 1017/1586] block: fix and cleanup bio_check_ro

Don't use a WARN_ON when printing a potentially user triggered
condition.  Also don't print the partno when the block device name
already includes it, and use the %pg specifier to simplify printing
the block device name.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20220304180105.409765-2-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 94bf37f8e61d2..34e1b7fdb7c89 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -580,14 +580,10 @@ late_initcall(fail_make_request_debugfs);
 static inline bool bio_check_ro(struct bio *bio)
 {
 	if (op_is_write(bio_op(bio)) && bdev_read_only(bio->bi_bdev)) {
-		char b[BDEVNAME_SIZE];
-
 		if (op_is_flush(bio->bi_opf) && !bio_sectors(bio))
 			return false;
-
-		WARN_ONCE(1,
-		       "Trying to write to read-only block-device %s (partno %d)\n",
-			bio_devname(bio, b), bio->bi_bdev->bd_partno);
+		pr_warn("Trying to write to read-only block-device %pg\n",
+			bio->bi_bdev);
 		/* Older lvm-tools actually trigger this */
 		return false;
 	}
-- 
GitLab


From ad740780bbc2fe37856f944dbbaff07aac9db9e3 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 4 Mar 2022 19:00:57 +0100
Subject: [PATCH 1018/1586] block: remove handle_bad_sector

Use the %pg format specifier instead of the stack hungry bdevname
function, and remove handle_bad_sector given that it is not pointless.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20220304180105.409765-3-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c | 17 +++++------------
 1 file changed, 5 insertions(+), 12 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 34e1b7fdb7c89..4d858fc08f8ba 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -540,17 +540,6 @@ bool blk_get_queue(struct request_queue *q)
 }
 EXPORT_SYMBOL(blk_get_queue);
 
-static void handle_bad_sector(struct bio *bio, sector_t maxsector)
-{
-	char b[BDEVNAME_SIZE];
-
-	pr_info_ratelimited("%s: attempt to access beyond end of device\n"
-			    "%s: rw=%d, want=%llu, limit=%llu\n",
-			    current->comm,
-			    bio_devname(bio, b), bio->bi_opf,
-			    bio_end_sector(bio), maxsector);
-}
-
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 
 static DECLARE_FAULT_ATTR(fail_make_request);
@@ -612,7 +601,11 @@ static inline int bio_check_eod(struct bio *bio)
 	if (nr_sectors && maxsector &&
 	    (nr_sectors > maxsector ||
 	     bio->bi_iter.bi_sector > maxsector - nr_sectors)) {
-		handle_bad_sector(bio, maxsector);
+		pr_info_ratelimited("%s: attempt to access beyond end of device\n"
+				    "%pg: rw=%d, want=%llu, limit=%llu\n",
+				    current->comm,
+				    bio->bi_bdev, bio->bi_opf,
+				    bio_end_sector(bio), maxsector);
 		return -EIO;
 	}
 	return 0;
-- 
GitLab


From 47c426d5241795cfcd9be748c44d1b2e2987ce70 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 4 Mar 2022 19:00:58 +0100
Subject: [PATCH 1019/1586] pktcdvd: remove a pointless debug check in
 pkt_submit_bio

->queuedata is set up in pkt_init_queue, so it can't be NULL here.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20220304180105.409765-4-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/pktcdvd.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index be749c686feb7..e745fc29e55d8 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -2400,18 +2400,11 @@ static void pkt_make_request_write(struct request_queue *q, struct bio *bio)
 
 static void pkt_submit_bio(struct bio *bio)
 {
-	struct pktcdvd_device *pd;
-	char b[BDEVNAME_SIZE];
+	struct pktcdvd_device *pd = bio->bi_bdev->bd_disk->queue->queuedata;
 	struct bio *split;
 
 	blk_queue_split(&bio);
 
-	pd = bio->bi_bdev->bd_disk->queue->queuedata;
-	if (!pd) {
-		pr_err("%s incorrect request queue\n", bio_devname(bio, b));
-		goto end_io;
-	}
-
 	pkt_dbg(2, pd, "start = %6llx stop = %6llx\n",
 		(unsigned long long)bio->bi_iter.bi_sector,
 		(unsigned long long)bio_end_sector(bio));
-- 
GitLab


From 66671719650085f92fd460d2a356c33f9198dd35 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 4 Mar 2022 19:00:59 +0100
Subject: [PATCH 1020/1586] dm-crypt: stop using bio_devname

Use the %pg format specifier to save on stack consuption and code size.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20220304180105.409765-5-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/md/dm-crypt.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index a5006cb6ee8ad..e2b0af4a2ee84 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1364,11 +1364,10 @@ static int crypt_convert_block_aead(struct crypt_config *cc,
 	}
 
 	if (r == -EBADMSG) {
-		char b[BDEVNAME_SIZE];
 		sector_t s = le64_to_cpu(*sector);
 
-		DMERR_LIMIT("%s: INTEGRITY AEAD ERROR, sector %llu",
-			    bio_devname(ctx->bio_in, b), s);
+		DMERR_LIMIT("%pg: INTEGRITY AEAD ERROR, sector %llu",
+			    ctx->bio_in->bi_bdev, s);
 		dm_audit_log_bio(DM_MSG_PREFIX, "integrity-aead",
 				 ctx->bio_in, s, 0);
 	}
@@ -2169,11 +2168,10 @@ static void kcryptd_async_done(struct crypto_async_request *async_req,
 		error = cc->iv_gen_ops->post(cc, org_iv_of_dmreq(cc, dmreq), dmreq);
 
 	if (error == -EBADMSG) {
-		char b[BDEVNAME_SIZE];
 		sector_t s = le64_to_cpu(*org_sector_of_dmreq(cc, dmreq));
 
-		DMERR_LIMIT("%s: INTEGRITY AEAD ERROR, sector %llu",
-			    bio_devname(ctx->bio_in, b), s);
+		DMERR_LIMIT("%pg: INTEGRITY AEAD ERROR, sector %llu",
+			    ctx->bio_in->bi_bdev, s);
 		dm_audit_log_bio(DM_MSG_PREFIX, "integrity-aead",
 				 ctx->bio_in, s, 0);
 		io->error = BLK_STS_PROTECTION;
-- 
GitLab


From 0a806cfde82fcd1fb856864e33d17c68d1b51dee Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 4 Mar 2022 19:01:00 +0100
Subject: [PATCH 1021/1586] dm-integrity: stop using bio_devname

Use the %pg format specifier to save on stack consuption and code size.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20220304180105.409765-6-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/md/dm-integrity.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index eb4b5e52bd6ff..c58a5111cb575 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -1788,12 +1788,11 @@ again:
 						checksums_ptr - checksums, dio->op == REQ_OP_READ ? TAG_CMP : TAG_WRITE);
 			if (unlikely(r)) {
 				if (r > 0) {
-					char b[BDEVNAME_SIZE];
 					sector_t s;
 
 					s = sector - ((r + ic->tag_size - 1) / ic->tag_size);
-					DMERR_LIMIT("%s: Checksum failed at sector 0x%llx",
-						    bio_devname(bio, b), s);
+					DMERR_LIMIT("%pg: Checksum failed at sector 0x%llx",
+						    bio->bi_bdev, s);
 					r = -EILSEQ;
 					atomic64_inc(&ic->number_of_mismatches);
 					dm_audit_log_bio(DM_MSG_PREFIX, "integrity-checksum",
-- 
GitLab


From ee1925bd834418218c782c94e889f826d40b14d5 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 4 Mar 2022 19:01:01 +0100
Subject: [PATCH 1022/1586] md-multipath: stop using bio_devname

Use the %pg format specifier to save on stack consuption and code size.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Song Liu <song@kernel.org>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20220304180105.409765-7-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/md/md-multipath.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/md/md-multipath.c b/drivers/md/md-multipath.c
index 97fb948e3e741..3081a936350d2 100644
--- a/drivers/md/md-multipath.c
+++ b/drivers/md/md-multipath.c
@@ -297,7 +297,6 @@ static void multipathd(struct md_thread *thread)
 
 	md_check_recovery(mddev);
 	for (;;) {
-		char b[BDEVNAME_SIZE];
 		spin_lock_irqsave(&conf->device_lock, flags);
 		if (list_empty(head))
 			break;
@@ -309,13 +308,13 @@ static void multipathd(struct md_thread *thread)
 		bio->bi_iter.bi_sector = mp_bh->master_bio->bi_iter.bi_sector;
 
 		if ((mp_bh->path = multipath_map (conf))<0) {
-			pr_err("multipath: %s: unrecoverable IO read error for block %llu\n",
-			       bio_devname(bio, b),
+			pr_err("multipath: %pg: unrecoverable IO read error for block %llu\n",
+			       bio->bi_bdev,
 			       (unsigned long long)bio->bi_iter.bi_sector);
 			multipath_end_bh_io(mp_bh, BLK_STS_IOERR);
 		} else {
-			pr_err("multipath: %s: redirecting sector %llu to another IO path\n",
-			       bio_devname(bio, b),
+			pr_err("multipath: %pg: redirecting sector %llu to another IO path\n",
+			       bio->bi_bdev,
 			       (unsigned long long)bio->bi_iter.bi_sector);
 			*bio = *(mp_bh->master_bio);
 			bio->bi_iter.bi_sector +=
-- 
GitLab


From ac483eb375fa4a815a515945a5456086c197430e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 4 Mar 2022 19:01:02 +0100
Subject: [PATCH 1023/1586] raid1: stop using bio_devname

Use the %pg format specifier to save on stack consuption and code size.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Song Liu <song@kernel.org>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20220304180105.409765-8-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/md/raid1.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index c3288d46948de..03477e971699b 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -2072,15 +2072,14 @@ static int fix_sync_read_error(struct r1bio *r1_bio)
 		} while (!success && d != r1_bio->read_disk);
 
 		if (!success) {
-			char b[BDEVNAME_SIZE];
 			int abort = 0;
 			/* Cannot read from anywhere, this block is lost.
 			 * Record a bad block on each device.  If that doesn't
 			 * work just disable and interrupt the recovery.
 			 * Don't fail devices as that won't really help.
 			 */
-			pr_crit_ratelimited("md/raid1:%s: %s: unrecoverable I/O read error for block %llu\n",
-					    mdname(mddev), bio_devname(bio, b),
+			pr_crit_ratelimited("md/raid1:%s: %pg: unrecoverable I/O read error for block %llu\n",
+					    mdname(mddev), bio->bi_bdev,
 					    (unsigned long long)r1_bio->sector);
 			for (d = 0; d < conf->raid_disks * 2; d++) {
 				rdev = conf->mirrors[d].rdev;
-- 
GitLab


From c7dec4623c9cde20dad8de319d177ed6aa382aaa Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 4 Mar 2022 19:01:03 +0100
Subject: [PATCH 1024/1586] raid5-ppl: stop using bio_devname

Use the %pg format specifier to save on stack consuption and code size.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Song Liu <song@kernel.org>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20220304180105.409765-9-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/md/raid5-ppl.c | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c
index 3446797fa0aca..bbb5673104ec2 100644
--- a/drivers/md/raid5-ppl.c
+++ b/drivers/md/raid5-ppl.c
@@ -416,12 +416,10 @@ static void ppl_log_endio(struct bio *bio)
 
 static void ppl_submit_iounit_bio(struct ppl_io_unit *io, struct bio *bio)
 {
-	char b[BDEVNAME_SIZE];
-
-	pr_debug("%s: seq: %llu size: %u sector: %llu dev: %s\n",
+	pr_debug("%s: seq: %llu size: %u sector: %llu dev: %pg\n",
 		 __func__, io->seq, bio->bi_iter.bi_size,
 		 (unsigned long long)bio->bi_iter.bi_sector,
-		 bio_devname(bio, b));
+		 bio->bi_bdev);
 
 	submit_bio(bio);
 }
@@ -589,9 +587,8 @@ static void ppl_flush_endio(struct bio *bio)
 	struct ppl_log *log = io->log;
 	struct ppl_conf *ppl_conf = log->ppl_conf;
 	struct r5conf *conf = ppl_conf->mddev->private;
-	char b[BDEVNAME_SIZE];
 
-	pr_debug("%s: dev: %s\n", __func__, bio_devname(bio, b));
+	pr_debug("%s: dev: %pg\n", __func__, bio->bi_bdev);
 
 	if (bio->bi_status) {
 		struct md_rdev *rdev;
@@ -634,7 +631,6 @@ static void ppl_do_flush(struct ppl_io_unit *io)
 
 		if (bdev) {
 			struct bio *bio;
-			char b[BDEVNAME_SIZE];
 
 			bio = bio_alloc_bioset(bdev, 0, GFP_NOIO,
 					       REQ_OP_WRITE | REQ_PREFLUSH,
@@ -642,8 +638,7 @@ static void ppl_do_flush(struct ppl_io_unit *io)
 			bio->bi_private = io;
 			bio->bi_end_io = ppl_flush_endio;
 
-			pr_debug("%s: dev: %s\n", __func__,
-				 bio_devname(bio, b));
+			pr_debug("%s: dev: %ps\n", __func__, bio->bi_bdev);
 
 			submit_bio(bio);
 			flushed_disks++;
-- 
GitLab


From 734294e47a2ec48fd25dcf2d96cdf2c6c6740c00 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 4 Mar 2022 19:01:04 +0100
Subject: [PATCH 1025/1586] ext4: stop using bio_devname

Use the %pg format specifier to save on stack consuption and code size.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20220304180105.409765-10-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/ext4/page-io.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 1253982268730..18373fa529225 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -323,10 +323,9 @@ static void ext4_end_bio(struct bio *bio)
 {
 	ext4_io_end_t *io_end = bio->bi_private;
 	sector_t bi_sector = bio->bi_iter.bi_sector;
-	char b[BDEVNAME_SIZE];
 
-	if (WARN_ONCE(!io_end, "io_end is NULL: %s: sector %Lu len %u err %d\n",
-		      bio_devname(bio, b),
+	if (WARN_ONCE(!io_end, "io_end is NULL: %pg: sector %Lu len %u err %d\n",
+		      bio->bi_bdev,
 		      (long long) bio->bi_iter.bi_sector,
 		      (unsigned) bio_sectors(bio),
 		      bio->bi_status)) {
-- 
GitLab


From 97939610b893de068c82c347d06319cd231a4602 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 4 Mar 2022 19:01:05 +0100
Subject: [PATCH 1026/1586] block: remove bio_devname

All callers are gone, so remove this wrapper.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20220304180105.409765-11-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c         | 6 ------
 include/linux/bio.h | 2 --
 2 files changed, 8 deletions(-)

diff --git a/block/bio.c b/block/bio.c
index b15f5466ce084..151cace2dbe16 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -807,12 +807,6 @@ int bio_init_clone(struct block_device *bdev, struct bio *bio,
 }
 EXPORT_SYMBOL(bio_init_clone);
 
-const char *bio_devname(struct bio *bio, char *buf)
-{
-	return bdevname(bio->bi_bdev, buf);
-}
-EXPORT_SYMBOL(bio_devname);
-
 /**
  * bio_full - check if the bio is full
  * @bio:	bio to check
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 7523aba4ddf7c..4c21f6e69e182 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -491,8 +491,6 @@ static inline void bio_release_pages(struct bio *bio, bool mark_dirty)
 		__bio_release_pages(bio, mark_dirty);
 }
 
-extern const char *bio_devname(struct bio *bio, char *buffer);
-
 #define bio_dev(bio) \
 	disk_devt((bio)->bi_bdev->bd_disk)
 
-- 
GitLab


From bceaae3bac0ce27c549bb050336d8d08abc2ee54 Mon Sep 17 00:00:00 2001
From: Drew Fustini <dfustini@baylibre.com>
Date: Thu, 3 Feb 2022 21:35:05 -0800
Subject: [PATCH 1027/1586] clocksource/drivers/timer-ti-dm: Fix regression
 from errata i940 fix

The existing fix for errata i940 causes a conflict for IPU2 which is
using timer 3 and 4. From arch/arm/boot/dts/dra7-ipu-dsp-common.dtsi:

  &ipu2 {
          mboxes = <&mailbox6 &mbox_ipu2_ipc3x>;
          ti,timers = <&timer3>;
          ti,watchdog-timers = <&timer4>, <&timer9>;
  };

The conflict was noticed when booting mainline on the BeagleBoard X15
which has a TI AM5728 SoC:

  remoteproc remoteproc1: 55020000.ipu is available
  remoteproc remoteproc1: powering up 55020000.ipu
  remoteproc remoteproc1: Booting fw image dra7-ipu2-fw.xem4
  omap-rproc 55020000.ipu: could not get timer platform device
  omap-rproc 55020000.ipu: omap_rproc_enable_timers failed: -19
  remoteproc remoteproc1: can't start rproc 55020000.ipu: -19

This change modifies the errata fix to instead use timer 15 and 16 which
resolves the timer conflict.

It does not appear to introduce any latency regression. Results from
cyclictest with original errata fix using dmtimer 3 and 4:

  # cyclictest --mlockall --smp --priority=80 --interval=200 --distance=0
  policy: fifo: loadavg: 0.02 0.03 0.05

  T: 0 ( 1449) P:80 I:200 C: 800368 Min:   0 Act:   32 Avg:   22 Max:  128
  T: 1 ( 1450) P:80 I:200 C: 800301 Min:   0 Act:   12 Avg:   23 Max:   70

The results after the change to dmtimer 15 and 16:

  # cyclictest --mlockall --smp --priority=80 --interval=200 --distance=0
  policy: fifo: loadavg: 0.36 0.19 0.07

  T: 0 ( 1711) P:80 I:200 C: 759599 Min:   0 Act:    6 Avg:   22 Max:  108
  T: 1 ( 1712) P:80 I:200 C: 759539 Min:   0 Act:   19 Avg:   23 Max:   79

Fixes: 25de4ce5ed02 ("clocksource/drivers/timer-ti-dm: Handle dra7 timer wrap errata i940")
Link: https://lore.kernel.org/linux-omap/YfWsG0p6to3IJuvE@x1/
Suggested-by: Suman Anna <s-anna@ti.com>
Reviewed-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Drew Fustini <dfustini@baylibre.com>
Link: https://lore.kernel.org/r/20220204053503.1409162-1-dfustini@baylibre.com
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 arch/arm/boot/dts/dra7-l4.dtsi             | 5 ++---
 arch/arm/boot/dts/dra7.dtsi                | 8 ++++----
 drivers/clocksource/timer-ti-dm-systimer.c | 4 ++--
 3 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/arch/arm/boot/dts/dra7-l4.dtsi b/arch/arm/boot/dts/dra7-l4.dtsi
index 956a26d52a4c3..0a11bacffc1f1 100644
--- a/arch/arm/boot/dts/dra7-l4.dtsi
+++ b/arch/arm/boot/dts/dra7-l4.dtsi
@@ -3482,8 +3482,7 @@
 				ti,timer-pwm;
 			};
 		};
-
-		target-module@2c000 {			/* 0x4882c000, ap 17 02.0 */
+		timer15_target: target-module@2c000 {	/* 0x4882c000, ap 17 02.0 */
 			compatible = "ti,sysc-omap4-timer", "ti,sysc";
 			reg = <0x2c000 0x4>,
 			      <0x2c010 0x4>;
@@ -3511,7 +3510,7 @@
 			};
 		};
 
-		target-module@2e000 {			/* 0x4882e000, ap 19 14.0 */
+		timer16_target: target-module@2e000 {	/* 0x4882e000, ap 19 14.0 */
 			compatible = "ti,sysc-omap4-timer", "ti,sysc";
 			reg = <0x2e000 0x4>,
 			      <0x2e010 0x4>;
diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi
index 6b485cbed8d50..8f7ffe2f66e90 100644
--- a/arch/arm/boot/dts/dra7.dtsi
+++ b/arch/arm/boot/dts/dra7.dtsi
@@ -1339,20 +1339,20 @@
 };
 
 /* Local timers, see ARM architected timer wrap erratum i940 */
-&timer3_target {
+&timer15_target {
 	ti,no-reset-on-init;
 	ti,no-idle;
 	timer@0 {
-		assigned-clocks = <&l4per_clkctrl DRA7_L4PER_TIMER3_CLKCTRL 24>;
+		assigned-clocks = <&l4per3_clkctrl DRA7_L4PER3_TIMER15_CLKCTRL 24>;
 		assigned-clock-parents = <&timer_sys_clk_div>;
 	};
 };
 
-&timer4_target {
+&timer16_target {
 	ti,no-reset-on-init;
 	ti,no-idle;
 	timer@0 {
-		assigned-clocks = <&l4per_clkctrl DRA7_L4PER_TIMER4_CLKCTRL 24>;
+		assigned-clocks = <&l4per3_clkctrl DRA7_L4PER3_TIMER16_CLKCTRL 24>;
 		assigned-clock-parents = <&timer_sys_clk_div>;
 	};
 };
diff --git a/drivers/clocksource/timer-ti-dm-systimer.c b/drivers/clocksource/timer-ti-dm-systimer.c
index b6f97960d8ee0..f52bf81dc1dd0 100644
--- a/drivers/clocksource/timer-ti-dm-systimer.c
+++ b/drivers/clocksource/timer-ti-dm-systimer.c
@@ -695,9 +695,9 @@ static int __init dmtimer_percpu_quirk_init(struct device_node *np, u32 pa)
 		return 0;
 	}
 
-	if (pa == 0x48034000)		/* dra7 dmtimer3 */
+	if (pa == 0x4882c000)           /* dra7 dmtimer15 */
 		return dmtimer_percpu_timer_init(np, 0);
-	else if (pa == 0x48036000)	/* dra7 dmtimer4 */
+	else if (pa == 0x4882e000)      /* dra7 dmtimer16 */
 		return dmtimer_percpu_timer_init(np, 1);
 
 	return 0;
-- 
GitLab


From ab8da93dc06d82f464c47ab30e6c75190702f369 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Sun, 20 Feb 2022 11:38:15 +0100
Subject: [PATCH 1028/1586] clocksource/drivers/exynos_mct: Handle DTS with
 higher number of interrupts

The driver statically defines maximum number of interrupts it can
handle, however it does not respect that limit when configuring them.
When provided with a DTS with more interrupts than assumed, the driver
will overwrite static array mct_irqs leading to silent memory
corruption.

Validate the interrupts coming from DTS to avoid this.  This does not
change the fact that such DTS might not boot at all, because it is
simply incompatible, however at least some warning will be printed.

Fixes: 36ba5d527e95 ("ARM: EXYNOS: add device tree support for MCT controller driver")
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Reviewed-by: Alim Akhtar <alim.akhtar@samsung.com>
Link: https://lore.kernel.org/r/20220220103815.135380-1-krzysztof.kozlowski@canonical.com
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/clocksource/exynos_mct.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/clocksource/exynos_mct.c b/drivers/clocksource/exynos_mct.c
index 6db3d5511b0ff..03782b399ea1a 100644
--- a/drivers/clocksource/exynos_mct.c
+++ b/drivers/clocksource/exynos_mct.c
@@ -541,6 +541,11 @@ static int __init exynos4_timer_interrupts(struct device_node *np,
 	 * irqs are specified.
 	 */
 	nr_irqs = of_irq_count(np);
+	if (nr_irqs > ARRAY_SIZE(mct_irqs)) {
+		pr_err("exynos-mct: too many (%d) interrupts configured in DT\n",
+			nr_irqs);
+		nr_irqs = ARRAY_SIZE(mct_irqs);
+	}
 	for (i = MCT_L0_IRQ; i < nr_irqs; i++)
 		mct_irqs[i] = irq_of_parse_and_map(np, i);
 
@@ -553,11 +558,14 @@ static int __init exynos4_timer_interrupts(struct device_node *np,
 		     mct_irqs[MCT_L0_IRQ], err);
 	} else {
 		for_each_possible_cpu(cpu) {
-			int mct_irq = mct_irqs[MCT_L0_IRQ + cpu];
+			int mct_irq;
 			struct mct_clock_event_device *pcpu_mevt =
 				per_cpu_ptr(&percpu_mct_tick, cpu);
 
 			pcpu_mevt->evt.irq = -1;
+			if (MCT_L0_IRQ + cpu >= ARRAY_SIZE(mct_irqs))
+				break;
+			mct_irq = mct_irqs[MCT_L0_IRQ + cpu];
 
 			irq_set_status_flags(mct_irq, IRQ_NOAUTOEN);
 			if (request_irq(mct_irq,
-- 
GitLab


From dfc597c9bca9b4447820a59fe86526f016be1458 Mon Sep 17 00:00:00 2001
From: Alim Akhtar <alim.akhtar@samsung.com>
Date: Mon, 21 Feb 2022 23:15:45 +0530
Subject: [PATCH 1029/1586] clocksource/drivers/exynos_mct: Remove mct
 interrupt index enum

MCT driver define an enum which list global and local timer's
irq index. Most of them are not used but MCT_G0_IRQ and
MCT_L0_IRQ and these two are at a fixed offset/index.
Get rid of this enum and use a #define for the used irq index.

No functional changes expected.

Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Signed-off-by: Alim Akhtar <alim.akhtar@samsung.com>
Link: https://lore.kernel.org/r/20220221174547.26176-1-alim.akhtar@samsung.com
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/clocksource/exynos_mct.c | 22 ++++++----------------
 1 file changed, 6 insertions(+), 16 deletions(-)

diff --git a/drivers/clocksource/exynos_mct.c b/drivers/clocksource/exynos_mct.c
index 03782b399ea1a..341ee47ba35bb 100644
--- a/drivers/clocksource/exynos_mct.c
+++ b/drivers/clocksource/exynos_mct.c
@@ -60,27 +60,17 @@
 #define MCT_CLKEVENTS_RATING		350
 #endif
 
+/* There are four Global timers starting with 0 offset */
+#define MCT_G0_IRQ	0
+/* Local timers count starts after global timer count */
+#define MCT_L0_IRQ	4
+#define MCT_NR_IRQS	12
+
 enum {
 	MCT_INT_SPI,
 	MCT_INT_PPI
 };
 
-enum {
-	MCT_G0_IRQ,
-	MCT_G1_IRQ,
-	MCT_G2_IRQ,
-	MCT_G3_IRQ,
-	MCT_L0_IRQ,
-	MCT_L1_IRQ,
-	MCT_L2_IRQ,
-	MCT_L3_IRQ,
-	MCT_L4_IRQ,
-	MCT_L5_IRQ,
-	MCT_L6_IRQ,
-	MCT_L7_IRQ,
-	MCT_NR_IRQS,
-};
-
 static void __iomem *reg_base;
 static unsigned long clk_rate;
 static unsigned int mct_int_type;
-- 
GitLab


From f49b82a0a54fa85451ed96c35f24679522d59c7a Mon Sep 17 00:00:00 2001
From: Alim Akhtar <alim.akhtar@samsung.com>
Date: Mon, 21 Feb 2022 23:15:46 +0530
Subject: [PATCH 1030/1586] clocksource/drivers/exynos_mct: Bump up mct max irq
 number

Bump-up maximum number of MCT IRQ to match the binding
documentation. This make driver scalable for SoC which
has more than 12 timer irqs, like recently added FSD SoC.

Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Signed-off-by: Alim Akhtar <alim.akhtar@samsung.com>
Link: https://lore.kernel.org/r/20220221174547.26176-2-alim.akhtar@samsung.com
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/clocksource/exynos_mct.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/clocksource/exynos_mct.c b/drivers/clocksource/exynos_mct.c
index 341ee47ba35bb..bcf21006ebd88 100644
--- a/drivers/clocksource/exynos_mct.c
+++ b/drivers/clocksource/exynos_mct.c
@@ -64,7 +64,8 @@
 #define MCT_G0_IRQ	0
 /* Local timers count starts after global timer count */
 #define MCT_L0_IRQ	4
-#define MCT_NR_IRQS	12
+/* Max number of IRQ as per DT binding document */
+#define MCT_NR_IRQS	20
 
 enum {
 	MCT_INT_SPI,
-- 
GitLab


From 0a3a4b9d2bb7928f54579421bbadd4aa9c4a94f0 Mon Sep 17 00:00:00 2001
From: Alim Akhtar <alim.akhtar@samsung.com>
Date: Mon, 21 Feb 2022 23:15:47 +0530
Subject: [PATCH 1031/1586] clocksource/drivers/exynos_mct: Increase the size
 of name array

Variable _name_ hold mct_tick number per cpu and it is currently
limited to 10. Which restrict the scalability of the MCT driver for
the SoC which has more local timers interrupts (>= 12).
Increase the length of it to make mct_tick printed correctly for
each local timer interrupts per CPU.

Signed-off-by: Alim Akhtar <alim.akhtar@samsung.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Link: https://lore.kernel.org/r/20220221174547.26176-3-alim.akhtar@samsung.com
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/clocksource/exynos_mct.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/clocksource/exynos_mct.c b/drivers/clocksource/exynos_mct.c
index bcf21006ebd88..f29c812b70c99 100644
--- a/drivers/clocksource/exynos_mct.c
+++ b/drivers/clocksource/exynos_mct.c
@@ -80,7 +80,11 @@ static int mct_irqs[MCT_NR_IRQS];
 struct mct_clock_event_device {
 	struct clock_event_device evt;
 	unsigned long base;
-	char name[10];
+	/**
+	 *  The length of the name must be adjusted if number of
+	 *  local timer interrupts grow over two digits
+	 */
+	char name[11];
 };
 
 static void exynos4_mct_write(unsigned int value, unsigned long offset)
-- 
GitLab


From 8c4b810a87005eb46564a48a69b5b255e515fa62 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Thu, 3 Feb 2022 17:05:02 +0000
Subject: [PATCH 1032/1586] clocksource/drivers/arm_arch_timer: Use event
 stream scaling when available

With FEAT_ECV and the 1GHz counter, it is pretty likely that the
event stream divider doesn't fit in the field that holds the
divider value (we only have 4 bits to describe counter bits [15:0]

Thankfully, FEAT_ECV also provides a scaling mechanism to switch
the field to cover counter bits [23:8] instead.

Enable this on arm64 when ECV is available (32bit doesn't have
any detection infrastructure and is unlikely to be run on an
ARMv8.6 system anyway).

Signed-off-by: Marc Zyngier <maz@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/r/20220203170502.2694422-1-maz@kernel.org
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/clocksource/arm_arch_timer.c | 13 +++++++++++--
 include/clocksource/arm_arch_timer.h |  1 +
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index 1ecd52f903b8d..9ab8221ee3c65 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -880,10 +880,19 @@ static void __arch_timer_setup(unsigned type,
 	clockevents_config_and_register(clk, arch_timer_rate, 0xf, max_delta);
 }
 
-static void arch_timer_evtstrm_enable(int divider)
+static void arch_timer_evtstrm_enable(unsigned int divider)
 {
 	u32 cntkctl = arch_timer_get_cntkctl();
 
+#ifdef CONFIG_ARM64
+	/* ECV is likely to require a large divider. Use the EVNTIS flag. */
+	if (cpus_have_const_cap(ARM64_HAS_ECV) && divider > 15) {
+		cntkctl |= ARCH_TIMER_EVT_INTERVAL_SCALE;
+		divider -= 8;
+	}
+#endif
+
+	divider = min(divider, 15U);
 	cntkctl &= ~ARCH_TIMER_EVT_TRIGGER_MASK;
 	/* Set the divider and enable virtual event stream */
 	cntkctl |= (divider << ARCH_TIMER_EVT_TRIGGER_SHIFT)
@@ -912,7 +921,7 @@ static void arch_timer_configure_evtstream(void)
 		lsb++;
 
 	/* enable event stream */
-	arch_timer_evtstrm_enable(max(0, min(lsb, 15)));
+	arch_timer_evtstrm_enable(max(0, lsb));
 }
 
 static void arch_counter_set_user_access(void)
diff --git a/include/clocksource/arm_arch_timer.h b/include/clocksource/arm_arch_timer.h
index e715bdb720d51..057c8964aefba 100644
--- a/include/clocksource/arm_arch_timer.h
+++ b/include/clocksource/arm_arch_timer.h
@@ -56,6 +56,7 @@ enum arch_timer_spi_nr {
 #define ARCH_TIMER_EVT_TRIGGER_MASK	(0xF << ARCH_TIMER_EVT_TRIGGER_SHIFT)
 #define ARCH_TIMER_USR_VT_ACCESS_EN	(1 << 8) /* virtual timer registers */
 #define ARCH_TIMER_USR_PT_ACCESS_EN	(1 << 9) /* physical timer registers */
+#define ARCH_TIMER_EVT_INTERVAL_SCALE	(1 << 17) /* EVNTIS in the ARMv8 ARM */
 
 #define ARCH_TIMER_EVT_STREAM_PERIOD_US	100
 #define ARCH_TIMER_EVT_STREAM_FREQ				\
-- 
GitLab


From bf127df3cceada8693888fc86a3121c38ef25701 Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <nathan@kernel.org>
Date: Thu, 3 Mar 2022 11:42:12 -0700
Subject: [PATCH 1033/1586] clocksource/drivers/imx-tpm: Move
 tpm_read_sched_clock() under CONFIG_ARM

When building this driver for an architecture other than ARCH=arm:

  drivers/clocksource/timer-imx-tpm.c:78:20: error: unused function 'tpm_read_sched_clock' [-Werror,-Wunused-function]
  static u64 notrace tpm_read_sched_clock(void)
                     ^
  1 error generated.

Move the function definition under the existing CONFIG_ARM section so
there is no more warning.

Fixes: 10720e120e2b ("clocksource/drivers/imx-tpm: Exclude sched clock for ARM64")
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
Reviewed-by: Peng Fan <peng.fan@nxp.com>
Link: https://lore.kernel.org/r/20220303184212.2356245-1-nathan@kernel.org
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/clocksource/timer-imx-tpm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clocksource/timer-imx-tpm.c b/drivers/clocksource/timer-imx-tpm.c
index 60cefc247b715..bd64a8a8427f3 100644
--- a/drivers/clocksource/timer-imx-tpm.c
+++ b/drivers/clocksource/timer-imx-tpm.c
@@ -73,12 +73,12 @@ static unsigned long tpm_read_current_timer(void)
 {
 	return tpm_read_counter();
 }
-#endif
 
 static u64 notrace tpm_read_sched_clock(void)
 {
 	return tpm_read_counter();
 }
+#endif
 
 static int tpm_set_next_event(unsigned long delta,
 				struct clock_event_device *evt)
-- 
GitLab


From cea9ffe0094d468b17814056fcebe7b3840af5f0 Mon Sep 17 00:00:00 2001
From: David Heidelberg <david@ixit.cz>
Date: Fri, 4 Mar 2022 00:33:06 +0100
Subject: [PATCH 1034/1586] dt-bindings: timer: Tegra: Convert text bindings to
 yaml

Convert Tegra timer binding into yaml format.

This commit also merge 3 text bindings with almost
identical content (differens in number of registers).

Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: David Heidelberg <david@ixit.cz>
Link: https://lore.kernel.org/r/20220303233307.61753-1-david@ixit.cz
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 .../bindings/timer/nvidia,tegra-timer.yaml    | 150 ++++++++++++++++++
 .../bindings/timer/nvidia,tegra20-timer.txt   |  24 ---
 .../bindings/timer/nvidia,tegra210-timer.txt  |  36 -----
 .../bindings/timer/nvidia,tegra30-timer.txt   |  28 ----
 4 files changed, 150 insertions(+), 88 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/timer/nvidia,tegra-timer.yaml
 delete mode 100644 Documentation/devicetree/bindings/timer/nvidia,tegra20-timer.txt
 delete mode 100644 Documentation/devicetree/bindings/timer/nvidia,tegra210-timer.txt
 delete mode 100644 Documentation/devicetree/bindings/timer/nvidia,tegra30-timer.txt

diff --git a/Documentation/devicetree/bindings/timer/nvidia,tegra-timer.yaml b/Documentation/devicetree/bindings/timer/nvidia,tegra-timer.yaml
new file mode 100644
index 0000000000000..b78209cd0f286
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/nvidia,tegra-timer.yaml
@@ -0,0 +1,150 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/timer/nvidia,tegra-timer.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: NVIDIA Tegra timer
+
+maintainers:
+  - Stephen Warren <swarren@nvidia.com>
+
+allOf:
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: nvidia,tegra210-timer
+    then:
+      properties:
+        interrupts:
+          # Either a single combined interrupt or up to 14 individual interrupts
+          minItems: 1
+          maxItems: 14
+          description: >
+            A list of 14 interrupts; one per each timer channels 0 through 13
+
+  - if:
+      properties:
+        compatible:
+          oneOf:
+            - items:
+                - enum:
+                    - nvidia,tegra114-timer
+                    - nvidia,tegra124-timer
+                    - nvidia,tegra132-timer
+                - const: nvidia,tegra30-timer
+            - items:
+                - const: nvidia,tegra30-timer
+                - const: nvidia,tegra20-timer
+    then:
+      properties:
+        interrupts:
+          # Either a single combined interrupt or up to 6 individual interrupts
+          minItems: 1
+          maxItems: 6
+          description: >
+            A list of 6 interrupts; one per each of timer channels 1 through 5,
+            and one for the shared interrupt for the remaining channels.
+
+  - if:
+      properties:
+        compatible:
+          const: nvidia,tegra20-timer
+    then:
+      properties:
+        interrupts:
+          # Either a single combined interrupt or up to 4 individual interrupts
+          minItems: 1
+          maxItems: 4
+          description: |
+            A list of 4 interrupts; one per timer channel.
+
+properties:
+  compatible:
+    oneOf:
+      - const: nvidia,tegra210-timer
+        description: >
+          The Tegra210 timer provides fourteen 29-bit timer counters and one 32-bit
+          timestamp counter. The TMRs run at either a fixed 1 MHz clock rate derived
+          from the oscillator clock (TMR0-TMR9) or directly at the oscillator clock
+          (TMR10-TMR13). Each TMR can be programmed to generate one-shot, periodic,
+          or watchdog interrupts.
+      - items:
+          - enum:
+              - nvidia,tegra114-timer
+              - nvidia,tegra124-timer
+              - nvidia,tegra132-timer
+          - const: nvidia,tegra30-timer
+      - items:
+          - const: nvidia,tegra30-timer
+          - const: nvidia,tegra20-timer
+        description: >
+          The Tegra30 timer provides ten 29-bit timer channels, a single 32-bit free
+          running counter, and 5 watchdog modules. The first two channels may also
+          trigger a legacy watchdog reset.
+      - const: nvidia,tegra20-timer
+        description: >
+          The Tegra20 timer provides four 29-bit timer channels and a single 32-bit free
+          running counter. The first two channels may also trigger a watchdog reset.
+
+  reg:
+    maxItems: 1
+
+  interrupts: true
+
+  clocks:
+    maxItems: 1
+
+  clock-names:
+    items:
+      - const: timer
+
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/irq.h>
+    timer@60005000 {
+        compatible = "nvidia,tegra30-timer", "nvidia,tegra20-timer";
+        reg = <0x60005000 0x400>;
+        interrupts = <0 0 IRQ_TYPE_LEVEL_HIGH>,
+                     <0 1 IRQ_TYPE_LEVEL_HIGH>,
+                     <0 41 IRQ_TYPE_LEVEL_HIGH>,
+                     <0 42 IRQ_TYPE_LEVEL_HIGH>,
+                     <0 121 IRQ_TYPE_LEVEL_HIGH>,
+                     <0 122 IRQ_TYPE_LEVEL_HIGH>;
+        clocks = <&tegra_car 214>;
+    };
+  - |
+    #include <dt-bindings/clock/tegra210-car.h>
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/interrupt-controller/irq.h>
+
+    timer@60005000 {
+        compatible = "nvidia,tegra210-timer";
+        reg = <0x60005000 0x400>;
+        interrupts = <GIC_SPI 156 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 41 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 42 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 152 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 153 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 154 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 155 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 176 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 177 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 178 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 179 IRQ_TYPE_LEVEL_HIGH>;
+        clocks = <&tegra_car TEGRA210_CLK_TIMER>;
+        clock-names = "timer";
+    };
diff --git a/Documentation/devicetree/bindings/timer/nvidia,tegra20-timer.txt b/Documentation/devicetree/bindings/timer/nvidia,tegra20-timer.txt
deleted file mode 100644
index 4a864bd10d3d3..0000000000000
--- a/Documentation/devicetree/bindings/timer/nvidia,tegra20-timer.txt
+++ /dev/null
@@ -1,24 +0,0 @@
-NVIDIA Tegra20 timer
-
-The Tegra20 timer provides four 29-bit timer channels and a single 32-bit free
-running counter. The first two channels may also trigger a watchdog reset.
-
-Required properties:
-
-- compatible : should be "nvidia,tegra20-timer".
-- reg : Specifies base physical address and size of the registers.
-- interrupts : A list of 4 interrupts; one per timer channel.
-- clocks : Must contain one entry, for the module clock.
-  See ../clocks/clock-bindings.txt for details.
-
-Example:
-
-timer {
-	compatible = "nvidia,tegra20-timer";
-	reg = <0x60005000 0x60>;
-	interrupts = <0 0 0x04
-			0 1 0x04
-			0 41 0x04
-			0 42 0x04>;
-	clocks = <&tegra_car 132>;
-};
diff --git a/Documentation/devicetree/bindings/timer/nvidia,tegra210-timer.txt b/Documentation/devicetree/bindings/timer/nvidia,tegra210-timer.txt
deleted file mode 100644
index 032cda96fe0db..0000000000000
--- a/Documentation/devicetree/bindings/timer/nvidia,tegra210-timer.txt
+++ /dev/null
@@ -1,36 +0,0 @@
-NVIDIA Tegra210 timer
-
-The Tegra210 timer provides fourteen 29-bit timer counters and one 32-bit
-timestamp counter. The TMRs run at either a fixed 1 MHz clock rate derived
-from the oscillator clock (TMR0-TMR9) or directly at the oscillator clock
-(TMR10-TMR13). Each TMR can be programmed to generate one-shot, periodic,
-or watchdog interrupts.
-
-Required properties:
-- compatible : "nvidia,tegra210-timer".
-- reg : Specifies base physical address and size of the registers.
-- interrupts : A list of 14 interrupts; one per each timer channels 0 through
-  13.
-- clocks : Must contain one entry, for the module clock.
-  See ../clocks/clock-bindings.txt for details.
-
-timer@60005000 {
-	compatible = "nvidia,tegra210-timer";
-	reg = <0x0 0x60005000 0x0 0x400>;
-	interrupts = <GIC_SPI 156 IRQ_TYPE_LEVEL_HIGH>,
-		     <GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>,
-		     <GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>,
-		     <GIC_SPI 41 IRQ_TYPE_LEVEL_HIGH>,
-		     <GIC_SPI 42 IRQ_TYPE_LEVEL_HIGH>,
-		     <GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>,
-		     <GIC_SPI 152 IRQ_TYPE_LEVEL_HIGH>,
-		     <GIC_SPI 153 IRQ_TYPE_LEVEL_HIGH>,
-		     <GIC_SPI 154 IRQ_TYPE_LEVEL_HIGH>,
-		     <GIC_SPI 155 IRQ_TYPE_LEVEL_HIGH>,
-		     <GIC_SPI 176 IRQ_TYPE_LEVEL_HIGH>,
-		     <GIC_SPI 177 IRQ_TYPE_LEVEL_HIGH>,
-		     <GIC_SPI 178 IRQ_TYPE_LEVEL_HIGH>,
-		     <GIC_SPI 179 IRQ_TYPE_LEVEL_HIGH>;
-	clocks = <&tegra_car TEGRA210_CLK_TIMER>;
-	clock-names = "timer";
-};
diff --git a/Documentation/devicetree/bindings/timer/nvidia,tegra30-timer.txt b/Documentation/devicetree/bindings/timer/nvidia,tegra30-timer.txt
deleted file mode 100644
index 1761f53ee36f7..0000000000000
--- a/Documentation/devicetree/bindings/timer/nvidia,tegra30-timer.txt
+++ /dev/null
@@ -1,28 +0,0 @@
-NVIDIA Tegra30 timer
-
-The Tegra30 timer provides ten 29-bit timer channels, a single 32-bit free
-running counter, and 5 watchdog modules. The first two channels may also
-trigger a legacy watchdog reset.
-
-Required properties:
-
-- compatible : For Tegra30, must contain "nvidia,tegra30-timer".  Otherwise,
-  must contain '"nvidia,<chip>-timer", "nvidia,tegra30-timer"' where
-  <chip> is tegra124 or tegra132.
-- reg : Specifies base physical address and size of the registers.
-- interrupts : A list of 6 interrupts; one per each of timer channels 1
-    through 5, and one for the shared interrupt for the remaining channels.
-- clocks : Must contain one entry, for the module clock.
-  See ../clocks/clock-bindings.txt for details.
-
-timer {
-	compatible = "nvidia,tegra30-timer", "nvidia,tegra20-timer";
-	reg = <0x60005000 0x400>;
-	interrupts = <0 0 0x04
-		      0 1 0x04
-		      0 41 0x04
-		      0 42 0x04
-		      0 121 0x04
-		      0 122 0x04>;
-	clocks = <&tegra_car 214>;
-};
-- 
GitLab


From 34f03f7f3e9f79412b5bf8db9f2778c9ed2fd733 Mon Sep 17 00:00:00 2001
From: Claudiu Beznea <claudiu.beznea@microchip.com>
Date: Fri, 4 Mar 2022 15:35:56 +0200
Subject: [PATCH 1035/1586] clocksource/drivers/timer-microchip-pit64b: Remove
 mmio selection

PIT64B timer driver doesn't depend on CLKSRC_MMIO since
commit e85c1d21b16b ("clocksource/drivers/timer-microchip-pit64b:
Add clocksource suspend/resume"). Remove the selection.

Signed-off-by: Claudiu Beznea <claudiu.beznea@microchip.com>
Link: https://lore.kernel.org/r/20220304133601.2404086-2-claudiu.beznea@microchip.com
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/clocksource/Kconfig | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
index cfb8ea0df3b18..1ea556e754944 100644
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -713,7 +713,6 @@ config INGENIC_OST
 config MICROCHIP_PIT64B
 	bool "Microchip PIT64B support"
 	depends on OF || COMPILE_TEST
-	select CLKSRC_MMIO
 	select TIMER_OF
 	help
 	  This option enables Microchip PIT64B timer for Atmel
-- 
GitLab


From ff10ee97cb203262e88d9c8bc87369cbd4004a0c Mon Sep 17 00:00:00 2001
From: Claudiu Beznea <claudiu.beznea@microchip.com>
Date: Fri, 4 Mar 2022 15:35:57 +0200
Subject: [PATCH 1036/1586] clocksource/drivers/timer-microchip-pit64b: Use
 notrace

Use notrace for mchp_pit64b_sched_read_clk() to avoid recursive call of
prepare_ftrace_return() when issuing:
echo function_graph > /sys/kernel/debug/tracing/current_tracer

Fixes: 625022a5f160 ("clocksource/drivers/timer-microchip-pit64b: Add Microchip PIT64B support")
Signed-off-by: Claudiu Beznea <claudiu.beznea@microchip.com>
Link: https://lore.kernel.org/r/20220304133601.2404086-3-claudiu.beznea@microchip.com
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/clocksource/timer-microchip-pit64b.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clocksource/timer-microchip-pit64b.c b/drivers/clocksource/timer-microchip-pit64b.c
index cfa4ec7ef3968..790d2c9b42a70 100644
--- a/drivers/clocksource/timer-microchip-pit64b.c
+++ b/drivers/clocksource/timer-microchip-pit64b.c
@@ -165,7 +165,7 @@ static u64 mchp_pit64b_clksrc_read(struct clocksource *cs)
 	return mchp_pit64b_cnt_read(mchp_pit64b_cs_base);
 }
 
-static u64 mchp_pit64b_sched_read_clk(void)
+static u64 notrace mchp_pit64b_sched_read_clk(void)
 {
 	return mchp_pit64b_cnt_read(mchp_pit64b_cs_base);
 }
-- 
GitLab


From 389e3bff69b4341b42779833063c7b462a6e6d42 Mon Sep 17 00:00:00 2001
From: Claudiu Beznea <claudiu.beznea@microchip.com>
Date: Fri, 4 Mar 2022 15:35:58 +0200
Subject: [PATCH 1037/1586] clocksource/drivers/timer-microchip-pit64b: Use
 5MHz for clockevent

Use 5MHz clock for clockevent timers. This increases timer's
resolution.

Signed-off-by: Claudiu Beznea <claudiu.beznea@microchip.com>
Link: https://lore.kernel.org/r/20220304133601.2404086-4-claudiu.beznea@microchip.com
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/clocksource/timer-microchip-pit64b.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/clocksource/timer-microchip-pit64b.c b/drivers/clocksource/timer-microchip-pit64b.c
index 790d2c9b42a70..abce83d2f00bd 100644
--- a/drivers/clocksource/timer-microchip-pit64b.c
+++ b/drivers/clocksource/timer-microchip-pit64b.c
@@ -42,8 +42,7 @@
 #define MCHP_PIT64B_LSBMASK		GENMASK_ULL(31, 0)
 #define MCHP_PIT64B_PRES_TO_MODE(p)	(MCHP_PIT64B_MR_PRES & ((p) << 8))
 #define MCHP_PIT64B_MODE_TO_PRES(m)	((MCHP_PIT64B_MR_PRES & (m)) >> 8)
-#define MCHP_PIT64B_DEF_CS_FREQ		5000000UL	/* 5 MHz */
-#define MCHP_PIT64B_DEF_CE_FREQ		32768		/* 32 KHz */
+#define MCHP_PIT64B_DEF_FREQ		5000000UL	/* 5 MHz */
 
 #define MCHP_PIT64B_NAME		"pit64b"
 
@@ -418,7 +417,6 @@ static int __init mchp_pit64b_init_clkevt(struct mchp_pit64b_timer *timer,
 static int __init mchp_pit64b_dt_init_timer(struct device_node *node,
 					    bool clkevt)
 {
-	u32 freq = clkevt ? MCHP_PIT64B_DEF_CE_FREQ : MCHP_PIT64B_DEF_CS_FREQ;
 	struct mchp_pit64b_timer timer;
 	unsigned long clk_rate;
 	u32 irq = 0;
@@ -446,7 +444,7 @@ static int __init mchp_pit64b_dt_init_timer(struct device_node *node,
 	}
 
 	/* Initialize mode (prescaler + SGCK bit). To be used at runtime. */
-	ret = mchp_pit64b_init_mode(&timer, freq);
+	ret = mchp_pit64b_init_mode(&timer, MCHP_PIT64B_DEF_FREQ);
 	if (ret)
 		goto irq_unmap;
 
-- 
GitLab


From 4467b8bad2401794fb89a0268c8c8257180bf60f Mon Sep 17 00:00:00 2001
From: Guillaume Ranquet <granquet@baylibre.com>
Date: Mon, 7 Mar 2022 18:26:56 +0100
Subject: [PATCH 1038/1586] clocksource/drivers/timer-of: Check return value of
 of_iomap in timer_of_base_init()

of_base->base can either be iomapped using of_io_request_and_map() or
of_iomap() depending whether or not an of_base->name has been set.

Thus check of_base->base against NULL as of_iomap() does not return a
PTR_ERR() in case of error.

Fixes: 9aea417afa6b ("clocksource/drivers/timer-of: Don't request the resource by name")
Signed-off-by: Guillaume Ranquet <granquet@baylibre.com>
Link: https://lore.kernel.org/r/20220307172656.4836-1-granquet@baylibre.com
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/clocksource/timer-of.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/clocksource/timer-of.c b/drivers/clocksource/timer-of.c
index 529cc6a51cdb3..c3f54d9912be7 100644
--- a/drivers/clocksource/timer-of.c
+++ b/drivers/clocksource/timer-of.c
@@ -157,9 +157,9 @@ static __init int timer_of_base_init(struct device_node *np,
 	of_base->base = of_base->name ?
 		of_io_request_and_map(np, of_base->index, of_base->name) :
 		of_iomap(np, of_base->index);
-	if (IS_ERR(of_base->base)) {
-		pr_err("Failed to iomap (%s)\n", of_base->name);
-		return PTR_ERR(of_base->base);
+	if (IS_ERR_OR_NULL(of_base->base)) {
+		pr_err("Failed to iomap (%s:%s)\n", np->name, of_base->name);
+		return of_base->base ? PTR_ERR(of_base->base) : -ENOMEM;
 	}
 
 	return 0;
-- 
GitLab


From 0c9992315e738e7d6e927ef36839a466b080dba6 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 7 Mar 2022 20:28:26 +0100
Subject: [PATCH 1039/1586] ACPICA: Avoid walking the ACPI Namespace if it is
 not there

ACPICA commit b1c3656ef4950098e530be68d4b589584f06cddc

Prevent acpi_ns_walk_namespace() from crashing when called with
start_node equal to ACPI_ROOT_OBJECT if the Namespace has not been
instantiated yet and acpi_gbl_root_node is NULL.

For instance, this can happen if the kernel is run with "acpi=off"
in the command line.

Link: https://github.com/acpica/acpica/commit/b1c3656ef4950098e530be68d4b589584f06cddc
Link: https://lore.kernel.org/linux-acpi/CAJZ5v0hJWW_vZ3wwajE7xT38aWjY7cZyvqMJpXHzUL98-SiCVQ@mail.gmail.com/
Reported-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpica/nswalk.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/acpi/acpica/nswalk.c b/drivers/acpi/acpica/nswalk.c
index 915c2433463d7..e7c30ce06e189 100644
--- a/drivers/acpi/acpica/nswalk.c
+++ b/drivers/acpi/acpica/nswalk.c
@@ -169,6 +169,9 @@ acpi_ns_walk_namespace(acpi_object_type type,
 
 	if (start_node == ACPI_ROOT_OBJECT) {
 		start_node = acpi_gbl_root_node;
+		if (!start_node) {
+			return_ACPI_STATUS(AE_NO_NAMESPACE);
+		}
 	}
 
 	/* Null child means "get first node" */
-- 
GitLab


From 2369f171d5c5550b85ce96fd35d4438cf2e6b09e Mon Sep 17 00:00:00 2001
From: Huang Shijie <shijie@os.amperecomputing.com>
Date: Wed, 9 Feb 2022 09:26:42 +0000
Subject: [PATCH 1040/1586] arm64: crash_core: Export MODULES, VMALLOC, and
 VMEMMAP ranges

The following interrelated ranges are needed by the kdump crash tool:
	MODULES_VADDR ~ MODULES_END,
	VMALLOC_START ~ VMALLOC_END,
	VMEMMAP_START ~ VMEMMAP_END

Since these values change from time to time, it is preferable to export
them via vmcoreinfo than to change the crash's code frequently.

Signed-off-by: Huang Shijie <shijie@os.amperecomputing.com>
Link: https://lore.kernel.org/r/20220209092642.9181-1-shijie@os.amperecomputing.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 Documentation/admin-guide/kdump/vmcoreinfo.rst | 8 ++++++++
 arch/arm64/kernel/crash_core.c                 | 6 ++++++
 2 files changed, 14 insertions(+)

diff --git a/Documentation/admin-guide/kdump/vmcoreinfo.rst b/Documentation/admin-guide/kdump/vmcoreinfo.rst
index 3861a25faae18..a339af45a22ed 100644
--- a/Documentation/admin-guide/kdump/vmcoreinfo.rst
+++ b/Documentation/admin-guide/kdump/vmcoreinfo.rst
@@ -494,6 +494,14 @@ architecture which is used to lookup the page-tables for the Virtual
 addresses in the higher VA range (refer to ARMv8 ARM document for
 more details).
 
+MODULES_VADDR|MODULES_END|VMALLOC_START|VMALLOC_END|VMEMMAP_START|VMEMMAP_END
+-------------
+
+Used to get the correct ranges:
+	MODULES_VADDR ~ MODULES_END-1 : Kernel module space.
+	VMALLOC_START ~ VMALLOC_END-1 : vmalloc() / ioremap() space.
+	VMEMMAP_START ~ VMEMMAP_END-1 : vmemmap region, used for struct page array.
+
 arm
 ===
 
diff --git a/arch/arm64/kernel/crash_core.c b/arch/arm64/kernel/crash_core.c
index 314391a156ee6..2b65aae332ce9 100644
--- a/arch/arm64/kernel/crash_core.c
+++ b/arch/arm64/kernel/crash_core.c
@@ -20,6 +20,12 @@ void arch_crash_save_vmcoreinfo(void)
 {
 	VMCOREINFO_NUMBER(VA_BITS);
 	/* Please note VMCOREINFO_NUMBER() uses "%d", not "%x" */
+	vmcoreinfo_append_str("NUMBER(MODULES_VADDR)=0x%lx\n", MODULES_VADDR);
+	vmcoreinfo_append_str("NUMBER(MODULES_END)=0x%lx\n", MODULES_END);
+	vmcoreinfo_append_str("NUMBER(VMALLOC_START)=0x%lx\n", VMALLOC_START);
+	vmcoreinfo_append_str("NUMBER(VMALLOC_END)=0x%lx\n", VMALLOC_END);
+	vmcoreinfo_append_str("NUMBER(VMEMMAP_START)=0x%lx\n", VMEMMAP_START);
+	vmcoreinfo_append_str("NUMBER(VMEMMAP_END)=0x%lx\n", VMEMMAP_END);
 	vmcoreinfo_append_str("NUMBER(kimage_voffset)=0x%llx\n",
 						kimage_voffset);
 	vmcoreinfo_append_str("NUMBER(PHYS_OFFSET)=0x%llx\n",
-- 
GitLab


From 614c0b9fee711dd89b1dd65c88ba83612a373fdc Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Thu, 24 Feb 2022 18:10:28 +0000
Subject: [PATCH 1041/1586] arm64: prevent instrumentation of bp hardening
 callbacks

We may call arm64_apply_bp_hardening() early during entry (e.g. in
el0_ia()) before it is safe to run instrumented code. Unfortunately this
may result in running instrumented code in two cases:

* The hardening callbacks called by arm64_apply_bp_hardening() are not
  marked as `noinstr`, and have been observed to be instrumented when
  compiled with either GCC or LLVM.

* Since arm64_apply_bp_hardening() itself is only marked as `inline`
  rather than `__always_inline`, it is possible that the compiler
  decides to place it out-of-line, whereupon it may be instrumented.

For example, with defconfig built with clang 13.0.0,
call_hvc_arch_workaround_1() is compiled as:

| <call_hvc_arch_workaround_1>:
|        d503233f        paciasp
|        f81f0ffe        str     x30, [sp, #-16]!
|        320183e0        mov     w0, #0x80008000
|        d503201f        nop
|        d4000002        hvc     #0x0
|        f84107fe        ldr     x30, [sp], #16
|        d50323bf        autiasp
|        d65f03c0        ret

... but when CONFIG_FTRACE=y and CONFIG_KCOV=y this is compiled as:

| <call_hvc_arch_workaround_1>:
|        d503245f        bti     c
|        d503201f        nop
|        d503201f        nop
|        d503233f        paciasp
|        a9bf7bfd        stp     x29, x30, [sp, #-16]!
|        910003fd        mov     x29, sp
|        94000000        bl      0 <__sanitizer_cov_trace_pc>
|        320183e0        mov     w0, #0x80008000
|        d503201f        nop
|        d4000002        hvc     #0x0
|        a8c17bfd        ldp     x29, x30, [sp], #16
|        d50323bf        autiasp
|        d65f03c0        ret

... with a patchable function entry registered with ftrace, and a direct
call to __sanitizer_cov_trace_pc(). Neither of these are safe early
during entry sequences.

This patch avoids the unsafe instrumentation by marking
arm64_apply_bp_hardening() as `__always_inline` and by marking the
hardening functions as `noinstr`. This avoids the potential for
instrumentation, and causes clang to consistently generate the function
as with the defconfig sample.

Note: in the defconfig compilation, when CONFIG_SVE=y, x30 is spilled to
the stack without being placed in a frame record, which will result in a
missing entry if call_hvc_arch_workaround_1() is backtraced. Similar is
true of qcom_link_stack_sanitisation(), where inline asm spills the LR
to a GPR prior to corrupting it. This is not a significant issue
presently as we will only backtrace here if an exception is taken, and
in such cases we may omit entries for other reasons today.

The relevant hardening functions were introduced in commits:

  ec82b567a74fbdff ("arm64: Implement branch predictor hardening for Falkor")
  b092201e00206141 ("arm64: Add ARM_SMCCC_ARCH_WORKAROUND_1 BP hardening support")

... and these were subsequently moved in commit:

  d4647f0a2ad71110 ("arm64: Rewrite Spectre-v2 mitigation code")

The arm64_apply_bp_hardening() function was introduced in commit:

  0f15adbb2861ce6f ("arm64: Add skeleton to harden the branch predictor against aliasing attacks")

... and was subsequently moved and reworked in commit:

  6279017e807708a0 ("KVM: arm64: Move BP hardening helpers into spectre.h")

Fixes: ec82b567a74fbdff ("arm64: Implement branch predictor hardening for Falkor")
Fixes: b092201e00206141 ("arm64: Add ARM_SMCCC_ARCH_WORKAROUND_1 BP hardening support")
Fixes: d4647f0a2ad71110 ("arm64: Rewrite Spectre-v2 mitigation code")
Fixes: 0f15adbb2861ce6f ("arm64: Add skeleton to harden the branch predictor against aliasing attacks")
Fixes: 6279017e807708a0 ("KVM: arm64: Move BP hardening helpers into spectre.h")
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: James Morse <james.morse@arm.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Mark Brown <broonie@kernel.org>
Cc: Will Deacon <will@kernel.org>
Acked-by: Marc Zyngier <maz@kernel.org>
Reviewed-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20220224181028.512873-1-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/spectre.h | 3 ++-
 arch/arm64/kernel/proton-pack.c  | 9 ++++++---
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h
index f62ca39da6c5a..d476c06757c54 100644
--- a/arch/arm64/include/asm/spectre.h
+++ b/arch/arm64/include/asm/spectre.h
@@ -67,7 +67,8 @@ struct bp_hardening_data {
 
 DECLARE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data);
 
-static inline void arm64_apply_bp_hardening(void)
+/* Called during entry so must be __always_inline */
+static __always_inline void arm64_apply_bp_hardening(void)
 {
 	struct bp_hardening_data *d;
 
diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c
index 902e4084c4775..ea78f0b3d8478 100644
--- a/arch/arm64/kernel/proton-pack.c
+++ b/arch/arm64/kernel/proton-pack.c
@@ -193,17 +193,20 @@ static void install_bp_hardening_cb(bp_hardening_cb_t fn)
 	__this_cpu_write(bp_hardening_data.slot, HYP_VECTOR_SPECTRE_DIRECT);
 }
 
-static void call_smc_arch_workaround_1(void)
+/* Called during entry so must be noinstr */
+static noinstr void call_smc_arch_workaround_1(void)
 {
 	arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL);
 }
 
-static void call_hvc_arch_workaround_1(void)
+/* Called during entry so must be noinstr */
+static noinstr void call_hvc_arch_workaround_1(void)
 {
 	arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL);
 }
 
-static void qcom_link_stack_sanitisation(void)
+/* Called during entry so must be noinstr */
+static noinstr void qcom_link_stack_sanitisation(void)
 {
 	u64 tmp;
 
-- 
GitLab


From 0a32c88ddb9af30e8a16d41d7b9b824c27d29459 Mon Sep 17 00:00:00 2001
From: David Engraf <david.engraf@sysgo.com>
Date: Fri, 25 Feb 2022 11:40:08 +0100
Subject: [PATCH 1042/1586] arm64: signal: nofpsimd: Do not allocate fp/simd
 context when not available

Commit 6d502b6ba1b2 ("arm64: signal: nofpsimd: Handle fp/simd context for
signal frames") introduced saving the fp/simd context for signal handling
only when support is available. But setup_sigframe_layout() always
reserves memory for fp/simd context. The additional memory is not touched
because preserve_fpsimd_context() is not called and thus the magic is
invalid.

This may lead to an error when parse_user_sigframe() checks the fp/simd
area and does not find a valid magic number.

Signed-off-by: David Engraf <david.engraf@sysgo.com>
Reviwed-by: Mark Brown <broonie@kernel.org>
Fixes: 6d502b6ba1b267b3 ("arm64: signal: nofpsimd: Handle fp/simd context for signal frames")
Cc: <stable@vger.kernel.org> # 5.6.x
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20220225104008.820289-1-david.engraf@sysgo.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/signal.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index d8aaf4b6f4320..3d66fba69016f 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -577,10 +577,12 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user,
 {
 	int err;
 
-	err = sigframe_alloc(user, &user->fpsimd_offset,
-			     sizeof(struct fpsimd_context));
-	if (err)
-		return err;
+	if (system_supports_fpsimd()) {
+		err = sigframe_alloc(user, &user->fpsimd_offset,
+				     sizeof(struct fpsimd_context));
+		if (err)
+			return err;
+	}
 
 	/* fault information, if valid */
 	if (add_all || current->thread.fault_code) {
-- 
GitLab


From b8fc780137b4b2d7b84d75488b429fd882e0dd3b Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Wed, 2 Mar 2022 13:42:25 +0000
Subject: [PATCH 1043/1586] arm64: cpufeature: Add missing .field_width for GIC
 system registers

This was missed when making specification of a field standard.

Fixes: 0a2eec83c2c23cf6 ("arm64: cpufeature: Always specify and use a field width for capabilities")
Reported-by: Qian Cai <quic_qiancai@quicinc.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20220302134225.159217-1-broonie@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/cpufeature.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 64a748c2b351f..499e37a301568 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1947,6 +1947,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.matches = has_useable_gicv3_cpuif,
 		.sys_reg = SYS_ID_AA64PFR0_EL1,
 		.field_pos = ID_AA64PFR0_GIC_SHIFT,
+		.field_width = 4,
 		.sign = FTR_UNSIGNED,
 		.min_field_value = 1,
 	},
-- 
GitLab


From f2c281204b47309534f26dc63cee2a130c2b497b Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Mon, 7 Mar 2022 18:08:59 +0000
Subject: [PATCH 1044/1586] arm64: cpufeature: Warn if we attempt to read a
 zero width field

Add a WARN_ON_ONCE() when extracting a field if no width is specified. This
should never happen outside of development since it will be triggered with
or without the feature so long as the relevant ID register is present.  If
the warning triggers hope that the field was the standard 4 bits wide and
soldier on.

Suggested-by: Marc Zyngier <maz@kernel.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20220307180900.3045812-1-broonie@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/cpufeature.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 2728abd9cae4b..2b407fbccdfb3 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -577,6 +577,8 @@ static inline u64 arm64_ftr_reg_user_value(const struct arm64_ftr_reg *reg)
 static inline int __attribute_const__
 cpuid_feature_extract_field_width(u64 features, int field, int width, bool sign)
 {
+	if (WARN_ON_ONCE(!width))
+		width = 4;
 	return (sign) ?
 		cpuid_feature_extract_signed_field_width(features, field, width) :
 		cpuid_feature_extract_unsigned_field_width(features, field, width);
-- 
GitLab


From cf5a501d985ba1b6ace9b18c64346441819bffea Mon Sep 17 00:00:00 2001
From: Muchun Song <songmuchun@bytedance.com>
Date: Wed, 2 Mar 2022 16:46:23 +0800
Subject: [PATCH 1045/1586] arm64: avoid flushing icache multiple times on
 contiguous HugeTLB

When a contiguous HugeTLB page is mapped, set_pte_at() will be called
CONT_PTES/CONT_PMDS times.  Therefore, __sync_icache_dcache() will
flush cache multiple times if the page is executable (to ensure
the I-D cache coherency).  However, the first flushing cache already
covers subsequent cache flush operations.  So only flusing cache
for the head page if it is a HugeTLB page to avoid redundant cache
flushing.  In the next patch, it is also depends on this change
since the tail vmemmap pages of HugeTLB is mapped with read-only
meanning only head page struct can be modified.

Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20220302084624.33340-1-songmuchun@bytedance.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/mm/flush.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c
index 2aaf950b906cb..a06c6ac770d45 100644
--- a/arch/arm64/mm/flush.c
+++ b/arch/arm64/mm/flush.c
@@ -52,6 +52,13 @@ void __sync_icache_dcache(pte_t pte)
 {
 	struct page *page = pte_page(pte);
 
+	/*
+	 * HugeTLB pages are always fully mapped, so only setting head page's
+	 * PG_dcache_clean flag is enough.
+	 */
+	if (PageHuge(page))
+		page = compound_head(page);
+
 	if (!test_bit(PG_dcache_clean, &page->flags)) {
 		sync_icache_aliases((unsigned long)page_address(page),
 				    (unsigned long)page_address(page) +
-- 
GitLab


From 1310222c276b7946e440a7ab49c1e1508561f5fd Mon Sep 17 00:00:00 2001
From: Anshuman Khandual <anshuman.khandual@arm.com>
Date: Wed, 16 Feb 2022 10:36:52 +0530
Subject: [PATCH 1046/1586] arm64/mm: Drop use_1G_block()

pud_sect_supported() already checks for PUD level block mapping support i.e
on ARM64_4K_PAGES config. Hence pud_sect_supported(), along with some other
required alignment checks can help completely drop use_1G_block().

Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/1644988012-25455-1-git-send-email-anshuman.khandual@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/mm/mmu.c | 15 ++-------------
 1 file changed, 2 insertions(+), 13 deletions(-)

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 1681430ecab73..12feac701d8e9 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -295,18 +295,6 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr,
 	} while (addr = next, addr != end);
 }
 
-static inline bool use_1G_block(unsigned long addr, unsigned long next,
-			unsigned long phys)
-{
-	if (PAGE_SHIFT != 12)
-		return false;
-
-	if (((addr | next | phys) & ~PUD_MASK) != 0)
-		return false;
-
-	return true;
-}
-
 static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end,
 			   phys_addr_t phys, pgprot_t prot,
 			   phys_addr_t (*pgtable_alloc)(int),
@@ -345,7 +333,8 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end,
 		/*
 		 * For 4K granule only, attempt to put down a 1GB block
 		 */
-		if (use_1G_block(addr, next, phys) &&
+		if (pud_sect_supported() &&
+		   ((addr | next | phys) & ~PUD_MASK) == 0 &&
 		    (flags & NO_BLOCK_MAPPINGS) == 0) {
 			pud_set_huge(pudp, phys, prot);
 
-- 
GitLab


From 24a147bcef8ca039cb75d6d4b68c7cc339b11178 Mon Sep 17 00:00:00 2001
From: Linu Cherian <lcherian@marvell.com>
Date: Mon, 7 Mar 2022 20:00:14 +0530
Subject: [PATCH 1047/1586] irqchip/gic-v3: Workaround Marvell erratum 38545
 when reading IAR

When a IAR register read races with a GIC interrupt RELEASE event,
GIC-CPU interface could wrongly return a valid INTID to the CPU
for an interrupt that is already released(non activated) instead of 0x3ff.

As a side effect, an interrupt handler could run twice, once with
interrupt priority and then with idle priority.

As a workaround, gic_read_iar is updated so that it will return a
valid interrupt ID only if there is a change in the active priority list
after the IAR read on all the affected Silicons.

Since there are silicon variants where both 23154 and 38545 are applicable,
workaround for erratum 23154 has been extended to address both of them.

Signed-off-by: Linu Cherian <lcherian@marvell.com>
Reviewed-by: Marc Zyngier <maz@kernel.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20220307143014.22758-1-lcherian@marvell.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 Documentation/arm64/silicon-errata.rst |  2 +-
 arch/arm64/Kconfig                     |  8 ++++++--
 arch/arm64/include/asm/arch_gicv3.h    | 23 +++++++++++++++++++++--
 arch/arm64/include/asm/cputype.h       | 13 +++++++++++++
 arch/arm64/kernel/cpu_errata.c         | 20 +++++++++++++++++---
 5 files changed, 58 insertions(+), 8 deletions(-)

diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst
index ea281dd755171..466cb9e89047f 100644
--- a/Documentation/arm64/silicon-errata.rst
+++ b/Documentation/arm64/silicon-errata.rst
@@ -136,7 +136,7 @@ stable kernels.
 +----------------+-----------------+-----------------+-----------------------------+
 | Cavium         | ThunderX ITS    | #23144          | CAVIUM_ERRATUM_23144        |
 +----------------+-----------------+-----------------+-----------------------------+
-| Cavium         | ThunderX GICv3  | #23154          | CAVIUM_ERRATUM_23154        |
+| Cavium         | ThunderX GICv3  | #23154,38545    | CAVIUM_ERRATUM_23154        |
 +----------------+-----------------+-----------------+-----------------------------+
 | Cavium         | ThunderX GICv3  | #38539          | N/A                         |
 +----------------+-----------------+-----------------+-----------------------------+
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index cbcd42decb2ad..b154aa98c43ac 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -890,13 +890,17 @@ config CAVIUM_ERRATUM_23144
 	  If unsure, say Y.
 
 config CAVIUM_ERRATUM_23154
-	bool "Cavium erratum 23154: Access to ICC_IAR1_EL1 is not sync'ed"
+	bool "Cavium errata 23154 and 38545: GICv3 lacks HW synchronisation"
 	default y
 	help
-	  The gicv3 of ThunderX requires a modified version for
+	  The ThunderX GICv3 implementation requires a modified version for
 	  reading the IAR status to ensure data synchronization
 	  (access to icc_iar1_el1 is not sync'ed before and after).
 
+	  It also suffers from erratum 38545 (also present on Marvell's
+	  OcteonTX and OcteonTX2), resulting in deactivated interrupts being
+	  spuriously presented to the CPU interface.
+
 	  If unsure, say Y.
 
 config CAVIUM_ERRATUM_27456
diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h
index 4ad22c3135dbb..8bd5afc7b692e 100644
--- a/arch/arm64/include/asm/arch_gicv3.h
+++ b/arch/arm64/include/asm/arch_gicv3.h
@@ -53,17 +53,36 @@ static inline u64 gic_read_iar_common(void)
  * The gicv3 of ThunderX requires a modified version for reading the
  * IAR status to ensure data synchronization (access to icc_iar1_el1
  * is not sync'ed before and after).
+ *
+ * Erratum 38545
+ *
+ * When a IAR register read races with a GIC interrupt RELEASE event,
+ * GIC-CPU interface could wrongly return a valid INTID to the CPU
+ * for an interrupt that is already released(non activated) instead of 0x3ff.
+ *
+ * To workaround this, return a valid interrupt ID only if there is a change
+ * in the active priority list after the IAR read.
+ *
+ * Common function used for both the workarounds since,
+ * 1. On Thunderx 88xx 1.x both erratas are applicable.
+ * 2. Having extra nops doesn't add any side effects for Silicons where
+ *    erratum 23154 is not applicable.
  */
 static inline u64 gic_read_iar_cavium_thunderx(void)
 {
-	u64 irqstat;
+	u64 irqstat, apr;
 
+	apr = read_sysreg_s(SYS_ICC_AP1R0_EL1);
 	nops(8);
 	irqstat = read_sysreg_s(SYS_ICC_IAR1_EL1);
 	nops(4);
 	mb();
 
-	return irqstat;
+	/* Max priority groups implemented is only 32 */
+	if (likely(apr != read_sysreg_s(SYS_ICC_AP1R0_EL1)))
+		return irqstat;
+
+	return 0x3ff;
 }
 
 static inline void gic_write_ctlr(u32 val)
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 999b9149f8568..4596e7ca29a3f 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -84,6 +84,13 @@
 #define CAVIUM_CPU_PART_THUNDERX_81XX	0x0A2
 #define CAVIUM_CPU_PART_THUNDERX_83XX	0x0A3
 #define CAVIUM_CPU_PART_THUNDERX2	0x0AF
+/* OcteonTx2 series */
+#define CAVIUM_CPU_PART_OCTX2_98XX	0x0B1
+#define CAVIUM_CPU_PART_OCTX2_96XX	0x0B2
+#define CAVIUM_CPU_PART_OCTX2_95XX	0x0B3
+#define CAVIUM_CPU_PART_OCTX2_95XXN	0x0B4
+#define CAVIUM_CPU_PART_OCTX2_95XXMM	0x0B5
+#define CAVIUM_CPU_PART_OCTX2_95XXO	0x0B6
 
 #define BRCM_CPU_PART_BRAHMA_B53	0x100
 #define BRCM_CPU_PART_VULCAN		0x516
@@ -124,6 +131,12 @@
 #define MIDR_THUNDERX	MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX)
 #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX)
 #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX)
+#define MIDR_OCTX2_98XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_98XX)
+#define MIDR_OCTX2_96XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_96XX)
+#define MIDR_OCTX2_95XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_95XX)
+#define MIDR_OCTX2_95XXN MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_95XXN)
+#define MIDR_OCTX2_95XXMM MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_95XXMM)
+#define MIDR_OCTX2_95XXO MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_95XXO)
 #define MIDR_CAVIUM_THUNDERX2 MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX2)
 #define MIDR_BRAHMA_B53 MIDR_CPU_MODEL(ARM_CPU_IMP_BRCM, BRCM_CPU_PART_BRAHMA_B53)
 #define MIDR_BRCM_VULCAN MIDR_CPU_MODEL(ARM_CPU_IMP_BRCM, BRCM_CPU_PART_VULCAN)
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index b217941713a8d..510f47055b914 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -214,6 +214,20 @@ static const struct arm64_cpu_capabilities arm64_repeat_tlbi_list[] = {
 };
 #endif
 
+#ifdef CONFIG_CAVIUM_ERRATUM_23154
+const struct midr_range cavium_erratum_23154_cpus[] = {
+	MIDR_ALL_VERSIONS(MIDR_THUNDERX),
+	MIDR_ALL_VERSIONS(MIDR_THUNDERX_81XX),
+	MIDR_ALL_VERSIONS(MIDR_THUNDERX_83XX),
+	MIDR_ALL_VERSIONS(MIDR_OCTX2_98XX),
+	MIDR_ALL_VERSIONS(MIDR_OCTX2_96XX),
+	MIDR_ALL_VERSIONS(MIDR_OCTX2_95XX),
+	MIDR_ALL_VERSIONS(MIDR_OCTX2_95XXN),
+	MIDR_ALL_VERSIONS(MIDR_OCTX2_95XXMM),
+	MIDR_ALL_VERSIONS(MIDR_OCTX2_95XXO),
+};
+#endif
+
 #ifdef CONFIG_CAVIUM_ERRATUM_27456
 const struct midr_range cavium_erratum_27456_cpus[] = {
 	/* Cavium ThunderX, T88 pass 1.x - 2.1 */
@@ -425,10 +439,10 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
 #endif
 #ifdef CONFIG_CAVIUM_ERRATUM_23154
 	{
-	/* Cavium ThunderX, pass 1.x */
-		.desc = "Cavium erratum 23154",
+		.desc = "Cavium errata 23154 and 38545",
 		.capability = ARM64_WORKAROUND_CAVIUM_23154,
-		ERRATA_MIDR_REV_RANGE(MIDR_THUNDERX, 0, 0, 1),
+		.type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
+		ERRATA_MIDR_RANGE_LIST(cavium_erratum_23154_cpus),
 	},
 #endif
 #ifdef CONFIG_CAVIUM_ERRATUM_27456
-- 
GitLab


From e2dc49ef6c6b0e76c6d37cbec1161662570044e7 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Thu, 3 Mar 2022 19:28:17 +0000
Subject: [PATCH 1048/1586] kselftest/arm64: Log the PIDs of the parent and
 child in sve-ptrace

If the test triggers a problem it may well result in a log message from
the kernel such as a WARN() or BUG(). If these include a PID it can help
with debugging to know if it was the parent or child process that triggered
the issue, since the test is just creating a new thread the process name
will be the same either way. Print the PIDs of the parent and child on
startup so users have this information to hand should it be needed.

Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Link: https://lore.kernel.org/r/20220303192817.2732509-1-broonie@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 tools/testing/selftests/arm64/fp/sve-ptrace.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c
index 4bd333768cc45..4c418b2021e02 100644
--- a/tools/testing/selftests/arm64/fp/sve-ptrace.c
+++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c
@@ -487,6 +487,8 @@ static int do_parent(pid_t child)
 	unsigned int vq, vl;
 	bool vl_supported;
 
+	ksft_print_msg("Parent is %d, child is %d\n", getpid(), child);
+
 	/* Attach to the child */
 	while (1) {
 		int sig;
-- 
GitLab


From 507f788d05e7fba6cf478ffa1c99f5c2b0020f63 Mon Sep 17 00:00:00 2001
From: Joey Gouly <joey.gouly@arm.com>
Date: Tue, 1 Mar 2022 10:14:33 +0000
Subject: [PATCH 1049/1586] arm64: lib: Import latest version of Arm Optimized
 Routines' strcmp

Import the latest version of the Arm Optimized Routines strcmp function based
on the upstream code of string/aarch64/strcmp.S at commit 189dfefe37d5 from:
  https://github.com/ARM-software/optimized-routines

This latest version includes MTE support.

Note that for simplicity Arm have chosen to contribute this code to Linux under
GPLv2 rather than the original MIT OR Apache-2.0 WITH LLVM-exception license.
Arm is the sole copyright holder for this code.

Signed-off-by: Joey Gouly <joey.gouly@arm.com>
Cc: Robin Murphy <robin.murphy@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20220301101435.19327-2-joey.gouly@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/lib/strcmp.S | 238 +++++++++++++++++++++-------------------
 1 file changed, 126 insertions(+), 112 deletions(-)

diff --git a/arch/arm64/lib/strcmp.S b/arch/arm64/lib/strcmp.S
index 83bcad72ec972..758de77afd2f7 100644
--- a/arch/arm64/lib/strcmp.S
+++ b/arch/arm64/lib/strcmp.S
@@ -1,9 +1,9 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * Copyright (c) 2012-2021, Arm Limited.
+ * Copyright (c) 2012-2022, Arm Limited.
  *
  * Adapted from the original at:
- * https://github.com/ARM-software/optimized-routines/blob/afd6244a1f8d9229/string/aarch64/strcmp.S
+ * https://github.com/ARM-software/optimized-routines/blob/189dfefe37d54c5b/string/aarch64/strcmp.S
  */
 
 #include <linux/linkage.h>
@@ -11,161 +11,175 @@
 
 /* Assumptions:
  *
- * ARMv8-a, AArch64
+ * ARMv8-a, AArch64.
+ * MTE compatible.
  */
 
 #define L(label) .L ## label
 
 #define REP8_01 0x0101010101010101
 #define REP8_7f 0x7f7f7f7f7f7f7f7f
-#define REP8_80 0x8080808080808080
 
-/* Parameters and result.  */
 #define src1		x0
 #define src2		x1
 #define result		x0
 
-/* Internal variables.  */
 #define data1		x2
 #define data1w		w2
 #define data2		x3
 #define data2w		w3
 #define has_nul		x4
 #define diff		x5
+#define off1		x5
 #define syndrome	x6
-#define tmp1		x7
-#define tmp2		x8
-#define tmp3		x9
-#define zeroones	x10
-#define pos		x11
-
-	/* Start of performance-critical section  -- one 64B cache line.  */
-	.align 6
+#define tmp		x6
+#define data3		x7
+#define zeroones	x8
+#define shift		x9
+#define off2		x10
+
+/* On big-endian early bytes are at MSB and on little-endian LSB.
+   LS_FW means shifting towards early bytes.  */
+#ifdef __AARCH64EB__
+# define LS_FW lsl
+#else
+# define LS_FW lsr
+#endif
+
+/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
+   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
+   can be done in parallel across the entire word.
+   Since carry propagation makes 0x1 bytes before a NUL byte appear
+   NUL too in big-endian, byte-reverse the data before the NUL check.  */
+
+
 SYM_FUNC_START_WEAK_PI(strcmp)
-	eor	tmp1, src1, src2
-	mov	zeroones, #REP8_01
-	tst	tmp1, #7
+	sub	off2, src2, src1
+	mov	zeroones, REP8_01
+	and	tmp, src1, 7
+	tst	off2, 7
 	b.ne	L(misaligned8)
-	ands	tmp1, src1, #7
-	b.ne	L(mutual_align)
-	/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
-	   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
-	   can be done in parallel across the entire word.  */
+	cbnz	tmp, L(mutual_align)
+
+	.p2align 4
+
 L(loop_aligned):
-	ldr	data1, [src1], #8
-	ldr	data2, [src2], #8
+	ldr	data2, [src1, off2]
+	ldr	data1, [src1], 8
 L(start_realigned):
-	sub	tmp1, data1, zeroones
-	orr	tmp2, data1, #REP8_7f
-	eor	diff, data1, data2	/* Non-zero if differences found.  */
-	bic	has_nul, tmp1, tmp2	/* Non-zero if NUL terminator.  */
+#ifdef __AARCH64EB__
+	rev	tmp, data1
+	sub	has_nul, tmp, zeroones
+	orr	tmp, tmp, REP8_7f
+#else
+	sub	has_nul, data1, zeroones
+	orr	tmp, data1, REP8_7f
+#endif
+	bics	has_nul, has_nul, tmp	/* Non-zero if NUL terminator.  */
+	ccmp	data1, data2, 0, eq
+	b.eq	L(loop_aligned)
+#ifdef __AARCH64EB__
+	rev	has_nul, has_nul
+#endif
+	eor	diff, data1, data2
 	orr	syndrome, diff, has_nul
-	cbz	syndrome, L(loop_aligned)
-	/* End of performance-critical section  -- one 64B cache line.  */
-
 L(end):
-#ifndef	__AARCH64EB__
+#ifndef __AARCH64EB__
 	rev	syndrome, syndrome
 	rev	data1, data1
-	/* The MS-non-zero bit of the syndrome marks either the first bit
-	   that is different, or the top bit of the first zero byte.
-	   Shifting left now will bring the critical information into the
-	   top bits.  */
-	clz	pos, syndrome
 	rev	data2, data2
-	lsl	data1, data1, pos
-	lsl	data2, data2, pos
-	/* But we need to zero-extend (char is unsigned) the value and then
-	   perform a signed 32-bit subtraction.  */
-	lsr	data1, data1, #56
-	sub	result, data1, data2, lsr #56
-	ret
-#else
-	/* For big-endian we cannot use the trick with the syndrome value
-	   as carry-propagation can corrupt the upper bits if the trailing
-	   bytes in the string contain 0x01.  */
-	/* However, if there is no NUL byte in the dword, we can generate
-	   the result directly.  We can't just subtract the bytes as the
-	   MSB might be significant.  */
-	cbnz	has_nul, 1f
-	cmp	data1, data2
-	cset	result, ne
-	cneg	result, result, lo
-	ret
-1:
-	/* Re-compute the NUL-byte detection, using a byte-reversed value.  */
-	rev	tmp3, data1
-	sub	tmp1, tmp3, zeroones
-	orr	tmp2, tmp3, #REP8_7f
-	bic	has_nul, tmp1, tmp2
-	rev	has_nul, has_nul
-	orr	syndrome, diff, has_nul
-	clz	pos, syndrome
-	/* The MS-non-zero bit of the syndrome marks either the first bit
-	   that is different, or the top bit of the first zero byte.
+#endif
+	clz	shift, syndrome
+	/* The most-significant-non-zero bit of the syndrome marks either the
+	   first bit that is different, or the top bit of the first zero byte.
 	   Shifting left now will bring the critical information into the
 	   top bits.  */
-	lsl	data1, data1, pos
-	lsl	data2, data2, pos
+	lsl	data1, data1, shift
+	lsl	data2, data2, shift
 	/* But we need to zero-extend (char is unsigned) the value and then
 	   perform a signed 32-bit subtraction.  */
-	lsr	data1, data1, #56
-	sub	result, data1, data2, lsr #56
+	lsr	data1, data1, 56
+	sub	result, data1, data2, lsr 56
 	ret
-#endif
+
+	.p2align 4
 
 L(mutual_align):
 	/* Sources are mutually aligned, but are not currently at an
 	   alignment boundary.  Round down the addresses and then mask off
-	   the bytes that preceed the start point.  */
-	bic	src1, src1, #7
-	bic	src2, src2, #7
-	lsl	tmp1, tmp1, #3		/* Bytes beyond alignment -> bits.  */
-	ldr	data1, [src1], #8
-	neg	tmp1, tmp1		/* Bits to alignment -64.  */
-	ldr	data2, [src2], #8
-	mov	tmp2, #~0
-#ifdef __AARCH64EB__
-	/* Big-endian.  Early bytes are at MSB.  */
-	lsl	tmp2, tmp2, tmp1	/* Shift (tmp1 & 63).  */
-#else
-	/* Little-endian.  Early bytes are at LSB.  */
-	lsr	tmp2, tmp2, tmp1	/* Shift (tmp1 & 63).  */
-#endif
-	orr	data1, data1, tmp2
-	orr	data2, data2, tmp2
+	   the bytes that precede the start point.  */
+	bic	src1, src1, 7
+	ldr	data2, [src1, off2]
+	ldr	data1, [src1], 8
+	neg	shift, src2, lsl 3	/* Bits to alignment -64.  */
+	mov	tmp, -1
+	LS_FW	tmp, tmp, shift
+	orr	data1, data1, tmp
+	orr	data2, data2, tmp
 	b	L(start_realigned)
 
 L(misaligned8):
 	/* Align SRC1 to 8 bytes and then compare 8 bytes at a time, always
-	   checking to make sure that we don't access beyond page boundary in
-	   SRC2.  */
-	tst	src1, #7
-	b.eq	L(loop_misaligned)
+	   checking to make sure that we don't access beyond the end of SRC2.  */
+	cbz	tmp, L(src1_aligned)
 L(do_misaligned):
-	ldrb	data1w, [src1], #1
-	ldrb	data2w, [src2], #1
-	cmp	data1w, #1
-	ccmp	data1w, data2w, #0, cs	/* NZCV = 0b0000.  */
+	ldrb	data1w, [src1], 1
+	ldrb	data2w, [src2], 1
+	cmp	data1w, 0
+	ccmp	data1w, data2w, 0, ne	/* NZCV = 0b0000.  */
 	b.ne	L(done)
-	tst	src1, #7
+	tst	src1, 7
 	b.ne	L(do_misaligned)
 
-L(loop_misaligned):
-	/* Test if we are within the last dword of the end of a 4K page.  If
-	   yes then jump back to the misaligned loop to copy a byte at a time.  */
-	and	tmp1, src2, #0xff8
-	eor	tmp1, tmp1, #0xff8
-	cbz	tmp1, L(do_misaligned)
-	ldr	data1, [src1], #8
-	ldr	data2, [src2], #8
-
-	sub	tmp1, data1, zeroones
-	orr	tmp2, data1, #REP8_7f
-	eor	diff, data1, data2	/* Non-zero if differences found.  */
-	bic	has_nul, tmp1, tmp2	/* Non-zero if NUL terminator.  */
+L(src1_aligned):
+	neg	shift, src2, lsl 3
+	bic	src2, src2, 7
+	ldr	data3, [src2], 8
+#ifdef __AARCH64EB__
+	rev	data3, data3
+#endif
+	lsr	tmp, zeroones, shift
+	orr	data3, data3, tmp
+	sub	has_nul, data3, zeroones
+	orr	tmp, data3, REP8_7f
+	bics	has_nul, has_nul, tmp
+	b.ne	L(tail)
+
+	sub	off1, src2, src1
+
+	.p2align 4
+
+L(loop_unaligned):
+	ldr	data3, [src1, off1]
+	ldr	data2, [src1, off2]
+#ifdef __AARCH64EB__
+	rev	data3, data3
+#endif
+	sub	has_nul, data3, zeroones
+	orr	tmp, data3, REP8_7f
+	ldr	data1, [src1], 8
+	bics	has_nul, has_nul, tmp
+	ccmp	data1, data2, 0, eq
+	b.eq	L(loop_unaligned)
+
+	lsl	tmp, has_nul, shift
+#ifdef __AARCH64EB__
+	rev	tmp, tmp
+#endif
+	eor	diff, data1, data2
+	orr	syndrome, diff, tmp
+	cbnz	syndrome, L(end)
+L(tail):
+	ldr	data1, [src1]
+	neg	shift, shift
+	lsr	data2, data3, shift
+	lsr	has_nul, has_nul, shift
+#ifdef __AARCH64EB__
+	rev     data2, data2
+	rev	has_nul, has_nul
+#endif
+	eor	diff, data1, data2
 	orr	syndrome, diff, has_nul
-	cbz	syndrome, L(loop_misaligned)
 	b	L(end)
 
 L(done):
-- 
GitLab


From 387d828adffcf1eb949f3141079c479793c59aac Mon Sep 17 00:00:00 2001
From: Joey Gouly <joey.gouly@arm.com>
Date: Tue, 1 Mar 2022 10:14:34 +0000
Subject: [PATCH 1050/1586] arm64: lib: Import latest version of Arm Optimized
 Routines' strncmp

Import the latest version of the Arm Optimized Routines strncmp function based
on the upstream code of string/aarch64/strncmp.S at commit 189dfefe37d5 from:
  https://github.com/ARM-software/optimized-routines

This latest version includes MTE support.

Note that for simplicity Arm have chosen to contribute this code to Linux under
GPLv2 rather than the original MIT OR Apache-2.0 WITH LLVM-exception license.
Arm is the sole copyright holder for this code.

Signed-off-by: Joey Gouly <joey.gouly@arm.com>
Cc: Robin Murphy <robin.murphy@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20220301101435.19327-3-joey.gouly@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/lib/strncmp.S | 234 +++++++++++++++++++++++----------------
 1 file changed, 141 insertions(+), 93 deletions(-)

diff --git a/arch/arm64/lib/strncmp.S b/arch/arm64/lib/strncmp.S
index e42bcfcd37e6f..a4884b97e9a81 100644
--- a/arch/arm64/lib/strncmp.S
+++ b/arch/arm64/lib/strncmp.S
@@ -1,9 +1,9 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * Copyright (c) 2013-2021, Arm Limited.
+ * Copyright (c) 2013-2022, Arm Limited.
  *
  * Adapted from the original at:
- * https://github.com/ARM-software/optimized-routines/blob/e823e3abf5f89ecb/string/aarch64/strncmp.S
+ * https://github.com/ARM-software/optimized-routines/blob/189dfefe37d54c5b/string/aarch64/strncmp.S
  */
 
 #include <linux/linkage.h>
@@ -11,14 +11,14 @@
 
 /* Assumptions:
  *
- * ARMv8-a, AArch64
+ * ARMv8-a, AArch64.
+ * MTE compatible.
  */
 
 #define L(label) .L ## label
 
 #define REP8_01 0x0101010101010101
 #define REP8_7f 0x7f7f7f7f7f7f7f7f
-#define REP8_80 0x8080808080808080
 
 /* Parameters and result.  */
 #define src1		x0
@@ -39,10 +39,24 @@
 #define tmp3		x10
 #define zeroones	x11
 #define pos		x12
-#define limit_wd	x13
-#define mask		x14
-#define endloop		x15
+#define mask		x13
+#define endloop		x14
 #define count		mask
+#define offset		pos
+#define neg_offset	x15
+
+/* Define endian dependent shift operations.
+   On big-endian early bytes are at MSB and on little-endian LSB.
+   LS_FW means shifting towards early bytes.
+   LS_BK means shifting towards later bytes.
+   */
+#ifdef __AARCH64EB__
+#define LS_FW lsl
+#define LS_BK lsr
+#else
+#define LS_FW lsr
+#define LS_BK lsl
+#endif
 
 SYM_FUNC_START_WEAK_PI(strncmp)
 	cbz	limit, L(ret0)
@@ -52,9 +66,6 @@ SYM_FUNC_START_WEAK_PI(strncmp)
 	and	count, src1, #7
 	b.ne	L(misaligned8)
 	cbnz	count, L(mutual_align)
-	/* Calculate the number of full and partial words -1.  */
-	sub	limit_wd, limit, #1	/* limit != 0, so no underflow.  */
-	lsr	limit_wd, limit_wd, #3	/* Convert to Dwords.  */
 
 	/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
 	   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
@@ -64,56 +75,52 @@ L(loop_aligned):
 	ldr	data1, [src1], #8
 	ldr	data2, [src2], #8
 L(start_realigned):
-	subs	limit_wd, limit_wd, #1
+	subs	limit, limit, #8
 	sub	tmp1, data1, zeroones
 	orr	tmp2, data1, #REP8_7f
 	eor	diff, data1, data2	/* Non-zero if differences found.  */
-	csinv	endloop, diff, xzr, pl	/* Last Dword or differences.  */
+	csinv	endloop, diff, xzr, hi	/* Last Dword or differences.  */
 	bics	has_nul, tmp1, tmp2	/* Non-zero if NUL terminator.  */
 	ccmp	endloop, #0, #0, eq
 	b.eq	L(loop_aligned)
 	/* End of main loop */
 
-	/* Not reached the limit, must have found the end or a diff.  */
-	tbz	limit_wd, #63, L(not_limit)
-
-	/* Limit % 8 == 0 => all bytes significant.  */
-	ands	limit, limit, #7
-	b.eq	L(not_limit)
-
-	lsl	limit, limit, #3	/* Bits -> bytes.  */
-	mov	mask, #~0
-#ifdef __AARCH64EB__
-	lsr	mask, mask, limit
-#else
-	lsl	mask, mask, limit
-#endif
-	bic	data1, data1, mask
-	bic	data2, data2, mask
-
-	/* Make sure that the NUL byte is marked in the syndrome.  */
-	orr	has_nul, has_nul, mask
-
-L(not_limit):
+L(full_check):
+#ifndef __AARCH64EB__
 	orr	syndrome, diff, has_nul
-
-#ifndef	__AARCH64EB__
+	add	limit, limit, 8	/* Rewind limit to before last subs. */
+L(syndrome_check):
+	/* Limit was reached. Check if the NUL byte or the difference
+	   is before the limit. */
 	rev	syndrome, syndrome
 	rev	data1, data1
-	/* The MS-non-zero bit of the syndrome marks either the first bit
-	   that is different, or the top bit of the first zero byte.
-	   Shifting left now will bring the critical information into the
-	   top bits.  */
 	clz	pos, syndrome
 	rev	data2, data2
 	lsl	data1, data1, pos
+	cmp	limit, pos, lsr #3
 	lsl	data2, data2, pos
 	/* But we need to zero-extend (char is unsigned) the value and then
 	   perform a signed 32-bit subtraction.  */
 	lsr	data1, data1, #56
 	sub	result, data1, data2, lsr #56
+	csel result, result, xzr, hi
 	ret
 #else
+	/* Not reached the limit, must have found the end or a diff.  */
+	tbz	limit, #63, L(not_limit)
+	add	tmp1, limit, 8
+	cbz	limit, L(not_limit)
+
+	lsl	limit, tmp1, #3	/* Bits -> bytes.  */
+	mov	mask, #~0
+	lsr	mask, mask, limit
+	bic	data1, data1, mask
+	bic	data2, data2, mask
+
+	/* Make sure that the NUL byte is marked in the syndrome.  */
+	orr	has_nul, has_nul, mask
+
+L(not_limit):
 	/* For big-endian we cannot use the trick with the syndrome value
 	   as carry-propagation can corrupt the upper bits if the trailing
 	   bytes in the string contain 0x01.  */
@@ -134,10 +141,11 @@ L(not_limit):
 	rev	has_nul, has_nul
 	orr	syndrome, diff, has_nul
 	clz	pos, syndrome
-	/* The MS-non-zero bit of the syndrome marks either the first bit
-	   that is different, or the top bit of the first zero byte.
+	/* The most-significant-non-zero bit of the syndrome marks either the
+	   first bit that is different, or the top bit of the first zero byte.
 	   Shifting left now will bring the critical information into the
 	   top bits.  */
+L(end_quick):
 	lsl	data1, data1, pos
 	lsl	data2, data2, pos
 	/* But we need to zero-extend (char is unsigned) the value and then
@@ -159,22 +167,12 @@ L(mutual_align):
 	neg	tmp3, count, lsl #3	/* 64 - bits(bytes beyond align). */
 	ldr	data2, [src2], #8
 	mov	tmp2, #~0
-	sub	limit_wd, limit, #1	/* limit != 0, so no underflow.  */
-#ifdef __AARCH64EB__
-	/* Big-endian.  Early bytes are at MSB.  */
-	lsl	tmp2, tmp2, tmp3	/* Shift (count & 63).  */
-#else
-	/* Little-endian.  Early bytes are at LSB.  */
-	lsr	tmp2, tmp2, tmp3	/* Shift (count & 63).  */
-#endif
-	and	tmp3, limit_wd, #7
-	lsr	limit_wd, limit_wd, #3
-	/* Adjust the limit. Only low 3 bits used, so overflow irrelevant.  */
-	add	limit, limit, count
-	add	tmp3, tmp3, count
+	LS_FW	tmp2, tmp2, tmp3	/* Shift (count & 63).  */
+	/* Adjust the limit and ensure it doesn't overflow.  */
+	adds	limit, limit, count
+	csinv	limit, limit, xzr, lo
 	orr	data1, data1, tmp2
 	orr	data2, data2, tmp2
-	add	limit_wd, limit_wd, tmp3, lsr #3
 	b	L(start_realigned)
 
 	.p2align 4
@@ -197,13 +195,11 @@ L(done):
 	/* Align the SRC1 to a dword by doing a bytewise compare and then do
 	   the dword loop.  */
 L(try_misaligned_words):
-	lsr	limit_wd, limit, #3
-	cbz	count, L(do_misaligned)
+	cbz	count, L(src1_aligned)
 
 	neg	count, count
 	and	count, count, #7
 	sub	limit, limit, count
-	lsr	limit_wd, limit, #3
 
 L(page_end_loop):
 	ldrb	data1w, [src1], #1
@@ -214,48 +210,100 @@ L(page_end_loop):
 	subs	count, count, #1
 	b.hi	L(page_end_loop)
 
-L(do_misaligned):
-	/* Prepare ourselves for the next page crossing.  Unlike the aligned
-	   loop, we fetch 1 less dword because we risk crossing bounds on
-	   SRC2.  */
-	mov	count, #8
-	subs	limit_wd, limit_wd, #1
-	b.lo	L(done_loop)
-L(loop_misaligned):
-	and	tmp2, src2, #0xff8
-	eor	tmp2, tmp2, #0xff8
-	cbz	tmp2, L(page_end_loop)
+	/* The following diagram explains the comparison of misaligned strings.
+	   The bytes are shown in natural order. For little-endian, it is
+	   reversed in the registers. The "x" bytes are before the string.
+	   The "|" separates data that is loaded at one time.
+	   src1     | a a a a a a a a | b b b c c c c c | . . .
+	   src2     | x x x x x a a a   a a a a a b b b | c c c c c . . .
+
+	   After shifting in each step, the data looks like this:
+	                STEP_A              STEP_B              STEP_C
+	   data1    a a a a a a a a     b b b c c c c c     b b b c c c c c
+	   data2    a a a a a a a a     b b b 0 0 0 0 0     0 0 0 c c c c c
 
+	   The bytes with "0" are eliminated from the syndrome via mask.
+
+	   Align SRC2 down to 16 bytes. This way we can read 16 bytes at a
+	   time from SRC2. The comparison happens in 3 steps. After each step
+	   the loop can exit, or read from SRC1 or SRC2. */
+L(src1_aligned):
+	/* Calculate offset from 8 byte alignment to string start in bits. No
+	   need to mask offset since shifts are ignoring upper bits. */
+	lsl	offset, src2, #3
+	bic	src2, src2, #0xf
+	mov	mask, -1
+	neg	neg_offset, offset
 	ldr	data1, [src1], #8
-	ldr	data2, [src2], #8
-	sub	tmp1, data1, zeroones
-	orr	tmp2, data1, #REP8_7f
-	eor	diff, data1, data2	/* Non-zero if differences found.  */
-	bics	has_nul, tmp1, tmp2	/* Non-zero if NUL terminator.  */
-	ccmp	diff, #0, #0, eq
-	b.ne	L(not_limit)
-	subs	limit_wd, limit_wd, #1
-	b.pl	L(loop_misaligned)
+	ldp	tmp1, tmp2, [src2], #16
+	LS_BK	mask, mask, neg_offset
+	and	neg_offset, neg_offset, #63	/* Need actual value for cmp later. */
+	/* Skip the first compare if data in tmp1 is irrelevant. */
+	tbnz	offset, 6, L(misaligned_mid_loop)
 
-L(done_loop):
-	/* We found a difference or a NULL before the limit was reached.  */
-	and	limit, limit, #7
-	cbz	limit, L(not_limit)
-	/* Read the last word.  */
-	sub	src1, src1, 8
-	sub	src2, src2, 8
-	ldr	data1, [src1, limit]
-	ldr	data2, [src2, limit]
-	sub	tmp1, data1, zeroones
-	orr	tmp2, data1, #REP8_7f
+L(loop_misaligned):
+	/* STEP_A: Compare full 8 bytes when there is enough data from SRC2.*/
+	LS_FW	data2, tmp1, offset
+	LS_BK	tmp1, tmp2, neg_offset
+	subs	limit, limit, #8
+	orr	data2, data2, tmp1	/* 8 bytes from SRC2 combined from two regs.*/
+	sub	has_nul, data1, zeroones
 	eor	diff, data1, data2	/* Non-zero if differences found.  */
-	bics	has_nul, tmp1, tmp2	/* Non-zero if NUL terminator.  */
-	ccmp	diff, #0, #0, eq
-	b.ne	L(not_limit)
+	orr	tmp3, data1, #REP8_7f
+	csinv	endloop, diff, xzr, hi	/* If limit, set to all ones. */
+	bic	has_nul, has_nul, tmp3	/* Non-zero if NUL byte found in SRC1. */
+	orr	tmp3, endloop, has_nul
+	cbnz	tmp3, L(full_check)
+
+	ldr	data1, [src1], #8
+L(misaligned_mid_loop):
+	/* STEP_B: Compare first part of data1 to second part of tmp2. */
+	LS_FW	data2, tmp2, offset
+#ifdef __AARCH64EB__
+	/* For big-endian we do a byte reverse to avoid carry-propagation
+	problem described above. This way we can reuse the has_nul in the
+	next step and also use syndrome value trick at the end. */
+	rev	tmp3, data1
+	#define data1_fixed tmp3
+#else
+	#define data1_fixed data1
+#endif
+	sub	has_nul, data1_fixed, zeroones
+	orr	tmp3, data1_fixed, #REP8_7f
+	eor	diff, data2, data1	/* Non-zero if differences found.  */
+	bic	has_nul, has_nul, tmp3	/* Non-zero if NUL terminator.  */
+#ifdef __AARCH64EB__
+	rev	has_nul, has_nul
+#endif
+	cmp	limit, neg_offset, lsr #3
+	orr	syndrome, diff, has_nul
+	bic	syndrome, syndrome, mask	/* Ignore later bytes. */
+	csinv	tmp3, syndrome, xzr, hi	/* If limit, set to all ones. */
+	cbnz	tmp3, L(syndrome_check)
+
+	/* STEP_C: Compare second part of data1 to first part of tmp1. */
+	ldp	tmp1, tmp2, [src2], #16
+	cmp	limit, #8
+	LS_BK	data2, tmp1, neg_offset
+	eor	diff, data2, data1	/* Non-zero if differences found.  */
+	orr	syndrome, diff, has_nul
+	and	syndrome, syndrome, mask	/* Ignore earlier bytes. */
+	csinv	tmp3, syndrome, xzr, hi	/* If limit, set to all ones. */
+	cbnz	tmp3, L(syndrome_check)
+
+	ldr	data1, [src1], #8
+	sub	limit, limit, #8
+	b	L(loop_misaligned)
+
+#ifdef	__AARCH64EB__
+L(syndrome_check):
+	clz	pos, syndrome
+	cmp	pos, limit, lsl #3
+	b.lo	L(end_quick)
+#endif
 
 L(ret0):
 	mov	result, #0
 	ret
-
 SYM_FUNC_END_PI(strncmp)
 EXPORT_SYMBOL_NOHWKASAN(strncmp)
-- 
GitLab


From e33c89256e66ba64ce5190c7f2c2741e619c6321 Mon Sep 17 00:00:00 2001
From: Joey Gouly <joey.gouly@arm.com>
Date: Tue, 1 Mar 2022 10:14:35 +0000
Subject: [PATCH 1051/1586] Revert "arm64: Mitigate MTE issues with
 str{n}cmp()"

This reverts commit 59a68d4138086c015ab8241c3267eec5550fbd44.

Now that the str{n}cmp functions have been updated to handle MTE
properly, the workaround to use the generic functions is no longer
needed.

Signed-off-by: Joey Gouly <joey.gouly@arm.com>
Cc: Robin Murphy <robin.murphy@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20220301101435.19327-4-joey.gouly@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/assembler.h | 5 -----
 arch/arm64/include/asm/string.h    | 2 --
 arch/arm64/lib/strcmp.S            | 2 +-
 arch/arm64/lib/strncmp.S           | 2 +-
 4 files changed, 2 insertions(+), 9 deletions(-)

diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index e8bd0af0141c3..8df412178efbd 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -535,11 +535,6 @@ alternative_endif
 #define EXPORT_SYMBOL_NOKASAN(name)	EXPORT_SYMBOL(name)
 #endif
 
-#ifdef CONFIG_KASAN_HW_TAGS
-#define EXPORT_SYMBOL_NOHWKASAN(name)
-#else
-#define EXPORT_SYMBOL_NOHWKASAN(name)	EXPORT_SYMBOL_NOKASAN(name)
-#endif
 	/*
 	 * Emit a 64-bit absolute little endian symbol reference in a way that
 	 * ensures that it will be resolved at build time, even when building a
diff --git a/arch/arm64/include/asm/string.h b/arch/arm64/include/asm/string.h
index 95f7686b728d7..3a3264ff47b97 100644
--- a/arch/arm64/include/asm/string.h
+++ b/arch/arm64/include/asm/string.h
@@ -12,13 +12,11 @@ extern char *strrchr(const char *, int c);
 #define __HAVE_ARCH_STRCHR
 extern char *strchr(const char *, int c);
 
-#ifndef CONFIG_KASAN_HW_TAGS
 #define __HAVE_ARCH_STRCMP
 extern int strcmp(const char *, const char *);
 
 #define __HAVE_ARCH_STRNCMP
 extern int strncmp(const char *, const char *, __kernel_size_t);
-#endif
 
 #define __HAVE_ARCH_STRLEN
 extern __kernel_size_t strlen(const char *);
diff --git a/arch/arm64/lib/strcmp.S b/arch/arm64/lib/strcmp.S
index 758de77afd2f7..e6815a3dd2656 100644
--- a/arch/arm64/lib/strcmp.S
+++ b/arch/arm64/lib/strcmp.S
@@ -187,4 +187,4 @@ L(done):
 	ret
 
 SYM_FUNC_END_PI(strcmp)
-EXPORT_SYMBOL_NOHWKASAN(strcmp)
+EXPORT_SYMBOL_NOKASAN(strcmp)
diff --git a/arch/arm64/lib/strncmp.S b/arch/arm64/lib/strncmp.S
index a4884b97e9a81..bc195cb866932 100644
--- a/arch/arm64/lib/strncmp.S
+++ b/arch/arm64/lib/strncmp.S
@@ -306,4 +306,4 @@ L(ret0):
 	mov	result, #0
 	ret
 SYM_FUNC_END_PI(strncmp)
-EXPORT_SYMBOL_NOHWKASAN(strncmp)
+EXPORT_SYMBOL_NOKASAN(strncmp)
-- 
GitLab


From a1ff03cd6fb9c501fff63a4a2bface9adcfa81cd Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <frederic@kernel.org>
Date: Wed, 2 Feb 2022 01:01:07 +0100
Subject: [PATCH 1052/1586] tick: Detect and fix jiffies update stall

On some rare cases, the timekeeper CPU may be delaying its jiffies
update duty for a while. Known causes include:

* The timekeeper is waiting on stop_machine in a MULTI_STOP_DISABLE_IRQ
  or MULTI_STOP_RUN state. Disabled interrupts prevent from timekeeping
  updates while waiting for the target CPU to complete its
  stop_machine() callback.

* The timekeeper vcpu has VMEXIT'ed for a long while due to some overload
  on the host.

Detect and fix these situations with emergency timekeeping catchups.

Original-patch-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/time/tick-sched.c | 17 +++++++++++++++++
 kernel/time/tick-sched.h |  4 ++++
 2 files changed, 21 insertions(+)

diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 17a283ce2b20f..c89f50a7e6903 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -169,6 +169,8 @@ static ktime_t tick_init_jiffy_update(void)
 	return period;
 }
 
+#define MAX_STALLED_JIFFIES 5
+
 static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
 {
 	int cpu = smp_processor_id();
@@ -196,6 +198,21 @@ static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
 	if (tick_do_timer_cpu == cpu)
 		tick_do_update_jiffies64(now);
 
+	/*
+	 * If jiffies update stalled for too long (timekeeper in stop_machine()
+	 * or VMEXIT'ed for several msecs), force an update.
+	 */
+	if (ts->last_tick_jiffies != jiffies) {
+		ts->stalled_jiffies = 0;
+		ts->last_tick_jiffies = READ_ONCE(jiffies);
+	} else {
+		if (++ts->stalled_jiffies == MAX_STALLED_JIFFIES) {
+			tick_do_update_jiffies64(now);
+			ts->stalled_jiffies = 0;
+			ts->last_tick_jiffies = READ_ONCE(jiffies);
+		}
+	}
+
 	if (ts->inidle)
 		ts->got_idle_tick = 1;
 }
diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h
index d952ae3934236..504649513399b 100644
--- a/kernel/time/tick-sched.h
+++ b/kernel/time/tick-sched.h
@@ -49,6 +49,8 @@ enum tick_nohz_mode {
  * @timer_expires_base:	Base time clock monotonic for @timer_expires
  * @next_timer:		Expiry time of next expiring timer for debugging purpose only
  * @tick_dep_mask:	Tick dependency mask - is set, if someone needs the tick
+ * @last_tick_jiffies:	Value of jiffies seen on last tick
+ * @stalled_jiffies:	Number of stalled jiffies detected across ticks
  */
 struct tick_sched {
 	struct hrtimer			sched_timer;
@@ -77,6 +79,8 @@ struct tick_sched {
 	u64				next_timer;
 	ktime_t				idle_expires;
 	atomic_t			tick_dep_mask;
+	unsigned long			last_tick_jiffies;
+	unsigned int			stalled_jiffies;
 };
 
 extern struct tick_sched *tick_get_tick_sched(int cpu);
-- 
GitLab


From 2984539959dbaf4e65e19bf90c2419304a81a985 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <frederic@kernel.org>
Date: Tue, 8 Feb 2022 17:16:33 +0100
Subject: [PATCH 1053/1586] tick/rcu: Remove obsolete rcu_needs_cpu()
 parameters

With the removal of CONFIG_RCU_FAST_NO_HZ, the parameters in
rcu_needs_cpu() are not necessary anymore. Simply remove them.

Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Paul E. McKenney <paulmck@kernel.org>
Cc: Paul Menzel <pmenzel@molgen.mpg.de>
---
 include/linux/rcutiny.h  |  3 +--
 include/linux/rcutree.h  |  2 +-
 kernel/rcu/tree.c        |  3 +--
 kernel/time/tick-sched.c | 10 ++++------
 4 files changed, 7 insertions(+), 11 deletions(-)

diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 858f4d429946d..5fed476f977f6 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -64,9 +64,8 @@ static inline void rcu_softirq_qs(void)
 		rcu_tasks_qs(current, (preempt)); \
 	} while (0)
 
-static inline int rcu_needs_cpu(u64 basemono, u64 *nextevt)
+static inline int rcu_needs_cpu(void)
 {
-	*nextevt = KTIME_MAX;
 	return 0;
 }
 
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 53209d6694001..6cc91291d0782 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -19,7 +19,7 @@
 
 void rcu_softirq_qs(void);
 void rcu_note_context_switch(bool preempt);
-int rcu_needs_cpu(u64 basem, u64 *nextevt);
+int rcu_needs_cpu(void);
 void rcu_cpu_stall_reset(void);
 
 /*
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index a4c25a6283b0b..80faf2273ce9d 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1086,9 +1086,8 @@ void rcu_irq_enter_irqson(void)
  * Just check whether or not this CPU has non-offloaded RCU callbacks
  * queued.
  */
-int rcu_needs_cpu(u64 basemono, u64 *nextevt)
+int rcu_needs_cpu(void)
 {
-	*nextevt = KTIME_MAX;
 	return !rcu_segcblist_empty(&this_cpu_ptr(&rcu_data)->cblist) &&
 		!rcu_rdp_is_offloaded(this_cpu_ptr(&rcu_data));
 }
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index c89f50a7e6903..566ad5bd83e96 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -785,7 +785,7 @@ static inline bool local_timer_softirq_pending(void)
 
 static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
 {
-	u64 basemono, next_tick, next_tmr, next_rcu, delta, expires;
+	u64 basemono, next_tick, delta, expires;
 	unsigned long basejiff;
 	unsigned int seq;
 
@@ -808,7 +808,7 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
 	 * minimal delta which brings us back to this place
 	 * immediately. Lather, rinse and repeat...
 	 */
-	if (rcu_needs_cpu(basemono, &next_rcu) || arch_needs_cpu() ||
+	if (rcu_needs_cpu() || arch_needs_cpu() ||
 	    irq_work_needs_cpu() || local_timer_softirq_pending()) {
 		next_tick = basemono + TICK_NSEC;
 	} else {
@@ -819,10 +819,8 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
 		 * disabled this also looks at the next expiring
 		 * hrtimer.
 		 */
-		next_tmr = get_next_timer_interrupt(basejiff, basemono);
-		ts->next_timer = next_tmr;
-		/* Take the next rcu event into account */
-		next_tick = next_rcu < next_tmr ? next_rcu : next_tmr;
+		next_tick = get_next_timer_interrupt(basejiff, basemono);
+		ts->next_timer = next_tick;
 	}
 
 	/*
-- 
GitLab


From 0345691b24c076655ce8f0f4bfd24cba3467ccbd Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <frederic@kernel.org>
Date: Tue, 8 Feb 2022 17:16:34 +0100
Subject: [PATCH 1054/1586] tick/rcu: Stop allowing RCU_SOFTIRQ in idle

RCU_SOFTIRQ used to be special in that it could be raised on purpose
within the idle path to prevent from stopping the tick. Some code still
prevents from unnecessary warnings related to this specific behaviour
while entering in dynticks-idle mode.

However the nohz layout has changed quite a bit in ten years, and the
removal of CONFIG_RCU_FAST_NO_HZ has been the final straw to this
safe-conduct. Now the RCU_SOFTIRQ vector is expected to be raised from
sane places.

A remaining corner case is admitted though when the vector is invoked
in fragile hotplug path.

Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Paul E. McKenney <paulmck@kernel.org>
Cc: Paul Menzel <pmenzel@molgen.mpg.de>
---
 include/linux/interrupt.h |  8 ++++++-
 kernel/time/tick-sched.c  | 50 +++++++++++++++++++++++++++++++--------
 2 files changed, 47 insertions(+), 11 deletions(-)

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 9367f1cb2e3c4..9613326d2f8af 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -579,7 +579,13 @@ enum
 	NR_SOFTIRQS
 };
 
-#define SOFTIRQ_STOP_IDLE_MASK (~(1 << RCU_SOFTIRQ))
+/*
+ * Ignoring the RCU vector after ksoftirqd is parked is fine
+ * because:
+ * 	1) rcutree_migrate_callbacks() takes care of the queue.
+ * 	2) rcu_report_dead() reports the final quiescent states.
+ */
+#define SOFTIRQ_HOTPLUG_SAFE_MASK (BIT(RCU_SOFTIRQ))
 
 /* map softirq index to softirq name. update 'softirq_to_name' in
  * kernel/softirq.c when adding a new softirq.
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 566ad5bd83e96..2d76c91b85de4 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -999,6 +999,45 @@ static void tick_nohz_full_update_tick(struct tick_sched *ts)
 	__tick_nohz_full_update_tick(ts, ktime_get());
 }
 
+/*
+ * A pending softirq outside an IRQ (or softirq disabled section) context
+ * should be waiting for ksoftirqd to handle it. Therefore we shouldn't
+ * reach here due to the need_resched() early check in can_stop_idle_tick().
+ *
+ * However if we are between CPUHP_AP_SMPBOOT_THREADS and CPU_TEARDOWN_CPU on the
+ * cpu_down() process, softirqs can still be raised while ksoftirqd is parked,
+ * triggering the below since wakep_softirqd() is ignored.
+ *
+ */
+static bool report_idle_softirq(void)
+{
+	static int ratelimit;
+	unsigned int pending = local_softirq_pending();
+
+	if (likely(!pending))
+		return false;
+
+	/* Some softirqs claim to be safe against hotplug and ksoftirqd parking */
+	if (!cpu_active(smp_processor_id())) {
+		pending &= ~SOFTIRQ_HOTPLUG_SAFE_MASK;
+		if (!pending)
+			return false;
+	}
+
+	if (ratelimit < 10)
+		return false;
+
+	/* On RT, softirqs handling may be waiting on some lock */
+	if (!local_bh_blocked())
+		return false;
+
+	pr_warn("NOHZ tick-stop error: local softirq work is pending, handler #%02x!!!\n",
+		pending);
+	ratelimit++;
+
+	return true;
+}
+
 static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
 {
 	/*
@@ -1025,17 +1064,8 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
 	if (need_resched())
 		return false;
 
-	if (unlikely(local_softirq_pending())) {
-		static int ratelimit;
-
-		if (ratelimit < 10 && !local_bh_blocked() &&
-		    (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) {
-			pr_warn("NOHZ tick-stop error: Non-RCU local softirq work is pending, handler #%02x!!!\n",
-				(unsigned int) local_softirq_pending());
-			ratelimit++;
-		}
+	if (unlikely(report_idle_softirq()))
 		return false;
-	}
 
 	if (tick_nohz_full_enabled()) {
 		/*
-- 
GitLab


From e941dc13fd3717122207d74539ab95da07ef797f Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Fri, 4 Mar 2022 13:17:33 -0800
Subject: [PATCH 1055/1586] Input: zinitix - do not report shadow fingers

I observed the following problem with the BT404 touch pad
running the Phosh UI:

When e.g. typing on the virtual keyboard pressing "g" would
produce "ggg".

After some analysis it turns out the firmware reports that three
fingers hit that coordinate at the same time, finger 0, 2 and
4 (of the five available 0,1,2,3,4).

DOWN
  Zinitix-TS 3-0020: finger 0 down (246, 395)
  Zinitix-TS 3-0020: finger 1 up (0, 0)
  Zinitix-TS 3-0020: finger 2 down (246, 395)
  Zinitix-TS 3-0020: finger 3 up (0, 0)
  Zinitix-TS 3-0020: finger 4 down (246, 395)
UP
  Zinitix-TS 3-0020: finger 0 up (246, 395)
  Zinitix-TS 3-0020: finger 2 up (246, 395)
  Zinitix-TS 3-0020: finger 4 up (246, 395)

This is one touch and release: i.e. this is all reported on
touch (down) and release.

There is a field in the struct touch_event called finger_cnt
which is actually a bitmask of the fingers active in the
event.

Rename this field finger_mask as this matches the use contents
better, then use for_each_set_bit() to iterate over just the
fingers that are actally active.

Factor out a finger reporting function zinitix_report_fingers()
to handle all fingers.

Also be more careful in reporting finger down/up: we were
reporting every event with input_mt_report_slot_state(..., true);
but this should only be reported on finger down or move,
not on finger up, so also add code to check p->sub_status
to see what is happening and report correctly.

After this my Zinitix BT404 touchscreen report fingers
flawlessly.

The vendor drive I have notably does not use the "finger_cnt"
and contains obviously incorrect code like this:

  if (touch_dev->touch_info.finger_cnt > MAX_SUPPORTED_FINGER_NUM)
      touch_dev->touch_info.finger_cnt = MAX_SUPPORTED_FINGER_NUM;

As MAX_SUPPORTED_FINGER_NUM is an ordinal and the field is
a bitmask this seems quite confused.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20220228233017.2270599-1-linus.walleij@linaro.org
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/touchscreen/zinitix.c | 44 +++++++++++++++++++++++------
 1 file changed, 35 insertions(+), 9 deletions(-)

diff --git a/drivers/input/touchscreen/zinitix.c b/drivers/input/touchscreen/zinitix.c
index 129ebc810de87..8bd03278ad9a7 100644
--- a/drivers/input/touchscreen/zinitix.c
+++ b/drivers/input/touchscreen/zinitix.c
@@ -135,7 +135,7 @@ struct point_coord {
 
 struct touch_event {
 	__le16	status;
-	u8	finger_cnt;
+	u8	finger_mask;
 	u8	time_stamp;
 	struct point_coord point_coord[MAX_SUPPORTED_FINGER_NUM];
 };
@@ -322,11 +322,32 @@ static int zinitix_send_power_on_sequence(struct bt541_ts_data *bt541)
 static void zinitix_report_finger(struct bt541_ts_data *bt541, int slot,
 				  const struct point_coord *p)
 {
+	u16 x, y;
+
+	if (unlikely(!(p->sub_status &
+		       (SUB_BIT_UP | SUB_BIT_DOWN | SUB_BIT_MOVE)))) {
+		dev_dbg(&bt541->client->dev, "unknown finger event %#02x\n",
+			p->sub_status);
+		return;
+	}
+
+	x = le16_to_cpu(p->x);
+	y = le16_to_cpu(p->y);
+
 	input_mt_slot(bt541->input_dev, slot);
-	input_mt_report_slot_state(bt541->input_dev, MT_TOOL_FINGER, true);
-	touchscreen_report_pos(bt541->input_dev, &bt541->prop,
-			       le16_to_cpu(p->x), le16_to_cpu(p->y), true);
-	input_report_abs(bt541->input_dev, ABS_MT_TOUCH_MAJOR, p->width);
+	if (input_mt_report_slot_state(bt541->input_dev, MT_TOOL_FINGER,
+				       !(p->sub_status & SUB_BIT_UP))) {
+		touchscreen_report_pos(bt541->input_dev,
+				       &bt541->prop, x, y, true);
+		input_report_abs(bt541->input_dev,
+				 ABS_MT_TOUCH_MAJOR, p->width);
+		dev_dbg(&bt541->client->dev, "finger %d %s (%u, %u)\n",
+			slot, p->sub_status & SUB_BIT_DOWN ? "down" : "move",
+			x, y);
+	} else {
+		dev_dbg(&bt541->client->dev, "finger %d up (%u, %u)\n",
+			slot, x, y);
+	}
 }
 
 static irqreturn_t zinitix_ts_irq_handler(int irq, void *bt541_handler)
@@ -334,6 +355,7 @@ static irqreturn_t zinitix_ts_irq_handler(int irq, void *bt541_handler)
 	struct bt541_ts_data *bt541 = bt541_handler;
 	struct i2c_client *client = bt541->client;
 	struct touch_event touch_event;
+	unsigned long finger_mask;
 	int error;
 	int i;
 
@@ -346,10 +368,14 @@ static irqreturn_t zinitix_ts_irq_handler(int irq, void *bt541_handler)
 		goto out;
 	}
 
-	for (i = 0; i < MAX_SUPPORTED_FINGER_NUM; i++)
-		if (touch_event.point_coord[i].sub_status & SUB_BIT_EXIST)
-			zinitix_report_finger(bt541, i,
-					      &touch_event.point_coord[i]);
+	finger_mask = touch_event.finger_mask;
+	for_each_set_bit(i, &finger_mask, MAX_SUPPORTED_FINGER_NUM) {
+		const struct point_coord *p = &touch_event.point_coord[i];
+
+		/* Only process contacts that are actually reported */
+		if (p->sub_status & SUB_BIT_EXIST)
+			zinitix_report_finger(bt541, i, p);
+	}
 
 	input_mt_sync_frame(bt541->input_dev);
 	input_sync(bt541->input_dev);
-- 
GitLab


From f96272a90d9eaea9933aaab704ddbd258feb3841 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <frederic@kernel.org>
Date: Tue, 8 Feb 2022 17:16:35 +0100
Subject: [PATCH 1056/1586] lib/irq_poll: Declare IRQ_POLL softirq vector as
 ksoftirqd-parking safe

The following warning may appear while setting a CPU down:

	NOHZ tick-stop error: Non-RCU local softirq work is pending, handler #20!!!

The IRQ_POLL_SOFTIRQ vector can be raised during the hotplug cpu_down()
path after ksoftirqd is parked and before the CPU actually dies. However
this is handled afterward at the CPUHP_IRQ_POLL_DEAD stage where the
queue gets migrated.

Hence this warning can be considered spurious and the vector can join
the "hotplug-safe" list.

Reported-and-tested-by: Paul Menzel <pmenzel@molgen.mpg.de>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Paul E. McKenney <paulmck@kernel.org>
Cc: Paul Menzel <pmenzel@molgen.mpg.de>
---
 include/linux/interrupt.h | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 9613326d2f8af..f40754caaefa4 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -580,12 +580,15 @@ enum
 };
 
 /*
- * Ignoring the RCU vector after ksoftirqd is parked is fine
- * because:
- * 	1) rcutree_migrate_callbacks() takes care of the queue.
+ * The following vectors can be safely ignored after ksoftirqd is parked:
+ *
+ * _ RCU:
+ * 	1) rcutree_migrate_callbacks() migrates the queue.
  * 	2) rcu_report_dead() reports the final quiescent states.
+ *
+ * _ IRQ_POLL: irq_poll_cpu_dead() migrates the queue
  */
-#define SOFTIRQ_HOTPLUG_SAFE_MASK (BIT(RCU_SOFTIRQ))
+#define SOFTIRQ_HOTPLUG_SAFE_MASK (BIT(RCU_SOFTIRQ) | BIT(IRQ_POLL_SOFTIRQ))
 
 /* map softirq index to softirq name. update 'softirq_to_name' in
  * kernel/softirq.c when adding a new softirq.
-- 
GitLab


From 0d060f230fa06c32d91e67978a3088be9635848e Mon Sep 17 00:00:00 2001
From: Stefan Berger <stefanb@linux.ibm.com>
Date: Sat, 27 Nov 2021 23:10:51 -0500
Subject: [PATCH 1057/1586] selftests: tpm2: Determine available PCR bank

Determine an available PCR bank to be used by a test case by querying the
capability TPM2_GET_CAP. The TPM2 returns TPML_PCR_SELECTIONS that
contains an array of TPMS_PCR_SELECTIONs indicating available PCR banks
and the bitmasks that show which PCRs are enabled in each bank. Collect
the data in a dictionary. From the dictionary determine the PCR bank that
has the PCRs enabled that the test needs. This avoids test failures with
TPM2's that either to not have a SHA-1 bank or whose SHA-1 bank is
disabled.

Signed-off-by: Stefan Berger <stefanb@linux.ibm.com>
Tested-by: Jarkko Sakkinen <jarkko@kernel.org>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 tools/testing/selftests/tpm2/tpm2.py       | 31 ++++++++++++++++++++++
 tools/testing/selftests/tpm2/tpm2_tests.py | 29 ++++++++++++++------
 2 files changed, 52 insertions(+), 8 deletions(-)

diff --git a/tools/testing/selftests/tpm2/tpm2.py b/tools/testing/selftests/tpm2/tpm2.py
index f34486cd7342d..057a4f49c79d9 100644
--- a/tools/testing/selftests/tpm2/tpm2.py
+++ b/tools/testing/selftests/tpm2/tpm2.py
@@ -56,6 +56,7 @@ TSS2_RESMGR_TPM_RC_LAYER = (11 << TSS2_RC_LAYER_SHIFT)
 
 TPM2_CAP_HANDLES = 0x00000001
 TPM2_CAP_COMMANDS = 0x00000002
+TPM2_CAP_PCRS = 0x00000005
 TPM2_CAP_TPM_PROPERTIES = 0x00000006
 
 TPM2_PT_FIXED = 0x100
@@ -712,3 +713,33 @@ class Client:
             pt += 1
 
         return handles
+
+    def get_cap_pcrs(self):
+        pcr_banks = {}
+
+        fmt = '>HII III'
+
+        cmd = struct.pack(fmt,
+                          TPM2_ST_NO_SESSIONS,
+                          struct.calcsize(fmt),
+                          TPM2_CC_GET_CAPABILITY,
+                          TPM2_CAP_PCRS, 0, 1)
+        rsp = self.send_cmd(cmd)[10:]
+        _, _, cnt = struct.unpack('>BII', rsp[:9])
+        rsp = rsp[9:]
+
+        # items are TPMS_PCR_SELECTION's
+        for i in range(0, cnt):
+              hash, sizeOfSelect = struct.unpack('>HB', rsp[:3])
+              rsp = rsp[3:]
+
+              pcrSelect = 0
+              if sizeOfSelect > 0:
+                  pcrSelect, = struct.unpack('%ds' % sizeOfSelect,
+                                             rsp[:sizeOfSelect])
+                  rsp = rsp[sizeOfSelect:]
+                  pcrSelect = int.from_bytes(pcrSelect, byteorder='big')
+
+              pcr_banks[hash] = pcrSelect
+
+        return pcr_banks
diff --git a/tools/testing/selftests/tpm2/tpm2_tests.py b/tools/testing/selftests/tpm2/tpm2_tests.py
index 9d764306887b7..e63a378199785 100644
--- a/tools/testing/selftests/tpm2/tpm2_tests.py
+++ b/tools/testing/selftests/tpm2/tpm2_tests.py
@@ -27,7 +27,17 @@ class SmokeTest(unittest.TestCase):
         result = self.client.unseal(self.root_key, blob, auth, None)
         self.assertEqual(data, result)
 
+    def determine_bank_alg(self, mask):
+        pcr_banks = self.client.get_cap_pcrs()
+        for bank_alg, pcrSelection in pcr_banks.items():
+            if pcrSelection & mask == mask:
+                return bank_alg
+        return None
+
     def test_seal_with_policy(self):
+        bank_alg = self.determine_bank_alg(1 << 16)
+        self.assertIsNotNone(bank_alg)
+
         handle = self.client.start_auth_session(tpm2.TPM2_SE_TRIAL)
 
         data = ('X' * 64).encode()
@@ -35,7 +45,7 @@ class SmokeTest(unittest.TestCase):
         pcrs = [16]
 
         try:
-            self.client.policy_pcr(handle, pcrs)
+            self.client.policy_pcr(handle, pcrs, bank_alg=bank_alg)
             self.client.policy_password(handle)
 
             policy_dig = self.client.get_policy_digest(handle)
@@ -47,7 +57,7 @@ class SmokeTest(unittest.TestCase):
         handle = self.client.start_auth_session(tpm2.TPM2_SE_POLICY)
 
         try:
-            self.client.policy_pcr(handle, pcrs)
+            self.client.policy_pcr(handle, pcrs, bank_alg=bank_alg)
             self.client.policy_password(handle)
 
             result = self.client.unseal(self.root_key, blob, auth, handle)
@@ -72,6 +82,9 @@ class SmokeTest(unittest.TestCase):
         self.assertEqual(rc, tpm2.TPM2_RC_AUTH_FAIL)
 
     def test_unseal_with_wrong_policy(self):
+        bank_alg = self.determine_bank_alg(1 << 16 | 1 << 1)
+        self.assertIsNotNone(bank_alg)
+
         handle = self.client.start_auth_session(tpm2.TPM2_SE_TRIAL)
 
         data = ('X' * 64).encode()
@@ -79,7 +92,7 @@ class SmokeTest(unittest.TestCase):
         pcrs = [16]
 
         try:
-            self.client.policy_pcr(handle, pcrs)
+            self.client.policy_pcr(handle, pcrs, bank_alg=bank_alg)
             self.client.policy_password(handle)
 
             policy_dig = self.client.get_policy_digest(handle)
@@ -91,13 +104,13 @@ class SmokeTest(unittest.TestCase):
         # Extend first a PCR that is not part of the policy and try to unseal.
         # This should succeed.
 
-        ds = tpm2.get_digest_size(tpm2.TPM2_ALG_SHA1)
-        self.client.extend_pcr(1, ('X' * ds).encode())
+        ds = tpm2.get_digest_size(bank_alg)
+        self.client.extend_pcr(1, ('X' * ds).encode(), bank_alg=bank_alg)
 
         handle = self.client.start_auth_session(tpm2.TPM2_SE_POLICY)
 
         try:
-            self.client.policy_pcr(handle, pcrs)
+            self.client.policy_pcr(handle, pcrs, bank_alg=bank_alg)
             self.client.policy_password(handle)
 
             result = self.client.unseal(self.root_key, blob, auth, handle)
@@ -109,14 +122,14 @@ class SmokeTest(unittest.TestCase):
 
         # Then, extend a PCR that is part of the policy and try to unseal.
         # This should fail.
-        self.client.extend_pcr(16, ('X' * ds).encode())
+        self.client.extend_pcr(16, ('X' * ds).encode(), bank_alg=bank_alg)
 
         handle = self.client.start_auth_session(tpm2.TPM2_SE_POLICY)
 
         rc = 0
 
         try:
-            self.client.policy_pcr(handle, pcrs)
+            self.client.policy_pcr(handle, pcrs, bank_alg=bank_alg)
             self.client.policy_password(handle)
 
             result = self.client.unseal(self.root_key, blob, auth, handle)
-- 
GitLab


From 2e8e4c8f6673247e22efc7985ce5497accd16f88 Mon Sep 17 00:00:00 2001
From: Tadeusz Struk <tstruk@gmail.com>
Date: Sat, 15 Jan 2022 17:26:26 -0800
Subject: [PATCH 1058/1586] tpm: Fix error handling in async work

When an invalid (non existing) handle is used in a TPM command,
that uses the resource manager interface (/dev/tpmrm0) the resource
manager tries to load it from its internal cache, but fails and
the tpm_dev_transmit returns an -EINVAL error to the caller.
The existing async handler doesn't handle these error cases
currently and the condition in the poll handler never returns
mask with EPOLLIN set.
The result is that the poll call blocks and the application gets stuck
until the user_read_timer wakes it up after 120 sec.
Change the tpm_dev_async_work function to handle error conditions
returned from tpm_dev_transmit they are also reflected in the poll mask
and a correct error code could passed back to the caller.

Cc: Jarkko Sakkinen <jarkko@kernel.org>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: <linux-integrity@vger.kernel.org>
Cc: <stable@vger.kernel.org>
Cc: <linux-kernel@vger.kernel.org>

Fixes: 9e1b74a63f77 ("tpm: add support for nonblocking operation")
Tested-by: Jarkko Sakkinen<jarkko@kernel.org>
Signed-off-by: Tadeusz Struk <tstruk@gmail.com>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 drivers/char/tpm/tpm-dev-common.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/char/tpm/tpm-dev-common.c b/drivers/char/tpm/tpm-dev-common.c
index c08cbb306636b..dc4c0a0a51290 100644
--- a/drivers/char/tpm/tpm-dev-common.c
+++ b/drivers/char/tpm/tpm-dev-common.c
@@ -69,7 +69,13 @@ static void tpm_dev_async_work(struct work_struct *work)
 	ret = tpm_dev_transmit(priv->chip, priv->space, priv->data_buffer,
 			       sizeof(priv->data_buffer));
 	tpm_put_ops(priv->chip);
-	if (ret > 0) {
+
+	/*
+	 * If ret is > 0 then tpm_dev_transmit returned the size of the
+	 * response. If ret is < 0 then tpm_dev_transmit failed and
+	 * returned an error code.
+	 */
+	if (ret != 0) {
 		priv->response_length = ret;
 		mod_timer(&priv->user_read_timer, jiffies + (120 * HZ));
 	}
-- 
GitLab


From 8335adb8f9d3ea783422d22883a327427c1dbee8 Mon Sep 17 00:00:00 2001
From: Tadeusz Struk <tstruk@gmail.com>
Date: Sat, 15 Jan 2022 17:26:27 -0800
Subject: [PATCH 1059/1586] selftests: tpm: add async space test with
 noneexisting handle

Add a test for /dev/tpmrm0 in async mode that checks if
the code handles invalid handles correctly.

Cc: Jarkko Sakkinen <jarkko@kernel.org>
Cc: Shuah Khan <shuah@kernel.org>
Cc: <linux-integrity@vger.kernel.org>
Cc: <linux-kselftest@vger.kernel.org>
Cc: <linux-kernel@vger.kernel.org>
Tested-by: Jarkko Sakkinen<jarkko@kernel.org>
Signed-off-by: Tadeusz Struk <tstruk@gmail.com>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 tools/testing/selftests/tpm2/tpm2_tests.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/tools/testing/selftests/tpm2/tpm2_tests.py b/tools/testing/selftests/tpm2/tpm2_tests.py
index e63a378199785..ffe98b5c8d22f 100644
--- a/tools/testing/selftests/tpm2/tpm2_tests.py
+++ b/tools/testing/selftests/tpm2/tpm2_tests.py
@@ -315,3 +315,19 @@ class AsyncTest(unittest.TestCase):
         log.debug("Calling get_cap in a NON_BLOCKING mode")
         async_client.get_cap(tpm2.TPM2_CAP_HANDLES, tpm2.HR_LOADED_SESSION)
         async_client.close()
+
+    def test_flush_invalid_context(self):
+        log = logging.getLogger(__name__)
+        log.debug(sys._getframe().f_code.co_name)
+
+        async_client = tpm2.Client(tpm2.Client.FLAG_SPACE | tpm2.Client.FLAG_NONBLOCK)
+        log.debug("Calling flush_context passing in an invalid handle ")
+        handle = 0x80123456
+        rc = 0
+        try:
+            async_client.flush_context(handle)
+        except OSError as e:
+            rc = e.errno
+
+        self.assertEqual(rc, 22)
+        async_client.close()
-- 
GitLab


From c51abd96837f600d8fd940b6ab8e2da578575504 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Thu, 13 Jan 2022 12:04:54 -0800
Subject: [PATCH 1060/1586] KEYS: fix length validation in
 keyctl_pkey_params_get_2()

In many cases, keyctl_pkey_params_get_2() is validating the user buffer
lengths against the wrong algorithm properties.  Fix it to check against
the correct properties.

Probably this wasn't noticed before because for all asymmetric keys of
the "public_key" subtype, max_data_size == max_sig_size == max_enc_size
== max_dec_size.  However, this isn't necessarily true for the
"asym_tpm" subtype (it should be, but it's not strictly validated).  Of
course, future key types could have different values as well.

Fixes: 00d60fd3b932 ("KEYS: Provide keyctls to drive the new key type ops for asymmetric keys [ver #2]")
Cc: <stable@vger.kernel.org> # v4.20+
Signed-off-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 security/keys/keyctl_pkey.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/security/keys/keyctl_pkey.c b/security/keys/keyctl_pkey.c
index 5de0d599a2748..97bc27bbf0797 100644
--- a/security/keys/keyctl_pkey.c
+++ b/security/keys/keyctl_pkey.c
@@ -135,15 +135,23 @@ static int keyctl_pkey_params_get_2(const struct keyctl_pkey_params __user *_par
 
 	switch (op) {
 	case KEYCTL_PKEY_ENCRYPT:
+		if (uparams.in_len  > info.max_dec_size ||
+		    uparams.out_len > info.max_enc_size)
+			return -EINVAL;
+		break;
 	case KEYCTL_PKEY_DECRYPT:
 		if (uparams.in_len  > info.max_enc_size ||
 		    uparams.out_len > info.max_dec_size)
 			return -EINVAL;
 		break;
 	case KEYCTL_PKEY_SIGN:
+		if (uparams.in_len  > info.max_data_size ||
+		    uparams.out_len > info.max_sig_size)
+			return -EINVAL;
+		break;
 	case KEYCTL_PKEY_VERIFY:
-		if (uparams.in_len  > info.max_sig_size ||
-		    uparams.out_len > info.max_data_size)
+		if (uparams.in_len  > info.max_data_size ||
+		    uparams.in2_len > info.max_sig_size)
 			return -EINVAL;
 		break;
 	default:
@@ -151,7 +159,7 @@ static int keyctl_pkey_params_get_2(const struct keyctl_pkey_params __user *_par
 	}
 
 	params->in_len  = uparams.in_len;
-	params->out_len = uparams.out_len;
+	params->out_len = uparams.out_len; /* Note: same as in2_len */
 	return 0;
 }
 
-- 
GitLab


From 8f2a7b518bb878c9a8c4f86432908a53e060da1f Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Tue, 18 Jan 2022 16:54:33 -0800
Subject: [PATCH 1061/1586] KEYS: x509: clearly distinguish between key and
 signature algorithms

An X.509 certificate has two, potentially different public key
algorithms: the one used by the certificate's key, and the one that was
used to sign the certificate.  Some of the naming made it unclear which
algorithm was meant.  Rename things appropriately:

    - x509_note_pkey_algo() => x509_note_sig_algo()
    - algo_oid => sig_algo

Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 crypto/asymmetric_keys/x509.asn1          |  2 +-
 crypto/asymmetric_keys/x509_cert_parser.c | 32 +++++++++++++----------
 2 files changed, 19 insertions(+), 15 deletions(-)

diff --git a/crypto/asymmetric_keys/x509.asn1 b/crypto/asymmetric_keys/x509.asn1
index 5c9f4e4a52310..92d59c32f96a8 100644
--- a/crypto/asymmetric_keys/x509.asn1
+++ b/crypto/asymmetric_keys/x509.asn1
@@ -7,7 +7,7 @@ Certificate ::= SEQUENCE {
 TBSCertificate ::= SEQUENCE {
 	version           [ 0 ]	Version DEFAULT,
 	serialNumber		CertificateSerialNumber ({ x509_note_serial }),
-	signature		AlgorithmIdentifier ({ x509_note_pkey_algo }),
+	signature		AlgorithmIdentifier ({ x509_note_sig_algo }),
 	issuer			Name ({ x509_note_issuer }),
 	validity		Validity,
 	subject			Name ({ x509_note_subject }),
diff --git a/crypto/asymmetric_keys/x509_cert_parser.c b/crypto/asymmetric_keys/x509_cert_parser.c
index 083405eb80c32..aec2396a7f7e1 100644
--- a/crypto/asymmetric_keys/x509_cert_parser.c
+++ b/crypto/asymmetric_keys/x509_cert_parser.c
@@ -24,9 +24,9 @@ struct x509_parse_context {
 	size_t		key_size;		/* Size of key data */
 	const void	*params;		/* Key parameters */
 	size_t		params_size;		/* Size of key parameters */
-	enum OID	key_algo;		/* Public key algorithm */
+	enum OID	key_algo;		/* Algorithm used by the cert's key */
 	enum OID	last_oid;		/* Last OID encountered */
-	enum OID	algo_oid;		/* Algorithm OID */
+	enum OID	sig_algo;		/* Algorithm used to sign the cert */
 	unsigned char	nr_mpi;			/* Number of MPIs stored */
 	u8		o_size;			/* Size of organizationName (O) */
 	u8		cn_size;		/* Size of commonName (CN) */
@@ -187,11 +187,10 @@ int x509_note_tbs_certificate(void *context, size_t hdrlen,
 }
 
 /*
- * Record the public key algorithm
+ * Record the algorithm that was used to sign this certificate.
  */
-int x509_note_pkey_algo(void *context, size_t hdrlen,
-			unsigned char tag,
-			const void *value, size_t vlen)
+int x509_note_sig_algo(void *context, size_t hdrlen, unsigned char tag,
+		       const void *value, size_t vlen)
 {
 	struct x509_parse_context *ctx = context;
 
@@ -263,22 +262,22 @@ int x509_note_pkey_algo(void *context, size_t hdrlen,
 rsa_pkcs1:
 	ctx->cert->sig->pkey_algo = "rsa";
 	ctx->cert->sig->encoding = "pkcs1";
-	ctx->algo_oid = ctx->last_oid;
+	ctx->sig_algo = ctx->last_oid;
 	return 0;
 ecrdsa:
 	ctx->cert->sig->pkey_algo = "ecrdsa";
 	ctx->cert->sig->encoding = "raw";
-	ctx->algo_oid = ctx->last_oid;
+	ctx->sig_algo = ctx->last_oid;
 	return 0;
 sm2:
 	ctx->cert->sig->pkey_algo = "sm2";
 	ctx->cert->sig->encoding = "raw";
-	ctx->algo_oid = ctx->last_oid;
+	ctx->sig_algo = ctx->last_oid;
 	return 0;
 ecdsa:
 	ctx->cert->sig->pkey_algo = "ecdsa";
 	ctx->cert->sig->encoding = "x962";
-	ctx->algo_oid = ctx->last_oid;
+	ctx->sig_algo = ctx->last_oid;
 	return 0;
 }
 
@@ -291,11 +290,16 @@ int x509_note_signature(void *context, size_t hdrlen,
 {
 	struct x509_parse_context *ctx = context;
 
-	pr_debug("Signature type: %u size %zu\n", ctx->last_oid, vlen);
+	pr_debug("Signature: alg=%u, size=%zu\n", ctx->last_oid, vlen);
 
-	if (ctx->last_oid != ctx->algo_oid) {
-		pr_warn("Got cert with pkey (%u) and sig (%u) algorithm OIDs\n",
-			ctx->algo_oid, ctx->last_oid);
+	/*
+	 * In X.509 certificates, the signature's algorithm is stored in two
+	 * places: inside the TBSCertificate (the data that is signed), and
+	 * alongside the signature.  These *must* match.
+	 */
+	if (ctx->last_oid != ctx->sig_algo) {
+		pr_warn("signatureAlgorithm (%u) differs from tbsCertificate.signature (%u)\n",
+			ctx->last_oid, ctx->sig_algo);
 		return -EINVAL;
 	}
 
-- 
GitLab


From 7804fe9e8dc70846ff2c683f4781ed75499b12ae Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Tue, 18 Jan 2022 16:54:34 -0800
Subject: [PATCH 1062/1586] KEYS: x509: remove unused fields

Remove unused fields from struct x509_parse_context.

Acked-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 crypto/asymmetric_keys/x509_cert_parser.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/crypto/asymmetric_keys/x509_cert_parser.c b/crypto/asymmetric_keys/x509_cert_parser.c
index aec2396a7f7e1..2899ed80bb18e 100644
--- a/crypto/asymmetric_keys/x509_cert_parser.c
+++ b/crypto/asymmetric_keys/x509_cert_parser.c
@@ -19,7 +19,6 @@
 struct x509_parse_context {
 	struct x509_certificate	*cert;		/* Certificate being constructed */
 	unsigned long	data;			/* Start of data */
-	const void	*cert_start;		/* Start of cert content */
 	const void	*key;			/* Key data */
 	size_t		key_size;		/* Size of key data */
 	const void	*params;		/* Key parameters */
@@ -27,7 +26,6 @@ struct x509_parse_context {
 	enum OID	key_algo;		/* Algorithm used by the cert's key */
 	enum OID	last_oid;		/* Last OID encountered */
 	enum OID	sig_algo;		/* Algorithm used to sign the cert */
-	unsigned char	nr_mpi;			/* Number of MPIs stored */
 	u8		o_size;			/* Size of organizationName (O) */
 	u8		cn_size;		/* Size of commonName (CN) */
 	u8		email_size;		/* Size of emailAddress */
-- 
GitLab


From 9f8b3f321f39d6ff63fb40673c3be61b73ba0a1d Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Tue, 18 Jan 2022 16:54:35 -0800
Subject: [PATCH 1063/1586] KEYS: x509: remove never-set ->unsupported_key flag

The X.509 parser always sets cert->pub->pkey_algo on success, since
x509_extract_key_data() is a mandatory action in the X.509 ASN.1
grammar, and it returns an error if the algorithm is unknown.  Thus,
remove the dead code which handled this field being NULL.  This results
in the ->unsupported_key flag never being set, so remove that too.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 crypto/asymmetric_keys/pkcs7_verify.c    | 7 ++-----
 crypto/asymmetric_keys/x509_parser.h     | 1 -
 crypto/asymmetric_keys/x509_public_key.c | 9 ---------
 3 files changed, 2 insertions(+), 15 deletions(-)

diff --git a/crypto/asymmetric_keys/pkcs7_verify.c b/crypto/asymmetric_keys/pkcs7_verify.c
index 0b4d07aa88111..d37b187faf9ae 100644
--- a/crypto/asymmetric_keys/pkcs7_verify.c
+++ b/crypto/asymmetric_keys/pkcs7_verify.c
@@ -226,9 +226,6 @@ static int pkcs7_verify_sig_chain(struct pkcs7_message *pkcs7,
 			return 0;
 		}
 
-		if (x509->unsupported_key)
-			goto unsupported_crypto_in_x509;
-
 		pr_debug("- issuer %s\n", x509->issuer);
 		sig = x509->sig;
 		if (sig->auth_ids[0])
@@ -245,7 +242,7 @@ static int pkcs7_verify_sig_chain(struct pkcs7_message *pkcs7,
 			 * authority.
 			 */
 			if (x509->unsupported_sig)
-				goto unsupported_crypto_in_x509;
+				goto unsupported_sig_in_x509;
 			x509->signer = x509;
 			pr_debug("- self-signed\n");
 			return 0;
@@ -309,7 +306,7 @@ static int pkcs7_verify_sig_chain(struct pkcs7_message *pkcs7,
 		might_sleep();
 	}
 
-unsupported_crypto_in_x509:
+unsupported_sig_in_x509:
 	/* Just prune the certificate chain at this point if we lack some
 	 * crypto module to go further.  Note, however, we don't want to set
 	 * sinfo->unsupported_crypto as the signed info block may still be
diff --git a/crypto/asymmetric_keys/x509_parser.h b/crypto/asymmetric_keys/x509_parser.h
index c233f136fb354..da854c94f1115 100644
--- a/crypto/asymmetric_keys/x509_parser.h
+++ b/crypto/asymmetric_keys/x509_parser.h
@@ -36,7 +36,6 @@ struct x509_certificate {
 	bool		seen;			/* Infinite recursion prevention */
 	bool		verified;
 	bool		self_signed;		/* T if self-signed (check unsupported_sig too) */
-	bool		unsupported_key;	/* T if key uses unsupported crypto */
 	bool		unsupported_sig;	/* T if signature uses unsupported crypto */
 	bool		blacklisted;
 };
diff --git a/crypto/asymmetric_keys/x509_public_key.c b/crypto/asymmetric_keys/x509_public_key.c
index fe14cae115b51..b03d04d78eb9d 100644
--- a/crypto/asymmetric_keys/x509_public_key.c
+++ b/crypto/asymmetric_keys/x509_public_key.c
@@ -33,9 +33,6 @@ int x509_get_sig_params(struct x509_certificate *cert)
 	sig->data = cert->tbs;
 	sig->data_size = cert->tbs_size;
 
-	if (!cert->pub->pkey_algo)
-		cert->unsupported_key = true;
-
 	if (!sig->pkey_algo)
 		cert->unsupported_sig = true;
 
@@ -173,12 +170,6 @@ static int x509_key_preparse(struct key_preparsed_payload *prep)
 
 	pr_devel("Cert Issuer: %s\n", cert->issuer);
 	pr_devel("Cert Subject: %s\n", cert->subject);
-
-	if (cert->unsupported_key) {
-		ret = -ENOPKG;
-		goto error_free_cert;
-	}
-
 	pr_devel("Cert Key Algo: %s\n", cert->pub->pkey_algo);
 	pr_devel("Cert Valid period: %lld-%lld\n", cert->valid_from, cert->valid_to);
 
-- 
GitLab


From 8bdc3e05cc78513b7495366ef9b962a3ea568e8e Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Tue, 18 Jan 2022 16:54:36 -0800
Subject: [PATCH 1064/1586] KEYS: x509: remove dead code that set
 ->unsupported_sig

The X.509 parser always sets cert->sig->pkey_algo and
cert->sig->hash_algo on success, since x509_note_sig_algo() is a
mandatory action in the X.509 ASN.1 grammar, and it returns an error if
the signature's algorithm is unknown.  Thus, remove the dead code which
handled these fields being NULL.

Acked-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 crypto/asymmetric_keys/x509_public_key.c | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/crypto/asymmetric_keys/x509_public_key.c b/crypto/asymmetric_keys/x509_public_key.c
index b03d04d78eb9d..8c77a297a82d4 100644
--- a/crypto/asymmetric_keys/x509_public_key.c
+++ b/crypto/asymmetric_keys/x509_public_key.c
@@ -33,15 +33,6 @@ int x509_get_sig_params(struct x509_certificate *cert)
 	sig->data = cert->tbs;
 	sig->data_size = cert->tbs_size;
 
-	if (!sig->pkey_algo)
-		cert->unsupported_sig = true;
-
-	/* We check the hash if we can - even if we can't then verify it */
-	if (!sig->hash_algo) {
-		cert->unsupported_sig = true;
-		return 0;
-	}
-
 	sig->s = kmemdup(cert->raw_sig, cert->raw_sig_size, GFP_KERNEL);
 	if (!sig->s)
 		return -ENOMEM;
-- 
GitLab


From 2dd634664d4128668c44bc9e497af24157f1eef4 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Wed, 19 Jan 2022 17:38:03 -0600
Subject: [PATCH 1065/1586] tpm: xen-tpmfront: Use struct_size() helper

Make use of the struct_size() helper instead of an open-coded version,
in order to avoid any potential type mistakes or integer overflows that,
in the worse scenario, could lead to heap overflows.

Also, address the following sparse warning:
drivers/char/tpm/xen-tpmfront.c:131:16: warning: using sizeof on a flexible structure

Link: https://github.com/KSPP/linux/issues/160
Link: https://github.com/KSPP/linux/issues/174
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 drivers/char/tpm/xen-tpmfront.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/char/tpm/xen-tpmfront.c b/drivers/char/tpm/xen-tpmfront.c
index da5b30771418f..f53e0cf1ec7e9 100644
--- a/drivers/char/tpm/xen-tpmfront.c
+++ b/drivers/char/tpm/xen-tpmfront.c
@@ -126,16 +126,16 @@ static void vtpm_cancel(struct tpm_chip *chip)
 	notify_remote_via_evtchn(priv->evtchn);
 }
 
-static unsigned int shr_data_offset(struct vtpm_shared_page *shr)
+static size_t shr_data_offset(struct vtpm_shared_page *shr)
 {
-	return sizeof(*shr) + sizeof(u32) * shr->nr_extra_pages;
+	return struct_size(shr, extra_pages, shr->nr_extra_pages);
 }
 
 static int vtpm_send(struct tpm_chip *chip, u8 *buf, size_t count)
 {
 	struct tpm_private *priv = dev_get_drvdata(&chip->dev);
 	struct vtpm_shared_page *shr = priv->shr;
-	unsigned int offset = shr_data_offset(shr);
+	size_t offset = shr_data_offset(shr);
 
 	u32 ordinal;
 	unsigned long duration;
@@ -177,7 +177,7 @@ static int vtpm_recv(struct tpm_chip *chip, u8 *buf, size_t count)
 {
 	struct tpm_private *priv = dev_get_drvdata(&chip->dev);
 	struct vtpm_shared_page *shr = priv->shr;
-	unsigned int offset = shr_data_offset(shr);
+	size_t offset = shr_data_offset(shr);
 	size_t length = shr->length;
 
 	if (shr->state == VTPM_STATE_IDLE)
-- 
GitLab


From 031495635b4668f94e964e037ca93d0d38bfde58 Mon Sep 17 00:00:00 2001
From: Vijay Balakrishna <vijayb@linux.microsoft.com>
Date: Wed, 2 Mar 2022 09:38:09 -0800
Subject: [PATCH 1066/1586] arm64: Do not defer reserve_crashkernel() for
 platforms with no DMA memory zones

The following patches resulted in deferring crash kernel reservation to
mem_init(), mainly aimed at platforms with DMA memory zones (no IOMMU),
in particular Raspberry Pi 4.

commit 1a8e1cef7603 ("arm64: use both ZONE_DMA and ZONE_DMA32")
commit 8424ecdde7df ("arm64: mm: Set ZONE_DMA size based on devicetree's dma-ranges")
commit 0a30c53573b0 ("arm64: mm: Move reserve_crashkernel() into mem_init()")
commit 2687275a5843 ("arm64: Force NO_BLOCK_MAPPINGS if crashkernel reservation is required")

Above changes introduced boot slowdown due to linear map creation for
all the memory banks with NO_BLOCK_MAPPINGS, see discussion[1].  The proposed
changes restore crash kernel reservation to earlier behavior thus avoids
slow boot, particularly for platforms with IOMMU (no DMA memory zones).

Tested changes to confirm no ~150ms boot slowdown on our SoC with IOMMU
and 8GB memory.  Also tested with ZONE_DMA and/or ZONE_DMA32 configs to confirm
no regression to deferring scheme of crash kernel memory reservation.
In both cases successfully collected kernel crash dump.

[1] https://lore.kernel.org/all/9436d033-579b-55fa-9b00-6f4b661c2dd7@linux.microsoft.com/

Signed-off-by: Vijay Balakrishna <vijayb@linux.microsoft.com>
Cc: stable@vger.kernel.org
Reviewed-by: Pasha Tatashin <pasha.tatashin@soleen.com>
Link: https://lore.kernel.org/r/1646242689-20744-1-git-send-email-vijayb@linux.microsoft.com
[will: Add #ifdef CONFIG_KEXEC_CORE guards to fix 'crashk_res' references in allnoconfig build]
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/mm/init.c | 36 ++++++++++++++++++++++++++++++++----
 arch/arm64/mm/mmu.c  | 32 +++++++++++++++++++++++++++++++-
 2 files changed, 63 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index db63cc885771a..919be440494fa 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -61,8 +61,34 @@ EXPORT_SYMBOL(memstart_addr);
  * unless restricted on specific platforms (e.g. 30-bit on Raspberry Pi 4).
  * In such case, ZONE_DMA32 covers the rest of the 32-bit addressable memory,
  * otherwise it is empty.
+ *
+ * Memory reservation for crash kernel either done early or deferred
+ * depending on DMA memory zones configs (ZONE_DMA) --
+ *
+ * In absence of ZONE_DMA configs arm64_dma_phys_limit initialized
+ * here instead of max_zone_phys().  This lets early reservation of
+ * crash kernel memory which has a dependency on arm64_dma_phys_limit.
+ * Reserving memory early for crash kernel allows linear creation of block
+ * mappings (greater than page-granularity) for all the memory bank rangs.
+ * In this scheme a comparatively quicker boot is observed.
+ *
+ * If ZONE_DMA configs are defined, crash kernel memory reservation
+ * is delayed until DMA zone memory range size initilazation performed in
+ * zone_sizes_init().  The defer is necessary to steer clear of DMA zone
+ * memory range to avoid overlap allocation.  So crash kernel memory boundaries
+ * are not known when mapping all bank memory ranges, which otherwise means
+ * not possible to exclude crash kernel range from creating block mappings
+ * so page-granularity mappings are created for the entire memory range.
+ * Hence a slightly slower boot is observed.
+ *
+ * Note: Page-granularity mapppings are necessary for crash kernel memory
+ * range for shrinking its size via /sys/kernel/kexec_crash_size interface.
  */
-phys_addr_t arm64_dma_phys_limit __ro_after_init;
+#if IS_ENABLED(CONFIG_ZONE_DMA) || IS_ENABLED(CONFIG_ZONE_DMA32)
+phys_addr_t __ro_after_init arm64_dma_phys_limit;
+#else
+const phys_addr_t arm64_dma_phys_limit = PHYS_MASK + 1;
+#endif
 
 #ifdef CONFIG_KEXEC_CORE
 /*
@@ -153,8 +179,6 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
 	if (!arm64_dma_phys_limit)
 		arm64_dma_phys_limit = dma32_phys_limit;
 #endif
-	if (!arm64_dma_phys_limit)
-		arm64_dma_phys_limit = PHYS_MASK + 1;
 	max_zone_pfns[ZONE_NORMAL] = max;
 
 	free_area_init(max_zone_pfns);
@@ -315,6 +339,9 @@ void __init arm64_memblock_init(void)
 
 	early_init_fdt_scan_reserved_mem();
 
+	if (!IS_ENABLED(CONFIG_ZONE_DMA) && !IS_ENABLED(CONFIG_ZONE_DMA32))
+		reserve_crashkernel();
+
 	high_memory = __va(memblock_end_of_DRAM() - 1) + 1;
 }
 
@@ -361,7 +388,8 @@ void __init bootmem_init(void)
 	 * request_standard_resources() depends on crashkernel's memory being
 	 * reserved, so do it here.
 	 */
-	reserve_crashkernel();
+	if (IS_ENABLED(CONFIG_ZONE_DMA) || IS_ENABLED(CONFIG_ZONE_DMA32))
+		reserve_crashkernel();
 
 	memblock_dump_all();
 }
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index acfae9b41cc8c..ed21bf83d0b75 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -517,7 +517,7 @@ static void __init map_mem(pgd_t *pgdp)
 	 */
 	BUILD_BUG_ON(pgd_index(direct_map_end - 1) == pgd_index(direct_map_end));
 
-	if (can_set_direct_map() || crash_mem_map || IS_ENABLED(CONFIG_KFENCE))
+	if (can_set_direct_map() || IS_ENABLED(CONFIG_KFENCE))
 		flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
 
 	/*
@@ -528,6 +528,17 @@ static void __init map_mem(pgd_t *pgdp)
 	 */
 	memblock_mark_nomap(kernel_start, kernel_end - kernel_start);
 
+#ifdef CONFIG_KEXEC_CORE
+	if (crash_mem_map) {
+		if (IS_ENABLED(CONFIG_ZONE_DMA) ||
+		    IS_ENABLED(CONFIG_ZONE_DMA32))
+			flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
+		else if (crashk_res.end)
+			memblock_mark_nomap(crashk_res.start,
+			    resource_size(&crashk_res));
+	}
+#endif
+
 	/* map all the memory banks */
 	for_each_mem_range(i, &start, &end) {
 		if (start >= end)
@@ -554,6 +565,25 @@ static void __init map_mem(pgd_t *pgdp)
 	__map_memblock(pgdp, kernel_start, kernel_end,
 		       PAGE_KERNEL, NO_CONT_MAPPINGS);
 	memblock_clear_nomap(kernel_start, kernel_end - kernel_start);
+
+	/*
+	 * Use page-level mappings here so that we can shrink the region
+	 * in page granularity and put back unused memory to buddy system
+	 * through /sys/kernel/kexec_crash_size interface.
+	 */
+#ifdef CONFIG_KEXEC_CORE
+	if (crash_mem_map &&
+	    !IS_ENABLED(CONFIG_ZONE_DMA) && !IS_ENABLED(CONFIG_ZONE_DMA32)) {
+		if (crashk_res.end) {
+			__map_memblock(pgdp, crashk_res.start,
+				       crashk_res.end + 1,
+				       PAGE_KERNEL,
+				       NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS);
+			memblock_clear_nomap(crashk_res.start,
+					     resource_size(&crashk_res));
+		}
+	}
+#endif
 }
 
 void mark_rodata_ro(void)
-- 
GitLab


From ee94b5a061b121305cae8ff46b7fa8325aa84b12 Mon Sep 17 00:00:00 2001
From: Sagar Patel <sagarmp@cs.unc.edu>
Date: Mon, 7 Mar 2022 17:24:13 -0500
Subject: [PATCH 1067/1586] arm64: drop unused includes of
 <linux/personality.h>

Drop several includes of <linux/personality.h> which are not used.
git-blame indicates they were used at some point, but they're not needed
anymore.

Signed-off-by: Sagar Patel <sagarmp@cs.unc.edu>
Link: https://lore.kernel.org/r/20220307222412.146506-1-sagarmp@cs.unc.edu
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/signal.c     | 1 -
 arch/arm64/kernel/sys_compat.c | 1 -
 arch/arm64/kernel/traps.c      | 1 -
 3 files changed, 3 deletions(-)

diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index d8aaf4b6f4320..fe83a42dac8f8 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -11,7 +11,6 @@
 #include <linux/errno.h>
 #include <linux/kernel.h>
 #include <linux/signal.h>
-#include <linux/personality.h>
 #include <linux/freezer.h>
 #include <linux/stddef.h>
 #include <linux/uaccess.h>
diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c
index db5159a3055fc..12c6864e51e13 100644
--- a/arch/arm64/kernel/sys_compat.c
+++ b/arch/arm64/kernel/sys_compat.c
@@ -9,7 +9,6 @@
 
 #include <linux/compat.h>
 #include <linux/cpufeature.h>
-#include <linux/personality.h>
 #include <linux/sched.h>
 #include <linux/sched/signal.h>
 #include <linux/slab.h>
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 70fc42470f138..bb878f52ca0ae 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -9,7 +9,6 @@
 #include <linux/bug.h>
 #include <linux/context_tracking.h>
 #include <linux/signal.h>
-#include <linux/personality.h>
 #include <linux/kallsyms.h>
 #include <linux/kprobes.h>
 #include <linux/spinlock.h>
-- 
GitLab


From 205295c7e1abba9c1db1f9fe075f22f71351887f Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Thu, 24 Feb 2022 18:41:21 +0000
Subject: [PATCH 1068/1586] perf/arm-cmn: Hide XP PUB events for CMN-600

CMN-600 doesn't have XP events for the PUB channel, but we missed
the appropriate check to avoid exposing them.

Fixes: 60d1504070c2 ("perf/arm-cmn: Support new IP features")
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Link: https://lore.kernel.org/r/4c108d39a0513def63acccf09ab52b328f242aeb.1645727871.git.robin.murphy@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/arm-cmn.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
index d0e1ce2c83bab..e924d458cd6df 100644
--- a/drivers/perf/arm-cmn.c
+++ b/drivers/perf/arm-cmn.c
@@ -595,6 +595,9 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj,
 		if ((intf & 4) && !(cmn->ports_used & BIT(intf & 3)))
 			return 0;
 
+		if (chan == 4 && cmn->model == CMN600)
+			return 0;
+
 		if ((chan == 5 && cmn->rsp_vc_num < 2) ||
 		    (chan == 6 && cmn->dat_vc_num < 2))
 			return 0;
-- 
GitLab


From 31fac565773981df43f018b2dbfbc7a3164f4b6c Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Thu, 24 Feb 2022 18:41:22 +0000
Subject: [PATCH 1069/1586] perf/arm-cmn: Update watchpoint format

From CMN-650 onwards, some of the fields in the watchpoint config
registers moved subtly enough to easily overlook. Watchpoint events are
still only partially supported on newer IPs - which in itself deserves
noting - but were not intended to become any *less* functional than on
CMN-600.

Fixes: 60d1504070c2 ("perf/arm-cmn: Support new IP features")
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Link: https://lore.kernel.org/r/e1ce4c2f1e4f73ab1c60c3a85e4037cd62dd6352.1645727871.git.robin.murphy@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/arm-cmn.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
index e924d458cd6df..9c1d82be7a2fe 100644
--- a/drivers/perf/arm-cmn.c
+++ b/drivers/perf/arm-cmn.c
@@ -71,9 +71,11 @@
 #define CMN_DTM_WPn(n)			(0x1A0 + (n) * 0x18)
 #define CMN_DTM_WPn_CONFIG(n)		(CMN_DTM_WPn(n) + 0x00)
 #define CMN_DTM_WPn_CONFIG_WP_DEV_SEL2	GENMASK_ULL(18,17)
-#define CMN_DTM_WPn_CONFIG_WP_COMBINE	BIT(6)
-#define CMN_DTM_WPn_CONFIG_WP_EXCLUSIVE	BIT(5)
-#define CMN_DTM_WPn_CONFIG_WP_GRP	BIT(4)
+#define CMN_DTM_WPn_CONFIG_WP_COMBINE	BIT(9)
+#define CMN_DTM_WPn_CONFIG_WP_EXCLUSIVE	BIT(8)
+#define CMN600_WPn_CONFIG_WP_COMBINE	BIT(6)
+#define CMN600_WPn_CONFIG_WP_EXCLUSIVE	BIT(5)
+#define CMN_DTM_WPn_CONFIG_WP_GRP	GENMASK_ULL(5, 4)
 #define CMN_DTM_WPn_CONFIG_WP_CHN_SEL	GENMASK_ULL(3, 1)
 #define CMN_DTM_WPn_CONFIG_WP_DEV_SEL	BIT(0)
 #define CMN_DTM_WPn_VAL(n)		(CMN_DTM_WPn(n) + 0x08)
@@ -155,6 +157,7 @@
 #define CMN_CONFIG_WP_COMBINE		GENMASK_ULL(27, 24)
 #define CMN_CONFIG_WP_DEV_SEL		GENMASK_ULL(50, 48)
 #define CMN_CONFIG_WP_CHN_SEL		GENMASK_ULL(55, 51)
+/* Note that we don't yet support the tertiary match group on newer IPs */
 #define CMN_CONFIG_WP_GRP		BIT_ULL(56)
 #define CMN_CONFIG_WP_EXCLUSIVE		BIT_ULL(57)
 #define CMN_CONFIG1_WP_VAL		GENMASK_ULL(63, 0)
@@ -908,15 +911,18 @@ static u32 arm_cmn_wp_config(struct perf_event *event)
 	u32 grp = CMN_EVENT_WP_GRP(event);
 	u32 exc = CMN_EVENT_WP_EXCLUSIVE(event);
 	u32 combine = CMN_EVENT_WP_COMBINE(event);
+	bool is_cmn600 = to_cmn(event->pmu)->model == CMN600;
 
 	config = FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_DEV_SEL, dev) |
 		 FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_CHN_SEL, chn) |
 		 FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_GRP, grp) |
-		 FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_EXCLUSIVE, exc) |
 		 FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_DEV_SEL2, dev >> 1);
+	if (exc)
+		config |= is_cmn600 ? CMN600_WPn_CONFIG_WP_EXCLUSIVE :
+				      CMN_DTM_WPn_CONFIG_WP_EXCLUSIVE;
 	if (combine && !grp)
-		config |= CMN_DTM_WPn_CONFIG_WP_COMBINE;
-
+		config |= is_cmn600 ? CMN600_WPn_CONFIG_WP_COMBINE :
+				      CMN_DTM_WPn_CONFIG_WP_COMBINE;
 	return config;
 }
 
-- 
GitLab


From 819a47d24b61b5e2d660d19c8798f0f9b4498b73 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Sun, 27 Feb 2022 17:52:32 +0900
Subject: [PATCH 1070/1586] arm64: clean up tools Makefile

Remove unused gen-y.

Remove redundant $(shell ...) because 'mkdir' is done in cmd_gen_cpucaps.

Replace $(filter-out $(PHONY), $^) with the $(real-prereqs) shorthand.

The '&&' in cmd_gen_cpucaps should be replaced with ';' because it is
run under 'set -e' environment.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Link: https://lore.kernel.org/r/20220227085232.206529-1-masahiroy@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/tools/Makefile | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/tools/Makefile b/arch/arm64/tools/Makefile
index 932b4fe5c7684..cf1307188150f 100644
--- a/arch/arm64/tools/Makefile
+++ b/arch/arm64/tools/Makefile
@@ -5,18 +5,14 @@ kapi := $(gen)/asm
 
 kapi-hdrs-y := $(kapi)/cpucaps.h
 
-targets += $(addprefix ../../../,$(gen-y) $(kapi-hdrs-y))
+targets += $(addprefix ../../../, $(kapi-hdrs-y))
 
 PHONY += kapi
 
-kapi:   $(kapi-hdrs-y) $(gen-y)
-
-# Create output directory if not already present
-_dummy := $(shell [ -d '$(kapi)' ] || mkdir -p '$(kapi)')
+kapi:   $(kapi-hdrs-y)
 
 quiet_cmd_gen_cpucaps = GEN     $@
-      cmd_gen_cpucaps = mkdir -p $(dir $@) && \
-                     $(AWK) -f $(filter-out $(PHONY),$^) > $@
+      cmd_gen_cpucaps = mkdir -p $(dir $@); $(AWK) -f $(real-prereqs) > $@
 
 $(kapi)/cpucaps.h: $(src)/gen-cpucaps.awk $(src)/cpucaps FORCE
 	$(call if_changed,gen_cpucaps)
-- 
GitLab


From 805bbdf28b271ed82b204cfd58b6eb456462ea49 Mon Sep 17 00:00:00 2001
From: Bharat Bhushan <bbhushan2@marvell.com>
Date: Fri, 11 Feb 2022 10:23:43 +0530
Subject: [PATCH 1071/1586] dt-bindings: perf: marvell: cn10k ddr performance
 monitor

Add binding documentation for the Marvell CN10k DDR
performance monitor unit.

Signed-off-by: Bharat Bhushan <bbhushan2@marvell.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20220211045346.17894-2-bbhushan2@marvell.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 .../bindings/perf/marvell-cn10k-ddr.yaml      | 37 +++++++++++++++++++
 1 file changed, 37 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/perf/marvell-cn10k-ddr.yaml

diff --git a/Documentation/devicetree/bindings/perf/marvell-cn10k-ddr.yaml b/Documentation/devicetree/bindings/perf/marvell-cn10k-ddr.yaml
new file mode 100644
index 0000000000000..a18dd0a8c43a2
--- /dev/null
+++ b/Documentation/devicetree/bindings/perf/marvell-cn10k-ddr.yaml
@@ -0,0 +1,37 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/perf/marvell-cn10k-ddr.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Marvell CN10K DDR performance monitor
+
+maintainers:
+  - Bharat Bhushan <bbhushan2@marvell.com>
+
+properties:
+  compatible:
+    items:
+      - enum:
+          - marvell,cn10k-ddr-pmu
+
+  reg:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+
+additionalProperties: false
+
+examples:
+  - |
+    bus {
+        #address-cells = <2>;
+        #size-cells = <2>;
+
+        pmu@87e1c0000000 {
+            compatible = "marvell,cn10k-ddr-pmu";
+            reg = <0x87e1 0xc0000000 0x0 0x10000>;
+        };
+    };
-- 
GitLab


From 7cf83e222bce0f135f9c2714a49623cbb9fbde29 Mon Sep 17 00:00:00 2001
From: Bharat Bhushan <bbhushan2@marvell.com>
Date: Fri, 11 Feb 2022 10:23:44 +0530
Subject: [PATCH 1072/1586] perf/marvell: CN10k DDR performance monitor support

Marvell CN10k DRAM Subsystem (DSS) supports eight event counters for
monitoring performance and software can program each counter to monitor
any of the defined performance event. Performance events are for
interface between the DDR controller and the PHY, interface between the
DDR Controller and the CHI interconnect, or within the DDR Controller.
Additionally DSS also supports two fixed performance event counters, one
for number of ddr reads and other for ddr writes.

This patch add basic support for these performance monitoring events
on CN10k.

Signed-off-by: Bharat Bhushan <bbhushan2@marvell.com>
Reviewed-by: Bhaskara Budiredla <bbudiredla@marvell.com>
Link: https://lore.kernel.org/r/20220211045346.17894-3-bbhushan2@marvell.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/Makefile                |   1 +
 drivers/perf/marvell_cn10k_ddr_pmu.c | 601 +++++++++++++++++++++++++++
 2 files changed, 602 insertions(+)
 create mode 100644 drivers/perf/marvell_cn10k_ddr_pmu.c

diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
index 2db5418d5b0aa..df52fb5ea2a6b 100644
--- a/drivers/perf/Makefile
+++ b/drivers/perf/Makefile
@@ -15,3 +15,4 @@ obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o
 obj-$(CONFIG_ARM_SPE_PMU) += arm_spe_pmu.o
 obj-$(CONFIG_ARM_DMC620_PMU) += arm_dmc620_pmu.o
 obj-$(CONFIG_MARVELL_CN10K_TAD_PMU) += marvell_cn10k_tad_pmu.o
+obj-$(CONFIG_MARVELL_CN10K_DDR_PMU) += marvell_cn10k_ddr_pmu.o
diff --git a/drivers/perf/marvell_cn10k_ddr_pmu.c b/drivers/perf/marvell_cn10k_ddr_pmu.c
new file mode 100644
index 0000000000000..d93268943e234
--- /dev/null
+++ b/drivers/perf/marvell_cn10k_ddr_pmu.c
@@ -0,0 +1,601 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Marvell CN10K DRAM Subsystem (DSS) Performance Monitor Driver
+ *
+ * Copyright (C) 2021 Marvell.
+ */
+
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/perf_event.h>
+
+/* Performance Counters Operating Mode Control Registers */
+#define DDRC_PERF_CNT_OP_MODE_CTRL	0x8020
+#define OP_MODE_CTRL_VAL_MANNUAL	0x1
+
+/* Performance Counters Start Operation Control Registers */
+#define DDRC_PERF_CNT_START_OP_CTRL	0x8028
+#define START_OP_CTRL_VAL_START		0x1ULL
+#define START_OP_CTRL_VAL_ACTIVE	0x2
+
+/* Performance Counters End Operation Control Registers */
+#define DDRC_PERF_CNT_END_OP_CTRL	0x8030
+#define END_OP_CTRL_VAL_END		0x1ULL
+
+/* Performance Counters End Status Registers */
+#define DDRC_PERF_CNT_END_STATUS		0x8038
+#define END_STATUS_VAL_END_TIMER_MODE_END	0x1
+
+/* Performance Counters Configuration Registers */
+#define DDRC_PERF_CFG_BASE		0x8040
+
+/* 8 Generic event counter + 2 fixed event counters */
+#define DDRC_PERF_NUM_GEN_COUNTERS	8
+#define DDRC_PERF_NUM_FIX_COUNTERS	2
+#define DDRC_PERF_READ_COUNTER_IDX	DDRC_PERF_NUM_GEN_COUNTERS
+#define DDRC_PERF_WRITE_COUNTER_IDX	(DDRC_PERF_NUM_GEN_COUNTERS + 1)
+#define DDRC_PERF_NUM_COUNTERS		(DDRC_PERF_NUM_GEN_COUNTERS + \
+					 DDRC_PERF_NUM_FIX_COUNTERS)
+
+/* Generic event counter registers */
+#define DDRC_PERF_CFG(n)		(DDRC_PERF_CFG_BASE + 8 * (n))
+#define EVENT_ENABLE			BIT_ULL(63)
+
+/* Two dedicated event counters for DDR reads and writes */
+#define EVENT_DDR_READS			101
+#define EVENT_DDR_WRITES		100
+
+/*
+ * programmable events IDs in programmable event counters.
+ * DO NOT change these event-id numbers, they are used to
+ * program event bitmap in h/w.
+ */
+#define EVENT_OP_IS_ZQLATCH			55
+#define EVENT_OP_IS_ZQSTART			54
+#define EVENT_OP_IS_TCR_MRR			53
+#define EVENT_OP_IS_DQSOSC_MRR			52
+#define EVENT_OP_IS_DQSOSC_MPC			51
+#define EVENT_VISIBLE_WIN_LIMIT_REACHED_WR	50
+#define EVENT_VISIBLE_WIN_LIMIT_REACHED_RD	49
+#define EVENT_BSM_STARVATION			48
+#define EVENT_BSM_ALLOC				47
+#define EVENT_LPR_REQ_WITH_NOCREDIT		46
+#define EVENT_HPR_REQ_WITH_NOCREDIT		45
+#define EVENT_OP_IS_ZQCS			44
+#define EVENT_OP_IS_ZQCL			43
+#define EVENT_OP_IS_LOAD_MODE			42
+#define EVENT_OP_IS_SPEC_REF			41
+#define EVENT_OP_IS_CRIT_REF			40
+#define EVENT_OP_IS_REFRESH			39
+#define EVENT_OP_IS_ENTER_MPSM			35
+#define EVENT_OP_IS_ENTER_POWERDOWN		31
+#define EVENT_OP_IS_ENTER_SELFREF		27
+#define EVENT_WAW_HAZARD			26
+#define EVENT_RAW_HAZARD			25
+#define EVENT_WAR_HAZARD			24
+#define EVENT_WRITE_COMBINE			23
+#define EVENT_RDWR_TRANSITIONS			22
+#define EVENT_PRECHARGE_FOR_OTHER		21
+#define EVENT_PRECHARGE_FOR_RDWR		20
+#define EVENT_OP_IS_PRECHARGE			19
+#define EVENT_OP_IS_MWR				18
+#define EVENT_OP_IS_WR				17
+#define EVENT_OP_IS_RD				16
+#define EVENT_OP_IS_RD_ACTIVATE			15
+#define EVENT_OP_IS_RD_OR_WR			14
+#define EVENT_OP_IS_ACTIVATE			13
+#define EVENT_WR_XACT_WHEN_CRITICAL		12
+#define EVENT_LPR_XACT_WHEN_CRITICAL		11
+#define EVENT_HPR_XACT_WHEN_CRITICAL		10
+#define EVENT_DFI_RD_DATA_CYCLES		9
+#define EVENT_DFI_WR_DATA_CYCLES		8
+#define EVENT_ACT_BYPASS			7
+#define EVENT_READ_BYPASS			6
+#define EVENT_HIF_HI_PRI_RD			5
+#define EVENT_HIF_RMW				4
+#define EVENT_HIF_RD				3
+#define EVENT_HIF_WR				2
+#define EVENT_HIF_RD_OR_WR			1
+
+/* Event counter value registers */
+#define DDRC_PERF_CNT_VALUE_BASE		0x8080
+#define DDRC_PERF_CNT_VALUE(n)	(DDRC_PERF_CNT_VALUE_BASE + 8 * (n))
+
+/* Fixed event counter enable/disable register */
+#define DDRC_PERF_CNT_FREERUN_EN	0x80C0
+#define DDRC_PERF_FREERUN_WRITE_EN	0x1
+#define DDRC_PERF_FREERUN_READ_EN	0x2
+
+/* Fixed event counter control register */
+#define DDRC_PERF_CNT_FREERUN_CTRL	0x80C8
+#define DDRC_FREERUN_WRITE_CNT_CLR	0x1
+#define DDRC_FREERUN_READ_CNT_CLR	0x2
+
+/* Fixed event counter value register */
+#define DDRC_PERF_CNT_VALUE_WR_OP	0x80D0
+#define DDRC_PERF_CNT_VALUE_RD_OP	0x80D8
+#define DDRC_PERF_CNT_VALUE_OVERFLOW	BIT_ULL(48)
+#define DDRC_PERF_CNT_MAX_VALUE		GENMASK_ULL(48, 0)
+
+struct cn10k_ddr_pmu {
+	struct pmu pmu;
+	void __iomem *base;
+	unsigned int cpu;
+	struct	device *dev;
+	int active_events;
+	struct perf_event *events[DDRC_PERF_NUM_COUNTERS];
+};
+
+#define to_cn10k_ddr_pmu(p)	container_of(p, struct cn10k_ddr_pmu, pmu)
+
+static ssize_t cn10k_ddr_pmu_event_show(struct device *dev,
+					struct device_attribute *attr,
+					char *page)
+{
+	struct perf_pmu_events_attr *pmu_attr;
+
+	pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
+	return sysfs_emit(page, "event=0x%02llx\n", pmu_attr->id);
+
+}
+
+#define CN10K_DDR_PMU_EVENT_ATTR(_name, _id)				     \
+	PMU_EVENT_ATTR_ID(_name, cn10k_ddr_pmu_event_show, _id)
+
+static struct attribute *cn10k_ddr_perf_events_attrs[] = {
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_hif_rd_or_wr_access, EVENT_HIF_RD_OR_WR),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_hif_wr_access, EVENT_HIF_WR),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_hif_rd_access, EVENT_HIF_RD),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_hif_rmw_access, EVENT_HIF_RMW),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_hif_pri_rdaccess, EVENT_HIF_HI_PRI_RD),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_rd_bypass_access, EVENT_READ_BYPASS),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_act_bypass_access, EVENT_ACT_BYPASS),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_dif_wr_data_access, EVENT_DFI_WR_DATA_CYCLES),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_dif_rd_data_access, EVENT_DFI_RD_DATA_CYCLES),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_hpri_sched_rd_crit_access,
+					EVENT_HPR_XACT_WHEN_CRITICAL),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_lpri_sched_rd_crit_access,
+					EVENT_LPR_XACT_WHEN_CRITICAL),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_wr_trxn_crit_access,
+					EVENT_WR_XACT_WHEN_CRITICAL),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_active_access, EVENT_OP_IS_ACTIVATE),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_rd_or_wr_access, EVENT_OP_IS_RD_OR_WR),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_rd_active_access, EVENT_OP_IS_RD_ACTIVATE),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_read, EVENT_OP_IS_RD),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_write, EVENT_OP_IS_WR),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_mwr, EVENT_OP_IS_MWR),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_precharge, EVENT_OP_IS_PRECHARGE),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_precharge_for_rdwr, EVENT_PRECHARGE_FOR_RDWR),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_precharge_for_other,
+					EVENT_PRECHARGE_FOR_OTHER),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_rdwr_transitions, EVENT_RDWR_TRANSITIONS),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_write_combine, EVENT_WRITE_COMBINE),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_war_hazard, EVENT_WAR_HAZARD),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_raw_hazard, EVENT_RAW_HAZARD),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_waw_hazard, EVENT_WAW_HAZARD),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_enter_selfref, EVENT_OP_IS_ENTER_SELFREF),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_enter_powerdown, EVENT_OP_IS_ENTER_POWERDOWN),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_enter_mpsm, EVENT_OP_IS_ENTER_MPSM),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_refresh, EVENT_OP_IS_REFRESH),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_crit_ref, EVENT_OP_IS_CRIT_REF),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_spec_ref, EVENT_OP_IS_SPEC_REF),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_load_mode, EVENT_OP_IS_LOAD_MODE),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_zqcl, EVENT_OP_IS_ZQCL),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_wr_access, EVENT_OP_IS_ZQCS),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_hpr_req_with_nocredit,
+					EVENT_HPR_REQ_WITH_NOCREDIT),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_lpr_req_with_nocredit,
+					EVENT_LPR_REQ_WITH_NOCREDIT),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_bsm_alloc, EVENT_BSM_ALLOC),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_bsm_starvation, EVENT_BSM_STARVATION),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_win_limit_reached_rd,
+					EVENT_VISIBLE_WIN_LIMIT_REACHED_RD),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_win_limit_reached_wr,
+					EVENT_VISIBLE_WIN_LIMIT_REACHED_WR),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_dqsosc_mpc, EVENT_OP_IS_DQSOSC_MPC),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_dqsosc_mrr, EVENT_OP_IS_DQSOSC_MRR),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_tcr_mrr, EVENT_OP_IS_TCR_MRR),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_zqstart, EVENT_OP_IS_ZQSTART),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_zqlatch, EVENT_OP_IS_ZQLATCH),
+	/* Free run event counters */
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_ddr_reads, EVENT_DDR_READS),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_ddr_writes, EVENT_DDR_WRITES),
+	NULL
+};
+
+static struct attribute_group cn10k_ddr_perf_events_attr_group = {
+	.name = "events",
+	.attrs = cn10k_ddr_perf_events_attrs,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-8");
+
+static struct attribute *cn10k_ddr_perf_format_attrs[] = {
+	&format_attr_event.attr,
+	NULL,
+};
+
+static struct attribute_group cn10k_ddr_perf_format_attr_group = {
+	.name = "format",
+	.attrs = cn10k_ddr_perf_format_attrs,
+};
+
+static ssize_t cn10k_ddr_perf_cpumask_show(struct device *dev,
+					   struct device_attribute *attr,
+					   char *buf)
+{
+	struct cn10k_ddr_pmu *pmu = dev_get_drvdata(dev);
+
+	return cpumap_print_to_pagebuf(true, buf, cpumask_of(pmu->cpu));
+}
+
+static struct device_attribute cn10k_ddr_perf_cpumask_attr =
+	__ATTR(cpumask, 0444, cn10k_ddr_perf_cpumask_show, NULL);
+
+static struct attribute *cn10k_ddr_perf_cpumask_attrs[] = {
+	&cn10k_ddr_perf_cpumask_attr.attr,
+	NULL,
+};
+
+static struct attribute_group cn10k_ddr_perf_cpumask_attr_group = {
+	.attrs = cn10k_ddr_perf_cpumask_attrs,
+};
+
+static const struct attribute_group *cn10k_attr_groups[] = {
+	&cn10k_ddr_perf_events_attr_group,
+	&cn10k_ddr_perf_format_attr_group,
+	&cn10k_ddr_perf_cpumask_attr_group,
+	NULL,
+};
+
+static int ddr_perf_get_event_bitmap(int eventid, u64 *event_bitmap)
+{
+	switch (eventid) {
+	case EVENT_HIF_RD_OR_WR ... EVENT_WAW_HAZARD:
+	case EVENT_OP_IS_REFRESH ... EVENT_OP_IS_ZQLATCH:
+		*event_bitmap = (1ULL << (eventid - 1));
+		break;
+	case EVENT_OP_IS_ENTER_SELFREF:
+	case EVENT_OP_IS_ENTER_POWERDOWN:
+	case EVENT_OP_IS_ENTER_MPSM:
+		*event_bitmap = (0xFULL << (eventid - 1));
+		break;
+	default:
+		pr_err("%s Invalid eventid %d\n", __func__, eventid);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int cn10k_ddr_perf_alloc_counter(struct cn10k_ddr_pmu *pmu,
+					struct perf_event *event)
+{
+	u8 config = event->attr.config;
+	int i;
+
+	/* DDR read free-run counter index */
+	if (config == EVENT_DDR_READS) {
+		pmu->events[DDRC_PERF_READ_COUNTER_IDX] = event;
+		return DDRC_PERF_READ_COUNTER_IDX;
+	}
+
+	/* DDR write free-run counter index */
+	if (config == EVENT_DDR_WRITES) {
+		pmu->events[DDRC_PERF_WRITE_COUNTER_IDX] = event;
+		return DDRC_PERF_WRITE_COUNTER_IDX;
+	}
+
+	/* Allocate DDR generic counters */
+	for (i = 0; i < DDRC_PERF_NUM_GEN_COUNTERS; i++) {
+		if (pmu->events[i] == NULL) {
+			pmu->events[i] = event;
+			return i;
+		}
+	}
+
+	return -ENOENT;
+}
+
+static void cn10k_ddr_perf_free_counter(struct cn10k_ddr_pmu *pmu, int counter)
+{
+	pmu->events[counter] = NULL;
+}
+
+static int cn10k_ddr_perf_event_init(struct perf_event *event)
+{
+	struct cn10k_ddr_pmu *pmu = to_cn10k_ddr_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	if (is_sampling_event(event)) {
+		dev_info(pmu->dev, "Sampling not supported!\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (event->cpu < 0) {
+		dev_warn(pmu->dev, "Can't provide per-task data!\n");
+		return -EOPNOTSUPP;
+	}
+
+	/*  We must NOT create groups containing mixed PMUs */
+	if (event->group_leader->pmu != event->pmu &&
+	    !is_software_event(event->group_leader))
+		return -EINVAL;
+
+	/* Set ownership of event to one CPU, same event can not be observed
+	 * on multiple cpus at same time.
+	 */
+	event->cpu = pmu->cpu;
+	hwc->idx = -1;
+	return 0;
+}
+
+static void cn10k_ddr_perf_counter_enable(struct cn10k_ddr_pmu *pmu,
+					  int counter, bool enable)
+{
+	u32 reg;
+	u64 val;
+
+	if (counter > DDRC_PERF_NUM_COUNTERS) {
+		pr_err("Error: unsupported counter %d\n", counter);
+		return;
+	}
+
+	if (counter < DDRC_PERF_NUM_GEN_COUNTERS) {
+		reg = DDRC_PERF_CFG(counter);
+		val = readq_relaxed(pmu->base + reg);
+
+		if (enable)
+			val |= EVENT_ENABLE;
+		else
+			val &= ~EVENT_ENABLE;
+
+		writeq_relaxed(val, pmu->base + reg);
+	} else {
+		val = readq_relaxed(pmu->base + DDRC_PERF_CNT_FREERUN_EN);
+		if (enable) {
+			if (counter == DDRC_PERF_READ_COUNTER_IDX)
+				val |= DDRC_PERF_FREERUN_READ_EN;
+			else
+				val |= DDRC_PERF_FREERUN_WRITE_EN;
+		} else {
+			if (counter == DDRC_PERF_READ_COUNTER_IDX)
+				val &= ~DDRC_PERF_FREERUN_READ_EN;
+			else
+				val &= ~DDRC_PERF_FREERUN_WRITE_EN;
+		}
+		writeq_relaxed(val, pmu->base + DDRC_PERF_CNT_FREERUN_EN);
+	}
+}
+
+static u64 cn10k_ddr_perf_read_counter(struct cn10k_ddr_pmu *pmu, int counter)
+{
+	u64 val;
+
+	if (counter == DDRC_PERF_READ_COUNTER_IDX)
+		return readq_relaxed(pmu->base + DDRC_PERF_CNT_VALUE_RD_OP);
+
+	if (counter == DDRC_PERF_WRITE_COUNTER_IDX)
+		return readq_relaxed(pmu->base + DDRC_PERF_CNT_VALUE_WR_OP);
+
+	val = readq_relaxed(pmu->base + DDRC_PERF_CNT_VALUE(counter));
+	return val;
+}
+
+static void cn10k_ddr_perf_event_update(struct perf_event *event)
+{
+	struct cn10k_ddr_pmu *pmu = to_cn10k_ddr_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	u64 prev_count, new_count, mask;
+
+	do {
+		prev_count = local64_read(&hwc->prev_count);
+		new_count = cn10k_ddr_perf_read_counter(pmu, hwc->idx);
+	} while (local64_xchg(&hwc->prev_count, new_count) != prev_count);
+
+	mask = DDRC_PERF_CNT_MAX_VALUE;
+
+	local64_add((new_count - prev_count) & mask, &event->count);
+}
+
+static void cn10k_ddr_perf_event_start(struct perf_event *event, int flags)
+{
+	struct cn10k_ddr_pmu *pmu = to_cn10k_ddr_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int counter = hwc->idx;
+
+	local64_set(&hwc->prev_count, 0);
+
+	cn10k_ddr_perf_counter_enable(pmu, counter, true);
+
+	hwc->state = 0;
+}
+
+static int cn10k_ddr_perf_event_add(struct perf_event *event, int flags)
+{
+	struct cn10k_ddr_pmu *pmu = to_cn10k_ddr_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	u8 config = event->attr.config;
+	int counter, ret;
+	u32 reg_offset;
+	u64 val;
+
+	counter = cn10k_ddr_perf_alloc_counter(pmu, event);
+	if (counter < 0)
+		return -EAGAIN;
+
+	pmu->active_events++;
+	hwc->idx = counter;
+
+	if (counter < DDRC_PERF_NUM_GEN_COUNTERS) {
+		/* Generic counters, configure event id */
+		reg_offset = DDRC_PERF_CFG(counter);
+		ret = ddr_perf_get_event_bitmap(config, &val);
+		if (ret)
+			return ret;
+
+		writeq_relaxed(val, pmu->base + reg_offset);
+	} else {
+		/* fixed event counter, clear counter value */
+		if (counter == DDRC_PERF_READ_COUNTER_IDX)
+			val = DDRC_FREERUN_READ_CNT_CLR;
+		else
+			val = DDRC_FREERUN_WRITE_CNT_CLR;
+
+		writeq_relaxed(val, pmu->base + DDRC_PERF_CNT_FREERUN_CTRL);
+	}
+
+	hwc->state |= PERF_HES_STOPPED;
+
+	if (flags & PERF_EF_START)
+		cn10k_ddr_perf_event_start(event, flags);
+
+	return 0;
+}
+
+static void cn10k_ddr_perf_event_stop(struct perf_event *event, int flags)
+{
+	struct cn10k_ddr_pmu *pmu = to_cn10k_ddr_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int counter = hwc->idx;
+
+	cn10k_ddr_perf_counter_enable(pmu, counter, false);
+
+	if (flags & PERF_EF_UPDATE)
+		cn10k_ddr_perf_event_update(event);
+
+	hwc->state |= PERF_HES_STOPPED;
+}
+
+static void cn10k_ddr_perf_event_del(struct perf_event *event, int flags)
+{
+	struct cn10k_ddr_pmu *pmu = to_cn10k_ddr_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int counter = hwc->idx;
+
+	cn10k_ddr_perf_event_stop(event, PERF_EF_UPDATE);
+
+	cn10k_ddr_perf_free_counter(pmu, counter);
+	pmu->active_events--;
+	hwc->idx = -1;
+}
+
+static void cn10k_ddr_perf_pmu_enable(struct pmu *pmu)
+{
+	struct cn10k_ddr_pmu *ddr_pmu = to_cn10k_ddr_pmu(pmu);
+
+	writeq_relaxed(START_OP_CTRL_VAL_START, ddr_pmu->base +
+		       DDRC_PERF_CNT_START_OP_CTRL);
+}
+
+static void cn10k_ddr_perf_pmu_disable(struct pmu *pmu)
+{
+	struct cn10k_ddr_pmu *ddr_pmu = to_cn10k_ddr_pmu(pmu);
+
+	writeq_relaxed(END_OP_CTRL_VAL_END, ddr_pmu->base +
+		       DDRC_PERF_CNT_END_OP_CTRL);
+}
+
+static int cn10k_ddr_perf_probe(struct platform_device *pdev)
+{
+	struct cn10k_ddr_pmu *ddr_pmu;
+	struct resource *res;
+	void __iomem *base;
+	char *name;
+	int ret;
+
+	ddr_pmu = devm_kzalloc(&pdev->dev, sizeof(*ddr_pmu), GFP_KERNEL);
+	if (!ddr_pmu)
+		return -ENOMEM;
+
+	ddr_pmu->dev = &pdev->dev;
+	platform_set_drvdata(pdev, ddr_pmu);
+
+	base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	ddr_pmu->base = base;
+
+	/* Setup the PMU counter to work in manual mode */
+	writeq_relaxed(OP_MODE_CTRL_VAL_MANNUAL, ddr_pmu->base +
+		       DDRC_PERF_CNT_OP_MODE_CTRL);
+
+	ddr_pmu->pmu = (struct pmu) {
+		.module	      = THIS_MODULE,
+		.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
+		.task_ctx_nr = perf_invalid_context,
+		.attr_groups = cn10k_attr_groups,
+		.event_init  = cn10k_ddr_perf_event_init,
+		.add	     = cn10k_ddr_perf_event_add,
+		.del	     = cn10k_ddr_perf_event_del,
+		.start	     = cn10k_ddr_perf_event_start,
+		.stop	     = cn10k_ddr_perf_event_stop,
+		.read	     = cn10k_ddr_perf_event_update,
+		.pmu_enable  = cn10k_ddr_perf_pmu_enable,
+		.pmu_disable = cn10k_ddr_perf_pmu_disable,
+	};
+
+	/* Choose this cpu to collect perf data */
+	ddr_pmu->cpu = raw_smp_processor_id();
+
+	name = devm_kasprintf(ddr_pmu->dev, GFP_KERNEL, "mrvl_ddr_pmu_%llx",
+			      res->start);
+	if (!name)
+		return -ENOMEM;
+
+	ret = perf_pmu_register(&ddr_pmu->pmu, name, -1);
+	if (ret)
+		return ret;
+
+	pr_info("CN10K DDR PMU Driver for ddrc@%llx\n", res->start);
+	return 0;
+}
+
+static int cn10k_ddr_perf_remove(struct platform_device *pdev)
+{
+	struct cn10k_ddr_pmu *ddr_pmu = platform_get_drvdata(pdev);
+
+	perf_pmu_unregister(&ddr_pmu->pmu);
+	return 0;
+}
+
+#ifdef CONFIG_OF
+static const struct of_device_id cn10k_ddr_pmu_of_match[] = {
+	{ .compatible = "marvell,cn10k-ddr-pmu", },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, cn10k_ddr_pmu_of_match);
+#endif
+
+static struct platform_driver cn10k_ddr_pmu_driver = {
+	.driver	= {
+		.name   = "cn10k-ddr-pmu",
+		.of_match_table = cn10k_ddr_pmu_of_match,
+		.suppress_bind_attrs = true,
+	},
+	.probe		= cn10k_ddr_perf_probe,
+	.remove		= cn10k_ddr_perf_remove,
+};
+
+static int __init cn10k_ddr_pmu_init(void)
+{
+	return platform_driver_register(&cn10k_ddr_pmu_driver);
+}
+
+static void __exit cn10k_ddr_pmu_exit(void)
+{
+	platform_driver_unregister(&cn10k_ddr_pmu_driver);
+}
+
+module_init(cn10k_ddr_pmu_init);
+module_exit(cn10k_ddr_pmu_exit);
+
+MODULE_AUTHOR("Bharat Bhushan <bbhushan2@marvell.com>");
+MODULE_LICENSE("GPL v2");
-- 
GitLab


From 35a43326a9e3c229254fd531dfc711d20897d0fc Mon Sep 17 00:00:00 2001
From: Bharat Bhushan <bbhushan2@marvell.com>
Date: Fri, 11 Feb 2022 10:23:45 +0530
Subject: [PATCH 1073/1586] perf/marvell: cn10k DDR perfmon event overflow
 handling

CN10k DSS h/w perfmon does not support event overflow interrupt, so
periodic timer is being used. Each event counter is 48bit, which in worst
case scenario can increment at maximum 5.6 GT/s. At this rate it may take
many hours to overflow these counters. Therefore polling period for
overflow is set to 100 sec, which can be changed using sysfs parameter.

Two fixed event counters starts counting from zero on overflow, so
overflow condition is when new count less than previous count. While
eight programmable event counters freezes at maximum value. Also individual
counter cannot be restarted, so need to restart all eight counters.

Signed-off-by: Bharat Bhushan <bbhushan2@marvell.com>
Reviewed-by: Bhaskara Budiredla <bbudiredla@marvell.com>
Link: https://lore.kernel.org/r/20220211045346.17894-4-bbhushan2@marvell.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/marvell_cn10k_ddr_pmu.c | 111 +++++++++++++++++++++++++++
 1 file changed, 111 insertions(+)

diff --git a/drivers/perf/marvell_cn10k_ddr_pmu.c b/drivers/perf/marvell_cn10k_ddr_pmu.c
index d93268943e234..19c8744e1a23b 100644
--- a/drivers/perf/marvell_cn10k_ddr_pmu.c
+++ b/drivers/perf/marvell_cn10k_ddr_pmu.c
@@ -11,6 +11,7 @@
 #include <linux/of_address.h>
 #include <linux/of_device.h>
 #include <linux/perf_event.h>
+#include <linux/hrtimer.h>
 
 /* Performance Counters Operating Mode Control Registers */
 #define DDRC_PERF_CNT_OP_MODE_CTRL	0x8020
@@ -127,6 +128,7 @@ struct cn10k_ddr_pmu {
 	struct	device *dev;
 	int active_events;
 	struct perf_event *events[DDRC_PERF_NUM_COUNTERS];
+	struct hrtimer hrtimer;
 };
 
 #define to_cn10k_ddr_pmu(p)	container_of(p, struct cn10k_ddr_pmu, pmu)
@@ -251,6 +253,18 @@ static const struct attribute_group *cn10k_attr_groups[] = {
 	NULL,
 };
 
+/* Default poll timeout is 100 sec, which is very sufficient for
+ * 48 bit counter incremented max at 5.6 GT/s, which may take many
+ * hours to overflow.
+ */
+static unsigned long cn10k_ddr_pmu_poll_period_sec = 100;
+module_param_named(poll_period_sec, cn10k_ddr_pmu_poll_period_sec, ulong, 0644);
+
+static ktime_t cn10k_ddr_pmu_timer_period(void)
+{
+	return ms_to_ktime((u64)cn10k_ddr_pmu_poll_period_sec * USEC_PER_SEC);
+}
+
 static int ddr_perf_get_event_bitmap(int eventid, u64 *event_bitmap)
 {
 	switch (eventid) {
@@ -433,6 +447,10 @@ static int cn10k_ddr_perf_event_add(struct perf_event *event, int flags)
 	pmu->active_events++;
 	hwc->idx = counter;
 
+	if (pmu->active_events == 1)
+		hrtimer_start(&pmu->hrtimer, cn10k_ddr_pmu_timer_period(),
+			      HRTIMER_MODE_REL_PINNED);
+
 	if (counter < DDRC_PERF_NUM_GEN_COUNTERS) {
 		/* Generic counters, configure event id */
 		reg_offset = DDRC_PERF_CFG(counter);
@@ -484,6 +502,10 @@ static void cn10k_ddr_perf_event_del(struct perf_event *event, int flags)
 	cn10k_ddr_perf_free_counter(pmu, counter);
 	pmu->active_events--;
 	hwc->idx = -1;
+
+	/* Cancel timer when no events to capture */
+	if (pmu->active_events == 0)
+		hrtimer_cancel(&pmu->hrtimer);
 }
 
 static void cn10k_ddr_perf_pmu_enable(struct pmu *pmu)
@@ -502,6 +524,92 @@ static void cn10k_ddr_perf_pmu_disable(struct pmu *pmu)
 		       DDRC_PERF_CNT_END_OP_CTRL);
 }
 
+static void cn10k_ddr_perf_event_update_all(struct cn10k_ddr_pmu *pmu)
+{
+	struct hw_perf_event *hwc;
+	int i;
+
+	for (i = 0; i < DDRC_PERF_NUM_GEN_COUNTERS; i++) {
+		if (pmu->events[i] == NULL)
+			continue;
+
+		cn10k_ddr_perf_event_update(pmu->events[i]);
+	}
+
+	/* Reset previous count as h/w counter are reset */
+	for (i = 0; i < DDRC_PERF_NUM_GEN_COUNTERS; i++) {
+		if (pmu->events[i] == NULL)
+			continue;
+
+		hwc = &pmu->events[i]->hw;
+		local64_set(&hwc->prev_count, 0);
+	}
+}
+
+static irqreturn_t cn10k_ddr_pmu_overflow_handler(struct cn10k_ddr_pmu *pmu)
+{
+	struct perf_event *event;
+	struct hw_perf_event *hwc;
+	u64 prev_count, new_count;
+	u64 value;
+	int i;
+
+	event = pmu->events[DDRC_PERF_READ_COUNTER_IDX];
+	if (event) {
+		hwc = &event->hw;
+		prev_count = local64_read(&hwc->prev_count);
+		new_count = cn10k_ddr_perf_read_counter(pmu, hwc->idx);
+
+		/* Overflow condition is when new count less than
+		 * previous count
+		 */
+		if (new_count < prev_count)
+			cn10k_ddr_perf_event_update(event);
+	}
+
+	event = pmu->events[DDRC_PERF_WRITE_COUNTER_IDX];
+	if (event) {
+		hwc = &event->hw;
+		prev_count = local64_read(&hwc->prev_count);
+		new_count = cn10k_ddr_perf_read_counter(pmu, hwc->idx);
+
+		/* Overflow condition is when new count less than
+		 * previous count
+		 */
+		if (new_count < prev_count)
+			cn10k_ddr_perf_event_update(event);
+	}
+
+	for (i = 0; i < DDRC_PERF_NUM_GEN_COUNTERS; i++) {
+		if (pmu->events[i] == NULL)
+			continue;
+
+		value = cn10k_ddr_perf_read_counter(pmu, i);
+		if (value == DDRC_PERF_CNT_MAX_VALUE) {
+			pr_info("Counter-(%d) reached max value\n", i);
+			cn10k_ddr_perf_event_update_all(pmu);
+			cn10k_ddr_perf_pmu_disable(&pmu->pmu);
+			cn10k_ddr_perf_pmu_enable(&pmu->pmu);
+		}
+	}
+
+	return IRQ_HANDLED;
+}
+
+static enum hrtimer_restart cn10k_ddr_pmu_timer_handler(struct hrtimer *hrtimer)
+{
+	struct cn10k_ddr_pmu *pmu = container_of(hrtimer, struct cn10k_ddr_pmu,
+						 hrtimer);
+	unsigned long flags;
+
+	local_irq_save(flags);
+	cn10k_ddr_pmu_overflow_handler(pmu);
+	local_irq_restore(flags);
+
+	hrtimer_forward_now(hrtimer, cn10k_ddr_pmu_timer_period());
+	return HRTIMER_RESTART;
+}
+
 static int cn10k_ddr_perf_probe(struct platform_device *pdev)
 {
 	struct cn10k_ddr_pmu *ddr_pmu;
@@ -550,6 +658,9 @@ static int cn10k_ddr_perf_probe(struct platform_device *pdev)
 	if (!name)
 		return -ENOMEM;
 
+	hrtimer_init(&ddr_pmu->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	ddr_pmu->hrtimer.function = cn10k_ddr_pmu_timer_handler;
+
 	ret = perf_pmu_register(&ddr_pmu->pmu, name, -1);
 	if (ret)
 		return ret;
-- 
GitLab


From 68fa55f0e05ce371c4b5de7932d9f570d61bf791 Mon Sep 17 00:00:00 2001
From: Bharat Bhushan <bbhushan2@marvell.com>
Date: Fri, 11 Feb 2022 10:23:46 +0530
Subject: [PATCH 1074/1586] perf/marvell: cn10k DDR perf event core ownership

As DDR perf event counters are not per core, so they should be accessed
only by one core at a time. Select new core when previously owning core
is going offline.

Signed-off-by: Bharat Bhushan <bbhushan2@marvell.com>
Reviewed-by: Bhaskara Budiredla <bbudiredla@marvell.com>
Link: https://lore.kernel.org/r/20220211045346.17894-5-bbhushan2@marvell.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/Kconfig                 |  7 ++++
 drivers/perf/marvell_cn10k_ddr_pmu.c | 50 ++++++++++++++++++++++++++--
 include/linux/cpuhotplug.h           |  1 +
 3 files changed, 56 insertions(+), 2 deletions(-)

diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index 7d6ffdf44a415..b9deef6ed423d 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -148,4 +148,11 @@ config MARVELL_CN10K_TAD_PMU
 
 source "drivers/perf/hisilicon/Kconfig"
 
+config MARVELL_CN10K_DDR_PMU
+	tristate "Enable MARVELL CN10K DRAM Subsystem(DSS) PMU Support"
+	depends on ARM64 || (COMPILE_TEST && 64BIT)
+	help
+	  Enable perf support for Marvell DDR Performance monitoring
+	  event on CN10K platform.
+
 endmenu
diff --git a/drivers/perf/marvell_cn10k_ddr_pmu.c b/drivers/perf/marvell_cn10k_ddr_pmu.c
index 19c8744e1a23b..7f3146e71f995 100644
--- a/drivers/perf/marvell_cn10k_ddr_pmu.c
+++ b/drivers/perf/marvell_cn10k_ddr_pmu.c
@@ -129,6 +129,7 @@ struct cn10k_ddr_pmu {
 	int active_events;
 	struct perf_event *events[DDRC_PERF_NUM_COUNTERS];
 	struct hrtimer hrtimer;
+	struct hlist_node node;
 };
 
 #define to_cn10k_ddr_pmu(p)	container_of(p, struct cn10k_ddr_pmu, pmu)
@@ -610,6 +611,24 @@ static enum hrtimer_restart cn10k_ddr_pmu_timer_handler(struct hrtimer *hrtimer)
 	return HRTIMER_RESTART;
 }
 
+static int cn10k_ddr_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
+{
+	struct cn10k_ddr_pmu *pmu = hlist_entry_safe(node, struct cn10k_ddr_pmu,
+						     node);
+	unsigned int target;
+
+	if (cpu != pmu->cpu)
+		return 0;
+
+	target = cpumask_any_but(cpu_online_mask, cpu);
+	if (target >= nr_cpu_ids)
+		return 0;
+
+	perf_pmu_migrate_context(&pmu->pmu, cpu, target);
+	pmu->cpu = target;
+	return 0;
+}
+
 static int cn10k_ddr_perf_probe(struct platform_device *pdev)
 {
 	struct cn10k_ddr_pmu *ddr_pmu;
@@ -661,18 +680,31 @@ static int cn10k_ddr_perf_probe(struct platform_device *pdev)
 	hrtimer_init(&ddr_pmu->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	ddr_pmu->hrtimer.function = cn10k_ddr_pmu_timer_handler;
 
+	cpuhp_state_add_instance_nocalls(
+				CPUHP_AP_PERF_ARM_MARVELL_CN10K_DDR_ONLINE,
+				&ddr_pmu->node);
+
 	ret = perf_pmu_register(&ddr_pmu->pmu, name, -1);
 	if (ret)
-		return ret;
+		goto error;
 
 	pr_info("CN10K DDR PMU Driver for ddrc@%llx\n", res->start);
 	return 0;
+error:
+	cpuhp_state_remove_instance_nocalls(
+				CPUHP_AP_PERF_ARM_MARVELL_CN10K_DDR_ONLINE,
+				&ddr_pmu->node);
+	return ret;
 }
 
 static int cn10k_ddr_perf_remove(struct platform_device *pdev)
 {
 	struct cn10k_ddr_pmu *ddr_pmu = platform_get_drvdata(pdev);
 
+	cpuhp_state_remove_instance_nocalls(
+				CPUHP_AP_PERF_ARM_MARVELL_CN10K_DDR_ONLINE,
+				&ddr_pmu->node);
+
 	perf_pmu_unregister(&ddr_pmu->pmu);
 	return 0;
 }
@@ -697,12 +729,26 @@ static struct platform_driver cn10k_ddr_pmu_driver = {
 
 static int __init cn10k_ddr_pmu_init(void)
 {
-	return platform_driver_register(&cn10k_ddr_pmu_driver);
+	int ret;
+
+	ret = cpuhp_setup_state_multi(
+				CPUHP_AP_PERF_ARM_MARVELL_CN10K_DDR_ONLINE,
+				"perf/marvell/cn10k/ddr:online", NULL,
+				cn10k_ddr_pmu_offline_cpu);
+	if (ret)
+		return ret;
+
+	ret = platform_driver_register(&cn10k_ddr_pmu_driver);
+	if (ret)
+		cpuhp_remove_multi_state(
+				CPUHP_AP_PERF_ARM_MARVELL_CN10K_DDR_ONLINE);
+	return ret;
 }
 
 static void __exit cn10k_ddr_pmu_exit(void)
 {
 	platform_driver_unregister(&cn10k_ddr_pmu_driver);
+	cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_MARVELL_CN10K_DDR_ONLINE);
 }
 
 module_init(cn10k_ddr_pmu_init);
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 411a428ace4d4..2bc550ac8dc79 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -231,6 +231,7 @@ enum cpuhp_state {
 	CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE,
 	CPUHP_AP_PERF_ARM_APM_XGENE_ONLINE,
 	CPUHP_AP_PERF_ARM_CAVIUM_TX2_UNCORE_ONLINE,
+	CPUHP_AP_PERF_ARM_MARVELL_CN10K_DDR_ONLINE,
 	CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE,
 	CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE,
 	CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE,
-- 
GitLab


From 83f83cc0c1379413fb1199a78f91ab441a7e76fd Mon Sep 17 00:00:00 2001
From: Shaokun Zhang <zhangshaokun@hisilicon.com>
Date: Thu, 3 Mar 2022 16:54:19 +0800
Subject: [PATCH 1075/1586] arm64: perf: Expose some Armv9 common events under
 sysfs

Armv9[1] has introduced some common architectural events (0x400C-0x400F)
and common microarchitectural events (0x4010-0x401B), which can be detected
by PMCEID0_EL0 from bit44 to bit59, so expose these common events under
sysfs.

[1] https://developer.arm.com/documentation/ddi0608/ba

Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Shaokun Zhang <zhangshaokun@hisilicon.com>
Link: https://lore.kernel.org/r/20220303085419.64085-1-zhangshaokun@hisilicon.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/perf_event.h | 14 ++++++++++++++
 arch/arm64/kernel/perf_event.c      | 10 ++++++++++
 2 files changed, 24 insertions(+)

diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h
index 4ef6f19331f98..b93cafd8313a1 100644
--- a/arch/arm64/include/asm/perf_event.h
+++ b/arch/arm64/include/asm/perf_event.h
@@ -96,6 +96,20 @@
 #define	ARMV8_PMUV3_PERFCTR_L2I_CACHE_LMISS			0x400A
 #define	ARMV8_PMUV3_PERFCTR_L3D_CACHE_LMISS_RD			0x400B
 
+/* Trace buffer events */
+#define ARMV8_PMUV3_PERFCTR_TRB_WRAP				0x400C
+#define ARMV8_PMUV3_PERFCTR_TRB_TRIG				0x400E
+
+/* Trace unit events */
+#define ARMV8_PMUV3_PERFCTR_TRCEXTOUT0				0x4010
+#define ARMV8_PMUV3_PERFCTR_TRCEXTOUT1				0x4011
+#define ARMV8_PMUV3_PERFCTR_TRCEXTOUT2				0x4012
+#define ARMV8_PMUV3_PERFCTR_TRCEXTOUT3				0x4013
+#define ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT4			0x4018
+#define ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT5			0x4019
+#define ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT6			0x401A
+#define ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT7			0x401B
+
 /* additional latency from alignment events */
 #define	ARMV8_PMUV3_PERFCTR_LDST_ALIGN_LAT			0x4020
 #define	ARMV8_PMUV3_PERFCTR_LD_ALIGN_LAT			0x4021
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index cab678ed66183..cb69ff1e61380 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -242,6 +242,16 @@ static struct attribute *armv8_pmuv3_event_attrs[] = {
 	ARMV8_EVENT_ATTR(l2d_cache_lmiss_rd, ARMV8_PMUV3_PERFCTR_L2D_CACHE_LMISS_RD),
 	ARMV8_EVENT_ATTR(l2i_cache_lmiss, ARMV8_PMUV3_PERFCTR_L2I_CACHE_LMISS),
 	ARMV8_EVENT_ATTR(l3d_cache_lmiss_rd, ARMV8_PMUV3_PERFCTR_L3D_CACHE_LMISS_RD),
+	ARMV8_EVENT_ATTR(trb_wrap, ARMV8_PMUV3_PERFCTR_TRB_WRAP),
+	ARMV8_EVENT_ATTR(trb_trig, ARMV8_PMUV3_PERFCTR_TRB_TRIG),
+	ARMV8_EVENT_ATTR(trcextout0, ARMV8_PMUV3_PERFCTR_TRCEXTOUT0),
+	ARMV8_EVENT_ATTR(trcextout1, ARMV8_PMUV3_PERFCTR_TRCEXTOUT1),
+	ARMV8_EVENT_ATTR(trcextout2, ARMV8_PMUV3_PERFCTR_TRCEXTOUT2),
+	ARMV8_EVENT_ATTR(trcextout3, ARMV8_PMUV3_PERFCTR_TRCEXTOUT3),
+	ARMV8_EVENT_ATTR(cti_trigout4, ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT4),
+	ARMV8_EVENT_ATTR(cti_trigout5, ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT5),
+	ARMV8_EVENT_ATTR(cti_trigout6, ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT6),
+	ARMV8_EVENT_ATTR(cti_trigout7, ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT7),
 	ARMV8_EVENT_ATTR(ldst_align_lat, ARMV8_PMUV3_PERFCTR_LDST_ALIGN_LAT),
 	ARMV8_EVENT_ATTR(ld_align_lat, ARMV8_PMUV3_PERFCTR_LD_ALIGN_LAT),
 	ARMV8_EVENT_ATTR(st_align_lat, ARMV8_PMUV3_PERFCTR_ST_ALIGN_LAT),
-- 
GitLab


From f00f3674873bb4ca4984aa3c440f3b0087a3677e Mon Sep 17 00:00:00 2001
From: Shaokun Zhang <zhangshaokun@hisilicon.com>
Date: Thu, 3 Mar 2022 18:07:10 +0800
Subject: [PATCH 1076/1586] arm64: perf: Consistently make all event numbers as
 16-bits

Arm ARM documents PMU event numbers as 16-bits in the table and more 0x4XXX
events have been added in the header file, so use 16-bits for all event
numbers and make them consistent.

No functional change intended.

Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Shaokun Zhang <zhangshaokun@hisilicon.com>
Link: https://lore.kernel.org/r/20220303100710.2238-1-zhangshaokun@hisilicon.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/perf_event.h | 298 ++++++++++++++--------------
 1 file changed, 149 insertions(+), 149 deletions(-)

diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h
index b93cafd8313a1..3eaf462f5752c 100644
--- a/arch/arm64/include/asm/perf_event.h
+++ b/arch/arm64/include/asm/perf_event.h
@@ -15,70 +15,70 @@
 /*
  * Common architectural and microarchitectural event numbers.
  */
-#define ARMV8_PMUV3_PERFCTR_SW_INCR				0x00
-#define ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL			0x01
-#define ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL			0x02
-#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL			0x03
-#define ARMV8_PMUV3_PERFCTR_L1D_CACHE				0x04
-#define ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL			0x05
-#define ARMV8_PMUV3_PERFCTR_LD_RETIRED				0x06
-#define ARMV8_PMUV3_PERFCTR_ST_RETIRED				0x07
-#define ARMV8_PMUV3_PERFCTR_INST_RETIRED			0x08
-#define ARMV8_PMUV3_PERFCTR_EXC_TAKEN				0x09
-#define ARMV8_PMUV3_PERFCTR_EXC_RETURN				0x0A
-#define ARMV8_PMUV3_PERFCTR_CID_WRITE_RETIRED			0x0B
-#define ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED			0x0C
-#define ARMV8_PMUV3_PERFCTR_BR_IMMED_RETIRED			0x0D
-#define ARMV8_PMUV3_PERFCTR_BR_RETURN_RETIRED			0x0E
-#define ARMV8_PMUV3_PERFCTR_UNALIGNED_LDST_RETIRED		0x0F
-#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED				0x10
-#define ARMV8_PMUV3_PERFCTR_CPU_CYCLES				0x11
-#define ARMV8_PMUV3_PERFCTR_BR_PRED				0x12
-#define ARMV8_PMUV3_PERFCTR_MEM_ACCESS				0x13
-#define ARMV8_PMUV3_PERFCTR_L1I_CACHE				0x14
-#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_WB			0x15
-#define ARMV8_PMUV3_PERFCTR_L2D_CACHE				0x16
-#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL			0x17
-#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_WB			0x18
-#define ARMV8_PMUV3_PERFCTR_BUS_ACCESS				0x19
-#define ARMV8_PMUV3_PERFCTR_MEMORY_ERROR			0x1A
-#define ARMV8_PMUV3_PERFCTR_INST_SPEC				0x1B
-#define ARMV8_PMUV3_PERFCTR_TTBR_WRITE_RETIRED			0x1C
-#define ARMV8_PMUV3_PERFCTR_BUS_CYCLES				0x1D
-#define ARMV8_PMUV3_PERFCTR_CHAIN				0x1E
-#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_ALLOCATE			0x1F
-#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_ALLOCATE			0x20
-#define ARMV8_PMUV3_PERFCTR_BR_RETIRED				0x21
-#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED_RETIRED			0x22
-#define ARMV8_PMUV3_PERFCTR_STALL_FRONTEND			0x23
-#define ARMV8_PMUV3_PERFCTR_STALL_BACKEND			0x24
-#define ARMV8_PMUV3_PERFCTR_L1D_TLB				0x25
-#define ARMV8_PMUV3_PERFCTR_L1I_TLB				0x26
-#define ARMV8_PMUV3_PERFCTR_L2I_CACHE				0x27
-#define ARMV8_PMUV3_PERFCTR_L2I_CACHE_REFILL			0x28
-#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_ALLOCATE			0x29
-#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_REFILL			0x2A
-#define ARMV8_PMUV3_PERFCTR_L3D_CACHE				0x2B
-#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_WB			0x2C
-#define ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL			0x2D
-#define ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL			0x2E
-#define ARMV8_PMUV3_PERFCTR_L2D_TLB				0x2F
-#define ARMV8_PMUV3_PERFCTR_L2I_TLB				0x30
-#define ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS			0x31
-#define ARMV8_PMUV3_PERFCTR_LL_CACHE				0x32
-#define ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS			0x33
-#define ARMV8_PMUV3_PERFCTR_DTLB_WALK				0x34
-#define ARMV8_PMUV3_PERFCTR_ITLB_WALK				0x35
-#define ARMV8_PMUV3_PERFCTR_LL_CACHE_RD				0x36
-#define ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD			0x37
-#define ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS_RD			0x38
-#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_LMISS_RD			0x39
-#define ARMV8_PMUV3_PERFCTR_OP_RETIRED				0x3A
-#define ARMV8_PMUV3_PERFCTR_OP_SPEC				0x3B
-#define ARMV8_PMUV3_PERFCTR_STALL				0x3C
-#define ARMV8_PMUV3_PERFCTR_STALL_SLOT_BACKEND			0x3D
-#define ARMV8_PMUV3_PERFCTR_STALL_SLOT_FRONTEND			0x3E
-#define ARMV8_PMUV3_PERFCTR_STALL_SLOT				0x3F
+#define ARMV8_PMUV3_PERFCTR_SW_INCR				0x0000
+#define ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL			0x0001
+#define ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL			0x0002
+#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL			0x0003
+#define ARMV8_PMUV3_PERFCTR_L1D_CACHE				0x0004
+#define ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL			0x0005
+#define ARMV8_PMUV3_PERFCTR_LD_RETIRED				0x0006
+#define ARMV8_PMUV3_PERFCTR_ST_RETIRED				0x0007
+#define ARMV8_PMUV3_PERFCTR_INST_RETIRED			0x0008
+#define ARMV8_PMUV3_PERFCTR_EXC_TAKEN				0x0009
+#define ARMV8_PMUV3_PERFCTR_EXC_RETURN				0x000A
+#define ARMV8_PMUV3_PERFCTR_CID_WRITE_RETIRED			0x000B
+#define ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED			0x000C
+#define ARMV8_PMUV3_PERFCTR_BR_IMMED_RETIRED			0x000D
+#define ARMV8_PMUV3_PERFCTR_BR_RETURN_RETIRED			0x000E
+#define ARMV8_PMUV3_PERFCTR_UNALIGNED_LDST_RETIRED		0x000F
+#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED				0x0010
+#define ARMV8_PMUV3_PERFCTR_CPU_CYCLES				0x0011
+#define ARMV8_PMUV3_PERFCTR_BR_PRED				0x0012
+#define ARMV8_PMUV3_PERFCTR_MEM_ACCESS				0x0013
+#define ARMV8_PMUV3_PERFCTR_L1I_CACHE				0x0014
+#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_WB			0x0015
+#define ARMV8_PMUV3_PERFCTR_L2D_CACHE				0x0016
+#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL			0x0017
+#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_WB			0x0018
+#define ARMV8_PMUV3_PERFCTR_BUS_ACCESS				0x0019
+#define ARMV8_PMUV3_PERFCTR_MEMORY_ERROR			0x001A
+#define ARMV8_PMUV3_PERFCTR_INST_SPEC				0x001B
+#define ARMV8_PMUV3_PERFCTR_TTBR_WRITE_RETIRED			0x001C
+#define ARMV8_PMUV3_PERFCTR_BUS_CYCLES				0x001D
+#define ARMV8_PMUV3_PERFCTR_CHAIN				0x001E
+#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_ALLOCATE			0x001F
+#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_ALLOCATE			0x0020
+#define ARMV8_PMUV3_PERFCTR_BR_RETIRED				0x0021
+#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED_RETIRED			0x0022
+#define ARMV8_PMUV3_PERFCTR_STALL_FRONTEND			0x0023
+#define ARMV8_PMUV3_PERFCTR_STALL_BACKEND			0x0024
+#define ARMV8_PMUV3_PERFCTR_L1D_TLB				0x0025
+#define ARMV8_PMUV3_PERFCTR_L1I_TLB				0x0026
+#define ARMV8_PMUV3_PERFCTR_L2I_CACHE				0x0027
+#define ARMV8_PMUV3_PERFCTR_L2I_CACHE_REFILL			0x0028
+#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_ALLOCATE			0x0029
+#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_REFILL			0x002A
+#define ARMV8_PMUV3_PERFCTR_L3D_CACHE				0x002B
+#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_WB			0x002C
+#define ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL			0x002D
+#define ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL			0x002E
+#define ARMV8_PMUV3_PERFCTR_L2D_TLB				0x002F
+#define ARMV8_PMUV3_PERFCTR_L2I_TLB				0x0030
+#define ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS			0x0031
+#define ARMV8_PMUV3_PERFCTR_LL_CACHE				0x0032
+#define ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS			0x0033
+#define ARMV8_PMUV3_PERFCTR_DTLB_WALK				0x0034
+#define ARMV8_PMUV3_PERFCTR_ITLB_WALK				0x0035
+#define ARMV8_PMUV3_PERFCTR_LL_CACHE_RD				0x0036
+#define ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD			0x0037
+#define ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS_RD			0x0038
+#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_LMISS_RD			0x0039
+#define ARMV8_PMUV3_PERFCTR_OP_RETIRED				0x003A
+#define ARMV8_PMUV3_PERFCTR_OP_SPEC				0x003B
+#define ARMV8_PMUV3_PERFCTR_STALL				0x003C
+#define ARMV8_PMUV3_PERFCTR_STALL_SLOT_BACKEND			0x003D
+#define ARMV8_PMUV3_PERFCTR_STALL_SLOT_FRONTEND			0x003E
+#define ARMV8_PMUV3_PERFCTR_STALL_SLOT				0x003F
 
 /* Statistical profiling extension microarchitectural events */
 #define	ARMV8_SPE_PERFCTR_SAMPLE_POP				0x4000
@@ -121,91 +121,91 @@
 #define	ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED_WR			0x4026
 
 /* ARMv8 recommended implementation defined event types */
-#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD			0x40
-#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR			0x41
-#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD		0x42
-#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR		0x43
-#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_INNER		0x44
-#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_OUTER		0x45
-#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WB_VICTIM		0x46
-#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WB_CLEAN			0x47
-#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_INVAL			0x48
-
-#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD			0x4C
-#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR			0x4D
-#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD				0x4E
-#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR				0x4F
-#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_RD			0x50
-#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WR			0x51
-#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_REFILL_RD		0x52
-#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_REFILL_WR		0x53
-
-#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WB_VICTIM		0x56
-#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WB_CLEAN			0x57
-#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_INVAL			0x58
-
-#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_REFILL_RD			0x5C
-#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_REFILL_WR			0x5D
-#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_RD				0x5E
-#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_WR				0x5F
-#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD			0x60
-#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR			0x61
-#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_SHARED			0x62
-#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_NOT_SHARED		0x63
-#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_NORMAL			0x64
-#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_PERIPH			0x65
-#define ARMV8_IMPDEF_PERFCTR_MEM_ACCESS_RD			0x66
-#define ARMV8_IMPDEF_PERFCTR_MEM_ACCESS_WR			0x67
-#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_LD_SPEC			0x68
-#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_ST_SPEC			0x69
-#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_LDST_SPEC		0x6A
-
-#define ARMV8_IMPDEF_PERFCTR_LDREX_SPEC				0x6C
-#define ARMV8_IMPDEF_PERFCTR_STREX_PASS_SPEC			0x6D
-#define ARMV8_IMPDEF_PERFCTR_STREX_FAIL_SPEC			0x6E
-#define ARMV8_IMPDEF_PERFCTR_STREX_SPEC				0x6F
-#define ARMV8_IMPDEF_PERFCTR_LD_SPEC				0x70
-#define ARMV8_IMPDEF_PERFCTR_ST_SPEC				0x71
-#define ARMV8_IMPDEF_PERFCTR_LDST_SPEC				0x72
-#define ARMV8_IMPDEF_PERFCTR_DP_SPEC				0x73
-#define ARMV8_IMPDEF_PERFCTR_ASE_SPEC				0x74
-#define ARMV8_IMPDEF_PERFCTR_VFP_SPEC				0x75
-#define ARMV8_IMPDEF_PERFCTR_PC_WRITE_SPEC			0x76
-#define ARMV8_IMPDEF_PERFCTR_CRYPTO_SPEC			0x77
-#define ARMV8_IMPDEF_PERFCTR_BR_IMMED_SPEC			0x78
-#define ARMV8_IMPDEF_PERFCTR_BR_RETURN_SPEC			0x79
-#define ARMV8_IMPDEF_PERFCTR_BR_INDIRECT_SPEC			0x7A
-
-#define ARMV8_IMPDEF_PERFCTR_ISB_SPEC				0x7C
-#define ARMV8_IMPDEF_PERFCTR_DSB_SPEC				0x7D
-#define ARMV8_IMPDEF_PERFCTR_DMB_SPEC				0x7E
-
-#define ARMV8_IMPDEF_PERFCTR_EXC_UNDEF				0x81
-#define ARMV8_IMPDEF_PERFCTR_EXC_SVC				0x82
-#define ARMV8_IMPDEF_PERFCTR_EXC_PABORT				0x83
-#define ARMV8_IMPDEF_PERFCTR_EXC_DABORT				0x84
-
-#define ARMV8_IMPDEF_PERFCTR_EXC_IRQ				0x86
-#define ARMV8_IMPDEF_PERFCTR_EXC_FIQ				0x87
-#define ARMV8_IMPDEF_PERFCTR_EXC_SMC				0x88
-
-#define ARMV8_IMPDEF_PERFCTR_EXC_HVC				0x8A
-#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_PABORT			0x8B
-#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_DABORT			0x8C
-#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_OTHER			0x8D
-#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_IRQ			0x8E
-#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_FIQ			0x8F
-#define ARMV8_IMPDEF_PERFCTR_RC_LD_SPEC				0x90
-#define ARMV8_IMPDEF_PERFCTR_RC_ST_SPEC				0x91
-
-#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_RD			0xA0
-#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WR			0xA1
-#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_REFILL_RD		0xA2
-#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_REFILL_WR		0xA3
-
-#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WB_VICTIM		0xA6
-#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WB_CLEAN			0xA7
-#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_INVAL			0xA8
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD			0x0040
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR			0x0041
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD		0x0042
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR		0x0043
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_INNER		0x0044
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_OUTER		0x0045
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WB_VICTIM		0x0046
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WB_CLEAN			0x0047
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_INVAL			0x0048
+
+#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD			0x004C
+#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR			0x004D
+#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD				0x004E
+#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR				0x004F
+#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_RD			0x0050
+#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WR			0x0051
+#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_REFILL_RD		0x0052
+#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_REFILL_WR		0x0053
+
+#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WB_VICTIM		0x0056
+#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WB_CLEAN			0x0057
+#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_INVAL			0x0058
+
+#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_REFILL_RD			0x005C
+#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_REFILL_WR			0x005D
+#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_RD				0x005E
+#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_WR				0x005F
+#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD			0x0060
+#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR			0x0061
+#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_SHARED			0x0062
+#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_NOT_SHARED		0x0063
+#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_NORMAL			0x0064
+#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_PERIPH			0x0065
+#define ARMV8_IMPDEF_PERFCTR_MEM_ACCESS_RD			0x0066
+#define ARMV8_IMPDEF_PERFCTR_MEM_ACCESS_WR			0x0067
+#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_LD_SPEC			0x0068
+#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_ST_SPEC			0x0069
+#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_LDST_SPEC		0x006A
+
+#define ARMV8_IMPDEF_PERFCTR_LDREX_SPEC				0x006C
+#define ARMV8_IMPDEF_PERFCTR_STREX_PASS_SPEC			0x006D
+#define ARMV8_IMPDEF_PERFCTR_STREX_FAIL_SPEC			0x006E
+#define ARMV8_IMPDEF_PERFCTR_STREX_SPEC				0x006F
+#define ARMV8_IMPDEF_PERFCTR_LD_SPEC				0x0070
+#define ARMV8_IMPDEF_PERFCTR_ST_SPEC				0x0071
+#define ARMV8_IMPDEF_PERFCTR_LDST_SPEC				0x0072
+#define ARMV8_IMPDEF_PERFCTR_DP_SPEC				0x0073
+#define ARMV8_IMPDEF_PERFCTR_ASE_SPEC				0x0074
+#define ARMV8_IMPDEF_PERFCTR_VFP_SPEC				0x0075
+#define ARMV8_IMPDEF_PERFCTR_PC_WRITE_SPEC			0x0076
+#define ARMV8_IMPDEF_PERFCTR_CRYPTO_SPEC			0x0077
+#define ARMV8_IMPDEF_PERFCTR_BR_IMMED_SPEC			0x0078
+#define ARMV8_IMPDEF_PERFCTR_BR_RETURN_SPEC			0x0079
+#define ARMV8_IMPDEF_PERFCTR_BR_INDIRECT_SPEC			0x007A
+
+#define ARMV8_IMPDEF_PERFCTR_ISB_SPEC				0x007C
+#define ARMV8_IMPDEF_PERFCTR_DSB_SPEC				0x007D
+#define ARMV8_IMPDEF_PERFCTR_DMB_SPEC				0x007E
+
+#define ARMV8_IMPDEF_PERFCTR_EXC_UNDEF				0x0081
+#define ARMV8_IMPDEF_PERFCTR_EXC_SVC				0x0082
+#define ARMV8_IMPDEF_PERFCTR_EXC_PABORT				0x0083
+#define ARMV8_IMPDEF_PERFCTR_EXC_DABORT				0x0084
+
+#define ARMV8_IMPDEF_PERFCTR_EXC_IRQ				0x0086
+#define ARMV8_IMPDEF_PERFCTR_EXC_FIQ				0x0087
+#define ARMV8_IMPDEF_PERFCTR_EXC_SMC				0x0088
+
+#define ARMV8_IMPDEF_PERFCTR_EXC_HVC				0x008A
+#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_PABORT			0x008B
+#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_DABORT			0x008C
+#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_OTHER			0x008D
+#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_IRQ			0x008E
+#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_FIQ			0x008F
+#define ARMV8_IMPDEF_PERFCTR_RC_LD_SPEC				0x0090
+#define ARMV8_IMPDEF_PERFCTR_RC_ST_SPEC				0x0091
+
+#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_RD			0x00A0
+#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WR			0x00A1
+#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_REFILL_RD		0x00A2
+#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_REFILL_WR		0x00A3
+
+#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WB_VICTIM		0x00A6
+#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WB_CLEAN			0x00A7
+#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_INVAL			0x00A8
 
 /*
  * Per-CPU PMCR: config reg
-- 
GitLab


From 969a26446bcd142faedfe8c6f41cd7668596c1fa Mon Sep 17 00:00:00 2001
From: Andreas Rammhold <andreas@rammhold.de>
Date: Wed, 1 Dec 2021 10:59:00 +0100
Subject: [PATCH 1077/1586] KEYS: trusted: Fix trusted key backends when
 building as module
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Before this commit the kernel could end up with no trusted key sources
even though both of the currently supported backends (TPM and TEE) were
compiled as modules. This manifested in the trusted key type not being
registered at all.

When checking if a CONFIG_… preprocessor variable is defined we only
test for the builtin (=y) case and not the module (=m) case. By using
the IS_REACHABLE() macro we do test for both cases.

Fixes: 5d0682be3189 ("KEYS: trusted: Add generic trusted keys framework")
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Reviewed-by: Ahmad Fatoum <a.fatoum@pengutronix.de>
Reviewed-by: Sumit Garg <sumit.garg@linaro.org>
Signed-off-by: Andreas Rammhold <andreas@rammhold.de>
Tested-by: Ahmad Fatoum <a.fatoum@pengutronix.de>
Signed-off-by: Ahmad Fatoum <a.fatoum@pengutronix.de>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 security/keys/trusted-keys/trusted_core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/security/keys/trusted-keys/trusted_core.c b/security/keys/trusted-keys/trusted_core.c
index d5c891d8d3534..5b35f1b876441 100644
--- a/security/keys/trusted-keys/trusted_core.c
+++ b/security/keys/trusted-keys/trusted_core.c
@@ -27,10 +27,10 @@ module_param_named(source, trusted_key_source, charp, 0);
 MODULE_PARM_DESC(source, "Select trusted keys source (tpm or tee)");
 
 static const struct trusted_key_source trusted_key_sources[] = {
-#if defined(CONFIG_TCG_TPM)
+#if IS_REACHABLE(CONFIG_TCG_TPM)
 	{ "tpm", &trusted_key_tpm_ops },
 #endif
-#if defined(CONFIG_TEE)
+#if IS_REACHABLE(CONFIG_TEE)
 	{ "tee", &trusted_key_tee_ops },
 #endif
 };
-- 
GitLab


From c5d1ed846e15090bc90dfdaafc07eac066e070bb Mon Sep 17 00:00:00 2001
From: Dave Kleikamp <dave.kleikamp@oracle.com>
Date: Wed, 26 Jan 2022 14:32:43 -0600
Subject: [PATCH 1078/1586] KEYS: trusted: Avoid calling null function
 trusted_key_exit

If one loads and unloads the trusted module, trusted_key_exit can be
NULL. Call it through static_call_cond() to avoid a kernel trap.

Fixes: 5d0682be3189 ("KEYS: trusted: Add generic trusted keys framework")
Signed-off-by: Dave Kleikamp <dave.kleikamp@oracle.com>
Cc: Sumit Garg <sumit.garg@linaro.org>
Cc: James Bottomley <jejb@linux.ibm.com>
Cc: Jarkko Sakkinen <jarkko@kernel.org>
Cc: Mimi Zohar <zohar@linux.ibm.com>
Cc: David Howells <dhowells@redhat.com>
Cc: James Morris <jmorris@namei.org>
Cc: "Serge E. Hallyn" <serge@hallyn.com>
Cc: linux-integrity@vger.kernel.org
Cc: keyrings@vger.kernel.org
Cc: linux-security-module@vger.kernel.org
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 security/keys/trusted-keys/trusted_core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/security/keys/trusted-keys/trusted_core.c b/security/keys/trusted-keys/trusted_core.c
index 5b35f1b876441..9b9d3ef79cbe3 100644
--- a/security/keys/trusted-keys/trusted_core.c
+++ b/security/keys/trusted-keys/trusted_core.c
@@ -351,7 +351,7 @@ static int __init init_trusted(void)
 
 static void __exit cleanup_trusted(void)
 {
-	static_call(trusted_key_exit)();
+	static_call_cond(trusted_key_exit)();
 }
 
 late_initcall(init_trusted);
-- 
GitLab


From e561752c317023c1f68df3950641747475fdcb29 Mon Sep 17 00:00:00 2001
From: Eric Snowberg <eric.snowberg@oracle.com>
Date: Tue, 25 Jan 2022 21:58:27 -0500
Subject: [PATCH 1079/1586] integrity: Fix warning about missing prototypes

make W=1 generates the following warning in keyring_handler.c

security/integrity/platform_certs/keyring_handler.c:71:30: warning: no previous prototype for get_handler_for_db [-Wmissing-prototypes]
 __init efi_element_handler_t get_handler_for_db(const efi_guid_t *sig_type)
                              ^~~~~~~~~~~~~~~~~~
security/integrity/platform_certs/keyring_handler.c:82:30: warning: no previous prototype for get_handler_for_dbx [-Wmissing-prototypes]
 __init efi_element_handler_t get_handler_for_dbx(const efi_guid_t *sig_type)
                              ^~~~~~~~~~~~~~~~~~~
Add the missing prototypes by including keyring_handler.h.

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Eric Snowberg <eric.snowberg@oracle.com>
Reviewed-by: Mimi Zohar <zohar@linux.ibm.com>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Tested-by: Mimi Zohar <zohar@linux.ibm.com>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 security/integrity/platform_certs/keyring_handler.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/security/integrity/platform_certs/keyring_handler.c b/security/integrity/platform_certs/keyring_handler.c
index 5604bd57c9907..e9791be98fd96 100644
--- a/security/integrity/platform_certs/keyring_handler.c
+++ b/security/integrity/platform_certs/keyring_handler.c
@@ -9,6 +9,7 @@
 #include <keys/asymmetric-type.h>
 #include <keys/system_keyring.h>
 #include "../integrity.h"
+#include "keyring_handler.h"
 
 static efi_guid_t efi_cert_x509_guid __initdata = EFI_CERT_X509_GUID;
 static efi_guid_t efi_cert_x509_sha256_guid __initdata =
-- 
GitLab


From d19967764ba876f5c82dabaa28f983b21eb642a2 Mon Sep 17 00:00:00 2001
From: Eric Snowberg <eric.snowberg@oracle.com>
Date: Tue, 25 Jan 2022 21:58:28 -0500
Subject: [PATCH 1080/1586] integrity: Introduce a Linux keyring called machine

Many UEFI Linux distributions boot using shim.  The UEFI shim provides
what is called Machine Owner Keys (MOK). Shim uses both the UEFI Secure
Boot DB and MOK keys to validate the next step in the boot chain.  The
MOK facility can be used to import user generated keys.  These keys can
be used to sign an end-users development kernel build.  When Linux
boots, both UEFI Secure Boot DB and MOK keys get loaded in the Linux
.platform keyring.

Define a new Linux keyring called machine.  This keyring shall contain just
MOK keys and not the remaining keys in the platform keyring. This new
machine keyring will be used in follow on patches.  Unlike keys in the
platform keyring, keys contained in the machine keyring will be trusted
within the kernel if the end-user has chosen to do so.

Signed-off-by: Eric Snowberg <eric.snowberg@oracle.com>
Tested-by: Jarkko Sakkinen <jarkko@kernel.org>
Tested-by: Mimi Zohar <zohar@linux.ibm.com>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 security/integrity/Kconfig                    | 13 ++++++
 security/integrity/Makefile                   |  1 +
 security/integrity/digsig.c                   | 13 +++++-
 security/integrity/integrity.h                | 12 +++++-
 .../platform_certs/machine_keyring.c          | 42 +++++++++++++++++++
 5 files changed, 78 insertions(+), 3 deletions(-)
 create mode 100644 security/integrity/platform_certs/machine_keyring.c

diff --git a/security/integrity/Kconfig b/security/integrity/Kconfig
index 71f0177e8716e..599429f99f99f 100644
--- a/security/integrity/Kconfig
+++ b/security/integrity/Kconfig
@@ -62,6 +62,19 @@ config INTEGRITY_PLATFORM_KEYRING
          provided by the platform for verifying the kexec'ed kerned image
          and, possibly, the initramfs signature.
 
+config INTEGRITY_MACHINE_KEYRING
+	bool "Provide a keyring to which Machine Owner Keys may be added"
+	depends on SECONDARY_TRUSTED_KEYRING
+	depends on INTEGRITY_ASYMMETRIC_KEYS
+	depends on SYSTEM_BLACKLIST_KEYRING
+	depends on LOAD_UEFI_KEYS
+	depends on !IMA_KEYRINGS_PERMIT_SIGNED_BY_BUILTIN_OR_SECONDARY
+	help
+	 If set, provide a keyring to which Machine Owner Keys (MOK) may
+	 be added. This keyring shall contain just MOK keys.  Unlike keys
+	 in the platform keyring, keys contained in the .machine keyring will
+	 be trusted within the kernel.
+
 config LOAD_UEFI_KEYS
        depends on INTEGRITY_PLATFORM_KEYRING
        depends on EFI
diff --git a/security/integrity/Makefile b/security/integrity/Makefile
index 7ee39d66cf166..d0ffe37dc1d64 100644
--- a/security/integrity/Makefile
+++ b/security/integrity/Makefile
@@ -10,6 +10,7 @@ integrity-$(CONFIG_INTEGRITY_AUDIT) += integrity_audit.o
 integrity-$(CONFIG_INTEGRITY_SIGNATURE) += digsig.o
 integrity-$(CONFIG_INTEGRITY_ASYMMETRIC_KEYS) += digsig_asymmetric.o
 integrity-$(CONFIG_INTEGRITY_PLATFORM_KEYRING) += platform_certs/platform_keyring.o
+integrity-$(CONFIG_INTEGRITY_MACHINE_KEYRING) += platform_certs/machine_keyring.o
 integrity-$(CONFIG_LOAD_UEFI_KEYS) += platform_certs/efi_parser.o \
 				      platform_certs/load_uefi.o \
 				      platform_certs/keyring_handler.o
diff --git a/security/integrity/digsig.c b/security/integrity/digsig.c
index 3b06a01bd0fdd..2b7fa85613c00 100644
--- a/security/integrity/digsig.c
+++ b/security/integrity/digsig.c
@@ -30,6 +30,7 @@ static const char * const keyring_name[INTEGRITY_KEYRING_MAX] = {
 	".ima",
 #endif
 	".platform",
+	".machine",
 };
 
 #ifdef CONFIG_IMA_KEYRINGS_PERMIT_SIGNED_BY_BUILTIN_OR_SECONDARY
@@ -126,7 +127,8 @@ int __init integrity_init_keyring(const unsigned int id)
 	perm = (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW
 		| KEY_USR_READ | KEY_USR_SEARCH;
 
-	if (id == INTEGRITY_KEYRING_PLATFORM) {
+	if (id == INTEGRITY_KEYRING_PLATFORM ||
+	    id == INTEGRITY_KEYRING_MACHINE) {
 		restriction = NULL;
 		goto out;
 	}
@@ -139,7 +141,14 @@ int __init integrity_init_keyring(const unsigned int id)
 		return -ENOMEM;
 
 	restriction->check = restrict_link_to_ima;
-	perm |= KEY_USR_WRITE;
+
+	/*
+	 * MOK keys can only be added through a read-only runtime services
+	 * UEFI variable during boot. No additional keys shall be allowed to
+	 * load into the machine keyring following init from userspace.
+	 */
+	if (id != INTEGRITY_KEYRING_MACHINE)
+		perm |= KEY_USR_WRITE;
 
 out:
 	return __integrity_init_keyring(id, perm, restriction);
diff --git a/security/integrity/integrity.h b/security/integrity/integrity.h
index 547425c20e117..730771eececd7 100644
--- a/security/integrity/integrity.h
+++ b/security/integrity/integrity.h
@@ -151,7 +151,8 @@ int integrity_kernel_read(struct file *file, loff_t offset,
 #define INTEGRITY_KEYRING_EVM		0
 #define INTEGRITY_KEYRING_IMA		1
 #define INTEGRITY_KEYRING_PLATFORM	2
-#define INTEGRITY_KEYRING_MAX		3
+#define INTEGRITY_KEYRING_MACHINE	3
+#define INTEGRITY_KEYRING_MAX		4
 
 extern struct dentry *integrity_dir;
 
@@ -283,3 +284,12 @@ static inline void __init add_to_platform_keyring(const char *source,
 {
 }
 #endif
+
+#ifdef CONFIG_INTEGRITY_MACHINE_KEYRING
+void __init add_to_machine_keyring(const char *source, const void *data, size_t len);
+#else
+static inline void __init add_to_machine_keyring(const char *source,
+						  const void *data, size_t len)
+{
+}
+#endif
diff --git a/security/integrity/platform_certs/machine_keyring.c b/security/integrity/platform_certs/machine_keyring.c
new file mode 100644
index 0000000000000..ea2ac2f9f2b57
--- /dev/null
+++ b/security/integrity/platform_certs/machine_keyring.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Machine keyring routines.
+ *
+ * Copyright (c) 2021, Oracle and/or its affiliates.
+ */
+
+#include "../integrity.h"
+
+static __init int machine_keyring_init(void)
+{
+	int rc;
+
+	rc = integrity_init_keyring(INTEGRITY_KEYRING_MACHINE);
+	if (rc)
+		return rc;
+
+	pr_notice("Machine keyring initialized\n");
+	return 0;
+}
+device_initcall(machine_keyring_init);
+
+void __init add_to_machine_keyring(const char *source, const void *data, size_t len)
+{
+	key_perm_t perm;
+	int rc;
+
+	perm = (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW;
+	rc = integrity_load_cert(INTEGRITY_KEYRING_MACHINE, source, data, len, perm);
+
+	/*
+	 * Some MOKList keys may not pass the machine keyring restrictions.
+	 * If the restriction check does not pass and the platform keyring
+	 * is configured, try to add it into that keyring instead.
+	 */
+	if (rc && IS_ENABLED(CONFIG_INTEGRITY_PLATFORM_KEYRING))
+		rc = integrity_load_cert(INTEGRITY_KEYRING_PLATFORM, source,
+					 data, len, perm);
+
+	if (rc)
+		pr_info("Error adding keys to machine keyring %s\n", source);
+}
-- 
GitLab


From 45fcd5e521cd0903bab05f59ad013c5d150f4e3b Mon Sep 17 00:00:00 2001
From: Eric Snowberg <eric.snowberg@oracle.com>
Date: Tue, 25 Jan 2022 21:58:29 -0500
Subject: [PATCH 1081/1586] integrity: add new keyring handler for mok keys

Currently both Secure Boot DB and Machine Owner Keys (MOK) go through
the same keyring handler (get_handler_for_db). With the addition of the
new machine keyring, the end-user may choose to trust MOK keys.

Introduce a new keyring handler specific for MOK keys.  If MOK keys are
trusted by the end-user, use the new keyring handler instead.

Signed-off-by: Eric Snowberg <eric.snowberg@oracle.com>
Reviewed-by: Mimi Zohar <zohar@linux.ibm.com>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Tested-by: Mimi Zohar <zohar@linux.ibm.com>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 .../integrity/platform_certs/keyring_handler.c  | 17 ++++++++++++++++-
 .../integrity/platform_certs/keyring_handler.h  |  5 +++++
 security/integrity/platform_certs/load_uefi.c   |  4 ++--
 3 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/security/integrity/platform_certs/keyring_handler.c b/security/integrity/platform_certs/keyring_handler.c
index e9791be98fd96..4872850d081fe 100644
--- a/security/integrity/platform_certs/keyring_handler.c
+++ b/security/integrity/platform_certs/keyring_handler.c
@@ -67,7 +67,7 @@ static __init void uefi_revocation_list_x509(const char *source,
 
 /*
  * Return the appropriate handler for particular signature list types found in
- * the UEFI db and MokListRT tables.
+ * the UEFI db tables.
  */
 __init efi_element_handler_t get_handler_for_db(const efi_guid_t *sig_type)
 {
@@ -76,6 +76,21 @@ __init efi_element_handler_t get_handler_for_db(const efi_guid_t *sig_type)
 	return 0;
 }
 
+/*
+ * Return the appropriate handler for particular signature list types found in
+ * the MokListRT tables.
+ */
+__init efi_element_handler_t get_handler_for_mok(const efi_guid_t *sig_type)
+{
+	if (efi_guidcmp(*sig_type, efi_cert_x509_guid) == 0) {
+		if (IS_ENABLED(CONFIG_INTEGRITY_MACHINE_KEYRING))
+			return add_to_machine_keyring;
+		else
+			return add_to_platform_keyring;
+	}
+	return 0;
+}
+
 /*
  * Return the appropriate handler for particular signature list types found in
  * the UEFI dbx and MokListXRT tables.
diff --git a/security/integrity/platform_certs/keyring_handler.h b/security/integrity/platform_certs/keyring_handler.h
index 2462bfa08fe34..284558f30411e 100644
--- a/security/integrity/platform_certs/keyring_handler.h
+++ b/security/integrity/platform_certs/keyring_handler.h
@@ -24,6 +24,11 @@ void blacklist_binary(const char *source, const void *data, size_t len);
  */
 efi_element_handler_t get_handler_for_db(const efi_guid_t *sig_type);
 
+/*
+ * Return the handler for particular signature list types found in the mok.
+ */
+efi_element_handler_t get_handler_for_mok(const efi_guid_t *sig_type);
+
 /*
  * Return the handler for particular signature list types found in the dbx.
  */
diff --git a/security/integrity/platform_certs/load_uefi.c b/security/integrity/platform_certs/load_uefi.c
index 08b6d12f99b4f..5f45c3c07dbd4 100644
--- a/security/integrity/platform_certs/load_uefi.c
+++ b/security/integrity/platform_certs/load_uefi.c
@@ -95,7 +95,7 @@ static int __init load_moklist_certs(void)
 		rc = parse_efi_signature_list("UEFI:MokListRT (MOKvar table)",
 					      mokvar_entry->data,
 					      mokvar_entry->data_size,
-					      get_handler_for_db);
+					      get_handler_for_mok);
 		/* All done if that worked. */
 		if (!rc)
 			return rc;
@@ -110,7 +110,7 @@ static int __init load_moklist_certs(void)
 	mok = get_cert_list(L"MokListRT", &mok_var, &moksize, &status);
 	if (mok) {
 		rc = parse_efi_signature_list("UEFI:MokListRT",
-					      mok, moksize, get_handler_for_db);
+					      mok, moksize, get_handler_for_mok);
 		kfree(mok);
 		if (rc)
 			pr_err("Couldn't parse MokListRT signatures: %d\n", rc);
-- 
GitLab


From 56edb6c25f11f25df153f4804f2d5bced2b49a9e Mon Sep 17 00:00:00 2001
From: Eric Snowberg <eric.snowberg@oracle.com>
Date: Tue, 25 Jan 2022 21:58:30 -0500
Subject: [PATCH 1082/1586] KEYS: store reference to machine keyring

Expose the .machine keyring created in integrity code by adding
a reference.  Store a reference to the machine keyring in
system keyring code. The system keyring code needs this to complete
the keyring link to the machine keyring.

Signed-off-by: Eric Snowberg <eric.snowberg@oracle.com>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Tested-by: Mimi Zohar <zohar@linux.ibm.com>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 certs/system_keyring.c        | 9 +++++++++
 include/keys/system_keyring.h | 8 ++++++++
 security/integrity/digsig.c   | 2 ++
 3 files changed, 19 insertions(+)

diff --git a/certs/system_keyring.c b/certs/system_keyring.c
index 692365dee2bd4..08ea542c8096d 100644
--- a/certs/system_keyring.c
+++ b/certs/system_keyring.c
@@ -22,6 +22,9 @@ static struct key *builtin_trusted_keys;
 #ifdef CONFIG_SECONDARY_TRUSTED_KEYRING
 static struct key *secondary_trusted_keys;
 #endif
+#ifdef CONFIG_INTEGRITY_MACHINE_KEYRING
+static struct key *machine_trusted_keys;
+#endif
 #ifdef CONFIG_INTEGRITY_PLATFORM_KEYRING
 static struct key *platform_trusted_keys;
 #endif
@@ -91,6 +94,12 @@ static __init struct key_restriction *get_builtin_and_secondary_restriction(void
 	return restriction;
 }
 #endif
+#ifdef CONFIG_INTEGRITY_MACHINE_KEYRING
+void __init set_machine_trusted_keys(struct key *keyring)
+{
+	machine_trusted_keys = keyring;
+}
+#endif
 
 /*
  * Create the trusted keyrings
diff --git a/include/keys/system_keyring.h b/include/keys/system_keyring.h
index 6acd3cf13a18c..98c9b10cdc17a 100644
--- a/include/keys/system_keyring.h
+++ b/include/keys/system_keyring.h
@@ -38,6 +38,14 @@ extern int restrict_link_by_builtin_and_secondary_trusted(
 #define restrict_link_by_builtin_and_secondary_trusted restrict_link_by_builtin_trusted
 #endif
 
+#ifdef CONFIG_INTEGRITY_MACHINE_KEYRING
+extern void __init set_machine_trusted_keys(struct key *keyring);
+#else
+static inline void __init set_machine_trusted_keys(struct key *keyring)
+{
+}
+#endif
+
 extern struct pkcs7_message *pkcs7;
 #ifdef CONFIG_SYSTEM_BLACKLIST_KEYRING
 extern int mark_hash_blacklisted(const char *hash);
diff --git a/security/integrity/digsig.c b/security/integrity/digsig.c
index 2b7fa85613c00..7b719aa761885 100644
--- a/security/integrity/digsig.c
+++ b/security/integrity/digsig.c
@@ -112,6 +112,8 @@ static int __init __integrity_init_keyring(const unsigned int id,
 	} else {
 		if (id == INTEGRITY_KEYRING_PLATFORM)
 			set_platform_trusted_keys(keyring[id]);
+		if (id == INTEGRITY_KEYRING_MACHINE)
+			set_machine_trusted_keys(keyring[id]);
 		if (id == INTEGRITY_KEYRING_IMA)
 			load_module_cert(keyring[id]);
 	}
-- 
GitLab


From 087aa4ed379054951cb3c8ccaa0c4dbafd903c01 Mon Sep 17 00:00:00 2001
From: Eric Snowberg <eric.snowberg@oracle.com>
Date: Tue, 25 Jan 2022 21:58:31 -0500
Subject: [PATCH 1083/1586] KEYS: Introduce link restriction for machine keys

Introduce a new link restriction that includes the trusted builtin,
secondary and machine keys. The restriction is based on the key to be
added being vouched for by a key in any of these three keyrings.

With the introduction of the machine keyring, the end-user may choose to
trust Machine Owner Keys (MOK) within the kernel. If they have chosen to
trust them, the .machine keyring will contain these keys.  If not, the
machine keyring will always be empty.  Update the restriction check to
allow the secondary trusted keyring to also trust machine keys.

Allow the .machine keyring to be linked to the secondary_trusted_keys.
After the link is created, keys contained in the .machine keyring will
automatically be searched when searching secondary_trusted_keys.

Suggested-by: Mimi Zohar <zohar@linux.ibm.com>
Signed-off-by: Eric Snowberg <eric.snowberg@oracle.com>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Tested-by: Mimi Zohar <zohar@linux.ibm.com>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 certs/system_keyring.c        | 35 ++++++++++++++++++++++++++++++++++-
 include/keys/system_keyring.h |  6 ++++++
 2 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/certs/system_keyring.c b/certs/system_keyring.c
index 08ea542c8096d..05b66ce9d1c9e 100644
--- a/certs/system_keyring.c
+++ b/certs/system_keyring.c
@@ -89,7 +89,10 @@ static __init struct key_restriction *get_builtin_and_secondary_restriction(void
 	if (!restriction)
 		panic("Can't allocate secondary trusted keyring restriction\n");
 
-	restriction->check = restrict_link_by_builtin_and_secondary_trusted;
+	if (IS_ENABLED(CONFIG_INTEGRITY_MACHINE_KEYRING))
+		restriction->check = restrict_link_by_builtin_secondary_and_machine;
+	else
+		restriction->check = restrict_link_by_builtin_and_secondary_trusted;
 
 	return restriction;
 }
@@ -98,6 +101,36 @@ static __init struct key_restriction *get_builtin_and_secondary_restriction(void
 void __init set_machine_trusted_keys(struct key *keyring)
 {
 	machine_trusted_keys = keyring;
+
+	if (key_link(secondary_trusted_keys, machine_trusted_keys) < 0)
+		panic("Can't link (machine) trusted keyrings\n");
+}
+
+/**
+ * restrict_link_by_builtin_secondary_and_machine - Restrict keyring addition.
+ * @dest_keyring: Keyring being linked to.
+ * @type: The type of key being added.
+ * @payload: The payload of the new key.
+ * @restrict_key: A ring of keys that can be used to vouch for the new cert.
+ *
+ * Restrict the addition of keys into a keyring based on the key-to-be-added
+ * being vouched for by a key in either the built-in, the secondary, or
+ * the machine keyrings.
+ */
+int restrict_link_by_builtin_secondary_and_machine(
+	struct key *dest_keyring,
+	const struct key_type *type,
+	const union key_payload *payload,
+	struct key *restrict_key)
+{
+	if (machine_trusted_keys && type == &key_type_keyring &&
+	    dest_keyring == secondary_trusted_keys &&
+	    payload == &machine_trusted_keys->payload)
+		/* Allow the machine keyring to be added to the secondary */
+		return 0;
+
+	return restrict_link_by_builtin_and_secondary_trusted(dest_keyring, type,
+							      payload, restrict_key);
 }
 #endif
 
diff --git a/include/keys/system_keyring.h b/include/keys/system_keyring.h
index 98c9b10cdc17a..2419a735420fb 100644
--- a/include/keys/system_keyring.h
+++ b/include/keys/system_keyring.h
@@ -39,8 +39,14 @@ extern int restrict_link_by_builtin_and_secondary_trusted(
 #endif
 
 #ifdef CONFIG_INTEGRITY_MACHINE_KEYRING
+extern int restrict_link_by_builtin_secondary_and_machine(
+	struct key *dest_keyring,
+	const struct key_type *type,
+	const union key_payload *payload,
+	struct key *restrict_key);
 extern void __init set_machine_trusted_keys(struct key *keyring);
 #else
+#define restrict_link_by_builtin_secondary_and_machine restrict_link_by_builtin_trusted
 static inline void __init set_machine_trusted_keys(struct key *keyring)
 {
 }
-- 
GitLab


From 847c5336d8439a3b8245b31fa127cf98a26afae8 Mon Sep 17 00:00:00 2001
From: Eric Snowberg <eric.snowberg@oracle.com>
Date: Tue, 25 Jan 2022 21:58:32 -0500
Subject: [PATCH 1084/1586] efi/mokvar: move up init order

Move up the init order so it can be used by the new machine keyring.

Signed-off-by: Eric Snowberg <eric.snowberg@oracle.com>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 drivers/firmware/efi/mokvar-table.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/firmware/efi/mokvar-table.c b/drivers/firmware/efi/mokvar-table.c
index 38722d2009e20..5ed0602c2f75f 100644
--- a/drivers/firmware/efi/mokvar-table.c
+++ b/drivers/firmware/efi/mokvar-table.c
@@ -359,4 +359,4 @@ static int __init efi_mokvar_sysfs_init(void)
 	}
 	return err;
 }
-device_initcall(efi_mokvar_sysfs_init);
+fs_initcall(efi_mokvar_sysfs_init);
-- 
GitLab


From 74f5e30051399d60dbce4296dbfd833212df13f1 Mon Sep 17 00:00:00 2001
From: Eric Snowberg <eric.snowberg@oracle.com>
Date: Tue, 25 Jan 2022 21:58:33 -0500
Subject: [PATCH 1085/1586] integrity: Trust MOK keys if MokListTrustedRT found

A new Machine Owner Key (MOK) variable called MokListTrustedRT has been
introduced in shim. When this UEFI variable is set, it indicates the
end-user has made the decision themselves that they wish to trust MOK keys
within the Linux trust boundary.  It is not an error if this variable
does not exist. If it does not exist, the MOK keys should not be trusted
within the kernel.

Signed-off-by: Eric Snowberg <eric.snowberg@oracle.com>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 .../platform_certs/machine_keyring.c          | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/security/integrity/platform_certs/machine_keyring.c b/security/integrity/platform_certs/machine_keyring.c
index ea2ac2f9f2b57..09fd8f20c7560 100644
--- a/security/integrity/platform_certs/machine_keyring.c
+++ b/security/integrity/platform_certs/machine_keyring.c
@@ -5,6 +5,7 @@
  * Copyright (c) 2021, Oracle and/or its affiliates.
  */
 
+#include <linux/efi.h>
 #include "../integrity.h"
 
 static __init int machine_keyring_init(void)
@@ -40,3 +41,21 @@ void __init add_to_machine_keyring(const char *source, const void *data, size_t
 	if (rc)
 		pr_info("Error adding keys to machine keyring %s\n", source);
 }
+
+/*
+ * Try to load the MokListTrustedRT MOK variable to see if we should trust
+ * the MOK keys within the kernel. It is not an error if this variable
+ * does not exist.  If it does not exist, MOK keys should not be trusted
+ * within the machine keyring.
+ */
+static __init bool uefi_check_trust_mok_keys(void)
+{
+	struct efi_mokvar_table_entry *mokvar_entry;
+
+	mokvar_entry = efi_mokvar_entry_find("MokListTrustedRT");
+
+	if (mokvar_entry)
+		return true;
+
+	return false;
+}
-- 
GitLab


From 3d6ae1a5d0c2019d274284859f556dcb64aa98a7 Mon Sep 17 00:00:00 2001
From: Eric Snowberg <eric.snowberg@oracle.com>
Date: Tue, 25 Jan 2022 21:58:34 -0500
Subject: [PATCH 1086/1586] integrity: Only use machine keyring when
 uefi_check_trust_mok_keys is true

With the introduction of uefi_check_trust_mok_keys, it signifies the end-
user wants to trust the machine keyring as trusted keys.  If they have
chosen to trust the machine keyring, load the qualifying keys into it
during boot, then link it to the secondary keyring .  If the user has not
chosen to trust the machine keyring, it will be empty and not linked to
the secondary keyring.

Signed-off-by: Eric Snowberg <eric.snowberg@oracle.com>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 security/integrity/digsig.c                      |  2 +-
 security/integrity/integrity.h                   |  5 +++++
 .../integrity/platform_certs/keyring_handler.c   |  2 +-
 .../integrity/platform_certs/machine_keyring.c   | 16 ++++++++++++++++
 4 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/security/integrity/digsig.c b/security/integrity/digsig.c
index 7b719aa761885..c8c8a4a4e7a00 100644
--- a/security/integrity/digsig.c
+++ b/security/integrity/digsig.c
@@ -112,7 +112,7 @@ static int __init __integrity_init_keyring(const unsigned int id,
 	} else {
 		if (id == INTEGRITY_KEYRING_PLATFORM)
 			set_platform_trusted_keys(keyring[id]);
-		if (id == INTEGRITY_KEYRING_MACHINE)
+		if (id == INTEGRITY_KEYRING_MACHINE && trust_moklist())
 			set_machine_trusted_keys(keyring[id]);
 		if (id == INTEGRITY_KEYRING_IMA)
 			load_module_cert(keyring[id]);
diff --git a/security/integrity/integrity.h b/security/integrity/integrity.h
index 730771eececd7..2e214c7611586 100644
--- a/security/integrity/integrity.h
+++ b/security/integrity/integrity.h
@@ -287,9 +287,14 @@ static inline void __init add_to_platform_keyring(const char *source,
 
 #ifdef CONFIG_INTEGRITY_MACHINE_KEYRING
 void __init add_to_machine_keyring(const char *source, const void *data, size_t len);
+bool __init trust_moklist(void);
 #else
 static inline void __init add_to_machine_keyring(const char *source,
 						  const void *data, size_t len)
 {
 }
+static inline bool __init trust_moklist(void)
+{
+	return false;
+}
 #endif
diff --git a/security/integrity/platform_certs/keyring_handler.c b/security/integrity/platform_certs/keyring_handler.c
index 4872850d081fe..1db4d3b4356dc 100644
--- a/security/integrity/platform_certs/keyring_handler.c
+++ b/security/integrity/platform_certs/keyring_handler.c
@@ -83,7 +83,7 @@ __init efi_element_handler_t get_handler_for_db(const efi_guid_t *sig_type)
 __init efi_element_handler_t get_handler_for_mok(const efi_guid_t *sig_type)
 {
 	if (efi_guidcmp(*sig_type, efi_cert_x509_guid) == 0) {
-		if (IS_ENABLED(CONFIG_INTEGRITY_MACHINE_KEYRING))
+		if (IS_ENABLED(CONFIG_INTEGRITY_MACHINE_KEYRING) && trust_moklist())
 			return add_to_machine_keyring;
 		else
 			return add_to_platform_keyring;
diff --git a/security/integrity/platform_certs/machine_keyring.c b/security/integrity/platform_certs/machine_keyring.c
index 09fd8f20c7560..7aaed7950b6e3 100644
--- a/security/integrity/platform_certs/machine_keyring.c
+++ b/security/integrity/platform_certs/machine_keyring.c
@@ -8,6 +8,8 @@
 #include <linux/efi.h>
 #include "../integrity.h"
 
+static bool trust_mok;
+
 static __init int machine_keyring_init(void)
 {
 	int rc;
@@ -59,3 +61,17 @@ static __init bool uefi_check_trust_mok_keys(void)
 
 	return false;
 }
+
+bool __init trust_moklist(void)
+{
+	static bool initialized;
+
+	if (!initialized) {
+		initialized = true;
+
+		if (uefi_check_trust_mok_keys())
+			trust_mok = true;
+	}
+
+	return trust_mok;
+}
-- 
GitLab


From 7e0438f83dc769465ee663bb5dcf8cc154940712 Mon Sep 17 00:00:00 2001
From: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Date: Wed, 2 Mar 2022 10:43:53 +0100
Subject: [PATCH 1087/1586] tpm: fix reference counting for struct tpm_chip

The following sequence of operations results in a refcount warning:

1. Open device /dev/tpmrm.
2. Remove module tpm_tis_spi.
3. Write a TPM command to the file descriptor opened at step 1.

------------[ cut here ]------------
WARNING: CPU: 3 PID: 1161 at lib/refcount.c:25 kobject_get+0xa0/0xa4
refcount_t: addition on 0; use-after-free.
Modules linked in: tpm_tis_spi tpm_tis_core tpm mdio_bcm_unimac brcmfmac
sha256_generic libsha256 sha256_arm hci_uart btbcm bluetooth cfg80211 vc4
brcmutil ecdh_generic ecc snd_soc_core crc32_arm_ce libaes
raspberrypi_hwmon ac97_bus snd_pcm_dmaengine bcm2711_thermal snd_pcm
snd_timer genet snd phy_generic soundcore [last unloaded: spi_bcm2835]
CPU: 3 PID: 1161 Comm: hold_open Not tainted 5.10.0ls-main-dirty #2
Hardware name: BCM2711
[<c0410c3c>] (unwind_backtrace) from [<c040b580>] (show_stack+0x10/0x14)
[<c040b580>] (show_stack) from [<c1092174>] (dump_stack+0xc4/0xd8)
[<c1092174>] (dump_stack) from [<c0445a30>] (__warn+0x104/0x108)
[<c0445a30>] (__warn) from [<c0445aa8>] (warn_slowpath_fmt+0x74/0xb8)
[<c0445aa8>] (warn_slowpath_fmt) from [<c08435d0>] (kobject_get+0xa0/0xa4)
[<c08435d0>] (kobject_get) from [<bf0a715c>] (tpm_try_get_ops+0x14/0x54 [tpm])
[<bf0a715c>] (tpm_try_get_ops [tpm]) from [<bf0a7d6c>] (tpm_common_write+0x38/0x60 [tpm])
[<bf0a7d6c>] (tpm_common_write [tpm]) from [<c05a7ac0>] (vfs_write+0xc4/0x3c0)
[<c05a7ac0>] (vfs_write) from [<c05a7ee4>] (ksys_write+0x58/0xcc)
[<c05a7ee4>] (ksys_write) from [<c04001a0>] (ret_fast_syscall+0x0/0x4c)
Exception stack(0xc226bfa8 to 0xc226bff0)
bfa0:                   00000000 000105b4 00000003 beafe664 00000014 00000000
bfc0: 00000000 000105b4 000103f8 00000004 00000000 00000000 b6f9c000 beafe684
bfe0: 0000006c beafe648 0001056c b6eb6944
---[ end trace d4b8409def9b8b1f ]---

The reason for this warning is the attempt to get the chip->dev reference
in tpm_common_write() although the reference counter is already zero.

Since commit 8979b02aaf1d ("tpm: Fix reference count to main device") the
extra reference used to prevent a premature zero counter is never taken,
because the required TPM_CHIP_FLAG_TPM2 flag is never set.

Fix this by moving the TPM 2 character device handling from
tpm_chip_alloc() to tpm_add_char_device() which is called at a later point
in time when the flag has been set in case of TPM2.

Commit fdc915f7f719 ("tpm: expose spaces via a device link /dev/tpmrm<n>")
already introduced function tpm_devs_release() to release the extra
reference but did not implement the required put on chip->devs that results
in the call of this function.

Fix this by putting chip->devs in tpm_chip_unregister().

Finally move the new implementation for the TPM 2 handling into a new
function to avoid multiple checks for the TPM_CHIP_FLAG_TPM2 flag in the
good case and error cases.

Cc: stable@vger.kernel.org
Fixes: fdc915f7f719 ("tpm: expose spaces via a device link /dev/tpmrm<n>")
Fixes: 8979b02aaf1d ("tpm: Fix reference count to main device")
Co-developed-by: Jason Gunthorpe <jgg@ziepe.ca>
Signed-off-by: Jason Gunthorpe <jgg@ziepe.ca>
Signed-off-by: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Tested-by: Stefan Berger <stefanb@linux.ibm.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 drivers/char/tpm/tpm-chip.c   | 46 +++++--------------------
 drivers/char/tpm/tpm.h        |  2 ++
 drivers/char/tpm/tpm2-space.c | 65 +++++++++++++++++++++++++++++++++++
 3 files changed, 75 insertions(+), 38 deletions(-)

diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c
index b009e7479b702..783d65fc71f07 100644
--- a/drivers/char/tpm/tpm-chip.c
+++ b/drivers/char/tpm/tpm-chip.c
@@ -274,14 +274,6 @@ static void tpm_dev_release(struct device *dev)
 	kfree(chip);
 }
 
-static void tpm_devs_release(struct device *dev)
-{
-	struct tpm_chip *chip = container_of(dev, struct tpm_chip, devs);
-
-	/* release the master device reference */
-	put_device(&chip->dev);
-}
-
 /**
  * tpm_class_shutdown() - prepare the TPM device for loss of power.
  * @dev: device to which the chip is associated.
@@ -344,7 +336,6 @@ struct tpm_chip *tpm_chip_alloc(struct device *pdev,
 	chip->dev_num = rc;
 
 	device_initialize(&chip->dev);
-	device_initialize(&chip->devs);
 
 	chip->dev.class = tpm_class;
 	chip->dev.class->shutdown_pre = tpm_class_shutdown;
@@ -352,29 +343,12 @@ struct tpm_chip *tpm_chip_alloc(struct device *pdev,
 	chip->dev.parent = pdev;
 	chip->dev.groups = chip->groups;
 
-	chip->devs.parent = pdev;
-	chip->devs.class = tpmrm_class;
-	chip->devs.release = tpm_devs_release;
-	/* get extra reference on main device to hold on
-	 * behalf of devs.  This holds the chip structure
-	 * while cdevs is in use.  The corresponding put
-	 * is in the tpm_devs_release (TPM2 only)
-	 */
-	if (chip->flags & TPM_CHIP_FLAG_TPM2)
-		get_device(&chip->dev);
-
 	if (chip->dev_num == 0)
 		chip->dev.devt = MKDEV(MISC_MAJOR, TPM_MINOR);
 	else
 		chip->dev.devt = MKDEV(MAJOR(tpm_devt), chip->dev_num);
 
-	chip->devs.devt =
-		MKDEV(MAJOR(tpm_devt), chip->dev_num + TPM_NUM_DEVICES);
-
 	rc = dev_set_name(&chip->dev, "tpm%d", chip->dev_num);
-	if (rc)
-		goto out;
-	rc = dev_set_name(&chip->devs, "tpmrm%d", chip->dev_num);
 	if (rc)
 		goto out;
 
@@ -382,9 +356,7 @@ struct tpm_chip *tpm_chip_alloc(struct device *pdev,
 		chip->flags |= TPM_CHIP_FLAG_VIRTUAL;
 
 	cdev_init(&chip->cdev, &tpm_fops);
-	cdev_init(&chip->cdevs, &tpmrm_fops);
 	chip->cdev.owner = THIS_MODULE;
-	chip->cdevs.owner = THIS_MODULE;
 
 	rc = tpm2_init_space(&chip->work_space, TPM2_SPACE_BUFFER_SIZE);
 	if (rc) {
@@ -396,7 +368,6 @@ struct tpm_chip *tpm_chip_alloc(struct device *pdev,
 	return chip;
 
 out:
-	put_device(&chip->devs);
 	put_device(&chip->dev);
 	return ERR_PTR(rc);
 }
@@ -445,14 +416,9 @@ static int tpm_add_char_device(struct tpm_chip *chip)
 	}
 
 	if (chip->flags & TPM_CHIP_FLAG_TPM2 && !tpm_is_firmware_upgrade(chip)) {
-		rc = cdev_device_add(&chip->cdevs, &chip->devs);
-		if (rc) {
-			dev_err(&chip->devs,
-				"unable to cdev_device_add() %s, major %d, minor %d, err=%d\n",
-				dev_name(&chip->devs), MAJOR(chip->devs.devt),
-				MINOR(chip->devs.devt), rc);
-			return rc;
-		}
+		rc = tpm_devs_add(chip);
+		if (rc)
+			goto err_del_cdev;
 	}
 
 	/* Make the chip available. */
@@ -460,6 +426,10 @@ static int tpm_add_char_device(struct tpm_chip *chip)
 	idr_replace(&dev_nums_idr, chip, chip->dev_num);
 	mutex_unlock(&idr_lock);
 
+	return 0;
+
+err_del_cdev:
+	cdev_device_del(&chip->cdev, &chip->dev);
 	return rc;
 }
 
@@ -654,7 +624,7 @@ void tpm_chip_unregister(struct tpm_chip *chip)
 		hwrng_unregister(&chip->hwrng);
 	tpm_bios_log_teardown(chip);
 	if (chip->flags & TPM_CHIP_FLAG_TPM2 && !tpm_is_firmware_upgrade(chip))
-		cdev_device_del(&chip->cdevs, &chip->devs);
+		tpm_devs_remove(chip);
 	tpm_del_char_device(chip);
 }
 EXPORT_SYMBOL_GPL(tpm_chip_unregister);
diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h
index 283f78211c3a7..2163c6ee0d364 100644
--- a/drivers/char/tpm/tpm.h
+++ b/drivers/char/tpm/tpm.h
@@ -234,6 +234,8 @@ int tpm2_prepare_space(struct tpm_chip *chip, struct tpm_space *space, u8 *cmd,
 		       size_t cmdsiz);
 int tpm2_commit_space(struct tpm_chip *chip, struct tpm_space *space, void *buf,
 		      size_t *bufsiz);
+int tpm_devs_add(struct tpm_chip *chip);
+void tpm_devs_remove(struct tpm_chip *chip);
 
 void tpm_bios_log_setup(struct tpm_chip *chip);
 void tpm_bios_log_teardown(struct tpm_chip *chip);
diff --git a/drivers/char/tpm/tpm2-space.c b/drivers/char/tpm/tpm2-space.c
index 97e916856cf3e..265ec72b1d816 100644
--- a/drivers/char/tpm/tpm2-space.c
+++ b/drivers/char/tpm/tpm2-space.c
@@ -574,3 +574,68 @@ out:
 	dev_err(&chip->dev, "%s: error %d\n", __func__, rc);
 	return rc;
 }
+
+/*
+ * Put the reference to the main device.
+ */
+static void tpm_devs_release(struct device *dev)
+{
+	struct tpm_chip *chip = container_of(dev, struct tpm_chip, devs);
+
+	/* release the master device reference */
+	put_device(&chip->dev);
+}
+
+/*
+ * Remove the device file for exposed TPM spaces and release the device
+ * reference. This may also release the reference to the master device.
+ */
+void tpm_devs_remove(struct tpm_chip *chip)
+{
+	cdev_device_del(&chip->cdevs, &chip->devs);
+	put_device(&chip->devs);
+}
+
+/*
+ * Add a device file to expose TPM spaces. Also take a reference to the
+ * main device.
+ */
+int tpm_devs_add(struct tpm_chip *chip)
+{
+	int rc;
+
+	device_initialize(&chip->devs);
+	chip->devs.parent = chip->dev.parent;
+	chip->devs.class = tpmrm_class;
+
+	/*
+	 * Get extra reference on main device to hold on behalf of devs.
+	 * This holds the chip structure while cdevs is in use. The
+	 * corresponding put is in the tpm_devs_release.
+	 */
+	get_device(&chip->dev);
+	chip->devs.release = tpm_devs_release;
+	chip->devs.devt = MKDEV(MAJOR(tpm_devt), chip->dev_num + TPM_NUM_DEVICES);
+	cdev_init(&chip->cdevs, &tpmrm_fops);
+	chip->cdevs.owner = THIS_MODULE;
+
+	rc = dev_set_name(&chip->devs, "tpmrm%d", chip->dev_num);
+	if (rc)
+		goto err_put_devs;
+
+	rc = cdev_device_add(&chip->cdevs, &chip->devs);
+	if (rc) {
+		dev_err(&chip->devs,
+			"unable to cdev_device_add() %s, major %d, minor %d, err=%d\n",
+			dev_name(&chip->devs), MAJOR(chip->devs.devt),
+			MINOR(chip->devs.devt), rc);
+		goto err_put_devs;
+	}
+
+	return 0;
+
+err_put_devs:
+	put_device(&chip->devs);
+
+	return rc;
+}
-- 
GitLab


From d3cff4a95ed78ca192fc4bbb2743d13b7a6cc555 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Fri, 28 Jan 2022 11:56:55 -0800
Subject: [PATCH 1088/1586] KEYS: remove support for asym_tpm keys

asym_tpm keys are tied to TPM v1.2, which uses outdated crypto and has
been deprecated in favor of TPM v2.0 for over 7 years.  A very quick
look at this code also immediately found some memory safety bugs
(https://lore.kernel.org/r/20220113235440.90439-2-ebiggers@kernel.org).
Note that this code is reachable by unprivileged users.

According to Jarkko (one of the keyrings subsystem maintainers), this
code has no practical use cases, and he isn't willing to maintain it
(https://lore.kernel.org/r/YfFZPbKkgYJGWu1Q@iki.fi).

Therefore, let's remove it.

Note that this feature didn't have any documentation or tests, so we
don't need to worry about removing those.

Cc: David Howells <dhowells@redhat.com>
Cc: Denis Kenzior <denkenz@gmail.com>
Cc: James Morris <jmorris@namei.org>
Cc: Jarkko Sakkinen <jarkko@kernel.org>
Cc: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Acked-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 crypto/asymmetric_keys/Kconfig      |  21 -
 crypto/asymmetric_keys/Makefile     |  12 -
 crypto/asymmetric_keys/asym_tpm.c   | 957 ----------------------------
 crypto/asymmetric_keys/tpm.asn1     |   5 -
 crypto/asymmetric_keys/tpm_parser.c | 102 ---
 include/crypto/asym_tpm_subtype.h   |  19 -
 6 files changed, 1116 deletions(-)
 delete mode 100644 crypto/asymmetric_keys/asym_tpm.c
 delete mode 100644 crypto/asymmetric_keys/tpm.asn1
 delete mode 100644 crypto/asymmetric_keys/tpm_parser.c
 delete mode 100644 include/crypto/asym_tpm_subtype.h

diff --git a/crypto/asymmetric_keys/Kconfig b/crypto/asymmetric_keys/Kconfig
index 1f1f004dc7577..460bc5d0a828c 100644
--- a/crypto/asymmetric_keys/Kconfig
+++ b/crypto/asymmetric_keys/Kconfig
@@ -22,18 +22,6 @@ config ASYMMETRIC_PUBLIC_KEY_SUBTYPE
 	  appropriate hash algorithms (such as SHA-1) must be available.
 	  ENOPKG will be reported if the requisite algorithm is unavailable.
 
-config ASYMMETRIC_TPM_KEY_SUBTYPE
-	tristate "Asymmetric TPM backed private key subtype"
-	depends on TCG_TPM
-	depends on TRUSTED_KEYS
-	select CRYPTO_HMAC
-	select CRYPTO_SHA1
-	select CRYPTO_HASH_INFO
-	help
-	  This option provides support for TPM backed private key type handling.
-	  Operations such as sign, verify, encrypt, decrypt are performed by
-	  the TPM after the private key is loaded.
-
 config X509_CERTIFICATE_PARSER
 	tristate "X.509 certificate parser"
 	depends on ASYMMETRIC_PUBLIC_KEY_SUBTYPE
@@ -54,15 +42,6 @@ config PKCS8_PRIVATE_KEY_PARSER
 	  private key data and provides the ability to instantiate a crypto key
 	  from that data.
 
-config TPM_KEY_PARSER
-	tristate "TPM private key parser"
-	depends on ASYMMETRIC_TPM_KEY_SUBTYPE
-	select ASN1
-	help
-	  This option provides support for parsing TPM format blobs for
-	  private key data and provides the ability to instantiate a crypto key
-	  from that data.
-
 config PKCS7_MESSAGE_PARSER
 	tristate "PKCS#7 message parser"
 	depends on X509_CERTIFICATE_PARSER
diff --git a/crypto/asymmetric_keys/Makefile b/crypto/asymmetric_keys/Makefile
index 28b91adba2aed..c38424f55b08d 100644
--- a/crypto/asymmetric_keys/Makefile
+++ b/crypto/asymmetric_keys/Makefile
@@ -11,7 +11,6 @@ asymmetric_keys-y := \
 	signature.o
 
 obj-$(CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE) += public_key.o
-obj-$(CONFIG_ASYMMETRIC_TPM_KEY_SUBTYPE) += asym_tpm.o
 
 #
 # X.509 Certificate handling
@@ -75,14 +74,3 @@ verify_signed_pefile-y := \
 
 $(obj)/mscode_parser.o: $(obj)/mscode.asn1.h $(obj)/mscode.asn1.h
 $(obj)/mscode.asn1.o: $(obj)/mscode.asn1.c $(obj)/mscode.asn1.h
-
-#
-# TPM private key parsing
-#
-obj-$(CONFIG_TPM_KEY_PARSER) += tpm_key_parser.o
-tpm_key_parser-y := \
-	tpm.asn1.o \
-	tpm_parser.o
-
-$(obj)/tpm_parser.o: $(obj)/tpm.asn1.h
-$(obj)/tpm.asn1.o: $(obj)/tpm.asn1.c $(obj)/tpm.asn1.h
diff --git a/crypto/asymmetric_keys/asym_tpm.c b/crypto/asymmetric_keys/asym_tpm.c
deleted file mode 100644
index 0959613560b9e..0000000000000
--- a/crypto/asymmetric_keys/asym_tpm.c
+++ /dev/null
@@ -1,957 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#define pr_fmt(fmt) "ASYM-TPM: "fmt
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/export.h>
-#include <linux/kernel.h>
-#include <linux/seq_file.h>
-#include <linux/scatterlist.h>
-#include <linux/tpm.h>
-#include <linux/tpm_command.h>
-#include <crypto/akcipher.h>
-#include <crypto/hash.h>
-#include <crypto/sha1.h>
-#include <asm/unaligned.h>
-#include <keys/asymmetric-subtype.h>
-#include <keys/trusted_tpm.h>
-#include <crypto/asym_tpm_subtype.h>
-#include <crypto/public_key.h>
-
-#define TPM_ORD_FLUSHSPECIFIC	186
-#define TPM_ORD_LOADKEY2	65
-#define TPM_ORD_UNBIND		30
-#define TPM_ORD_SIGN		60
-
-#define TPM_RT_KEY                      0x00000001
-
-/*
- * Load a TPM key from the blob provided by userspace
- */
-static int tpm_loadkey2(struct tpm_buf *tb,
-			uint32_t keyhandle, unsigned char *keyauth,
-			const unsigned char *keyblob, int keybloblen,
-			uint32_t *newhandle)
-{
-	unsigned char nonceodd[TPM_NONCE_SIZE];
-	unsigned char enonce[TPM_NONCE_SIZE];
-	unsigned char authdata[SHA1_DIGEST_SIZE];
-	uint32_t authhandle = 0;
-	unsigned char cont = 0;
-	uint32_t ordinal;
-	int ret;
-
-	ordinal = htonl(TPM_ORD_LOADKEY2);
-
-	/* session for loading the key */
-	ret = oiap(tb, &authhandle, enonce);
-	if (ret < 0) {
-		pr_info("oiap failed (%d)\n", ret);
-		return ret;
-	}
-
-	/* generate odd nonce */
-	ret = tpm_get_random(NULL, nonceodd, TPM_NONCE_SIZE);
-	if (ret < 0) {
-		pr_info("tpm_get_random failed (%d)\n", ret);
-		return ret;
-	}
-
-	/* calculate authorization HMAC value */
-	ret = TSS_authhmac(authdata, keyauth, SHA1_DIGEST_SIZE, enonce,
-			   nonceodd, cont, sizeof(uint32_t), &ordinal,
-			   keybloblen, keyblob, 0, 0);
-	if (ret < 0)
-		return ret;
-
-	/* build the request buffer */
-	tpm_buf_reset(tb, TPM_TAG_RQU_AUTH1_COMMAND, TPM_ORD_LOADKEY2);
-	tpm_buf_append_u32(tb, keyhandle);
-	tpm_buf_append(tb, keyblob, keybloblen);
-	tpm_buf_append_u32(tb, authhandle);
-	tpm_buf_append(tb, nonceodd, TPM_NONCE_SIZE);
-	tpm_buf_append_u8(tb, cont);
-	tpm_buf_append(tb, authdata, SHA1_DIGEST_SIZE);
-
-	ret = trusted_tpm_send(tb->data, MAX_BUF_SIZE);
-	if (ret < 0) {
-		pr_info("authhmac failed (%d)\n", ret);
-		return ret;
-	}
-
-	ret = TSS_checkhmac1(tb->data, ordinal, nonceodd, keyauth,
-			     SHA1_DIGEST_SIZE, 0, 0);
-	if (ret < 0) {
-		pr_info("TSS_checkhmac1 failed (%d)\n", ret);
-		return ret;
-	}
-
-	*newhandle = LOAD32(tb->data, TPM_DATA_OFFSET);
-	return 0;
-}
-
-/*
- * Execute the FlushSpecific TPM command
- */
-static int tpm_flushspecific(struct tpm_buf *tb, uint32_t handle)
-{
-	tpm_buf_reset(tb, TPM_TAG_RQU_COMMAND, TPM_ORD_FLUSHSPECIFIC);
-	tpm_buf_append_u32(tb, handle);
-	tpm_buf_append_u32(tb, TPM_RT_KEY);
-
-	return trusted_tpm_send(tb->data, MAX_BUF_SIZE);
-}
-
-/*
- * Decrypt a blob provided by userspace using a specific key handle.
- * The handle is a well known handle or previously loaded by e.g. LoadKey2
- */
-static int tpm_unbind(struct tpm_buf *tb,
-			uint32_t keyhandle, unsigned char *keyauth,
-			const unsigned char *blob, uint32_t bloblen,
-			void *out, uint32_t outlen)
-{
-	unsigned char nonceodd[TPM_NONCE_SIZE];
-	unsigned char enonce[TPM_NONCE_SIZE];
-	unsigned char authdata[SHA1_DIGEST_SIZE];
-	uint32_t authhandle = 0;
-	unsigned char cont = 0;
-	uint32_t ordinal;
-	uint32_t datalen;
-	int ret;
-
-	ordinal = htonl(TPM_ORD_UNBIND);
-	datalen = htonl(bloblen);
-
-	/* session for loading the key */
-	ret = oiap(tb, &authhandle, enonce);
-	if (ret < 0) {
-		pr_info("oiap failed (%d)\n", ret);
-		return ret;
-	}
-
-	/* generate odd nonce */
-	ret = tpm_get_random(NULL, nonceodd, TPM_NONCE_SIZE);
-	if (ret < 0) {
-		pr_info("tpm_get_random failed (%d)\n", ret);
-		return ret;
-	}
-
-	/* calculate authorization HMAC value */
-	ret = TSS_authhmac(authdata, keyauth, SHA1_DIGEST_SIZE, enonce,
-			   nonceodd, cont, sizeof(uint32_t), &ordinal,
-			   sizeof(uint32_t), &datalen,
-			   bloblen, blob, 0, 0);
-	if (ret < 0)
-		return ret;
-
-	/* build the request buffer */
-	tpm_buf_reset(tb, TPM_TAG_RQU_AUTH1_COMMAND, TPM_ORD_UNBIND);
-	tpm_buf_append_u32(tb, keyhandle);
-	tpm_buf_append_u32(tb, bloblen);
-	tpm_buf_append(tb, blob, bloblen);
-	tpm_buf_append_u32(tb, authhandle);
-	tpm_buf_append(tb, nonceodd, TPM_NONCE_SIZE);
-	tpm_buf_append_u8(tb, cont);
-	tpm_buf_append(tb, authdata, SHA1_DIGEST_SIZE);
-
-	ret = trusted_tpm_send(tb->data, MAX_BUF_SIZE);
-	if (ret < 0) {
-		pr_info("authhmac failed (%d)\n", ret);
-		return ret;
-	}
-
-	datalen = LOAD32(tb->data, TPM_DATA_OFFSET);
-
-	ret = TSS_checkhmac1(tb->data, ordinal, nonceodd,
-			     keyauth, SHA1_DIGEST_SIZE,
-			     sizeof(uint32_t), TPM_DATA_OFFSET,
-			     datalen, TPM_DATA_OFFSET + sizeof(uint32_t),
-			     0, 0);
-	if (ret < 0) {
-		pr_info("TSS_checkhmac1 failed (%d)\n", ret);
-		return ret;
-	}
-
-	memcpy(out, tb->data + TPM_DATA_OFFSET + sizeof(uint32_t),
-	       min(outlen, datalen));
-
-	return datalen;
-}
-
-/*
- * Sign a blob provided by userspace (that has had the hash function applied)
- * using a specific key handle.  The handle is assumed to have been previously
- * loaded by e.g. LoadKey2.
- *
- * Note that the key signature scheme of the used key should be set to
- * TPM_SS_RSASSAPKCS1v15_DER.  This allows the hashed input to be of any size
- * up to key_length_in_bytes - 11 and not be limited to size 20 like the
- * TPM_SS_RSASSAPKCS1v15_SHA1 signature scheme.
- */
-static int tpm_sign(struct tpm_buf *tb,
-		    uint32_t keyhandle, unsigned char *keyauth,
-		    const unsigned char *blob, uint32_t bloblen,
-		    void *out, uint32_t outlen)
-{
-	unsigned char nonceodd[TPM_NONCE_SIZE];
-	unsigned char enonce[TPM_NONCE_SIZE];
-	unsigned char authdata[SHA1_DIGEST_SIZE];
-	uint32_t authhandle = 0;
-	unsigned char cont = 0;
-	uint32_t ordinal;
-	uint32_t datalen;
-	int ret;
-
-	ordinal = htonl(TPM_ORD_SIGN);
-	datalen = htonl(bloblen);
-
-	/* session for loading the key */
-	ret = oiap(tb, &authhandle, enonce);
-	if (ret < 0) {
-		pr_info("oiap failed (%d)\n", ret);
-		return ret;
-	}
-
-	/* generate odd nonce */
-	ret = tpm_get_random(NULL, nonceodd, TPM_NONCE_SIZE);
-	if (ret < 0) {
-		pr_info("tpm_get_random failed (%d)\n", ret);
-		return ret;
-	}
-
-	/* calculate authorization HMAC value */
-	ret = TSS_authhmac(authdata, keyauth, SHA1_DIGEST_SIZE, enonce,
-			   nonceodd, cont, sizeof(uint32_t), &ordinal,
-			   sizeof(uint32_t), &datalen,
-			   bloblen, blob, 0, 0);
-	if (ret < 0)
-		return ret;
-
-	/* build the request buffer */
-	tpm_buf_reset(tb, TPM_TAG_RQU_AUTH1_COMMAND, TPM_ORD_SIGN);
-	tpm_buf_append_u32(tb, keyhandle);
-	tpm_buf_append_u32(tb, bloblen);
-	tpm_buf_append(tb, blob, bloblen);
-	tpm_buf_append_u32(tb, authhandle);
-	tpm_buf_append(tb, nonceodd, TPM_NONCE_SIZE);
-	tpm_buf_append_u8(tb, cont);
-	tpm_buf_append(tb, authdata, SHA1_DIGEST_SIZE);
-
-	ret = trusted_tpm_send(tb->data, MAX_BUF_SIZE);
-	if (ret < 0) {
-		pr_info("authhmac failed (%d)\n", ret);
-		return ret;
-	}
-
-	datalen = LOAD32(tb->data, TPM_DATA_OFFSET);
-
-	ret = TSS_checkhmac1(tb->data, ordinal, nonceodd,
-			     keyauth, SHA1_DIGEST_SIZE,
-			     sizeof(uint32_t), TPM_DATA_OFFSET,
-			     datalen, TPM_DATA_OFFSET + sizeof(uint32_t),
-			     0, 0);
-	if (ret < 0) {
-		pr_info("TSS_checkhmac1 failed (%d)\n", ret);
-		return ret;
-	}
-
-	memcpy(out, tb->data + TPM_DATA_OFFSET + sizeof(uint32_t),
-	       min(datalen, outlen));
-
-	return datalen;
-}
-
-/* Room to fit two u32 zeros for algo id and parameters length. */
-#define SETKEY_PARAMS_SIZE (sizeof(u32) * 2)
-
-/*
- * Maximum buffer size for the BER/DER encoded public key.  The public key
- * is of the form SEQUENCE { INTEGER n, INTEGER e } where n is a maximum 2048
- * bit key and e is usually 65537
- * The encoding overhead is:
- * - max 4 bytes for SEQUENCE
- *   - max 4 bytes for INTEGER n type/length
- *     - 257 bytes of n
- *   - max 2 bytes for INTEGER e type/length
- *     - 3 bytes of e
- * - 4+4 of zeros for set_pub_key parameters (SETKEY_PARAMS_SIZE)
- */
-#define PUB_KEY_BUF_SIZE (4 + 4 + 257 + 2 + 3 + SETKEY_PARAMS_SIZE)
-
-/*
- * Provide a part of a description of the key for /proc/keys.
- */
-static void asym_tpm_describe(const struct key *asymmetric_key,
-			      struct seq_file *m)
-{
-	struct tpm_key *tk = asymmetric_key->payload.data[asym_crypto];
-
-	if (!tk)
-		return;
-
-	seq_printf(m, "TPM1.2/Blob");
-}
-
-static void asym_tpm_destroy(void *payload0, void *payload3)
-{
-	struct tpm_key *tk = payload0;
-
-	if (!tk)
-		return;
-
-	kfree(tk->blob);
-	tk->blob_len = 0;
-
-	kfree(tk);
-}
-
-/* How many bytes will it take to encode the length */
-static inline uint32_t definite_length(uint32_t len)
-{
-	if (len <= 127)
-		return 1;
-	if (len <= 255)
-		return 2;
-	return 3;
-}
-
-static inline uint8_t *encode_tag_length(uint8_t *buf, uint8_t tag,
-					 uint32_t len)
-{
-	*buf++ = tag;
-
-	if (len <= 127) {
-		buf[0] = len;
-		return buf + 1;
-	}
-
-	if (len <= 255) {
-		buf[0] = 0x81;
-		buf[1] = len;
-		return buf + 2;
-	}
-
-	buf[0] = 0x82;
-	put_unaligned_be16(len, buf + 1);
-	return buf + 3;
-}
-
-static uint32_t derive_pub_key(const void *pub_key, uint32_t len, uint8_t *buf)
-{
-	uint8_t *cur = buf;
-	uint32_t n_len = definite_length(len) + 1 + len + 1;
-	uint32_t e_len = definite_length(3) + 1 + 3;
-	uint8_t e[3] = { 0x01, 0x00, 0x01 };
-
-	/* SEQUENCE */
-	cur = encode_tag_length(cur, 0x30, n_len + e_len);
-	/* INTEGER n */
-	cur = encode_tag_length(cur, 0x02, len + 1);
-	cur[0] = 0x00;
-	memcpy(cur + 1, pub_key, len);
-	cur += len + 1;
-	cur = encode_tag_length(cur, 0x02, sizeof(e));
-	memcpy(cur, e, sizeof(e));
-	cur += sizeof(e);
-	/* Zero parameters to satisfy set_pub_key ABI. */
-	memzero_explicit(cur, SETKEY_PARAMS_SIZE);
-
-	return cur - buf;
-}
-
-/*
- * Determine the crypto algorithm name.
- */
-static int determine_akcipher(const char *encoding, const char *hash_algo,
-			      char alg_name[CRYPTO_MAX_ALG_NAME])
-{
-	if (strcmp(encoding, "pkcs1") == 0) {
-		if (!hash_algo) {
-			strcpy(alg_name, "pkcs1pad(rsa)");
-			return 0;
-		}
-
-		if (snprintf(alg_name, CRYPTO_MAX_ALG_NAME, "pkcs1pad(rsa,%s)",
-			     hash_algo) >= CRYPTO_MAX_ALG_NAME)
-			return -EINVAL;
-
-		return 0;
-	}
-
-	if (strcmp(encoding, "raw") == 0) {
-		strcpy(alg_name, "rsa");
-		return 0;
-	}
-
-	return -ENOPKG;
-}
-
-/*
- * Query information about a key.
- */
-static int tpm_key_query(const struct kernel_pkey_params *params,
-			 struct kernel_pkey_query *info)
-{
-	struct tpm_key *tk = params->key->payload.data[asym_crypto];
-	int ret;
-	char alg_name[CRYPTO_MAX_ALG_NAME];
-	struct crypto_akcipher *tfm;
-	uint8_t der_pub_key[PUB_KEY_BUF_SIZE];
-	uint32_t der_pub_key_len;
-	int len;
-
-	/* TPM only works on private keys, public keys still done in software */
-	ret = determine_akcipher(params->encoding, params->hash_algo, alg_name);
-	if (ret < 0)
-		return ret;
-
-	tfm = crypto_alloc_akcipher(alg_name, 0, 0);
-	if (IS_ERR(tfm))
-		return PTR_ERR(tfm);
-
-	der_pub_key_len = derive_pub_key(tk->pub_key, tk->pub_key_len,
-					 der_pub_key);
-
-	ret = crypto_akcipher_set_pub_key(tfm, der_pub_key, der_pub_key_len);
-	if (ret < 0)
-		goto error_free_tfm;
-
-	len = crypto_akcipher_maxsize(tfm);
-
-	info->key_size = tk->key_len;
-	info->max_data_size = tk->key_len / 8;
-	info->max_sig_size = len;
-	info->max_enc_size = len;
-	info->max_dec_size = tk->key_len / 8;
-
-	info->supported_ops = KEYCTL_SUPPORTS_ENCRYPT |
-			      KEYCTL_SUPPORTS_DECRYPT |
-			      KEYCTL_SUPPORTS_VERIFY |
-			      KEYCTL_SUPPORTS_SIGN;
-
-	ret = 0;
-error_free_tfm:
-	crypto_free_akcipher(tfm);
-	pr_devel("<==%s() = %d\n", __func__, ret);
-	return ret;
-}
-
-/*
- * Encryption operation is performed with the public key.  Hence it is done
- * in software
- */
-static int tpm_key_encrypt(struct tpm_key *tk,
-			   struct kernel_pkey_params *params,
-			   const void *in, void *out)
-{
-	char alg_name[CRYPTO_MAX_ALG_NAME];
-	struct crypto_akcipher *tfm;
-	struct akcipher_request *req;
-	struct crypto_wait cwait;
-	struct scatterlist in_sg, out_sg;
-	uint8_t der_pub_key[PUB_KEY_BUF_SIZE];
-	uint32_t der_pub_key_len;
-	int ret;
-
-	pr_devel("==>%s()\n", __func__);
-
-	ret = determine_akcipher(params->encoding, params->hash_algo, alg_name);
-	if (ret < 0)
-		return ret;
-
-	tfm = crypto_alloc_akcipher(alg_name, 0, 0);
-	if (IS_ERR(tfm))
-		return PTR_ERR(tfm);
-
-	der_pub_key_len = derive_pub_key(tk->pub_key, tk->pub_key_len,
-					 der_pub_key);
-
-	ret = crypto_akcipher_set_pub_key(tfm, der_pub_key, der_pub_key_len);
-	if (ret < 0)
-		goto error_free_tfm;
-
-	ret = -ENOMEM;
-	req = akcipher_request_alloc(tfm, GFP_KERNEL);
-	if (!req)
-		goto error_free_tfm;
-
-	sg_init_one(&in_sg, in, params->in_len);
-	sg_init_one(&out_sg, out, params->out_len);
-	akcipher_request_set_crypt(req, &in_sg, &out_sg, params->in_len,
-				   params->out_len);
-	crypto_init_wait(&cwait);
-	akcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG |
-				      CRYPTO_TFM_REQ_MAY_SLEEP,
-				      crypto_req_done, &cwait);
-
-	ret = crypto_akcipher_encrypt(req);
-	ret = crypto_wait_req(ret, &cwait);
-
-	if (ret == 0)
-		ret = req->dst_len;
-
-	akcipher_request_free(req);
-error_free_tfm:
-	crypto_free_akcipher(tfm);
-	pr_devel("<==%s() = %d\n", __func__, ret);
-	return ret;
-}
-
-/*
- * Decryption operation is performed with the private key in the TPM.
- */
-static int tpm_key_decrypt(struct tpm_key *tk,
-			   struct kernel_pkey_params *params,
-			   const void *in, void *out)
-{
-	struct tpm_buf tb;
-	uint32_t keyhandle;
-	uint8_t srkauth[SHA1_DIGEST_SIZE];
-	uint8_t keyauth[SHA1_DIGEST_SIZE];
-	int r;
-
-	pr_devel("==>%s()\n", __func__);
-
-	if (params->hash_algo)
-		return -ENOPKG;
-
-	if (strcmp(params->encoding, "pkcs1"))
-		return -ENOPKG;
-
-	r = tpm_buf_init(&tb, 0, 0);
-	if (r)
-		return r;
-
-	/* TODO: Handle a non-all zero SRK authorization */
-	memset(srkauth, 0, sizeof(srkauth));
-
-	r = tpm_loadkey2(&tb, SRKHANDLE, srkauth,
-				tk->blob, tk->blob_len, &keyhandle);
-	if (r < 0) {
-		pr_devel("loadkey2 failed (%d)\n", r);
-		goto error;
-	}
-
-	/* TODO: Handle a non-all zero key authorization */
-	memset(keyauth, 0, sizeof(keyauth));
-
-	r = tpm_unbind(&tb, keyhandle, keyauth,
-		       in, params->in_len, out, params->out_len);
-	if (r < 0)
-		pr_devel("tpm_unbind failed (%d)\n", r);
-
-	if (tpm_flushspecific(&tb, keyhandle) < 0)
-		pr_devel("flushspecific failed (%d)\n", r);
-
-error:
-	tpm_buf_destroy(&tb);
-	pr_devel("<==%s() = %d\n", __func__, r);
-	return r;
-}
-
-/*
- * Hash algorithm OIDs plus ASN.1 DER wrappings [RFC4880 sec 5.2.2].
- */
-static const u8 digest_info_md5[] = {
-	0x30, 0x20, 0x30, 0x0c, 0x06, 0x08,
-	0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x02, 0x05, /* OID */
-	0x05, 0x00, 0x04, 0x10
-};
-
-static const u8 digest_info_sha1[] = {
-	0x30, 0x21, 0x30, 0x09, 0x06, 0x05,
-	0x2b, 0x0e, 0x03, 0x02, 0x1a,
-	0x05, 0x00, 0x04, 0x14
-};
-
-static const u8 digest_info_rmd160[] = {
-	0x30, 0x21, 0x30, 0x09, 0x06, 0x05,
-	0x2b, 0x24, 0x03, 0x02, 0x01,
-	0x05, 0x00, 0x04, 0x14
-};
-
-static const u8 digest_info_sha224[] = {
-	0x30, 0x2d, 0x30, 0x0d, 0x06, 0x09,
-	0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x04,
-	0x05, 0x00, 0x04, 0x1c
-};
-
-static const u8 digest_info_sha256[] = {
-	0x30, 0x31, 0x30, 0x0d, 0x06, 0x09,
-	0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x01,
-	0x05, 0x00, 0x04, 0x20
-};
-
-static const u8 digest_info_sha384[] = {
-	0x30, 0x41, 0x30, 0x0d, 0x06, 0x09,
-	0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x02,
-	0x05, 0x00, 0x04, 0x30
-};
-
-static const u8 digest_info_sha512[] = {
-	0x30, 0x51, 0x30, 0x0d, 0x06, 0x09,
-	0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x03,
-	0x05, 0x00, 0x04, 0x40
-};
-
-static const struct asn1_template {
-	const char	*name;
-	const u8	*data;
-	size_t		size;
-} asn1_templates[] = {
-#define _(X) { #X, digest_info_##X, sizeof(digest_info_##X) }
-	_(md5),
-	_(sha1),
-	_(rmd160),
-	_(sha256),
-	_(sha384),
-	_(sha512),
-	_(sha224),
-	{ NULL }
-#undef _
-};
-
-static const struct asn1_template *lookup_asn1(const char *name)
-{
-	const struct asn1_template *p;
-
-	for (p = asn1_templates; p->name; p++)
-		if (strcmp(name, p->name) == 0)
-			return p;
-	return NULL;
-}
-
-/*
- * Sign operation is performed with the private key in the TPM.
- */
-static int tpm_key_sign(struct tpm_key *tk,
-			struct kernel_pkey_params *params,
-			const void *in, void *out)
-{
-	struct tpm_buf tb;
-	uint32_t keyhandle;
-	uint8_t srkauth[SHA1_DIGEST_SIZE];
-	uint8_t keyauth[SHA1_DIGEST_SIZE];
-	void *asn1_wrapped = NULL;
-	uint32_t in_len = params->in_len;
-	int r;
-
-	pr_devel("==>%s()\n", __func__);
-
-	if (strcmp(params->encoding, "pkcs1"))
-		return -ENOPKG;
-
-	if (params->hash_algo) {
-		const struct asn1_template *asn1 =
-						lookup_asn1(params->hash_algo);
-
-		if (!asn1)
-			return -ENOPKG;
-
-		/* request enough space for the ASN.1 template + input hash */
-		asn1_wrapped = kzalloc(in_len + asn1->size, GFP_KERNEL);
-		if (!asn1_wrapped)
-			return -ENOMEM;
-
-		/* Copy ASN.1 template, then the input */
-		memcpy(asn1_wrapped, asn1->data, asn1->size);
-		memcpy(asn1_wrapped + asn1->size, in, in_len);
-
-		in = asn1_wrapped;
-		in_len += asn1->size;
-	}
-
-	if (in_len > tk->key_len / 8 - 11) {
-		r = -EOVERFLOW;
-		goto error_free_asn1_wrapped;
-	}
-
-	r = tpm_buf_init(&tb, 0, 0);
-	if (r)
-		goto error_free_asn1_wrapped;
-
-	/* TODO: Handle a non-all zero SRK authorization */
-	memset(srkauth, 0, sizeof(srkauth));
-
-	r = tpm_loadkey2(&tb, SRKHANDLE, srkauth,
-			 tk->blob, tk->blob_len, &keyhandle);
-	if (r < 0) {
-		pr_devel("loadkey2 failed (%d)\n", r);
-		goto error_free_tb;
-	}
-
-	/* TODO: Handle a non-all zero key authorization */
-	memset(keyauth, 0, sizeof(keyauth));
-
-	r = tpm_sign(&tb, keyhandle, keyauth, in, in_len, out, params->out_len);
-	if (r < 0)
-		pr_devel("tpm_sign failed (%d)\n", r);
-
-	if (tpm_flushspecific(&tb, keyhandle) < 0)
-		pr_devel("flushspecific failed (%d)\n", r);
-
-error_free_tb:
-	tpm_buf_destroy(&tb);
-error_free_asn1_wrapped:
-	kfree(asn1_wrapped);
-	pr_devel("<==%s() = %d\n", __func__, r);
-	return r;
-}
-
-/*
- * Do encryption, decryption and signing ops.
- */
-static int tpm_key_eds_op(struct kernel_pkey_params *params,
-			  const void *in, void *out)
-{
-	struct tpm_key *tk = params->key->payload.data[asym_crypto];
-	int ret = -EOPNOTSUPP;
-
-	/* Perform the encryption calculation. */
-	switch (params->op) {
-	case kernel_pkey_encrypt:
-		ret = tpm_key_encrypt(tk, params, in, out);
-		break;
-	case kernel_pkey_decrypt:
-		ret = tpm_key_decrypt(tk, params, in, out);
-		break;
-	case kernel_pkey_sign:
-		ret = tpm_key_sign(tk, params, in, out);
-		break;
-	default:
-		BUG();
-	}
-
-	return ret;
-}
-
-/*
- * Verify a signature using a public key.
- */
-static int tpm_key_verify_signature(const struct key *key,
-				    const struct public_key_signature *sig)
-{
-	const struct tpm_key *tk = key->payload.data[asym_crypto];
-	struct crypto_wait cwait;
-	struct crypto_akcipher *tfm;
-	struct akcipher_request *req;
-	struct scatterlist src_sg[2];
-	char alg_name[CRYPTO_MAX_ALG_NAME];
-	uint8_t der_pub_key[PUB_KEY_BUF_SIZE];
-	uint32_t der_pub_key_len;
-	int ret;
-
-	pr_devel("==>%s()\n", __func__);
-
-	BUG_ON(!tk);
-	BUG_ON(!sig);
-	BUG_ON(!sig->s);
-
-	if (!sig->digest)
-		return -ENOPKG;
-
-	ret = determine_akcipher(sig->encoding, sig->hash_algo, alg_name);
-	if (ret < 0)
-		return ret;
-
-	tfm = crypto_alloc_akcipher(alg_name, 0, 0);
-	if (IS_ERR(tfm))
-		return PTR_ERR(tfm);
-
-	der_pub_key_len = derive_pub_key(tk->pub_key, tk->pub_key_len,
-					 der_pub_key);
-
-	ret = crypto_akcipher_set_pub_key(tfm, der_pub_key, der_pub_key_len);
-	if (ret < 0)
-		goto error_free_tfm;
-
-	ret = -ENOMEM;
-	req = akcipher_request_alloc(tfm, GFP_KERNEL);
-	if (!req)
-		goto error_free_tfm;
-
-	sg_init_table(src_sg, 2);
-	sg_set_buf(&src_sg[0], sig->s, sig->s_size);
-	sg_set_buf(&src_sg[1], sig->digest, sig->digest_size);
-	akcipher_request_set_crypt(req, src_sg, NULL, sig->s_size,
-				   sig->digest_size);
-	crypto_init_wait(&cwait);
-	akcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG |
-				      CRYPTO_TFM_REQ_MAY_SLEEP,
-				      crypto_req_done, &cwait);
-	ret = crypto_wait_req(crypto_akcipher_verify(req), &cwait);
-
-	akcipher_request_free(req);
-error_free_tfm:
-	crypto_free_akcipher(tfm);
-	pr_devel("<==%s() = %d\n", __func__, ret);
-	if (WARN_ON_ONCE(ret > 0))
-		ret = -EINVAL;
-	return ret;
-}
-
-/*
- * Parse enough information out of TPM_KEY structure:
- * TPM_STRUCT_VER -> 4 bytes
- * TPM_KEY_USAGE -> 2 bytes
- * TPM_KEY_FLAGS -> 4 bytes
- * TPM_AUTH_DATA_USAGE -> 1 byte
- * TPM_KEY_PARMS -> variable
- * UINT32 PCRInfoSize -> 4 bytes
- * BYTE* -> PCRInfoSize bytes
- * TPM_STORE_PUBKEY
- * UINT32 encDataSize;
- * BYTE* -> encDataSize;
- *
- * TPM_KEY_PARMS:
- * TPM_ALGORITHM_ID -> 4 bytes
- * TPM_ENC_SCHEME -> 2 bytes
- * TPM_SIG_SCHEME -> 2 bytes
- * UINT32 parmSize -> 4 bytes
- * BYTE* -> variable
- */
-static int extract_key_parameters(struct tpm_key *tk)
-{
-	const void *cur = tk->blob;
-	uint32_t len = tk->blob_len;
-	const void *pub_key;
-	uint32_t sz;
-	uint32_t key_len;
-
-	if (len < 11)
-		return -EBADMSG;
-
-	/* Ensure this is a legacy key */
-	if (get_unaligned_be16(cur + 4) != 0x0015)
-		return -EBADMSG;
-
-	/* Skip to TPM_KEY_PARMS */
-	cur += 11;
-	len -= 11;
-
-	if (len < 12)
-		return -EBADMSG;
-
-	/* Make sure this is an RSA key */
-	if (get_unaligned_be32(cur) != 0x00000001)
-		return -EBADMSG;
-
-	/* Make sure this is TPM_ES_RSAESPKCSv15 encoding scheme */
-	if (get_unaligned_be16(cur + 4) != 0x0002)
-		return -EBADMSG;
-
-	/* Make sure this is TPM_SS_RSASSAPKCS1v15_DER signature scheme */
-	if (get_unaligned_be16(cur + 6) != 0x0003)
-		return -EBADMSG;
-
-	sz = get_unaligned_be32(cur + 8);
-	if (len < sz + 12)
-		return -EBADMSG;
-
-	/* Move to TPM_RSA_KEY_PARMS */
-	len -= 12;
-	cur += 12;
-
-	/* Grab the RSA key length */
-	key_len = get_unaligned_be32(cur);
-
-	switch (key_len) {
-	case 512:
-	case 1024:
-	case 1536:
-	case 2048:
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	/* Move just past TPM_KEY_PARMS */
-	cur += sz;
-	len -= sz;
-
-	if (len < 4)
-		return -EBADMSG;
-
-	sz = get_unaligned_be32(cur);
-	if (len < 4 + sz)
-		return -EBADMSG;
-
-	/* Move to TPM_STORE_PUBKEY */
-	cur += 4 + sz;
-	len -= 4 + sz;
-
-	/* Grab the size of the public key, it should jive with the key size */
-	sz = get_unaligned_be32(cur);
-	if (sz > 256)
-		return -EINVAL;
-
-	pub_key = cur + 4;
-
-	tk->key_len = key_len;
-	tk->pub_key = pub_key;
-	tk->pub_key_len = sz;
-
-	return 0;
-}
-
-/* Given the blob, parse it and load it into the TPM */
-struct tpm_key *tpm_key_create(const void *blob, uint32_t blob_len)
-{
-	int r;
-	struct tpm_key *tk;
-
-	r = tpm_is_tpm2(NULL);
-	if (r < 0)
-		goto error;
-
-	/* We don't support TPM2 yet */
-	if (r > 0) {
-		r = -ENODEV;
-		goto error;
-	}
-
-	r = -ENOMEM;
-	tk = kzalloc(sizeof(struct tpm_key), GFP_KERNEL);
-	if (!tk)
-		goto error;
-
-	tk->blob = kmemdup(blob, blob_len, GFP_KERNEL);
-	if (!tk->blob)
-		goto error_memdup;
-
-	tk->blob_len = blob_len;
-
-	r = extract_key_parameters(tk);
-	if (r < 0)
-		goto error_extract;
-
-	return tk;
-
-error_extract:
-	kfree(tk->blob);
-	tk->blob_len = 0;
-error_memdup:
-	kfree(tk);
-error:
-	return ERR_PTR(r);
-}
-EXPORT_SYMBOL_GPL(tpm_key_create);
-
-/*
- * TPM-based asymmetric key subtype
- */
-struct asymmetric_key_subtype asym_tpm_subtype = {
-	.owner			= THIS_MODULE,
-	.name			= "asym_tpm",
-	.name_len		= sizeof("asym_tpm") - 1,
-	.describe		= asym_tpm_describe,
-	.destroy		= asym_tpm_destroy,
-	.query			= tpm_key_query,
-	.eds_op			= tpm_key_eds_op,
-	.verify_signature	= tpm_key_verify_signature,
-};
-EXPORT_SYMBOL_GPL(asym_tpm_subtype);
-
-MODULE_DESCRIPTION("TPM based asymmetric key subtype");
-MODULE_AUTHOR("Intel Corporation");
-MODULE_LICENSE("GPL v2");
diff --git a/crypto/asymmetric_keys/tpm.asn1 b/crypto/asymmetric_keys/tpm.asn1
deleted file mode 100644
index d7f194232f30a..0000000000000
--- a/crypto/asymmetric_keys/tpm.asn1
+++ /dev/null
@@ -1,5 +0,0 @@
---
--- Unencryted TPM Blob.  For details of the format, see:
--- http://david.woodhou.se/draft-woodhouse-cert-best-practice.html#I-D.mavrogiannopoulos-tpmuri
---
-PrivateKeyInfo ::= OCTET STRING ({ tpm_note_key })
diff --git a/crypto/asymmetric_keys/tpm_parser.c b/crypto/asymmetric_keys/tpm_parser.c
deleted file mode 100644
index 96405d8dcd98d..0000000000000
--- a/crypto/asymmetric_keys/tpm_parser.c
+++ /dev/null
@@ -1,102 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#define pr_fmt(fmt) "TPM-PARSER: "fmt
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/export.h>
-#include <linux/slab.h>
-#include <linux/err.h>
-#include <keys/asymmetric-subtype.h>
-#include <keys/asymmetric-parser.h>
-#include <crypto/asym_tpm_subtype.h>
-#include "tpm.asn1.h"
-
-struct tpm_parse_context {
-	const void	*blob;
-	u32		blob_len;
-};
-
-/*
- * Note the key data of the ASN.1 blob.
- */
-int tpm_note_key(void *context, size_t hdrlen,
-		   unsigned char tag,
-		   const void *value, size_t vlen)
-{
-	struct tpm_parse_context *ctx = context;
-
-	ctx->blob = value;
-	ctx->blob_len = vlen;
-
-	return 0;
-}
-
-/*
- * Parse a TPM-encrypted private key blob.
- */
-static struct tpm_key *tpm_parse(const void *data, size_t datalen)
-{
-	struct tpm_parse_context ctx;
-	long ret;
-
-	memset(&ctx, 0, sizeof(ctx));
-
-	/* Attempt to decode the private key */
-	ret = asn1_ber_decoder(&tpm_decoder, &ctx, data, datalen);
-	if (ret < 0)
-		goto error;
-
-	return tpm_key_create(ctx.blob, ctx.blob_len);
-
-error:
-	return ERR_PTR(ret);
-}
-/*
- * Attempt to parse a data blob for a key as a TPM private key blob.
- */
-static int tpm_key_preparse(struct key_preparsed_payload *prep)
-{
-	struct tpm_key *tk;
-
-	/*
-	 * TPM 1.2 keys are max 2048 bits long, so assume the blob is no
-	 * more than 4x that
-	 */
-	if (prep->datalen > 256 * 4)
-		return -EMSGSIZE;
-
-	tk = tpm_parse(prep->data, prep->datalen);
-
-	if (IS_ERR(tk))
-		return PTR_ERR(tk);
-
-	/* We're pinning the module by being linked against it */
-	__module_get(asym_tpm_subtype.owner);
-	prep->payload.data[asym_subtype] = &asym_tpm_subtype;
-	prep->payload.data[asym_key_ids] = NULL;
-	prep->payload.data[asym_crypto] = tk;
-	prep->payload.data[asym_auth] = NULL;
-	prep->quotalen = 100;
-	return 0;
-}
-
-static struct asymmetric_key_parser tpm_key_parser = {
-	.owner	= THIS_MODULE,
-	.name	= "tpm_parser",
-	.parse	= tpm_key_preparse,
-};
-
-static int __init tpm_key_init(void)
-{
-	return register_asymmetric_key_parser(&tpm_key_parser);
-}
-
-static void __exit tpm_key_exit(void)
-{
-	unregister_asymmetric_key_parser(&tpm_key_parser);
-}
-
-module_init(tpm_key_init);
-module_exit(tpm_key_exit);
-
-MODULE_DESCRIPTION("TPM private key-blob parser");
-MODULE_LICENSE("GPL v2");
diff --git a/include/crypto/asym_tpm_subtype.h b/include/crypto/asym_tpm_subtype.h
deleted file mode 100644
index 48198c36d6b9b..0000000000000
--- a/include/crypto/asym_tpm_subtype.h
+++ /dev/null
@@ -1,19 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#ifndef _LINUX_ASYM_TPM_SUBTYPE_H
-#define _LINUX_ASYM_TPM_SUBTYPE_H
-
-#include <linux/keyctl.h>
-
-struct tpm_key {
-	void *blob;
-	u32 blob_len;
-	uint16_t key_len; /* Size in bits of the key */
-	const void *pub_key; /* pointer inside blob to the public key bytes */
-	uint16_t pub_key_len; /* length of the public key */
-};
-
-struct tpm_key *tpm_key_create(const void *blob, uint32_t blob_len);
-
-extern struct asymmetric_key_subtype asym_tpm_subtype;
-
-#endif /* _LINUX_ASYM_TPM_SUBTYPE_H */
-- 
GitLab


From 75a1b44a54bd97500e524cf42e8c81cc632672b3 Mon Sep 17 00:00:00 2001
From: Krishna Yarlagadda <kyarlagadda@nvidia.com>
Date: Mon, 7 Mar 2022 22:25:17 +0530
Subject: [PATCH 1089/1586] spi: tegra210-quad: add acpi support

Add ACPI ID for Tegra QUAD SPI. Switch to common device property calls.
Skip clock calls that are not updated in ACPI boot.
Runtime PM support is not yet enabled with ACPI boot.

Signed-off-by: Krishna Yarlagadda <kyarlagadda@nvidia.com>
Link: https://lore.kernel.org/r/20220307165519.38380-2-kyarlagadda@nvidia.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-tegra210-quad.c | 52 ++++++++++++++++++++++++++-------
 1 file changed, 41 insertions(+), 11 deletions(-)

diff --git a/drivers/spi/spi-tegra210-quad.c b/drivers/spi/spi-tegra210-quad.c
index 3725ee5331ae0..517a348d1c198 100644
--- a/drivers/spi/spi-tegra210-quad.c
+++ b/drivers/spi/spi-tegra210-quad.c
@@ -21,6 +21,8 @@
 #include <linux/of_device.h>
 #include <linux/reset.h>
 #include <linux/spi/spi.h>
+#include <linux/acpi.h>
+#include <linux/property.h>
 
 #define QSPI_COMMAND1				0x000
 #define QSPI_BIT_LENGTH(x)			(((x) & 0x1f) << 0)
@@ -771,7 +773,7 @@ static u32 tegra_qspi_setup_transfer_one(struct spi_device *spi, struct spi_tran
 	u32 tx_tap = 0, rx_tap = 0;
 	int req_mode;
 
-	if (speed != tqspi->cur_speed) {
+	if (!has_acpi_companion(tqspi->dev) && speed != tqspi->cur_speed) {
 		clk_set_rate(tqspi->clk, speed);
 		tqspi->cur_speed = speed;
 	}
@@ -879,16 +881,16 @@ static int tegra_qspi_start_transfer_one(struct spi_device *spi,
 static struct tegra_qspi_client_data *tegra_qspi_parse_cdata_dt(struct spi_device *spi)
 {
 	struct tegra_qspi_client_data *cdata;
-	struct device_node *slave_np = spi->dev.of_node;
 
 	cdata = devm_kzalloc(&spi->dev, sizeof(*cdata), GFP_KERNEL);
 	if (!cdata)
 		return NULL;
 
-	of_property_read_u32(slave_np, "nvidia,tx-clk-tap-delay",
-			     &cdata->tx_clk_tap_delay);
-	of_property_read_u32(slave_np, "nvidia,rx-clk-tap-delay",
-			     &cdata->rx_clk_tap_delay);
+	device_property_read_u32(&spi->dev, "nvidia,tx-clk-tap-delay",
+				 &cdata->tx_clk_tap_delay);
+	device_property_read_u32(&spi->dev, "nvidia,rx-clk-tap-delay",
+				 &cdata->rx_clk_tap_delay);
+
 	return cdata;
 }
 
@@ -1227,6 +1229,24 @@ static const struct of_device_id tegra_qspi_of_match[] = {
 
 MODULE_DEVICE_TABLE(of, tegra_qspi_of_match);
 
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id tegra_qspi_acpi_match[] = {
+	{
+		.id = "NVDA1213",
+		.driver_data = (kernel_ulong_t)&tegra210_qspi_soc_data,
+	}, {
+		.id = "NVDA1313",
+		.driver_data = (kernel_ulong_t)&tegra186_qspi_soc_data,
+	}, {
+		.id = "NVDA1413",
+		.driver_data = (kernel_ulong_t)&tegra234_qspi_soc_data,
+	},
+	{}
+};
+
+MODULE_DEVICE_TABLE(acpi, tegra_qspi_acpi_match);
+#endif
+
 static int tegra_qspi_probe(struct platform_device *pdev)
 {
 	struct spi_master	*master;
@@ -1269,11 +1289,14 @@ static int tegra_qspi_probe(struct platform_device *pdev)
 		return qspi_irq;
 	tqspi->irq = qspi_irq;
 
-	tqspi->clk = devm_clk_get(&pdev->dev, "qspi");
-	if (IS_ERR(tqspi->clk)) {
-		ret = PTR_ERR(tqspi->clk);
-		dev_err(&pdev->dev, "failed to get clock: %d\n", ret);
-		return ret;
+	if (!has_acpi_companion(tqspi->dev)) {
+		tqspi->clk = devm_clk_get(&pdev->dev, "qspi");
+		if (IS_ERR(tqspi->clk)) {
+			ret = PTR_ERR(tqspi->clk);
+			dev_err(&pdev->dev, "failed to get clock: %d\n", ret);
+			return ret;
+		}
+
 	}
 
 	tqspi->max_buf_size = QSPI_FIFO_DEPTH << 2;
@@ -1377,6 +1400,9 @@ static int __maybe_unused tegra_qspi_runtime_suspend(struct device *dev)
 	struct spi_master *master = dev_get_drvdata(dev);
 	struct tegra_qspi *tqspi = spi_master_get_devdata(master);
 
+	/* Runtime pm disabled with ACPI */
+	if (has_acpi_companion(tqspi->dev))
+		return 0;
 	/* flush all write which are in PPSB queue by reading back */
 	tegra_qspi_readl(tqspi, QSPI_COMMAND1);
 
@@ -1391,6 +1417,9 @@ static int __maybe_unused tegra_qspi_runtime_resume(struct device *dev)
 	struct tegra_qspi *tqspi = spi_master_get_devdata(master);
 	int ret;
 
+	/* Runtime pm disabled with ACPI */
+	if (has_acpi_companion(tqspi->dev))
+		return 0;
 	ret = clk_prepare_enable(tqspi->clk);
 	if (ret < 0)
 		dev_err(tqspi->dev, "failed to enable clock: %d\n", ret);
@@ -1408,6 +1437,7 @@ static struct platform_driver tegra_qspi_driver = {
 		.name		= "tegra-qspi",
 		.pm		= &tegra_qspi_pm_ops,
 		.of_match_table	= tegra_qspi_of_match,
+		.acpi_match_table = ACPI_PTR(tegra_qspi_acpi_match),
 	},
 	.probe =	tegra_qspi_probe,
 	.remove =	tegra_qspi_remove,
-- 
GitLab


From 1b8342cc4a387933780c50f0cf51c94455be7d11 Mon Sep 17 00:00:00 2001
From: Krishna Yarlagadda <kyarlagadda@nvidia.com>
Date: Mon, 7 Mar 2022 22:25:18 +0530
Subject: [PATCH 1090/1586] spi: tegra210-quad: combined sequence mode

Add combined sequence mode supported by Tegra QSPI controller.
For commands which contain cmd, addr, data parts to it, controller
can accept all 3 transfers at once and avoid interrupt for each
transfer. This would improve read & write performance.

Signed-off-by: Krishna Yarlagadda <kyarlagadda@nvidia.com>
Reported-by: kernel test robot <lkp@intel.com>
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Link: https://lore.kernel.org/r/20220307165519.38380-3-kyarlagadda@nvidia.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-tegra210-quad.c | 238 +++++++++++++++++++++++++++++++-
 1 file changed, 233 insertions(+), 5 deletions(-)

diff --git a/drivers/spi/spi-tegra210-quad.c b/drivers/spi/spi-tegra210-quad.c
index 517a348d1c198..66f647f32876e 100644
--- a/drivers/spi/spi-tegra210-quad.c
+++ b/drivers/spi/spi-tegra210-quad.c
@@ -121,14 +121,39 @@
 #define QSPI_NUM_DUMMY_CYCLE(x)			(((x) & 0xff) << 0)
 #define QSPI_DUMMY_CYCLES_MAX			0xff
 
+#define QSPI_CMB_SEQ_CMD			0x19c
+#define QSPI_COMMAND_VALUE_SET(X)		(((x) & 0xFF) << 0)
+
+#define QSPI_CMB_SEQ_CMD_CFG			0x1a0
+#define QSPI_COMMAND_X1_X2_X4(x)		(((x) & 0x3) << 13)
+#define QSPI_COMMAND_X1_X2_X4_MASK		(0x03 << 13)
+#define QSPI_COMMAND_SDR_DDR			BIT(12)
+#define QSPI_COMMAND_SIZE_SET(x)		(((x) & 0xFF) << 0)
+
+#define QSPI_GLOBAL_CONFIG			0X1a4
+#define QSPI_CMB_SEQ_EN				BIT(0)
+
+#define QSPI_CMB_SEQ_ADDR			0x1a8
+#define QSPI_ADDRESS_VALUE_SET(X)		(((x) & 0xFFFF) << 0)
+
+#define QSPI_CMB_SEQ_ADDR_CFG			0x1ac
+#define QSPI_ADDRESS_X1_X2_X4(x)		(((x) & 0x3) << 13)
+#define QSPI_ADDRESS_X1_X2_X4_MASK		(0x03 << 13)
+#define QSPI_ADDRESS_SDR_DDR			BIT(12)
+#define QSPI_ADDRESS_SIZE_SET(x)		(((x) & 0xFF) << 0)
+
 #define DATA_DIR_TX				BIT(0)
 #define DATA_DIR_RX				BIT(1)
 
 #define QSPI_DMA_TIMEOUT			(msecs_to_jiffies(1000))
 #define DEFAULT_QSPI_DMA_BUF_LEN		(64 * 1024)
+#define CMD_TRANSFER				0
+#define ADDR_TRANSFER				1
+#define DATA_TRANSFER				2
 
 struct tegra_qspi_soc_data {
 	bool has_dma;
+	bool cmb_xfer_capable;
 };
 
 struct tegra_qspi_client_data {
@@ -912,7 +937,6 @@ static int tegra_qspi_setup(struct spi_device *spi)
 		cdata = tegra_qspi_parse_cdata_dt(spi);
 		spi->controller_data = cdata;
 	}
-
 	spin_lock_irqsave(&tqspi->lock, flags);
 
 	/* keep default cs state to inactive */
@@ -971,19 +995,179 @@ static void tegra_qspi_transfer_end(struct spi_device *spi)
 	tegra_qspi_writel(tqspi, tqspi->def_command1_reg, QSPI_COMMAND1);
 }
 
-static int tegra_qspi_transfer_one_message(struct spi_master *master, struct spi_message *msg)
+static u32 tegra_qspi_cmd_config(bool is_ddr, u8 bus_width, u8 len)
+{
+	u32 cmd_config = 0;
+
+	/* Extract Command configuration and value */
+	if (is_ddr)
+		cmd_config |= QSPI_COMMAND_SDR_DDR;
+	else
+		cmd_config &= ~QSPI_COMMAND_SDR_DDR;
+
+	cmd_config |= QSPI_COMMAND_X1_X2_X4(bus_width);
+	cmd_config |= QSPI_COMMAND_SIZE_SET((len * 8) - 1);
+
+	return cmd_config;
+}
+
+static u32 tegra_qspi_addr_config(bool is_ddr, u8 bus_width, u8 len)
+{
+	u32 addr_config = 0;
+
+	/* Extract Address configuration and value */
+	is_ddr = 0; //Only SDR mode supported
+	bus_width = 0; //X1 mode
+
+	if (is_ddr)
+		addr_config |= QSPI_ADDRESS_SDR_DDR;
+	else
+		addr_config &= ~QSPI_ADDRESS_SDR_DDR;
+
+	addr_config |= QSPI_ADDRESS_X1_X2_X4(bus_width);
+	addr_config |= QSPI_ADDRESS_SIZE_SET((len * 8) - 1);
+
+	return addr_config;
+}
+
+static int tegra_qspi_combined_seq_xfer(struct tegra_qspi *tqspi,
+					struct spi_message *msg)
+{
+	bool is_first_msg = true;
+	struct spi_transfer *xfer;
+	struct spi_device *spi = msg->spi;
+	u8 transfer_phase = 0;
+	u32 cmd1 = 0, dma_ctl = 0;
+	int ret = 0;
+	u32 address_value = 0;
+	u32 cmd_config = 0, addr_config = 0;
+	u8 cmd_value = 0, val = 0;
+
+	/* Enable Combined sequence mode */
+	val = tegra_qspi_readl(tqspi, QSPI_GLOBAL_CONFIG);
+	val |= QSPI_CMB_SEQ_EN;
+	tegra_qspi_writel(tqspi, val, QSPI_GLOBAL_CONFIG);
+	/* Process individual transfer list */
+	list_for_each_entry(xfer, &msg->transfers, transfer_list) {
+		switch (transfer_phase) {
+		case CMD_TRANSFER:
+			/* X1 SDR mode */
+			cmd_config = tegra_qspi_cmd_config(false, 0,
+							   xfer->len);
+			cmd_value = *((const u8 *)(xfer->tx_buf));
+			break;
+		case ADDR_TRANSFER:
+			/* X1 SDR mode */
+			addr_config = tegra_qspi_addr_config(false, 0,
+							     xfer->len);
+			address_value = *((const u32 *)(xfer->tx_buf));
+			break;
+		case DATA_TRANSFER:
+			/* Program Command, Address value in register */
+			tegra_qspi_writel(tqspi, cmd_value, QSPI_CMB_SEQ_CMD);
+			tegra_qspi_writel(tqspi, address_value,
+					  QSPI_CMB_SEQ_ADDR);
+			/* Program Command and Address config in register */
+			tegra_qspi_writel(tqspi, cmd_config,
+					  QSPI_CMB_SEQ_CMD_CFG);
+			tegra_qspi_writel(tqspi, addr_config,
+					  QSPI_CMB_SEQ_ADDR_CFG);
+
+			reinit_completion(&tqspi->xfer_completion);
+			cmd1 = tegra_qspi_setup_transfer_one(spi, xfer,
+							     is_first_msg);
+			ret = tegra_qspi_start_transfer_one(spi, xfer,
+							    cmd1);
+
+			if (ret < 0) {
+				dev_err(tqspi->dev, "Failed to start transfer-one: %d\n",
+					ret);
+				return ret;
+			}
+
+			is_first_msg = false;
+			ret = wait_for_completion_timeout
+					(&tqspi->xfer_completion,
+					QSPI_DMA_TIMEOUT);
+
+			if (WARN_ON(ret == 0)) {
+				dev_err(tqspi->dev, "QSPI Transfer failed with timeout: %d\n",
+					ret);
+				if (tqspi->is_curr_dma_xfer &&
+				    (tqspi->cur_direction & DATA_DIR_TX))
+					dmaengine_terminate_all
+						(tqspi->tx_dma_chan);
+
+				if (tqspi->is_curr_dma_xfer &&
+				    (tqspi->cur_direction & DATA_DIR_RX))
+					dmaengine_terminate_all
+						(tqspi->rx_dma_chan);
+
+				/* Abort transfer by resetting pio/dma bit */
+				if (!tqspi->is_curr_dma_xfer) {
+					cmd1 = tegra_qspi_readl
+							(tqspi,
+							 QSPI_COMMAND1);
+					cmd1 &= ~QSPI_PIO;
+					tegra_qspi_writel
+							(tqspi, cmd1,
+							 QSPI_COMMAND1);
+				} else {
+					dma_ctl = tegra_qspi_readl
+							(tqspi,
+							 QSPI_DMA_CTL);
+					dma_ctl &= ~QSPI_DMA_EN;
+					tegra_qspi_writel(tqspi, dma_ctl,
+							  QSPI_DMA_CTL);
+				}
+
+				/* Reset controller if timeout happens */
+				if (device_reset(tqspi->dev) < 0)
+					dev_warn_once(tqspi->dev,
+						      "device reset failed\n");
+				ret = -EIO;
+				goto exit;
+			}
+
+			if (tqspi->tx_status ||  tqspi->rx_status) {
+				dev_err(tqspi->dev, "QSPI Transfer failed\n");
+				tqspi->tx_status = 0;
+				tqspi->rx_status = 0;
+				ret = -EIO;
+				goto exit;
+			}
+			break;
+		default:
+			ret = -EINVAL;
+			goto exit;
+		}
+		msg->actual_length += xfer->len;
+		transfer_phase++;
+	}
+
+exit:
+	msg->status = ret;
+
+	return ret;
+}
+
+static int tegra_qspi_non_combined_seq_xfer(struct tegra_qspi *tqspi,
+					    struct spi_message *msg)
 {
-	struct tegra_qspi *tqspi = spi_master_get_devdata(master);
 	struct spi_device *spi = msg->spi;
 	struct spi_transfer *transfer;
 	bool is_first_msg = true;
-	int ret;
+	int ret = 0, val = 0;
 
 	msg->status = 0;
 	msg->actual_length = 0;
 	tqspi->tx_status = 0;
 	tqspi->rx_status = 0;
 
+	/* Disable Combined sequence mode */
+	val = tegra_qspi_readl(tqspi, QSPI_GLOBAL_CONFIG);
+	val &= ~QSPI_CMB_SEQ_EN;
+	tegra_qspi_writel(tqspi, val, QSPI_GLOBAL_CONFIG);
 	list_for_each_entry(transfer, &msg->transfers, transfer_list) {
 		struct spi_transfer *xfer = transfer;
 		u8 dummy_bytes = 0;
@@ -1021,7 +1205,6 @@ static int tegra_qspi_transfer_one_message(struct spi_master *master, struct spi
 			goto complete_xfer;
 		}
 
-		is_first_msg = false;
 		ret = wait_for_completion_timeout(&tqspi->xfer_completion,
 						  QSPI_DMA_TIMEOUT);
 		if (WARN_ON(ret == 0)) {
@@ -1066,7 +1249,48 @@ complete_xfer:
 	ret = 0;
 exit:
 	msg->status = ret;
+
+	return ret;
+}
+
+static bool tegra_qspi_validate_cmb_seq(struct tegra_qspi *tqspi,
+					struct spi_message *msg)
+{
+	int transfer_count = 0;
+	struct spi_transfer *xfer;
+
+	list_for_each_entry(xfer, &msg->transfers, transfer_list) {
+		transfer_count++;
+	}
+	if (!tqspi->soc_data->cmb_xfer_capable || transfer_count != 3)
+		return false;
+	xfer = list_first_entry(&msg->transfers, typeof(*xfer),
+				transfer_list);
+	if (xfer->len > 2)
+		return false;
+	xfer = list_next_entry(xfer, transfer_list);
+	if (xfer->len > 4 || xfer->len < 3)
+		return false;
+	xfer = list_next_entry(xfer, transfer_list);
+	if (!tqspi->soc_data->has_dma || xfer->len > (QSPI_FIFO_DEPTH << 2))
+		return false;
+
+	return true;
+}
+
+static int tegra_qspi_transfer_one_message(struct spi_master *master,
+					   struct spi_message *msg)
+{
+	struct tegra_qspi *tqspi = spi_master_get_devdata(master);
+	int ret;
+
+	if (tegra_qspi_validate_cmb_seq(tqspi, msg))
+		ret = tegra_qspi_combined_seq_xfer(tqspi, msg);
+	else
+		ret = tegra_qspi_non_combined_seq_xfer(tqspi, msg);
+
 	spi_finalize_current_message(master);
+
 	return ret;
 }
 
@@ -1200,14 +1424,17 @@ static irqreturn_t tegra_qspi_isr_thread(int irq, void *context_data)
 
 static struct tegra_qspi_soc_data tegra210_qspi_soc_data = {
 	.has_dma = true,
+	.cmb_xfer_capable = false,
 };
 
 static struct tegra_qspi_soc_data tegra186_qspi_soc_data = {
 	.has_dma = true,
+	.cmb_xfer_capable = true,
 };
 
 static struct tegra_qspi_soc_data tegra234_qspi_soc_data = {
 	.has_dma = false,
+	.cmb_xfer_capable = true,
 };
 
 static const struct of_device_id tegra_qspi_of_match[] = {
@@ -1278,6 +1505,7 @@ static int tegra_qspi_probe(struct platform_device *pdev)
 	tqspi->dev = &pdev->dev;
 	spin_lock_init(&tqspi->lock);
 
+	tqspi->soc_data = device_get_match_data(&pdev->dev);
 	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	tqspi->base = devm_ioremap_resource(&pdev->dev, r);
 	if (IS_ERR(tqspi->base))
-- 
GitLab


From 80ab9012bbf1011f57c06b3c6e4ac3816c4a86f5 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 7 Mar 2022 19:37:40 +0200
Subject: [PATCH 1091/1586] spi: topcliff-pch: Prevent usage of potentially
 stale DMA device

DMA device is expected to be available while SPI transfer is ongoing.

Prevent usage of potentially stale DMA device by keeping reference
count till the end of the transfer.

Fixes: 4d986ffa036a ("spi: add missing pci_dev_put() before return")
Reported-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20220307173740.80996-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-topcliff-pch.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/spi/spi-topcliff-pch.c b/drivers/spi/spi-topcliff-pch.c
index 8e1cc345810a6..dfaa1d79a78be 100644
--- a/drivers/spi/spi-topcliff-pch.c
+++ b/drivers/spi/spi-topcliff-pch.c
@@ -103,6 +103,7 @@
 static int use_dma = 1;
 
 struct pch_spi_dma_ctrl {
+	struct pci_dev		*dma_dev;
 	struct dma_async_tx_descriptor	*desc_tx;
 	struct dma_async_tx_descriptor	*desc_rx;
 	struct pch_dma_slave		param_tx;
@@ -876,7 +877,6 @@ static void pch_spi_request_dma(struct pch_spi_data *data, int bpw)
 	if (!chan) {
 		dev_err(&data->master->dev,
 			"ERROR: dma_request_channel FAILS(Tx)\n");
-		data->use_dma = 0;
 		goto out;
 	}
 	dma->chan_tx = chan;
@@ -893,13 +893,15 @@ static void pch_spi_request_dma(struct pch_spi_data *data, int bpw)
 			"ERROR: dma_request_channel FAILS(Rx)\n");
 		dma_release_channel(dma->chan_tx);
 		dma->chan_tx = NULL;
-		data->use_dma = 0;
 		goto out;
 	}
 	dma->chan_rx = chan;
 
+	dma->dma_dev = dma_dev;
+	return;
 out:
 	pci_dev_put(dma_dev);
+	data->use_dma = 0;
 }
 
 static void pch_spi_release_dma(struct pch_spi_data *data)
@@ -915,6 +917,8 @@ static void pch_spi_release_dma(struct pch_spi_data *data)
 		dma_release_channel(dma->chan_rx);
 		dma->chan_rx = NULL;
 	}
+
+	pci_dev_put(dma->dma_dev);
 }
 
 static void pch_spi_handle_dma(struct pch_spi_data *data, int *bpw)
-- 
GitLab


From a82c25c366b0963d33ddf699196e6cf57f6d89b1 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 8 Mar 2022 13:52:11 +0100
Subject: [PATCH 1092/1586] Revert "netfilter: nat: force port remap to prevent
 shadowing well-known ports"

This reverts commit 878aed8db324bec64f3c3f956e64d5ae7375a5de.

This change breaks existing setups where conntrack is used with
asymmetric paths.

In these cases, the NAT transformation occurs on the syn-ack instead of
the syn:

1. SYN    x:12345 -> y -> 443 // sent by initiator, receiverd by responder
2. SYNACK y:443 -> x:12345 // First packet seen by conntrack, as sent by responder
3. tuple_force_port_remap() gets called, sees:
  'tcp from 443 to port 12345 NAT' -> pick a new source port, inititor receives
4. SYNACK y:$RANDOM -> x:12345   // connection is never established

While its possible to avoid the breakage with NOTRACK rules, a kernel
update should not break working setups.

An alternative to the revert is to augment conntrack to tag
mid-stream connections plus more code in the nat core to skip NAT
for such connections, however, this leads to more interaction/integration
between conntrack and NAT.

Therefore, revert, users will need to add explicit nat rules to avoid
port shadowing.

Link: https://lore.kernel.org/netfilter-devel/20220302105908.GA5852@breakpoint.cc/#R
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2051413
Signed-off-by: Florian Westphal <fw@strlen.de>
---
 net/netfilter/nf_nat_core.c                  | 43 ++------------------
 tools/testing/selftests/netfilter/nft_nat.sh |  5 +--
 2 files changed, 5 insertions(+), 43 deletions(-)

diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 2d06a66899b26..ffcf6529afc3c 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -494,38 +494,6 @@ another_round:
 	goto another_round;
 }
 
-static bool tuple_force_port_remap(const struct nf_conntrack_tuple *tuple)
-{
-	u16 sp, dp;
-
-	switch (tuple->dst.protonum) {
-	case IPPROTO_TCP:
-		sp = ntohs(tuple->src.u.tcp.port);
-		dp = ntohs(tuple->dst.u.tcp.port);
-		break;
-	case IPPROTO_UDP:
-	case IPPROTO_UDPLITE:
-		sp = ntohs(tuple->src.u.udp.port);
-		dp = ntohs(tuple->dst.u.udp.port);
-		break;
-	default:
-		return false;
-	}
-
-	/* IANA: System port range: 1-1023,
-	 *         user port range: 1024-49151,
-	 *      private port range: 49152-65535.
-	 *
-	 * Linux default ephemeral port range is 32768-60999.
-	 *
-	 * Enforce port remapping if sport is significantly lower
-	 * than dport to prevent NAT port shadowing, i.e.
-	 * accidental match of 'new' inbound connection vs.
-	 * existing outbound one.
-	 */
-	return sp < 16384 && dp >= 32768;
-}
-
 /* Manipulate the tuple into the range given. For NF_INET_POST_ROUTING,
  * we change the source to map into the range. For NF_INET_PRE_ROUTING
  * and NF_INET_LOCAL_OUT, we change the destination to map into the
@@ -539,17 +507,11 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
 		 struct nf_conn *ct,
 		 enum nf_nat_manip_type maniptype)
 {
-	bool random_port = range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL;
 	const struct nf_conntrack_zone *zone;
 	struct net *net = nf_ct_net(ct);
 
 	zone = nf_ct_zone(ct);
 
-	if (maniptype == NF_NAT_MANIP_SRC &&
-	    !random_port &&
-	    !ct->local_origin)
-		random_port = tuple_force_port_remap(orig_tuple);
-
 	/* 1) If this srcip/proto/src-proto-part is currently mapped,
 	 * and that same mapping gives a unique tuple within the given
 	 * range, use that.
@@ -558,7 +520,8 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
 	 * So far, we don't do local source mappings, so multiple
 	 * manips not an issue.
 	 */
-	if (maniptype == NF_NAT_MANIP_SRC && !random_port) {
+	if (maniptype == NF_NAT_MANIP_SRC &&
+	    !(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) {
 		/* try the original tuple first */
 		if (in_range(orig_tuple, range)) {
 			if (!nf_nat_used_tuple(orig_tuple, ct)) {
@@ -582,7 +545,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
 	 */
 
 	/* Only bother mapping if it's not already in range and unique */
-	if (!random_port) {
+	if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) {
 		if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) {
 			if (!(range->flags & NF_NAT_RANGE_PROTO_OFFSET) &&
 			    l4proto_in_range(tuple, maniptype,
diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh
index 79fe627b9e817..eb8543b9a5c40 100755
--- a/tools/testing/selftests/netfilter/nft_nat.sh
+++ b/tools/testing/selftests/netfilter/nft_nat.sh
@@ -880,9 +880,8 @@ EOF
 		return $ksft_skip
 	fi
 
-	# test default behaviour. Packet from ns1 to ns0 is not redirected
-	# due to automatic port translation.
-	test_port_shadow "default" "ROUTER"
+	# test default behaviour. Packet from ns1 to ns0 is redirected to ns2.
+	test_port_shadow "default" "CLIENT"
 
 	# test packet filter based mitigation: prevent forwarding of
 	# packets claiming to come from the service port.
-- 
GitLab


From a1b6f487cb47ad99ee14730f03c80d3eb4c71e8a Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Tue, 8 Mar 2022 13:41:01 +0200
Subject: [PATCH 1093/1586] turbostat: fix PC6 displaying on some systems

'MSR_PKG_CST_CONFIG_CONTROL' encodes the deepest allowed package C-state limit,
and turbostat decodes it.

Before this patch: turbostat does not recognize value "3" on Ice Lake Xeon
(ICX) and Sapphire Rapids Xeon (SPR), treats it as "unknown", and does not
display any package C-states in the results table.

After this patch: turbostat recognizes value 3 on ICX and SPR, treats it as
"PC6", and correctly displays package C-states in the results table.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 47d3ba895d6d9..bc5ae0872fed9 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -2323,7 +2323,7 @@ int skx_pkg_cstate_limits[16] =
 };
 
 int icx_pkg_cstate_limits[16] =
-    { PCL__0, PCL__2, PCL__6, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
+    { PCL__0, PCL__2, PCL__6, PCL__6, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
 	PCLRSV, PCLRSV
 };
 
-- 
GitLab


From 1280f12f56a15abde23503ba876343e5f201c9c2 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Tue, 8 Feb 2022 18:56:03 +0000
Subject: [PATCH 1094/1586] drivers/perf: arm_pmu: Handle 47 bit counters

The current ARM PMU framework can only deal with 32 or 64bit counters.
Teach it about a 47bit flavour.

Yes, this is odd.

Reviewed-by: Hector Martin <marcan@marcan.st>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/arm_pmu.c       | 2 ++
 include/linux/perf/arm_pmu.h | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index 295cc7952d0ed..0a9ed1a061aca 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -109,6 +109,8 @@ static inline u64 arm_pmu_event_max_period(struct perf_event *event)
 {
 	if (event->hw.flags & ARMPMU_EVT_64BIT)
 		return GENMASK_ULL(63, 0);
+	else if (event->hw.flags & ARMPMU_EVT_47BIT)
+		return GENMASK_ULL(46, 0);
 	else
 		return GENMASK_ULL(31, 0);
 }
diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
index 2512e2f9cd4e7..0407a38b470ab 100644
--- a/include/linux/perf/arm_pmu.h
+++ b/include/linux/perf/arm_pmu.h
@@ -26,6 +26,8 @@
  */
 /* Event uses a 64bit counter */
 #define ARMPMU_EVT_64BIT		1
+/* Event uses a 47bit counter */
+#define ARMPMU_EVT_47BIT		2
 
 #define HW_OP_UNSUPPORTED		0xFFFF
 #define C(_x)				PERF_COUNT_HW_CACHE_##_x
-- 
GitLab


From a639027a1be1d68437e1c2cac6ed16306c84ab3c Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Tue, 8 Feb 2022 18:56:04 +0000
Subject: [PATCH 1095/1586] drivers/perf: Add Apple icestorm/firestorm CPU PMU
 driver

Add a new, weird and wonderful driver for the equally weird Apple
PMU HW. Although the PMU itself is functional, we don't know much
about the events yet, so this can be considered as yet another
random number generator...

Nonetheless, it can reliably count at least cycles and instructions
in the usually wonky big-little way. For anything else, it of course
supports raw event numbers.

Signed-off-by: Marc Zyngier <maz@kernel.org>
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/apple_m1_pmu.h |  45 ++
 drivers/perf/Kconfig                  |   7 +
 drivers/perf/Makefile                 |   1 +
 drivers/perf/apple_m1_cpu_pmu.c       | 584 ++++++++++++++++++++++++++
 4 files changed, 637 insertions(+)
 create mode 100644 drivers/perf/apple_m1_cpu_pmu.c

diff --git a/arch/arm64/include/asm/apple_m1_pmu.h b/arch/arm64/include/asm/apple_m1_pmu.h
index b848af7faadc9..99483b19b99fc 100644
--- a/arch/arm64/include/asm/apple_m1_pmu.h
+++ b/arch/arm64/include/asm/apple_m1_pmu.h
@@ -6,8 +6,21 @@
 #include <linux/bits.h>
 #include <asm/sysreg.h>
 
+/* Counters */
+#define SYS_IMP_APL_PMC0_EL1	sys_reg(3, 2, 15, 0, 0)
+#define SYS_IMP_APL_PMC1_EL1	sys_reg(3, 2, 15, 1, 0)
+#define SYS_IMP_APL_PMC2_EL1	sys_reg(3, 2, 15, 2, 0)
+#define SYS_IMP_APL_PMC3_EL1	sys_reg(3, 2, 15, 3, 0)
+#define SYS_IMP_APL_PMC4_EL1	sys_reg(3, 2, 15, 4, 0)
+#define SYS_IMP_APL_PMC5_EL1	sys_reg(3, 2, 15, 5, 0)
+#define SYS_IMP_APL_PMC6_EL1	sys_reg(3, 2, 15, 6, 0)
+#define SYS_IMP_APL_PMC7_EL1	sys_reg(3, 2, 15, 7, 0)
+#define SYS_IMP_APL_PMC8_EL1	sys_reg(3, 2, 15, 9, 0)
+#define SYS_IMP_APL_PMC9_EL1	sys_reg(3, 2, 15, 10, 0)
+
 /* Core PMC control register */
 #define SYS_IMP_APL_PMCR0_EL1	sys_reg(3, 1, 15, 0, 0)
+#define PMCR0_CNT_ENABLE_0_7	GENMASK(7, 0)
 #define PMCR0_IMODE		GENMASK(10, 8)
 #define PMCR0_IMODE_OFF		0
 #define PMCR0_IMODE_PMI		1
@@ -15,5 +28,37 @@
 #define PMCR0_IMODE_HALT	3
 #define PMCR0_IMODE_FIQ		4
 #define PMCR0_IACT		BIT(11)
+#define PMCR0_PMI_ENABLE_0_7	GENMASK(19, 12)
+#define PMCR0_STOP_CNT_ON_PMI	BIT(20)
+#define PMCR0_CNT_GLOB_L2C_EVT	BIT(21)
+#define PMCR0_DEFER_PMI_TO_ERET	BIT(22)
+#define PMCR0_ALLOW_CNT_EN_EL0	BIT(30)
+#define PMCR0_CNT_ENABLE_8_9	GENMASK(33, 32)
+#define PMCR0_PMI_ENABLE_8_9	GENMASK(45, 44)
+
+#define SYS_IMP_APL_PMCR1_EL1	sys_reg(3, 1, 15, 1, 0)
+#define PMCR1_COUNT_A64_EL0_0_7	GENMASK(15, 8)
+#define PMCR1_COUNT_A64_EL1_0_7	GENMASK(23, 16)
+#define PMCR1_COUNT_A64_EL0_8_9	GENMASK(41, 40)
+#define PMCR1_COUNT_A64_EL1_8_9	GENMASK(49, 48)
+
+#define SYS_IMP_APL_PMCR2_EL1	sys_reg(3, 1, 15, 2, 0)
+#define SYS_IMP_APL_PMCR3_EL1	sys_reg(3, 1, 15, 3, 0)
+#define SYS_IMP_APL_PMCR4_EL1	sys_reg(3, 1, 15, 4, 0)
+
+#define SYS_IMP_APL_PMESR0_EL1	sys_reg(3, 1, 15, 5, 0)
+#define PMESR0_EVT_CNT_2	GENMASK(7, 0)
+#define PMESR0_EVT_CNT_3	GENMASK(15, 8)
+#define PMESR0_EVT_CNT_4	GENMASK(23, 16)
+#define PMESR0_EVT_CNT_5	GENMASK(31, 24)
+
+#define SYS_IMP_APL_PMESR1_EL1	sys_reg(3, 1, 15, 6, 0)
+#define PMESR1_EVT_CNT_6	GENMASK(7, 0)
+#define PMESR1_EVT_CNT_7	GENMASK(15, 8)
+#define PMESR1_EVT_CNT_8	GENMASK(23, 16)
+#define PMESR1_EVT_CNT_9	GENMASK(31, 24)
+
+#define SYS_IMP_APL_PMSR_EL1	sys_reg(3, 1, 15, 13, 0)
+#define PMSR_OVERFLOW		GENMASK(9, 0)
 
 #endif /* __ASM_APPLE_M1_PMU_h */
diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index e1a0c44bc6864..d4fa0dabb05ff 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -146,6 +146,13 @@ config MARVELL_CN10K_TAD_PMU
 	  Provides support for Last-Level cache Tag-and-data Units (LLC-TAD)
 	  performance monitors on CN10K family silicons.
 
+config APPLE_M1_CPU_PMU
+	bool "Apple M1 CPU PMU support"
+	depends on ARM_PMU && ARCH_APPLE
+	help
+	  Provides support for the non-architectural CPU PMUs present on
+	  the Apple M1 SoCs and derivatives.
+
 source "drivers/perf/hisilicon/Kconfig"
 
 endmenu
diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
index 2db5418d5b0aa..21ad0832e3d4e 100644
--- a/drivers/perf/Makefile
+++ b/drivers/perf/Makefile
@@ -15,3 +15,4 @@ obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o
 obj-$(CONFIG_ARM_SPE_PMU) += arm_spe_pmu.o
 obj-$(CONFIG_ARM_DMC620_PMU) += arm_dmc620_pmu.o
 obj-$(CONFIG_MARVELL_CN10K_TAD_PMU) += marvell_cn10k_tad_pmu.o
+obj-$(CONFIG_APPLE_M1_CPU_PMU) += apple_m1_cpu_pmu.o
diff --git a/drivers/perf/apple_m1_cpu_pmu.c b/drivers/perf/apple_m1_cpu_pmu.c
new file mode 100644
index 0000000000000..979a7c2b4f569
--- /dev/null
+++ b/drivers/perf/apple_m1_cpu_pmu.c
@@ -0,0 +1,584 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * CPU PMU driver for the Apple M1 and derivatives
+ *
+ * Copyright (C) 2021 Google LLC
+ *
+ * Author: Marc Zyngier <maz@kernel.org>
+ *
+ * Most of the information used in this driver was provided by the
+ * Asahi Linux project. The rest was experimentally discovered.
+ */
+
+#include <linux/of.h>
+#include <linux/perf/arm_pmu.h>
+#include <linux/platform_device.h>
+
+#include <asm/apple_m1_pmu.h>
+#include <asm/irq_regs.h>
+#include <asm/perf_event.h>
+
+#define M1_PMU_NR_COUNTERS		10
+
+#define M1_PMU_CFG_EVENT		GENMASK(7, 0)
+
+#define ANY_BUT_0_1			GENMASK(9, 2)
+#define ONLY_2_TO_7			GENMASK(7, 2)
+#define ONLY_2_4_6			(BIT(2) | BIT(4) | BIT(6))
+#define ONLY_5_6_7			(BIT(5) | BIT(6) | BIT(7))
+
+/*
+ * Description of the events we actually know about, as well as those with
+ * a specific counter affinity. Yes, this is a grand total of two known
+ * counters, and the rest is anybody's guess.
+ *
+ * Not all counters can count all events. Counters #0 and #1 are wired to
+ * count cycles and instructions respectively, and some events have
+ * bizarre mappings (every other counter, or even *one* counter). These
+ * restrictions equally apply to both P and E cores.
+ *
+ * It is worth noting that the PMUs attached to P and E cores are likely
+ * to be different because the underlying uarches are different. At the
+ * moment, we don't really need to distinguish between the two because we
+ * know next to nothing about the events themselves, and we already have
+ * per cpu-type PMU abstractions.
+ *
+ * If we eventually find out that the events are different across
+ * implementations, we'll have to introduce per cpu-type tables.
+ */
+enum m1_pmu_events {
+	M1_PMU_PERFCTR_UNKNOWN_01	= 0x01,
+	M1_PMU_PERFCTR_CPU_CYCLES	= 0x02,
+	M1_PMU_PERFCTR_INSTRUCTIONS	= 0x8c,
+	M1_PMU_PERFCTR_UNKNOWN_8d	= 0x8d,
+	M1_PMU_PERFCTR_UNKNOWN_8e	= 0x8e,
+	M1_PMU_PERFCTR_UNKNOWN_8f	= 0x8f,
+	M1_PMU_PERFCTR_UNKNOWN_90	= 0x90,
+	M1_PMU_PERFCTR_UNKNOWN_93	= 0x93,
+	M1_PMU_PERFCTR_UNKNOWN_94	= 0x94,
+	M1_PMU_PERFCTR_UNKNOWN_95	= 0x95,
+	M1_PMU_PERFCTR_UNKNOWN_96	= 0x96,
+	M1_PMU_PERFCTR_UNKNOWN_97	= 0x97,
+	M1_PMU_PERFCTR_UNKNOWN_98	= 0x98,
+	M1_PMU_PERFCTR_UNKNOWN_99	= 0x99,
+	M1_PMU_PERFCTR_UNKNOWN_9a	= 0x9a,
+	M1_PMU_PERFCTR_UNKNOWN_9b	= 0x9b,
+	M1_PMU_PERFCTR_UNKNOWN_9c	= 0x9c,
+	M1_PMU_PERFCTR_UNKNOWN_9f	= 0x9f,
+	M1_PMU_PERFCTR_UNKNOWN_bf	= 0xbf,
+	M1_PMU_PERFCTR_UNKNOWN_c0	= 0xc0,
+	M1_PMU_PERFCTR_UNKNOWN_c1	= 0xc1,
+	M1_PMU_PERFCTR_UNKNOWN_c4	= 0xc4,
+	M1_PMU_PERFCTR_UNKNOWN_c5	= 0xc5,
+	M1_PMU_PERFCTR_UNKNOWN_c6	= 0xc6,
+	M1_PMU_PERFCTR_UNKNOWN_c8	= 0xc8,
+	M1_PMU_PERFCTR_UNKNOWN_ca	= 0xca,
+	M1_PMU_PERFCTR_UNKNOWN_cb	= 0xcb,
+	M1_PMU_PERFCTR_UNKNOWN_f5	= 0xf5,
+	M1_PMU_PERFCTR_UNKNOWN_f6	= 0xf6,
+	M1_PMU_PERFCTR_UNKNOWN_f7	= 0xf7,
+	M1_PMU_PERFCTR_UNKNOWN_f8	= 0xf8,
+	M1_PMU_PERFCTR_UNKNOWN_fd	= 0xfd,
+	M1_PMU_PERFCTR_LAST		= M1_PMU_CFG_EVENT,
+
+	/*
+	 * From this point onwards, these are not actual HW events,
+	 * but attributes that get stored in hw->config_base.
+	 */
+	M1_PMU_CFG_COUNT_USER		= BIT(8),
+	M1_PMU_CFG_COUNT_KERNEL		= BIT(9),
+};
+
+/*
+ * Per-event affinity table. Most events can be installed on counter
+ * 2-9, but there are a number of exceptions. Note that this table
+ * has been created experimentally, and I wouldn't be surprised if more
+ * counters had strange affinities.
+ */
+static const u16 m1_pmu_event_affinity[M1_PMU_PERFCTR_LAST + 1] = {
+	[0 ... M1_PMU_PERFCTR_LAST]	= ANY_BUT_0_1,
+	[M1_PMU_PERFCTR_UNKNOWN_01]	= BIT(7),
+	[M1_PMU_PERFCTR_CPU_CYCLES]	= ANY_BUT_0_1 | BIT(0),
+	[M1_PMU_PERFCTR_INSTRUCTIONS]	= BIT(7) | BIT(1),
+	[M1_PMU_PERFCTR_UNKNOWN_8d]	= ONLY_5_6_7,
+	[M1_PMU_PERFCTR_UNKNOWN_8e]	= ONLY_5_6_7,
+	[M1_PMU_PERFCTR_UNKNOWN_8f]	= ONLY_5_6_7,
+	[M1_PMU_PERFCTR_UNKNOWN_90]	= ONLY_5_6_7,
+	[M1_PMU_PERFCTR_UNKNOWN_93]	= ONLY_5_6_7,
+	[M1_PMU_PERFCTR_UNKNOWN_94]	= ONLY_5_6_7,
+	[M1_PMU_PERFCTR_UNKNOWN_95]	= ONLY_5_6_7,
+	[M1_PMU_PERFCTR_UNKNOWN_96]	= ONLY_5_6_7,
+	[M1_PMU_PERFCTR_UNKNOWN_97]	= BIT(7),
+	[M1_PMU_PERFCTR_UNKNOWN_98]	= ONLY_5_6_7,
+	[M1_PMU_PERFCTR_UNKNOWN_99]	= ONLY_5_6_7,
+	[M1_PMU_PERFCTR_UNKNOWN_9a]	= BIT(7),
+	[M1_PMU_PERFCTR_UNKNOWN_9b]	= ONLY_5_6_7,
+	[M1_PMU_PERFCTR_UNKNOWN_9c]	= ONLY_5_6_7,
+	[M1_PMU_PERFCTR_UNKNOWN_9f]	= BIT(7),
+	[M1_PMU_PERFCTR_UNKNOWN_bf]	= ONLY_5_6_7,
+	[M1_PMU_PERFCTR_UNKNOWN_c0]	= ONLY_5_6_7,
+	[M1_PMU_PERFCTR_UNKNOWN_c1]	= ONLY_5_6_7,
+	[M1_PMU_PERFCTR_UNKNOWN_c4]	= ONLY_5_6_7,
+	[M1_PMU_PERFCTR_UNKNOWN_c5]	= ONLY_5_6_7,
+	[M1_PMU_PERFCTR_UNKNOWN_c6]	= ONLY_5_6_7,
+	[M1_PMU_PERFCTR_UNKNOWN_c8]	= ONLY_5_6_7,
+	[M1_PMU_PERFCTR_UNKNOWN_ca]	= ONLY_5_6_7,
+	[M1_PMU_PERFCTR_UNKNOWN_cb]	= ONLY_5_6_7,
+	[M1_PMU_PERFCTR_UNKNOWN_f5]	= ONLY_2_4_6,
+	[M1_PMU_PERFCTR_UNKNOWN_f6]	= ONLY_2_4_6,
+	[M1_PMU_PERFCTR_UNKNOWN_f7]	= ONLY_2_4_6,
+	[M1_PMU_PERFCTR_UNKNOWN_f8]	= ONLY_2_TO_7,
+	[M1_PMU_PERFCTR_UNKNOWN_fd]	= ONLY_2_4_6,
+};
+
+static const unsigned m1_pmu_perf_map[PERF_COUNT_HW_MAX] = {
+	PERF_MAP_ALL_UNSUPPORTED,
+	[PERF_COUNT_HW_CPU_CYCLES]	= M1_PMU_PERFCTR_CPU_CYCLES,
+	[PERF_COUNT_HW_INSTRUCTIONS]	= M1_PMU_PERFCTR_INSTRUCTIONS,
+	/* No idea about the rest yet */
+};
+
+/* sysfs definitions */
+static ssize_t m1_pmu_events_sysfs_show(struct device *dev,
+					struct device_attribute *attr,
+					char *page)
+{
+	struct perf_pmu_events_attr *pmu_attr;
+
+	pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
+
+	return sprintf(page, "event=0x%04llx\n", pmu_attr->id);
+}
+
+#define M1_PMU_EVENT_ATTR(name, config)					\
+	PMU_EVENT_ATTR_ID(name, m1_pmu_events_sysfs_show, config)
+
+static struct attribute *m1_pmu_event_attrs[] = {
+	M1_PMU_EVENT_ATTR(cycles, M1_PMU_PERFCTR_CPU_CYCLES),
+	M1_PMU_EVENT_ATTR(instructions, M1_PMU_PERFCTR_INSTRUCTIONS),
+	NULL,
+};
+
+static const struct attribute_group m1_pmu_events_attr_group = {
+	.name = "events",
+	.attrs = m1_pmu_event_attrs,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-7");
+
+static struct attribute *m1_pmu_format_attrs[] = {
+	&format_attr_event.attr,
+	NULL,
+};
+
+static const struct attribute_group m1_pmu_format_attr_group = {
+	.name = "format",
+	.attrs = m1_pmu_format_attrs,
+};
+
+/* Low level accessors. No synchronisation. */
+#define PMU_READ_COUNTER(_idx)						\
+	case _idx:	return read_sysreg_s(SYS_IMP_APL_PMC## _idx ##_EL1)
+
+#define PMU_WRITE_COUNTER(_val, _idx)					\
+	case _idx:							\
+		write_sysreg_s(_val, SYS_IMP_APL_PMC## _idx ##_EL1);	\
+		return
+
+static u64 m1_pmu_read_hw_counter(unsigned int index)
+{
+	switch (index) {
+		PMU_READ_COUNTER(0);
+		PMU_READ_COUNTER(1);
+		PMU_READ_COUNTER(2);
+		PMU_READ_COUNTER(3);
+		PMU_READ_COUNTER(4);
+		PMU_READ_COUNTER(5);
+		PMU_READ_COUNTER(6);
+		PMU_READ_COUNTER(7);
+		PMU_READ_COUNTER(8);
+		PMU_READ_COUNTER(9);
+	}
+
+	BUG();
+}
+
+static void m1_pmu_write_hw_counter(u64 val, unsigned int index)
+{
+	switch (index) {
+		PMU_WRITE_COUNTER(val, 0);
+		PMU_WRITE_COUNTER(val, 1);
+		PMU_WRITE_COUNTER(val, 2);
+		PMU_WRITE_COUNTER(val, 3);
+		PMU_WRITE_COUNTER(val, 4);
+		PMU_WRITE_COUNTER(val, 5);
+		PMU_WRITE_COUNTER(val, 6);
+		PMU_WRITE_COUNTER(val, 7);
+		PMU_WRITE_COUNTER(val, 8);
+		PMU_WRITE_COUNTER(val, 9);
+	}
+
+	BUG();
+}
+
+#define get_bit_offset(index, mask)	(__ffs(mask) + (index))
+
+static void __m1_pmu_enable_counter(unsigned int index, bool en)
+{
+	u64 val, bit;
+
+	switch (index) {
+	case 0 ... 7:
+		bit = BIT(get_bit_offset(index, PMCR0_CNT_ENABLE_0_7));
+		break;
+	case 8 ... 9:
+		bit = BIT(get_bit_offset(index - 8, PMCR0_CNT_ENABLE_8_9));
+		break;
+	default:
+		BUG();
+	}
+
+	val = read_sysreg_s(SYS_IMP_APL_PMCR0_EL1);
+
+	if (en)
+		val |= bit;
+	else
+		val &= ~bit;
+
+	write_sysreg_s(val, SYS_IMP_APL_PMCR0_EL1);
+}
+
+static void m1_pmu_enable_counter(unsigned int index)
+{
+	__m1_pmu_enable_counter(index, true);
+}
+
+static void m1_pmu_disable_counter(unsigned int index)
+{
+	__m1_pmu_enable_counter(index, false);
+}
+
+static void __m1_pmu_enable_counter_interrupt(unsigned int index, bool en)
+{
+	u64 val, bit;
+
+	switch (index) {
+	case 0 ... 7:
+		bit = BIT(get_bit_offset(index, PMCR0_PMI_ENABLE_0_7));
+		break;
+	case 8 ... 9:
+		bit = BIT(get_bit_offset(index - 8, PMCR0_PMI_ENABLE_8_9));
+		break;
+	default:
+		BUG();
+	}
+
+	val = read_sysreg_s(SYS_IMP_APL_PMCR0_EL1);
+
+	if (en)
+		val |= bit;
+	else
+		val &= ~bit;
+
+	write_sysreg_s(val, SYS_IMP_APL_PMCR0_EL1);
+}
+
+static void m1_pmu_enable_counter_interrupt(unsigned int index)
+{
+	__m1_pmu_enable_counter_interrupt(index, true);
+}
+
+static void m1_pmu_disable_counter_interrupt(unsigned int index)
+{
+	__m1_pmu_enable_counter_interrupt(index, false);
+}
+
+static void m1_pmu_configure_counter(unsigned int index, u8 event,
+				     bool user, bool kernel)
+{
+	u64 val, user_bit, kernel_bit;
+	int shift;
+
+	switch (index) {
+	case 0 ... 7:
+		user_bit = BIT(get_bit_offset(index, PMCR1_COUNT_A64_EL0_0_7));
+		kernel_bit = BIT(get_bit_offset(index, PMCR1_COUNT_A64_EL1_0_7));
+		break;
+	case 8 ... 9:
+		user_bit = BIT(get_bit_offset(index - 8, PMCR1_COUNT_A64_EL0_8_9));
+		kernel_bit = BIT(get_bit_offset(index - 8, PMCR1_COUNT_A64_EL1_8_9));
+		break;
+	default:
+		BUG();
+	}
+
+	val = read_sysreg_s(SYS_IMP_APL_PMCR1_EL1);
+
+	if (user)
+		val |= user_bit;
+	else
+		val &= ~user_bit;
+
+	if (kernel)
+		val |= kernel_bit;
+	else
+		val &= ~kernel_bit;
+
+	write_sysreg_s(val, SYS_IMP_APL_PMCR1_EL1);
+
+	/*
+	 * Counters 0 and 1 have fixed events. For anything else,
+	 * place the event at the expected location in the relevant
+	 * register (PMESR0 holds the event configuration for counters
+	 * 2-5, resp. PMESR1 for counters 6-9).
+	 */
+	switch (index) {
+	case 0 ... 1:
+		break;
+	case 2 ... 5:
+		shift = (index - 2) * 8;
+		val = read_sysreg_s(SYS_IMP_APL_PMESR0_EL1);
+		val &= ~((u64)0xff << shift);
+		val |= (u64)event << shift;
+		write_sysreg_s(val, SYS_IMP_APL_PMESR0_EL1);
+		break;
+	case 6 ... 9:
+		shift = (index - 6) * 8;
+		val = read_sysreg_s(SYS_IMP_APL_PMESR1_EL1);
+		val &= ~((u64)0xff << shift);
+		val |= (u64)event << shift;
+		write_sysreg_s(val, SYS_IMP_APL_PMESR1_EL1);
+		break;
+	}
+}
+
+/* arm_pmu backend */
+static void m1_pmu_enable_event(struct perf_event *event)
+{
+	bool user, kernel;
+	u8 evt;
+
+	evt = event->hw.config_base & M1_PMU_CFG_EVENT;
+	user = event->hw.config_base & M1_PMU_CFG_COUNT_USER;
+	kernel = event->hw.config_base & M1_PMU_CFG_COUNT_KERNEL;
+
+	m1_pmu_disable_counter_interrupt(event->hw.idx);
+	m1_pmu_disable_counter(event->hw.idx);
+	isb();
+
+	m1_pmu_configure_counter(event->hw.idx, evt, user, kernel);
+	m1_pmu_enable_counter(event->hw.idx);
+	m1_pmu_enable_counter_interrupt(event->hw.idx);
+	isb();
+}
+
+static void m1_pmu_disable_event(struct perf_event *event)
+{
+	m1_pmu_disable_counter_interrupt(event->hw.idx);
+	m1_pmu_disable_counter(event->hw.idx);
+	isb();
+}
+
+static irqreturn_t m1_pmu_handle_irq(struct arm_pmu *cpu_pmu)
+{
+	struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events);
+	struct pt_regs *regs;
+	u64 overflow, state;
+	int idx;
+
+	overflow = read_sysreg_s(SYS_IMP_APL_PMSR_EL1);
+	if (!overflow) {
+		/* Spurious interrupt? */
+		state = read_sysreg_s(SYS_IMP_APL_PMCR0_EL1);
+		state &= ~PMCR0_IACT;
+		write_sysreg_s(state, SYS_IMP_APL_PMCR0_EL1);
+		isb();
+		return IRQ_NONE;
+	}
+
+	cpu_pmu->stop(cpu_pmu);
+
+	regs = get_irq_regs();
+
+	for (idx = 0; idx < cpu_pmu->num_events; idx++) {
+		struct perf_event *event = cpuc->events[idx];
+		struct perf_sample_data data;
+
+		if (!event)
+			continue;
+
+		armpmu_event_update(event);
+		perf_sample_data_init(&data, 0, event->hw.last_period);
+		if (!armpmu_event_set_period(event))
+			continue;
+
+		if (perf_event_overflow(event, &data, regs))
+			m1_pmu_disable_event(event);
+	}
+
+	cpu_pmu->start(cpu_pmu);
+
+	return IRQ_HANDLED;
+}
+
+static u64 m1_pmu_read_counter(struct perf_event *event)
+{
+	return m1_pmu_read_hw_counter(event->hw.idx);
+}
+
+static void m1_pmu_write_counter(struct perf_event *event, u64 value)
+{
+	m1_pmu_write_hw_counter(value, event->hw.idx);
+	isb();
+}
+
+static int m1_pmu_get_event_idx(struct pmu_hw_events *cpuc,
+				struct perf_event *event)
+{
+	unsigned long evtype = event->hw.config_base & M1_PMU_CFG_EVENT;
+	unsigned long affinity = m1_pmu_event_affinity[evtype];
+	int idx;
+
+	/*
+	 * Place the event on the first free counter that can count
+	 * this event.
+	 *
+	 * We could do a better job if we had a view of all the events
+	 * counting on the PMU at any given time, and by placing the
+	 * most constraining events first.
+	 */
+	for_each_set_bit(idx, &affinity, M1_PMU_NR_COUNTERS) {
+		if (!test_and_set_bit(idx, cpuc->used_mask))
+			return idx;
+	}
+
+	return -EAGAIN;
+}
+
+static void m1_pmu_clear_event_idx(struct pmu_hw_events *cpuc,
+				   struct perf_event *event)
+{
+	clear_bit(event->hw.idx, cpuc->used_mask);
+}
+
+static void __m1_pmu_set_mode(u8 mode)
+{
+	u64 val;
+
+	val = read_sysreg_s(SYS_IMP_APL_PMCR0_EL1);
+	val &= ~(PMCR0_IMODE | PMCR0_IACT);
+	val |= FIELD_PREP(PMCR0_IMODE, mode);
+	write_sysreg_s(val, SYS_IMP_APL_PMCR0_EL1);
+	isb();
+}
+
+static void m1_pmu_start(struct arm_pmu *cpu_pmu)
+{
+	__m1_pmu_set_mode(PMCR0_IMODE_FIQ);
+}
+
+static void m1_pmu_stop(struct arm_pmu *cpu_pmu)
+{
+	__m1_pmu_set_mode(PMCR0_IMODE_OFF);
+}
+
+static int m1_pmu_map_event(struct perf_event *event)
+{
+	/*
+	 * Although the counters are 48bit wide, bit 47 is what
+	 * triggers the overflow interrupt. Advertise the counters
+	 * being 47bit wide to mimick the behaviour of the ARM PMU.
+	 */
+	event->hw.flags |= ARMPMU_EVT_47BIT;
+	return armpmu_map_event(event, &m1_pmu_perf_map, NULL, M1_PMU_CFG_EVENT);
+}
+
+static void m1_pmu_reset(void *info)
+{
+	int i;
+
+	__m1_pmu_set_mode(PMCR0_IMODE_OFF);
+
+	for (i = 0; i < M1_PMU_NR_COUNTERS; i++) {
+		m1_pmu_disable_counter(i);
+		m1_pmu_disable_counter_interrupt(i);
+		m1_pmu_write_hw_counter(0, i);
+	}
+
+	isb();
+}
+
+static int m1_pmu_set_event_filter(struct hw_perf_event *event,
+				   struct perf_event_attr *attr)
+{
+	unsigned long config_base = 0;
+
+	if (!attr->exclude_guest)
+		return -EINVAL;
+	if (!attr->exclude_kernel)
+		config_base |= M1_PMU_CFG_COUNT_KERNEL;
+	if (!attr->exclude_user)
+		config_base |= M1_PMU_CFG_COUNT_USER;
+
+	event->config_base = config_base;
+
+	return 0;
+}
+
+static int m1_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	cpu_pmu->handle_irq	  = m1_pmu_handle_irq;
+	cpu_pmu->enable		  = m1_pmu_enable_event;
+	cpu_pmu->disable	  = m1_pmu_disable_event;
+	cpu_pmu->read_counter	  = m1_pmu_read_counter;
+	cpu_pmu->write_counter	  = m1_pmu_write_counter;
+	cpu_pmu->get_event_idx	  = m1_pmu_get_event_idx;
+	cpu_pmu->clear_event_idx  = m1_pmu_clear_event_idx;
+	cpu_pmu->start		  = m1_pmu_start;
+	cpu_pmu->stop		  = m1_pmu_stop;
+	cpu_pmu->map_event	  = m1_pmu_map_event;
+	cpu_pmu->reset		  = m1_pmu_reset;
+	cpu_pmu->set_event_filter = m1_pmu_set_event_filter;
+
+	cpu_pmu->num_events	  = M1_PMU_NR_COUNTERS;
+	cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = &m1_pmu_events_attr_group;
+	cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = &m1_pmu_format_attr_group;
+	return 0;
+}
+
+/* Device driver gunk */
+static int m1_pmu_ice_init(struct arm_pmu *cpu_pmu)
+{
+	cpu_pmu->name = "apple_icestorm_pmu";
+	return m1_pmu_init(cpu_pmu);
+}
+
+static int m1_pmu_fire_init(struct arm_pmu *cpu_pmu)
+{
+	cpu_pmu->name = "apple_firestorm_pmu";
+	return m1_pmu_init(cpu_pmu);
+}
+
+static const struct of_device_id m1_pmu_of_device_ids[] = {
+	{ .compatible = "apple,icestorm-pmu",	.data = m1_pmu_ice_init, },
+	{ .compatible = "apple,firestorm-pmu",	.data = m1_pmu_fire_init, },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, m1_pmu_of_device_ids);
+
+static int m1_pmu_device_probe(struct platform_device *pdev)
+{
+	return arm_pmu_device_probe(pdev, m1_pmu_of_device_ids, NULL);
+}
+
+static struct platform_driver m1_pmu_driver = {
+	.driver		= {
+		.name			= "apple-m1-cpu-pmu",
+		.of_match_table		= m1_pmu_of_device_ids,
+		.suppress_bind_attrs	= true,
+	},
+	.probe		= m1_pmu_device_probe,
+};
+
+module_platform_driver(m1_pmu_driver);
+MODULE_LICENSE("GPL v2");
-- 
GitLab


From f38312c9b569322edf4baae467568206fe46d57b Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Wed, 23 Feb 2022 14:50:31 +0100
Subject: [PATCH 1096/1586] ACPI / x86: Add skip i2c clients quirk for Nextbook
 Ares 8

The Nextbook Ares 8 is a x86 ACPI tablet which ships with Android x86
as factory OS. Its DSDT contains a bunch of I2C devices which are not
actually there, causing various resource conflicts (the Android x86
kernel fork ignores I2C devices described in the DSDT).

Add a ACPI_QUIRK_SKIP_I2C_CLIENTS for the Nextbook Ares 8 to the
acpi_quirk_skip_dmi_ids table to woraround this.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/x86/utils.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/acpi/x86/utils.c b/drivers/acpi/x86/utils.c
index ffdeed5334d6f..9b991294f1e55 100644
--- a/drivers/acpi/x86/utils.c
+++ b/drivers/acpi/x86/utils.c
@@ -284,6 +284,15 @@ static const struct dmi_system_id acpi_quirk_skip_dmi_ids[] = {
 		.driver_data = (void *)(ACPI_QUIRK_SKIP_I2C_CLIENTS |
 					ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY),
 	},
+	{
+		/* Nextbook Ares 8 */
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Insyde"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "M890BAP"),
+		},
+		.driver_data = (void *)(ACPI_QUIRK_SKIP_I2C_CLIENTS |
+					ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY),
+	},
 	{
 		/* Whitelabel (sold as various brands) TM800A550L */
 		.matches = {
-- 
GitLab


From 4fecb1e93e4914fc0bc1fb467ca79741f9f94abb Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Wed, 23 Feb 2022 14:50:32 +0100
Subject: [PATCH 1097/1586] ACPI / x86: Add skip i2c clients quirk for Lenovo
 Yoga Tablet 1050F/L

The Yoga Tablet 1050F/L is a x86 ACPI tablet which ships with Android x86
as factory OS. Its DSDT contains a bunch of I2C devices which are not
actually there, causing various resource conflicts (the Android x86
kernel fork ignores I2C devices described in the DSDT).

Add a ACPI_QUIRK_SKIP_I2C_CLIENTS for the Nextbook Ares 8 to the
acpi_quirk_skip_dmi_ids table to woraround this.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/x86/utils.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/acpi/x86/utils.c b/drivers/acpi/x86/utils.c
index 9b991294f1e55..664070fc83498 100644
--- a/drivers/acpi/x86/utils.c
+++ b/drivers/acpi/x86/utils.c
@@ -284,6 +284,18 @@ static const struct dmi_system_id acpi_quirk_skip_dmi_ids[] = {
 		.driver_data = (void *)(ACPI_QUIRK_SKIP_I2C_CLIENTS |
 					ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY),
 	},
+	{
+		/* Lenovo Yoga Tablet 1050F/L */
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Intel Corp."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "VALLEYVIEW C0 PLATFORM"),
+			DMI_MATCH(DMI_BOARD_NAME, "BYT-T FFD8"),
+			/* Partial match on beginning of BIOS version */
+			DMI_MATCH(DMI_BIOS_VERSION, "BLADE_21"),
+		},
+		.driver_data = (void *)(ACPI_QUIRK_SKIP_I2C_CLIENTS |
+					ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY),
+	},
 	{
 		/* Nextbook Ares 8 */
 		.matches = {
-- 
GitLab


From ee0a4dc9f317fb9a97f20037d219802ca8de939b Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 8 Mar 2022 17:28:38 +0100
Subject: [PATCH 1098/1586] Revert "netfilter: conntrack: tag conntracks picked
 up in local out hook"

This was a prerequisite for the ill-fated
"netfilter: nat: force port remap to prevent shadowing well-known ports".

As this has been reverted, this change can be backed out too.

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 include/net/netfilter/nf_conntrack.h | 1 -
 net/netfilter/nf_conntrack_core.c    | 3 ---
 2 files changed, 4 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 8731d5bcb47dd..b08b70989d2cf 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -97,7 +97,6 @@ struct nf_conn {
 	unsigned long status;
 
 	u16		cpu;
-	u16		local_origin:1;
 	possible_net_t ct_net;
 
 #if IS_ENABLED(CONFIG_NF_NAT)
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index d6aa5b47031eb..bf1e17c678f13 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1748,9 +1748,6 @@ resolve_normal_ct(struct nf_conn *tmpl,
 			return 0;
 		if (IS_ERR(h))
 			return PTR_ERR(h);
-
-		ct = nf_ct_tuplehash_to_ctrack(h);
-		ct->local_origin = state->hook == NF_INET_LOCAL_OUT;
 	}
 	ct = nf_ct_tuplehash_to_ctrack(h);
 
-- 
GitLab


From 92abe50490d36ac2ee56f63db24adee219ed63c5 Mon Sep 17 00:00:00 2001
From: "Minghao Chi (CGEL ZTE)" <chi.minghao@zte.com.cn>
Date: Mon, 7 Mar 2022 03:36:31 +0000
Subject: [PATCH 1099/1586] hwmon: (vexpress-hwmon) Use
 of_device_get_match_data()

Use of_device_get_match_data() to simplify the code.

Reported-by: Zeal Robot <zealci@zte.com.cn>
Signed-off-by: Minghao Chi (CGEL ZTE) <chi.minghao@zte.com.cn>
Link: https://lore.kernel.org/r/20220307033631.2075160-1-chi.minghao@zte.com.cn
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/vexpress-hwmon.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/hwmon/vexpress-hwmon.c b/drivers/hwmon/vexpress-hwmon.c
index 44d798be3d599..2ac5fb96bba48 100644
--- a/drivers/hwmon/vexpress-hwmon.c
+++ b/drivers/hwmon/vexpress-hwmon.c
@@ -207,7 +207,6 @@ MODULE_DEVICE_TABLE(of, vexpress_hwmon_of_match);
 
 static int vexpress_hwmon_probe(struct platform_device *pdev)
 {
-	const struct of_device_id *match;
 	struct vexpress_hwmon_data *data;
 	const struct vexpress_hwmon_type *type;
 
@@ -216,10 +215,9 @@ static int vexpress_hwmon_probe(struct platform_device *pdev)
 		return -ENOMEM;
 	platform_set_drvdata(pdev, data);
 
-	match = of_match_device(vexpress_hwmon_of_match, &pdev->dev);
-	if (!match)
+	type = of_device_get_match_data(&pdev->dev);
+	if (!type)
 		return -ENODEV;
-	type = match->data;
 
 	data->reg = devm_regmap_init_vexpress_config(&pdev->dev);
 	if (IS_ERR(data->reg))
-- 
GitLab


From 5d4a2ea96b79e7e11f1551130bb9166258b71243 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nuno=20S=C3=A1?= <nuno.sa@analog.com>
Date: Tue, 8 Mar 2022 14:54:08 +0100
Subject: [PATCH 1100/1586] hwmon: (axi-fan-control) Use hwmon_notify_event
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Instead of directly accessing kobj directly from the driver, use the
hwmon notify API.

Signed-off-by: Nuno Sá <nuno.sa@analog.com>
Link: https://lore.kernel.org/r/20220308135408.440744-1-nuno.sa@analog.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/axi-fan-control.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/hwmon/axi-fan-control.c b/drivers/hwmon/axi-fan-control.c
index d2092c17d9935..96c4a5c45291f 100644
--- a/drivers/hwmon/axi-fan-control.c
+++ b/drivers/hwmon/axi-fan-control.c
@@ -339,7 +339,8 @@ static irqreturn_t axi_fan_control_irq_handler(int irq, void *data)
 			ctl->update_tacho_params = true;
 		} else {
 			ctl->hw_pwm_req = false;
-			sysfs_notify(&ctl->hdev->kobj, NULL, "pwm1");
+			hwmon_notify_event(ctl->hdev, hwmon_pwm,
+					   hwmon_pwm_input, 0);
 		}
 	}
 
-- 
GitLab


From 2594703044ad2ef7ea7d463bf2cf82a13f7fc266 Mon Sep 17 00:00:00 2001
From: Georgi Vlaev <g-vlaev@ti.com>
Date: Tue, 8 Mar 2022 15:44:54 +0200
Subject: [PATCH 1101/1586] regulator: Convert TPS62360 binding to json-schema

Convert the TPS62360 regulator binding to DT schema format
using json-schema.

This also removes redundant "ti,enable-force-pwm" property
from the example. The property description was removed from the
original text binding file by commit 9a00630c3db8 ("regulator:
tps62360: support force PWM mode via regulator mode"), but a
reference to it remained in the example.

Signed-off-by: Georgi Vlaev <g-vlaev@ti.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Reviewed-by: Laxman Dewangan <ldewangan@nvidia.com>
Link: https://lore.kernel.org/r/20220308134454.14504-1-g-vlaev@ti.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../bindings/regulator/ti,tps62360.yaml       | 98 +++++++++++++++++++
 .../bindings/regulator/tps62360-regulator.txt | 44 ---------
 2 files changed, 98 insertions(+), 44 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/regulator/ti,tps62360.yaml
 delete mode 100644 Documentation/devicetree/bindings/regulator/tps62360-regulator.txt

diff --git a/Documentation/devicetree/bindings/regulator/ti,tps62360.yaml b/Documentation/devicetree/bindings/regulator/ti,tps62360.yaml
new file mode 100644
index 0000000000000..12aeddedde05c
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/ti,tps62360.yaml
@@ -0,0 +1,98 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/regulator/ti,tps62360.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Texas Instruments TPS6236x Voltage Regulators
+
+maintainers:
+  - Laxman Dewangan <ldewangan@nvidia.com>
+
+description: |
+  The TPS6236x are a family of step down dc-dc converter with
+  an input voltage range of 2.5V to 5.5V. The devices provide
+  up to 3A peak load current, and an output voltage range of
+  0.77V to 1.4V (TPS62360/62) and 0.5V to 1.77V (TPS62361B/63).
+
+  Datasheet is available at:
+  https://www.ti.com/lit/gpn/tps62360
+
+allOf:
+  - $ref: "regulator.yaml#"
+
+properties:
+  compatible:
+    enum:
+      - ti,tps62360
+      - ti,tps62361
+      - ti,tps62362
+      - ti,tps62363
+
+  reg:
+    maxItems: 1
+
+  ti,vsel0-gpio:
+    description: |
+      GPIO for controlling VSEL0 line. If this property
+      is missing, then assume that there is no GPIO for
+      VSEL0 control.
+    maxItems: 1
+
+  ti,vsel1-gpio:
+    description: |
+      GPIO for controlling VSEL1 line. If this property
+      is missing, then assume that there is no GPIO for
+      VSEL1 control.
+    maxItems: 1
+
+  ti,enable-vout-discharge:
+    description: Enable output discharge.
+    type: boolean
+
+  ti,enable-pull-down:
+    description: Enable pull down.
+    type: boolean
+
+  ti,vsel0-state-high:
+    description: |
+      Initial state of VSEL0 input is high. If this property
+      is missing, then assume the state as low.
+    type: boolean
+
+  ti,vsel1-state-high:
+    description: |
+      Initial state of VSEL1 input is high. If this property
+      is missing, then assume the state as low.
+    type: boolean
+
+required:
+  - compatible
+  - reg
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/gpio/gpio.h>
+    i2c {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        regulator@60 {
+            compatible = "ti,tps62361";
+            reg = <0x60>;
+            regulator-name = "tps62361-vout";
+            regulator-min-microvolt = <500000>;
+            regulator-max-microvolt = <1500000>;
+            regulator-boot-on;
+            ti,vsel0-gpio = <&gpio1 16 GPIO_ACTIVE_HIGH>;
+            ti,vsel1-gpio = <&gpio1 17 GPIO_ACTIVE_HIGH>;
+            ti,vsel0-state-high;
+            ti,vsel1-state-high;
+            ti,enable-pull-down;
+            ti,enable-vout-discharge;
+        };
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/regulator/tps62360-regulator.txt b/Documentation/devicetree/bindings/regulator/tps62360-regulator.txt
deleted file mode 100644
index 1b20c3dbcdb89..0000000000000
--- a/Documentation/devicetree/bindings/regulator/tps62360-regulator.txt
+++ /dev/null
@@ -1,44 +0,0 @@
-TPS62360 Voltage regulators
-
-Required properties:
-- compatible: Must be one of the following.
-	"ti,tps62360"
-	"ti,tps62361",
-	"ti,tps62362",
-	"ti,tps62363",
-- reg: I2C slave address
-
-Optional properties:
-- ti,enable-vout-discharge: Enable output discharge. This is boolean value.
-- ti,enable-pull-down: Enable pull down. This is boolean value.
-- ti,vsel0-gpio: GPIO for controlling VSEL0 line.
-  If this property is missing, then assume that there is no GPIO
-  for vsel0 control.
-- ti,vsel1-gpio: Gpio for controlling VSEL1 line.
-  If this property is missing, then assume that there is no GPIO
-  for vsel1 control.
-- ti,vsel0-state-high: Initial state of vsel0 input is high.
-  If this property is missing, then assume the state as low (0).
-- ti,vsel1-state-high: Initial state of vsel1 input is high.
-  If this property is missing, then assume the state as low (0).
-
-Any property defined as part of the core regulator binding, defined in
-regulator.txt, can also be used.
-
-Example:
-
-	abc: tps62360 {
-		compatible = "ti,tps62361";
-		reg =  <0x60>;
-		regulator-name = "tps62361-vout";
-		regulator-min-microvolt = <500000>;
-		regulator-max-microvolt = <1500000>;
-		regulator-boot-on
-		ti,vsel0-gpio = <&gpio1 16 0>;
-		ti,vsel1-gpio = <&gpio1 17 0>;
-		ti,vsel0-state-high;
-		ti,vsel1-state-high;
-		ti,enable-pull-down;
-		ti,enable-force-pwm;
-		ti,enable-vout-discharge;
-	};
-- 
GitLab


From 363d3c51bc5b3243b5b035a1f50d6d994a1b203f Mon Sep 17 00:00:00 2001
From: Alim Akhtar <alim.akhtar@samsung.com>
Date: Tue, 8 Mar 2022 17:46:39 +0530
Subject: [PATCH 1102/1586] spi: dt-bindings: samsung: Add fsd spi compatible

Adds spi controller dt-binding compatible information for
Tesla Full Self-Driving (FSD) SoC.

Cc: linux-fsd@tesla.com
Signed-off-by: Adithya K V <adithya.kv@samsung.com>
Signed-off-by: Alim Akhtar <alim.akhtar@samsung.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Reviewed-by: Andi Shyti <andi@etezian.org>
Link: https://lore.kernel.org/r/20220308121640.27344-1-alim.akhtar@samsung.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 Documentation/devicetree/bindings/spi/samsung,spi.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/spi/samsung,spi.yaml b/Documentation/devicetree/bindings/spi/samsung,spi.yaml
index 61c77088e8ee6..bf9a76d931d24 100644
--- a/Documentation/devicetree/bindings/spi/samsung,spi.yaml
+++ b/Documentation/devicetree/bindings/spi/samsung,spi.yaml
@@ -21,6 +21,7 @@ properties:
           - samsung,s3c6410-spi
           - samsung,s5pv210-spi # for S5PV210 and S5PC110
           - samsung,exynos5433-spi
+          - tesla,fsd-spi
       - const: samsung,exynos7-spi
         deprecated: true
 
-- 
GitLab


From 4ebb15a15799da4954f1d4926fcd3263ea46e417 Mon Sep 17 00:00:00 2001
From: Alim Akhtar <alim.akhtar@samsung.com>
Date: Tue, 8 Mar 2022 17:46:40 +0530
Subject: [PATCH 1103/1586] spi: s3c64xx: Add spi port configuration for Tesla
 FSD SoC

Add compatible and port configuration for spi controller
for Tesla Full Self-Driving SoC.

Cc: linux-fsd@tesla.com
Signed-off-by: Aswani Reddy <aswani.reddy@samsung.com>
Signed-off-by: Alim Akhtar <alim.akhtar@samsung.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Reviewed-by: Andi Shyti <andi@etezian.org>
Link: https://lore.kernel.org/r/20220308121640.27344-2-alim.akhtar@samsung.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-s3c64xx.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c
index 386550fca81cc..423518bf02704 100644
--- a/drivers/spi/spi-s3c64xx.c
+++ b/drivers/spi/spi-s3c64xx.c
@@ -1440,6 +1440,16 @@ static const struct s3c64xx_spi_port_config exynos5433_spi_port_config = {
 	.quirks		= S3C64XX_SPI_QUIRK_CS_AUTO,
 };
 
+static struct s3c64xx_spi_port_config fsd_spi_port_config = {
+	.fifo_lvl_mask	= { 0x7f, 0x7f, 0x7f, 0x7f, 0x7f},
+	.rx_lvl_offset	= 15,
+	.tx_st_done	= 25,
+	.high_speed	= true,
+	.clk_from_cmu	= true,
+	.clk_ioclk	= false,
+	.quirks		= S3C64XX_SPI_QUIRK_CS_AUTO,
+};
+
 static const struct platform_device_id s3c64xx_spi_driver_ids[] = {
 	{
 		.name		= "s3c2443-spi",
@@ -1470,6 +1480,9 @@ static const struct of_device_id s3c64xx_spi_dt_match[] = {
 	{ .compatible = "samsung,exynos5433-spi",
 			.data = (void *)&exynos5433_spi_port_config,
 	},
+	{ .compatible = "tesla,fsd-spi",
+			.data = (void *)&fsd_spi_port_config,
+	},
 	{ },
 };
 MODULE_DEVICE_TABLE(of, s3c64xx_spi_dt_match);
-- 
GitLab


From fd8af343a299df264c41b1219358f502c7b5b18a Mon Sep 17 00:00:00 2001
From: Huang Rui <ray.huang@amd.com>
Date: Mon, 14 Feb 2022 18:14:47 +0800
Subject: [PATCH 1104/1586] x86/ACPI: CPPC: Rename cppc_msr.c to cppc.c

Rename the cppc_msr.c to cppc.c in x86 ACPI, that expects to use this file
to cover more function implementation for ACPI CPPC beside MSR helpers.
Naming as "cppc" is more straightforward as one of the functionalities
under ACPI subsystem.

Signed-off-by: Huang Rui <ray.huang@amd.com>
[ rjw: Subject ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 arch/x86/kernel/acpi/Makefile               | 2 +-
 arch/x86/kernel/acpi/{cppc_msr.c => cppc.c} | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)
 rename arch/x86/kernel/acpi/{cppc_msr.c => cppc.c} (96%)

diff --git a/arch/x86/kernel/acpi/Makefile b/arch/x86/kernel/acpi/Makefile
index cf340d85946a8..fc17b3f136fec 100644
--- a/arch/x86/kernel/acpi/Makefile
+++ b/arch/x86/kernel/acpi/Makefile
@@ -3,7 +3,7 @@
 obj-$(CONFIG_ACPI)		+= boot.o
 obj-$(CONFIG_ACPI_SLEEP)	+= sleep.o wakeup_$(BITS).o
 obj-$(CONFIG_ACPI_APEI)		+= apei.o
-obj-$(CONFIG_ACPI_CPPC_LIB)	+= cppc_msr.o
+obj-$(CONFIG_ACPI_CPPC_LIB)	+= cppc.o
 
 ifneq ($(CONFIG_ACPI_PROCESSOR),)
 obj-y				+= cstate.o
diff --git a/arch/x86/kernel/acpi/cppc_msr.c b/arch/x86/kernel/acpi/cppc.c
similarity index 96%
rename from arch/x86/kernel/acpi/cppc_msr.c
rename to arch/x86/kernel/acpi/cppc.c
index b961de569e7e5..08d823d72586e 100644
--- a/arch/x86/kernel/acpi/cppc_msr.c
+++ b/arch/x86/kernel/acpi/cppc.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * cppc_msr.c:  MSR Interface for CPPC
+ * cppc.c: CPPC Interface for x86
  * Copyright (c) 2016, Intel Corporation.
  */
 
-- 
GitLab


From 82d89369141b8fab56deff540121b8d09d289bfd Mon Sep 17 00:00:00 2001
From: Huang Rui <ray.huang@amd.com>
Date: Mon, 14 Feb 2022 18:14:48 +0800
Subject: [PATCH 1105/1586] x86/ACPI: CPPC: Move AMD maximum frequency ratio
 setting function into x86 CPPC

The AMD maximum frequency ratio setting function depends on CPPC, so the
x86 CPPC implementation file is better space for this function.

Signed-off-by: Huang Rui <ray.huang@amd.com>
[ rjw: Subject adjustment ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 arch/x86/include/asm/topology.h |  9 +++++++
 arch/x86/kernel/acpi/cppc.c     | 40 ++++++++++++++++++++++++++++++
 arch/x86/kernel/smpboot.c       | 44 +--------------------------------
 3 files changed, 50 insertions(+), 43 deletions(-)

diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 2f0b6be8eaabc..168ade7d40078 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -226,4 +226,13 @@ void init_freq_invariance_cppc(void);
 #define init_freq_invariance_cppc init_freq_invariance_cppc
 #endif
 
+#ifdef CONFIG_ACPI_CPPC_LIB
+bool amd_set_max_freq_ratio(u64 *ratio);
+#else
+static inline bool amd_set_max_freq_ratio(u64 *ratio)
+{
+	return false;
+}
+#endif
+
 #endif /* _ASM_X86_TOPOLOGY_H */
diff --git a/arch/x86/kernel/acpi/cppc.c b/arch/x86/kernel/acpi/cppc.c
index 08d823d72586e..f0be5058e3e3e 100644
--- a/arch/x86/kernel/acpi/cppc.c
+++ b/arch/x86/kernel/acpi/cppc.c
@@ -6,6 +6,8 @@
 
 #include <acpi/cppc_acpi.h>
 #include <asm/msr.h>
+#include <asm/processor.h>
+#include <asm/topology.h>
 
 /* Refer to drivers/acpi/cppc_acpi.c for the description of functions */
 
@@ -47,3 +49,41 @@ int cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val)
 	}
 	return err;
 }
+
+bool amd_set_max_freq_ratio(u64 *ratio)
+{
+	struct cppc_perf_caps perf_caps;
+	u64 highest_perf, nominal_perf;
+	u64 perf_ratio;
+	int rc;
+
+	if (!ratio)
+		return false;
+
+	rc = cppc_get_perf_caps(0, &perf_caps);
+	if (rc) {
+		pr_debug("Could not retrieve perf counters (%d)\n", rc);
+		return false;
+	}
+
+	highest_perf = amd_get_highest_perf();
+	nominal_perf = perf_caps.nominal_perf;
+
+	if (!highest_perf || !nominal_perf) {
+		pr_debug("Could not retrieve highest or nominal performance\n");
+		return false;
+	}
+
+	perf_ratio = div_u64(highest_perf * SCHED_CAPACITY_SCALE, nominal_perf);
+	/* midpoint between max_boost and max_P */
+	perf_ratio = (perf_ratio + SCHED_CAPACITY_SCALE) >> 1;
+	if (!perf_ratio) {
+		pr_debug("Non-zero highest/nominal perf values led to a 0 ratio\n");
+		return false;
+	}
+
+	*ratio = perf_ratio;
+	arch_set_max_freq_ratio(false);
+
+	return true;
+}
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 617012f4619f0..0718cc7649a44 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -2097,48 +2097,6 @@ out:
 	return true;
 }
 
-#ifdef CONFIG_ACPI_CPPC_LIB
-static bool amd_set_max_freq_ratio(void)
-{
-	struct cppc_perf_caps perf_caps;
-	u64 highest_perf, nominal_perf;
-	u64 perf_ratio;
-	int rc;
-
-	rc = cppc_get_perf_caps(0, &perf_caps);
-	if (rc) {
-		pr_debug("Could not retrieve perf counters (%d)\n", rc);
-		return false;
-	}
-
-	highest_perf = amd_get_highest_perf();
-	nominal_perf = perf_caps.nominal_perf;
-
-	if (!highest_perf || !nominal_perf) {
-		pr_debug("Could not retrieve highest or nominal performance\n");
-		return false;
-	}
-
-	perf_ratio = div_u64(highest_perf * SCHED_CAPACITY_SCALE, nominal_perf);
-	/* midpoint between max_boost and max_P */
-	perf_ratio = (perf_ratio + SCHED_CAPACITY_SCALE) >> 1;
-	if (!perf_ratio) {
-		pr_debug("Non-zero highest/nominal perf values led to a 0 ratio\n");
-		return false;
-	}
-
-	arch_turbo_freq_ratio = perf_ratio;
-	arch_set_max_freq_ratio(false);
-
-	return true;
-}
-#else
-static bool amd_set_max_freq_ratio(void)
-{
-	return false;
-}
-#endif
-
 static void init_counter_refs(void)
 {
 	u64 aperf, mperf;
@@ -2187,7 +2145,7 @@ static void init_freq_invariance(bool secondary, bool cppc_ready)
 		if (!cppc_ready) {
 			return;
 		}
-		ret = amd_set_max_freq_ratio();
+		ret = amd_set_max_freq_ratio(&arch_turbo_freq_ratio);
 	}
 
 	if (ret) {
-- 
GitLab


From 666f6ecf35bc3a14ab01fa761a7e17afac04dcd5 Mon Sep 17 00:00:00 2001
From: Huang Rui <ray.huang@amd.com>
Date: Mon, 14 Feb 2022 18:14:49 +0800
Subject: [PATCH 1106/1586] x86: Expose init_freq_invariance() to topology
 header

The function init_freq_invariance will be used on x86 CPPC, so expose it in
the topology header.

Signed-off-by: Huang Rui <ray.huang@amd.com>
[ rjw: Subject adjustment ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 arch/x86/include/asm/topology.h | 4 ++++
 arch/x86/kernel/smpboot.c       | 8 +-------
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 168ade7d40078..c35005a037683 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -215,10 +215,14 @@ extern void arch_scale_freq_tick(void);
 #define arch_scale_freq_tick arch_scale_freq_tick
 
 extern void arch_set_max_freq_ratio(bool turbo_disabled);
+void init_freq_invariance(bool secondary, bool cppc_ready);
 #else
 static inline void arch_set_max_freq_ratio(bool turbo_disabled)
 {
 }
+static inline void init_freq_invariance(bool secondary, bool cppc_ready)
+{
+}
 #endif
 
 #if defined(CONFIG_ACPI_CPPC_LIB) && defined(CONFIG_SMP)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 0718cc7649a44..8f2ff9be0fcc3 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -155,8 +155,6 @@ static inline void smpboot_restore_warm_reset_vector(void)
 	*((volatile u32 *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = 0;
 }
 
-static void init_freq_invariance(bool secondary, bool cppc_ready);
-
 /*
  * Report back to the Boot Processor during boot time or to the caller processor
  * during CPU online.
@@ -2125,7 +2123,7 @@ static void register_freq_invariance_syscore_ops(void)
 static inline void register_freq_invariance_syscore_ops(void) {}
 #endif
 
-static void init_freq_invariance(bool secondary, bool cppc_ready)
+void init_freq_invariance(bool secondary, bool cppc_ready)
 {
 	bool ret = false;
 
@@ -2222,8 +2220,4 @@ error:
 	pr_warn("Scheduler frequency invariance went wobbly, disabling!\n");
 	schedule_work(&disable_freq_invariance_work);
 }
-#else
-static inline void init_freq_invariance(bool secondary, bool cppc_ready)
-{
-}
 #endif /* CONFIG_X86_64 */
-- 
GitLab


From eb5616d4adeebf958a42822ee440d79e9f803bf7 Mon Sep 17 00:00:00 2001
From: Huang Rui <ray.huang@amd.com>
Date: Mon, 14 Feb 2022 18:14:50 +0800
Subject: [PATCH 1107/1586] x86/ACPI: CPPC: Move init_freq_invariance_cppc()
 into x86 CPPC

The init_freq_invariance_cppc code actually doesn't need the SMP
functionality. So setting the CONFIG_SMP as the check condition for
init_freq_invariance_cppc may cause the confusion to misunderstand the
CPPC. And the x86 CPPC file is better space to store the CPPC related
functions, while the init_freq_invariance_cppc is out of smpboot, that
means, the CONFIG_SMP won't be mandatory condition any more. And It's more
clear than before.

Signed-off-by: Huang Rui <ray.huang@amd.com>
[ rjw: Subject adjustment ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 arch/x86/include/asm/topology.h |  4 +---
 arch/x86/kernel/acpi/cppc.c     | 14 ++++++++++++++
 arch/x86/kernel/smpboot.c       | 20 --------------------
 3 files changed, 15 insertions(+), 23 deletions(-)

diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index c35005a037683..9c73d62f72760 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -225,12 +225,10 @@ static inline void init_freq_invariance(bool secondary, bool cppc_ready)
 }
 #endif
 
-#if defined(CONFIG_ACPI_CPPC_LIB) && defined(CONFIG_SMP)
+#ifdef CONFIG_ACPI_CPPC_LIB
 void init_freq_invariance_cppc(void);
 #define init_freq_invariance_cppc init_freq_invariance_cppc
-#endif
 
-#ifdef CONFIG_ACPI_CPPC_LIB
 bool amd_set_max_freq_ratio(u64 *ratio);
 #else
 static inline bool amd_set_max_freq_ratio(u64 *ratio)
diff --git a/arch/x86/kernel/acpi/cppc.c b/arch/x86/kernel/acpi/cppc.c
index f0be5058e3e3e..df1644d9b3b66 100644
--- a/arch/x86/kernel/acpi/cppc.c
+++ b/arch/x86/kernel/acpi/cppc.c
@@ -87,3 +87,17 @@ bool amd_set_max_freq_ratio(u64 *ratio)
 
 	return true;
 }
+
+static DEFINE_MUTEX(freq_invariance_lock);
+
+void init_freq_invariance_cppc(void)
+{
+	static bool secondary;
+
+	mutex_lock(&freq_invariance_lock);
+
+	init_freq_invariance(secondary, true);
+	secondary = true;
+
+	mutex_unlock(&freq_invariance_lock);
+}
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 8f2ff9be0fcc3..2ef14772dc047 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -83,10 +83,6 @@
 #include <asm/hw_irq.h>
 #include <asm/stackprotector.h>
 
-#ifdef CONFIG_ACPI_CPPC_LIB
-#include <acpi/cppc_acpi.h>
-#endif
-
 /* representing HT siblings of each logical CPU */
 DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map);
 EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
@@ -2156,22 +2152,6 @@ void init_freq_invariance(bool secondary, bool cppc_ready)
 	}
 }
 
-#ifdef CONFIG_ACPI_CPPC_LIB
-static DEFINE_MUTEX(freq_invariance_lock);
-
-void init_freq_invariance_cppc(void)
-{
-	static bool secondary;
-
-	mutex_lock(&freq_invariance_lock);
-
-	init_freq_invariance(secondary, true);
-	secondary = true;
-
-	mutex_unlock(&freq_invariance_lock);
-}
-#endif
-
 static void disable_freq_invariance_workfn(struct work_struct *work)
 {
 	static_branch_disable(&arch_scale_freq_key);
-- 
GitLab


From f3303ff649dbf7dcdc6a6e1a922235b12b3028f4 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sat, 5 Mar 2022 18:46:20 -0800
Subject: [PATCH 1108/1586] ACPI: APEI: fix return value of __setup handlers

__setup() handlers should return 1 to indicate that the boot option
has been handled. Returning 0 causes a boot option to be listed in
the Unknown kernel command line parameters and also added to init's
arg list (if no '=' sign) or environment list (if of the form 'a=b').

Unknown kernel command line parameters "erst_disable
  bert_disable hest_disable BOOT_IMAGE=/boot/bzImage-517rc6", will be
  passed to user space.

 Run /sbin/init as init process
   with arguments:
     /sbin/init
     erst_disable
     bert_disable
     hest_disable
   with environment:
     HOME=/
     TERM=linux
     BOOT_IMAGE=/boot/bzImage-517rc6

Fixes: a3e2acc5e37b ("ACPI / APEI: Add Boot Error Record Table (BERT) support")
Fixes: a08f82d08053 ("ACPI, APEI, Error Record Serialization Table (ERST) support")
Fixes: 9dc966641677 ("ACPI, APEI, HEST table parsing")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Reported-by: Igor Zhbanov <i.zhbanov@omprussia.ru>
Link: lore.kernel.org/r/64644a2f-4a20-bab3-1e15-3b2cdd0defe3@omprussia.ru
Reviewed-by: "Huang, Ying" <ying.huang@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/apei/bert.c | 2 +-
 drivers/acpi/apei/erst.c | 2 +-
 drivers/acpi/apei/hest.c | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/acpi/apei/bert.c b/drivers/acpi/apei/bert.c
index 19e50fcbf4d6f..86211422f4eee 100644
--- a/drivers/acpi/apei/bert.c
+++ b/drivers/acpi/apei/bert.c
@@ -77,7 +77,7 @@ static int __init setup_bert_disable(char *str)
 {
 	bert_disable = 1;
 
-	return 0;
+	return 1;
 }
 __setup("bert_disable", setup_bert_disable);
 
diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c
index 242f3c2d55330..698d67cee0527 100644
--- a/drivers/acpi/apei/erst.c
+++ b/drivers/acpi/apei/erst.c
@@ -891,7 +891,7 @@ EXPORT_SYMBOL_GPL(erst_clear);
 static int __init setup_erst_disable(char *str)
 {
 	erst_disable = 1;
-	return 0;
+	return 1;
 }
 
 __setup("erst_disable", setup_erst_disable);
diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c
index 0edc1ed476737..6aef1ee5e1bdb 100644
--- a/drivers/acpi/apei/hest.c
+++ b/drivers/acpi/apei/hest.c
@@ -224,7 +224,7 @@ err:
 static int __init setup_hest_disable(char *str)
 {
 	hest_disable = HEST_DISABLED;
-	return 0;
+	return 1;
 }
 
 __setup("hest_disable", setup_hest_disable);
-- 
GitLab


From a99a3e2efaf1f4454eb5c9176f47e66de075b134 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 31 Jan 2022 11:50:46 -0600
Subject: [PATCH 1109/1586] coredump: Move definition of struct coredump_params
 into coredump.h

Move the definition of struct coredump_params into coredump.h where
it belongs.

Remove the slightly errorneous comment explaining why struct
coredump_params was declared in binfmts.h.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/binfmt_flat.c         |  1 +
 include/linux/binfmts.h  | 13 +------------
 include/linux/coredump.h | 12 +++++++++++-
 3 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 5f0bf24bb3b85..208cdce16de1b 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -37,6 +37,7 @@
 #include <linux/flat.h>
 #include <linux/uaccess.h>
 #include <linux/vmalloc.h>
+#include <linux/coredump.h>
 
 #include <asm/byteorder.h>
 #include <asm/unaligned.h>
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index 5d651c219c99b..3dc20c4f394cf 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -8,6 +8,7 @@
 #include <uapi/linux/binfmts.h>
 
 struct filename;
+struct coredump_params;
 
 #define CORENAME_MAX_SIZE 128
 
@@ -77,18 +78,6 @@ struct linux_binprm {
 #define BINPRM_FLAGS_PRESERVE_ARGV0_BIT 3
 #define BINPRM_FLAGS_PRESERVE_ARGV0 (1 << BINPRM_FLAGS_PRESERVE_ARGV0_BIT)
 
-/* Function parameter for binfmt->coredump */
-struct coredump_params {
-	const kernel_siginfo_t *siginfo;
-	struct pt_regs *regs;
-	struct file *file;
-	unsigned long limit;
-	unsigned long mm_flags;
-	loff_t written;
-	loff_t pos;
-	loff_t to_skip;
-};
-
 /*
  * This structure defines the functions that are used to load the binary formats that
  * linux accepts.
diff --git a/include/linux/coredump.h b/include/linux/coredump.h
index 248a68c668b45..2ee1460a1d66e 100644
--- a/include/linux/coredump.h
+++ b/include/linux/coredump.h
@@ -14,11 +14,21 @@ struct core_vma_metadata {
 	unsigned long dump_size;
 };
 
+struct coredump_params {
+	const kernel_siginfo_t *siginfo;
+	struct pt_regs *regs;
+	struct file *file;
+	unsigned long limit;
+	unsigned long mm_flags;
+	loff_t written;
+	loff_t pos;
+	loff_t to_skip;
+};
+
 /*
  * These are the only things you should do on a core-file: use only these
  * functions to write out all the necessary info.
  */
-struct coredump_params;
 extern void dump_skip_to(struct coredump_params *cprm, unsigned long to);
 extern void dump_skip(struct coredump_params *cprm, size_t nr);
 extern int dump_emit(struct coredump_params *cprm, const void *addr, int nr);
-- 
GitLab


From e702196bf85778f2c5527ca47f33ef2e2fca8297 Mon Sep 17 00:00:00 2001
From: Mark Cilissen <mark@yotsuba.nl>
Date: Mon, 7 Mar 2022 04:16:58 +0100
Subject: [PATCH 1110/1586] ACPI / x86: Work around broken XSDT on Advantech
 DAC-BJ01 board

On this board the ACPI RSDP structure points to both a RSDT and an XSDT,
but the XSDT points to a truncated FADT. This causes all sorts of trouble
and usually a complete failure to boot after the following error occurs:

  ACPI Error: Unsupported address space: 0x20 (*/hwregs-*)
  ACPI Error: AE_SUPPORT, Unable to initialize fixed events (*/evevent-*)
  ACPI: Unable to start ACPI Interpreter

This leaves the ACPI implementation in such a broken state that subsequent
kernel subsystem initialisations go wrong, resulting in among others
mismapped PCI memory, SATA and USB enumeration failures, and freezes.

As this is an older embedded platform that will likely never see any BIOS
updates to address this issue and its default shipping OS only complies to
ACPI 1.0, work around this by forcing `acpi=rsdt`. This patch, applied on
top of Linux 5.10.102, was confirmed on real hardware to fix the issue.

Signed-off-by: Mark Cilissen <mark@yotsuba.nl>
Cc: All applicable <stable@vger.kernel.org>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 arch/x86/kernel/acpi/boot.c | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 5b6d1a95776f0..0d01e7f5078c2 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1328,6 +1328,17 @@ static int __init disable_acpi_pci(const struct dmi_system_id *d)
 	return 0;
 }
 
+static int __init disable_acpi_xsdt(const struct dmi_system_id *d)
+{
+	if (!acpi_force) {
+		pr_notice("%s detected: force use of acpi=rsdt\n", d->ident);
+		acpi_gbl_do_not_use_xsdt = TRUE;
+	} else {
+		pr_notice("Warning: DMI blacklist says broken, but acpi XSDT forced\n");
+	}
+	return 0;
+}
+
 static int __init dmi_disable_acpi(const struct dmi_system_id *d)
 {
 	if (!acpi_force) {
@@ -1451,6 +1462,19 @@ static const struct dmi_system_id acpi_dmi_table[] __initconst = {
 		     DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 360"),
 		     },
 	 },
+	/*
+	 * Boxes that need ACPI XSDT use disabled due to corrupted tables
+	 */
+	{
+	 .callback = disable_acpi_xsdt,
+	 .ident = "Advantech DAC-BJ01",
+	 .matches = {
+		     DMI_MATCH(DMI_SYS_VENDOR, "NEC"),
+		     DMI_MATCH(DMI_PRODUCT_NAME, "Bearlake CRB Board"),
+		     DMI_MATCH(DMI_BIOS_VERSION, "V1.12"),
+		     DMI_MATCH(DMI_BIOS_DATE, "02/01/2011"),
+		     },
+	 },
 	{}
 };
 
-- 
GitLab


From 95c5436a4883841588dae86fb0b9325f47ba5ad3 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 8 Mar 2022 12:55:29 -0600
Subject: [PATCH 1111/1586] coredump: Snapshot the vmas in do_coredump

Move the call of dump_vma_snapshot and kvfree(vma_meta) out of the
individual coredump routines into do_coredump itself.  This makes
the code less error prone and easier to maintain.

Make the vma snapshot available to the coredump routines
in struct coredump_params.  This makes it easier to
change and update what is captures in the vma snapshot
and will be needed for fixing fill_file_notes.

Reviewed-by: Jann Horn <jannh@google.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/binfmt_elf.c          | 20 +++++++-------------
 fs/binfmt_elf_fdpic.c    | 18 ++++++------------
 fs/coredump.c            | 41 ++++++++++++++++++++++------------------
 include/linux/coredump.h |  6 +++---
 4 files changed, 39 insertions(+), 46 deletions(-)

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index eaf39b1bdbbb8..5710467cf4b60 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -2191,8 +2191,7 @@ static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
 static int elf_core_dump(struct coredump_params *cprm)
 {
 	int has_dumped = 0;
-	int vma_count, segs, i;
-	size_t vma_data_size;
+	int segs, i;
 	struct elfhdr elf;
 	loff_t offset = 0, dataoff;
 	struct elf_note_info info = { };
@@ -2200,16 +2199,12 @@ static int elf_core_dump(struct coredump_params *cprm)
 	struct elf_shdr *shdr4extnum = NULL;
 	Elf_Half e_phnum;
 	elf_addr_t e_shoff;
-	struct core_vma_metadata *vma_meta;
-
-	if (dump_vma_snapshot(cprm, &vma_count, &vma_meta, &vma_data_size))
-		return 0;
 
 	/*
 	 * The number of segs are recored into ELF header as 16bit value.
 	 * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
 	 */
-	segs = vma_count + elf_core_extra_phdrs();
+	segs = cprm->vma_count + elf_core_extra_phdrs();
 
 	/* for notes section */
 	segs++;
@@ -2248,7 +2243,7 @@ static int elf_core_dump(struct coredump_params *cprm)
 
 	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
 
-	offset += vma_data_size;
+	offset += cprm->vma_data_size;
 	offset += elf_core_extra_data_size();
 	e_shoff = offset;
 
@@ -2268,8 +2263,8 @@ static int elf_core_dump(struct coredump_params *cprm)
 		goto end_coredump;
 
 	/* Write program headers for segments dump */
-	for (i = 0; i < vma_count; i++) {
-		struct core_vma_metadata *meta = vma_meta + i;
+	for (i = 0; i < cprm->vma_count; i++) {
+		struct core_vma_metadata *meta = cprm->vma_meta + i;
 		struct elf_phdr phdr;
 
 		phdr.p_type = PT_LOAD;
@@ -2306,8 +2301,8 @@ static int elf_core_dump(struct coredump_params *cprm)
 	/* Align to page */
 	dump_skip_to(cprm, dataoff);
 
-	for (i = 0; i < vma_count; i++) {
-		struct core_vma_metadata *meta = vma_meta + i;
+	for (i = 0; i < cprm->vma_count; i++) {
+		struct core_vma_metadata *meta = cprm->vma_meta + i;
 
 		if (!dump_user_range(cprm, meta->start, meta->dump_size))
 			goto end_coredump;
@@ -2324,7 +2319,6 @@ static int elf_core_dump(struct coredump_params *cprm)
 end_coredump:
 	free_note_info(&info);
 	kfree(shdr4extnum);
-	kvfree(vma_meta);
 	kfree(phdr4note);
 	return has_dumped;
 }
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 7fa6e6632d9df..08d0c8797828c 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1465,7 +1465,7 @@ static bool elf_fdpic_dump_segments(struct coredump_params *cprm,
 static int elf_fdpic_core_dump(struct coredump_params *cprm)
 {
 	int has_dumped = 0;
-	int vma_count, segs;
+	int segs;
 	int i;
 	struct elfhdr *elf = NULL;
 	loff_t offset = 0, dataoff;
@@ -1480,8 +1480,6 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
 	elf_addr_t e_shoff;
 	struct core_thread *ct;
 	struct elf_thread_status *tmp;
-	struct core_vma_metadata *vma_meta = NULL;
-	size_t vma_data_size;
 
 	/* alloc memory for large data structures: too large to be on stack */
 	elf = kmalloc(sizeof(*elf), GFP_KERNEL);
@@ -1491,9 +1489,6 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
 	if (!psinfo)
 		goto end_coredump;
 
-	if (dump_vma_snapshot(cprm, &vma_count, &vma_meta, &vma_data_size))
-		goto end_coredump;
-
 	for (ct = current->signal->core_state->dumper.next;
 					ct; ct = ct->next) {
 		tmp = elf_dump_thread_status(cprm->siginfo->si_signo,
@@ -1513,7 +1508,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
 	tmp->next = thread_list;
 	thread_list = tmp;
 
-	segs = vma_count + elf_core_extra_phdrs();
+	segs = cprm->vma_count + elf_core_extra_phdrs();
 
 	/* for notes section */
 	segs++;
@@ -1558,7 +1553,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
 	/* Page-align dumped data */
 	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
 
-	offset += vma_data_size;
+	offset += cprm->vma_data_size;
 	offset += elf_core_extra_data_size();
 	e_shoff = offset;
 
@@ -1578,8 +1573,8 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
 		goto end_coredump;
 
 	/* write program headers for segments dump */
-	for (i = 0; i < vma_count; i++) {
-		struct core_vma_metadata *meta = vma_meta + i;
+	for (i = 0; i < cprm->vma_count; i++) {
+		struct core_vma_metadata *meta = cprm->vma_meta + i;
 		struct elf_phdr phdr;
 		size_t sz;
 
@@ -1628,7 +1623,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
 
 	dump_skip_to(cprm, dataoff);
 
-	if (!elf_fdpic_dump_segments(cprm, vma_meta, vma_count))
+	if (!elf_fdpic_dump_segments(cprm, cprm->vma_meta, cprm->vma_count))
 		goto end_coredump;
 
 	if (!elf_core_write_extra_data(cprm))
@@ -1652,7 +1647,6 @@ end_coredump:
 		thread_list = thread_list->next;
 		kfree(tmp);
 	}
-	kvfree(vma_meta);
 	kfree(phdr4note);
 	kfree(elf);
 	kfree(psinfo);
diff --git a/fs/coredump.c b/fs/coredump.c
index b73817712dd25..88ac67994d03d 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -54,6 +54,8 @@
 
 #include <trace/events/sched.h>
 
+static bool dump_vma_snapshot(struct coredump_params *cprm);
+
 static int core_uses_pid;
 static unsigned int core_pipe_limit;
 static char core_pattern[CORENAME_MAX_SIZE] = "core";
@@ -532,6 +534,7 @@ void do_coredump(const kernel_siginfo_t *siginfo)
 		 * by any locks.
 		 */
 		.mm_flags = mm->flags,
+		.vma_meta = NULL,
 	};
 
 	audit_core_dumps(siginfo->si_signo);
@@ -746,6 +749,9 @@ void do_coredump(const kernel_siginfo_t *siginfo)
 			pr_info("Core dump to |%s disabled\n", cn.corename);
 			goto close_fail;
 		}
+		if (!dump_vma_snapshot(&cprm))
+			goto close_fail;
+
 		file_start_write(cprm.file);
 		core_dumped = binfmt->core_dump(&cprm);
 		/*
@@ -759,6 +765,7 @@ void do_coredump(const kernel_siginfo_t *siginfo)
 			dump_emit(&cprm, "", 1);
 		}
 		file_end_write(cprm.file);
+		kvfree(cprm.vma_meta);
 	}
 	if (ispipe && core_pipe_limit)
 		wait_for_dump_helpers(cprm.file);
@@ -1096,14 +1103,11 @@ static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
  * Under the mmap_lock, take a snapshot of relevant information about the task's
  * VMAs.
  */
-int dump_vma_snapshot(struct coredump_params *cprm, int *vma_count,
-		      struct core_vma_metadata **vma_meta,
-		      size_t *vma_data_size_ptr)
+static bool dump_vma_snapshot(struct coredump_params *cprm)
 {
 	struct vm_area_struct *vma, *gate_vma;
 	struct mm_struct *mm = current->mm;
 	int i;
-	size_t vma_data_size = 0;
 
 	/*
 	 * Once the stack expansion code is fixed to not change VMA bounds
@@ -1111,20 +1115,21 @@ int dump_vma_snapshot(struct coredump_params *cprm, int *vma_count,
 	 * mmap_lock in read mode.
 	 */
 	if (mmap_write_lock_killable(mm))
-		return -EINTR;
+		return false;
 
+	cprm->vma_data_size = 0;
 	gate_vma = get_gate_vma(mm);
-	*vma_count = mm->map_count + (gate_vma ? 1 : 0);
+	cprm->vma_count = mm->map_count + (gate_vma ? 1 : 0);
 
-	*vma_meta = kvmalloc_array(*vma_count, sizeof(**vma_meta), GFP_KERNEL);
-	if (!*vma_meta) {
+	cprm->vma_meta = kvmalloc_array(cprm->vma_count, sizeof(*cprm->vma_meta), GFP_KERNEL);
+	if (!cprm->vma_meta) {
 		mmap_write_unlock(mm);
-		return -ENOMEM;
+		return false;
 	}
 
 	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
 			vma = next_vma(vma, gate_vma), i++) {
-		struct core_vma_metadata *m = (*vma_meta) + i;
+		struct core_vma_metadata *m = cprm->vma_meta + i;
 
 		m->start = vma->vm_start;
 		m->end = vma->vm_end;
@@ -1134,13 +1139,14 @@ int dump_vma_snapshot(struct coredump_params *cprm, int *vma_count,
 
 	mmap_write_unlock(mm);
 
-	if (WARN_ON(i != *vma_count)) {
-		kvfree(*vma_meta);
-		return -EFAULT;
+	if (WARN_ON(i != cprm->vma_count)) {
+		kvfree(cprm->vma_meta);
+		return false;
 	}
 
-	for (i = 0; i < *vma_count; i++) {
-		struct core_vma_metadata *m = (*vma_meta) + i;
+
+	for (i = 0; i < cprm->vma_count; i++) {
+		struct core_vma_metadata *m = cprm->vma_meta + i;
 
 		if (m->dump_size == DUMP_SIZE_MAYBE_ELFHDR_PLACEHOLDER) {
 			char elfmag[SELFMAG];
@@ -1153,9 +1159,8 @@ int dump_vma_snapshot(struct coredump_params *cprm, int *vma_count,
 			}
 		}
 
-		vma_data_size += m->dump_size;
+		cprm->vma_data_size += m->dump_size;
 	}
 
-	*vma_data_size_ptr = vma_data_size;
-	return 0;
+	return true;
 }
diff --git a/include/linux/coredump.h b/include/linux/coredump.h
index 2ee1460a1d66e..7d05370e555ee 100644
--- a/include/linux/coredump.h
+++ b/include/linux/coredump.h
@@ -23,6 +23,9 @@ struct coredump_params {
 	loff_t written;
 	loff_t pos;
 	loff_t to_skip;
+	int vma_count;
+	size_t vma_data_size;
+	struct core_vma_metadata *vma_meta;
 };
 
 /*
@@ -35,9 +38,6 @@ extern int dump_emit(struct coredump_params *cprm, const void *addr, int nr);
 extern int dump_align(struct coredump_params *cprm, int align);
 int dump_user_range(struct coredump_params *cprm, unsigned long start,
 		    unsigned long len);
-int dump_vma_snapshot(struct coredump_params *cprm, int *vma_count,
-		      struct core_vma_metadata **vma_meta,
-		      size_t *vma_data_size_ptr);
 extern void do_coredump(const kernel_siginfo_t *siginfo);
 #else
 static inline void do_coredump(const kernel_siginfo_t *siginfo) {}
-- 
GitLab


From a759de6991b35ad437adba32b5f0cb2fd9e75929 Mon Sep 17 00:00:00 2001
From: Youngjin Jang <yj84.jang@samsung.com>
Date: Tue, 8 Mar 2022 04:07:39 +0900
Subject: [PATCH 1112/1586] PM: sleep: Add device name to
 suspend_report_result()

Currently, suspend_report_result() prints only function information.

If any driver uses a common PM function, nobody knows who exactly
called the failing function.

A device pinter is needed to recognize the failing device.

For example:

 PM: dpm_run_callback(): pnp_bus_suspend+0x0/0x10 returns 0
 PM: dpm_run_callback(): pci_pm_suspend+0x0/0x150 returns 0

become after the change:

 serial 00:05: PM: dpm_run_callback(): pnp_bus_suspend+0x0/0x10 returns 0
 pci 0000:00:01.3: PM: dpm_run_callback(): pci_pm_suspend+0x0/0x150 returns 0

Signed-off-by: Youngjin Jang <yj84.jang@samsung.com>
[ rjw: Changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/main.c  | 10 +++++-----
 drivers/pci/pci-driver.c   | 14 +++++++-------
 drivers/pnp/driver.c       |  2 +-
 drivers/usb/core/hcd-pci.c |  4 ++--
 include/linux/pm.h         |  8 ++++----
 5 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 04ea92cbd9cfd..41e17b8c2c209 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -485,7 +485,7 @@ static int dpm_run_callback(pm_callback_t cb, struct device *dev,
 	trace_device_pm_callback_start(dev, info, state.event);
 	error = cb(dev);
 	trace_device_pm_callback_end(dev, error);
-	suspend_report_result(cb, error);
+	suspend_report_result(dev, cb, error);
 
 	initcall_debug_report(dev, calltime, cb, error);
 
@@ -1568,7 +1568,7 @@ static int legacy_suspend(struct device *dev, pm_message_t state,
 	trace_device_pm_callback_start(dev, info, state.event);
 	error = cb(dev, state);
 	trace_device_pm_callback_end(dev, error);
-	suspend_report_result(cb, error);
+	suspend_report_result(dev, cb, error);
 
 	initcall_debug_report(dev, calltime, cb, error);
 
@@ -1855,7 +1855,7 @@ unlock:
 	device_unlock(dev);
 
 	if (ret < 0) {
-		suspend_report_result(callback, ret);
+		suspend_report_result(dev, callback, ret);
 		pm_runtime_put(dev);
 		return ret;
 	}
@@ -1960,10 +1960,10 @@ int dpm_suspend_start(pm_message_t state)
 }
 EXPORT_SYMBOL_GPL(dpm_suspend_start);
 
-void __suspend_report_result(const char *function, void *fn, int ret)
+void __suspend_report_result(const char *function, struct device *dev, void *fn, int ret)
 {
 	if (ret)
-		pr_err("%s(): %pS returns %d\n", function, fn, ret);
+		dev_err(dev, "%s(): %pS returns %d\n", function, fn, ret);
 }
 EXPORT_SYMBOL_GPL(__suspend_report_result);
 
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index 588588cfda481..415f7664b0108 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -596,7 +596,7 @@ static int pci_legacy_suspend(struct device *dev, pm_message_t state)
 		int error;
 
 		error = drv->suspend(pci_dev, state);
-		suspend_report_result(drv->suspend, error);
+		suspend_report_result(dev, drv->suspend, error);
 		if (error)
 			return error;
 
@@ -775,7 +775,7 @@ static int pci_pm_suspend(struct device *dev)
 		int error;
 
 		error = pm->suspend(dev);
-		suspend_report_result(pm->suspend, error);
+		suspend_report_result(dev, pm->suspend, error);
 		if (error)
 			return error;
 
@@ -821,7 +821,7 @@ static int pci_pm_suspend_noirq(struct device *dev)
 		int error;
 
 		error = pm->suspend_noirq(dev);
-		suspend_report_result(pm->suspend_noirq, error);
+		suspend_report_result(dev, pm->suspend_noirq, error);
 		if (error)
 			return error;
 
@@ -1010,7 +1010,7 @@ static int pci_pm_freeze(struct device *dev)
 		int error;
 
 		error = pm->freeze(dev);
-		suspend_report_result(pm->freeze, error);
+		suspend_report_result(dev, pm->freeze, error);
 		if (error)
 			return error;
 	}
@@ -1030,7 +1030,7 @@ static int pci_pm_freeze_noirq(struct device *dev)
 		int error;
 
 		error = pm->freeze_noirq(dev);
-		suspend_report_result(pm->freeze_noirq, error);
+		suspend_report_result(dev, pm->freeze_noirq, error);
 		if (error)
 			return error;
 	}
@@ -1116,7 +1116,7 @@ static int pci_pm_poweroff(struct device *dev)
 		int error;
 
 		error = pm->poweroff(dev);
-		suspend_report_result(pm->poweroff, error);
+		suspend_report_result(dev, pm->poweroff, error);
 		if (error)
 			return error;
 	}
@@ -1154,7 +1154,7 @@ static int pci_pm_poweroff_noirq(struct device *dev)
 		int error;
 
 		error = pm->poweroff_noirq(dev);
-		suspend_report_result(pm->poweroff_noirq, error);
+		suspend_report_result(dev, pm->poweroff_noirq, error);
 		if (error)
 			return error;
 	}
diff --git a/drivers/pnp/driver.c b/drivers/pnp/driver.c
index cc6757dfa3f13..c02e7bf643a6b 100644
--- a/drivers/pnp/driver.c
+++ b/drivers/pnp/driver.c
@@ -171,7 +171,7 @@ static int __pnp_bus_suspend(struct device *dev, pm_message_t state)
 
 	if (pnp_drv->driver.pm && pnp_drv->driver.pm->suspend) {
 		error = pnp_drv->driver.pm->suspend(dev);
-		suspend_report_result(pnp_drv->driver.pm->suspend, error);
+		suspend_report_result(dev, pnp_drv->driver.pm->suspend, error);
 		if (error)
 			return error;
 	}
diff --git a/drivers/usb/core/hcd-pci.c b/drivers/usb/core/hcd-pci.c
index d630cccd2e6ea..dd44e37a454ab 100644
--- a/drivers/usb/core/hcd-pci.c
+++ b/drivers/usb/core/hcd-pci.c
@@ -446,7 +446,7 @@ static int suspend_common(struct device *dev, bool do_wakeup)
 				HCD_WAKEUP_PENDING(hcd->shared_hcd))
 			return -EBUSY;
 		retval = hcd->driver->pci_suspend(hcd, do_wakeup);
-		suspend_report_result(hcd->driver->pci_suspend, retval);
+		suspend_report_result(dev, hcd->driver->pci_suspend, retval);
 
 		/* Check again in case wakeup raced with pci_suspend */
 		if ((retval == 0 && do_wakeup && HCD_WAKEUP_PENDING(hcd)) ||
@@ -556,7 +556,7 @@ static int hcd_pci_suspend_noirq(struct device *dev)
 		dev_dbg(dev, "--> PCI %s\n",
 				pci_power_name(pci_dev->current_state));
 	} else {
-		suspend_report_result(pci_prepare_to_sleep, retval);
+		suspend_report_result(dev, pci_prepare_to_sleep, retval);
 		return retval;
 	}
 
diff --git a/include/linux/pm.h b/include/linux/pm.h
index f7d2be6863595..e65b3ab28377b 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -770,11 +770,11 @@ extern int dpm_suspend_late(pm_message_t state);
 extern int dpm_suspend(pm_message_t state);
 extern int dpm_prepare(pm_message_t state);
 
-extern void __suspend_report_result(const char *function, void *fn, int ret);
+extern void __suspend_report_result(const char *function, struct device *dev, void *fn, int ret);
 
-#define suspend_report_result(fn, ret)					\
+#define suspend_report_result(dev, fn, ret)				\
 	do {								\
-		__suspend_report_result(__func__, fn, ret);		\
+		__suspend_report_result(__func__, dev, fn, ret);	\
 	} while (0)
 
 extern int device_pm_wait_for_dev(struct device *sub, struct device *dev);
@@ -814,7 +814,7 @@ static inline int dpm_suspend_start(pm_message_t state)
 	return 0;
 }
 
-#define suspend_report_result(fn, ret)		do {} while (0)
+#define suspend_report_result(dev, fn, ret)	do {} while (0)
 
 static inline int device_pm_wait_for_dev(struct device *a, struct device *b)
 {
-- 
GitLab


From 49c1866348f364478a0c4d3dd13fd08bb82d3a5b Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 8 Mar 2022 13:01:19 -0600
Subject: [PATCH 1113/1586] coredump: Remove the WARN_ON in dump_vma_snapshot

The condition is impossible and to the best of my knowledge has never
triggered.

We are in deep trouble if that conditions happens and we walk past
the end of our allocated array.

So delete the WARN_ON and the code that makes it look like the kernel
can handle the case of walking past the end of it's vma_meta array.

Reviewed-by: Jann Horn <jannh@google.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/coredump.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/fs/coredump.c b/fs/coredump.c
index 88ac67994d03d..7f100a637264a 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -1139,12 +1139,6 @@ static bool dump_vma_snapshot(struct coredump_params *cprm)
 
 	mmap_write_unlock(mm);
 
-	if (WARN_ON(i != cprm->vma_count)) {
-		kvfree(cprm->vma_meta);
-		return false;
-	}
-
-
 	for (i = 0; i < cprm->vma_count; i++) {
 		struct core_vma_metadata *m = cprm->vma_meta + i;
 
-- 
GitLab


From 9ec7d3230717b4fe9b6c7afeb4811909c23fa1d7 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 31 Jan 2022 12:17:38 -0600
Subject: [PATCH 1114/1586] coredump/elf: Pass coredump_params into
 fill_note_info

Instead of individually passing cprm->siginfo and cprm->regs
into fill_note_info pass all of struct coredump_params.

This is preparation to allow fill_files_note to use the existing
vma snapshot.

Reviewed-by: Jann Horn <jannh@google.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/binfmt_elf.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 5710467cf4b60..7f0c391832cf3 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1822,7 +1822,7 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
 
 static int fill_note_info(struct elfhdr *elf, int phdrs,
 			  struct elf_note_info *info,
-			  const kernel_siginfo_t *siginfo, struct pt_regs *regs)
+			  struct coredump_params *cprm)
 {
 	struct task_struct *dump_task = current;
 	const struct user_regset_view *view = task_user_regset_view(dump_task);
@@ -1894,7 +1894,7 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
 	 * Now fill in each thread's information.
 	 */
 	for (t = info->thread; t != NULL; t = t->next)
-		if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
+		if (!fill_thread_core_info(t, view, cprm->siginfo->si_signo, &info->size))
 			return 0;
 
 	/*
@@ -1903,7 +1903,7 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
 	fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
 	info->size += notesize(&info->psinfo);
 
-	fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
+	fill_siginfo_note(&info->signote, &info->csigdata, cprm->siginfo);
 	info->size += notesize(&info->signote);
 
 	fill_auxv_note(&info->auxv, current->mm);
@@ -2051,7 +2051,7 @@ static int elf_note_info_init(struct elf_note_info *info)
 
 static int fill_note_info(struct elfhdr *elf, int phdrs,
 			  struct elf_note_info *info,
-			  const kernel_siginfo_t *siginfo, struct pt_regs *regs)
+			  struct coredump_params *cprm)
 {
 	struct core_thread *ct;
 	struct elf_thread_status *ets;
@@ -2072,13 +2072,13 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
 	list_for_each_entry(ets, &info->thread_list, list) {
 		int sz;
 
-		sz = elf_dump_thread_status(siginfo->si_signo, ets);
+		sz = elf_dump_thread_status(cprm->siginfo->si_signo, ets);
 		info->thread_status_size += sz;
 	}
 	/* now collect the dump for the current */
 	memset(info->prstatus, 0, sizeof(*info->prstatus));
-	fill_prstatus(&info->prstatus->common, current, siginfo->si_signo);
-	elf_core_copy_regs(&info->prstatus->pr_reg, regs);
+	fill_prstatus(&info->prstatus->common, current, cprm->siginfo->si_signo);
+	elf_core_copy_regs(&info->prstatus->pr_reg, cprm->regs);
 
 	/* Set up header */
 	fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
@@ -2094,7 +2094,7 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
 	fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
 		  sizeof(*info->psinfo), info->psinfo);
 
-	fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
+	fill_siginfo_note(info->notes + 2, &info->csigdata, cprm->siginfo);
 	fill_auxv_note(info->notes + 3, current->mm);
 	info->numnote = 4;
 
@@ -2104,8 +2104,8 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
 	}
 
 	/* Try to dump the FPU. */
-	info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
-							       info->fpu);
+	info->prstatus->pr_fpvalid =
+		elf_core_copy_task_fpregs(current, cprm->regs, info->fpu);
 	if (info->prstatus->pr_fpvalid)
 		fill_note(info->notes + info->numnote++,
 			  "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
@@ -2218,7 +2218,7 @@ static int elf_core_dump(struct coredump_params *cprm)
 	 * Collect all the non-memory information about the process for the
 	 * notes.  This also sets up the file header.
 	 */
-	if (!fill_note_info(&elf, e_phnum, &info, cprm->siginfo, cprm->regs))
+	if (!fill_note_info(&elf, e_phnum, &info, cprm))
 		goto end_coredump;
 
 	has_dumped = 1;
-- 
GitLab


From 390031c942116d4733310f0684beb8db19885fe6 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 8 Mar 2022 13:04:19 -0600
Subject: [PATCH 1115/1586] coredump: Use the vma snapshot in fill_files_note

Matthew Wilcox reported that there is a missing mmap_lock in
file_files_note that could possibly lead to a user after free.

Solve this by using the existing vma snapshot for consistency
and to avoid the need to take the mmap_lock anywhere in the
coredump code except for dump_vma_snapshot.

Update the dump_vma_snapshot to capture vm_pgoff and vm_file
that are neeeded by fill_files_note.

Add free_vma_snapshot to free the captured values of vm_file.

Reported-by: Matthew Wilcox <willy@infradead.org>
Link: https://lkml.kernel.org/r/20220131153740.2396974-1-willy@infradead.org
Cc: stable@vger.kernel.org
Fixes: a07279c9a8cd ("binfmt_elf, binfmt_elf_fdpic: use a VMA list snapshot")
Fixes: 2aa362c49c31 ("coredump: extend core dump note section to contain file names of mapped files")
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/binfmt_elf.c          | 24 ++++++++++++------------
 fs/coredump.c            | 22 +++++++++++++++++++++-
 include/linux/coredump.h |  2 ++
 3 files changed, 35 insertions(+), 13 deletions(-)

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 7f0c391832cf3..ca5296cae9791 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1641,17 +1641,16 @@ static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
  *   long file_ofs
  * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
  */
-static int fill_files_note(struct memelfnote *note)
+static int fill_files_note(struct memelfnote *note, struct coredump_params *cprm)
 {
-	struct mm_struct *mm = current->mm;
-	struct vm_area_struct *vma;
 	unsigned count, size, names_ofs, remaining, n;
 	user_long_t *data;
 	user_long_t *start_end_ofs;
 	char *name_base, *name_curpos;
+	int i;
 
 	/* *Estimated* file count and total data size needed */
-	count = mm->map_count;
+	count = cprm->vma_count;
 	if (count > UINT_MAX / 64)
 		return -EINVAL;
 	size = count * 64;
@@ -1673,11 +1672,12 @@ static int fill_files_note(struct memelfnote *note)
 	name_base = name_curpos = ((char *)data) + names_ofs;
 	remaining = size - names_ofs;
 	count = 0;
-	for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
+	for (i = 0; i < cprm->vma_count; i++) {
+		struct core_vma_metadata *m = &cprm->vma_meta[i];
 		struct file *file;
 		const char *filename;
 
-		file = vma->vm_file;
+		file = m->file;
 		if (!file)
 			continue;
 		filename = file_path(file, name_curpos, remaining);
@@ -1697,9 +1697,9 @@ static int fill_files_note(struct memelfnote *note)
 		memmove(name_curpos, filename, n);
 		name_curpos += n;
 
-		*start_end_ofs++ = vma->vm_start;
-		*start_end_ofs++ = vma->vm_end;
-		*start_end_ofs++ = vma->vm_pgoff;
+		*start_end_ofs++ = m->start;
+		*start_end_ofs++ = m->end;
+		*start_end_ofs++ = m->pgoff;
 		count++;
 	}
 
@@ -1710,7 +1710,7 @@ static int fill_files_note(struct memelfnote *note)
 	 * Count usually is less than mm->map_count,
 	 * we need to move filenames down.
 	 */
-	n = mm->map_count - count;
+	n = cprm->vma_count - count;
 	if (n != 0) {
 		unsigned shift_bytes = n * 3 * sizeof(data[0]);
 		memmove(name_base - shift_bytes, name_base,
@@ -1909,7 +1909,7 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
 	fill_auxv_note(&info->auxv, current->mm);
 	info->size += notesize(&info->auxv);
 
-	if (fill_files_note(&info->files) == 0)
+	if (fill_files_note(&info->files, cprm) == 0)
 		info->size += notesize(&info->files);
 
 	return 1;
@@ -2098,7 +2098,7 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
 	fill_auxv_note(info->notes + 3, current->mm);
 	info->numnote = 4;
 
-	if (fill_files_note(info->notes + info->numnote) == 0) {
+	if (fill_files_note(info->notes + info->numnote, cprm) == 0) {
 		info->notes_files = info->notes + info->numnote;
 		info->numnote++;
 	}
diff --git a/fs/coredump.c b/fs/coredump.c
index 7f100a637264a..7ed7d601e5e00 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -55,6 +55,7 @@
 #include <trace/events/sched.h>
 
 static bool dump_vma_snapshot(struct coredump_params *cprm);
+static void free_vma_snapshot(struct coredump_params *cprm);
 
 static int core_uses_pid;
 static unsigned int core_pipe_limit;
@@ -765,7 +766,7 @@ void do_coredump(const kernel_siginfo_t *siginfo)
 			dump_emit(&cprm, "", 1);
 		}
 		file_end_write(cprm.file);
-		kvfree(cprm.vma_meta);
+		free_vma_snapshot(&cprm);
 	}
 	if (ispipe && core_pipe_limit)
 		wait_for_dump_helpers(cprm.file);
@@ -1099,6 +1100,20 @@ static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
 	return gate_vma;
 }
 
+static void free_vma_snapshot(struct coredump_params *cprm)
+{
+	if (cprm->vma_meta) {
+		int i;
+		for (i = 0; i < cprm->vma_count; i++) {
+			struct file *file = cprm->vma_meta[i].file;
+			if (file)
+				fput(file);
+		}
+		kvfree(cprm->vma_meta);
+		cprm->vma_meta = NULL;
+	}
+}
+
 /*
  * Under the mmap_lock, take a snapshot of relevant information about the task's
  * VMAs.
@@ -1135,6 +1150,11 @@ static bool dump_vma_snapshot(struct coredump_params *cprm)
 		m->end = vma->vm_end;
 		m->flags = vma->vm_flags;
 		m->dump_size = vma_dump_size(vma, cprm->mm_flags);
+		m->pgoff = vma->vm_pgoff;
+
+		m->file = vma->vm_file;
+		if (m->file)
+			get_file(m->file);
 	}
 
 	mmap_write_unlock(mm);
diff --git a/include/linux/coredump.h b/include/linux/coredump.h
index 7d05370e555ee..08a1d3e7e46d0 100644
--- a/include/linux/coredump.h
+++ b/include/linux/coredump.h
@@ -12,6 +12,8 @@ struct core_vma_metadata {
 	unsigned long start, end;
 	unsigned long flags;
 	unsigned long dump_size;
+	unsigned long pgoff;
+	struct file   *file;
 };
 
 struct coredump_params {
-- 
GitLab


From 830751d54b4abb2d4d9e5e8072dffdd8ce195f1f Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 8 Mar 2022 16:05:59 +0200
Subject: [PATCH 1116/1586] ACPI: docs: gpio-properties: Unify ASL style for
 GPIO examples

GPIO examples of ASL in the board.rst, enumeration.rst and gpio-properties.rst
are not unified. Unify them for better reader experience.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/driver-api/gpio/board.rst       | 21 +++++++--------
 .../firmware-guide/acpi/enumeration.rst       | 22 ++++------------
 .../firmware-guide/acpi/gpio-properties.rst   | 26 ++++++++++---------
 3 files changed, 28 insertions(+), 41 deletions(-)

diff --git a/Documentation/driver-api/gpio/board.rst b/Documentation/driver-api/gpio/board.rst
index 191fa867826a4..4e3adf31c8d1d 100644
--- a/Documentation/driver-api/gpio/board.rst
+++ b/Documentation/driver-api/gpio/board.rst
@@ -71,14 +71,14 @@ with the help of _DSD (Device Specific Data), introduced in ACPI 5.1::
 
 	Device (FOO) {
 		Name (_CRS, ResourceTemplate () {
-			GpioIo (Exclusive, ..., IoRestrictionOutputOnly,
-				"\\_SB.GPI0") {15} // red
-			GpioIo (Exclusive, ..., IoRestrictionOutputOnly,
-				"\\_SB.GPI0") {16} // green
-			GpioIo (Exclusive, ..., IoRestrictionOutputOnly,
-				"\\_SB.GPI0") {17} // blue
-			GpioIo (Exclusive, ..., IoRestrictionOutputOnly,
-				"\\_SB.GPI0") {1} // power
+			GpioIo (Exclusive, PullUp, 0, 0, IoRestrictionOutputOnly,
+				"\\_SB.GPI0", 0, ResourceConsumer) { 15 } // red
+			GpioIo (Exclusive, PullUp, 0, 0, IoRestrictionOutputOnly,
+				"\\_SB.GPI0", 0, ResourceConsumer) { 16 } // green
+			GpioIo (Exclusive, PullUp, 0, 0, IoRestrictionOutputOnly,
+				"\\_SB.GPI0", 0, ResourceConsumer) { 17 } // blue
+			GpioIo (Exclusive, PullNone, 0, 0, IoRestrictionOutputOnly,
+				"\\_SB.GPI0", 0, ResourceConsumer) { 1 } // power
 		})
 
 		Name (_DSD, Package () {
@@ -92,10 +92,7 @@ with the help of _DSD (Device Specific Data), introduced in ACPI 5.1::
 						^FOO, 2, 0, 1,
 					}
 				},
-				Package () {
-					"power-gpios",
-					Package () {^FOO, 3, 0, 0},
-				},
+				Package () { "power-gpios", Package () { ^FOO, 3, 0, 0 } },
 			}
 		})
 	}
diff --git a/Documentation/firmware-guide/acpi/enumeration.rst b/Documentation/firmware-guide/acpi/enumeration.rst
index a946643244f73..c414646a1bb43 100644
--- a/Documentation/firmware-guide/acpi/enumeration.rst
+++ b/Documentation/firmware-guide/acpi/enumeration.rst
@@ -295,26 +295,13 @@ For example::
 		{
 			Name (SBUF, ResourceTemplate()
 			{
-				...
 				// Used to power on/off the device
-				GpioIo (Exclusive, PullDefault, 0x0000, 0x0000,
-					IoRestrictionOutputOnly, "\\_SB.PCI0.GPI0",
-					0x00, ResourceConsumer,,)
-				{
-					// Pin List
-					0x0055
-				}
+				GpioIo (Exclusive, PullNone, 0, 0, IoRestrictionOutputOnly,
+					"\\_SB.PCI0.GPI0", 0, ResourceConsumer) { 85 }
 
 				// Interrupt for the device
-				GpioInt (Edge, ActiveHigh, ExclusiveAndWake, PullNone,
-					0x0000, "\\_SB.PCI0.GPI0", 0x00, ResourceConsumer,,)
-				{
-					// Pin list
-					0x0058
-				}
-
-				...
-
+				GpioInt (Edge, ActiveHigh, ExclusiveAndWake, PullNone, 0,
+					 "\\_SB.PCI0.GPI0", 0, ResourceConsumer) { 88 }
 			}
 
 			Return (SBUF)
@@ -331,6 +318,7 @@ For example::
 			}
 		})
 		...
+	}
 
 These GPIO numbers are controller relative and path "\\_SB.PCI0.GPI0"
 specifies the path to the controller. In order to use these GPIOs in Linux
diff --git a/Documentation/firmware-guide/acpi/gpio-properties.rst b/Documentation/firmware-guide/acpi/gpio-properties.rst
index df4b711053ee9..eaec732cc77cf 100644
--- a/Documentation/firmware-guide/acpi/gpio-properties.rst
+++ b/Documentation/firmware-guide/acpi/gpio-properties.rst
@@ -21,18 +21,18 @@ index, like the ASL example below shows::
       Name (_CRS, ResourceTemplate ()
       {
           GpioIo (Exclusive, PullUp, 0, 0, IoRestrictionOutputOnly,
-                  "\\_SB.GPO0", 0, ResourceConsumer) {15}
+                  "\\_SB.GPO0", 0, ResourceConsumer) { 15 }
           GpioIo (Exclusive, PullUp, 0, 0, IoRestrictionOutputOnly,
-                  "\\_SB.GPO0", 0, ResourceConsumer) {27, 31}
+                  "\\_SB.GPO0", 0, ResourceConsumer) { 27, 31 }
       })
 
       Name (_DSD, Package ()
       {
           ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
           Package ()
-	  {
-              Package () {"reset-gpios", Package() {^BTH, 1, 1, 0 }},
-              Package () {"shutdown-gpios", Package() {^BTH, 0, 0, 0 }},
+          {
+              Package () { "reset-gpios", Package () { ^BTH, 1, 1, 0 } },
+              Package () { "shutdown-gpios", Package () { ^BTH, 0, 0, 0 } },
           }
       })
   }
@@ -123,17 +123,17 @@ Example::
       // _DSD Hierarchical Properties Extension UUID
       ToUUID("dbb8e3e6-5886-4ba6-8795-1319f52a966b"),
       Package () {
-          Package () {"hog-gpio8", "G8PU"}
+          Package () { "hog-gpio8", "G8PU" }
       }
   })
 
   Name (G8PU, Package () {
       ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
       Package () {
-          Package () {"gpio-hog", 1},
-          Package () {"gpios", Package () {8, 0}},
-          Package () {"output-high", 1},
-          Package () {"line-name", "gpio8-pullup"},
+          Package () { "gpio-hog", 1 },
+          Package () { "gpios", Package () { 8, 0 } },
+          Package () { "output-high", 1 },
+          Package () { "line-name", "gpio8-pullup" },
       }
   })
 
@@ -266,15 +266,17 @@ have a device like below::
 
       Name (_CRS, ResourceTemplate () {
           GpioIo (Exclusive, PullNone, 0, 0, IoRestrictionNone,
-                  "\\_SB.GPO0", 0, ResourceConsumer) {15}
+                  "\\_SB.GPO0", 0, ResourceConsumer) { 15 }
           GpioIo (Exclusive, PullNone, 0, 0, IoRestrictionNone,
-                  "\\_SB.GPO0", 0, ResourceConsumer) {27}
+                  "\\_SB.GPO0", 0, ResourceConsumer) { 27 }
       })
   }
 
 The driver might expect to get the right GPIO when it does::
 
   desc = gpiod_get(dev, "reset", GPIOD_OUT_LOW);
+  if (IS_ERR(desc))
+	...error handling...
 
 but since there is no way to know the mapping between "reset" and
 the GpioIo() in _CRS desc will hold ERR_PTR(-ENOENT).
-- 
GitLab


From 0fb74d0d21aaea1513eb8bfdbbde60113be4fcbc Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Sat, 22 Jan 2022 14:25:53 +0100
Subject: [PATCH 1117/1586] dt-bindings: thermal: samsung: Convert to dtschema

Convert the Samsung Exynos SoC Thermal Management Unit bindings to DT
schema format.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20220122132554.65192-2-krzysztof.kozlowski@canonical.com
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 .../bindings/thermal/exynos-thermal.txt       | 106 ----------
 .../thermal/samsung,exynos-thermal.yaml       | 184 ++++++++++++++++++
 2 files changed, 184 insertions(+), 106 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/thermal/exynos-thermal.txt
 create mode 100644 Documentation/devicetree/bindings/thermal/samsung,exynos-thermal.yaml

diff --git a/Documentation/devicetree/bindings/thermal/exynos-thermal.txt b/Documentation/devicetree/bindings/thermal/exynos-thermal.txt
deleted file mode 100644
index 33004ce7e5dfa..0000000000000
--- a/Documentation/devicetree/bindings/thermal/exynos-thermal.txt
+++ /dev/null
@@ -1,106 +0,0 @@
-* Exynos Thermal Management Unit (TMU)
-
-** Required properties:
-
-- compatible : One of the following:
-	       "samsung,exynos3250-tmu"
-	       "samsung,exynos4412-tmu"
-	       "samsung,exynos4210-tmu"
-	       "samsung,exynos5250-tmu"
-	       "samsung,exynos5260-tmu"
-	       "samsung,exynos5420-tmu" for TMU channel 0, 1 on Exynos5420
-	       "samsung,exynos5420-tmu-ext-triminfo" for TMU channels 2, 3 and 4
-			Exynos5420 (Must pass triminfo base and triminfo clock)
-               "samsung,exynos5433-tmu"
-	       "samsung,exynos7-tmu"
-- reg : Address range of the thermal registers. For soc's which has multiple
-	instances of TMU and some registers are shared across all TMU's like
-	interrupt related then 2 set of register has to supplied. First set
-	belongs	to register set of TMU instance and second set belongs to
-	registers shared with the TMU instance.
-
-  NOTE: On Exynos5420, the TRIMINFO register is misplaced for TMU
-	channels 2, 3 and 4
-	Use "samsung,exynos5420-tmu-ext-triminfo" in cases, there is a misplaced
-	register, also provide clock to access that base.
-
-	TRIMINFO at 0x1006c000 contains data for TMU channel 3
-	TRIMINFO at 0x100a0000 contains data for TMU channel 4
-	TRIMINFO at 0x10068000 contains data for TMU channel 2
-
-- interrupts : Should contain interrupt for thermal system
-- clocks : The main clocks for TMU device
-	-- 1. operational clock for TMU channel
-	-- 2. optional clock to access the shared registers of TMU channel
-	-- 3. optional special clock for functional operation
-- clock-names : Thermal system clock name
-	-- "tmu_apbif" operational clock for current TMU channel
-	-- "tmu_triminfo_apbif" clock to access the shared triminfo register
-		for current TMU channel
-	-- "tmu_sclk" clock for functional operation of the current TMU
-		channel
-
-The Exynos TMU supports generating interrupts when reaching given
-temperature thresholds. Number of supported thermal trip points depends
-on the SoC (only first trip points defined in DT will be configured):
- - most of SoC: 4
- - samsung,exynos5433-tmu: 8
- - samsung,exynos7-tmu: 8
-
-** Optional properties:
-
-- vtmu-supply: This entry is optional and provides the regulator node supplying
-		voltage to TMU. If needed this entry can be placed inside
-		board/platform specific dts file.
-
-Example 1):
-
-	tmu@100c0000 {
-		compatible = "samsung,exynos4412-tmu";
-		interrupt-parent = <&combiner>;
-		reg = <0x100C0000 0x100>;
-		interrupts = <2 4>;
-		clocks = <&clock 383>;
-		clock-names = "tmu_apbif";
-		vtmu-supply = <&tmu_regulator_node>;
-		#thermal-sensor-cells = <0>;
-	};
-
-Example 2): (In case of Exynos5420 "with misplaced TRIMINFO register")
-	tmu_cpu2: tmu@10068000 {
-		compatible = "samsung,exynos5420-tmu-ext-triminfo";
-		reg = <0x10068000 0x100>, <0x1006c000 0x4>;
-		interrupts = <0 184 0>;
-		clocks = <&clock 318>, <&clock 318>;
-		clock-names = "tmu_apbif", "tmu_triminfo_apbif";
-		#thermal-sensor-cells = <0>;
-	};
-
-	tmu_cpu3: tmu@1006c000 {
-		compatible = "samsung,exynos5420-tmu-ext-triminfo";
-		reg = <0x1006c000 0x100>, <0x100a0000 0x4>;
-		interrupts = <0 185 0>;
-		clocks = <&clock 318>, <&clock 319>;
-		clock-names = "tmu_apbif", "tmu_triminfo_apbif";
-		#thermal-sensor-cells = <0>;
-	};
-
-	tmu_gpu: tmu@100a0000 {
-		compatible = "samsung,exynos5420-tmu-ext-triminfo";
-		reg = <0x100a0000 0x100>, <0x10068000 0x4>;
-		interrupts = <0 215 0>;
-		clocks = <&clock 319>, <&clock 318>;
-		clock-names = "tmu_apbif", "tmu_triminfo_apbif";
-		#thermal-sensor-cells = <0>;
-	};
-
-Note: For multi-instance tmu each instance should have an alias correctly
-numbered in "aliases" node.
-
-Example:
-
-aliases {
-	tmuctrl0 = &tmuctrl_0;
-	tmuctrl1 = &tmuctrl_1;
-	tmuctrl2 = &tmuctrl_2;
-};
diff --git a/Documentation/devicetree/bindings/thermal/samsung,exynos-thermal.yaml b/Documentation/devicetree/bindings/thermal/samsung,exynos-thermal.yaml
new file mode 100644
index 0000000000000..17129f75d9624
--- /dev/null
+++ b/Documentation/devicetree/bindings/thermal/samsung,exynos-thermal.yaml
@@ -0,0 +1,184 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/thermal/samsung,exynos-thermal.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung Exynos SoC Thermal Management Unit (TMU)
+
+maintainers:
+  - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+
+description: |
+  For multi-instance tmu each instance should have an alias correctly numbered
+  in "aliases" node.
+
+properties:
+  compatible:
+    enum:
+      - samsung,exynos3250-tmu
+      - samsung,exynos4412-tmu
+      - samsung,exynos4210-tmu
+      - samsung,exynos5250-tmu
+      - samsung,exynos5260-tmu
+        # For TMU channel 0, 1 on Exynos5420:
+      - samsung,exynos5420-tmu
+        # For TMU channels 2, 3 and 4 of Exynos5420:
+      - samsung,exynos5420-tmu-ext-triminfo
+      - samsung,exynos5433-tmu
+      - samsung,exynos7-tmu
+
+  clocks:
+    minItems: 1
+    maxItems: 3
+
+  clock-names:
+    minItems: 1
+    maxItems: 3
+
+  interrupts:
+    description: |
+      The Exynos TMU supports generating interrupts when reaching given
+      temperature thresholds. Number of supported thermal trip points depends
+      on the SoC (only first trip points defined in DT will be configured)::
+       - most of SoC: 4
+       - samsung,exynos5433-tmu: 8
+       - samsung,exynos7-tmu: 8
+    maxItems: 1
+
+  reg:
+    items:
+      - description: TMU instance registers.
+      - description: |
+          Shared TMU registers.
+
+          Note:: On Exynos5420, the TRIMINFO register is misplaced for TMU
+          channels 2, 3 and 4 Use "samsung,exynos5420-tmu-ext-triminfo" in
+          cases, there is a misplaced register, also provide clock to access
+          that base.
+          TRIMINFO at 0x1006c000 contains data for TMU channel 3
+          TRIMINFO at 0x100a0000 contains data for TMU channel 4
+          TRIMINFO at 0x10068000 contains data for TMU channel 2
+    minItems: 1
+
+  '#thermal-sensor-cells': true
+
+  vtmu-supply:
+    description: The regulator node supplying voltage to TMU.
+
+required:
+  - compatible
+  - clocks
+  - clock-names
+  - interrupts
+  - reg
+
+allOf:
+  - $ref: /schemas/thermal/thermal-sensor.yaml
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: samsung,exynos5420-tmu-ext-triminfo
+    then:
+      properties:
+        clocks:
+          items:
+            - description:
+                Operational clock for TMU channel.
+            - description:
+                Optional clock to access the shared registers (e.g. TRIMINFO) of TMU
+                channel.
+        clock-names:
+          items:
+            - const: tmu_apbif
+            - const: tmu_triminfo_apbif
+        reg:
+          minItems: 2
+          maxItems: 2
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - samsung,exynos5433-tmu
+              - samsung,exynos7-tmu
+    then:
+      properties:
+        clocks:
+          items:
+            - description:
+                Operational clock for TMU channel.
+            - description:
+                Optional special clock for functional operation of TMU channel.
+        clock-names:
+          items:
+            - const: tmu_apbif
+            - const: tmu_sclk
+        reg:
+          minItems: 1
+          maxItems: 1
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - samsung,exynos3250-tmu
+              - samsung,exynos4412-tmu
+              - samsung,exynos4210-tmu
+              - samsung,exynos5250-tmu
+              - samsung,exynos5260-tmu
+              - samsung,exynos5420-tmu
+    then:
+      properties:
+        clocks:
+          minItems: 1
+          maxItems: 1
+        reg:
+          minItems: 1
+          maxItems: 1
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/exynos4.h>
+
+    tmu@100c0000 {
+        compatible = "samsung,exynos4412-tmu";
+        reg = <0x100C0000 0x100>;
+        interrupt-parent = <&combiner>;
+        interrupts = <2 4>;
+        #thermal-sensor-cells = <0>;
+        clocks = <&clock CLK_TMU_APBIF>;
+        clock-names = "tmu_apbif";
+        vtmu-supply = <&ldo10_reg>;
+    };
+
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+    tmu@10068000 {
+        compatible = "samsung,exynos5420-tmu-ext-triminfo";
+        reg = <0x10068000 0x100>, <0x1006c000 0x4>;
+        interrupts = <GIC_SPI 184 IRQ_TYPE_LEVEL_HIGH>;
+        #thermal-sensor-cells = <0>;
+        clocks = <&clock 318>, <&clock 318>; /* CLK_TMU */
+        clock-names = "tmu_apbif", "tmu_triminfo_apbif";
+        vtmu-supply = <&ldo7_reg>;
+    };
+
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+    tmu@10060000 {
+        compatible = "samsung,exynos5433-tmu";
+        reg = <0x10060000 0x200>;
+        interrupts = <GIC_SPI 95 IRQ_TYPE_LEVEL_HIGH>;
+        #thermal-sensor-cells = <0>;
+        clocks = <&cmu_peris 3>, /* CLK_PCLK_TMU0_APBIF */
+                 <&cmu_peris 35>; /* CLK_SCLK_TMU0 */
+        clock-names = "tmu_apbif", "tmu_sclk";
+        vtmu-supply = <&ldo3_reg>;
+    };
-- 
GitLab


From 62db1e8e9701a947a00f3d8987ca3298827def56 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Sat, 22 Jan 2022 14:25:54 +0100
Subject: [PATCH 1118/1586] MAINTAINERS: thermal: samsung: Drop obsolete
 properties

Update the Samsung Exynos SoC thermal driver entry to match reality and
add Krzysztof Kozlowski as co-maintainer (as he maintains entire Samsung
SoC).  The rationale:
1. Bartlomiej's Samsung email bounces, since he is not working in
   Samsung for some time.
2. The mentioned Lukasz Majewski's Git tree was not updated
   since 2015.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Link: https://lore.kernel.org/r/20220122132554.65192-3-krzysztof.kozlowski@canonical.com
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 MAINTAINERS | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 1ba1e4af2cbc8..82fe389618d04 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -17135,11 +17135,12 @@ S:	Supported
 F:	drivers/net/ethernet/samsung/sxgbe/
 
 SAMSUNG THERMAL DRIVER
-M:	Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
+M:	Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
+M:	Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
 L:	linux-pm@vger.kernel.org
 L:	linux-samsung-soc@vger.kernel.org
-S:	Supported
-T:	git https://github.com/lmajewski/linux-samsung-thermal.git
+S:	Maintained
+F:	Documentation/devicetree/bindings/thermal/samsung,exynos-thermal.yaml
 F:	drivers/thermal/samsung/
 
 SAMSUNG USB2 PHY DRIVER
-- 
GitLab


From 8556e19d4939fbdb15aee9391f035453c5b2255a Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Sat, 29 Jan 2022 21:07:50 +0300
Subject: [PATCH 1119/1586] thermal/drivers/tsens: register thermal zones as
 hwmon sensors

Register thermal zones as hwmon sensors to let userspace read
CPU temperatures using standard hwmon interface.

Acked-by: Amit Kucheria <amitk@kernel.org>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://lore.kernel.org/r/20220129180750.1882310-1-dmitry.baryshkov@linaro.org
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/thermal/qcom/tsens.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/thermal/qcom/tsens.c b/drivers/thermal/qcom/tsens.c
index 99a8d9f3e03ca..154d3cb19c88d 100644
--- a/drivers/thermal/qcom/tsens.c
+++ b/drivers/thermal/qcom/tsens.c
@@ -18,6 +18,7 @@
 #include <linux/regmap.h>
 #include <linux/slab.h>
 #include <linux/thermal.h>
+#include "../thermal_hwmon.h"
 #include "tsens.h"
 
 /**
@@ -1060,6 +1061,10 @@ static int tsens_register(struct tsens_priv *priv)
 		priv->sensor[i].tzd = tzd;
 		if (priv->ops->enable)
 			priv->ops->enable(priv, i);
+
+		if (devm_thermal_add_hwmon_sysfs(tzd))
+			dev_warn(priv->dev,
+				 "Failed to add hwmon sysfs attributes\n");
 	}
 
 	/* VER_0 require to set MIN and MAX THRESH
-- 
GitLab


From cf0c54db636c9107b0c1f1b57ab7f18fa2fef478 Mon Sep 17 00:00:00 2001
From: Thara Gopinath <thara.gopinath@linaro.org>
Date: Thu, 6 Jan 2022 12:31:36 -0500
Subject: [PATCH 1120/1586] thermal/drivers/qcom/lmh: Add support for sm8150

Add compatible to support LMh for sm8150 SoC.
sm8150 does not require explicit enabling for various LMh subsystems.
Add a variable indicating the same as match data which is set for sdm845.
Execute the piece of code enabling various LMh subsystems only if
enable algorithm match data is present.

Signed-off-by: Thara Gopinath <thara.gopinath@linaro.org>
Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Link: https://lore.kernel.org/r/20220106173138.411097-2-thara.gopinath@linaro.org
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/thermal/qcom/lmh.c | 62 +++++++++++++++++++++-----------------
 1 file changed, 35 insertions(+), 27 deletions(-)

diff --git a/drivers/thermal/qcom/lmh.c b/drivers/thermal/qcom/lmh.c
index eafa7526eb8b4..c7f91cbdccc7b 100644
--- a/drivers/thermal/qcom/lmh.c
+++ b/drivers/thermal/qcom/lmh.c
@@ -28,6 +28,8 @@
 
 #define LMH_REG_DCVS_INTR_CLR		0x8
 
+#define LMH_ENABLE_ALGOS		1
+
 struct lmh_hw_data {
 	void __iomem *base;
 	struct irq_domain *domain;
@@ -90,6 +92,7 @@ static int lmh_probe(struct platform_device *pdev)
 	struct device_node *cpu_node;
 	struct lmh_hw_data *lmh_data;
 	int temp_low, temp_high, temp_arm, cpu_id, ret;
+	unsigned int enable_alg;
 	u32 node_id;
 
 	lmh_data = devm_kzalloc(dev, sizeof(*lmh_data), GFP_KERNEL);
@@ -141,32 +144,36 @@ static int lmh_probe(struct platform_device *pdev)
 	if (!qcom_scm_lmh_dcvsh_available())
 		return -EINVAL;
 
-	ret = qcom_scm_lmh_dcvsh(LMH_SUB_FN_CRNT, LMH_ALGO_MODE_ENABLE, 1,
-				 LMH_NODE_DCVS, node_id, 0);
-	if (ret)
-		dev_err(dev, "Error %d enabling current subfunction\n", ret);
-
-	ret = qcom_scm_lmh_dcvsh(LMH_SUB_FN_REL, LMH_ALGO_MODE_ENABLE, 1,
-				 LMH_NODE_DCVS, node_id, 0);
-	if (ret)
-		dev_err(dev, "Error %d enabling reliability subfunction\n", ret);
-
-	ret = qcom_scm_lmh_dcvsh(LMH_SUB_FN_BCL, LMH_ALGO_MODE_ENABLE, 1,
-				 LMH_NODE_DCVS, node_id, 0);
-	if (ret)
-		dev_err(dev, "Error %d enabling BCL subfunction\n", ret);
-
-	ret = qcom_scm_lmh_dcvsh(LMH_SUB_FN_THERMAL, LMH_ALGO_MODE_ENABLE, 1,
-				 LMH_NODE_DCVS, node_id, 0);
-	if (ret) {
-		dev_err(dev, "Error %d enabling thermal subfunction\n", ret);
-		return ret;
-	}
-
-	ret = qcom_scm_lmh_profile_change(0x1);
-	if (ret) {
-		dev_err(dev, "Error %d changing profile\n", ret);
-		return ret;
+	enable_alg = (uintptr_t)of_device_get_match_data(dev);
+
+	if (enable_alg) {
+		ret = qcom_scm_lmh_dcvsh(LMH_SUB_FN_CRNT, LMH_ALGO_MODE_ENABLE, 1,
+					 LMH_NODE_DCVS, node_id, 0);
+		if (ret)
+			dev_err(dev, "Error %d enabling current subfunction\n", ret);
+
+		ret = qcom_scm_lmh_dcvsh(LMH_SUB_FN_REL, LMH_ALGO_MODE_ENABLE, 1,
+					 LMH_NODE_DCVS, node_id, 0);
+		if (ret)
+			dev_err(dev, "Error %d enabling reliability subfunction\n", ret);
+
+		ret = qcom_scm_lmh_dcvsh(LMH_SUB_FN_BCL, LMH_ALGO_MODE_ENABLE, 1,
+					 LMH_NODE_DCVS, node_id, 0);
+		if (ret)
+			dev_err(dev, "Error %d enabling BCL subfunction\n", ret);
+
+		ret = qcom_scm_lmh_dcvsh(LMH_SUB_FN_THERMAL, LMH_ALGO_MODE_ENABLE, 1,
+					 LMH_NODE_DCVS, node_id, 0);
+		if (ret) {
+			dev_err(dev, "Error %d enabling thermal subfunction\n", ret);
+			return ret;
+		}
+
+		ret = qcom_scm_lmh_profile_change(0x1);
+		if (ret) {
+			dev_err(dev, "Error %d changing profile\n", ret);
+			return ret;
+		}
 	}
 
 	/* Set default thermal trips */
@@ -213,7 +220,8 @@ static int lmh_probe(struct platform_device *pdev)
 }
 
 static const struct of_device_id lmh_table[] = {
-	{ .compatible = "qcom,sdm845-lmh", },
+	{ .compatible = "qcom,sdm845-lmh", .data = (void *)LMH_ENABLE_ALGOS},
+	{ .compatible = "qcom,sm8150-lmh", },
 	{}
 };
 MODULE_DEVICE_TABLE(of, lmh_table);
-- 
GitLab


From 1f43fad1018a47a3c1723608e64fa01c12bafb81 Mon Sep 17 00:00:00 2001
From: Thara Gopinath <thara.gopinath@linaro.org>
Date: Thu, 6 Jan 2022 12:31:38 -0500
Subject: [PATCH 1121/1586] dt-bindings: thermal: Add sm8150 compatible string
 for LMh

Extend the LMh dt binding document to include compatible string
supporting sm8150 SoC.

Signed-off-by: Thara Gopinath <thara.gopinath@linaro.org>
Acked-by: Rob Herring <robh@kernel.org>
Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Link: https://lore.kernel.org/r/20220106173138.411097-4-thara.gopinath@linaro.org
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 Documentation/devicetree/bindings/thermal/qcom-lmh.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/thermal/qcom-lmh.yaml b/Documentation/devicetree/bindings/thermal/qcom-lmh.yaml
index 289e9a8456003..a9b7388ca9aca 100644
--- a/Documentation/devicetree/bindings/thermal/qcom-lmh.yaml
+++ b/Documentation/devicetree/bindings/thermal/qcom-lmh.yaml
@@ -19,6 +19,7 @@ properties:
   compatible:
     enum:
       - qcom,sdm845-lmh
+      - qcom,sm8150-lmh
 
   reg:
     items:
-- 
GitLab


From e8ec6bb302074e41268f9764bf6328599682b84f Mon Sep 17 00:00:00 2001
From: Luca Weiss <luca@z3ntu.xyz>
Date: Sun, 20 Feb 2022 21:18:55 +0100
Subject: [PATCH 1122/1586] dt-bindings: thermal: tsens: Add msm8953 compatible

Document the compatible string for tsens found in msm8953.

Signed-off-by: Luca Weiss <luca@z3ntu.xyz>
Acked-by: Konrad Dybcio <konrad.dybcio@somainline.org>
Acked-by: Amit Kucheria <amitk@kernel.org>
Acked-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20220220201909.445468-3-luca@z3ntu.xyz
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 Documentation/devicetree/bindings/thermal/qcom-tsens.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml b/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml
index d3b9e9b600a28..b6406bcc683f0 100644
--- a/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml
+++ b/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml
@@ -43,6 +43,7 @@ properties:
       - description: v2 of TSENS
         items:
           - enum:
+              - qcom,msm8953-tsens
               - qcom,msm8996-tsens
               - qcom,msm8998-tsens
               - qcom,sc7180-tsens
-- 
GitLab


From 3a9abd6cece8321be821bf84406a94e3c483f31e Mon Sep 17 00:00:00 2001
From: Romain Naour <romain.naour@smile.fr>
Date: Fri, 18 Feb 2022 11:47:25 +0100
Subject: [PATCH 1123/1586] drivers/thermal/ti-soc-thermal: Add hwmon support

Expose ti-soc-thermal thermal sensors as HWMON devices.

  # sensors
  cpu_thermal-virtual-0
  Adapter: Virtual device
  temp1:        +54.2 C  (crit = +105.0 C)

  dspeve_thermal-virtual-0
  Adapter: Virtual device
  temp1:        +51.4 C  (crit = +105.0 C)

  gpu_thermal-virtual-0
  Adapter: Virtual device
  temp1:        +54.2 C  (crit = +105.0 C)

  iva_thermal-virtual-0
  Adapter: Virtual device
  temp1:        +54.6 C  (crit = +105.0 C)

  core_thermal-virtual-0
  Adapter: Virtual device
  temp1:        +52.6 C  (crit = +105.0 C)

Similar to imx_sc_thermal d2bc4dd91da6095a769fdc9bc519d3be7ad5f97a.

No need to take care of thermal_remove_hwmon_sysfs() since
devm_thermal_add_hwmon_sysfs() (a wrapper around devres) is
used. See c7fc403e40b0ea18976a59e968c23439a80809e8.

Signed-off-by: Romain Naour <romain.naour@smile.fr>
Link: https://lore.kernel.org/r/20220218104725.2718904-1-romain.naour@smile.fr
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/thermal/ti-soc-thermal/ti-thermal-common.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c
index f84375865c976..0959632b21708 100644
--- a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c
+++ b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c
@@ -21,6 +21,7 @@
 
 #include "ti-thermal.h"
 #include "ti-bandgap.h"
+#include "../thermal_hwmon.h"
 
 /* common data structures */
 struct ti_thermal_data {
@@ -189,6 +190,9 @@ int ti_thermal_expose_sensor(struct ti_bandgap *bgp, int id,
 	ti_bandgap_set_sensor_data(bgp, id, data);
 	ti_bandgap_write_update_interval(bgp, data->sensor_id, interval);
 
+	if (devm_thermal_add_hwmon_sysfs(data->ti_thermal))
+		dev_warn(bgp->dev, "failed to add hwmon sysfs attributes\n");
+
 	return 0;
 }
 
-- 
GitLab


From 1a7c9213d5f9bfd5e1bf4f87df1abc166ce48ffc Mon Sep 17 00:00:00 2001
From: Mikko Perttunen <mperttunen@nvidia.com>
Date: Wed, 15 Sep 2021 11:55:13 +0300
Subject: [PATCH 1124/1586] thermal: tegra-bpmp: Handle errors in BPMP response

The return value from tegra_bpmp_transfer indicates the success or
failure of the IPC transaction with BPMP. If the transaction
succeeded, we also need to check the actual command's result code.
Add code to do this.

Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
Acked-by: Thierry Reding <treding@nvidia.com>
Link: https://lore.kernel.org/r/20210915085517.1669675-1-mperttunen@nvidia.com
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/thermal/tegra/tegra-bpmp-thermal.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/thermal/tegra/tegra-bpmp-thermal.c b/drivers/thermal/tegra/tegra-bpmp-thermal.c
index 94f1da1dcd696..5affc3d196be8 100644
--- a/drivers/thermal/tegra/tegra-bpmp-thermal.c
+++ b/drivers/thermal/tegra/tegra-bpmp-thermal.c
@@ -52,6 +52,8 @@ static int tegra_bpmp_thermal_get_temp(void *data, int *out_temp)
 	err = tegra_bpmp_transfer(zone->tegra->bpmp, &msg);
 	if (err)
 		return err;
+	if (msg.rx.ret)
+		return -EINVAL;
 
 	*out_temp = reply.get_temp.temp;
 
@@ -63,6 +65,7 @@ static int tegra_bpmp_thermal_set_trips(void *data, int low, int high)
 	struct tegra_bpmp_thermal_zone *zone = data;
 	struct mrq_thermal_host_to_bpmp_request req;
 	struct tegra_bpmp_message msg;
+	int err;
 
 	memset(&req, 0, sizeof(req));
 	req.type = CMD_THERMAL_SET_TRIP;
@@ -76,7 +79,13 @@ static int tegra_bpmp_thermal_set_trips(void *data, int low, int high)
 	msg.tx.data = &req;
 	msg.tx.size = sizeof(req);
 
-	return tegra_bpmp_transfer(zone->tegra->bpmp, &msg);
+	err = tegra_bpmp_transfer(zone->tegra->bpmp, &msg);
+	if (err)
+		return err;
+	if (msg.rx.ret)
+		return -EINVAL;
+
+	return 0;
 }
 
 static void tz_device_update_work_fn(struct work_struct *work)
@@ -140,6 +149,8 @@ static int tegra_bpmp_thermal_get_num_zones(struct tegra_bpmp *bpmp,
 	err = tegra_bpmp_transfer(bpmp, &msg);
 	if (err)
 		return err;
+	if (msg.rx.ret)
+		return -EINVAL;
 
 	*num_zones = reply.get_num_zones.num;
 
-- 
GitLab


From 7eab0b9d48a825e8e845003f81eeaaf0bcb239fd Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Tue, 1 Mar 2022 10:14:12 -0800
Subject: [PATCH 1125/1586] thermal/drivers/brcmstb_thermal: Interrupt is
 optional

Utilize platform_get_irq_optional() to silence these messages:

brcmstb_thermal a581500.thermal: IRQ index 0 not found

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Link: https://lore.kernel.org/r/20220301181412.2008044-1-f.fainelli@gmail.com
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/thermal/broadcom/brcmstb_thermal.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/thermal/broadcom/brcmstb_thermal.c b/drivers/thermal/broadcom/brcmstb_thermal.c
index 8df5edef1ded1..0cedb8b4f00a4 100644
--- a/drivers/thermal/broadcom/brcmstb_thermal.c
+++ b/drivers/thermal/broadcom/brcmstb_thermal.c
@@ -351,7 +351,7 @@ static int brcmstb_thermal_probe(struct platform_device *pdev)
 
 	priv->thermal = thermal;
 
-	irq = platform_get_irq(pdev, 0);
+	irq = platform_get_irq_optional(pdev, 0);
 	if (irq >= 0) {
 		ret = devm_request_threaded_irq(&pdev->dev, irq, NULL,
 						brcmstb_tmon_irq_thread,
-- 
GitLab


From 1d6aab36a26ba44b114d7f8a857c430c9e0c32c9 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Sat, 5 Mar 2022 20:50:47 +0800
Subject: [PATCH 1126/1586] thermal/drivers/ti-soc-thermal: Remove unused
 function ti_thermal_get_temp()

commit b263b473bf62 ("thermal: ti-soc-thermal: Remove redundant code")
left behind this, remove it.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Reviewed-by: Bryan Brattlof <bb@ti.com>
Link: https://lore.kernel.org/r/20220305125047.26948-1-yuehaibing@huawei.com
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/thermal/ti-soc-thermal/ti-thermal-common.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c
index 0959632b21708..703039d8b937f 100644
--- a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c
+++ b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c
@@ -107,14 +107,6 @@ static inline int __ti_thermal_get_temp(void *devdata, int *temp)
 	return ret;
 }
 
-static inline int ti_thermal_get_temp(struct thermal_zone_device *thermal,
-				      int *temp)
-{
-	struct ti_thermal_data *data = thermal->devdata;
-
-	return __ti_thermal_get_temp(data, temp);
-}
-
 static int __ti_thermal_get_trend(void *p, int trip, enum thermal_trend *trend)
 {
 	struct ti_thermal_data *data = p;
-- 
GitLab


From a7da8a0955cbe81433afecad229d2a2ba60c3f0a Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Tue, 8 Mar 2022 07:56:48 +0100
Subject: [PATCH 1127/1586] MAINTAINERS: thermal: samsung: update Krzysztof
 Kozlowski's email

Use Krzysztof Kozlowski's @kernel.org account in dt-bindings maintainer
entry.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Link: https://lore.kernel.org/r/20220308065648.6443-1-krzysztof.kozlowski@canonical.com
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 82fe389618d04..e82b6aafa62e9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -17136,7 +17136,7 @@ F:	drivers/net/ethernet/samsung/sxgbe/
 
 SAMSUNG THERMAL DRIVER
 M:	Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
-M:	Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+M:	Krzysztof Kozlowski <krzk@kernel.org>
 L:	linux-pm@vger.kernel.org
 L:	linux-samsung-soc@vger.kernel.org
 S:	Maintained
-- 
GitLab


From daae161fd2e568b4f481b177b8be34374df98b68 Mon Sep 17 00:00:00 2001
From: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
Date: Mon, 17 Jan 2022 12:38:47 +0100
Subject: [PATCH 1128/1586] md: raid1/raid10: drop pending_cnt

Those counters are not necessary after commit 11bb45e8aaf6 ("md: drop queue
limitation for RAID1 and RAID10"). Remove them from all code (conf and
plug structs). raid1_plug_cb and raid10_plug_cb are identical, so move
definition of raid1_plug_cb to common raid1-10 definitions and use it for
RAID10 too.

Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
Signed-off-by: Song Liu <song@kernel.org>
---
 drivers/md/raid1-10.c |  5 +++++
 drivers/md/raid1.c    | 11 -----------
 drivers/md/raid1.h    |  1 -
 drivers/md/raid10.c   | 17 +++--------------
 drivers/md/raid10.h   |  1 -
 5 files changed, 8 insertions(+), 27 deletions(-)

diff --git a/drivers/md/raid1-10.c b/drivers/md/raid1-10.c
index 83f9a4f3d82e0..e61f6cad4e08e 100644
--- a/drivers/md/raid1-10.c
+++ b/drivers/md/raid1-10.c
@@ -28,6 +28,11 @@ struct resync_pages {
 	struct page	*pages[RESYNC_PAGES];
 };
 
+struct raid1_plug_cb {
+	struct blk_plug_cb	cb;
+	struct bio_list		pending;
+};
+
 static void rbio_pool_free(void *rbio, void *data)
 {
 	kfree(rbio);
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index c3288d46948de..0fb4d252034b9 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -824,7 +824,6 @@ static void flush_pending_writes(struct r1conf *conf)
 		struct bio *bio;
 
 		bio = bio_list_get(&conf->pending_bio_list);
-		conf->pending_count = 0;
 		spin_unlock_irq(&conf->device_lock);
 
 		/*
@@ -1167,12 +1166,6 @@ free_pages:
 	bio_put(behind_bio);
 }
 
-struct raid1_plug_cb {
-	struct blk_plug_cb	cb;
-	struct bio_list		pending;
-	int			pending_cnt;
-};
-
 static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule)
 {
 	struct raid1_plug_cb *plug = container_of(cb, struct raid1_plug_cb,
@@ -1184,7 +1177,6 @@ static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule)
 	if (from_schedule || current->bio_list) {
 		spin_lock_irq(&conf->device_lock);
 		bio_list_merge(&conf->pending_bio_list, &plug->pending);
-		conf->pending_count += plug->pending_cnt;
 		spin_unlock_irq(&conf->device_lock);
 		wake_up(&conf->wait_barrier);
 		md_wakeup_thread(mddev->thread);
@@ -1588,11 +1580,9 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
 			plug = NULL;
 		if (plug) {
 			bio_list_add(&plug->pending, mbio);
-			plug->pending_cnt++;
 		} else {
 			spin_lock_irqsave(&conf->device_lock, flags);
 			bio_list_add(&conf->pending_bio_list, mbio);
-			conf->pending_count++;
 			spin_unlock_irqrestore(&conf->device_lock, flags);
 			md_wakeup_thread(mddev->thread);
 		}
@@ -3058,7 +3048,6 @@ static struct r1conf *setup_conf(struct mddev *mddev)
 	init_waitqueue_head(&conf->wait_barrier);
 
 	bio_list_init(&conf->pending_bio_list);
-	conf->pending_count = 0;
 	conf->recovery_disabled = mddev->recovery_disabled - 1;
 
 	err = -EIO;
diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h
index ccf10e59b1168..ebb6788820e79 100644
--- a/drivers/md/raid1.h
+++ b/drivers/md/raid1.h
@@ -87,7 +87,6 @@ struct r1conf {
 
 	/* queue pending writes to be submitted on unplug */
 	struct bio_list		pending_bio_list;
-	int			pending_count;
 
 	/* for use when syncing mirrors:
 	 * We don't allow both normal IO and resync/recovery IO at
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 5dd2e17e1d0ea..b369ebb965a95 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -861,7 +861,6 @@ static void flush_pending_writes(struct r10conf *conf)
 		struct bio *bio;
 
 		bio = bio_list_get(&conf->pending_bio_list);
-		conf->pending_count = 0;
 		spin_unlock_irq(&conf->device_lock);
 
 		/*
@@ -1054,16 +1053,9 @@ static sector_t choose_data_offset(struct r10bio *r10_bio,
 		return rdev->new_data_offset;
 }
 
-struct raid10_plug_cb {
-	struct blk_plug_cb	cb;
-	struct bio_list		pending;
-	int			pending_cnt;
-};
-
 static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
 {
-	struct raid10_plug_cb *plug = container_of(cb, struct raid10_plug_cb,
-						   cb);
+	struct raid1_plug_cb *plug = container_of(cb, struct raid1_plug_cb, cb);
 	struct mddev *mddev = plug->cb.data;
 	struct r10conf *conf = mddev->private;
 	struct bio *bio;
@@ -1071,7 +1063,6 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
 	if (from_schedule || current->bio_list) {
 		spin_lock_irq(&conf->device_lock);
 		bio_list_merge(&conf->pending_bio_list, &plug->pending);
-		conf->pending_count += plug->pending_cnt;
 		spin_unlock_irq(&conf->device_lock);
 		wake_up(&conf->wait_barrier);
 		md_wakeup_thread(mddev->thread);
@@ -1238,7 +1229,7 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
 	const unsigned long do_fua = (bio->bi_opf & REQ_FUA);
 	unsigned long flags;
 	struct blk_plug_cb *cb;
-	struct raid10_plug_cb *plug = NULL;
+	struct raid1_plug_cb *plug = NULL;
 	struct r10conf *conf = mddev->private;
 	struct md_rdev *rdev;
 	int devnum = r10_bio->devs[n_copy].devnum;
@@ -1280,16 +1271,14 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
 
 	cb = blk_check_plugged(raid10_unplug, mddev, sizeof(*plug));
 	if (cb)
-		plug = container_of(cb, struct raid10_plug_cb, cb);
+		plug = container_of(cb, struct raid1_plug_cb, cb);
 	else
 		plug = NULL;
 	if (plug) {
 		bio_list_add(&plug->pending, mbio);
-		plug->pending_cnt++;
 	} else {
 		spin_lock_irqsave(&conf->device_lock, flags);
 		bio_list_add(&conf->pending_bio_list, mbio);
-		conf->pending_count++;
 		spin_unlock_irqrestore(&conf->device_lock, flags);
 		md_wakeup_thread(mddev->thread);
 	}
diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h
index c34bb196790e5..5c0804d8bb1fe 100644
--- a/drivers/md/raid10.h
+++ b/drivers/md/raid10.h
@@ -75,7 +75,6 @@ struct r10conf {
 
 	/* queue pending writes and submit them on unplug */
 	struct bio_list		pending_bio_list;
-	int			pending_count;
 
 	spinlock_t		resync_lock;
 	atomic_t		nr_pending;
-- 
GitLab


From a5359ddd052860bacf957e65fe819c63e974b3a6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dirk=20M=C3=BCller?= <dmueller@suse.de>
Date: Tue, 8 Feb 2022 17:50:50 +0100
Subject: [PATCH 1129/1586] lib/raid6/test: fix multiple definition linking
 error
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

GCC 10+ defaults to -fno-common, which enforces proper declaration of
external references using "extern". without this change a link would
fail with:

  lib/raid6/test/algos.c:28: multiple definition of `raid6_call';
  lib/raid6/test/test.c:22: first defined here

the pq.h header that is included already includes an extern declaration
so we can just remove the redundant one here.

Cc: <stable@vger.kernel.org>
Signed-off-by: Dirk Müller <dmueller@suse.de>
Reviewed-by: Paul Menzel <pmenzel@molgen.mpg.de>
Signed-off-by: Song Liu <song@kernel.org>
---
 lib/raid6/test/test.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/lib/raid6/test/test.c b/lib/raid6/test/test.c
index a3cf071941ab4..841a55242abaa 100644
--- a/lib/raid6/test/test.c
+++ b/lib/raid6/test/test.c
@@ -19,7 +19,6 @@
 #define NDISKS		16	/* Including P and Q */
 
 const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE)));
-struct raid6_calls raid6_call;
 
 char *dataptrs[NDISKS];
 char data[NDISKS][PAGE_SIZE] __attribute__((aligned(PAGE_SIZE)));
-- 
GitLab


From 633174a7046ec3b4572bec24ef98e6ee89bce14b Mon Sep 17 00:00:00 2001
From: Paul Menzel <pmenzel@molgen.mpg.de>
Date: Tue, 8 Feb 2022 16:21:48 +0100
Subject: [PATCH 1130/1586] lib/raid6/test/Makefile: Use $(pound) instead of \#
 for Make 4.3
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Buidling raid6test on Ubuntu 21.10 (ppc64le) with GNU Make 4.3 shows the
errors below:

    $ cd lib/raid6/test/
    $ make
    <stdin>:1:1: error: stray ‘\’ in program
    <stdin>:1:2: error: stray ‘#’ in program
    <stdin>:1:11: error: expected ‘=’, ‘,’, ‘;’, ‘asm’ or ‘__attribute__’ \
        before ‘<’ token

    [...]

The errors come from the HAS_ALTIVEC test, which fails, and the POWER
optimized versions are not built. That’s also reason nobody noticed on the
other architectures.

GNU Make 4.3 does not remove the backslash anymore. From the 4.3 release
announcment:

> * WARNING: Backward-incompatibility!
>   Number signs (#) appearing inside a macro reference or function invocation
>   no longer introduce comments and should not be escaped with backslashes:
>   thus a call such as:
>     foo := $(shell echo '#')
>   is legal.  Previously the number sign needed to be escaped, for example:
>     foo := $(shell echo '\#')
>   Now this latter will resolve to "\#".  If you want to write makefiles
>   portable to both versions, assign the number sign to a variable:
>     H := \#
>     foo := $(shell echo '$H')
>   This was claimed to be fixed in 3.81, but wasn't, for some reason.
>   To detect this change search for 'nocomment' in the .FEATURES variable.

So, do the same as commit 9564a8cf422d ("Kbuild: fix # escaping in .cmd
files for future Make") and commit 929bef467771 ("bpf: Use $(pound) instead
of \# in Makefiles") and define and use a $(pound) variable.

Reference for the change in make:
https://git.savannah.gnu.org/cgit/make.git/commit/?id=c6966b323811c37acedff05b57

Cc: Matt Brown <matthew.brown.dev@gmail.com>
Signed-off-by: Paul Menzel <pmenzel@molgen.mpg.de>
Signed-off-by: Song Liu <song@kernel.org>
---
 lib/raid6/test/Makefile | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile
index a4c7cd74cff58..4fb7700a741bd 100644
--- a/lib/raid6/test/Makefile
+++ b/lib/raid6/test/Makefile
@@ -4,6 +4,8 @@
 # from userspace.
 #
 
+pound := \#
+
 CC	 = gcc
 OPTFLAGS = -O2			# Adjust as desired
 CFLAGS	 = -I.. -I ../../../include -g $(OPTFLAGS)
@@ -42,7 +44,7 @@ else ifeq ($(HAS_NEON),yes)
         OBJS   += neon.o neon1.o neon2.o neon4.o neon8.o recov_neon.o recov_neon_inner.o
         CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1
 else
-        HAS_ALTIVEC := $(shell printf '\#include <altivec.h>\nvector int a;\n' |\
+        HAS_ALTIVEC := $(shell printf '$(pound)include <altivec.h>\nvector int a;\n' |\
                          gcc -c -x c - >/dev/null && rm ./-.o && echo yes)
         ifeq ($(HAS_ALTIVEC),yes)
                 CFLAGS += -I../../../arch/powerpc/include
-- 
GitLab


From 5b401e4e9ac4c6a62b9f6562d0250c1a56477ff2 Mon Sep 17 00:00:00 2001
From: Paul Menzel <pmenzel@molgen.mpg.de>
Date: Tue, 8 Feb 2022 16:21:50 +0100
Subject: [PATCH 1131/1586] lib/raid6: Include <asm/ppc-opcode.h> for VPERMXOR
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On Ubuntu 21.10 (ppc64le) building raid6test with gcc (Ubuntu
11.2.0-7ubuntu2) 11.2.0 fails with the error below.

    gcc -I.. -I ../../../include -g -O2                       \
             -I../../../arch/powerpc/include -DCONFIG_ALTIVEC \
             -c -o vpermxor1.o vpermxor1.c
    vpermxor1.c: In function ‘raid6_vpermxor1_gen_syndrome_real’:
    vpermxor1.c:64:29: error: expected string literal before ‘VPERMXOR’
       64 |   asm(VPERMXOR(%0,%1,%2,%3):"=v"(wq0):"v"(gf_high), "v"(gf_low), "v"(wq0));
          |       ^~~~~~~~
    make: *** [Makefile:58: vpermxor1.o] Error 1

So, include the header asm/ppc-opcode.h defining this macro also when
not building the Linux kernel but only this too.

Cc: Matt Brown <matthew.brown.dev@gmail.com>
Signed-off-by: Paul Menzel <pmenzel@molgen.mpg.de>
Signed-off-by: Song Liu <song@kernel.org>
---
 lib/raid6/vpermxor.uc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/raid6/vpermxor.uc b/lib/raid6/vpermxor.uc
index 10475dc423c15..1bfb127fbfe81 100644
--- a/lib/raid6/vpermxor.uc
+++ b/lib/raid6/vpermxor.uc
@@ -24,9 +24,9 @@
 #ifdef CONFIG_ALTIVEC
 
 #include <altivec.h>
+#include <asm/ppc-opcode.h>
 #ifdef __KERNEL__
 #include <asm/cputable.h>
-#include <asm/ppc-opcode.h>
 #include <asm/switch_to.h>
 #endif
 
-- 
GitLab


From 7d959f6e978cbbca90e26a192cc39480e977182f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 3 Mar 2022 15:19:33 -0800
Subject: [PATCH 1132/1586] md: use msleep() in md_notify_reboot()

Calling mdelay(1000) from process context, even while a reboot
is in progress, does not make sense.

Using msleep() allows other threads to make progress.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: linux-raid@vger.kernel.org
Signed-off-by: Song Liu <song@kernel.org>
---
 drivers/md/md.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index f88a9e948f3eb..d059e21f3c356 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -9582,7 +9582,7 @@ static int md_notify_reboot(struct notifier_block *this,
 	 * driver, we do want to have a safe RAID driver ...
 	 */
 	if (need_delay)
-		mdelay(1000*1);
+		msleep(1000);
 
 	return NOTIFY_DONE;
 }
-- 
GitLab


From 15729ff8143f8135b03988a100a19e66d7cb7ecd Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@linaro.org>
Date: Thu, 25 Nov 2021 19:15:10 +0100
Subject: [PATCH 1133/1586] Revert "Revert "block, bfq: honor already-setup
 queue merges""
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A crash [1] happened to be triggered in conjunction with commit
2d52c58b9c9b ("block, bfq: honor already-setup queue merges"). The
latter was then reverted by commit ebc69e897e17 ("Revert "block, bfq:
honor already-setup queue merges""). Yet, the reverted commit was not
the one introducing the bug. In fact, it actually triggered a UAF
introduced by a different commit, and now fixed by commit d29bd41428cf
("block, bfq: reset last_bfqq_created on group change").

So, there is no point in keeping commit 2d52c58b9c9b ("block, bfq:
honor already-setup queue merges") out. This commit restores it.

[1] https://bugzilla.kernel.org/show_bug.cgi?id=214503

Reported-by: Holger Hoffstätte <holger@applied-asynchrony.com>
Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
Link: https://lore.kernel.org/r/20211125181510.15004-1-paolo.valente@linaro.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bfq-iosched.c | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index e234eb408ed0a..975fd30a07c93 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -2782,6 +2782,15 @@ bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
 	 * are likely to increase the throughput.
 	 */
 	bfqq->new_bfqq = new_bfqq;
+	/*
+	 * The above assignment schedules the following redirections:
+	 * each time some I/O for bfqq arrives, the process that
+	 * generated that I/O is disassociated from bfqq and
+	 * associated with new_bfqq. Here we increases new_bfqq->ref
+	 * in advance, adding the number of processes that are
+	 * expected to be associated with new_bfqq as they happen to
+	 * issue I/O.
+	 */
 	new_bfqq->ref += process_refs;
 	return new_bfqq;
 }
@@ -2844,6 +2853,10 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 {
 	struct bfq_queue *in_service_bfqq, *new_bfqq;
 
+	/* if a merge has already been setup, then proceed with that first */
+	if (bfqq->new_bfqq)
+		return bfqq->new_bfqq;
+
 	/*
 	 * Check delayed stable merge for rotational or non-queueing
 	 * devs. For this branch to be executed, bfqq must not be
@@ -2945,9 +2958,6 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 	if (bfq_too_late_for_merging(bfqq))
 		return NULL;
 
-	if (bfqq->new_bfqq)
-		return bfqq->new_bfqq;
-
 	if (!io_struct || unlikely(bfqq == &bfqd->oom_bfqq))
 		return NULL;
 
-- 
GitLab


From 4d805131abf219e0019715f1cf29763c613aae07 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Tue, 8 Mar 2022 15:32:14 +0800
Subject: [PATCH 1134/1586] blk-mq: figure out correct numa node for hw queue

The current code always uses default queue map and hw queue index
for figuring out the numa node for hw queue, this way isn't correct
because blk-mq supports three queue maps, and the correct queue map
should be used for the specified hw queue.

Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20220308073219.91173-2-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c | 36 ++++++++++++++++++++++++++++++------
 1 file changed, 30 insertions(+), 6 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index a05ce77250316..5d25abf9e551c 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -3107,15 +3107,41 @@ void blk_mq_free_rq_map(struct blk_mq_tags *tags)
 	blk_mq_free_tags(tags);
 }
 
+static enum hctx_type hctx_idx_to_type(struct blk_mq_tag_set *set,
+		unsigned int hctx_idx)
+{
+	int i;
+
+	for (i = 0; i < set->nr_maps; i++) {
+		unsigned int start = set->map[i].queue_offset;
+		unsigned int end = start + set->map[i].nr_queues;
+
+		if (hctx_idx >= start && hctx_idx < end)
+			break;
+	}
+
+	if (i >= set->nr_maps)
+		i = HCTX_TYPE_DEFAULT;
+
+	return i;
+}
+
+static int blk_mq_get_hctx_node(struct blk_mq_tag_set *set,
+		unsigned int hctx_idx)
+{
+	enum hctx_type type = hctx_idx_to_type(set, hctx_idx);
+
+	return blk_mq_hw_queue_to_node(&set->map[type], hctx_idx);
+}
+
 static struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
 					       unsigned int hctx_idx,
 					       unsigned int nr_tags,
 					       unsigned int reserved_tags)
 {
+	int node = blk_mq_get_hctx_node(set, hctx_idx);
 	struct blk_mq_tags *tags;
-	int node;
 
-	node = blk_mq_hw_queue_to_node(&set->map[HCTX_TYPE_DEFAULT], hctx_idx);
 	if (node == NUMA_NO_NODE)
 		node = set->numa_node;
 
@@ -3164,10 +3190,9 @@ static int blk_mq_alloc_rqs(struct blk_mq_tag_set *set,
 			    unsigned int hctx_idx, unsigned int depth)
 {
 	unsigned int i, j, entries_per_page, max_order = 4;
+	int node = blk_mq_get_hctx_node(set, hctx_idx);
 	size_t rq_size, left;
-	int node;
 
-	node = blk_mq_hw_queue_to_node(&set->map[HCTX_TYPE_DEFAULT], hctx_idx);
 	if (node == NUMA_NO_NODE)
 		node = set->numa_node;
 
@@ -3941,10 +3966,9 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
 	/* protect against switching io scheduler  */
 	mutex_lock(&q->sysfs_lock);
 	for (i = 0; i < set->nr_hw_queues; i++) {
-		int node;
+		int node = blk_mq_get_hctx_node(set, i);
 		struct blk_mq_hw_ctx *hctx;
 
-		node = blk_mq_hw_queue_to_node(&set->map[HCTX_TYPE_DEFAULT], i);
 		/*
 		 * If the hw queue has been mapped to another numa node,
 		 * we need to realloc the hctx. If allocation fails, fallback
-- 
GitLab


From 306f13ee16424805d602d427a66ea38078d473b0 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Tue, 8 Mar 2022 15:32:15 +0800
Subject: [PATCH 1135/1586] blk-mq: simplify reallocation of hw ctxs a bit

blk_mq_alloc_and_init_hctx() has already taken reuse into account, so
no need to do it outside, then we can simplify blk_mq_realloc_hw_ctxs().

Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20220308073219.91173-3-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c | 33 ++++++++++++++-------------------
 1 file changed, 14 insertions(+), 19 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 5d25abf9e551c..64fddb36c93ce 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -3966,29 +3966,24 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
 	/* protect against switching io scheduler  */
 	mutex_lock(&q->sysfs_lock);
 	for (i = 0; i < set->nr_hw_queues; i++) {
+		int old_node;
 		int node = blk_mq_get_hctx_node(set, i);
-		struct blk_mq_hw_ctx *hctx;
+		struct blk_mq_hw_ctx *old_hctx = hctxs[i];
 
-		/*
-		 * If the hw queue has been mapped to another numa node,
-		 * we need to realloc the hctx. If allocation fails, fallback
-		 * to use the previous one.
-		 */
-		if (hctxs[i] && (hctxs[i]->numa_node == node))
-			continue;
+		if (old_hctx) {
+			old_node = old_hctx->numa_node;
+			blk_mq_exit_hctx(q, set, old_hctx, i);
+		}
 
-		hctx = blk_mq_alloc_and_init_hctx(set, q, i, node);
-		if (hctx) {
-			if (hctxs[i])
-				blk_mq_exit_hctx(q, set, hctxs[i], i);
-			hctxs[i] = hctx;
-		} else {
-			if (hctxs[i])
-				pr_warn("Allocate new hctx on node %d fails,\
-						fallback to previous one on node %d\n",
-						node, hctxs[i]->numa_node);
-			else
+		hctxs[i] = blk_mq_alloc_and_init_hctx(set, q, i, node);
+		if (!hctxs[i]) {
+			if (!old_hctx)
 				break;
+			pr_warn("Allocate new hctx on node %d fails, fallback to previous one on node %d\n",
+					node, old_node);
+			hctxs[i] = blk_mq_alloc_and_init_hctx(set, q, i,
+					old_node);
+			WARN_ON_ONCE(!hctxs[i]);
 		}
 	}
 	/*
-- 
GitLab


From 42ee3061293e206bc0f3a084feea1304c61c137a Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Tue, 8 Mar 2022 15:32:16 +0800
Subject: [PATCH 1136/1586] blk-mq: reconfigure poll after queue map is changed

queue map can be changed when updating nr_hw_queues, so we need to
reconfigure queue's poll capability. Add one helper for doing this job.

Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20220308073219.91173-4-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 64fddb36c93ce..57ae9df0f4dc5 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -4010,6 +4010,17 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
 	mutex_unlock(&q->sysfs_lock);
 }
 
+static void blk_mq_update_poll_flag(struct request_queue *q)
+{
+	struct blk_mq_tag_set *set = q->tag_set;
+
+	if (set->nr_maps > HCTX_TYPE_POLL &&
+	    set->map[HCTX_TYPE_POLL].nr_queues)
+		blk_queue_flag_set(QUEUE_FLAG_POLL, q);
+	else
+		blk_queue_flag_clear(QUEUE_FLAG_POLL, q);
+}
+
 int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 		struct request_queue *q)
 {
@@ -4044,9 +4055,7 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 	q->tag_set = set;
 
 	q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT;
-	if (set->nr_maps > HCTX_TYPE_POLL &&
-	    set->map[HCTX_TYPE_POLL].nr_queues)
-		blk_queue_flag_set(QUEUE_FLAG_POLL, q);
+	blk_mq_update_poll_flag(q);
 
 	INIT_DELAYED_WORK(&q->requeue_work, blk_mq_requeue_work);
 	INIT_LIST_HEAD(&q->requeue_list);
@@ -4512,6 +4521,7 @@ fallback:
 	blk_mq_update_queue_map(set);
 	list_for_each_entry(q, &set->tag_list, tag_set_list) {
 		blk_mq_realloc_hw_ctxs(set, q);
+		blk_mq_update_poll_flag(q);
 		if (q->nr_hw_queues != set->nr_hw_queues) {
 			int i = prev_nr_hw_queues;
 
-- 
GitLab


From de0328d3a253a339be14a80fe2a0256ec26867da Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Tue, 8 Mar 2022 15:32:17 +0800
Subject: [PATCH 1137/1586] block: mtip32xx: don't touch q->queue_hw_ctx

q->queue_hw_ctx is really one blk-mq internal structure for retrieving
hctx via its index, not supposed to be used by drivers. Meantime drivers
can get the tags structure easily from tagset.

Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20220308073219.91173-5-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/mtip32xx/mtip32xx.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index cba956881d55c..2d43ab5b5cc04 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -160,9 +160,7 @@ static bool mtip_check_surprise_removal(struct driver_data *dd)
 static struct mtip_cmd *mtip_cmd_from_tag(struct driver_data *dd,
 					  unsigned int tag)
 {
-	struct blk_mq_hw_ctx *hctx = dd->queue->queue_hw_ctx[0];
-
-	return blk_mq_rq_to_pdu(blk_mq_tag_to_rq(hctx->tags, tag));
+	return blk_mq_rq_to_pdu(blk_mq_tag_to_rq(dd->tags.tags[0], tag));
 }
 
 /*
-- 
GitLab


From 4f481208749a22d3570073e629dbc27d7d27c8da Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Tue, 8 Mar 2022 15:32:18 +0800
Subject: [PATCH 1138/1586] blk-mq: prepare for implementing hctx table via
 xarray

It is inevitable to cause use-after-free on q->queue_hw_ctx between
queue_for_each_hw_ctx() and blk_mq_update_nr_hw_queues(). And converting
to xarray can fix the uaf, meantime code gets cleaner.

Prepare for converting q->queue_hctx_ctx into xarray, one thing is that
xa_for_each() can only accept 'unsigned long' as index, so changes type
of hctx index of queue_for_each_hw_ctx() into 'unsigned long'.

Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20220308073219.91173-6-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq-debugfs.c        |  6 +++---
 block/blk-mq-sched.c          |  9 +++++----
 block/blk-mq-sysfs.c          | 16 ++++++++++------
 block/blk-mq-tag.c            |  2 +-
 block/blk-mq.c                | 30 ++++++++++++++++--------------
 drivers/block/rnbd/rnbd-clt.c |  2 +-
 6 files changed, 36 insertions(+), 29 deletions(-)

diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 3a790eb4995c6..e2880f6deb348 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -707,7 +707,7 @@ static void debugfs_create_files(struct dentry *parent, void *data,
 void blk_mq_debugfs_register(struct request_queue *q)
 {
 	struct blk_mq_hw_ctx *hctx;
-	int i;
+	unsigned long i;
 
 	debugfs_create_files(q->debugfs_dir, q, blk_mq_debugfs_queue_attrs);
 
@@ -780,7 +780,7 @@ void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx)
 void blk_mq_debugfs_register_hctxs(struct request_queue *q)
 {
 	struct blk_mq_hw_ctx *hctx;
-	int i;
+	unsigned long i;
 
 	queue_for_each_hw_ctx(q, hctx, i)
 		blk_mq_debugfs_register_hctx(q, hctx);
@@ -789,7 +789,7 @@ void blk_mq_debugfs_register_hctxs(struct request_queue *q)
 void blk_mq_debugfs_unregister_hctxs(struct request_queue *q)
 {
 	struct blk_mq_hw_ctx *hctx;
-	int i;
+	unsigned long i;
 
 	queue_for_each_hw_ctx(q, hctx, i)
 		blk_mq_debugfs_unregister_hctx(hctx);
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 55488ba978232..e6ad8f7614742 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -515,7 +515,7 @@ static void blk_mq_exit_sched_shared_tags(struct request_queue *queue)
 static void blk_mq_sched_tags_teardown(struct request_queue *q, unsigned int flags)
 {
 	struct blk_mq_hw_ctx *hctx;
-	int i;
+	unsigned long i;
 
 	queue_for_each_hw_ctx(q, hctx, i) {
 		if (hctx->sched_tags) {
@@ -550,9 +550,10 @@ static int blk_mq_init_sched_shared_tags(struct request_queue *queue)
 
 int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
 {
-	unsigned int i, flags = q->tag_set->flags;
+	unsigned int flags = q->tag_set->flags;
 	struct blk_mq_hw_ctx *hctx;
 	struct elevator_queue *eq;
+	unsigned long i;
 	int ret;
 
 	if (!e) {
@@ -618,7 +619,7 @@ err_free_map_and_rqs:
 void blk_mq_sched_free_rqs(struct request_queue *q)
 {
 	struct blk_mq_hw_ctx *hctx;
-	int i;
+	unsigned long i;
 
 	if (blk_mq_is_shared_tags(q->tag_set->flags)) {
 		blk_mq_free_rqs(q->tag_set, q->sched_shared_tags,
@@ -635,7 +636,7 @@ void blk_mq_sched_free_rqs(struct request_queue *q)
 void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e)
 {
 	struct blk_mq_hw_ctx *hctx;
-	unsigned int i;
+	unsigned long i;
 	unsigned int flags = 0;
 
 	queue_for_each_hw_ctx(q, hctx, i) {
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index 6747865740750..c08426975856e 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -206,7 +206,7 @@ static int blk_mq_register_hctx(struct blk_mq_hw_ctx *hctx)
 void blk_mq_unregister_dev(struct device *dev, struct request_queue *q)
 {
 	struct blk_mq_hw_ctx *hctx;
-	int i;
+	unsigned long i;
 
 	lockdep_assert_held(&q->sysfs_dir_lock);
 
@@ -255,7 +255,8 @@ void blk_mq_sysfs_init(struct request_queue *q)
 int __blk_mq_register_dev(struct device *dev, struct request_queue *q)
 {
 	struct blk_mq_hw_ctx *hctx;
-	int ret, i;
+	unsigned long i, j;
+	int ret;
 
 	WARN_ON_ONCE(!q->kobj.parent);
 	lockdep_assert_held(&q->sysfs_dir_lock);
@@ -278,8 +279,10 @@ out:
 	return ret;
 
 unreg:
-	while (--i >= 0)
-		blk_mq_unregister_hctx(q->queue_hw_ctx[i]);
+	queue_for_each_hw_ctx(q, hctx, j) {
+		if (j < i)
+			blk_mq_unregister_hctx(hctx);
+	}
 
 	kobject_uevent(q->mq_kobj, KOBJ_REMOVE);
 	kobject_del(q->mq_kobj);
@@ -290,7 +293,7 @@ unreg:
 void blk_mq_sysfs_unregister(struct request_queue *q)
 {
 	struct blk_mq_hw_ctx *hctx;
-	int i;
+	unsigned long i;
 
 	mutex_lock(&q->sysfs_dir_lock);
 	if (!q->mq_sysfs_init_done)
@@ -306,7 +309,8 @@ unlock:
 int blk_mq_sysfs_register(struct request_queue *q)
 {
 	struct blk_mq_hw_ctx *hctx;
-	int i, ret = 0;
+	unsigned long i;
+	int ret = 0;
 
 	mutex_lock(&q->sysfs_dir_lock);
 	if (!q->mq_sysfs_init_done)
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 0fd409b8e86ef..1850a4225e12c 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -515,7 +515,7 @@ void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_tag_iter_fn *fn,
 		bt_for_each(NULL, q, btags, fn, priv, false);
 	} else {
 		struct blk_mq_hw_ctx *hctx;
-		int i;
+		unsigned long i;
 
 		queue_for_each_hw_ctx(q, hctx, i) {
 			struct blk_mq_tags *tags = hctx->tags;
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 57ae9df0f4dc5..bffdd71c670df 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -312,7 +312,7 @@ EXPORT_SYMBOL_GPL(blk_mq_unquiesce_queue);
 void blk_mq_wake_waiters(struct request_queue *q)
 {
 	struct blk_mq_hw_ctx *hctx;
-	unsigned int i;
+	unsigned long i;
 
 	queue_for_each_hw_ctx(q, hctx, i)
 		if (blk_mq_hw_queue_mapped(hctx))
@@ -1442,7 +1442,7 @@ static void blk_mq_timeout_work(struct work_struct *work)
 		container_of(work, struct request_queue, timeout_work);
 	unsigned long next = 0;
 	struct blk_mq_hw_ctx *hctx;
-	int i;
+	unsigned long i;
 
 	/* A deadlock might occur if a request is stuck requiring a
 	 * timeout at the same time a queue freeze is waiting
@@ -2143,7 +2143,7 @@ static struct blk_mq_hw_ctx *blk_mq_get_sq_hctx(struct request_queue *q)
 void blk_mq_run_hw_queues(struct request_queue *q, bool async)
 {
 	struct blk_mq_hw_ctx *hctx, *sq_hctx;
-	int i;
+	unsigned long i;
 
 	sq_hctx = NULL;
 	if (blk_mq_has_sqsched(q))
@@ -2171,7 +2171,7 @@ EXPORT_SYMBOL(blk_mq_run_hw_queues);
 void blk_mq_delay_run_hw_queues(struct request_queue *q, unsigned long msecs)
 {
 	struct blk_mq_hw_ctx *hctx, *sq_hctx;
-	int i;
+	unsigned long i;
 
 	sq_hctx = NULL;
 	if (blk_mq_has_sqsched(q))
@@ -2209,7 +2209,7 @@ EXPORT_SYMBOL(blk_mq_delay_run_hw_queues);
 bool blk_mq_queue_stopped(struct request_queue *q)
 {
 	struct blk_mq_hw_ctx *hctx;
-	int i;
+	unsigned long i;
 
 	queue_for_each_hw_ctx(q, hctx, i)
 		if (blk_mq_hctx_stopped(hctx))
@@ -2248,7 +2248,7 @@ EXPORT_SYMBOL(blk_mq_stop_hw_queue);
 void blk_mq_stop_hw_queues(struct request_queue *q)
 {
 	struct blk_mq_hw_ctx *hctx;
-	int i;
+	unsigned long i;
 
 	queue_for_each_hw_ctx(q, hctx, i)
 		blk_mq_stop_hw_queue(hctx);
@@ -2266,7 +2266,7 @@ EXPORT_SYMBOL(blk_mq_start_hw_queue);
 void blk_mq_start_hw_queues(struct request_queue *q)
 {
 	struct blk_mq_hw_ctx *hctx;
-	int i;
+	unsigned long i;
 
 	queue_for_each_hw_ctx(q, hctx, i)
 		blk_mq_start_hw_queue(hctx);
@@ -2286,7 +2286,7 @@ EXPORT_SYMBOL_GPL(blk_mq_start_stopped_hw_queue);
 void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async)
 {
 	struct blk_mq_hw_ctx *hctx;
-	int i;
+	unsigned long i;
 
 	queue_for_each_hw_ctx(q, hctx, i)
 		blk_mq_start_stopped_hw_queue(hctx, async);
@@ -3446,7 +3446,7 @@ static void blk_mq_exit_hw_queues(struct request_queue *q,
 		struct blk_mq_tag_set *set, int nr_queue)
 {
 	struct blk_mq_hw_ctx *hctx;
-	unsigned int i;
+	unsigned long i;
 
 	queue_for_each_hw_ctx(q, hctx, i) {
 		if (i == nr_queue)
@@ -3637,7 +3637,8 @@ static void __blk_mq_free_map_and_rqs(struct blk_mq_tag_set *set,
 
 static void blk_mq_map_swqueue(struct request_queue *q)
 {
-	unsigned int i, j, hctx_idx;
+	unsigned int j, hctx_idx;
+	unsigned long i;
 	struct blk_mq_hw_ctx *hctx;
 	struct blk_mq_ctx *ctx;
 	struct blk_mq_tag_set *set = q->tag_set;
@@ -3744,7 +3745,7 @@ static void blk_mq_map_swqueue(struct request_queue *q)
 static void queue_set_hctx_shared(struct request_queue *q, bool shared)
 {
 	struct blk_mq_hw_ctx *hctx;
-	int i;
+	unsigned long i;
 
 	queue_for_each_hw_ctx(q, hctx, i) {
 		if (shared) {
@@ -3844,7 +3845,7 @@ static int blk_mq_alloc_ctxs(struct request_queue *q)
 void blk_mq_release(struct request_queue *q)
 {
 	struct blk_mq_hw_ctx *hctx, *next;
-	int i;
+	unsigned long i;
 
 	queue_for_each_hw_ctx(q, hctx, i)
 		WARN_ON_ONCE(hctx && list_empty(&hctx->hctx_list));
@@ -4362,7 +4363,8 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
 {
 	struct blk_mq_tag_set *set = q->tag_set;
 	struct blk_mq_hw_ctx *hctx;
-	int i, ret;
+	int ret;
+	unsigned long i;
 
 	if (!set)
 		return -EINVAL;
@@ -4738,7 +4740,7 @@ void blk_mq_cancel_work_sync(struct request_queue *q)
 {
 	if (queue_is_mq(q)) {
 		struct blk_mq_hw_ctx *hctx;
-		int i;
+		unsigned long i;
 
 		cancel_delayed_work_sync(&q->requeue_work);
 
diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c
index c08971de369fc..58304f978e103 100644
--- a/drivers/block/rnbd/rnbd-clt.c
+++ b/drivers/block/rnbd/rnbd-clt.c
@@ -1343,7 +1343,7 @@ static inline void rnbd_init_hw_queue(struct rnbd_clt_dev *dev,
 
 static void rnbd_init_mq_hw_queues(struct rnbd_clt_dev *dev)
 {
-	int i;
+	unsigned long i;
 	struct blk_mq_hw_ctx *hctx;
 	struct rnbd_queue *q;
 
-- 
GitLab


From 5189810a6687b6f01d35b4a7b407eb1f6edb97e5 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 28 Feb 2022 14:41:22 +0200
Subject: [PATCH 1139/1586] f2fs: don't pass a bio to f2fs_target_device

Set the bdev at bio allocation time by changing the f2fs_target_device
calling conventions, so that no bio needs to be passed in.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chao Yu <chao@kernel.org>
Link: https://lore.kernel.org/r/20220228124123.856027-2-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/f2fs/data.c | 25 +++++++++++++------------
 fs/f2fs/f2fs.h |  2 +-
 2 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index e71dde8de0db0..59dd0347c4bc8 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -354,7 +354,7 @@ static void f2fs_write_end_io(struct bio *bio)
 }
 
 struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
-				block_t blk_addr, struct bio *bio)
+		block_t blk_addr, sector_t *sector)
 {
 	struct block_device *bdev = sbi->sb->s_bdev;
 	int i;
@@ -369,10 +369,9 @@ struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
 			}
 		}
 	}
-	if (bio) {
-		bio_set_dev(bio, bdev);
-		bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blk_addr);
-	}
+
+	if (sector)
+		*sector = SECTOR_FROM_BLOCK(blk_addr);
 	return bdev;
 }
 
@@ -392,11 +391,13 @@ int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr)
 static struct bio *__bio_alloc(struct f2fs_io_info *fio, int npages)
 {
 	struct f2fs_sb_info *sbi = fio->sbi;
+	struct block_device *bdev;
+	sector_t sector;
 	struct bio *bio;
 
-	bio = bio_alloc_bioset(NULL, npages, 0, GFP_NOIO, &f2fs_bioset);
-
-	f2fs_target_device(sbi, fio->new_blkaddr, bio);
+	bdev = f2fs_target_device(sbi, fio->new_blkaddr, &sector);
+	bio = bio_alloc_bioset(bdev, npages, 0, GFP_NOIO, &f2fs_bioset);
+	bio->bi_iter.bi_sector = sector;
 	if (is_read_io(fio->op)) {
 		bio->bi_end_io = f2fs_read_end_io;
 		bio->bi_private = NULL;
@@ -984,15 +985,15 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
 	struct bio *bio;
 	struct bio_post_read_ctx *ctx = NULL;
 	unsigned int post_read_steps = 0;
+	sector_t sector;
+	struct block_device *bdev = f2fs_target_device(sbi, blkaddr, &sector);
 
-	bio = bio_alloc_bioset(NULL, bio_max_segs(nr_pages), REQ_OP_READ,
+	bio = bio_alloc_bioset(bdev, bio_max_segs(nr_pages), REQ_OP_READ,
 			       for_write ? GFP_NOIO : GFP_KERNEL, &f2fs_bioset);
 	if (!bio)
 		return ERR_PTR(-ENOMEM);
-
+	bio->bi_iter.bi_sector = sector;
 	f2fs_set_bio_crypt_ctx(bio, inode, first_idx, NULL, GFP_NOFS);
-
-	f2fs_target_device(sbi, blkaddr, bio);
 	bio->bi_end_io = f2fs_read_end_io;
 
 	if (fscrypt_inode_uses_fs_layer_crypto(inode))
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index eb22fa91c2b26..37faae388fe01 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -3631,7 +3631,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio);
 int f2fs_merge_page_bio(struct f2fs_io_info *fio);
 void f2fs_submit_page_write(struct f2fs_io_info *fio);
 struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
-			block_t blk_addr, struct bio *bio);
+		block_t blk_addr, sector_t *sector);
 int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr);
 void f2fs_set_data_blkaddr(struct dnode_of_data *dn);
 void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr);
-- 
GitLab


From 64bf0eef0171912f7c2f3ea30ee6ad7a2ad0a511 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 28 Feb 2022 14:41:23 +0200
Subject: [PATCH 1140/1586] f2fs: pass the bio operation to bio_alloc_bioset

Refactor block I/O code so that the bio operation and known flags are set
at bio allocation time.  Only the later updated flags are updated on the
fly.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chao Yu <chao@kernel.org>
Link: https://lore.kernel.org/r/20220228124123.856027-3-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/f2fs/data.c | 70 +++++++++++++++++++++-----------------------------
 1 file changed, 29 insertions(+), 41 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 59dd0347c4bc8..fc077bce679d9 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -388,6 +388,24 @@ int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr)
 	return 0;
 }
 
+static void __attach_io_flag(struct f2fs_io_info *fio, unsigned int io_flag)
+{
+	unsigned int temp_mask = (1 << NR_TEMP_TYPE) - 1;
+	unsigned int fua_flag = io_flag & temp_mask;
+	unsigned int meta_flag = (io_flag >> NR_TEMP_TYPE) & temp_mask;
+
+	/*
+	 * data/node io flag bits per temp:
+	 *      REQ_META     |      REQ_FUA      |
+	 *    5 |    4 |   3 |    2 |    1 |   0 |
+	 * Cold | Warm | Hot | Cold | Warm | Hot |
+	 */
+	if ((1 << fio->temp) & meta_flag)
+		fio->op_flags |= REQ_META;
+	if ((1 << fio->temp) & fua_flag)
+		fio->op_flags |= REQ_FUA;
+}
+
 static struct bio *__bio_alloc(struct f2fs_io_info *fio, int npages)
 {
 	struct f2fs_sb_info *sbi = fio->sbi;
@@ -395,8 +413,14 @@ static struct bio *__bio_alloc(struct f2fs_io_info *fio, int npages)
 	sector_t sector;
 	struct bio *bio;
 
+	if (fio->type == DATA)
+		__attach_io_flag(fio, sbi->data_io_flag);
+	else if (fio->type == NODE)
+		__attach_io_flag(fio, sbi->node_io_flag);
+
 	bdev = f2fs_target_device(sbi, fio->new_blkaddr, &sector);
-	bio = bio_alloc_bioset(bdev, npages, 0, GFP_NOIO, &f2fs_bioset);
+	bio = bio_alloc_bioset(bdev, npages, fio->op | fio->op_flags, GFP_NOIO,
+			       &f2fs_bioset);
 	bio->bi_iter.bi_sector = sector;
 	if (is_read_io(fio->op)) {
 		bio->bi_end_io = f2fs_read_end_io;
@@ -501,34 +525,6 @@ void f2fs_submit_bio(struct f2fs_sb_info *sbi,
 	__submit_bio(sbi, bio, type);
 }
 
-static void __attach_io_flag(struct f2fs_io_info *fio)
-{
-	struct f2fs_sb_info *sbi = fio->sbi;
-	unsigned int temp_mask = (1 << NR_TEMP_TYPE) - 1;
-	unsigned int io_flag, fua_flag, meta_flag;
-
-	if (fio->type == DATA)
-		io_flag = sbi->data_io_flag;
-	else if (fio->type == NODE)
-		io_flag = sbi->node_io_flag;
-	else
-		return;
-
-	fua_flag = io_flag & temp_mask;
-	meta_flag = (io_flag >> NR_TEMP_TYPE) & temp_mask;
-
-	/*
-	 * data/node io flag bits per temp:
-	 *      REQ_META     |      REQ_FUA      |
-	 *    5 |    4 |   3 |    2 |    1 |   0 |
-	 * Cold | Warm | Hot | Cold | Warm | Hot |
-	 */
-	if ((1 << fio->temp) & meta_flag)
-		fio->op_flags |= REQ_META;
-	if ((1 << fio->temp) & fua_flag)
-		fio->op_flags |= REQ_FUA;
-}
-
 static void __submit_merged_bio(struct f2fs_bio_info *io)
 {
 	struct f2fs_io_info *fio = &io->fio;
@@ -536,9 +532,6 @@ static void __submit_merged_bio(struct f2fs_bio_info *io)
 	if (!io->bio)
 		return;
 
-	__attach_io_flag(fio);
-	bio_set_op_attrs(io->bio, fio->op, fio->op_flags);
-
 	if (is_read_io(fio->op))
 		trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio);
 	else
@@ -596,10 +589,9 @@ static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
 	/* change META to META_FLUSH in the checkpoint procedure */
 	if (type >= META_FLUSH) {
 		io->fio.type = META_FLUSH;
-		io->fio.op = REQ_OP_WRITE;
-		io->fio.op_flags = REQ_META | REQ_PRIO | REQ_SYNC;
+		io->bio->bi_opf |= REQ_META | REQ_PRIO | REQ_SYNC;
 		if (!test_opt(sbi, NOBARRIER))
-			io->fio.op_flags |= REQ_PREFLUSH | REQ_FUA;
+			io->bio->bi_opf |= REQ_PREFLUSH | REQ_FUA;
 	}
 	__submit_merged_bio(io);
 	up_write(&io->io_rwsem);
@@ -680,9 +672,6 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
 	if (fio->io_wbc && !is_read_io(fio->op))
 		wbc_account_cgroup_owner(fio->io_wbc, page, PAGE_SIZE);
 
-	__attach_io_flag(fio);
-	bio_set_op_attrs(bio, fio->op, fio->op_flags);
-
 	inc_page_count(fio->sbi, is_read_io(fio->op) ?
 			__read_io_type(page): WB_DATA_TYPE(fio->page));
 
@@ -876,10 +865,8 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio)
 alloc_new:
 	if (!bio) {
 		bio = __bio_alloc(fio, BIO_MAX_VECS);
-		__attach_io_flag(fio);
 		f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host,
 				       fio->page->index, fio, GFP_NOIO);
-		bio_set_op_attrs(bio, fio->op, fio->op_flags);
 
 		add_bio_entry(fio->sbi, bio, page, fio->temp);
 	} else {
@@ -988,7 +975,8 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
 	sector_t sector;
 	struct block_device *bdev = f2fs_target_device(sbi, blkaddr, &sector);
 
-	bio = bio_alloc_bioset(bdev, bio_max_segs(nr_pages), REQ_OP_READ,
+	bio = bio_alloc_bioset(bdev, bio_max_segs(nr_pages),
+			       REQ_OP_READ | op_flag,
 			       for_write ? GFP_NOIO : GFP_KERNEL, &f2fs_bioset);
 	if (!bio)
 		return ERR_PTR(-ENOMEM);
-- 
GitLab


From 4e5cc99e1e485954a9c09872e0eeea570fb2b5a5 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Tue, 8 Mar 2022 15:32:19 +0800
Subject: [PATCH 1141/1586] blk-mq: manage hctx map via xarray

First code becomes more clean by switching to xarray from plain array.

Second use-after-free on q->queue_hw_ctx can be fixed because
queue_for_each_hw_ctx() may be run when updating nr_hw_queues is
in-progress. With this patch, q->hctx_table is defined as xarray, and
this structure will share same lifetime with request queue, so
queue_for_each_hw_ctx() can use q->hctx_table to lookup hctx reliably.

Reported-by: Yu Kuai <yukuai3@huawei.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220308073219.91173-7-ming.lei@redhat.com
[axboe: fix blk_mq_hw_ctx forward declaration]
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq-debugfs.h |  2 ++
 block/blk-mq-tag.c     |  2 +-
 block/blk-mq.c         | 62 ++++++++++++++++--------------------------
 block/blk-mq.h         |  2 +-
 include/linux/blk-mq.h |  3 +-
 include/linux/blkdev.h |  2 +-
 6 files changed, 30 insertions(+), 43 deletions(-)

diff --git a/block/blk-mq-debugfs.h b/block/blk-mq-debugfs.h
index a68aa6041a10d..69918f4170d69 100644
--- a/block/blk-mq-debugfs.h
+++ b/block/blk-mq-debugfs.h
@@ -6,6 +6,8 @@
 
 #include <linux/seq_file.h>
 
+struct blk_mq_hw_ctx;
+
 struct blk_mq_debugfs_attr {
 	const char *name;
 	umode_t mode;
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 1850a4225e12c..68ac23d0b6406 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -498,7 +498,7 @@ void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_tag_iter_fn *fn,
 		void *priv)
 {
 	/*
-	 * __blk_mq_update_nr_hw_queues() updates nr_hw_queues and queue_hw_ctx
+	 * __blk_mq_update_nr_hw_queues() updates nr_hw_queues and hctx_table
 	 * while the queue is frozen. So we can use q_usage_counter to avoid
 	 * racing with it.
 	 */
diff --git a/block/blk-mq.c b/block/blk-mq.c
index bffdd71c670df..e8d6d7a05a43a 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -71,7 +71,8 @@ static int blk_mq_poll_stats_bkt(const struct request *rq)
 static inline struct blk_mq_hw_ctx *blk_qc_to_hctx(struct request_queue *q,
 		blk_qc_t qc)
 {
-	return q->queue_hw_ctx[(qc & ~BLK_QC_T_INTERNAL) >> BLK_QC_T_SHIFT];
+	return xa_load(&q->hctx_table,
+			(qc & ~BLK_QC_T_INTERNAL) >> BLK_QC_T_SHIFT);
 }
 
 static inline struct request *blk_qc_to_rq(struct blk_mq_hw_ctx *hctx,
@@ -573,7 +574,7 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
 	 * If not tell the caller that it should skip this queue.
 	 */
 	ret = -EXDEV;
-	data.hctx = q->queue_hw_ctx[hctx_idx];
+	data.hctx = xa_load(&q->hctx_table, hctx_idx);
 	if (!blk_mq_hw_queue_mapped(data.hctx))
 		goto out_queue_exit;
 	cpu = cpumask_first_and(data.hctx->cpumask, cpu_online_mask);
@@ -3437,6 +3438,8 @@ static void blk_mq_exit_hctx(struct request_queue *q,
 
 	blk_mq_remove_cpuhp(hctx);
 
+	xa_erase(&q->hctx_table, hctx_idx);
+
 	spin_lock(&q->unused_hctx_lock);
 	list_add(&hctx->hctx_list, &q->unused_hctx_list);
 	spin_unlock(&q->unused_hctx_lock);
@@ -3476,8 +3479,15 @@ static int blk_mq_init_hctx(struct request_queue *q,
 	if (blk_mq_init_request(set, hctx->fq->flush_rq, hctx_idx,
 				hctx->numa_node))
 		goto exit_hctx;
+
+	if (xa_insert(&q->hctx_table, hctx_idx, hctx, GFP_KERNEL))
+		goto exit_flush_rq;
+
 	return 0;
 
+ exit_flush_rq:
+	if (set->ops->exit_request)
+		set->ops->exit_request(set, hctx->fq->flush_rq, hctx_idx);
  exit_hctx:
 	if (set->ops->exit_hctx)
 		set->ops->exit_hctx(hctx, hctx_idx);
@@ -3856,7 +3866,7 @@ void blk_mq_release(struct request_queue *q)
 		kobject_put(&hctx->kobj);
 	}
 
-	kfree(q->queue_hw_ctx);
+	xa_destroy(&q->hctx_table);
 
 	/*
 	 * release .mq_kobj and sw queue's kobject now because
@@ -3945,46 +3955,28 @@ static struct blk_mq_hw_ctx *blk_mq_alloc_and_init_hctx(
 static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
 						struct request_queue *q)
 {
-	int i, j, end;
-	struct blk_mq_hw_ctx **hctxs = q->queue_hw_ctx;
-
-	if (q->nr_hw_queues < set->nr_hw_queues) {
-		struct blk_mq_hw_ctx **new_hctxs;
-
-		new_hctxs = kcalloc_node(set->nr_hw_queues,
-				       sizeof(*new_hctxs), GFP_KERNEL,
-				       set->numa_node);
-		if (!new_hctxs)
-			return;
-		if (hctxs)
-			memcpy(new_hctxs, hctxs, q->nr_hw_queues *
-			       sizeof(*hctxs));
-		q->queue_hw_ctx = new_hctxs;
-		kfree(hctxs);
-		hctxs = new_hctxs;
-	}
+	struct blk_mq_hw_ctx *hctx;
+	unsigned long i, j;
 
 	/* protect against switching io scheduler  */
 	mutex_lock(&q->sysfs_lock);
 	for (i = 0; i < set->nr_hw_queues; i++) {
 		int old_node;
 		int node = blk_mq_get_hctx_node(set, i);
-		struct blk_mq_hw_ctx *old_hctx = hctxs[i];
+		struct blk_mq_hw_ctx *old_hctx = xa_load(&q->hctx_table, i);
 
 		if (old_hctx) {
 			old_node = old_hctx->numa_node;
 			blk_mq_exit_hctx(q, set, old_hctx, i);
 		}
 
-		hctxs[i] = blk_mq_alloc_and_init_hctx(set, q, i, node);
-		if (!hctxs[i]) {
+		if (!blk_mq_alloc_and_init_hctx(set, q, i, node)) {
 			if (!old_hctx)
 				break;
 			pr_warn("Allocate new hctx on node %d fails, fallback to previous one on node %d\n",
 					node, old_node);
-			hctxs[i] = blk_mq_alloc_and_init_hctx(set, q, i,
-					old_node);
-			WARN_ON_ONCE(!hctxs[i]);
+			hctx = blk_mq_alloc_and_init_hctx(set, q, i, old_node);
+			WARN_ON_ONCE(!hctx);
 		}
 	}
 	/*
@@ -3993,21 +3985,13 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
 	 */
 	if (i != set->nr_hw_queues) {
 		j = q->nr_hw_queues;
-		end = i;
 	} else {
 		j = i;
-		end = q->nr_hw_queues;
 		q->nr_hw_queues = set->nr_hw_queues;
 	}
 
-	for (; j < end; j++) {
-		struct blk_mq_hw_ctx *hctx = hctxs[j];
-
-		if (hctx) {
-			blk_mq_exit_hctx(q, set, hctx, j);
-			hctxs[j] = NULL;
-		}
-	}
+	xa_for_each_start(&q->hctx_table, j, hctx, j)
+		blk_mq_exit_hctx(q, set, hctx, j);
 	mutex_unlock(&q->sysfs_lock);
 }
 
@@ -4046,6 +4030,8 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 	INIT_LIST_HEAD(&q->unused_hctx_list);
 	spin_lock_init(&q->unused_hctx_lock);
 
+	xa_init(&q->hctx_table);
+
 	blk_mq_realloc_hw_ctxs(set, q);
 	if (!q->nr_hw_queues)
 		goto err_hctxs;
@@ -4075,7 +4061,7 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 	return 0;
 
 err_hctxs:
-	kfree(q->queue_hw_ctx);
+	xa_destroy(&q->hctx_table);
 	q->nr_hw_queues = 0;
 	blk_mq_sysfs_deinit(q);
 err_poll:
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 948791ea2a3e8..2615bd58bad3c 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -83,7 +83,7 @@ static inline struct blk_mq_hw_ctx *blk_mq_map_queue_type(struct request_queue *
 							  enum hctx_type type,
 							  unsigned int cpu)
 {
-	return q->queue_hw_ctx[q->tag_set->map[type].mq_map[cpu]];
+	return xa_load(&q->hctx_table, q->tag_set->map[type].mq_map[cpu]);
 }
 
 static inline enum hctx_type blk_mq_get_hctx_type(unsigned int flags)
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 3a41d50b85d3a..7aa5c54901a93 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -917,8 +917,7 @@ static inline void *blk_mq_rq_to_pdu(struct request *rq)
 }
 
 #define queue_for_each_hw_ctx(q, hctx, i)				\
-	for ((i) = 0; (i) < (q)->nr_hw_queues &&			\
-	     ({ hctx = (q)->queue_hw_ctx[i]; 1; }); (i)++)
+	xa_for_each(&(q)->hctx_table, (i), (hctx))
 
 #define hctx_for_each_ctx(hctx, ctx, i)					\
 	for ((i) = 0; (i) < (hctx)->nr_ctx &&				\
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index e19947d84f128..d982171469bcb 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -355,7 +355,7 @@ struct request_queue {
 	unsigned int		queue_depth;
 
 	/* hw dispatch queues */
-	struct blk_mq_hw_ctx	**queue_hw_ctx;
+	struct xarray		hctx_table;
 	unsigned int		nr_hw_queues;
 
 	/*
-- 
GitLab


From 41fa722239b46f0c033da94746212344175cdaa0 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 8 Mar 2022 06:51:47 +0100
Subject: [PATCH 1142/1586] blk-mq: do not include passthrough requests in I/O
 accounting

I/O accounting buckets I/O into the read/write/discard categories into
which passthrough I/O does not fit at all.  It also accounts to the
block_device, which may not even exist for passthrough I/O.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Link: https://lore.kernel.org/r/20220308055200.735835-2-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c | 11 ++++++++---
 block/blk.h    |  2 +-
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index e8d6d7a05a43a..40f634ab70266 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -884,10 +884,15 @@ static inline void blk_account_io_done(struct request *req, u64 now)
 
 static void __blk_account_io_start(struct request *rq)
 {
-	/* passthrough requests can hold bios that do not have ->bi_bdev set */
-	if (rq->bio && rq->bio->bi_bdev)
+	/*
+	 * All non-passthrough requests are created from a bio with one
+	 * exception: when a flush command that is part of a flush sequence
+	 * generated by the state machine in blk-flush.c is cloned onto the
+	 * lower device by dm-multipath we can get here without a bio.
+	 */
+	if (rq->bio)
 		rq->part = rq->bio->bi_bdev;
-	else if (rq->q->disk)
+	else
 		rq->part = rq->q->disk->part0;
 
 	part_stat_lock();
diff --git a/block/blk.h b/block/blk.h
index ebaa59ca46ca6..6f21859c7f0ff 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -325,7 +325,7 @@ int blk_dev_init(void);
  */
 static inline bool blk_do_io_stat(struct request *rq)
 {
-	return (rq->rq_flags & RQF_IO_STAT) && rq->q->disk;
+	return (rq->rq_flags & RQF_IO_STAT) && !blk_rq_is_passthrough(rq);
 }
 
 void update_io_ticks(struct block_device *part, unsigned long now, bool end);
-- 
GitLab


From e02657ea7b86d1c41e095f49e4f7c7bb24bbee64 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Tue, 8 Mar 2022 06:51:48 +0100
Subject: [PATCH 1143/1586] blk-mq: handle already freed tags gracefully in
 blk_mq_free_rqs

To simplify further changes allow for double calling blk_mq_free_rqs on
a queue.

Signed-off-by: Ming Lei <ming.lei@redhat.com>
[hch: split out from a larger patch]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Link: https://lore.kernel.org/r/20220308055200.735835-3-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 40f634ab70266..f7ef8e2ab935b 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -3071,6 +3071,9 @@ void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
 	struct blk_mq_tags *drv_tags;
 	struct page *page;
 
+	if (list_empty(&tags->page_list))
+		return;
+
 	if (blk_mq_is_shared_tags(set->flags))
 		drv_tags = set->shared_tags;
 	else
-- 
GitLab


From e7f76552277ca79d9dfe4c59f7b3620a98467f9c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 8 Mar 2022 06:51:49 +0100
Subject: [PATCH 1144/1586] scsi: don't use disk->private_data to find the
 scsi_driver

Requiring every ULP to have the scsi_drive as first member of the
private data is rather fragile and not necessary anyway.  Just use
the driver hanging off the SCSI device instead.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Link: https://lore.kernel.org/r/20220308055200.735835-4-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/scsi/sd.c          | 3 +--
 drivers/scsi/sd.h          | 3 +--
 drivers/scsi/sr.c          | 5 ++---
 drivers/scsi/sr.h          | 1 -
 drivers/scsi/st.c          | 1 -
 drivers/scsi/st.h          | 1 -
 include/scsi/scsi_cmnd.h   | 9 ---------
 include/scsi/scsi_driver.h | 9 +++++++--
 8 files changed, 11 insertions(+), 21 deletions(-)

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 2d648d27bfd71..2a1e19e871d30 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -3515,7 +3515,6 @@ static int sd_probe(struct device *dev)
 	}
 
 	sdkp->device = sdp;
-	sdkp->driver = &sd_template;
 	sdkp->disk = gd;
 	sdkp->index = index;
 	sdkp->max_retries = SD_MAX_RETRIES;
@@ -3548,7 +3547,7 @@ static int sd_probe(struct device *dev)
 	gd->minors = SD_MINORS;
 
 	gd->fops = &sd_fops;
-	gd->private_data = &sdkp->driver;
+	gd->private_data = sdkp;
 
 	/* defaults, until the device tells us otherwise */
 	sdp->sector_size = 512;
diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
index 2e5932bde43d1..303aa1c23aefb 100644
--- a/drivers/scsi/sd.h
+++ b/drivers/scsi/sd.h
@@ -68,7 +68,6 @@ enum {
 };
 
 struct scsi_disk {
-	struct scsi_driver *driver;	/* always &sd_template */
 	struct scsi_device *device;
 	struct device	dev;
 	struct gendisk	*disk;
@@ -131,7 +130,7 @@ struct scsi_disk {
 
 static inline struct scsi_disk *scsi_disk(struct gendisk *disk)
 {
-	return container_of(disk->private_data, struct scsi_disk, driver);
+	return disk->private_data;
 }
 
 #define sd_printk(prefix, sdsk, fmt, a...)				\
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index f925b1f1f9ada..569bda76a5175 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -147,7 +147,7 @@ static void sr_kref_release(struct kref *kref);
 
 static inline struct scsi_cd *scsi_cd(struct gendisk *disk)
 {
-	return container_of(disk->private_data, struct scsi_cd, driver);
+	return disk->private_data;
 }
 
 static int sr_runtime_suspend(struct device *dev)
@@ -692,7 +692,6 @@ static int sr_probe(struct device *dev)
 
 	cd->device = sdev;
 	cd->disk = disk;
-	cd->driver = &sr_template;
 	cd->capacity = 0x1fffff;
 	cd->device->changed = 1;	/* force recheck CD type */
 	cd->media_present = 1;
@@ -713,7 +712,7 @@ static int sr_probe(struct device *dev)
 	sr_vendor_init(cd);
 
 	set_capacity(disk, cd->capacity);
-	disk->private_data = &cd->driver;
+	disk->private_data = cd;
 
 	if (register_cdrom(disk, &cd->cdi))
 		goto fail_minor;
diff --git a/drivers/scsi/sr.h b/drivers/scsi/sr.h
index 1609f02ed29ac..d80af3fcb6f97 100644
--- a/drivers/scsi/sr.h
+++ b/drivers/scsi/sr.h
@@ -32,7 +32,6 @@ struct scsi_device;
 
 
 typedef struct scsi_cd {
-	struct scsi_driver *driver;
 	unsigned capacity;	/* size in blocks                       */
 	struct scsi_device *device;
 	unsigned int vendor;	/* vendor code, see sr_vendor.c         */
diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c
index e869e90e05afe..ebe9412c86f43 100644
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c
@@ -4276,7 +4276,6 @@ static int st_probe(struct device *dev)
 		goto out_buffer_free;
 	}
 	kref_init(&tpnt->kref);
-	tpnt->driver = &st_template;
 
 	tpnt->device = SDp;
 	if (SDp->scsi_level <= 2)
diff --git a/drivers/scsi/st.h b/drivers/scsi/st.h
index c0ef0d9aaf8a2..7a68eaba7e810 100644
--- a/drivers/scsi/st.h
+++ b/drivers/scsi/st.h
@@ -117,7 +117,6 @@ struct scsi_tape_stats {
 
 /* The tape drive descriptor */
 struct scsi_tape {
-	struct scsi_driver *driver;
 	struct scsi_device *device;
 	struct mutex lock;	/* For serialization */
 	struct completion wait;	/* For SCSI commands */
diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
index 6794d7322cbde..e3a4c67794b14 100644
--- a/include/scsi/scsi_cmnd.h
+++ b/include/scsi/scsi_cmnd.h
@@ -13,7 +13,6 @@
 #include <scsi/scsi_request.h>
 
 struct Scsi_Host;
-struct scsi_driver;
 
 /*
  * MAX_COMMAND_SIZE is:
@@ -159,14 +158,6 @@ static inline void *scsi_cmd_priv(struct scsi_cmnd *cmd)
 	return cmd + 1;
 }
 
-/* make sure not to use it with passthrough commands */
-static inline struct scsi_driver *scsi_cmd_to_driver(struct scsi_cmnd *cmd)
-{
-	struct request *rq = scsi_cmd_to_rq(cmd);
-
-	return *(struct scsi_driver **)rq->q->disk->private_data;
-}
-
 void scsi_done(struct scsi_cmnd *cmd);
 
 extern void scsi_finish_command(struct scsi_cmnd *cmd);
diff --git a/include/scsi/scsi_driver.h b/include/scsi/scsi_driver.h
index 6dffa8555a390..4ce1988b2ba01 100644
--- a/include/scsi/scsi_driver.h
+++ b/include/scsi/scsi_driver.h
@@ -4,11 +4,10 @@
 
 #include <linux/blk_types.h>
 #include <linux/device.h>
+#include <scsi/scsi_cmnd.h>
 
 struct module;
 struct request;
-struct scsi_cmnd;
-struct scsi_device;
 
 struct scsi_driver {
 	struct device_driver	gendrv;
@@ -31,4 +30,10 @@ extern int scsi_register_interface(struct class_interface *);
 #define scsi_unregister_interface(intf) \
 	class_interface_unregister(intf)
 
+/* make sure not to use it with passthrough commands */
+static inline struct scsi_driver *scsi_cmd_to_driver(struct scsi_cmnd *cmd)
+{
+	return to_scsi_driver(cmd->device->sdev_gendev.driver);
+}
+
 #endif /* _SCSI_SCSI_DRIVER_H */
-- 
GitLab


From fad45c3007a18064da759b4dba35eb722bc64e97 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 8 Mar 2022 06:51:50 +0100
Subject: [PATCH 1145/1586] sd: rename the scsi_disk.dev field

dev is very hard to grep for.  Give the field a more descriptive name and
documents its purpose.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Link: https://lore.kernel.org/r/20220308055200.735835-5-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/scsi/sd.c | 22 +++++++++++-----------
 drivers/scsi/sd.h |  9 +++++++--
 2 files changed, 18 insertions(+), 13 deletions(-)

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 2a1e19e871d30..7479e7cb36b43 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -672,7 +672,7 @@ static struct scsi_disk *scsi_disk_get(struct gendisk *disk)
 	if (disk->private_data) {
 		sdkp = scsi_disk(disk);
 		if (scsi_device_get(sdkp->device) == 0)
-			get_device(&sdkp->dev);
+			get_device(&sdkp->disk_dev);
 		else
 			sdkp = NULL;
 	}
@@ -685,7 +685,7 @@ static void scsi_disk_put(struct scsi_disk *sdkp)
 	struct scsi_device *sdev = sdkp->device;
 
 	mutex_lock(&sd_ref_mutex);
-	put_device(&sdkp->dev);
+	put_device(&sdkp->disk_dev);
 	scsi_device_put(sdev);
 	mutex_unlock(&sd_ref_mutex);
 }
@@ -3529,14 +3529,14 @@ static int sd_probe(struct device *dev)
 					     SD_MOD_TIMEOUT);
 	}
 
-	device_initialize(&sdkp->dev);
-	sdkp->dev.parent = get_device(dev);
-	sdkp->dev.class = &sd_disk_class;
-	dev_set_name(&sdkp->dev, "%s", dev_name(dev));
+	device_initialize(&sdkp->disk_dev);
+	sdkp->disk_dev.parent = get_device(dev);
+	sdkp->disk_dev.class = &sd_disk_class;
+	dev_set_name(&sdkp->disk_dev, "%s", dev_name(dev));
 
-	error = device_add(&sdkp->dev);
+	error = device_add(&sdkp->disk_dev);
 	if (error) {
-		put_device(&sdkp->dev);
+		put_device(&sdkp->disk_dev);
 		goto out;
 	}
 
@@ -3577,7 +3577,7 @@ static int sd_probe(struct device *dev)
 
 	error = device_add_disk(dev, gd, NULL);
 	if (error) {
-		put_device(&sdkp->dev);
+		put_device(&sdkp->disk_dev);
 		goto out;
 	}
 
@@ -3628,7 +3628,7 @@ static int sd_remove(struct device *dev)
 	sdkp = dev_get_drvdata(dev);
 	scsi_autopm_get_device(sdkp->device);
 
-	device_del(&sdkp->dev);
+	device_del(&sdkp->disk_dev);
 	del_gendisk(sdkp->disk);
 	sd_shutdown(dev);
 
@@ -3636,7 +3636,7 @@ static int sd_remove(struct device *dev)
 
 	mutex_lock(&sd_ref_mutex);
 	dev_set_drvdata(dev, NULL);
-	put_device(&sdkp->dev);
+	put_device(&sdkp->disk_dev);
 	mutex_unlock(&sd_ref_mutex);
 
 	return 0;
diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
index 303aa1c23aefb..0a33a4b68ffbc 100644
--- a/drivers/scsi/sd.h
+++ b/drivers/scsi/sd.h
@@ -69,7 +69,12 @@ enum {
 
 struct scsi_disk {
 	struct scsi_device *device;
-	struct device	dev;
+
+	/*
+	 * disk_dev is used to show attributes in /sys/class/scsi_disk/,
+	 * but otherwise not really needed.  Do not use for refcounting.
+	 */
+	struct device	disk_dev;
 	struct gendisk	*disk;
 	struct opal_dev *opal_dev;
 #ifdef CONFIG_BLK_DEV_ZONED
@@ -126,7 +131,7 @@ struct scsi_disk {
 	unsigned	security : 1;
 	unsigned	ignore_medium_access_errors : 1;
 };
-#define to_scsi_disk(obj) container_of(obj,struct scsi_disk,dev)
+#define to_scsi_disk(obj) container_of(obj, struct scsi_disk, disk_dev)
 
 static inline struct scsi_disk *scsi_disk(struct gendisk *disk)
 {
-- 
GitLab


From c76c46fa04c42f7d3a494c526ce5f030da36b553 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 8 Mar 2022 06:51:51 +0100
Subject: [PATCH 1146/1586] sd: call sd_zbc_release_disk before releasing the
 scsi_device reference

sd_zbc_release_disk accesses disk->device, so ensure that actually still has
a valid reference.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Link: https://lore.kernel.org/r/20220308055200.735835-6-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/scsi/sd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 7479e7cb36b43..7bfebf5b2832d 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -3672,9 +3672,9 @@ static void scsi_disk_release(struct device *dev)
 
 	disk->private_data = NULL;
 	put_disk(disk);
-	put_device(&sdkp->device->sdev_gendev);
 
 	sd_zbc_release_disk(sdkp);
+	put_device(&sdkp->device->sdev_gendev);
 
 	kfree(sdkp);
 }
-- 
GitLab


From 534cf52aa9c18d87599a8811a39735c1b62742a0 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 8 Mar 2022 06:51:52 +0100
Subject: [PATCH 1147/1586] sd: delay calling free_opal_dev

Call free_opal_dev from scsi_disk_release as the opal_dev field is accessed
from the ioctl handler, which isn't synchronized vs sd_release and thus
can be accessed during or after sd_release was called.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Link: https://lore.kernel.org/r/20220308055200.735835-7-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/scsi/sd.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 7bfebf5b2832d..346b8d62de7d1 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -3632,8 +3632,6 @@ static int sd_remove(struct device *dev)
 	del_gendisk(sdkp->disk);
 	sd_shutdown(dev);
 
-	free_opal_dev(sdkp->opal_dev);
-
 	mutex_lock(&sd_ref_mutex);
 	dev_set_drvdata(dev, NULL);
 	put_device(&sdkp->disk_dev);
@@ -3675,6 +3673,7 @@ static void scsi_disk_release(struct device *dev)
 
 	sd_zbc_release_disk(sdkp);
 	put_device(&sdkp->device->sdev_gendev);
+	free_opal_dev(sdkp->opal_dev);
 
 	kfree(sdkp);
 }
-- 
GitLab


From 9c63f7f6ff3142edc81776df35d79e088df6ba6c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 8 Mar 2022 06:51:53 +0100
Subject: [PATCH 1148/1586] sd: implement ->free_disk to simplify refcounting

Implement the ->free_disk method to to put struct scsi_disk when the last
gendisk reference count goes away.  This removes the need to clear
->private_data and thus freeze the queue on unbind.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Link: https://lore.kernel.org/r/20220308055200.735835-8-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/scsi/sd.c | 90 +++++++++--------------------------------------
 1 file changed, 16 insertions(+), 74 deletions(-)

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 346b8d62de7d1..73e6f5f0f37c2 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -121,11 +121,6 @@ static void scsi_disk_release(struct device *cdev);
 
 static DEFINE_IDA(sd_index_ida);
 
-/* This semaphore is used to mediate the 0->1 reference get in the
- * face of object destruction (i.e. we can't allow a get on an
- * object after last put) */
-static DEFINE_MUTEX(sd_ref_mutex);
-
 static struct kmem_cache *sd_cdb_cache;
 static mempool_t *sd_cdb_pool;
 static mempool_t *sd_page_pool;
@@ -663,33 +658,6 @@ static int sd_major(int major_idx)
 	}
 }
 
-static struct scsi_disk *scsi_disk_get(struct gendisk *disk)
-{
-	struct scsi_disk *sdkp = NULL;
-
-	mutex_lock(&sd_ref_mutex);
-
-	if (disk->private_data) {
-		sdkp = scsi_disk(disk);
-		if (scsi_device_get(sdkp->device) == 0)
-			get_device(&sdkp->disk_dev);
-		else
-			sdkp = NULL;
-	}
-	mutex_unlock(&sd_ref_mutex);
-	return sdkp;
-}
-
-static void scsi_disk_put(struct scsi_disk *sdkp)
-{
-	struct scsi_device *sdev = sdkp->device;
-
-	mutex_lock(&sd_ref_mutex);
-	put_device(&sdkp->disk_dev);
-	scsi_device_put(sdev);
-	mutex_unlock(&sd_ref_mutex);
-}
-
 #ifdef CONFIG_BLK_SED_OPAL
 static int sd_sec_submit(void *data, u16 spsp, u8 secp, void *buffer,
 		size_t len, bool send)
@@ -1418,17 +1386,15 @@ static bool sd_need_revalidate(struct block_device *bdev,
  **/
 static int sd_open(struct block_device *bdev, fmode_t mode)
 {
-	struct scsi_disk *sdkp = scsi_disk_get(bdev->bd_disk);
-	struct scsi_device *sdev;
+	struct scsi_disk *sdkp = scsi_disk(bdev->bd_disk);
+	struct scsi_device *sdev = sdkp->device;
 	int retval;
 
-	if (!sdkp)
+	if (scsi_device_get(sdev))
 		return -ENXIO;
 
 	SCSI_LOG_HLQUEUE(3, sd_printk(KERN_INFO, sdkp, "sd_open\n"));
 
-	sdev = sdkp->device;
-
 	/*
 	 * If the device is in error recovery, wait until it is done.
 	 * If the device is offline, then disallow any access to it.
@@ -1473,7 +1439,7 @@ static int sd_open(struct block_device *bdev, fmode_t mode)
 	return 0;
 
 error_out:
-	scsi_disk_put(sdkp);
+	scsi_device_put(sdev);
 	return retval;	
 }
 
@@ -1502,7 +1468,7 @@ static void sd_release(struct gendisk *disk, fmode_t mode)
 			scsi_set_medium_removal(sdev, SCSI_REMOVAL_ALLOW);
 	}
 
-	scsi_disk_put(sdkp);
+	scsi_device_put(sdev);
 }
 
 static int sd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
@@ -1616,7 +1582,7 @@ static int media_not_present(struct scsi_disk *sdkp,
  **/
 static unsigned int sd_check_events(struct gendisk *disk, unsigned int clearing)
 {
-	struct scsi_disk *sdkp = scsi_disk_get(disk);
+	struct scsi_disk *sdkp = disk->private_data;
 	struct scsi_device *sdp;
 	int retval;
 	bool disk_changed;
@@ -1679,7 +1645,6 @@ out:
 	 */
 	disk_changed = sdp->changed;
 	sdp->changed = 0;
-	scsi_disk_put(sdkp);
 	return disk_changed ? DISK_EVENT_MEDIA_CHANGE : 0;
 }
 
@@ -1887,6 +1852,13 @@ static const struct pr_ops sd_pr_ops = {
 	.pr_clear	= sd_pr_clear,
 };
 
+static void scsi_disk_free_disk(struct gendisk *disk)
+{
+	struct scsi_disk *sdkp = scsi_disk(disk);
+
+	put_device(&sdkp->disk_dev);
+}
+
 static const struct block_device_operations sd_fops = {
 	.owner			= THIS_MODULE,
 	.open			= sd_open,
@@ -1898,6 +1870,7 @@ static const struct block_device_operations sd_fops = {
 	.unlock_native_capacity	= sd_unlock_native_capacity,
 	.report_zones		= sd_zbc_report_zones,
 	.get_unique_id		= sd_get_unique_id,
+	.free_disk		= scsi_disk_free_disk,
 	.pr_ops			= &sd_pr_ops,
 };
 
@@ -3623,54 +3596,23 @@ static int sd_probe(struct device *dev)
  **/
 static int sd_remove(struct device *dev)
 {
-	struct scsi_disk *sdkp;
+	struct scsi_disk *sdkp = dev_get_drvdata(dev);
 
-	sdkp = dev_get_drvdata(dev);
 	scsi_autopm_get_device(sdkp->device);
 
 	device_del(&sdkp->disk_dev);
 	del_gendisk(sdkp->disk);
 	sd_shutdown(dev);
 
-	mutex_lock(&sd_ref_mutex);
-	dev_set_drvdata(dev, NULL);
-	put_device(&sdkp->disk_dev);
-	mutex_unlock(&sd_ref_mutex);
-
+	put_disk(sdkp->disk);
 	return 0;
 }
 
-/**
- *	scsi_disk_release - Called to free the scsi_disk structure
- *	@dev: pointer to embedded class device
- *
- *	sd_ref_mutex must be held entering this routine.  Because it is
- *	called on last put, you should always use the scsi_disk_get()
- *	scsi_disk_put() helpers which manipulate the semaphore directly
- *	and never do a direct put_device.
- **/
 static void scsi_disk_release(struct device *dev)
 {
 	struct scsi_disk *sdkp = to_scsi_disk(dev);
-	struct gendisk *disk = sdkp->disk;
-	struct request_queue *q = disk->queue;
 
 	ida_free(&sd_index_ida, sdkp->index);
-
-	/*
-	 * Wait until all requests that are in progress have completed.
-	 * This is necessary to avoid that e.g. scsi_end_request() crashes
-	 * due to clearing the disk->private_data pointer. Wait from inside
-	 * scsi_disk_release() instead of from sd_release() to avoid that
-	 * freezing and unfreezing the request queue affects user space I/O
-	 * in case multiple processes open a /dev/sd... node concurrently.
-	 */
-	blk_mq_freeze_queue(q);
-	blk_mq_unfreeze_queue(q);
-
-	disk->private_data = NULL;
-	put_disk(disk);
-
 	sd_zbc_release_disk(sdkp);
 	put_device(&sdkp->device->sdev_gendev);
 	free_opal_dev(sdkp->opal_dev);
-- 
GitLab


From 01d0c698536fe920733fc6cab7f9740c7acfdece Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 8 Mar 2022 06:51:54 +0100
Subject: [PATCH 1149/1586] sr: implement ->free_disk to simplify refcounting

Simplify the refcounting and remove the need to clear disk->private_data
by implementing the ->free_disk method.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Link: https://lore.kernel.org/r/20220308055200.735835-9-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/scsi/sr.c | 124 ++++++++++------------------------------------
 drivers/scsi/sr.h |   4 --
 2 files changed, 26 insertions(+), 102 deletions(-)

diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index 569bda76a5175..11fbdc75bb711 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -109,11 +109,6 @@ static DEFINE_SPINLOCK(sr_index_lock);
 
 static struct lock_class_key sr_bio_compl_lkclass;
 
-/* This semaphore is used to mediate the 0->1 reference get in the
- * face of object destruction (i.e. we can't allow a get on an
- * object after last put) */
-static DEFINE_MUTEX(sr_ref_mutex);
-
 static int sr_open(struct cdrom_device_info *, int);
 static void sr_release(struct cdrom_device_info *);
 
@@ -143,8 +138,6 @@ static const struct cdrom_device_ops sr_dops = {
 	.capability		= SR_CAPABILITIES,
 };
 
-static void sr_kref_release(struct kref *kref);
-
 static inline struct scsi_cd *scsi_cd(struct gendisk *disk)
 {
 	return disk->private_data;
@@ -163,38 +156,6 @@ static int sr_runtime_suspend(struct device *dev)
 		return 0;
 }
 
-/*
- * The get and put routines for the struct scsi_cd.  Note this entity
- * has a scsi_device pointer and owns a reference to this.
- */
-static inline struct scsi_cd *scsi_cd_get(struct gendisk *disk)
-{
-	struct scsi_cd *cd = NULL;
-
-	mutex_lock(&sr_ref_mutex);
-	if (disk->private_data == NULL)
-		goto out;
-	cd = scsi_cd(disk);
-	kref_get(&cd->kref);
-	if (scsi_device_get(cd->device)) {
-		kref_put(&cd->kref, sr_kref_release);
-		cd = NULL;
-	}
- out:
-	mutex_unlock(&sr_ref_mutex);
-	return cd;
-}
-
-static void scsi_cd_put(struct scsi_cd *cd)
-{
-	struct scsi_device *sdev = cd->device;
-
-	mutex_lock(&sr_ref_mutex);
-	kref_put(&cd->kref, sr_kref_release);
-	scsi_device_put(sdev);
-	mutex_unlock(&sr_ref_mutex);
-}
-
 static unsigned int sr_get_events(struct scsi_device *sdev)
 {
 	u8 buf[8];
@@ -522,15 +483,13 @@ static void sr_revalidate_disk(struct scsi_cd *cd)
 
 static int sr_block_open(struct block_device *bdev, fmode_t mode)
 {
-	struct scsi_cd *cd;
-	struct scsi_device *sdev;
+	struct scsi_cd *cd = cd = scsi_cd(bdev->bd_disk);
+	struct scsi_device *sdev = cd->device;
 	int ret = -ENXIO;
 
-	cd = scsi_cd_get(bdev->bd_disk);
-	if (!cd)
-		goto out;
+	if (scsi_device_get(cd->device))
+		return -ENXIO;
 
-	sdev = cd->device;
 	scsi_autopm_get_device(sdev);
 	if (bdev_check_media_change(bdev))
 		sr_revalidate_disk(cd);
@@ -541,9 +500,7 @@ static int sr_block_open(struct block_device *bdev, fmode_t mode)
 
 	scsi_autopm_put_device(sdev);
 	if (ret)
-		scsi_cd_put(cd);
-
-out:
+		scsi_device_put(cd->device);
 	return ret;
 }
 
@@ -555,7 +512,7 @@ static void sr_block_release(struct gendisk *disk, fmode_t mode)
 	cdrom_release(&cd->cdi, mode);
 	mutex_unlock(&cd->lock);
 
-	scsi_cd_put(cd);
+	scsi_device_put(cd->device);
 }
 
 static int sr_block_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
@@ -595,18 +552,24 @@ out:
 static unsigned int sr_block_check_events(struct gendisk *disk,
 					  unsigned int clearing)
 {
-	unsigned int ret = 0;
-	struct scsi_cd *cd;
+	struct scsi_cd *cd = disk->private_data;
 
-	cd = scsi_cd_get(disk);
-	if (!cd)
+	if (atomic_read(&cd->device->disk_events_disable_depth))
 		return 0;
+	return cdrom_check_events(&cd->cdi, clearing);
+}
 
-	if (!atomic_read(&cd->device->disk_events_disable_depth))
-		ret = cdrom_check_events(&cd->cdi, clearing);
+static void sr_free_disk(struct gendisk *disk)
+{
+	struct scsi_cd *cd = disk->private_data;
 
-	scsi_cd_put(cd);
-	return ret;
+	spin_lock(&sr_index_lock);
+	clear_bit(MINOR(disk_devt(disk)), sr_index_bits);
+	spin_unlock(&sr_index_lock);
+
+	unregister_cdrom(&cd->cdi);
+	mutex_destroy(&cd->lock);
+	kfree(cd);
 }
 
 static const struct block_device_operations sr_bdops =
@@ -617,6 +580,7 @@ static const struct block_device_operations sr_bdops =
 	.ioctl		= sr_block_ioctl,
 	.compat_ioctl	= blkdev_compat_ptr_ioctl,
 	.check_events	= sr_block_check_events,
+	.free_disk	= sr_free_disk,
 };
 
 static int sr_open(struct cdrom_device_info *cdi, int purpose)
@@ -660,8 +624,6 @@ static int sr_probe(struct device *dev)
 	if (!cd)
 		goto fail;
 
-	kref_init(&cd->kref);
-
 	disk = __alloc_disk_node(sdev->request_queue, NUMA_NO_NODE,
 				 &sr_bio_compl_lkclass);
 	if (!disk)
@@ -727,10 +689,8 @@ static int sr_probe(struct device *dev)
 	sr_revalidate_disk(cd);
 
 	error = device_add_disk(&sdev->sdev_gendev, disk, NULL);
-	if (error) {
-		kref_put(&cd->kref, sr_kref_release);
-		goto fail;
-	}
+	if (error)
+		goto unregister_cdrom;
 
 	sdev_printk(KERN_DEBUG, sdev,
 		    "Attached scsi CD-ROM %s\n", cd->cdi.name);
@@ -738,6 +698,8 @@ static int sr_probe(struct device *dev)
 
 	return 0;
 
+unregister_cdrom:
+	unregister_cdrom(&cd->cdi);
 fail_minor:
 	spin_lock(&sr_index_lock);
 	clear_bit(minor, sr_index_bits);
@@ -1009,36 +971,6 @@ out_put_request:
 	return ret;
 }
 
-
-/**
- *	sr_kref_release - Called to free the scsi_cd structure
- *	@kref: pointer to embedded kref
- *
- *	sr_ref_mutex must be held entering this routine.  Because it is
- *	called on last put, you should always use the scsi_cd_get()
- *	scsi_cd_put() helpers which manipulate the semaphore directly
- *	and never do a direct kref_put().
- **/
-static void sr_kref_release(struct kref *kref)
-{
-	struct scsi_cd *cd = container_of(kref, struct scsi_cd, kref);
-	struct gendisk *disk = cd->disk;
-
-	spin_lock(&sr_index_lock);
-	clear_bit(MINOR(disk_devt(disk)), sr_index_bits);
-	spin_unlock(&sr_index_lock);
-
-	unregister_cdrom(&cd->cdi);
-
-	disk->private_data = NULL;
-
-	put_disk(disk);
-
-	mutex_destroy(&cd->lock);
-
-	kfree(cd);
-}
-
 static int sr_remove(struct device *dev)
 {
 	struct scsi_cd *cd = dev_get_drvdata(dev);
@@ -1046,11 +978,7 @@ static int sr_remove(struct device *dev)
 	scsi_autopm_get_device(cd->device);
 
 	del_gendisk(cd->disk);
-	dev_set_drvdata(dev, NULL);
-
-	mutex_lock(&sr_ref_mutex);
-	kref_put(&cd->kref, sr_kref_release);
-	mutex_unlock(&sr_ref_mutex);
+	put_disk(cd->disk);
 
 	return 0;
 }
diff --git a/drivers/scsi/sr.h b/drivers/scsi/sr.h
index d80af3fcb6f97..1175f2e213b56 100644
--- a/drivers/scsi/sr.h
+++ b/drivers/scsi/sr.h
@@ -18,7 +18,6 @@
 #ifndef _SR_H
 #define _SR_H
 
-#include <linux/kref.h>
 #include <linux/mutex.h>
 
 #define MAX_RETRIES	3
@@ -51,9 +50,6 @@ typedef struct scsi_cd {
 
 	struct cdrom_device_info cdi;
 	struct mutex lock;
-	/* We hold gendisk and scsi_device references on probe and use
-	 * the refs on this kref to decide when to release them */
-	struct kref kref;
 	struct gendisk *disk;
 } Scsi_CD;
 
-- 
GitLab


From 1059699f87eb0b3aa9d574b91a572d534897134a Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Tue, 8 Mar 2022 06:51:55 +0100
Subject: [PATCH 1150/1586] block: move blkcg initialization/destroy into disk
 allocation/release handler

blkcg works on FS bio level, so it is reasonable to make both blkcg and
gendisk sharing same lifetime. Meantime there won't be any FS IO when
releasing disk, so safe to move blkcg initialization/destroy into disk
allocation/release handler

Long term, we can move blkcg into gendisk completely.

Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Link: https://lore.kernel.org/r/20220308055200.735835-10-hch@lst.de
[axboe: fixup missing blk-cgroup.h include]
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c  | 5 -----
 block/blk-sysfs.c | 7 -------
 block/genhd.c     | 9 +++++++++
 3 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 4d858fc08f8ba..3fa2f08d3750b 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -496,17 +496,12 @@ struct request_queue *blk_alloc_queue(int node_id, bool alloc_srcu)
 				PERCPU_REF_INIT_ATOMIC, GFP_KERNEL))
 		goto fail_stats;
 
-	if (blkcg_init_queue(q))
-		goto fail_ref;
-
 	blk_queue_dma_alignment(q, 511);
 	blk_set_default_limits(&q->limits);
 	q->nr_requests = BLKDEV_DEFAULT_RQ;
 
 	return q;
 
-fail_ref:
-	percpu_ref_exit(&q->q_usage_counter);
 fail_stats:
 	blk_free_queue_stats(q->stats);
 fail_split:
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 241ded62f458f..220085109d7f0 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -751,13 +751,6 @@ static void blk_exit_queue(struct request_queue *q)
 		ioc_clear_queue(q);
 		elevator_exit(q);
 	}
-
-	/*
-	 * Remove all references to @q from the block cgroup controller before
-	 * restoring @q->queue_lock to avoid that restoring this pointer causes
-	 * e.g. blkcg_print_blkgs() to crash.
-	 */
-	blkcg_exit_queue(q);
 }
 
 /**
diff --git a/block/genhd.c b/block/genhd.c
index 54f60ded2ee6f..fc10aedab2099 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -29,6 +29,7 @@
 #include "blk.h"
 #include "blk-mq-sched.h"
 #include "blk-rq-qos.h"
+#include "blk-cgroup.h"
 
 static struct kobject *block_depr;
 
@@ -1120,9 +1121,12 @@ static void disk_release(struct device *dev)
 
 	blk_mq_cancel_work_sync(disk->queue);
 
+	blkcg_exit_queue(disk->queue);
+
 	disk_release_events(disk);
 	kfree(disk->random);
 	xa_destroy(&disk->part_tbl);
+
 	disk->queue->disk = NULL;
 	blk_put_queue(disk->queue);
 
@@ -1328,6 +1332,9 @@ struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id,
 	if (xa_insert(&disk->part_tbl, 0, disk->part0, GFP_KERNEL))
 		goto out_destroy_part_tbl;
 
+	if (blkcg_init_queue(q))
+		goto out_erase_part0;
+
 	rand_initialize_disk(disk);
 	disk_to_dev(disk)->class = &block_class;
 	disk_to_dev(disk)->type = &disk_type;
@@ -1340,6 +1347,8 @@ struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id,
 #endif
 	return disk;
 
+out_erase_part0:
+	xa_erase(&disk->part_tbl, 0);
 out_destroy_part_tbl:
 	xa_destroy(&disk->part_tbl);
 	disk->part0->bd_disk = NULL;
-- 
GitLab


From de3d347f7b8a1a997fbc9267454ed6065068b969 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Tue, 8 Mar 2022 06:51:56 +0100
Subject: [PATCH 1151/1586] block: don't remove hctx debugfs dir from
 blk_mq_exit_queue

The queue's top debugfs dir is removed from blk_release_queue(), so all
hctx's debugfs dirs are removed from there. Given blk_mq_exit_queue()
is only called from blk_cleanup_queue(), it isn't necessary to remove
hctx debugfs from blk_mq_exit_queue().

So remove it from blk_mq_exit_queue().

Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220308055200.735835-11-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index f7ef8e2ab935b..862d91c6112e5 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -3462,7 +3462,6 @@ static void blk_mq_exit_hw_queues(struct request_queue *q,
 	queue_for_each_hw_ctx(q, hctx, i) {
 		if (i == nr_queue)
 			break;
-		blk_mq_debugfs_unregister_hctx(hctx);
 		blk_mq_exit_hctx(q, set, hctx, i);
 	}
 }
-- 
GitLab


From ba3e845665fbbb0252336f27200cd5cf288a3573 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Tue, 8 Mar 2022 06:51:57 +0100
Subject: [PATCH 1152/1586] block: move q_usage_counter release into
 blk_queue_release

After blk_cleanup_queue() returns, disk may not be released yet, so
probably bio may still be submitted and ->q_usage_counter may be
touched, so far this way seems safe, but not good from API's viewpoint.

Move the release q_usage_counter into blk_queue_release().

Signed-off-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220308055200.735835-12-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c  | 2 --
 block/blk-sysfs.c | 2 ++
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 3fa2f08d3750b..a97918d107a01 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -342,8 +342,6 @@ void blk_cleanup_queue(struct request_queue *q)
 		blk_mq_sched_free_rqs(q);
 	mutex_unlock(&q->sysfs_lock);
 
-	percpu_ref_exit(&q->q_usage_counter);
-
 	/* @q is and will stay empty, shutdown and put */
 	blk_put_queue(q);
 }
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 220085109d7f0..af5a6d86073f1 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -780,6 +780,8 @@ static void blk_release_queue(struct kobject *kobj)
 
 	might_sleep();
 
+	percpu_ref_exit(&q->q_usage_counter);
+
 	if (q->poll_stat)
 		blk_stat_remove_callback(q, q->poll_cb);
 	blk_stat_free_callback(q->poll_cb);
-- 
GitLab


From 28ce942fa2d5d80af5367ba9d39f2e0b4af37bfd Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Tue, 8 Mar 2022 06:51:58 +0100
Subject: [PATCH 1153/1586] block: move blk_exit_queue into disk_release

There can't be file system I/O in disk_release(), so move the call to
blk_exit_queue() there, preparing to have the teardown of file system I/O
only functionality in one place, when the gendisk that is needed for it
is torn down.

We still need to freeze queue here since the request is freed after the
bio is completed and passthrough request rely on scheduler tags as well.

The disk can be released before or after queue is cleaned up, and we have
to free the scheduler request pool before blk_cleanup_queue returns,
while the static request pool has to be freed before exiting the
I/O scheduler.

Signed-off-by: Ming Lei <ming.lei@redhat.com>
[hch: rebased, updated the commit log]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20220308055200.735835-13-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-sysfs.c | 16 ----------------
 block/genhd.c     | 31 ++++++++++++++++++++++++++++++-
 2 files changed, 30 insertions(+), 17 deletions(-)

diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index af5a6d86073f1..85c4ba006671d 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -739,20 +739,6 @@ static void blk_free_queue_rcu(struct rcu_head *rcu_head)
 	kmem_cache_free(blk_get_queue_kmem_cache(blk_queue_has_srcu(q)), q);
 }
 
-/* Unconfigure the I/O scheduler and dissociate from the cgroup controller. */
-static void blk_exit_queue(struct request_queue *q)
-{
-	/*
-	 * Since the I/O scheduler exit code may access cgroup information,
-	 * perform I/O scheduler exit before disassociating from the block
-	 * cgroup controller.
-	 */
-	if (q->elevator) {
-		ioc_clear_queue(q);
-		elevator_exit(q);
-	}
-}
-
 /**
  * blk_release_queue - releases all allocated resources of the request_queue
  * @kobj: pointer to a kobject, whose container is a request_queue
@@ -786,8 +772,6 @@ static void blk_release_queue(struct kobject *kobj)
 		blk_stat_remove_callback(q, q->poll_cb);
 	blk_stat_free_callback(q->poll_cb);
 
-	blk_exit_queue(q);
-
 	blk_free_queue_stats(q->stats);
 	kfree(q->poll_stat);
 
diff --git a/block/genhd.c b/block/genhd.c
index fc10aedab2099..73705a749ea92 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1098,6 +1098,34 @@ static const struct attribute_group *disk_attr_groups[] = {
 	NULL
 };
 
+static void disk_release_mq(struct request_queue *q)
+{
+	blk_mq_cancel_work_sync(q);
+
+	/*
+	 * There can't be any non non-passthrough bios in flight here, but
+	 * requests stay around longer, including passthrough ones so we
+	 * still need to freeze the queue here.
+	 */
+	blk_mq_freeze_queue(q);
+
+	/*
+	 * Since the I/O scheduler exit code may access cgroup information,
+	 * perform I/O scheduler exit before disassociating from the block
+	 * cgroup controller.
+	 */
+	if (q->elevator) {
+		ioc_clear_queue(q);
+
+		mutex_lock(&q->sysfs_lock);
+		blk_mq_sched_free_rqs(q);
+		elevator_exit(q);
+		mutex_unlock(&q->sysfs_lock);
+	}
+
+	__blk_mq_unfreeze_queue(q, true);
+}
+
 /**
  * disk_release - releases all allocated resources of the gendisk
  * @dev: the device representing this disk
@@ -1119,7 +1147,8 @@ static void disk_release(struct device *dev)
 	might_sleep();
 	WARN_ON_ONCE(disk_live(disk));
 
-	blk_mq_cancel_work_sync(disk->queue);
+	if (queue_is_mq(disk->queue))
+		disk_release_mq(disk->queue);
 
 	blkcg_exit_queue(disk->queue);
 
-- 
GitLab


From 28883074fc104f16ab181207d556ab75df6c6526 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 8 Mar 2022 06:51:59 +0100
Subject: [PATCH 1154/1586] block: do more work in elevator_exit

Move the calls to ioc_clear_queue and blk_mq_sched_free_rqs into
elevator_exit.  Except for one call where we know we can't have io_cq
structures yet these always go together, and that extra call in an
error path is harmless.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220308055200.735835-14-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/elevator.c | 6 +++---
 block/genhd.c    | 3 ---
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/block/elevator.c b/block/elevator.c
index a842e4b8ebc66..20a4e7c7c7745 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -192,6 +192,9 @@ void elevator_exit(struct request_queue *q)
 {
 	struct elevator_queue *e = q->elevator;
 
+	ioc_clear_queue(q);
+	blk_mq_sched_free_rqs(q);
+
 	mutex_lock(&e->sysfs_lock);
 	blk_mq_exit_sched(q, e);
 	mutex_unlock(&e->sysfs_lock);
@@ -596,8 +599,6 @@ int elevator_switch_mq(struct request_queue *q,
 
 	if (q->elevator) {
 		elv_unregister_queue(q);
-		ioc_clear_queue(q);
-		blk_mq_sched_free_rqs(q);
 		elevator_exit(q);
 	}
 
@@ -608,7 +609,6 @@ int elevator_switch_mq(struct request_queue *q,
 	if (new_e) {
 		ret = elv_register_queue(q, true);
 		if (ret) {
-			blk_mq_sched_free_rqs(q);
 			elevator_exit(q);
 			goto out;
 		}
diff --git a/block/genhd.c b/block/genhd.c
index 73705a749ea92..857e0a54da7dd 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1115,10 +1115,7 @@ static void disk_release_mq(struct request_queue *q)
 	 * cgroup controller.
 	 */
 	if (q->elevator) {
-		ioc_clear_queue(q);
-
 		mutex_lock(&q->sysfs_lock);
-		blk_mq_sched_free_rqs(q);
 		elevator_exit(q);
 		mutex_unlock(&q->sysfs_lock);
 	}
-- 
GitLab


From 5ca7546fe317bd181ad55f623f6e522069c2e426 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Tue, 8 Mar 2022 06:52:00 +0100
Subject: [PATCH 1155/1586] block: move rq_qos_exit() into disk_release()

Keep all teardown of file system I/O related functionality in one place.
There can't be file system I/O in disk_release(), so it is safe to move
rq_qos_exit() there.

Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220308055200.735835-15-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/genhd.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/block/genhd.c b/block/genhd.c
index 857e0a54da7dd..56f66c6fee943 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -627,7 +627,6 @@ void del_gendisk(struct gendisk *disk)
 
 	blk_mq_freeze_queue_wait(q);
 
-	rq_qos_exit(q);
 	blk_sync_queue(q);
 	blk_flush_integrity();
 	/*
@@ -1119,7 +1118,7 @@ static void disk_release_mq(struct request_queue *q)
 		elevator_exit(q);
 		mutex_unlock(&q->sysfs_lock);
 	}
-
+	rq_qos_exit(q);
 	__blk_mq_unfreeze_queue(q, true);
 }
 
-- 
GitLab


From 54cce8ecb9254f971b40a72911c6da403720a2d2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?D=C4=81vis=20Mos=C4=81ns?= <davispuh@gmail.com>
Date: Mon, 28 Feb 2022 05:15:45 +0200
Subject: [PATCH 1156/1586] crypto: ccp - ccp_dmaengine_unregister release dma
 channels
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ccp_dmaengine_register adds dma_chan->device_node to dma_dev->channels list
but ccp_dmaengine_unregister didn't remove them.
That can cause crashes in various dmaengine methods that tries to use dma_dev->channels

Fixes: 58ea8abf4904 ("crypto: ccp - Register the CCP as a DMA...")
Signed-off-by: Dāvis Mosāns <davispuh@gmail.com>
Acked-by: John Allen <john.allen@amd.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/ccp/ccp-dmaengine.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/drivers/crypto/ccp/ccp-dmaengine.c b/drivers/crypto/ccp/ccp-dmaengine.c
index d718db224be42..7d4b4ad1db1f3 100644
--- a/drivers/crypto/ccp/ccp-dmaengine.c
+++ b/drivers/crypto/ccp/ccp-dmaengine.c
@@ -632,6 +632,20 @@ static int ccp_terminate_all(struct dma_chan *dma_chan)
 	return 0;
 }
 
+static void ccp_dma_release(struct ccp_device *ccp)
+{
+	struct ccp_dma_chan *chan;
+	struct dma_chan *dma_chan;
+	unsigned int i;
+
+	for (i = 0; i < ccp->cmd_q_count; i++) {
+		chan = ccp->ccp_dma_chan + i;
+		dma_chan = &chan->dma_chan;
+		tasklet_kill(&chan->cleanup_tasklet);
+		list_del_rcu(&dma_chan->device_node);
+	}
+}
+
 int ccp_dmaengine_register(struct ccp_device *ccp)
 {
 	struct ccp_dma_chan *chan;
@@ -736,6 +750,7 @@ int ccp_dmaengine_register(struct ccp_device *ccp)
 	return 0;
 
 err_reg:
+	ccp_dma_release(ccp);
 	kmem_cache_destroy(ccp->dma_desc_cache);
 
 err_cache:
@@ -752,6 +767,7 @@ void ccp_dmaengine_unregister(struct ccp_device *ccp)
 		return;
 
 	dma_async_device_unregister(dma_dev);
+	ccp_dma_release(ccp);
 
 	kmem_cache_destroy(ccp->dma_desc_cache);
 	kmem_cache_destroy(ccp->dma_cmd_cache);
-- 
GitLab


From 3d950c34074ed74d2713c3856ba01264523289e6 Mon Sep 17 00:00:00 2001
From: Jianglei Nie <niejianglei2021@163.com>
Date: Thu, 3 Mar 2022 10:23:37 +0800
Subject: [PATCH 1157/1586] crypto: ccree - Fix use after free in
 cc_cipher_exit()

kfree_sensitive(ctx_p->user.key) will free the ctx_p->user.key. But
ctx_p->user.key is still used in the next line, which will lead to a
use after free.

We can call kfree_sensitive() after dev_dbg() to avoid the uaf.

Fixes: 63ee04c8b491 ("crypto: ccree - add skcipher support")
Signed-off-by: Jianglei Nie <niejianglei2021@163.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/ccree/cc_cipher.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/ccree/cc_cipher.c b/drivers/crypto/ccree/cc_cipher.c
index 78833491f534d..309da6334a0a0 100644
--- a/drivers/crypto/ccree/cc_cipher.c
+++ b/drivers/crypto/ccree/cc_cipher.c
@@ -257,8 +257,8 @@ static void cc_cipher_exit(struct crypto_tfm *tfm)
 		&ctx_p->user.key_dma_addr);
 
 	/* Free key buffer in context */
-	kfree_sensitive(ctx_p->user.key);
 	dev_dbg(dev, "Free key buffer in context. key=@%p\n", ctx_p->user.key);
+	kfree_sensitive(ctx_p->user.key);
 }
 
 struct tdes_keys {
-- 
GitLab


From ee6584bf3c680e20464e20bf28066d1ce685f7d3 Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Fri, 4 Mar 2022 13:57:42 +0800
Subject: [PATCH 1158/1586] crypto: marvell/octeontx - Use swap() instead of
 open coding it

Clean the following coccicheck warning:

./drivers/crypto/marvell/octeontx/otx_cptvf_algs.c:1645:16-17: WARNING
opportunity for swap().

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/marvell/octeontx/otx_cptvf_algs.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/crypto/marvell/octeontx/otx_cptvf_algs.c b/drivers/crypto/marvell/octeontx/otx_cptvf_algs.c
index ccbef01888d42..01c48ddc4eeb4 100644
--- a/drivers/crypto/marvell/octeontx/otx_cptvf_algs.c
+++ b/drivers/crypto/marvell/octeontx/otx_cptvf_algs.c
@@ -1639,11 +1639,8 @@ static void swap_func(void *lptr, void *rptr, int size)
 {
 	struct cpt_device_desc *ldesc = (struct cpt_device_desc *) lptr;
 	struct cpt_device_desc *rdesc = (struct cpt_device_desc *) rptr;
-	struct cpt_device_desc desc;
 
-	desc = *ldesc;
-	*ldesc = *rdesc;
-	*rdesc = desc;
+	swap(*ldesc, *rdesc);
 }
 
 int otx_cpt_crypto_init(struct pci_dev *pdev, struct module *mod,
-- 
GitLab


From 7a70d9a1cf112c0bdb42800d264d48f34089e3e8 Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Fri, 4 Mar 2022 07:36:48 +0000
Subject: [PATCH 1159/1586] crypto: xilinx: prevent probing on non-xilinx
 hardware

The zynqmp-sha driver is always loaded and register its algorithm even on
platform which do not have the proper hardware.
This lead to a stacktrace due to zynqmp-sha3-384 failing its crypto
self tests.
So check if hardware is present via the firmware API call get_version.

While at it, simplify the platform_driver by using module_platform_driver()

Furthermore the driver should depend on ZYNQMP_FIRMWARE since it cannot
work without it.

Fixes: 7ecc3e34474b ("crypto: xilinx - Add Xilinx SHA3 driver")
Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/Kconfig             |  2 +-
 drivers/crypto/xilinx/zynqmp-sha.c | 35 +++++++-----------------------
 2 files changed, 9 insertions(+), 28 deletions(-)

diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index bf4e55e730aa0..597559ec2057e 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -810,7 +810,7 @@ config CRYPTO_DEV_ZYNQMP_AES
 
 config CRYPTO_DEV_ZYNQMP_SHA3
 	bool "Support for Xilinx ZynqMP SHA3 hardware accelerator"
-	depends on ARCH_ZYNQMP
+	depends on ZYNQMP_FIRMWARE
 	select CRYPTO_SHA3
 	help
 	  Xilinx ZynqMP has SHA3 engine used for secure hash calculation.
diff --git a/drivers/crypto/xilinx/zynqmp-sha.c b/drivers/crypto/xilinx/zynqmp-sha.c
index 89549f4788bae..43ff170ff1c24 100644
--- a/drivers/crypto/xilinx/zynqmp-sha.c
+++ b/drivers/crypto/xilinx/zynqmp-sha.c
@@ -193,6 +193,13 @@ static int zynqmp_sha_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	int err;
+	u32 v;
+
+	/* Verify the hardware is present */
+	err = zynqmp_pm_get_api_version(&v);
+	if (err)
+		return err;
+
 
 	err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(ZYNQMP_DMA_BIT_MASK));
 	if (err < 0) {
@@ -251,33 +258,7 @@ static struct platform_driver zynqmp_sha_driver = {
 	},
 };
 
-static int __init sha_driver_init(void)
-{
-	struct platform_device *pdev;
-	int ret;
-
-	ret = platform_driver_register(&zynqmp_sha_driver);
-	if (ret)
-		return ret;
-
-	pdev = platform_device_register_simple(zynqmp_sha_driver.driver.name,
-					       0, NULL, 0);
-	if (IS_ERR(pdev)) {
-		ret = PTR_ERR(pdev);
-		platform_driver_unregister(&zynqmp_sha_driver);
-		pr_info("Failed to register ZynqMP SHA3 dvixe %d\n", ret);
-	}
-
-	return ret;
-}
-
-device_initcall(sha_driver_init);
-
-static void __exit sha_driver_exit(void)
-{
-	platform_driver_unregister(&zynqmp_sha_driver);
-}
-
+module_platform_driver(zynqmp_sha_driver);
 MODULE_DESCRIPTION("ZynqMP SHA3 hardware acceleration support.");
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Harsha <harsha.harsha@xilinx.com>");
-- 
GitLab


From fd11727eec0dd95ee1b7d8f9f10ee60678eecc29 Mon Sep 17 00:00:00 2001
From: Kai Ye <yekai13@huawei.com>
Date: Fri, 4 Mar 2022 17:46:47 +0800
Subject: [PATCH 1160/1586] crypto: hisilicon/qm - fix memset during queues
 clearing

Due to that extra page addr is used as a qp error flag when the device
resetting. So it not should to clear this qp flag in userspace.

Signed-off-by: Kai Ye <yekai13@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/qm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index 3b29c8993b8c7..453390044181d 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -3840,7 +3840,7 @@ static void qm_clear_queues(struct hisi_qm *qm)
 
 	for (i = 0; i < qm->qp_num; i++) {
 		qp = &qm->qp_array[i];
-		if (qp->is_resetting)
+		if (qp->is_in_kernel && qp->is_resetting)
 			memset(qp->qdma.va, 0, qp->qdma.size);
 	}
 
-- 
GitLab


From 8893d27ffcaf6ec6267038a177cb87bcde4dd3de Mon Sep 17 00:00:00 2001
From: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Date: Fri, 4 Mar 2022 17:54:47 +0000
Subject: [PATCH 1161/1586] crypto: qat - disable registration of algorithms

The implementations of aead and skcipher in the QAT driver do not
support properly requests with the CRYPTO_TFM_REQ_MAY_BACKLOG flag set.
If the HW queue is full, the driver returns -EBUSY but does not enqueue
the request.
This can result in applications like dm-crypt waiting indefinitely for a
completion of a request that was never submitted to the hardware.

To avoid this problem, disable the registration of all crypto algorithms
in the QAT driver by setting the number of crypto instances to 0 at
configuration time.

Cc: stable@vger.kernel.org
Signed-off-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/qat/qat_4xxx/adf_drv.c      | 7 +++++++
 drivers/crypto/qat/qat_common/qat_crypto.c | 7 +++++++
 2 files changed, 14 insertions(+)

diff --git a/drivers/crypto/qat/qat_4xxx/adf_drv.c b/drivers/crypto/qat/qat_4xxx/adf_drv.c
index a6c78b9c730bc..fa4c350c1bf92 100644
--- a/drivers/crypto/qat/qat_4xxx/adf_drv.c
+++ b/drivers/crypto/qat/qat_4xxx/adf_drv.c
@@ -75,6 +75,13 @@ static int adf_crypto_dev_config(struct adf_accel_dev *accel_dev)
 	if (ret)
 		goto err;
 
+	/* Temporarily set the number of crypto instances to zero to avoid
+	 * registering the crypto algorithms.
+	 * This will be removed when the algorithms will support the
+	 * CRYPTO_TFM_REQ_MAY_BACKLOG flag
+	 */
+	instances = 0;
+
 	for (i = 0; i < instances; i++) {
 		val = i;
 		bank = i * 2;
diff --git a/drivers/crypto/qat/qat_common/qat_crypto.c b/drivers/crypto/qat/qat_common/qat_crypto.c
index 7234c4940fae4..67c9588e89df9 100644
--- a/drivers/crypto/qat/qat_common/qat_crypto.c
+++ b/drivers/crypto/qat/qat_common/qat_crypto.c
@@ -161,6 +161,13 @@ int qat_crypto_dev_config(struct adf_accel_dev *accel_dev)
 	if (ret)
 		goto err;
 
+	/* Temporarily set the number of crypto instances to zero to avoid
+	 * registering the crypto algorithms.
+	 * This will be removed when the algorithms will support the
+	 * CRYPTO_TFM_REQ_MAY_BACKLOG flag
+	 */
+	instances = 0;
+
 	for (i = 0; i < instances; i++) {
 		val = i;
 		snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_ASYM_BANK_NUM, i);
-- 
GitLab


From c700216c70fca38ae7ceff07ed63d79ae25379c5 Mon Sep 17 00:00:00 2001
From: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Date: Fri, 4 Mar 2022 18:03:54 +0000
Subject: [PATCH 1162/1586] crypto: qat - remove unneeded assignment

The function adf_gen4_get_vf2pf_sources() computes a mask which is
stored in a variable which is returned and not used.
Remove superfluous assignment of variable.

This is to fix the following warning when compiling the QAT driver
with clang scan-build:

    drivers/crypto/qat/qat_common/adf_gen4_pfvf.c:46:9: warning: Although the value stored to 'sou' is used in the enclosing expression, the value is never actually read from 'sou' [deadcode.DeadStores]
            return sou &= ~mask;
                   ^      ~~~~~

Fixes: 5901b4af6e07 ("crypto: qat - fix access to PFVF interrupt registers for GEN4")
Signed-off-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/qat/qat_common/adf_gen4_pfvf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/qat/qat_common/adf_gen4_pfvf.c b/drivers/crypto/qat/qat_common/adf_gen4_pfvf.c
index 3b3ea849c5e53..d80d493a77568 100644
--- a/drivers/crypto/qat/qat_common/adf_gen4_pfvf.c
+++ b/drivers/crypto/qat/qat_common/adf_gen4_pfvf.c
@@ -43,7 +43,7 @@ static u32 adf_gen4_get_vf2pf_sources(void __iomem *pmisc_addr)
 	sou = ADF_CSR_RD(pmisc_addr, ADF_4XXX_VM2PF_SOU);
 	mask = ADF_CSR_RD(pmisc_addr, ADF_4XXX_VM2PF_MSK);
 
-	return sou &= ~mask;
+	return sou & ~mask;
 }
 
 static void adf_gen4_enable_vf2pf_interrupts(void __iomem *pmisc_addr,
-- 
GitLab


From 54584146cc8cb49ce471011d4afcc03a8a529463 Mon Sep 17 00:00:00 2001
From: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Date: Fri, 4 Mar 2022 18:03:55 +0000
Subject: [PATCH 1163/1586] crypto: qat - fix initialization of pfvf cap_msg
 structures

Initialize fully the structures cap_msg containing the device
capabilities from the host.

This is to fix the following warning when compiling the QAT driver
using the clang compiler with CC=clang W=2:

    drivers/crypto/qat/qat_common/adf_pfvf_vf_msg.c:99:44: warning: missing field 'ext_dc_caps' initializer [-Wmissing-field-initializers]
            struct capabilities_v3 cap_msg = { { 0 }, };
                                                      ^

Fixes: 851ed498dba1 ("crypto: qat - exchange device capabilities over PFVF")
Signed-off-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/qat/qat_common/adf_pfvf_vf_msg.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/qat/qat_common/adf_pfvf_vf_msg.c b/drivers/crypto/qat/qat_common/adf_pfvf_vf_msg.c
index 14b222691c9c2..c5b326f63e95b 100644
--- a/drivers/crypto/qat/qat_common/adf_pfvf_vf_msg.c
+++ b/drivers/crypto/qat/qat_common/adf_pfvf_vf_msg.c
@@ -96,7 +96,7 @@ int adf_vf2pf_request_version(struct adf_accel_dev *accel_dev)
 int adf_vf2pf_get_capabilities(struct adf_accel_dev *accel_dev)
 {
 	struct adf_hw_device_data *hw_data = accel_dev->hw_device;
-	struct capabilities_v3 cap_msg = { { 0 }, };
+	struct capabilities_v3 cap_msg = { 0 };
 	unsigned int len = sizeof(cap_msg);
 
 	if (accel_dev->vf.pf_compat_ver < ADF_PFVF_COMPAT_CAPABILITIES)
-- 
GitLab


From 44dbd0c61bf1480be55dbb0cac793d861d1957b9 Mon Sep 17 00:00:00 2001
From: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Date: Fri, 4 Mar 2022 18:03:56 +0000
Subject: [PATCH 1164/1586] crypto: qat - fix initialization of pfvf
 rts_map_msg structures

Initialize fully the structures rts_map_msg containing the ring to
service map from the host.

This is to fix the following warning when compiling the QAT driver
using the clang compiler with CC=clang W=2:

    drivers/crypto/qat/qat_common/adf_pfvf_vf_msg.c:144:51: warning: missing field 'map' initializer [-Wmissing-field-initializers]
            struct ring_to_svc_map_v1 rts_map_msg = { { 0 }, };
                                                             ^
Fixes: e1b176af3d7e ("crypto: qat - exchange ring-to-service mappings over PFVF")
Signed-off-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/qat/qat_common/adf_pfvf_vf_msg.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/qat/qat_common/adf_pfvf_vf_msg.c b/drivers/crypto/qat/qat_common/adf_pfvf_vf_msg.c
index c5b326f63e95b..1141258db4b65 100644
--- a/drivers/crypto/qat/qat_common/adf_pfvf_vf_msg.c
+++ b/drivers/crypto/qat/qat_common/adf_pfvf_vf_msg.c
@@ -141,7 +141,7 @@ int adf_vf2pf_get_capabilities(struct adf_accel_dev *accel_dev)
 
 int adf_vf2pf_get_ring_to_svc(struct adf_accel_dev *accel_dev)
 {
-	struct ring_to_svc_map_v1 rts_map_msg = { { 0 }, };
+	struct ring_to_svc_map_v1 rts_map_msg = { 0 };
 	unsigned int len = sizeof(rts_map_msg);
 
 	if (accel_dev->vf.pf_compat_ver < ADF_PFVF_COMPAT_RING_TO_SVC_MAP)
-- 
GitLab


From cd6714f94091308f07a32b79e0a43f983587da3f Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Sun, 6 Mar 2022 05:34:20 -0800
Subject: [PATCH 1165/1586] crypto: arm64 - cleanup comments

For spdx, use // for *.c files

Replacements
significanty to significantly

Signed-off-by: Tom Rix <trix@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm64/crypto/sha3-ce-glue.c   | 2 +-
 arch/arm64/crypto/sha512-armv8.pl  | 2 +-
 arch/arm64/crypto/sha512-ce-glue.c | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/crypto/sha3-ce-glue.c b/arch/arm64/crypto/sha3-ce-glue.c
index 8c65cecf560a9..250e1377c481b 100644
--- a/arch/arm64/crypto/sha3-ce-glue.c
+++ b/arch/arm64/crypto/sha3-ce-glue.c
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+// SPDX-License-Identifier: GPL-2.0
 /*
  * sha3-ce-glue.c - core SHA-3 transform using v8.2 Crypto Extensions
  *
diff --git a/arch/arm64/crypto/sha512-armv8.pl b/arch/arm64/crypto/sha512-armv8.pl
index 2d8655d5b1af5..35ec9ae99fe16 100644
--- a/arch/arm64/crypto/sha512-armv8.pl
+++ b/arch/arm64/crypto/sha512-armv8.pl
@@ -43,7 +43,7 @@
 #	on Cortex-A53 (or by 4 cycles per round).
 # (***)	Super-impressive coefficients over gcc-generated code are
 #	indication of some compiler "pathology", most notably code
-#	generated with -mgeneral-regs-only is significanty faster
+#	generated with -mgeneral-regs-only is significantly faster
 #	and the gap is only 40-90%.
 #
 # October 2016.
diff --git a/arch/arm64/crypto/sha512-ce-glue.c b/arch/arm64/crypto/sha512-ce-glue.c
index e62a094a9d526..94cb7580deb7b 100644
--- a/arch/arm64/crypto/sha512-ce-glue.c
+++ b/arch/arm64/crypto/sha512-ce-glue.c
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+// SPDX-License-Identifier: GPL-2.0
 /*
  * sha512-ce-glue.c - SHA-384/SHA-512 using ARMv8 Crypto Extensions
  *
-- 
GitLab


From 9f7c3f837a3768ce8d5a5c34070a47608a4a0209 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 28 Feb 2022 13:25:00 +0200
Subject: [PATCH 1166/1586] raid5-ppl: fully initialize the bio in
 ppl_new_iounit

We have all the information to pass the bdev and op directly to bio_init,
so do that.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Song Liu <song@kernel.org>
---
 drivers/md/raid5-ppl.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c
index 3446797fa0aca..f7fdd8292485d 100644
--- a/drivers/md/raid5-ppl.c
+++ b/drivers/md/raid5-ppl.c
@@ -250,7 +250,8 @@ static struct ppl_io_unit *ppl_new_iounit(struct ppl_log *log,
 	INIT_LIST_HEAD(&io->stripe_list);
 	atomic_set(&io->pending_stripes, 0);
 	atomic_set(&io->pending_flushes, 0);
-	bio_init(&io->bio, NULL, io->biovec, PPL_IO_INLINE_BVECS, 0);
+	bio_init(&io->bio, log->rdev->bdev, io->biovec, PPL_IO_INLINE_BVECS,
+		 REQ_OP_WRITE | REQ_FUA);
 
 	pplhdr = page_address(io->header_page);
 	clear_page(pplhdr);
@@ -465,8 +466,6 @@ static void ppl_submit_iounit(struct ppl_io_unit *io)
 
 
 	bio->bi_end_io = ppl_log_endio;
-	bio->bi_opf = REQ_OP_WRITE | REQ_FUA;
-	bio_set_dev(bio, log->rdev->bdev);
 	bio->bi_iter.bi_sector = log->next_io_sector;
 	bio_add_page(bio, io->header_page, PAGE_SIZE, 0);
 	bio->bi_write_hint = ppl_conf->write_hint;
-- 
GitLab


From 0dd00cba99c352dc9afd62979f350d808c215cb9 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 28 Feb 2022 13:25:01 +0200
Subject: [PATCH 1167/1586] raid5-cache: fully initialize flush_bio when needed

Stop using bio_reset and just initialize the bio fully when needed.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Song Liu <song@kernel.org>
---
 drivers/md/raid5-cache.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 86e2bb89d9c7b..8907d18c225a9 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -1266,6 +1266,8 @@ static void r5l_log_flush_endio(struct bio *bio)
 		r5l_io_run_stripes(io);
 	list_splice_tail_init(&log->flushing_ios, &log->finished_ios);
 	spin_unlock_irqrestore(&log->io_list_lock, flags);
+
+	bio_uninit(bio);
 }
 
 /*
@@ -1301,7 +1303,7 @@ void r5l_flush_stripe_to_raid(struct r5l_log *log)
 
 	if (!do_flush)
 		return;
-	bio_reset(&log->flush_bio, log->rdev->bdev,
+	bio_init(&log->flush_bio, log->rdev->bdev, NULL, 0,
 		  REQ_OP_WRITE | REQ_PREFLUSH);
 	log->flush_bio.bi_end_io = r5l_log_flush_endio;
 	submit_bio(&log->flush_bio);
@@ -3105,7 +3107,6 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
 	INIT_LIST_HEAD(&log->io_end_ios);
 	INIT_LIST_HEAD(&log->flushing_ios);
 	INIT_LIST_HEAD(&log->finished_ios);
-	bio_init(&log->flush_bio, NULL, NULL, 0, 0);
 
 	log->io_kc = KMEM_CACHE(r5l_io_unit, 0);
 	if (!log->io_kc)
-- 
GitLab


From 89f94b64408b576c88e407d87061bb7b9bc2111d Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 28 Feb 2022 13:25:02 +0200
Subject: [PATCH 1168/1586] raid5-cache: statically allocate the recovery ra
 bio

There is no need to preallocate the bio and reset it when use.  Just
allocate it on-stack and use a bvec places next to the pages used for
it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Song Liu <song@kernel.org>
---
 drivers/md/raid5-cache.c | 28 +++++++++++++---------------
 1 file changed, 13 insertions(+), 15 deletions(-)

diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 8907d18c225a9..a7d50ff9020a8 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -1623,10 +1623,10 @@ struct r5l_recovery_ctx {
 	 * just copy data from the pool.
 	 */
 	struct page *ra_pool[R5L_RECOVERY_PAGE_POOL_SIZE];
+	struct bio_vec ra_bvec[R5L_RECOVERY_PAGE_POOL_SIZE];
 	sector_t pool_offset;	/* offset of first page in the pool */
 	int total_pages;	/* total allocated pages */
 	int valid_pages;	/* pages with valid data */
-	struct bio *ra_bio;	/* bio to do the read ahead */
 };
 
 static int r5l_recovery_allocate_ra_pool(struct r5l_log *log,
@@ -1634,11 +1634,6 @@ static int r5l_recovery_allocate_ra_pool(struct r5l_log *log,
 {
 	struct page *page;
 
-	ctx->ra_bio = bio_alloc_bioset(NULL, BIO_MAX_VECS, 0, GFP_KERNEL,
-				       &log->bs);
-	if (!ctx->ra_bio)
-		return -ENOMEM;
-
 	ctx->valid_pages = 0;
 	ctx->total_pages = 0;
 	while (ctx->total_pages < R5L_RECOVERY_PAGE_POOL_SIZE) {
@@ -1650,10 +1645,8 @@ static int r5l_recovery_allocate_ra_pool(struct r5l_log *log,
 		ctx->total_pages += 1;
 	}
 
-	if (ctx->total_pages == 0) {
-		bio_put(ctx->ra_bio);
+	if (ctx->total_pages == 0)
 		return -ENOMEM;
-	}
 
 	ctx->pool_offset = 0;
 	return 0;
@@ -1666,7 +1659,6 @@ static void r5l_recovery_free_ra_pool(struct r5l_log *log,
 
 	for (i = 0; i < ctx->total_pages; ++i)
 		put_page(ctx->ra_pool[i]);
-	bio_put(ctx->ra_bio);
 }
 
 /*
@@ -1679,15 +1671,19 @@ static int r5l_recovery_fetch_ra_pool(struct r5l_log *log,
 				      struct r5l_recovery_ctx *ctx,
 				      sector_t offset)
 {
-	bio_reset(ctx->ra_bio, log->rdev->bdev, REQ_OP_READ);
-	ctx->ra_bio->bi_iter.bi_sector = log->rdev->data_offset + offset;
+	struct bio bio;
+	int ret;
+
+	bio_init(&bio, log->rdev->bdev, ctx->ra_bvec,
+		 R5L_RECOVERY_PAGE_POOL_SIZE, REQ_OP_READ);
+	bio.bi_iter.bi_sector = log->rdev->data_offset + offset;
 
 	ctx->valid_pages = 0;
 	ctx->pool_offset = offset;
 
 	while (ctx->valid_pages < ctx->total_pages) {
-		bio_add_page(ctx->ra_bio,
-			     ctx->ra_pool[ctx->valid_pages], PAGE_SIZE, 0);
+		__bio_add_page(&bio, ctx->ra_pool[ctx->valid_pages], PAGE_SIZE,
+			       0);
 		ctx->valid_pages += 1;
 
 		offset = r5l_ring_add(log, offset, BLOCK_SECTORS);
@@ -1696,7 +1692,9 @@ static int r5l_recovery_fetch_ra_pool(struct r5l_log *log,
 			break;
 	}
 
-	return submit_bio_wait(ctx->ra_bio);
+	ret = submit_bio_wait(&bio);
+	bio_uninit(&bio);
+	return ret;
 }
 
 /*
-- 
GitLab


From 03a6b195e8e846f7373bcbeb3ea2a756dfb9ae61 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 28 Feb 2022 13:25:03 +0200
Subject: [PATCH 1169/1586] raid5: initialize the stripe_head embeeded bios as
 needed

Use bio_init to initialize the bios when needed to the full state
instead of a partial initialization plus later setting of dev and op
and bio_reset.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Song Liu <song@kernel.org>
---
 drivers/md/raid5.c | 25 ++++++++-----------------
 1 file changed, 8 insertions(+), 17 deletions(-)

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 8891aaba65964..8bd5f06390ea1 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1060,6 +1060,7 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
 	int i, disks = sh->disks;
 	struct stripe_head *head_sh = sh;
 	struct bio_list pending_bios = BIO_EMPTY_LIST;
+	struct r5dev *dev;
 	bool should_defer;
 
 	might_sleep();
@@ -1094,8 +1095,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
 			op_flags |= REQ_SYNC;
 
 again:
-		bi = &sh->dev[i].req;
-		rbi = &sh->dev[i].rreq; /* For writing to replacement */
+		dev = &sh->dev[i];
+		bi = &dev->req;
+		rbi = &dev->rreq; /* For writing to replacement */
 
 		rcu_read_lock();
 		rrdev = rcu_dereference(conf->disks[i].replacement);
@@ -1171,8 +1173,7 @@ again:
 
 			set_bit(STRIPE_IO_STARTED, &sh->state);
 
-			bio_set_dev(bi, rdev->bdev);
-			bio_set_op_attrs(bi, op, op_flags);
+			bio_init(bi, rdev->bdev, &dev->vec, 1, op | op_flags);
 			bi->bi_end_io = op_is_write(op)
 				? raid5_end_write_request
 				: raid5_end_read_request;
@@ -1238,8 +1239,7 @@ again:
 
 			set_bit(STRIPE_IO_STARTED, &sh->state);
 
-			bio_set_dev(rbi, rrdev->bdev);
-			bio_set_op_attrs(rbi, op, op_flags);
+			bio_init(rbi, rrdev->bdev, &dev->rvec, 1, op | op_flags);
 			BUG_ON(!op_is_write(op));
 			rbi->bi_end_io = raid5_end_write_request;
 			rbi->bi_private = sh;
@@ -2294,7 +2294,6 @@ static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp,
 	int disks, struct r5conf *conf)
 {
 	struct stripe_head *sh;
-	int i;
 
 	sh = kmem_cache_zalloc(sc, gfp);
 	if (sh) {
@@ -2307,12 +2306,6 @@ static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp,
 		atomic_set(&sh->count, 1);
 		sh->raid_conf = conf;
 		sh->log_start = MaxSector;
-		for (i = 0; i < disks; i++) {
-			struct r5dev *dev = &sh->dev[i];
-
-			bio_init(&dev->req, NULL, &dev->vec, 1, 0);
-			bio_init(&dev->rreq, NULL, &dev->rvec, 1, 0);
-		}
 
 		if (raid5_has_ppl(conf)) {
 			sh->ppl_page = alloc_page(gfp);
@@ -2677,7 +2670,6 @@ static void raid5_end_read_request(struct bio * bi)
 		(unsigned long long)sh->sector, i, atomic_read(&sh->count),
 		bi->bi_status);
 	if (i == disks) {
-		bio_reset(bi, NULL, 0);
 		BUG();
 		return;
 	}
@@ -2785,7 +2777,7 @@ static void raid5_end_read_request(struct bio * bi)
 		}
 	}
 	rdev_dec_pending(rdev, conf->mddev);
-	bio_reset(bi, NULL, 0);
+	bio_uninit(bi);
 	clear_bit(R5_LOCKED, &sh->dev[i].flags);
 	set_bit(STRIPE_HANDLE, &sh->state);
 	raid5_release_stripe(sh);
@@ -2823,7 +2815,6 @@ static void raid5_end_write_request(struct bio *bi)
 		(unsigned long long)sh->sector, i, atomic_read(&sh->count),
 		bi->bi_status);
 	if (i == disks) {
-		bio_reset(bi, NULL, 0);
 		BUG();
 		return;
 	}
@@ -2860,7 +2851,7 @@ static void raid5_end_write_request(struct bio *bi)
 	if (sh->batch_head && bi->bi_status && !replacement)
 		set_bit(STRIPE_BATCH_ERR, &sh->batch_head->state);
 
-	bio_reset(bi, NULL, 0);
+	bio_uninit(bi);
 	if (!test_and_clear_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags))
 		clear_bit(R5_LOCKED, &sh->dev[i].flags);
 	set_bit(STRIPE_HANDLE, &sh->state);
-- 
GitLab


From d6c47d21a0ef9a76c537f8c5258b49d61f2e160f Mon Sep 17 00:00:00 2001
From: Qianggui Song <qianggui.song@amlogic.com>
Date: Fri, 25 Feb 2022 13:52:06 +0800
Subject: [PATCH 1170/1586] irqchip/meson-gpio: Add support for meson s4 SoCs

The meson s4 SoCs support 12 gpio irq lines compared with previous
serial chips and have something different, details are as below.

IRQ Number:
- 80:68 13 pins on bank Z
- 67:48 20 pins on bank X
- 47:36 12 pins on bank H
- 35:24 12 pins on bank D
- 23:22 2  pins on bank E
- 21:14 8  pins on bank C
- 13:0  13 pins on bank B

Signed-off-by: Qianggui Song <qianggui.song@amlogic.com>
[maz: fixed some W=1 build warnings]
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220225055207.1048-5-qianggui.song@amlogic.com
---
 drivers/irqchip/irq-meson-gpio.c | 65 ++++++++++++++++++++++++++++++++
 1 file changed, 65 insertions(+)

diff --git a/drivers/irqchip/irq-meson-gpio.c b/drivers/irqchip/irq-meson-gpio.c
index 7b5863e368167..2aaa9aad3e87a 100644
--- a/drivers/irqchip/irq-meson-gpio.c
+++ b/drivers/irqchip/irq-meson-gpio.c
@@ -26,6 +26,8 @@
 
 /* use for A1 like chips */
 #define REG_PIN_A1_SEL	0x04
+/* Used for s4 chips */
+#define REG_EDGE_POL_S4	0x1c
 
 /*
  * Note: The S905X3 datasheet reports that BOTH_EDGE is controlled by
@@ -53,6 +55,8 @@ static void meson_a1_gpio_irq_sel_pin(struct meson_gpio_irq_controller *ctl,
 static void meson_a1_gpio_irq_init(struct meson_gpio_irq_controller *ctl);
 static int meson8_gpio_irq_set_type(struct meson_gpio_irq_controller *ctl,
 				    unsigned int type, u32 *channel_hwirq);
+static int meson_s4_gpio_irq_set_type(struct meson_gpio_irq_controller *ctl,
+				      unsigned int type, u32 *channel_hwirq);
 
 struct irq_ctl_ops {
 	void (*gpio_irq_sel_pin)(struct meson_gpio_irq_controller *ctl,
@@ -101,6 +105,17 @@ struct meson_gpio_irq_params {
 	.pin_sel_mask = 0x7f,					\
 	.nr_channels = 8,					\
 
+#define INIT_MESON_S4_COMMON_DATA(irqs)				\
+	INIT_MESON_COMMON(irqs, meson_a1_gpio_irq_init,		\
+			  meson_a1_gpio_irq_sel_pin,		\
+			  meson_s4_gpio_irq_set_type)		\
+	.support_edge_both = true,				\
+	.edge_both_offset = 0,					\
+	.edge_single_offset = 12,				\
+	.pol_low_offset = 0,					\
+	.pin_sel_mask = 0xff,					\
+	.nr_channels = 12,					\
+
 static const struct meson_gpio_irq_params meson8_params = {
 	INIT_MESON8_COMMON_DATA(134)
 };
@@ -131,6 +146,10 @@ static const struct meson_gpio_irq_params a1_params = {
 	INIT_MESON_A1_COMMON_DATA(62)
 };
 
+static const struct meson_gpio_irq_params s4_params = {
+	INIT_MESON_S4_COMMON_DATA(82)
+};
+
 static const struct of_device_id meson_irq_gpio_matches[] = {
 	{ .compatible = "amlogic,meson8-gpio-intc", .data = &meson8_params },
 	{ .compatible = "amlogic,meson8b-gpio-intc", .data = &meson8b_params },
@@ -140,6 +159,7 @@ static const struct of_device_id meson_irq_gpio_matches[] = {
 	{ .compatible = "amlogic,meson-g12a-gpio-intc", .data = &axg_params },
 	{ .compatible = "amlogic,meson-sm1-gpio-intc", .data = &sm1_params },
 	{ .compatible = "amlogic,meson-a1-gpio-intc", .data = &a1_params },
+	{ .compatible = "amlogic,meson-s4-gpio-intc", .data = &s4_params },
 	{ }
 };
 
@@ -308,6 +328,51 @@ static int meson8_gpio_irq_set_type(struct meson_gpio_irq_controller *ctl,
 	return 0;
 }
 
+/*
+ * gpio irq relative registers for s4
+ * -PADCTRL_GPIO_IRQ_CTRL0
+ * bit[31]:    enable/disable all the irq lines
+ * bit[12-23]: single edge trigger
+ * bit[0-11]:  polarity trigger
+ *
+ * -PADCTRL_GPIO_IRQ_CTRL[X]
+ * bit[0-16]: 7 bits to choose gpio source for irq line 2*[X] - 2
+ * bit[16-22]:7 bits to choose gpio source for irq line 2*[X] - 1
+ * where X = 1-6
+ *
+ * -PADCTRL_GPIO_IRQ_CTRL[7]
+ * bit[0-11]: both edge trigger
+ */
+static int meson_s4_gpio_irq_set_type(struct meson_gpio_irq_controller *ctl,
+				      unsigned int type, u32 *channel_hwirq)
+{
+	u32 val = 0;
+	unsigned int idx;
+
+	idx = meson_gpio_irq_get_channel_idx(ctl, channel_hwirq);
+
+	type &= IRQ_TYPE_SENSE_MASK;
+
+	meson_gpio_irq_update_bits(ctl, REG_EDGE_POL_S4, BIT(idx), 0);
+
+	if (type == IRQ_TYPE_EDGE_BOTH) {
+		val |= BIT(ctl->params->edge_both_offset + idx);
+		meson_gpio_irq_update_bits(ctl, REG_EDGE_POL_S4,
+					   BIT(ctl->params->edge_both_offset + idx), val);
+		return 0;
+	}
+
+	if (type & (IRQ_TYPE_LEVEL_LOW | IRQ_TYPE_EDGE_FALLING))
+		val |= BIT(ctl->params->pol_low_offset + idx);
+
+	if (type & (IRQ_TYPE_EDGE_RISING | IRQ_TYPE_EDGE_FALLING))
+		val |= BIT(ctl->params->edge_single_offset + idx);
+
+	meson_gpio_irq_update_bits(ctl, REG_EDGE_POL,
+				   BIT(idx) | BIT(12 + idx), val);
+	return 0;
+};
+
 static unsigned int meson_gpio_irq_type_output(unsigned int type)
 {
 	unsigned int sense = type & IRQ_TYPE_SENSE_MASK;
-- 
GitLab


From d8fd5a1e78db375f2246d43df7833fec07a221cd Mon Sep 17 00:00:00 2001
From: Joey Gouly <joey.gouly@arm.com>
Date: Tue, 1 Mar 2022 15:45:18 +0000
Subject: [PATCH 1171/1586] kasan: fix a missing header include of
 static_keys.h

The kasan-enabled.h header relies on static keys, so make sure
to include the header to avoid compilation errors (with JUMP_LABEL=n).

It fixes the following:
./include/linux/kasan-enabled.h:9:1: warning: data definition has no type or storage class
    9 | DECLARE_STATIC_KEY_FALSE(kasan_flag_enabled);
      | ^~~~~~~~~~~~~~~~~~~~~~~~
error: type defaults to 'int' in declaration of 'DECLARE_STATIC_KEY_FALSE' [-Werror=implicit-int]

Fixes: f9b5e46f4097eb29 ("kasan: split kasan_*enabled() functions into a separate header")
Cc: Peter Collingbourne <pcc@google.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Acked-by: Andrey Konovalov <andreyknvl@gmail.com>
Signed-off-by: Joey Gouly <joey.gouly@arm.com>
Link: https://lore.kernel.org/r/20220301154518.19456-1-joey.gouly@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 include/linux/kasan-enabled.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/linux/kasan-enabled.h b/include/linux/kasan-enabled.h
index 4b6615375022f..6f612d69ea0ca 100644
--- a/include/linux/kasan-enabled.h
+++ b/include/linux/kasan-enabled.h
@@ -2,6 +2,8 @@
 #ifndef _LINUX_KASAN_ENABLED_H
 #define _LINUX_KASAN_ENABLED_H
 
+#include <linux/static_key.h>
+
 #ifdef CONFIG_KASAN_HW_TAGS
 
 DECLARE_STATIC_KEY_FALSE(kasan_flag_enabled);
-- 
GitLab


From 3c3dd2c81a1036c487353f7b9a485268b2fc1d7f Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Wed, 9 Mar 2022 12:16:33 +0000
Subject: [PATCH 1172/1586] Documentation: vmcoreinfo: Fix htmldocs warning

Since commit 2369f171d5c5 ("arm64: crash_core: Export MODULES, VMALLOC,
and VMEMMAP ranges"), Stephen reports a warning when building htmldocs:

  | Documentation/admin-guide/kdump/vmcoreinfo.rst:498: WARNING: Title underline too short.

Extend the underline to squash the warning.

Fixes: 2369f171d5c5 ("arm64: crash_core: Export MODULES, VMALLOC, and VMEMMAP ranges")
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Will Deacon <will@kernel.org>
---
 Documentation/admin-guide/kdump/vmcoreinfo.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/admin-guide/kdump/vmcoreinfo.rst b/Documentation/admin-guide/kdump/vmcoreinfo.rst
index a339af45a22ed..8419019b6a88f 100644
--- a/Documentation/admin-guide/kdump/vmcoreinfo.rst
+++ b/Documentation/admin-guide/kdump/vmcoreinfo.rst
@@ -495,7 +495,7 @@ addresses in the higher VA range (refer to ARMv8 ARM document for
 more details).
 
 MODULES_VADDR|MODULES_END|VMALLOC_START|VMALLOC_END|VMEMMAP_START|VMEMMAP_END
--------------
+-----------------------------------------------------------------------------
 
 Used to get the correct ranges:
 	MODULES_VADDR ~ MODULES_END-1 : Kernel module space.
-- 
GitLab


From 770093459b9b333380aa71f2c31c60b14895c1df Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Wed, 9 Mar 2022 12:21:37 +0000
Subject: [PATCH 1173/1586] arm64: mm: Drop 'const' from conditional
 arm64_dma_phys_limit definition

Commit 031495635b46 ("arm64: Do not defer reserve_crashkernel() for
platforms with no DMA memory zones") introduced different definitions
for 'arm64_dma_phys_limit' depending on CONFIG_ZONE_DMA{,32} based on
a late suggestion from Pasha. Sadly, this results in a build error when
passing W=1:

  | arch/arm64/mm/init.c:90:19: error: conflicting type qualifiers for 'arm64_dma_phys_limit'

Drop the 'const' for now and use '__ro_after_init' consistently.

Link: https://lore.kernel.org/r/202203090241.aj7paWeX-lkp@intel.com
Link: https://lore.kernel.org/r/CA+CK2bDbbx=8R=UthkMesWOST8eJMtOGJdfMRTFSwVmo0Vn0EA@mail.gmail.com
Fixes: 031495635b46 ("arm64: Do not defer reserve_crashkernel() for platforms with no DMA memory zones")
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/mm/init.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 919be440494fa..9e26ec80d3175 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -87,7 +87,7 @@ EXPORT_SYMBOL(memstart_addr);
 #if IS_ENABLED(CONFIG_ZONE_DMA) || IS_ENABLED(CONFIG_ZONE_DMA32)
 phys_addr_t __ro_after_init arm64_dma_phys_limit;
 #else
-const phys_addr_t arm64_dma_phys_limit = PHYS_MASK + 1;
+phys_addr_t __ro_after_init arm64_dma_phys_limit = PHYS_MASK + 1;
 #endif
 
 #ifdef CONFIG_KEXEC_CORE
-- 
GitLab


From 6676a42f1e4f1b8ec166b723a3801b7113c25a0e Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Wed, 9 Mar 2022 12:31:00 +0000
Subject: [PATCH 1174/1586] perf/marvell: Fix !CONFIG_OF build for CN10K DDR
 PMU driver

When compiling the Marvell CN10K DDR PMU driver with CONFIG_OF=n, the
build fails:

  | drivers/perf/marvell_cn10k_ddr_pmu.c:723:35: error: 'cn10k_ddr_pmu_of_match' undeclared here (not in a function); did you mean 'cn10k_ddr_pmu_driver'?

Use `of_match_ptr()` to avoid referencing the non-existent match table
in this configuration.

Link: https://lore.kernel.org/r/202203091424.Vfe8J4W9-lkp@intel.com
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/marvell_cn10k_ddr_pmu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/perf/marvell_cn10k_ddr_pmu.c b/drivers/perf/marvell_cn10k_ddr_pmu.c
index 7f3146e71f995..665b382a0ee3d 100644
--- a/drivers/perf/marvell_cn10k_ddr_pmu.c
+++ b/drivers/perf/marvell_cn10k_ddr_pmu.c
@@ -720,7 +720,7 @@ MODULE_DEVICE_TABLE(of, cn10k_ddr_pmu_of_match);
 static struct platform_driver cn10k_ddr_pmu_driver = {
 	.driver	= {
 		.name   = "cn10k-ddr-pmu",
-		.of_match_table = cn10k_ddr_pmu_of_match,
+		.of_match_table = of_match_ptr(cn10k_ddr_pmu_of_match),
 		.suppress_bind_attrs = true,
 	},
 	.probe		= cn10k_ddr_perf_probe,
-- 
GitLab


From b77e70f6b8f2cc62fba847f3008a430a09ef275d Mon Sep 17 00:00:00 2001
From: ChiYuan Huang <cy_huang@richtek.com>
Date: Wed, 9 Mar 2022 16:01:42 +0800
Subject: [PATCH 1175/1586] regulator: Add bindings for Richtek RT5190A PMIC

Add bindings for Richtek RT5190A PMIC.

Signed-off-by: ChiYuan Huang <cy_huang@richtek.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Link: https://lore.kernel.org/r/1646812903-32496-2-git-send-email-u0084500@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../regulator/richtek,rt5190a-regulator.yaml  | 141 ++++++++++++++++++
 .../regulator/richtek,rt5190a-regulator.h     |  15 ++
 2 files changed, 156 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/regulator/richtek,rt5190a-regulator.yaml
 create mode 100644 include/dt-bindings/regulator/richtek,rt5190a-regulator.h

diff --git a/Documentation/devicetree/bindings/regulator/richtek,rt5190a-regulator.yaml b/Documentation/devicetree/bindings/regulator/richtek,rt5190a-regulator.yaml
new file mode 100644
index 0000000000000..28725c5467fc8
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/richtek,rt5190a-regulator.yaml
@@ -0,0 +1,141 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/regulator/richtek,rt5190a-regulator.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Richtek RT5190A PMIC Regulator
+
+maintainers:
+  - ChiYuan Huang <cy_huang@richtek.com>
+
+description: |
+  The RT5190A integrates 1 channel buck controller, 3 channels high efficiency
+  synchronous buck converters, 1 LDO, I2C control interface and peripherial
+  logical control.
+
+  It also supports mute AC OFF depop sound and quick setting storage while
+  input power is removed.
+
+properties:
+  compatible:
+    enum:
+      - richtek,rt5190a
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  vin2-supply:
+    description: phandle to buck2 input voltage.
+
+  vin3-supply:
+    description: phandle to buck3 input voltage.
+
+  vin4-supply:
+    description: phandle to buck4 input voltage.
+
+  vinldo-supply:
+    description: phandle to ldo input voltage
+
+  richtek,mute-enable:
+    description: |
+      The mute function uses 'mutein', 'muteout', and 'vdet' pins as the control
+      signal. When enabled, The normal behavior is to bypass the 'mutein' signal
+      'muteout'. But if the power source removal is detected from 'vdet',
+      whatever the 'mutein' signal is, it will pull down the 'muteout' to force
+      speakers mute. this function is commonly used to prevent the speaker pop
+      noise during AC power turned off in the modern TV system design.
+    type: boolean
+
+  regulators:
+    type: object
+
+    patternProperties:
+      "^buck[1-4]$|^ldo$":
+        type: object
+        $ref: regulator.yaml#
+        description: |
+          regulator description for buck1 and buck4.
+
+        properties:
+          regulator-allowed-modes:
+            description: |
+             buck operating mode, only buck1/4 support mode operating.
+              0: auto mode
+              1: force pwm mode
+            items:
+              enum: [0, 1]
+
+          richtek,latchup-enable:
+            type: boolean
+            description: |
+              If specified, undervolt protection mode changes from the default
+              hiccup to latchup.
+
+        unevaluatedProperties: false
+
+    additionalProperties: false
+
+required:
+  - compatible
+  - reg
+  - regulators
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/irq.h>
+    #include <dt-bindings/regulator/richtek,rt5190a-regulator.h>
+
+    i2c {
+      #address-cells = <1>;
+      #size-cells = <0>;
+
+      pmic@64 {
+        compatible = "richtek,rt5190a";
+        reg = <0x64>;
+        interrupts-extended = <&gpio26 0 IRQ_TYPE_LEVEL_LOW>;
+        vin2-supply = <&rt5190_buck1>;
+        vin3-supply = <&rt5190_buck1>;
+        vin4-supply = <&rt5190_buck1>;
+
+        regulators {
+          rt5190_buck1: buck1 {
+            regulator-name = "rt5190a-buck1";
+            regulator-min-microvolt = <5090000>;
+            regulator-max-microvolt = <5090000>;
+            regulator-allowed-modes = <RT5190A_OPMODE_AUTO RT5190A_OPMODE_FPWM>;
+            regulator-boot-on;
+          };
+          buck2 {
+            regulator-name = "rt5190a-buck2";
+            regulator-min-microvolt = <600000>;
+            regulator-max-microvolt = <1400000>;
+            regulator-boot-on;
+          };
+          buck3 {
+            regulator-name = "rt5190a-buck3";
+            regulator-min-microvolt = <600000>;
+            regulator-max-microvolt = <1400000>;
+            regulator-boot-on;
+          };
+          buck4 {
+            regulator-name = "rt5190a-buck4";
+            regulator-min-microvolt = <850000>;
+            regulator-max-microvolt = <850000>;
+            regulator-allowed-modes = <RT5190A_OPMODE_AUTO RT5190A_OPMODE_FPWM>;
+            regulator-boot-on;
+          };
+          ldo {
+            regulator-name = "rt5190a-ldo";
+            regulator-min-microvolt = <1200000>;
+            regulator-max-microvolt = <1200000>;
+            regulator-boot-on;
+          };
+        };
+      };
+    };
diff --git a/include/dt-bindings/regulator/richtek,rt5190a-regulator.h b/include/dt-bindings/regulator/richtek,rt5190a-regulator.h
new file mode 100644
index 0000000000000..63f99d4c1cb35
--- /dev/null
+++ b/include/dt-bindings/regulator/richtek,rt5190a-regulator.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __DT_BINDINGS_RICHTEK_RT5190A_REGULATOR_H__
+#define __DT_BINDINGS_RICHTEK_RT5190A_REGULATOR_H__
+
+/*
+ * BUCK/LDO mode constants which may be used in devicetree properties
+ * (eg. regulator-allowed-modes).
+ * See the manufacturer's datasheet for more information on these modes.
+ */
+
+#define RT5190A_OPMODE_AUTO	0
+#define RT5190A_OPMODE_FPWM	1
+
+#endif
-- 
GitLab


From 760423dfad53877b468490758fe7ea968ded9402 Mon Sep 17 00:00:00 2001
From: ChiYuan Huang <cy_huang@richtek.com>
Date: Wed, 9 Mar 2022 16:01:43 +0800
Subject: [PATCH 1176/1586] regulator: rt5190a: Add support for Richtek RT5190A
 PMIC

Add support for Richtek RT5190A PMIC.

Signed-off-by: ChiYuan Huang <cy_huang@richtek.com>
Link: https://lore.kernel.org/r/1646812903-32496-3-git-send-email-u0084500@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/Kconfig             |  10 +
 drivers/regulator/Makefile            |   1 +
 drivers/regulator/rt5190a-regulator.c | 513 ++++++++++++++++++++++++++
 3 files changed, 524 insertions(+)
 create mode 100644 drivers/regulator/rt5190a-regulator.c

diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index 00559c214f570..c8ce6e5eea246 100644
--- a/drivers/regulator/Kconfig
+++ b/drivers/regulator/Kconfig
@@ -1047,6 +1047,16 @@ config REGULATOR_RT5033
 	  RT5033 PMIC. The device supports multiple regulators like
 	  current source, LDO and Buck.
 
+config REGULATOR_RT5190A
+	tristate "Richtek RT5190A PMIC"
+	depends on I2C
+	select REGMAP_I2C
+	help
+	  This adds support for voltage regulator in Richtek RT5190A PMIC.
+	  It integratas 1 channel buck controller, 3 channels high efficiency
+	  buck converters, 1 LDO, mute AC OFF depop function, with the general
+	  I2C control interface.
+
 config REGULATOR_RT6160
 	tristate "Richtek RT6160 BuckBoost voltage regulator"
 	depends on I2C
diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile
index 4b8794a73e170..1b64ad5767be0 100644
--- a/drivers/regulator/Makefile
+++ b/drivers/regulator/Makefile
@@ -126,6 +126,7 @@ obj-$(CONFIG_REGULATOR_ROHM)	+= rohm-regulator.o
 obj-$(CONFIG_REGULATOR_RT4801)	+= rt4801-regulator.o
 obj-$(CONFIG_REGULATOR_RT4831)	+= rt4831-regulator.o
 obj-$(CONFIG_REGULATOR_RT5033)	+= rt5033-regulator.o
+obj-$(CONFIG_REGULATOR_RT5190A) += rt5190a-regulator.o
 obj-$(CONFIG_REGULATOR_RT6160)	+= rt6160-regulator.o
 obj-$(CONFIG_REGULATOR_RT6245)	+= rt6245-regulator.o
 obj-$(CONFIG_REGULATOR_RTMV20)	+= rtmv20-regulator.o
diff --git a/drivers/regulator/rt5190a-regulator.c b/drivers/regulator/rt5190a-regulator.c
new file mode 100644
index 0000000000000..155d4afd00b16
--- /dev/null
+++ b/drivers/regulator/rt5190a-regulator.c
@@ -0,0 +1,513 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <dt-bindings/regulator/richtek,rt5190a-regulator.h>
+#include <linux/bits.h>
+#include <linux/i2c.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/property.h>
+#include <linux/regmap.h>
+#include <linux/regulator/driver.h>
+#include <linux/regulator/machine.h>
+#include <linux/regulator/of_regulator.h>
+
+#define RT5190A_REG_MANUFACTURE		0x00
+#define RT5190A_REG_BUCK2VSEL		0x04
+#define RT5190A_REG_BUCK3VSEL		0x05
+#define RT5190A_REG_DCDCCNTL		0x06
+#define RT5190A_REG_ENABLE		0x07
+#define RT5190A_REG_DISCHARGE		0x09
+#define RT5190A_REG_PROTMODE		0x0A
+#define RT5190A_REG_MUTECNTL		0x0B
+#define RT5190A_REG_PGSTAT		0x0F
+#define RT5190A_REG_OVINT		0x10
+#define RT5190A_REG_HOTDIEMASK		0x17
+
+#define RT5190A_VSEL_MASK		GENMASK(6, 0)
+#define RT5190A_RID_BITMASK(rid)	BIT(rid + 1)
+#define RT5190A_BUCK1_DISCHG_MASK	GENMASK(1, 0)
+#define RT5190A_BUCK1_DISCHG_ONVAL	0x01
+#define RT5190A_OVERVOLT_MASK		GENMASK(7, 0)
+#define RT5190A_UNDERVOLT_MASK		GENMASK(15, 8)
+#define RT5190A_CH234OT_MASK		BIT(29)
+#define RT5190A_CHIPOT_MASK		BIT(28)
+
+#define RT5190A_BUCK23_MINUV		600000
+#define RT5190A_BUCK23_MAXUV		1400000
+#define RT5190A_BUCK23_STEPUV		10000
+#define RT5190A_BUCK23_STEPNUM		((1400000 - 600000) / 10000 + 1)
+
+enum {
+	RT5190A_IDX_BUCK1 = 0,
+	RT5190A_IDX_BUCK2,
+	RT5190A_IDX_BUCK3,
+	RT5190A_IDX_BUCK4,
+	RT5190A_IDX_LDO,
+	RT5190A_MAX_IDX
+};
+
+struct rt5190a_priv {
+	struct device *dev;
+	struct regmap *regmap;
+	struct regulator_desc rdesc[RT5190A_MAX_IDX];
+	struct regulator_dev *rdev[RT5190A_MAX_IDX];
+};
+
+static int rt5190a_get_error_flags(struct regulator_dev *rdev,
+				   unsigned int *flags)
+{
+	struct regmap *regmap = rdev_get_regmap(rdev);
+	int rid = rdev_get_id(rdev);
+	unsigned int pgood_stat;
+	int ret;
+
+	ret = regmap_read(regmap, RT5190A_REG_PGSTAT, &pgood_stat);
+	if (ret)
+		return ret;
+
+	if (!(pgood_stat & RT5190A_RID_BITMASK(rid)))
+		*flags = REGULATOR_ERROR_FAIL;
+	else
+		*flags = 0;
+
+	return 0;
+}
+
+static int rt5190a_fixed_buck_set_mode(struct regulator_dev *rdev,
+				       unsigned int mode)
+{
+	struct regmap *regmap = rdev_get_regmap(rdev);
+	int rid = rdev_get_id(rdev);
+	unsigned int mask = RT5190A_RID_BITMASK(rid), val;
+
+	switch (mode) {
+	case REGULATOR_MODE_FAST:
+		val = mask;
+		break;
+	case REGULATOR_MODE_NORMAL:
+		val = 0;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return regmap_update_bits(regmap, RT5190A_REG_DCDCCNTL, mask, val);
+}
+
+static unsigned int rt5190a_fixed_buck_get_mode(struct regulator_dev *rdev)
+{
+	struct regmap *regmap = rdev_get_regmap(rdev);
+	int rid = rdev_get_id(rdev);
+	unsigned int val;
+	int ret;
+
+	ret = regmap_read(regmap, RT5190A_REG_DCDCCNTL, &val);
+	if (ret) {
+		dev_err(&rdev->dev, "Failed to get mode [%d]\n", ret);
+		return ret;
+	}
+
+	if (val & RT5190A_RID_BITMASK(rid))
+		return REGULATOR_MODE_FAST;
+
+	return REGULATOR_MODE_NORMAL;
+}
+
+static const struct regulator_ops rt5190a_ranged_buck_ops = {
+	.enable	= regulator_enable_regmap,
+	.disable = regulator_disable_regmap,
+	.is_enabled = regulator_is_enabled_regmap,
+	.set_voltage_sel = regulator_set_voltage_sel_regmap,
+	.get_voltage_sel = regulator_get_voltage_sel_regmap,
+	.list_voltage = regulator_list_voltage_linear,
+	.set_active_discharge = regulator_set_active_discharge_regmap,
+	.get_error_flags = rt5190a_get_error_flags,
+};
+
+static const struct regulator_ops rt5190a_fixed_buck_ops = {
+	.enable	= regulator_enable_regmap,
+	.disable = regulator_disable_regmap,
+	.is_enabled = regulator_is_enabled_regmap,
+	.set_active_discharge = regulator_set_active_discharge_regmap,
+	.set_mode = rt5190a_fixed_buck_set_mode,
+	.get_mode = rt5190a_fixed_buck_get_mode,
+	.get_error_flags = rt5190a_get_error_flags,
+};
+
+static const struct regulator_ops rt5190a_fixed_ldo_ops = {
+	.enable	= regulator_enable_regmap,
+	.disable = regulator_disable_regmap,
+	.is_enabled = regulator_is_enabled_regmap,
+	.set_active_discharge = regulator_set_active_discharge_regmap,
+	.get_error_flags = rt5190a_get_error_flags,
+};
+
+static irqreturn_t rt5190a_irq_handler(int irq, void *data)
+{
+	struct rt5190a_priv *priv = data;
+	__le32 raws;
+	unsigned int events, fields;
+	static const struct {
+		unsigned int bitmask;
+		unsigned int report;
+	} event_tbl[] = {
+		{ RT5190A_OVERVOLT_MASK, REGULATOR_ERROR_REGULATION_OUT },
+		{ RT5190A_UNDERVOLT_MASK, REGULATOR_ERROR_UNDER_VOLTAGE }
+	};
+	int i, j, ret;
+
+	ret = regmap_raw_read(priv->regmap, RT5190A_REG_OVINT, &raws,
+			      sizeof(raws));
+	if (ret) {
+		dev_err(priv->dev, "Failed to read events\n");
+		return IRQ_NONE;
+	}
+
+	events = le32_to_cpu(raws);
+
+	ret = regmap_raw_write(priv->regmap, RT5190A_REG_OVINT, &raws,
+			       sizeof(raws));
+	if (ret)
+		dev_err(priv->dev, "Failed to write-clear events\n");
+
+	/* Handle OV,UV events */
+	for (i = 0; i < ARRAY_SIZE(event_tbl); i++) {
+		fields = events & event_tbl[i].bitmask;
+		fields >>= ffs(event_tbl[i].bitmask) - 1;
+
+		for (j = 0; j < RT5190A_MAX_IDX; j++) {
+			if (!(fields & RT5190A_RID_BITMASK(j)))
+				continue;
+
+			regulator_notifier_call_chain(priv->rdev[j],
+						      event_tbl[i].report,
+						      NULL);
+		}
+	}
+
+	/* Handle CH234 OT event */
+	if (events & RT5190A_CH234OT_MASK) {
+		for (j = RT5190A_IDX_BUCK2; j < RT5190A_IDX_LDO; j++) {
+			regulator_notifier_call_chain(priv->rdev[j],
+						      REGULATOR_ERROR_OVER_TEMP,
+						      NULL);
+		}
+	}
+
+	/* Warning if CHIP OT occur */
+	if (events & RT5190A_CHIPOT_MASK)
+		dev_warn(priv->dev, "CHIP overheat\n");
+
+	return IRQ_HANDLED;
+}
+
+static unsigned int rt5190a_of_map_mode(unsigned int mode)
+{
+	switch (mode) {
+	case RT5190A_OPMODE_AUTO:
+		return REGULATOR_MODE_NORMAL;
+	case RT5190A_OPMODE_FPWM:
+		return REGULATOR_MODE_FAST;
+	default:
+		return REGULATOR_MODE_INVALID;
+	}
+}
+
+static int rt5190a_of_parse_cb(struct rt5190a_priv *priv, int rid,
+			       struct of_regulator_match *match)
+{
+	struct regulator_desc *desc = priv->rdesc + rid;
+	struct regulator_init_data *init_data = match->init_data;
+	struct device_node *np = match->of_node;
+	bool latchup_enable;
+	unsigned int mask = RT5190A_RID_BITMASK(rid), val;
+
+	switch (rid) {
+	case RT5190A_IDX_BUCK1:
+	case RT5190A_IDX_BUCK4:
+	case RT5190A_IDX_LDO:
+		init_data->constraints.apply_uV = 0;
+
+		if (init_data->constraints.min_uV ==
+				init_data->constraints.max_uV)
+			desc->fixed_uV = init_data->constraints.min_uV;
+		else {
+			dev_err(priv->dev,
+				"Variable voltage for fixed regulator\n");
+			return -EINVAL;
+		}
+		break;
+	default:
+		break;
+	}
+
+	latchup_enable = of_property_read_bool(np, "richtek,latchup-enable");
+
+	/* latchup: 0, default hiccup: 1 */
+	val = !latchup_enable ? mask : 0;
+
+	return regmap_update_bits(priv->regmap, RT5190A_REG_PROTMODE, mask, val);
+}
+
+static void rt5190a_fillin_regulator_desc(struct regulator_desc *desc, int rid)
+{
+	static const char * const regu_name[] = { "buck1", "buck2",
+						  "buck3", "buck4",
+						  "ldo" };
+	static const char * const supply[] = { NULL, "vin2", "vin3", "vin4",
+					       "vinldo" };
+
+	desc->name = regu_name[rid];
+	desc->supply_name = supply[rid];
+	desc->owner = THIS_MODULE;
+	desc->type = REGULATOR_VOLTAGE;
+	desc->id = rid;
+	desc->enable_reg = RT5190A_REG_ENABLE;
+	desc->enable_mask = RT5190A_RID_BITMASK(rid);
+	desc->active_discharge_reg = RT5190A_REG_DISCHARGE;
+	desc->active_discharge_mask = RT5190A_RID_BITMASK(rid);
+	desc->active_discharge_on = RT5190A_RID_BITMASK(rid);
+
+	switch (rid) {
+	case RT5190A_IDX_BUCK1:
+		desc->active_discharge_mask = RT5190A_BUCK1_DISCHG_MASK;
+		desc->active_discharge_on = RT5190A_BUCK1_DISCHG_ONVAL;
+		desc->n_voltages = 1;
+		desc->ops = &rt5190a_fixed_buck_ops;
+		desc->of_map_mode = rt5190a_of_map_mode;
+		break;
+	case RT5190A_IDX_BUCK2:
+		desc->vsel_reg = RT5190A_REG_BUCK2VSEL;
+		desc->vsel_mask = RT5190A_VSEL_MASK;
+		desc->min_uV = RT5190A_BUCK23_MINUV;
+		desc->uV_step = RT5190A_BUCK23_STEPUV;
+		desc->n_voltages = RT5190A_BUCK23_STEPNUM;
+		desc->ops = &rt5190a_ranged_buck_ops;
+		break;
+	case RT5190A_IDX_BUCK3:
+		desc->vsel_reg = RT5190A_REG_BUCK3VSEL;
+		desc->vsel_mask = RT5190A_VSEL_MASK;
+		desc->min_uV = RT5190A_BUCK23_MINUV;
+		desc->uV_step = RT5190A_BUCK23_STEPUV;
+		desc->n_voltages = RT5190A_BUCK23_STEPNUM;
+		desc->ops = &rt5190a_ranged_buck_ops;
+		break;
+	case RT5190A_IDX_BUCK4:
+		desc->n_voltages = 1;
+		desc->ops = &rt5190a_fixed_buck_ops;
+		desc->of_map_mode = rt5190a_of_map_mode;
+		break;
+	case RT5190A_IDX_LDO:
+		desc->n_voltages = 1;
+		desc->ops = &rt5190a_fixed_ldo_ops;
+		break;
+	}
+}
+
+static struct of_regulator_match rt5190a_regulator_match[] = {
+	{ .name = "buck1", },
+	{ .name = "buck2", },
+	{ .name = "buck3", },
+	{ .name = "buck4", },
+	{ .name = "ldo", }
+};
+
+static int rt5190a_parse_regulator_dt_data(struct rt5190a_priv *priv)
+{
+	struct device_node *regulator_np;
+	struct regulator_desc *reg_desc;
+	struct of_regulator_match *match;
+	int i, ret;
+
+	for (i = 0; i < RT5190A_MAX_IDX; i++) {
+		reg_desc = priv->rdesc + i;
+		match = rt5190a_regulator_match + i;
+
+		rt5190a_fillin_regulator_desc(reg_desc, i);
+
+		match->desc = reg_desc;
+	}
+
+	regulator_np = of_get_child_by_name(priv->dev->of_node, "regulators");
+	if (!regulator_np) {
+		dev_err(priv->dev, "Could not find 'regulators' node\n");
+		return -ENODEV;
+	}
+
+	ret = of_regulator_match(priv->dev, regulator_np,
+				 rt5190a_regulator_match,
+				 ARRAY_SIZE(rt5190a_regulator_match));
+
+	of_node_put(regulator_np);
+
+	if (ret < 0) {
+		dev_err(priv->dev,
+			"Error parsing regulator init data: %d\n", ret);
+		return ret;
+	}
+
+	for (i = 0; i < RT5190A_MAX_IDX; i++) {
+		match = rt5190a_regulator_match + i;
+
+		ret = rt5190a_of_parse_cb(priv, i, match);
+		if (ret) {
+			dev_err(priv->dev, "Failed in [%d] of_parse_cb\n", i);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static const struct reg_sequence rt5190a_init_patch[] = {
+	{ 0x09, 0x3d, },
+	{ 0x0a, 0x3e, },
+	{ 0x0b, 0x01, },
+	{ 0x10, 0xff, },
+	{ 0x11, 0xff, },
+	{ 0x12, 0xff, },
+	{ 0x13, 0xff, },
+	{ 0x14, 0, },
+	{ 0x15, 0, },
+	{ 0x16, 0x3e, },
+	{ 0x17, 0, }
+};
+
+static int rt5190a_device_initialize(struct rt5190a_priv *priv)
+{
+	bool mute_enable;
+	int ret;
+
+	ret = regmap_register_patch(priv->regmap, rt5190a_init_patch,
+				    ARRAY_SIZE(rt5190a_init_patch));
+	if (ret) {
+		dev_err(priv->dev, "Failed to do register patch\n");
+		return ret;
+	}
+
+	mute_enable = device_property_read_bool(priv->dev,
+						"richtek,mute-enable");
+
+	if (mute_enable) {
+		ret = regmap_write(priv->regmap, RT5190A_REG_MUTECNTL, 0x00);
+		if (ret) {
+			dev_err(priv->dev, "Failed to enable mute function\n");
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static int rt5190a_device_check(struct rt5190a_priv *priv)
+{
+	u16 devid;
+	int ret;
+
+	ret = regmap_raw_read(priv->regmap, RT5190A_REG_MANUFACTURE, &devid,
+			      sizeof(devid));
+	if (ret)
+		return ret;
+
+	if (devid) {
+		dev_err(priv->dev, "Incorrect device id 0x%04x\n", devid);
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+static const struct regmap_config rt5190a_regmap_config = {
+	.reg_bits = 8,
+	.val_bits = 8,
+	.max_register = RT5190A_REG_HOTDIEMASK,
+};
+
+static int rt5190a_probe(struct i2c_client *i2c)
+{
+	struct rt5190a_priv *priv;
+	struct regulator_config cfg = {};
+	int i, ret;
+
+	priv = devm_kzalloc(&i2c->dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->dev = &i2c->dev;
+
+	priv->regmap = devm_regmap_init_i2c(i2c, &rt5190a_regmap_config);
+	if (IS_ERR(priv->regmap)) {
+		dev_err(&i2c->dev, "Failed to allocate regmap\n");
+		return PTR_ERR(priv->regmap);
+	}
+
+	ret = rt5190a_device_check(priv);
+	if (ret) {
+		dev_err(&i2c->dev, "Failed to check device %d\n", ret);
+		return ret;
+	}
+
+	ret = rt5190a_device_initialize(priv);
+	if (ret) {
+		dev_err(&i2c->dev, "Failed to initialize the device\n");
+		return ret;
+	}
+
+	ret = rt5190a_parse_regulator_dt_data(priv);
+	if (ret) {
+		dev_err(&i2c->dev, "Failed to parse regulator dt\n");
+		return ret;
+	}
+
+	cfg.dev = &i2c->dev;
+	cfg.regmap = priv->regmap;
+
+	for (i = 0; i < RT5190A_MAX_IDX; i++) {
+		struct regulator_desc *desc = priv->rdesc + i;
+		struct of_regulator_match *match = rt5190a_regulator_match + i;
+
+		cfg.init_data = match->init_data;
+		cfg.of_node = match->of_node;
+
+		priv->rdev[i] = devm_regulator_register(&i2c->dev, desc, &cfg);
+		if (IS_ERR(priv->rdev[i])) {
+			dev_err(&i2c->dev, "Failed to register regulator %s\n",
+				desc->name);
+			return PTR_ERR(priv->rdev[i]);
+		}
+	}
+
+	if (i2c->irq) {
+		ret = devm_request_threaded_irq(&i2c->dev, i2c->irq, NULL,
+						rt5190a_irq_handler,
+						IRQF_ONESHOT,
+						dev_name(&i2c->dev), priv);
+		if (ret) {
+			dev_err(&i2c->dev, "Failed to register interrupt\n");
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static const struct of_device_id __maybe_unused rt5190a_device_table[] = {
+	{ .compatible = "richtek,rt5190a", },
+	{}
+};
+MODULE_DEVICE_TABLE(of, rt5190a_device_table);
+
+static struct i2c_driver rt5190a_driver = {
+	.driver = {
+		.name = "rt5190a",
+		.of_match_table = rt5190a_device_table,
+	},
+	.probe_new = rt5190a_probe,
+};
+module_i2c_driver(rt5190a_driver);
+
+MODULE_AUTHOR("ChiYuan Huang <cy_huang@richtek.com>");
+MODULE_DESCRIPTION("Richtek RT5190A Regulator Driver");
+MODULE_LICENSE("GPL v2");
-- 
GitLab


From f833116ad2c3eabf9c739946170e07825cca67ed Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 9 Mar 2022 10:37:07 -0600
Subject: [PATCH 1177/1586] coredump: Don't compile flat_core_dump when
 coredumps are disabled

Recently the kernel test robot reported:
> In file included from include/linux/kernel.h:29,
>                     from fs/binfmt_flat.c:21:
>    fs/binfmt_flat.c: In function 'flat_core_dump':
> >> fs/binfmt_flat.c:121:50: error: invalid use of undefined type 'struct coredump_params'
>      121 |                 current->comm, current->pid, cprm->siginfo->si_signo);
>          |                                                  ^~
>    include/linux/printk.h:418:33: note: in definition of macro 'printk_index_wrap'
>      418 |                 _p_func(_fmt, ##__VA_ARGS__);                           \
>          |                                 ^~~~~~~~~~~
>    include/linux/printk.h:499:9: note: in expansion of macro 'printk'
>      499 |         printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__)
>          |         ^~~~~~
>    fs/binfmt_flat.c:120:9: note: in expansion of macro 'pr_warn'
>      120 |         pr_warn("Process %s:%d received signr %d and should have core dumped\n",
>          |         ^~~~~~~
>    At top level:
>    fs/binfmt_flat.c:118:12: warning: 'flat_core_dump' defined but not used [-Wunused-function]
>      118 | static int flat_core_dump(struct coredump_params *cprm)
>          |            ^~~~~~~~~~~~~~

The little dinky do nothing function flat_core_dump has always been
compiled unconditionally.  With my change to move coredump_params into
coredump.h coredump_params reasonably becomes unavailable when
coredump support is not compiled in.  Fix this old issue by simply not
compiling flat_core_dump when coredump support is not supported.

Fixes: a99a3e2efaf1 ("coredump: Move definition of struct coredump_params into coredump.h")
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/binfmt_flat.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 208cdce16de1b..6268981500112 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -98,7 +98,9 @@ static int load_flat_shared_library(int id, struct lib_info *p);
 #endif
 
 static int load_flat_binary(struct linux_binprm *);
+#ifdef CONFIG_COREDUMP
 static int flat_core_dump(struct coredump_params *cprm);
+#endif
 
 static struct linux_binfmt flat_format = {
 	.module		= THIS_MODULE,
@@ -115,12 +117,14 @@ static struct linux_binfmt flat_format = {
  * Currently only a stub-function.
  */
 
+#ifdef CONFIG_COREDUMP
 static int flat_core_dump(struct coredump_params *cprm)
 {
 	pr_warn("Process %s:%d received signr %d and should have core dumped\n",
 		current->comm, current->pid, cprm->siginfo->si_signo);
 	return 1;
 }
+#endif
 
 /****************************************************************************/
 /*
-- 
GitLab


From f90205b95368ee2b56fc523abda6c4d514901d9b Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Wed, 9 Mar 2022 18:06:00 +0000
Subject: [PATCH 1178/1586] arm64: Add cavium_erratum_23154_cpus missing
 sentinel

Qian Cai reported that playing with CPU hotplug resulted in a
out-of-bound access due to cavium_erratum_23154_cpus missing
a sentinel indicating the end of the array.

Add it in order to restore peace and harmony in the world
of broken HW.

Reported-by: Qian Cai <quic_qiancai@quicinc.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Fixes: 24a147bcef8c ("irqchip/gic-v3: Workaround Marvell erratum 38545 when reading IAR")
Link: https://lore.kernel.org/r/YijmkXp1VG7e8lDx@qian
Cc: Linu Cherian <lcherian@marvell.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20220309180600.3990874-1-maz@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/cpu_errata.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 510f47055b914..6485d8e54ccaf 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -225,6 +225,7 @@ const struct midr_range cavium_erratum_23154_cpus[] = {
 	MIDR_ALL_VERSIONS(MIDR_OCTX2_95XXN),
 	MIDR_ALL_VERSIONS(MIDR_OCTX2_95XXMM),
 	MIDR_ALL_VERSIONS(MIDR_OCTX2_95XXO),
+	{},
 };
 #endif
 
-- 
GitLab


From cf220ad6749b8305ba11bdf601c55a17ad2a715d Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Wed, 9 Mar 2022 13:12:00 +0000
Subject: [PATCH 1179/1586] arm64/mte: Remove asymmetric mode from the prctl()
 interface

As pointed out by Evgenii Stepanov one potential issue with the new ABI for
enabling asymmetric is that if there are multiple places where MTE is
configured in a process, some of which were compiled with the old prctl.h
and some of which were compiled with the new prctl.h, there may be problems
keeping track of which MTE modes are requested. For example some code may
disable only sync and async modes leaving asymmetric mode enabled when it
intended to fully disable MTE.

In order to avoid such mishaps remove asymmetric mode from the prctl(),
instead implicitly allowing it if both sync and async modes are requested.
This should not disrupt userspace since a process requesting both may
already see a mix of sync and async modes due to differing defaults between
CPUs or changes in default while the process is running but it does mean
that userspace is unable to explicitly request asymmetric mode without
changing the system default for CPUs.

Reported-by: Evgenii Stepanov <eugenis@google.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Evgenii Stepanov <eugenis@google.com>
Cc: Peter Collingbourne <pcc@google.com>
Cc: Joey Gouly <joey.gouly@arm.com>
Cc: Branislav Rankov <branislav.rankov@arm.com>
Link: https://lore.kernel.org/r/20220309131200.112637-1-broonie@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 Documentation/arm64/memory-tagging-extension.rst | 15 ++++++++-------
 arch/arm64/kernel/mte.c                          | 13 ++++++++++---
 arch/arm64/kernel/process.c                      |  2 --
 include/uapi/linux/prctl.h                       |  4 +---
 4 files changed, 19 insertions(+), 15 deletions(-)

diff --git a/Documentation/arm64/memory-tagging-extension.rst b/Documentation/arm64/memory-tagging-extension.rst
index 42677d48a114b..7e812a51e5065 100644
--- a/Documentation/arm64/memory-tagging-extension.rst
+++ b/Documentation/arm64/memory-tagging-extension.rst
@@ -88,7 +88,6 @@ bit-field:
                          (ignored if combined with other options)
 - ``PR_MTE_TCF_SYNC``  - *Synchronous* tag check fault mode
 - ``PR_MTE_TCF_ASYNC`` - *Asynchronous* tag check fault mode
-- ``PR_MTE_TCF_ASYMM`` - *Asymmetric* tag check fault mode
 
 If no modes are specified, tag check faults are ignored. If a single
 mode is specified, the program will run in that mode. If multiple
@@ -149,17 +148,19 @@ default preferred mode for each CPU is ``async``.
 To allow a program to potentially run in the CPU's preferred tag
 checking mode, the user program may set multiple tag check fault mode
 bits in the ``flags`` argument to the ``prctl(PR_SET_TAGGED_ADDR_CTRL,
-flags, 0, 0, 0)`` system call. If the CPU's preferred tag checking
-mode is in the task's set of provided tag checking modes, that
-mode will be selected. Otherwise, one of the modes in the task's mode
-selected by the kernel using the preference order:
+flags, 0, 0, 0)`` system call. If both synchronous and asynchronous
+modes are requested then asymmetric mode may also be selected by the
+kernel. If the CPU's preferred tag checking mode is in the task's set
+of provided tag checking modes, that mode will be selected. Otherwise,
+one of the modes in the task's mode will be selected by the kernel
+from the task's mode set using the preference order:
 
 	1. Asynchronous
 	2. Asymmetric
 	3. Synchronous
 
-If asymmetric mode is specified by the program but not supported by
-either the system or the kernel then an error will be returned.
+Note that there is no way for userspace to request multiple modes and
+also disable asymmetric mode.
 
 Initial process state
 ---------------------
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index cbbd8d93fc502..78b3e0f8e997c 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -311,7 +311,16 @@ long set_mte_ctrl(struct task_struct *task, unsigned long arg)
 		mte_ctrl |= MTE_CTRL_TCF_ASYNC;
 	if (arg & PR_MTE_TCF_SYNC)
 		mte_ctrl |= MTE_CTRL_TCF_SYNC;
-	if (arg & PR_MTE_TCF_ASYMM)
+
+	/*
+	 * If the system supports it and both sync and async modes are
+	 * specified then implicitly enable asymmetric mode.
+	 * Userspace could see a mix of both sync and async anyway due
+	 * to differing or changing defaults on CPUs.
+	 */
+	if (cpus_have_cap(ARM64_MTE_ASYMM) &&
+	    (arg & PR_MTE_TCF_ASYNC) &&
+	    (arg & PR_MTE_TCF_SYNC))
 		mte_ctrl |= MTE_CTRL_TCF_ASYMM;
 
 	task->thread.mte_ctrl = mte_ctrl;
@@ -341,8 +350,6 @@ long get_mte_ctrl(struct task_struct *task)
 		ret |= PR_MTE_TCF_ASYNC;
 	if (mte_ctrl & MTE_CTRL_TCF_SYNC)
 		ret |= PR_MTE_TCF_SYNC;
-	if (mte_ctrl & MTE_CTRL_TCF_ASYMM)
-		ret |= PR_MTE_TCF_ASYMM;
 
 	return ret;
 }
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 941cfa7117b9b..7fa97df55e3ad 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -637,8 +637,6 @@ long set_tagged_addr_ctrl(struct task_struct *task, unsigned long arg)
 	if (system_supports_mte())
 		valid_mask |= PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC \
 			| PR_MTE_TAG_MASK;
-	if (cpus_have_cap(ARM64_MTE_ASYMM))
-		valid_mask |= PR_MTE_TCF_ASYMM;
 
 	if (arg & ~valid_mask)
 		return -EINVAL;
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 4ae2b21e4066a..e998764f02625 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -238,9 +238,7 @@ struct prctl_mm_map {
 # define PR_MTE_TCF_NONE		0UL
 # define PR_MTE_TCF_SYNC		(1UL << 1)
 # define PR_MTE_TCF_ASYNC		(1UL << 2)
-# define PR_MTE_TCF_ASYMM		(1UL << 19)
-# define PR_MTE_TCF_MASK		(PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC | \
-					 PR_MTE_TCF_ASYMM)
+# define PR_MTE_TCF_MASK		(PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC)
 /* MTE tag inclusion mask */
 # define PR_MTE_TAG_SHIFT		3
 # define PR_MTE_TAG_MASK		(0xffffUL << PR_MTE_TAG_SHIFT)
-- 
GitLab


From 3f8dec116210ca649163574ed5f8df1e3b837d07 Mon Sep 17 00:00:00 2001
From: Darren Hart <darren@os.amperecomputing.com>
Date: Tue, 8 Mar 2022 10:50:48 -0800
Subject: [PATCH 1180/1586] ACPI/APEI: Limit printable size of BERT table data

Platforms with large BERT table data can trigger soft lockup errors
while attempting to print the entire BERT table data to the console at
boot:

  watchdog: BUG: soft lockup - CPU#160 stuck for 23s! [swapper/0:1]

Observed on Ampere Altra systems with a single BERT record of ~250KB.

The original bert driver appears to have assumed relatively small table
data. Since it is impractical to reassemble large table data from
interwoven console messages, and the table data is available in

  /sys/firmware/acpi/tables/data/BERT

limit the size for tables printed to the console to 1024 (for no reason
other than it seemed like a good place to kick off the discussion, would
appreciate feedback from existing users in terms of what size would
maintain their current usage model).

Alternatively, we could make printing a CONFIG option, use the
bert_disable boot arg (or something similar), or use a debug log level.
However, all those solutions require extra steps or change the existing
behavior for small table data. Limiting the size preserves existing
behavior on existing platforms with small table data, and eliminates the
soft lockups for platforms with large table data, while still making it
available.

Signed-off-by: Darren Hart <darren@os.amperecomputing.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/apei/bert.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/acpi/apei/bert.c b/drivers/acpi/apei/bert.c
index 86211422f4eee..598fd19b65fa4 100644
--- a/drivers/acpi/apei/bert.c
+++ b/drivers/acpi/apei/bert.c
@@ -29,6 +29,7 @@
 
 #undef pr_fmt
 #define pr_fmt(fmt) "BERT: " fmt
+#define ACPI_BERT_PRINT_MAX_LEN 1024
 
 static int bert_disable;
 
@@ -58,8 +59,11 @@ static void __init bert_print_all(struct acpi_bert_region *region,
 		}
 
 		pr_info_once("Error records from previous boot:\n");
-
-		cper_estatus_print(KERN_INFO HW_ERR, estatus);
+		if (region_len < ACPI_BERT_PRINT_MAX_LEN)
+			cper_estatus_print(KERN_INFO HW_ERR, estatus);
+		else
+			pr_info_once("Max print length exceeded, table data is available at:\n"
+				     "/sys/firmware/acpi/tables/data/BERT");
 
 		/*
 		 * Because the boot error source is "one-time polled" type,
-- 
GitLab


From 23c296fb7eeea99dec035521c07ae54c28b8a267 Mon Sep 17 00:00:00 2001
From: Jinzhou Su <Jinzhou.Su@amd.com>
Date: Wed, 9 Mar 2022 09:23:48 +0800
Subject: [PATCH 1181/1586] cpufreq: amd-pstate: Add more tracepoint for AMD
 P-State module

Add frequency, mperf, aperf and tsc in the trace. This can be used
to debug and tune the performance of AMD P-state driver.

Use the time difference between amd_pstate_update to calculate CPU
frequency. There could be sleep in arch_freq_get_on_cpu, so do not
use it here.

Signed-off-by: Jinzhou Su <Jinzhou.Su@amd.com>
Co-developed-by: Huang Rui <ray.huang@amd.com>
Signed-off-by: Huang Rui <ray.huang@amd.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/amd-pstate-trace.h | 22 ++++++++++-
 drivers/cpufreq/amd-pstate.c       | 59 +++++++++++++++++++++++++++++-
 2 files changed, 78 insertions(+), 3 deletions(-)

diff --git a/drivers/cpufreq/amd-pstate-trace.h b/drivers/cpufreq/amd-pstate-trace.h
index 647505957d4f2..35f38ae67fb13 100644
--- a/drivers/cpufreq/amd-pstate-trace.h
+++ b/drivers/cpufreq/amd-pstate-trace.h
@@ -27,6 +27,10 @@ TRACE_EVENT(amd_pstate_perf,
 	TP_PROTO(unsigned long min_perf,
 		 unsigned long target_perf,
 		 unsigned long capacity,
+		 u64 freq,
+		 u64 mperf,
+		 u64 aperf,
+		 u64 tsc,
 		 unsigned int cpu_id,
 		 bool changed,
 		 bool fast_switch
@@ -35,6 +39,10 @@ TRACE_EVENT(amd_pstate_perf,
 	TP_ARGS(min_perf,
 		target_perf,
 		capacity,
+		freq,
+		mperf,
+		aperf,
+		tsc,
 		cpu_id,
 		changed,
 		fast_switch
@@ -44,6 +52,10 @@ TRACE_EVENT(amd_pstate_perf,
 		__field(unsigned long, min_perf)
 		__field(unsigned long, target_perf)
 		__field(unsigned long, capacity)
+		__field(unsigned long long, freq)
+		__field(unsigned long long, mperf)
+		__field(unsigned long long, aperf)
+		__field(unsigned long long, tsc)
 		__field(unsigned int, cpu_id)
 		__field(bool, changed)
 		__field(bool, fast_switch)
@@ -53,15 +65,23 @@ TRACE_EVENT(amd_pstate_perf,
 		__entry->min_perf = min_perf;
 		__entry->target_perf = target_perf;
 		__entry->capacity = capacity;
+		__entry->freq = freq;
+		__entry->mperf = mperf;
+		__entry->aperf = aperf;
+		__entry->tsc = tsc;
 		__entry->cpu_id = cpu_id;
 		__entry->changed = changed;
 		__entry->fast_switch = fast_switch;
 		),
 
-	TP_printk("amd_min_perf=%lu amd_des_perf=%lu amd_max_perf=%lu cpu_id=%u changed=%s fast_switch=%s",
+	TP_printk("amd_min_perf=%lu amd_des_perf=%lu amd_max_perf=%lu freq=%llu mperf=%llu aperf=%llu tsc=%llu cpu_id=%u changed=%s fast_switch=%s",
 		  (unsigned long)__entry->min_perf,
 		  (unsigned long)__entry->target_perf,
 		  (unsigned long)__entry->capacity,
+		  (unsigned long long)__entry->freq,
+		  (unsigned long long)__entry->mperf,
+		  (unsigned long long)__entry->aperf,
+		  (unsigned long long)__entry->tsc,
 		  (unsigned int)__entry->cpu_id,
 		  (__entry->changed) ? "true" : "false",
 		  (__entry->fast_switch) ? "true" : "false"
diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index 9ce75ed11f8e8..7be38bc6a673b 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -65,6 +65,18 @@ MODULE_PARM_DESC(shared_mem,
 
 static struct cpufreq_driver amd_pstate_driver;
 
+/**
+ * struct  amd_aperf_mperf
+ * @aperf: actual performance frequency clock count
+ * @mperf: maximum performance frequency clock count
+ * @tsc:   time stamp counter
+ */
+struct amd_aperf_mperf {
+	u64 aperf;
+	u64 mperf;
+	u64 tsc;
+};
+
 /**
  * struct amd_cpudata - private CPU data for AMD P-State
  * @cpu: CPU number
@@ -81,6 +93,9 @@ static struct cpufreq_driver amd_pstate_driver;
  * @min_freq: the frequency that mapped to lowest_perf
  * @nominal_freq: the frequency that mapped to nominal_perf
  * @lowest_nonlinear_freq: the frequency that mapped to lowest_nonlinear_perf
+ * @cur: Difference of Aperf/Mperf/tsc count between last and current sample
+ * @prev: Last Aperf/Mperf/tsc count value read from register
+ * @freq: current cpu frequency value
  * @boost_supported: check whether the Processor or SBIOS supports boost mode
  *
  * The amd_cpudata is key private data for each CPU thread in AMD P-State, and
@@ -102,6 +117,10 @@ struct amd_cpudata {
 	u32	nominal_freq;
 	u32	lowest_nonlinear_freq;
 
+	struct amd_aperf_mperf cur;
+	struct amd_aperf_mperf prev;
+
+	u64 freq;
 	bool	boost_supported;
 };
 
@@ -211,6 +230,39 @@ static inline void amd_pstate_update_perf(struct amd_cpudata *cpudata,
 					    max_perf, fast_switch);
 }
 
+static inline bool amd_pstate_sample(struct amd_cpudata *cpudata)
+{
+	u64 aperf, mperf, tsc;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	rdmsrl(MSR_IA32_APERF, aperf);
+	rdmsrl(MSR_IA32_MPERF, mperf);
+	tsc = rdtsc();
+
+	if (cpudata->prev.mperf == mperf || cpudata->prev.tsc == tsc) {
+		local_irq_restore(flags);
+		return false;
+	}
+
+	local_irq_restore(flags);
+
+	cpudata->cur.aperf = aperf;
+	cpudata->cur.mperf = mperf;
+	cpudata->cur.tsc =  tsc;
+	cpudata->cur.aperf -= cpudata->prev.aperf;
+	cpudata->cur.mperf -= cpudata->prev.mperf;
+	cpudata->cur.tsc -= cpudata->prev.tsc;
+
+	cpudata->prev.aperf = aperf;
+	cpudata->prev.mperf = mperf;
+	cpudata->prev.tsc = tsc;
+
+	cpudata->freq = div64_u64((cpudata->cur.aperf * cpu_khz), cpudata->cur.mperf);
+
+	return true;
+}
+
 static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf,
 			      u32 des_perf, u32 max_perf, bool fast_switch)
 {
@@ -226,8 +278,11 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf,
 	value &= ~AMD_CPPC_MAX_PERF(~0L);
 	value |= AMD_CPPC_MAX_PERF(max_perf);
 
-	trace_amd_pstate_perf(min_perf, des_perf, max_perf,
-			      cpudata->cpu, (value != prev), fast_switch);
+	if (trace_amd_pstate_perf_enabled() && amd_pstate_sample(cpudata)) {
+		trace_amd_pstate_perf(min_perf, des_perf, max_perf, cpudata->freq,
+			cpudata->cur.mperf, cpudata->cur.aperf, cpudata->cur.tsc,
+				cpudata->cpu, (value != prev), fast_switch);
+	}
 
 	if (value == prev)
 		return;
-- 
GitLab


From ab3ff9f1d7507c23cb4725f7429926a22e3290cb Mon Sep 17 00:00:00 2001
From: Jinzhou Su <Jinzhou.Su@amd.com>
Date: Wed, 9 Mar 2022 09:23:49 +0800
Subject: [PATCH 1182/1586] tools/power/x86/intel_pstate_tracer: make tracer as
 a module

Make intel_pstate_tracer as a module. Other trace event can import
this module to analyze their trace data.

Signed-off-by: Jinzhou Su <Jinzhou.Su@amd.com>
Acked-by: Doug Smythies <dsmythies@telus.net>
Reviewed-by: Huang Rui <ray.huang@amd.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 .../intel_pstate_tracer.py                    | 260 +++++++++---------
 1 file changed, 129 insertions(+), 131 deletions(-)

diff --git a/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py b/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py
index e15e20696d17b..b46e9eb8f5aac 100755
--- a/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py
+++ b/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py
@@ -63,7 +63,7 @@ C_USEC = 3
 C_SEC = 2
 C_CPU = 1
 
-global sample_num, last_sec_cpu, last_usec_cpu, start_time, testname
+global sample_num, last_sec_cpu, last_usec_cpu, start_time, testname, trace_file
 
 # 11 digits covers uptime to 115 days
 getcontext().prec = 11
@@ -72,17 +72,17 @@ sample_num =0
 last_sec_cpu = [0] * MAX_CPUS
 last_usec_cpu = [0] * MAX_CPUS
 
-def print_help():
-    print('intel_pstate_tracer.py:')
+def print_help(driver_name):
+    print('%s_tracer.py:'%driver_name)
     print('  Usage:')
     print('    If the trace file is available, then to simply parse and plot, use (sudo not required):')
-    print('      ./intel_pstate_tracer.py [-c cpus] -t <trace_file> -n <test_name>')
+    print('      ./%s_tracer.py [-c cpus] -t <trace_file> -n <test_name>'%driver_name)
     print('    Or')
-    print('      ./intel_pstate_tracer.py [--cpu cpus] ---trace_file <trace_file> --name <test_name>')
+    print('      ./%s_tracer.py [--cpu cpus] ---trace_file <trace_file> --name <test_name>'%driver_name)
     print('    To generate trace file, parse and plot, use (sudo required):')
-    print('      sudo ./intel_pstate_tracer.py [-c cpus] -i <interval> -n <test_name> -m <kbytes>')
+    print('      sudo ./%s_tracer.py [-c cpus] -i <interval> -n <test_name> -m <kbytes>'%driver_name)
     print('    Or')
-    print('      sudo ./intel_pstate_tracer.py [--cpu cpus] --interval <interval> --name <test_name> --memory <kbytes>')
+    print('      sudo ./%s_tracer.py [--cpu cpus] --interval <interval> --name <test_name> --memory <kbytes>'%driver_name)
     print('    Optional argument:')
     print('      cpus:   comma separated list of CPUs')
     print('      kbytes: Kilo bytes of memory per CPU to allocate to the trace buffer. Default: 10240')
@@ -323,7 +323,7 @@ def set_4_plot_linestyles(g_plot):
     g_plot('set style line 3 linetype 1 linecolor rgb "purple" pointtype -1')
     g_plot('set style line 4 linetype 1 linecolor rgb "blue" pointtype -1')
 
-def store_csv(cpu_int, time_pre_dec, time_post_dec, core_busy, scaled, _from, _to, mperf, aperf, tsc, freq_ghz, io_boost, common_comm, load, duration_ms, sample_num, elapsed_time, tsc_ghz):
+def store_csv(cpu_int, time_pre_dec, time_post_dec, core_busy, scaled, _from, _to, mperf, aperf, tsc, freq_ghz, io_boost, common_comm, load, duration_ms, sample_num, elapsed_time, tsc_ghz, cpu_mask):
     """ Store master csv file information """
 
     global graph_data_present
@@ -342,11 +342,9 @@ def store_csv(cpu_int, time_pre_dec, time_post_dec, core_busy, scaled, _from, _t
 
     graph_data_present = True;
 
-def split_csv():
+def split_csv(current_max_cpu, cpu_mask):
     """ seperate the all csv file into per CPU csv files. """
 
-    global current_max_cpu
-
     if os.path.exists('cpu.csv'):
         for index in range(0, current_max_cpu + 1):
             if cpu_mask[int(index)] != 0:
@@ -381,27 +379,25 @@ def clear_trace_file():
         print('IO error clearing trace file ')
         sys.exit(2)
 
-def enable_trace():
+def enable_trace(trace_file):
     """ Enable trace """
 
     try:
-       open('/sys/kernel/debug/tracing/events/power/pstate_sample/enable'
-                 , 'w').write("1")
+        open(trace_file,'w').write("1")
     except:
         print('IO error enabling trace ')
         sys.exit(2)
 
-def disable_trace():
+def disable_trace(trace_file):
     """ Disable trace """
 
     try:
-       open('/sys/kernel/debug/tracing/events/power/pstate_sample/enable'
-                 , 'w').write("0")
+       open(trace_file, 'w').write("0")
     except:
         print('IO error disabling trace ')
         sys.exit(2)
 
-def set_trace_buffer_size():
+def set_trace_buffer_size(memory):
     """ Set trace buffer size """
 
     try:
@@ -421,7 +417,7 @@ def free_trace_buffer():
         print('IO error freeing trace buffer ')
         sys.exit(2)
 
-def read_trace_data(filename):
+def read_trace_data(filename, cpu_mask):
     """ Read and parse trace data """
 
     global current_max_cpu
@@ -481,135 +477,137 @@ def read_trace_data(filename):
                 tsc_ghz = Decimal(0)
                 if duration_ms != Decimal(0) :
                     tsc_ghz = Decimal(tsc)/duration_ms/Decimal(1000000)
-                store_csv(cpu_int, time_pre_dec, time_post_dec, core_busy, scaled, _from, _to, mperf, aperf, tsc, freq_ghz, io_boost, common_comm, load, duration_ms, sample_num, elapsed_time, tsc_ghz)
+                store_csv(cpu_int, time_pre_dec, time_post_dec, core_busy, scaled, _from, _to, mperf, aperf, tsc, freq_ghz, io_boost, common_comm, load, duration_ms, sample_num, elapsed_time, tsc_ghz, cpu_mask)
 
             if cpu_int > current_max_cpu:
                 current_max_cpu = cpu_int
 # End of for each trace line loop
 # Now seperate the main overall csv file into per CPU csv files.
-    split_csv()
+    split_csv(current_max_cpu, cpu_mask)
 
 def signal_handler(signal, frame):
     print(' SIGINT: Forcing cleanup before exit.')
     if interval:
-        disable_trace()
+        disable_trace(trace_file)
         clear_trace_file()
         # Free the memory
         free_trace_buffer()
         sys.exit(0)
 
-signal.signal(signal.SIGINT, signal_handler)
+if __name__ == "__main__":
+    trace_file = "/sys/kernel/debug/tracing/events/power/pstate_sample/enable"
+    signal.signal(signal.SIGINT, signal_handler)
 
-interval = ""
-filename = ""
-cpu_list = ""
-testname = ""
-memory = "10240"
-graph_data_present = False;
+    interval = ""
+    filename = ""
+    cpu_list = ""
+    testname = ""
+    memory = "10240"
+    graph_data_present = False;
 
-valid1 = False
-valid2 = False
+    valid1 = False
+    valid2 = False
 
-cpu_mask = zeros((MAX_CPUS,), dtype=int)
+    cpu_mask = zeros((MAX_CPUS,), dtype=int)
 
-try:
-    opts, args = getopt.getopt(sys.argv[1:],"ht:i:c:n:m:",["help","trace_file=","interval=","cpu=","name=","memory="])
-except getopt.GetoptError:
-    print_help()
-    sys.exit(2)
-for opt, arg in opts:
-    if opt == '-h':
-        print()
+    try:
+        opts, args = getopt.getopt(sys.argv[1:],"ht:i:c:n:m:",["help","trace_file=","interval=","cpu=","name=","memory="])
+    except getopt.GetoptError:
+        print_help('intel_pstate')
+        sys.exit(2)
+    for opt, arg in opts:
+        if opt == '-h':
+            print_help('intel_pstate')
+            sys.exit()
+        elif opt in ("-t", "--trace_file"):
+            valid1 = True
+            location = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
+            filename = os.path.join(location, arg)
+        elif opt in ("-i", "--interval"):
+            valid1 = True
+            interval = arg
+        elif opt in ("-c", "--cpu"):
+            cpu_list = arg
+        elif opt in ("-n", "--name"):
+            valid2 = True
+            testname = arg
+        elif opt in ("-m", "--memory"):
+            memory = arg
+
+    if not (valid1 and valid2):
+        print_help('intel_pstate')
         sys.exit()
-    elif opt in ("-t", "--trace_file"):
-        valid1 = True
-        location = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
-        filename = os.path.join(location, arg)
-    elif opt in ("-i", "--interval"):
-        valid1 = True
-        interval = arg
-    elif opt in ("-c", "--cpu"):
-        cpu_list = arg
-    elif opt in ("-n", "--name"):
-        valid2 = True
-        testname = arg
-    elif opt in ("-m", "--memory"):
-        memory = arg
-
-if not (valid1 and valid2):
-    print_help()
-    sys.exit()
-
-if cpu_list:
-    for p in re.split("[,]", cpu_list):
-        if int(p) < MAX_CPUS :
-            cpu_mask[int(p)] = 1
-else:
-    for i in range (0, MAX_CPUS):
-        cpu_mask[i] = 1
-
-if not os.path.exists('results'):
-    os.mkdir('results')
+
+    if cpu_list:
+        for p in re.split("[,]", cpu_list):
+            if int(p) < MAX_CPUS :
+                cpu_mask[int(p)] = 1
+    else:
+        for i in range (0, MAX_CPUS):
+            cpu_mask[i] = 1
+
+    if not os.path.exists('results'):
+        os.mkdir('results')
+        # The regular user needs to own the directory, not root.
+        fix_ownership('results')
+
+    os.chdir('results')
+    if os.path.exists(testname):
+        print('The test name directory already exists. Please provide a unique test name. Test re-run not supported, yet.')
+        sys.exit()
+    os.mkdir(testname)
     # The regular user needs to own the directory, not root.
-    fix_ownership('results')
-
-os.chdir('results')
-if os.path.exists(testname):
-    print('The test name directory already exists. Please provide a unique test name. Test re-run not supported, yet.')
-    sys.exit()
-os.mkdir(testname)
-# The regular user needs to own the directory, not root.
-fix_ownership(testname)
-os.chdir(testname)
-
-# Temporary (or perhaps not)
-cur_version = sys.version_info
-print('python version (should be >= 2.7):')
-print(cur_version)
-
-# Left as "cleanup" for potential future re-run ability.
-cleanup_data_files()
-
-if interval:
-    filename = "/sys/kernel/debug/tracing/trace"
-    clear_trace_file()
-    set_trace_buffer_size()
-    enable_trace()
-    print('Sleeping for ', interval, 'seconds')
-    time.sleep(int(interval))
-    disable_trace()
-
-current_max_cpu = 0
-
-read_trace_data(filename)
-
-if interval:
-    clear_trace_file()
-    # Free the memory
-    free_trace_buffer()
-
-if graph_data_present == False:
-    print('No valid data to plot')
-    sys.exit(2)
-
-for cpu_no in range(0, current_max_cpu + 1):
-    plot_perf_busy_with_sample(cpu_no)
-    plot_perf_busy(cpu_no)
-    plot_durations(cpu_no)
-    plot_loads(cpu_no)
-
-plot_pstate_cpu_with_sample()
-plot_pstate_cpu()
-plot_load_cpu()
-plot_frequency_cpu()
-plot_duration_cpu()
-plot_scaled_cpu()
-plot_boost_cpu()
-plot_ghz_cpu()
-
-# It is preferrable, but not necessary, that the regular user owns the files, not root.
-for root, dirs, files in os.walk('.'):
-    for f in files:
-        fix_ownership(f)
-
-os.chdir('../../')
+    fix_ownership(testname)
+    os.chdir(testname)
+
+    # Temporary (or perhaps not)
+    cur_version = sys.version_info
+    print('python version (should be >= 2.7):')
+    print(cur_version)
+
+    # Left as "cleanup" for potential future re-run ability.
+    cleanup_data_files()
+
+    if interval:
+        filename = "/sys/kernel/debug/tracing/trace"
+        clear_trace_file()
+        set_trace_buffer_size(memory)
+        enable_trace(trace_file)
+        print('Sleeping for ', interval, 'seconds')
+        time.sleep(int(interval))
+        disable_trace(trace_file)
+
+    current_max_cpu = 0
+
+    read_trace_data(filename, cpu_mask)
+
+    if interval:
+        clear_trace_file()
+        # Free the memory
+        free_trace_buffer()
+
+    if graph_data_present == False:
+        print('No valid data to plot')
+        sys.exit(2)
+
+    for cpu_no in range(0, current_max_cpu + 1):
+        plot_perf_busy_with_sample(cpu_no)
+        plot_perf_busy(cpu_no)
+        plot_durations(cpu_no)
+        plot_loads(cpu_no)
+
+    plot_pstate_cpu_with_sample()
+    plot_pstate_cpu()
+    plot_load_cpu()
+    plot_frequency_cpu()
+    plot_duration_cpu()
+    plot_scaled_cpu()
+    plot_boost_cpu()
+    plot_ghz_cpu()
+
+    # It is preferrable, but not necessary, that the regular user owns the files, not root.
+    for root, dirs, files in os.walk('.'):
+        for f in files:
+            fix_ownership(f)
+
+    os.chdir('../../')
-- 
GitLab


From 5e32adccea8d803ee07e360daafb5480651ff8e8 Mon Sep 17 00:00:00 2001
From: Jinzhou Su <Jinzhou.Su@amd.com>
Date: Wed, 9 Mar 2022 09:23:50 +0800
Subject: [PATCH 1183/1586] tools/power/x86/amd_pstate_tracer: Add tracer tool
 for AMD P-state

Intel P-state tracer is a useful tool to tune and debug Intel P-state
driver. AMD P-state tracer import intel pstate tracer. This tool can
be used to analyze the performance of AMD P-state tracer.

Now CPU frequency, load and desired perf can be traced.

Signed-off-by: Jinzhou Su <Jinzhou.Su@amd.com>
Reviewed-by: Huang Rui <ray.huang@amd.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 MAINTAINERS                                   |   1 +
 .../x86/amd_pstate_tracer/amd_pstate_trace.py | 354 ++++++++++++++++++
 2 files changed, 355 insertions(+)
 create mode 100755 tools/power/x86/amd_pstate_tracer/amd_pstate_trace.py

diff --git a/MAINTAINERS b/MAINTAINERS
index f41088418aae2..6c90da80982e7 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1002,6 +1002,7 @@ L:	linux-pm@vger.kernel.org
 S:	Supported
 F:	Documentation/admin-guide/pm/amd-pstate.rst
 F:	drivers/cpufreq/amd-pstate*
+F:	tools/power/x86/amd_pstate_tracer/amd_pstate_trace.py
 
 AMD PTDMA DRIVER
 M:	Sanjay R Mehta <sanju.mehta@amd.com>
diff --git a/tools/power/x86/amd_pstate_tracer/amd_pstate_trace.py b/tools/power/x86/amd_pstate_tracer/amd_pstate_trace.py
new file mode 100755
index 0000000000000..2dea4032ac565
--- /dev/null
+++ b/tools/power/x86/amd_pstate_tracer/amd_pstate_trace.py
@@ -0,0 +1,354 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0-only
+# -*- coding: utf-8 -*-
+#
+""" This utility can be used to debug and tune the performance of the
+AMD P-State driver. It imports intel_pstate_tracer to analyze AMD P-State
+trace event.
+
+Prerequisites:
+    Python version 2.7.x or higher
+    gnuplot 5.0 or higher
+    gnuplot-py 1.8 or higher
+    (Most of the distributions have these required packages. They may be called
+     gnuplot-py, phython-gnuplot or phython3-gnuplot, gnuplot-nox, ... )
+
+    Kernel config for Linux trace is enabled
+
+    see print_help(): for Usage and Output details
+
+"""
+from __future__ import print_function
+from datetime import datetime
+import subprocess
+import os
+import time
+import re
+import signal
+import sys
+import getopt
+import Gnuplot
+from numpy import *
+from decimal import *
+sys.path.append('../intel_pstate_tracer')
+#import intel_pstate_tracer
+import intel_pstate_tracer as ipt
+
+__license__ = "GPL version 2"
+
+MAX_CPUS = 256
+# Define the csv file columns
+C_COMM = 15
+C_ELAPSED = 14
+C_SAMPLE = 13
+C_DURATION = 12
+C_LOAD = 11
+C_TSC = 10
+C_APERF = 9
+C_MPERF = 8
+C_FREQ = 7
+C_MAX_PERF = 6
+C_DES_PERF = 5
+C_MIN_PERF = 4
+C_USEC = 3
+C_SEC = 2
+C_CPU = 1
+
+global sample_num, last_sec_cpu, last_usec_cpu, start_time, test_name, trace_file
+
+getcontext().prec = 11
+
+sample_num =0
+last_sec_cpu = [0] * MAX_CPUS
+last_usec_cpu = [0] * MAX_CPUS
+
+def plot_per_cpu_freq(cpu_index):
+    """ Plot per cpu frequency """
+
+    file_name = 'cpu{:0>3}.csv'.format(cpu_index)
+    if os.path.exists(file_name):
+        output_png = "cpu%03d_frequency.png" % cpu_index
+        g_plot = ipt.common_gnuplot_settings()
+        g_plot('set output "' + output_png + '"')
+        g_plot('set yrange [0:7]')
+        g_plot('set ytics 0, 1')
+        g_plot('set ylabel "CPU Frequency (GHz)"')
+        g_plot('set title "{} : frequency : CPU {:0>3} : {:%F %H:%M}"'.format(test_name, cpu_index, datetime.now()))
+        g_plot('set ylabel "CPU frequency"')
+        g_plot('set key off')
+        ipt.set_4_plot_linestyles(g_plot)
+        g_plot('plot "' + file_name + '" using {:d}:{:d} with linespoints linestyle 1 axis x1y1'.format(C_ELAPSED, C_FREQ))
+
+def plot_per_cpu_des_perf(cpu_index):
+    """ Plot per cpu desired perf """
+
+    file_name = 'cpu{:0>3}.csv'.format(cpu_index)
+    if os.path.exists(file_name):
+        output_png = "cpu%03d_des_perf.png" % cpu_index
+        g_plot = ipt.common_gnuplot_settings()
+        g_plot('set output "' + output_png + '"')
+        g_plot('set yrange [0:255]')
+        g_plot('set ylabel "des perf"')
+        g_plot('set title "{} : cpu des perf : CPU {:0>3} : {:%F %H:%M}"'.format(test_name, cpu_index, datetime.now()))
+        g_plot('set key off')
+        ipt.set_4_plot_linestyles(g_plot)
+        g_plot('plot "' + file_name + '" using {:d}:{:d} with linespoints linestyle 1 axis x1y1'.format(C_ELAPSED, C_DES_PERF))
+
+def plot_per_cpu_load(cpu_index):
+    """ Plot per cpu load """
+
+    file_name = 'cpu{:0>3}.csv'.format(cpu_index)
+    if os.path.exists(file_name):
+        output_png = "cpu%03d_load.png" % cpu_index
+        g_plot = ipt.common_gnuplot_settings()
+        g_plot('set output "' + output_png + '"')
+        g_plot('set yrange [0:100]')
+        g_plot('set ytics 0, 10')
+        g_plot('set ylabel "CPU load (percent)"')
+        g_plot('set title "{} : cpu load : CPU {:0>3} : {:%F %H:%M}"'.format(test_name, cpu_index, datetime.now()))
+        g_plot('set key off')
+        ipt.set_4_plot_linestyles(g_plot)
+        g_plot('plot "' + file_name + '" using {:d}:{:d} with linespoints linestyle 1 axis x1y1'.format(C_ELAPSED, C_LOAD))
+
+def plot_all_cpu_frequency():
+    """ Plot all cpu frequencies """
+
+    output_png = 'all_cpu_frequencies.png'
+    g_plot = ipt.common_gnuplot_settings()
+    g_plot('set output "' + output_png + '"')
+    g_plot('set ylabel "CPU Frequency (GHz)"')
+    g_plot('set title "{} : cpu frequencies : {:%F %H:%M}"'.format(test_name, datetime.now()))
+
+    title_list = subprocess.check_output('ls cpu???.csv | sed -e \'s/.csv//\'',shell=True).decode('utf-8').replace('\n', ' ')
+    plot_str = "plot for [i in title_list] i.'.csv' using {:d}:{:d} pt 7 ps 1 title i".format(C_ELAPSED, C_FREQ)
+    g_plot('title_list = "{}"'.format(title_list))
+    g_plot(plot_str)
+
+def plot_all_cpu_des_perf():
+    """ Plot all cpu desired perf """
+
+    output_png = 'all_cpu_des_perf.png'
+    g_plot = ipt.common_gnuplot_settings()
+    g_plot('set output "' + output_png + '"')
+    g_plot('set ylabel "des perf"')
+    g_plot('set title "{} : cpu des perf : {:%F %H:%M}"'.format(test_name, datetime.now()))
+
+    title_list = subprocess.check_output('ls cpu???.csv | sed -e \'s/.csv//\'',shell=True).decode('utf-8').replace('\n', ' ')
+    plot_str = "plot for [i in title_list] i.'.csv' using {:d}:{:d} pt 255 ps 1 title i".format(C_ELAPSED, C_DES_PERF)
+    g_plot('title_list = "{}"'.format(title_list))
+    g_plot(plot_str)
+
+def plot_all_cpu_load():
+    """ Plot all cpu load  """
+
+    output_png = 'all_cpu_load.png'
+    g_plot = ipt.common_gnuplot_settings()
+    g_plot('set output "' + output_png + '"')
+    g_plot('set yrange [0:100]')
+    g_plot('set ylabel "CPU load (percent)"')
+    g_plot('set title "{} : cpu load : {:%F %H:%M}"'.format(test_name, datetime.now()))
+
+    title_list = subprocess.check_output('ls cpu???.csv | sed -e \'s/.csv//\'',shell=True).decode('utf-8').replace('\n', ' ')
+    plot_str = "plot for [i in title_list] i.'.csv' using {:d}:{:d} pt 255 ps 1 title i".format(C_ELAPSED, C_LOAD)
+    g_plot('title_list = "{}"'.format(title_list))
+    g_plot(plot_str)
+
+def store_csv(cpu_int, time_pre_dec, time_post_dec, min_perf, des_perf, max_perf, freq_ghz, mperf, aperf, tsc, common_comm, load, duration_ms, sample_num, elapsed_time, cpu_mask):
+    """ Store master csv file information """
+
+    global graph_data_present
+
+    if cpu_mask[cpu_int] == 0:
+        return
+
+    try:
+        f_handle = open('cpu.csv', 'a')
+        string_buffer = "CPU_%03u, %05u, %06u, %u, %u, %u, %.4f, %u, %u, %u, %.2f, %.3f, %u, %.3f, %s\n" % (cpu_int, int(time_pre_dec), int(time_post_dec), int(min_perf), int(des_perf), int(max_perf), freq_ghz, int(mperf), int(aperf), int(tsc), load, duration_ms, sample_num, elapsed_time, common_comm)
+        f_handle.write(string_buffer)
+        f_handle.close()
+    except:
+        print('IO error cpu.csv')
+        return
+
+    graph_data_present = True;
+
+
+def cleanup_data_files():
+    """ clean up existing data files """
+
+    if os.path.exists('cpu.csv'):
+        os.remove('cpu.csv')
+    f_handle = open('cpu.csv', 'a')
+    f_handle.write('common_cpu, common_secs, common_usecs, min_perf, des_perf, max_perf, freq, mperf, aperf, tsc, load, duration_ms, sample_num, elapsed_time, common_comm')
+    f_handle.write('\n')
+    f_handle.close()
+
+def read_trace_data(file_name, cpu_mask):
+    """ Read and parse trace data """
+
+    global current_max_cpu
+    global sample_num, last_sec_cpu, last_usec_cpu, start_time
+
+    try:
+        data = open(file_name, 'r').read()
+    except:
+        print('Error opening ', file_name)
+        sys.exit(2)
+
+    for line in data.splitlines():
+        search_obj = \
+            re.search(r'(^(.*?)\[)((\d+)[^\]])(.*?)(\d+)([.])(\d+)(.*?amd_min_perf=)(\d+)(.*?amd_des_perf=)(\d+)(.*?amd_max_perf=)(\d+)(.*?freq=)(\d+)(.*?mperf=)(\d+)(.*?aperf=)(\d+)(.*?tsc=)(\d+)'
+                      , line)
+
+        if search_obj:
+            cpu = search_obj.group(3)
+            cpu_int = int(cpu)
+            cpu = str(cpu_int)
+
+            time_pre_dec = search_obj.group(6)
+            time_post_dec = search_obj.group(8)
+            min_perf = search_obj.group(10)
+            des_perf = search_obj.group(12)
+            max_perf = search_obj.group(14)
+            freq = search_obj.group(16)
+            mperf = search_obj.group(18)
+            aperf = search_obj.group(20)
+            tsc = search_obj.group(22)
+
+            common_comm = search_obj.group(2).replace(' ', '')
+
+            if sample_num == 0 :
+                start_time = Decimal(time_pre_dec) + Decimal(time_post_dec) / Decimal(1000000)
+            sample_num += 1
+
+            if last_sec_cpu[cpu_int] == 0 :
+                last_sec_cpu[cpu_int] = time_pre_dec
+                last_usec_cpu[cpu_int] = time_post_dec
+            else :
+                duration_us = (int(time_pre_dec) - int(last_sec_cpu[cpu_int])) * 1000000 + (int(time_post_dec) - int(last_usec_cpu[cpu_int]))
+                duration_ms = Decimal(duration_us) / Decimal(1000)
+                last_sec_cpu[cpu_int] = time_pre_dec
+                last_usec_cpu[cpu_int] = time_post_dec
+                elapsed_time = Decimal(time_pre_dec) + Decimal(time_post_dec) / Decimal(1000000) - start_time
+                load = Decimal(int(mperf)*100)/ Decimal(tsc)
+                freq_ghz = Decimal(freq)/Decimal(1000000)
+                store_csv(cpu_int, time_pre_dec, time_post_dec, min_perf, des_perf, max_perf, freq_ghz, mperf, aperf, tsc, common_comm, load, duration_ms, sample_num, elapsed_time, cpu_mask)
+
+            if cpu_int > current_max_cpu:
+                current_max_cpu = cpu_int
+# Now separate the main overall csv file into per CPU csv files.
+    ipt.split_csv(current_max_cpu, cpu_mask)
+
+
+def signal_handler(signal, frame):
+    print(' SIGINT: Forcing cleanup before exit.')
+    if interval:
+        ipt.disable_trace(trace_file)
+        ipt.clear_trace_file()
+        ipt.free_trace_buffer()
+        sys.exit(0)
+
+trace_file = "/sys/kernel/debug/tracing/events/amd_cpu/enable"
+signal.signal(signal.SIGINT, signal_handler)
+
+interval = ""
+file_name = ""
+cpu_list = ""
+test_name = ""
+memory = "10240"
+graph_data_present = False;
+
+valid1 = False
+valid2 = False
+
+cpu_mask = zeros((MAX_CPUS,), dtype=int)
+
+
+try:
+    opts, args = getopt.getopt(sys.argv[1:],"ht:i:c:n:m:",["help","trace_file=","interval=","cpu=","name=","memory="])
+except getopt.GetoptError:
+    ipt.print_help('amd_pstate')
+    sys.exit(2)
+for opt, arg in opts:
+    if opt == '-h':
+        print()
+        sys.exit()
+    elif opt in ("-t", "--trace_file"):
+        valid1 = True
+        location = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
+        file_name = os.path.join(location, arg)
+    elif opt in ("-i", "--interval"):
+        valid1 = True
+        interval = arg
+    elif opt in ("-c", "--cpu"):
+        cpu_list = arg
+    elif opt in ("-n", "--name"):
+        valid2 = True
+        test_name = arg
+    elif opt in ("-m", "--memory"):
+        memory = arg
+
+if not (valid1 and valid2):
+    ipt.print_help('amd_pstate')
+    sys.exit()
+
+if cpu_list:
+    for p in re.split("[,]", cpu_list):
+        if int(p) < MAX_CPUS :
+            cpu_mask[int(p)] = 1
+else:
+    for i in range (0, MAX_CPUS):
+        cpu_mask[i] = 1
+
+if not os.path.exists('results'):
+    os.mkdir('results')
+    ipt.fix_ownership('results')
+
+os.chdir('results')
+if os.path.exists(test_name):
+    print('The test name directory already exists. Please provide a unique test name. Test re-run not supported, yet.')
+    sys.exit()
+os.mkdir(test_name)
+ipt.fix_ownership(test_name)
+os.chdir(test_name)
+
+cur_version = sys.version_info
+print('python version (should be >= 2.7):')
+print(cur_version)
+
+cleanup_data_files()
+
+if interval:
+    file_name = "/sys/kernel/debug/tracing/trace"
+    ipt.clear_trace_file()
+    ipt.set_trace_buffer_size(memory)
+    ipt.enable_trace(trace_file)
+    time.sleep(int(interval))
+    ipt.disable_trace(trace_file)
+
+current_max_cpu = 0
+
+read_trace_data(file_name, cpu_mask)
+
+if interval:
+    ipt.clear_trace_file()
+    ipt.free_trace_buffer()
+
+if graph_data_present == False:
+    print('No valid data to plot')
+    sys.exit(2)
+
+for cpu_no in range(0, current_max_cpu + 1):
+    plot_per_cpu_freq(cpu_no)
+    plot_per_cpu_des_perf(cpu_no)
+    plot_per_cpu_load(cpu_no)
+
+plot_all_cpu_des_perf()
+plot_all_cpu_frequency()
+plot_all_cpu_load()
+
+for root, dirs, files in os.walk('.'):
+    for f in files:
+        ipt.fix_ownership(f)
+
+os.chdir('../../')
-- 
GitLab


From b020771a66e474cd8450b3c483cec8492702db22 Mon Sep 17 00:00:00 2001
From: Jinzhou Su <Jinzhou.Su@amd.com>
Date: Wed, 9 Mar 2022 09:23:51 +0800
Subject: [PATCH 1184/1586] Documentation: amd-pstate: add tracer tool
 introduction

Add amd pstate tracer tool introduction

Signed-off-by: Jinzhou Su <Jinzhou.Su@amd.com>
Reviewed-by: Huang Rui <ray.huang@amd.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/admin-guide/pm/amd-pstate.rst | 26 +++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/Documentation/admin-guide/pm/amd-pstate.rst b/Documentation/admin-guide/pm/amd-pstate.rst
index 2f066df4ee9c0..1923cb25073b5 100644
--- a/Documentation/admin-guide/pm/amd-pstate.rst
+++ b/Documentation/admin-guide/pm/amd-pstate.rst
@@ -369,6 +369,32 @@ governor (for the policies it is attached to), or by the ``CPUFreq`` core (for t
 policies with other scaling governors).
 
 
+Tracer Tool
+-------------
+
+``amd_pstate_tracer.py`` can record and parse ``amd-pstate`` trace log, then
+generate performance plots. This utility can be used to debug and tune the
+performance of ``amd-pstate`` driver. The tracer tool needs to import intel
+pstate tracer.
+
+Tracer tool located in ``linux/tools/power/x86/amd_pstate_tracer``. It can be
+used in two ways. If trace file is available, then directly parse the file
+with command ::
+
+ ./amd_pstate_trace.py [-c cpus] -t <trace_file> -n <test_name>
+
+Or generate trace file with root privilege, then parse and plot with command ::
+
+ sudo ./amd_pstate_trace.py [-c cpus] -n <test_name> -i <interval> [-m kbytes]
+
+The test result can be found in ``results/test_name``. Following is the example
+about part of the output. ::
+
+ common_cpu  common_secs  common_usecs  min_perf  des_perf  max_perf  freq    mperf   apef    tsc       load   duration_ms  sample_num  elapsed_time  common_comm
+ CPU_005     712          116384        39        49        166       0.7565  9645075 2214891 38431470  25.1   11.646       469         2.496         kworker/5:0-40
+ CPU_006     712          116408        39        49        166       0.6769  8950227 1839034 37192089  24.06  11.272       470         2.496         kworker/6:0-1264
+
+
 Reference
 ===========
 
-- 
GitLab


From 659b66e98bb38dc6300dca3c9ebebeba194b575b Mon Sep 17 00:00:00 2001
From: Li RongQing <lirongqing@baidu.com>
Date: Fri, 4 Mar 2022 15:53:28 +0800
Subject: [PATCH 1185/1586] cpuidle: haltpoll: Call cpuidle_poll_state_init()
 later

Call cpuidle_poll_state_init() only if it is needed to avoid doing
useless work.

Signed-off-by: Li RongQing <lirongqing@baidu.com>
[ rjw: Subject and changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpuidle/cpuidle-haltpoll.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/cpuidle/cpuidle-haltpoll.c b/drivers/cpuidle/cpuidle-haltpoll.c
index fcc53215bac8f..3a39a7f48b771 100644
--- a/drivers/cpuidle/cpuidle-haltpoll.c
+++ b/drivers/cpuidle/cpuidle-haltpoll.c
@@ -108,11 +108,11 @@ static int __init haltpoll_init(void)
 	if (boot_option_idle_override != IDLE_NO_OVERRIDE)
 		return -ENODEV;
 
-	cpuidle_poll_state_init(drv);
-
 	if (!kvm_para_available() || !haltpoll_want())
 		return -ENODEV;
 
+	cpuidle_poll_state_init(drv);
+
 	ret = cpuidle_register_driver(drv);
 	if (ret < 0)
 		return ret;
-- 
GitLab


From f09f6f9b69821c9efcf16e6b5b466ce9e263ca51 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 4 Mar 2022 10:14:18 -0800
Subject: [PATCH 1186/1586] Documentation/process: Add Researcher Guidelines

As a follow-up to the UMN incident[1], the TAB took the responsibility
to document Researcher Guidelines so there would be a common place to
point for describing our expectations as a developer community.

Document best practices researchers should follow to participate
successfully with the Linux developer community.

[1] https://lore.kernel.org/lkml/202105051005.49BFABCE@keescook/

Co-developed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Co-developed-by: Jonathan Corbet <corbet@lwn.net>
Co-developed-by: Stefano Zacchiroli <zack@upsilon.cc>
Signed-off-by: Stefano Zacchiroli <zack@upsilon.cc>
Co-developed-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Acked-by: Steve Rostedt <rostedt@goodmis.org>
Acked-by: Laura Abbott <labbott@kernel.org>
Reviewed-by: Julia Lawall <julia.lawall@inria.fr>
Reviewed-by: Wenwen Wang <wenwen@cs.uga.edu>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20220304181418.1692016-1-keescook@chromium.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/process/index.rst               |   1 +
 .../process/researcher-guidelines.rst         | 143 ++++++++++++++++++
 2 files changed, 144 insertions(+)
 create mode 100644 Documentation/process/researcher-guidelines.rst

diff --git a/Documentation/process/index.rst b/Documentation/process/index.rst
index 428e39074f616..3587dae4d0ef8 100644
--- a/Documentation/process/index.rst
+++ b/Documentation/process/index.rst
@@ -49,6 +49,7 @@ Other guides to the community that are of interest to most developers are:
    deprecated
    embargoed-hardware-issues
    maintainers
+   researcher-guidelines
 
 These are some overall technical guides that have been put here for now for
 lack of a better place.
diff --git a/Documentation/process/researcher-guidelines.rst b/Documentation/process/researcher-guidelines.rst
new file mode 100644
index 0000000000000..afc944e0e8986
--- /dev/null
+++ b/Documentation/process/researcher-guidelines.rst
@@ -0,0 +1,143 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. _researcher_guidelines:
+
+Researcher Guidelines
++++++++++++++++++++++
+
+The Linux kernel community welcomes transparent research on the Linux
+kernel, the activities involved in producing it, and any other byproducts
+of its development. Linux benefits greatly from this kind of research, and
+most aspects of Linux are driven by research in one form or another.
+
+The community greatly appreciates if researchers can share preliminary
+findings before making their results public, especially if such research
+involves security. Getting involved early helps both improve the quality
+of research and ability for Linux to improve from it. In any case,
+sharing open access copies of the published research with the community
+is recommended.
+
+This document seeks to clarify what the Linux kernel community considers
+acceptable and non-acceptable practices when conducting such research. At
+the very least, such research and related activities should follow
+standard research ethics rules. For more background on research ethics
+generally, ethics in technology, and research of developer communities
+in particular, see:
+
+* `History of Research Ethics <https://www.unlv.edu/research/ORI-HSR/history-ethics>`_
+* `IEEE Ethics <https://www.ieee.org/about/ethics/index.html>`_
+* `Developer and Researcher Views on the Ethics of Experiments on Open-Source Projects <https://arxiv.org/pdf/2112.13217.pdf>`_
+
+The Linux kernel community expects that everyone interacting with the
+project is participating in good faith to make Linux better. Research on
+any publicly-available artifact (including, but not limited to source
+code) produced by the Linux kernel community is welcome, though research
+on developers must be distinctly opt-in.
+
+Passive research that is based entirely on publicly available sources,
+including posts to public mailing lists and commits to public
+repositories, is clearly permissible. Though, as with any research,
+standard ethics must still be followed.
+
+Active research on developer behavior, however, must be done with the
+explicit agreement of, and full disclosure to, the individual developers
+involved. Developers cannot be interacted with/experimented on without
+consent; this, too, is standard research ethics.
+
+To help clarify: sending patches to developers *is* interacting
+with them, but they have already consented to receiving *good faith
+contributions*. Sending intentionally flawed/vulnerable patches or
+contributing misleading information to discussions is not consented
+to. Such communication can be damaging to the developer (e.g. draining
+time, effort, and morale) and damaging to the project by eroding
+the entire developer community's trust in the contributor (and the
+contributor's organization as a whole), undermining efforts to provide
+constructive feedback to contributors, and putting end users at risk of
+software flaws.
+
+Participation in the development of Linux itself by researchers, as
+with anyone, is welcomed and encouraged. Research into Linux code is
+a common practice, especially when it comes to developing or running
+analysis tools that produce actionable results.
+
+When engaging with the developer community, sending a patch has
+traditionally been the best way to make an impact. Linux already has
+plenty of known bugs -- what's much more helpful is having vetted fixes.
+Before contributing, carefully read the appropriate documentation:
+
+* Documentation/process/development-process.rst
+* Documentation/process/submitting-patches.rst
+* Documentation/admin-guide/reporting-issues.rst
+* Documentation/admin-guide/security-bugs.rst
+
+Then send a patch (including a commit log with all the details listed
+below) and follow up on any feedback from other developers.
+
+When sending patches produced from research, the commit logs should
+contain at least the following details, so that developers have
+appropriate context for understanding the contribution. Answer:
+
+* What is the specific problem that has been found?
+* How could the problem be reached on a running system?
+* What effect would encountering the problem have on the system?
+* How was the problem found? Specifically include details about any
+  testing, static or dynamic analysis programs, and any other tools or
+  methods used to perform the work.
+* Which version of Linux was the problem found on? Using the most recent
+  release or a recent linux-next branch is strongly preferred (see
+  Documentation/process/howto.rst).
+* What was changed to fix the problem, and why it is believed to be correct?
+* How was the change build tested and run-time tested?
+* What prior commit does this change fix? This should go in a "Fixes:"
+  tag as the documentation describes.
+* Who else has reviewed this patch? This should go in appropriate
+  "Reviewed-by:" tags; see below.
+
+For example::
+
+  From: Author <author@email>
+  Subject: [PATCH] drivers/foo_bar: Add missing kfree()
+
+  The error path in foo_bar driver does not correctly free the allocated
+  struct foo_bar_info. This can happen if the attached foo_bar device
+  rejects the initialization packets sent during foo_bar_probe(). This
+  would result in a 64 byte slab memory leak once per device attach,
+  wasting memory resources over time.
+
+  This flaw was found using an experimental static analysis tool we are
+  developing, LeakMagic[1], which reported the following warning when
+  analyzing the v5.15 kernel release:
+
+   path/to/foo_bar.c:187: missing kfree() call?
+
+  Add the missing kfree() to the error path. No other references to
+  this memory exist outside the probe function, so this is the only
+  place it can be freed.
+
+  x86_64 and arm64 defconfig builds with CONFIG_FOO_BAR=y using GCC
+  11.2 show no new warnings, and LeakMagic no longer warns about this
+  code path. As we don't have a FooBar device to test with, no runtime
+  testing was able to be performed.
+
+  [1] https://url/to/leakmagic/details
+
+  Reported-by: Researcher <researcher@email>
+  Fixes: aaaabbbbccccdddd ("Introduce support for FooBar")
+  Signed-off-by: Author <author@email>
+  Reviewed-by: Reviewer <reviewer@email>
+
+If you are a first time contributor it is recommended that the patch
+itself be vetted by others privately before being posted to public lists.
+(This is required if you have been explicitly told your patches need
+more careful internal review.) These people are expected to have their
+"Reviewed-by" tag included in the resulting patch. Finding another
+developer familiar with Linux contribution, especially within your own
+organization, and having them help with reviews before sending them to
+the public mailing lists tends to significantly improve the quality of the
+resulting patches, and there by reduces the burden on other developers.
+
+If no one can be found to internally review patches and you need
+help finding such a person, or if you have any other questions
+related to this document and the developer community's expectations,
+please reach out to the private Technical Advisory Board mailing list:
+<tech-board@lists.linux-foundation.org>.
-- 
GitLab


From 125c0d0bec56069810ab835daa49453890d159c8 Mon Sep 17 00:00:00 2001
From: Yanteng Si <siyanteng01@gmail.com>
Date: Tue, 1 Mar 2022 12:18:24 +0800
Subject: [PATCH 1187/1586] docs/zh_CN: add riscv vm-layout translation

Translate .../riscv/vm-layout.rst into Chinese.

Signed-off-by: Yanteng Si <siyanteng@loongson.cn>
Reviewed-by: Alex Shi <alexs@kernel.org>
Link: https://lore.kernel.org/r/b59965f7f88ea22ec824dc2e92561b018fd370d1.1646108017.git.siyanteng@loongson.cn
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../translations/zh_CN/riscv/index.rst        |  1 +
 .../translations/zh_CN/riscv/vm-layout.rst    | 67 +++++++++++++++++++
 2 files changed, 68 insertions(+)
 create mode 100644 Documentation/translations/zh_CN/riscv/vm-layout.rst

diff --git a/Documentation/translations/zh_CN/riscv/index.rst b/Documentation/translations/zh_CN/riscv/index.rst
index bbf5d7b3777aa..614cde0c09972 100644
--- a/Documentation/translations/zh_CN/riscv/index.rst
+++ b/Documentation/translations/zh_CN/riscv/index.rst
@@ -18,6 +18,7 @@ RISC-V 体系结构
     :maxdepth: 1
 
     boot-image-header
+    vm-layout
     pmu
     patch-acceptance
 
diff --git a/Documentation/translations/zh_CN/riscv/vm-layout.rst b/Documentation/translations/zh_CN/riscv/vm-layout.rst
new file mode 100644
index 0000000000000..585cb89317a3a
--- /dev/null
+++ b/Documentation/translations/zh_CN/riscv/vm-layout.rst
@@ -0,0 +1,67 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/riscv/vm-layout.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+============================
+RISC-V Linux上的虚拟内存布局
+============================
+
+:作者: Alexandre Ghiti <alex@ghiti.fr>
+:日期: 12 February 2021
+
+这份文件描述了RISC-V Linux内核使用的虚拟内存布局。
+
+32位 RISC-V Linux 内核
+======================
+
+RISC-V Linux Kernel SV32
+------------------------
+
+TODO
+
+64位 RISC-V Linux 内核
+======================
+
+RISC-V特权架构文档指出，64位地址 "必须使第63-48位值都等于第47位，否则将发生缺页异常。"：这将虚
+拟地址空间分成两半，中间有一个非常大的洞，下半部分是用户空间所在的地方，上半部分是RISC-V Linux
+内核所在的地方。
+
+RISC-V Linux Kernel SV39
+------------------------
+
+::
+
+  ========================================================================================================================
+      开始地址       |   偏移      |     结束地址      |  大小    | 虚拟内存区域描述
+  ========================================================================================================================
+                    |            |                  |         |
+   0000000000000000 |    0       | 0000003fffffffff |  256 GB | 用户空间虚拟内存，每个内存管理器不同
+  __________________|____________|__________________|_________|___________________________________________________________
+                    |            |                  |         |
+   0000004000000000 | +256    GB | ffffffbfffffffff | ~16M TB | ... 巨大的、几乎64位宽的直到内核映射的-256GB地方
+                    |            |                  |         |     开始偏移的非经典虚拟内存地址空洞。
+                    |            |                  |         |
+  __________________|____________|__________________|_________|___________________________________________________________
+                                                              |
+                                                              | 内核空间的虚拟内存，在所有进程之间共享:
+  ____________________________________________________________|___________________________________________________________
+                    |            |                  |         |
+   ffffffc6fee00000 | -228    GB | ffffffc6feffffff |    2 MB | fixmap
+   ffffffc6ff000000 | -228    GB | ffffffc6ffffffff |   16 MB | PCI io
+   ffffffc700000000 | -228    GB | ffffffc7ffffffff |    4 GB | vmemmap
+   ffffffc800000000 | -224    GB | ffffffd7ffffffff |   64 GB | vmalloc/ioremap space
+   ffffffd800000000 | -160    GB | fffffff6ffffffff |  124 GB | 直接映射所有物理内存
+   fffffff700000000 |  -36    GB | fffffffeffffffff |   32 GB | kasan
+  __________________|____________|__________________|_________|____________________________________________________________
+                                                              |
+                                                              |
+  ____________________________________________________________|____________________________________________________________
+                    |            |                  |         |
+   ffffffff00000000 |   -4    GB | ffffffff7fffffff |    2 GB | modules, BPF
+   ffffffff80000000 |   -2    GB | ffffffffffffffff |    2 GB | kernel
+  __________________|____________|__________________|_________|____________________________________________________________
-- 
GitLab


From 0c21751f0a071a6fc94a48598e6c0752a284550c Mon Sep 17 00:00:00 2001
From: Yanteng Si <siyanteng01@gmail.com>
Date: Tue, 1 Mar 2022 12:18:25 +0800
Subject: [PATCH 1188/1586] docs/zh_CN: add peci index translation

Translate .../pici/index.rst into Chinese

Signed-off-by: Yanteng Si <siyanteng@loongson.cn>
Link: https://lore.kernel.org/r/9fdba94417277ae20db7e3b13418584c652be93f.1646108017.git.siyanteng@loongson.cn
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/translations/zh_CN/index.rst    |  1 +
 .../translations/zh_CN/peci/index.rst         | 26 +++++++++++++++++++
 2 files changed, 27 insertions(+)
 create mode 100644 Documentation/translations/zh_CN/peci/index.rst

diff --git a/Documentation/translations/zh_CN/index.rst b/Documentation/translations/zh_CN/index.rst
index 23f8f4c68e83c..4cab13c1f69ee 100644
--- a/Documentation/translations/zh_CN/index.rst
+++ b/Documentation/translations/zh_CN/index.rst
@@ -114,6 +114,7 @@ TODOList:
    filesystems/index
    scheduler/index
    vm/index
+   peci/index
 
 TODOList:
 
diff --git a/Documentation/translations/zh_CN/peci/index.rst b/Documentation/translations/zh_CN/peci/index.rst
new file mode 100644
index 0000000000000..c536dbe1e1a81
--- /dev/null
+++ b/Documentation/translations/zh_CN/peci/index.rst
@@ -0,0 +1,26 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/peci/index.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:校译:
+
+=================
+Linux PECI 子系统
+=================
+
+.. toctree::
+
+TODOList
+*   peci
+
+.. only::  subproject and html
+
+   Indices
+   =======
+
+   * :ref:`genindex`
-- 
GitLab


From 78cebdb7c612cc62721812daec55bd7aa73f0956 Mon Sep 17 00:00:00 2001
From: Yanteng Si <siyanteng01@gmail.com>
Date: Tue, 1 Mar 2022 12:18:26 +0800
Subject: [PATCH 1189/1586] docs/zh_CN: add peci subsystem translation

Translate .../peci/peci.rst into Chinese.

Signed-off-by: Yanteng Si <siyanteng@loongson.cn>
Reviewed-by: Alex Shi <alexs@kernel.org>
Link: https://lore.kernel.org/r/00577790cb16375e0016513e03d06b671006a3da.1646108017.git.siyanteng@loongson.cn
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../translations/zh_CN/peci/index.rst         |  4 +-
 .../translations/zh_CN/peci/peci.rst          | 54 +++++++++++++++++++
 2 files changed, 56 insertions(+), 2 deletions(-)
 create mode 100644 Documentation/translations/zh_CN/peci/peci.rst

diff --git a/Documentation/translations/zh_CN/peci/index.rst b/Documentation/translations/zh_CN/peci/index.rst
index c536dbe1e1a81..4f6694c828fa2 100644
--- a/Documentation/translations/zh_CN/peci/index.rst
+++ b/Documentation/translations/zh_CN/peci/index.rst
@@ -15,8 +15,8 @@ Linux PECI 子系统
 
 .. toctree::
 
-TODOList
-*   peci
+
+   peci
 
 .. only::  subproject and html
 
diff --git a/Documentation/translations/zh_CN/peci/peci.rst b/Documentation/translations/zh_CN/peci/peci.rst
new file mode 100644
index 0000000000000..a3b4f99b994c7
--- /dev/null
+++ b/Documentation/translations/zh_CN/peci/peci.rst
@@ -0,0 +1,54 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/peci/peci.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:校译:
+
+====
+概述
+====
+
+平台环境控制接口（PECI）是英特尔处理器和管理控制器（如底板管理控制器，BMC）
+之间的一个通信接口。PECI提供的服务允许管理控制器通过访问各种寄存器来配置、监
+控和调试平台。它定义了一个专门的命令协议，管理控制器作为PECI的发起者，处理器
+作为PECI的响应者。PECI可以用于基于单处理器和多处理器的系统中。
+
+注意：英特尔PECI规范没有作为专门的文件发布，而是作为英特尔CPU的外部设计规范
+（EDS）的一部分。外部设计规范通常是不公开的。
+
+PECI 线
+---------
+
+PECI线接口使用单线进行自锁和数据传输。它不需要任何额外的控制线--物理层是一个
+自锁的单线总线信号，每一个比特都从接近零伏的空闲状态开始驱动、上升边缘。驱动高
+电平信号的持续时间可以确定位值是逻辑 “0” 还是逻辑 “1”。PECI线还包括与每个信
+息建立的可变数据速率。
+
+对于PECI线，每个处理器包将在一个定义的范围内利用唯一的、固定的地址，该地址应
+该与处理器插座ID有固定的关系--如果其中一个处理器被移除，它不会影响其余处理器
+的地址。
+
+PECI子系统代码内嵌文档
+------------------------
+
+该API在以下内核代码中:
+
+include/linux/peci.h
+
+drivers/peci/internal.h
+
+drivers/peci/core.c
+
+drivers/peci/request.c
+
+PECI CPU 驱动 API
+-------------------
+
+该API在以下内核代码中:
+
+drivers/peci/cpu.c
-- 
GitLab


From fa04150b8ef7f15cb7654c11a24a9a4e0834de33 Mon Sep 17 00:00:00 2001
From: Bagas Sanjaya <bagasdotme@gmail.com>
Date: Mon, 7 Mar 2022 13:33:41 +0700
Subject: [PATCH 1190/1586] Documentation: describe how to apply incremental
 stable patches

The applying patches document
(Documentation/process/applying-patches.rst) mentions incremental stable
patches, but there is no example of how to apply them. Describe the
process.

While at it, remove note about incremental patches and move the external
link of 5.x.y incremental patches to "Where can I download patches?"
section.

Signed-off-by: Bagas Sanjaya <bagasdotme@gmail.com>
Link: https://lore.kernel.org/r/20220307063340.256671-1-bagasdotme@gmail.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/process/applying-patches.rst | 28 ++++++++++++++++++----
 1 file changed, 23 insertions(+), 5 deletions(-)

diff --git a/Documentation/process/applying-patches.rst b/Documentation/process/applying-patches.rst
index c2121c1e55d73..c269f5e1a0a3b 100644
--- a/Documentation/process/applying-patches.rst
+++ b/Documentation/process/applying-patches.rst
@@ -249,6 +249,10 @@ The 5.x.y (-stable) and 5.x patches live at
 
 	https://www.kernel.org/pub/linux/kernel/v5.x/
 
+The 5.x.y incremental patches live at
+
+	https://www.kernel.org/pub/linux/kernel/v5.x/incr/
+
 The -rc patches are not stored on the webserver but are generated on
 demand from git tags such as
 
@@ -308,12 +312,11 @@ versions.
 If no 5.x.y kernel is available, then the highest numbered 5.x kernel is
 the current stable kernel.
 
-.. note::
+The -stable team provides normal as well as incremental patches. Below is
+how to apply these patches.
 
- The -stable team usually do make incremental patches available as well
- as patches against the latest mainline release, but I only cover the
- non-incremental ones below. The incremental ones can be found at
- https://www.kernel.org/pub/linux/kernel/v5.x/incr/
+Normal patches
+~~~~~~~~~~~~~~
 
 These patches are not incremental, meaning that for example the 5.7.3
 patch does not apply on top of the 5.7.2 kernel source, but rather on top
@@ -331,6 +334,21 @@ Here's a small example::
 	$ cd ..
 	$ mv linux-5.7.2 linux-5.7.3	# rename the kernel source dir
 
+Incremental patches
+~~~~~~~~~~~~~~~~~~~
+
+Incremental patches are different: instead of being applied on top
+of base 5.x kernel, they are applied on top of previous stable kernel
+(5.x.y-1).
+
+Here's the example to apply these::
+
+	$ cd ~/linux-5.7.2		# change to the kernel source dir
+	$ patch -p1 < ../patch-5.7.2-3	# apply the new 5.7.3 patch
+	$ cd ..
+	$ mv linux-5.7.2 linux-5.7.3	# rename the kernel source dir
+
+
 The -rc kernels
 ===============
 
-- 
GitLab


From a17b0169f29bd25c30750846ac0f1f7b5c13e978 Mon Sep 17 00:00:00 2001
From: Yanteng Si <siyanteng01@gmail.com>
Date: Mon, 7 Mar 2022 16:42:20 +0800
Subject: [PATCH 1191/1586] docs/zh_CN: add devicetree index translation

Translate .../devicetree/index.rst into Chinese.

Signed-off-by: Yanteng Si <siyanteng@loongson.cn>
Reviewed-by: Alex Shi <alexs@kernel.org>
Link: https://lore.kernel.org/r/0a01f2eee980da02c0810d3d99b95af9013b646f.1646642188.git.siyanteng@loongson.cn
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../translations/zh_CN/devicetree/index.rst   | 49 +++++++++++++++++++
 Documentation/translations/zh_CN/index.rst    |  6 ++-
 2 files changed, 54 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/translations/zh_CN/devicetree/index.rst

diff --git a/Documentation/translations/zh_CN/devicetree/index.rst b/Documentation/translations/zh_CN/devicetree/index.rst
new file mode 100644
index 0000000000000..5ca1ae87b57d5
--- /dev/null
+++ b/Documentation/translations/zh_CN/devicetree/index.rst
@@ -0,0 +1,49 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/Devicetree/index.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:校译:
+
+
+=============================
+Open Firmware 和 Devicetree
+=============================
+
+该文档是整个设备树文档的总目录，标题中多是业内默认的术语，初见就翻译成中文，
+晦涩难懂，因此尽量保留，后面翻译其子文档时，可能会根据语境，灵活地翻译为中文。
+
+内核Devicetree的使用
+=======================
+.. toctree::
+   :maxdepth: 1
+
+Todolist:
+
+*   usage-model
+*   of_unittest
+*   kernel-api
+
+Devicetree Overlays
+===================
+.. toctree::
+   :maxdepth: 1
+
+Todolist:
+
+*   changesets
+*   dynamic-resolution-notes
+*   overlay-notes
+
+Devicetree Bindings
+===================
+.. toctree::
+   :maxdepth: 1
+
+Todolist:
+
+*   bindings/index
diff --git a/Documentation/translations/zh_CN/index.rst b/Documentation/translations/zh_CN/index.rst
index 4cab13c1f69ee..88d8df957a78a 100644
--- a/Documentation/translations/zh_CN/index.rst
+++ b/Documentation/translations/zh_CN/index.rst
@@ -56,10 +56,14 @@ TODOList:
 
 下列文档描述了内核需要的平台固件相关信息。
 
+.. toctree::
+   :maxdepth: 2
+
+   devicetree/index
+
 TODOList:
 
 * firmware-guide/index
-* devicetree/index
 
 应用程序开发人员文档
 --------------------
-- 
GitLab


From c56481299df3d57a6f874e8dd35dab5e4cfc20e2 Mon Sep 17 00:00:00 2001
From: Yanteng Si <siyanteng01@gmail.com>
Date: Mon, 7 Mar 2022 16:42:21 +0800
Subject: [PATCH 1192/1586] docs/zh_CN: add devicetree usage-model translation

Translate .../Devicetree/usage-model.rst into Chinese.

Signed-off-by: Yanteng Si <siyanteng@loongson.cn>
Reviewed-by: Alex Shi <alexs@kernel.org>
Link: https://lore.kernel.org/r/abc4bfa5a8eb438013913bf8ea5f8a29e5730f33.1646642188.git.siyanteng@loongson.cn
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../translations/zh_CN/devicetree/index.rst   |   4 +-
 .../zh_CN/devicetree/usage-model.rst          | 330 ++++++++++++++++++
 2 files changed, 333 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/translations/zh_CN/devicetree/usage-model.rst

diff --git a/Documentation/translations/zh_CN/devicetree/index.rst b/Documentation/translations/zh_CN/devicetree/index.rst
index 5ca1ae87b57d5..6d9ac4254a1cf 100644
--- a/Documentation/translations/zh_CN/devicetree/index.rst
+++ b/Documentation/translations/zh_CN/devicetree/index.rst
@@ -22,9 +22,11 @@ Open Firmware 和 Devicetree
 .. toctree::
    :maxdepth: 1
 
+   usage-model
+
 Todolist:
 
-*   usage-model
+
 *   of_unittest
 *   kernel-api
 
diff --git a/Documentation/translations/zh_CN/devicetree/usage-model.rst b/Documentation/translations/zh_CN/devicetree/usage-model.rst
new file mode 100644
index 0000000000000..318a3c6a01143
--- /dev/null
+++ b/Documentation/translations/zh_CN/devicetree/usage-model.rst
@@ -0,0 +1,330 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/Devicetree/usage-model.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:校译:
+
+
+===================
+Linux 和 Devicetree
+===================
+
+Linux对设备树数据的使用模型
+
+:作者: Grant Likely <grant.likely@secretlab.ca>
+
+这篇文章描述了Linux如何使用设备树。关于设备树数据格式的概述可以在
+devicetree.org\ [1]_ 的设备树使用页面上找到。
+
+.. [1] https://www.devicetree.org/specifications/
+
+"Open Firmware Device Tree"，或简称为Devicetree（DT），是一种用于描述硬
+件的数据结构和语言。更确切地说，它是一种操作系统可读的硬件描述，这样操作系统就不
+需要对机器的细节进行硬编码。
+
+从结构上看，DT是一棵树，或者说是带有命名节点的无环图，节点可以有任意数量的命名
+属性来封装任意的数据。还存在一种机制，可以在自然的树状结构之外创建从一个节点到
+另一个节点的任意链接。
+
+从概念上讲，一套通用的使用惯例，称为 "bindings"（后文译为绑定），被定义为数据
+应该如何出现在树中，以描述典型的硬件特性，包括数据总线、中断线、GPIO连接和外围
+设备。
+
+尽可能使用现有的绑定来描述硬件，以最大限度地利用现有的支持代码，但由于属性和节
+点名称是简单的文本字符串，通过定义新的节点和属性来扩展现有的绑定或创建新的绑定
+很容易。然而，要警惕的是，在创建一个新的绑定之前，最好先对已经存在的东西做一些
+功课。目前有两种不同的、不兼容的i2c总线的绑定，这是因为在创建新的绑定时没有事先
+调查i2c设备在现有系统中是如何被枚举的。
+
+1. 历史
+-------
+DT最初是由Open Firmware创建的，作为将数据从Open Firmware传递给客户程序
+（如传递给操作系统）的通信方法的一部分。操作系统使用设备树在运行时探测硬件的拓
+扑结构，从而在没有硬编码信息的情况下支持大多数可用的硬件（假设所有设备的驱动程
+序都可用）。
+
+由于Open Firmware通常在PowerPC和SPARC平台上使用，长期以来，对这些架构的
+Linux支持一直使用设备树。
+
+2005年，当PowerPC Linux开始大规模清理并合并32位和64位支持时，决定在所有
+Powerpc平台上要求DT支持，无论它们是否使用Open Firmware。为了做到这一点，
+我们创建了一个叫做扁平化设备树（FDT）的DT表示法，它可以作为一个二进制的blob
+传递给内核，而不需要真正的Open Firmware实现。U-Boot、kexec和其他引导程序
+被修改，以支持传递设备树二进制（dtb）和在引导时修改dtb。DT也被添加到PowerPC
+引导包装器（arch/powerpc/boot/\*）中，这样dtb就可以被包裹在内核镜像中，以
+支持引导现有的非DT察觉的固件。
+
+一段时间后，FDT基础架构被普及到了所有的架构中。在写这篇文章的时候，6个主线架
+构（arm、microblaze、mips、powerpc、sparc和x86）和1个非主线架构（ios）
+有某种程度的DT支持。
+
+1. 数据模型
+-----------
+如果你还没有读过设备树用法\ [1]_页，那么现在就去读吧。没关系，我等着....
+
+2.1 高层次视角
+--------------
+最重要的是要明白，DT只是一个描述硬件的数据结构。它没有什么神奇之处，也不会神
+奇地让所有的硬件配置问题消失。它所做的是提供一种语言，将硬件配置与Linux内核
+（或任何其他操作系统）中的板卡和设备驱动支持解耦。使用它可以使板卡和设备支持
+变成数据驱动；根据传递到内核的数据做出设置决定，而不是根据每台机器的硬编码选
+择。
+
+理想情况下，数据驱动的平台设置应该导致更少的代码重复，并使其更容易用一个内核
+镜像支持各种硬件。
+
+Linux使用DT数据有三个主要目的:
+
+1) 平台识别。
+2) 运行时配置，以及
+3) 设备数量。
+
+2.2 平台识别
+------------
+首先，内核将使用DT中的数据来识别特定的机器。在一个理想的世界里，具体的平台对
+内核来说并不重要，因为所有的平台细节都会被设备树以一致和可靠的方式完美描述。
+但是，硬件并不完美，所以内核必须在早期启动时识别机器，以便有机会运行特定于机
+器的修复程序。
+
+在大多数情况下，机器的身份是不相关的，而内核将根据机器的核心CPU或SoC来选择
+设置代码。例如，在ARM上，arch/arm/kernel/setup.c中的setup_arch()将调
+用arch/arm/kernel/devtree.c中的setup_machine_fdt()，它通过
+machine_desc表搜索并选择与设备树数据最匹配的machine_desc。它通过查看根
+设备树节点中的'compatible'属性，并将其与struct machine_desc中的
+dt_compat列表（如果你好奇，该列表定义在arch/arm/include/asm/mach/arch.h
+中）进行比较，从而确定最佳匹配。
+
+“compatible” 属性包含一个排序的字符串列表，以机器的确切名称开始，后面是
+一个可选的与之兼容的板子列表，从最兼容到最不兼容排序。例如，TI BeagleBoard
+和它的后继者BeagleBoard xM板的根兼容属性可能看起来分别为::
+
+	compatible = "ti,omap3-beagleboard", "ti,omap3450", "ti,omap3";
+	compatible = "ti,omap3-beagleboard-xm", "ti,omap3450", "ti,omap3";
+
+其中 "ti,map3-beagleboard-xm "指定了确切的型号，它还声称它与OMAP 3450 SoC
+以及一般的OMP3系列SoC兼容。你会注意到，该列表从最具体的（确切的板子）到最
+不具体的（SoC系列）进行排序。
+
+聪明的读者可能会指出，Beagle xM也可以声称与原Beagle板兼容。然而，我们应
+该当心在板级上这样做，因为通常情况下，即使在同一产品系列中，每块板都有很高
+的变化，而且当一块板声称与另一块板兼容时，很难确定到底是什么意思。对于高层
+来说，最好是谨慎行事，不要声称一块板子与另一块板子兼容。值得注意的例外是，
+当一块板子是另一块板子的载体时，例如CPU模块连接到一个载体板上。
+
+关于兼容值还有一个注意事项。在兼容属性中使用的任何字符串都必须有文件说明它
+表示什么。在Documentation/devicetree/bindings中添加兼容字符串的文档。
+
+同样在ARM上，对于每个machine_desc，内核会查看是否有任何dt_compat列表条
+目出现在兼容属性中。如果有，那么该机器_desc就是驱动该机器的候选者。在搜索
+了整个machine_descs表之后，setup_machine_fdt()根据每个machine_desc
+在兼容属性中匹配的条目，返回 “最兼容” 的machine_desc。如果没有找到匹配
+的machine_desc，那么它将返回NULL。
+
+这个方案背后的原因是观察到，在大多数情况下，如果它们都使用相同的SoC或相同
+系列的SoC，一个机器_desc可以支持大量的电路板。然而，不可避免地会有一些例
+外情况，即特定的板子需要特殊的设置代码，这在一般情况下是没有用的。特殊情况
+可以通过在通用设置代码中明确检查有问题的板子来处理，但如果超过几个情况下，
+这样做很快就会变得很难看和/或无法维护。
+
+相反，兼容列表允许通用机器_desc通过在dt_compat列表中指定“不太兼容”的值
+来提供对广泛的通用板的支持。在上面的例子中，通用板支持可以声称与“ti,ompa3”
+或“ti,ompa3450”兼容。如果在最初的beagleboard上发现了一个bug，需要在
+早期启动时使用特殊的变通代码，那么可以添加一个新的machine_desc，实现变通，
+并且只在“ti,omap3-beagleboard”上匹配。
+
+PowerPC使用了一个稍微不同的方案，它从每个机器_desc中调用.probe()钩子，
+并使用第一个返回TRUE的钩子。然而，这种方法没有考虑到兼容列表的优先级，对于
+新的架构支持可能应该避免。
+
+2.3 运行时配置
+--------------
+在大多数情况下，DT是将数据从固件传递给内核的唯一方法，所以也被用来传递运行
+时和配置数据，如内核参数字符串和initrd镜像的位置。
+
+这些数据大部分都包含在/chosen节点中，当启动Linux时，它看起来就像这样::
+
+	chosen {
+		bootargs = "console=ttyS0,115200 loglevel=8";
+		initrd-start = <0xc8000000>;
+		initrd-end = <0xc8200000>;
+	};
+
+bootargs属性包含内核参数，initrd-\*属性定义initrd blob的地址和大小。注
+意initrd-end是initrd映像后的第一个地址，所以这与结构体资源的通常语义不一
+致。选择的节点也可以选择包含任意数量的额外属性，用于平台特定的配置数据。
+
+在早期启动过程中，架构设置代码通过不同的辅助回调函数多次调用
+of_scan_flat_dt()来解析设备树数据，然后进行分页设置。of_scan_flat_dt()
+代码扫描设备树，并使用辅助函数来提取早期启动期间所需的信息。通常情况下，
+early_init_dt_scan_chosen()辅助函数用于解析所选节点，包括内核参数，
+early_init_dt_scan_root()用于初始化DT地址空间模型，early_init_dt_scan_memory()
+用于确定可用RAM的大小和位置。
+
+在ARM上，函数setup_machine_fdt()负责在选择支持板子的正确machine_desc
+后，对设备树进行早期扫描。
+
+2.4 设备数量
+------------
+在电路板被识别后，在早期配置数据被解析后，内核初始化可以以正常方式进行。在
+这个过程中的某个时刻，unflatten_device_tree()被调用以将数据转换成更有
+效的运行时表示。这也是调用机器特定设置钩子的时候，比如ARM上的machine_desc
+.init_early()、.init_irq()和.init_machine()钩子。本节的其余部分使用
+了ARM实现的例子，但所有架构在使用DT时都会做几乎相同的事情。
+
+从名称上可以猜到，.init_early()用于在启动过程早期需要执行的任何机器特定设
+置，而.init_irq()则用于设置中断处理。使用DT并不会实质性地改变这两个函数的
+行为。如果提供了DT，那么.init_early()和.init_irq()都能调用任何一个DT查
+询函数（of_* in include/linux/of*.h），以获得关于平台的额外数据。
+
+DT上下文中最有趣的钩子是.init_machine()，它主要负责将平台的数据填充到
+Linux设备模型中。历史上，这在嵌入式平台上是通过在板卡support .c文件中定
+义一组静态时钟结构、platform_devices和其他数据，并在.init_machine()中
+大量注册来实现的。当使用DT时，就不用为每个平台的静态设备进行硬编码，可以通过
+解析DT获得设备列表，并动态分配设备结构体。
+
+最简单的情况是，.init_machine()只负责注册一个platform_devices。
+platform_device是Linux使用的一个概念，用于不能被硬件检测到的内存或I/O映
+射的设备，以及“复合”或 “虚拟”设备（后面会详细介绍）。虽然DT没有“平台设备”的
+术语，但平台设备大致对应于树根的设备节点和简单内存映射总线节点的子节点。
+
+现在是举例说明的好时机。下面是NVIDIA Tegra板的设备树的一部分::
+
+  /{
+	compatible = "nvidia,harmony", "nvidia,tegra20";
+	#address-cells = <1>;
+	#size-cells = <1>;
+	interrupt-parent = <&intc>;
+
+	chosen { };
+	aliases { };
+
+	memory {
+		device_type = "memory";
+		reg = <0x00000000 0x40000000>;
+	};
+
+	soc {
+		compatible = "nvidia,tegra20-soc", "simple-bus";
+		#address-cells = <1>;
+		#size-cells = <1>;
+		ranges;
+
+		intc: interrupt-controller@50041000 {
+			compatible = "nvidia,tegra20-gic";
+			interrupt-controller;
+			#interrupt-cells = <1>;
+			reg = <0x50041000 0x1000>, < 0x50040100 0x0100 >;
+		};
+
+		serial@70006300 {
+			compatible = "nvidia,tegra20-uart";
+			reg = <0x70006300 0x100>;
+			interrupts = <122>;
+		};
+
+		i2s1: i2s@70002800 {
+			compatible = "nvidia,tegra20-i2s";
+			reg = <0x70002800 0x100>;
+			interrupts = <77>;
+			codec = <&wm8903>;
+		};
+
+		i2c@7000c000 {
+			compatible = "nvidia,tegra20-i2c";
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0x7000c000 0x100>;
+			interrupts = <70>;
+
+			wm8903: codec@1a {
+				compatible = "wlf,wm8903";
+				reg = <0x1a>;
+				interrupts = <347>;
+			};
+		};
+	};
+
+	sound {
+		compatible = "nvidia,harmony-sound";
+		i2s-controller = <&i2s1>;
+		i2s-codec = <&wm8903>;
+	};
+  };
+
+在.init_machine()时，Tegra板支持代码将需要查看这个DT，并决定为哪些节点
+创建platform_devices。然而，看一下这个树，并不能立即看出每个节点代表什么
+类型的设备，甚至不能看出一个节点是否代表一个设备。/chosen、/aliases和
+/memory节点是信息节点，并不描述设备（尽管可以说内存可以被认为是一个设备）。
+/soc节点的子节点是内存映射的设备，但是codec@1a是一个i2c设备，而sound节
+点代表的不是一个设备，而是其他设备是如何连接在一起以创建音频子系统的。我知
+道每个设备是什么，因为我熟悉电路板的设计，但是内核怎么知道每个节点该怎么做？
+
+诀窍在于，内核从树的根部开始，寻找具有“兼容”属性的节点。首先，一般认为任何
+具有“兼容”属性的节点都代表某种设备；其次，可以认为树根的任何节点要么直接连
+接到处理器总线上，要么是无法用其他方式描述的杂项系统设备。对于这些节点中的
+每一个，Linux都会分配和注册一个platform_device，它又可能被绑定到一个
+platform_driver。
+
+为什么为这些节点使用platform_device是一个安全的假设？嗯，就Linux对设备
+的建模方式而言，几乎所有的总线类型都假定其设备是总线控制器的孩子。例如，每
+个i2c_client是i2c_master的一个子节点。每个spi_device都是SPI总线的一
+个子节点。类似的还有USB、PCI、MDIO等。同样的层次结构也出现在DT中，I2C设
+备节点只作为I2C总线节点的子节点出现。同理，SPI、MDIO、USB等等。唯一不需
+要特定类型的父设备的设备是platform_devices（和amba_devices，但后面会
+详细介绍），它们将愉快地运行在Linux/sys/devices树的底部。因此，如果一个
+DT节点位于树的根部，那么它真的可能最好注册为platform_device。
+
+Linux板支持代码调用of_platform_populate(NULL, NULL, NULL, NULL)来
+启动树根的设备发现。参数都是NULL，因为当从树的根部开始时，不需要提供一个起
+始节点（第一个NULL），一个父结构设备（最后一个NULL），而且我们没有使用匹配
+表（尚未）。对于只需要注册设备的板子，除了of_platform_populate()的调用，
+.init_machine()可以完全为空。
+
+在Tegra的例子中，这说明了/soc和/sound节点，但是SoC节点的子节点呢？它们
+不应该也被注册为平台设备吗？对于Linux DT支持，一般的行为是子设备在驱动
+.probe()时被父设备驱动注册。因此，一个i2c总线设备驱动程序将为每个子节点
+注册一个i2c_client，一个SPI总线驱动程序将注册其spi_device子节点，其他
+总线类型也是如此。根据该模型，可以编写一个与SoC节点绑定的驱动程序，并简单
+地为其每个子节点注册platform_device。板卡支持代码将分配和注册一个SoC设
+备，一个（理论上的）SoC设备驱动程序可以绑定到SoC设备，并在其.probe()钩
+中为/soc/interruptcontroller、/soc/serial、/soc/i2s和/soc/i2c注
+册platform_devices。很简单，对吗？
+
+实际上，事实证明，将一些platform_device的子设备注册为更多的platform_device
+是一种常见的模式，设备树支持代码反映了这一点，并使上述例子更简单。
+of_platform_populate()的第二个参数是一个of_device_id表，任何与该表
+中的条目相匹配的节点也将获得其子节点的注册。在Tegra的例子中，代码可以是
+这样的::
+
+  static void __init harmony_init_machine(void)
+  {
+	/* ... */
+	of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
+  }
+
+“simple-bus”在Devicetree规范中被定义为一个属性，意味着一个简单的内存映射
+的总线，所以of_platform_populate()代码可以被写成只是假设简单总线兼容的节
+点将总是被遍历。然而，我们把它作为一个参数传入，以便电路板支持代码可以随时覆
+盖默认行为。
+
+[需要添加关于添加i2c/spi/etc子设备的讨论] 。
+
+附录A：AMBA设备
+---------------
+
+ARM Primecell是连接到ARM AMBA总线的某种设备，它包括对硬件检测和电源管理
+的一些支持。在Linux中，amba_device和amba_bus_type结构体被用来表示
+Primecell设备。然而，棘手的一点是，AMBA总线上的所有设备并非都是Primecell，
+而且对于Linux来说，典型的情况是amba_device和platform_device实例都是同
+一总线段的同义词。
+
+当使用DT时，这给of_platform_populate()带来了问题，因为它必须决定是否将
+每个节点注册为platform_device或amba_device。不幸的是，这使设备创建模型
+变得有点复杂，但解决方案原来并不是太具有侵略性。如果一个节点与“arm,amba-primecell”
+兼容，那么of_platform_populate()将把它注册为amba_device而不是
+platform_device。
-- 
GitLab


From 09d4466d3f3c72e3c1d1277839e0f98283e31d1e Mon Sep 17 00:00:00 2001
From: Yanteng Si <siyanteng01@gmail.com>
Date: Mon, 7 Mar 2022 16:42:22 +0800
Subject: [PATCH 1193/1586] docs/zh_CN: add devicetree of_unittest translation

Translate .../Devicetree/of_unittest.rst into Chinese.

Signed-off-by: Yanteng Si <siyanteng@loongson.cn>
Reviewed-by: Alex Shi <alexs@kernel.org>
Link: https://lore.kernel.org/r/da75225df2ac7bee8de653d95b2a872510b34f7d.1646642188.git.siyanteng@loongson.cn
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../translations/zh_CN/devicetree/index.rst   |   3 +-
 .../zh_CN/devicetree/of_unittest.rst          | 189 ++++++++++++++++++
 2 files changed, 190 insertions(+), 2 deletions(-)
 create mode 100644 Documentation/translations/zh_CN/devicetree/of_unittest.rst

diff --git a/Documentation/translations/zh_CN/devicetree/index.rst b/Documentation/translations/zh_CN/devicetree/index.rst
index 6d9ac4254a1cf..23d0b6fccd58f 100644
--- a/Documentation/translations/zh_CN/devicetree/index.rst
+++ b/Documentation/translations/zh_CN/devicetree/index.rst
@@ -23,11 +23,10 @@ Open Firmware 和 Devicetree
    :maxdepth: 1
 
    usage-model
+   of_unittest
 
 Todolist:
 
-
-*   of_unittest
 *   kernel-api
 
 Devicetree Overlays
diff --git a/Documentation/translations/zh_CN/devicetree/of_unittest.rst b/Documentation/translations/zh_CN/devicetree/of_unittest.rst
new file mode 100644
index 0000000000000..abd94e771ef82
--- /dev/null
+++ b/Documentation/translations/zh_CN/devicetree/of_unittest.rst
@@ -0,0 +1,189 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/Devicetree/of_unittest.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:校译:
+
+=================================
+Open Firmware Devicetree 单元测试
+=================================
+
+作者: Gaurav Minocha <gaurav.minocha.os@gmail.com>
+
+1. 概述
+=======
+
+本文档解释了执行 OF 单元测试所需的测试数据是如何动态地附加到实时树上的，与机器的架构无关。
+
+建议在继续读下去之前，先阅读以下文件。
+
+(1) Documentation/devicetree/usage-model.rst
+(2) http://www.devicetree.org/Device_Tree_Usage
+
+OF Selftest被设计用来测试提供给设备驱动开发者的接口（include/linux/of.h），以从未扁平
+化的设备树数据结构中获取设备信息等。这个接口被大多数设备驱动在各种使用情况下使用。
+
+
+2. 测试数据
+===========
+
+设备树源文件（drivers/of/unittest-data/testcases.dts）包含执行drivers/of/unittest.c
+中自动化单元测试所需的测试数据。目前，以下设备树源包含文件（.dtsi）被包含在testcases.dt中::
+
+    drivers/of/unittest-data/tests-interrupts.dtsi
+    drivers/of/unittest-data/tests-platform.dtsi
+    drivers/of/unittest-data/tests-phandle.dtsi
+    drivers/of/unittest-data/tests-match.dtsi
+
+当内核在启用OF_SELFTEST的情况下被构建时，那么下面的make规则::
+
+    $(obj)/%.dtb: $(src)/%.dts FORCE
+	    $(call if_changed_dep, dtc)
+
+用于将DT源文件（testcases.dts）编译成二进制blob（testcases.dtb），也被称为扁平化的DT。
+
+之后，使用以下规则将上述二进制blob包装成一个汇编文件（testcases.dtb.S）::
+
+    $(obj)/%.dtb.S: $(obj)/%.dtb
+	    $(call cmd, dt_S_dtb)
+
+汇编文件被编译成一个对象文件（testcases.dtb.o），并被链接到内核镜像中。
+
+
+2.1. 添加测试数据
+-----------------
+
+未扁平化的设备树结构体:
+
+未扁平化的设备树由连接的设备节点组成，其树状结构形式如下所述::
+
+    // following struct members are used to construct the tree
+    struct device_node {
+	...
+	struct  device_node *parent;
+	struct  device_node *child;
+	struct  device_node *sibling;
+	...
+    };
+
+图1描述了一个机器的未扁平化设备树的通用结构，只考虑了子节点和同级指针。存在另一个指针，
+``*parent`` ，用于反向遍历该树。因此，在一个特定的层次上，子节点和所有的兄弟姐妹节点将
+有一个指向共同节点的父指针（例如，child1、sibling2、sibling3、sibling4的父指针指向
+根节点）::
+
+    root ('/')
+    |
+    child1 -> sibling2 -> sibling3 -> sibling4 -> null
+    |         |           |           |
+    |         |           |          null
+    |         |           |
+    |         |        child31 -> sibling32 -> null
+    |         |           |          |
+    |         |          null       null
+    |         |
+    |      child21 -> sibling22 -> sibling23 -> null
+    |         |          |            |
+    |        null       null         null
+    |
+    child11 -> sibling12 -> sibling13 -> sibling14 -> null
+    |           |           |            |
+    |           |           |           null
+    |           |           |
+    null        null       child131 -> null
+			    |
+			    null
+
+Figure 1: 未扁平化的设备树的通用结构
+
+
+在执行OF单元测试之前，需要将测试数据附加到机器的设备树上（如果存在）。因此，当调用
+selftest_data_add()时，首先会读取通过以下内核符号链接到内核镜像中的扁平化设备树
+数据::
+
+    __dtb_testcases_begin - address marking the start of test data blob
+    __dtb_testcases_end   - address marking the end of test data blob
+
+其次，它调用of_fdt_unflatten_tree()来解除扁平化的blob。最后，如果机器的设备树
+（即实时树）是存在的，那么它将未扁平化的测试数据树附加到实时树上，否则它将自己作为
+实时设备树附加。
+
+attach_node_and_children()使用of_attach_node()将节点附加到实时树上，如下所
+述。为了解释这一点，图2中描述的测试数据树被附加到图1中描述的实时树上::
+
+    root ('/')
+	|
+    testcase-data
+	|
+    test-child0 -> test-sibling1 -> test-sibling2 -> test-sibling3 -> null
+	|               |                |                |
+    test-child01      null             null             null
+
+
+Figure 2: 将测试数据树附在实时树上的例子。
+
+根据上面的方案，实时树已经存在，所以不需要附加根('/')节点。所有其他节点都是通过在
+每个节点上调用of_attach_node()来附加的。
+
+在函数of_attach_node()中，新的节点被附在实时树中给定的父节点的子节点上。但是，如
+果父节点已经有了一个孩子，那么新节点就会取代当前的孩子，并将其变成其兄弟姐妹。因此，
+当测试案例的数据节点被连接到上面的实时树（图1）时，最终的结构如图3所示::
+
+    root ('/')
+    |
+    testcase-data -> child1 -> sibling2 -> sibling3 -> sibling4 -> null
+    |               |          |           |           |
+    (...)             |          |           |          null
+		    |          |         child31 -> sibling32 -> null
+		    |          |           |           |
+		    |          |          null        null
+		    |          |
+		    |        child21 -> sibling22 -> sibling23 -> null
+		    |          |           |            |
+		    |         null        null         null
+		    |
+		    child11 -> sibling12 -> sibling13 -> sibling14 -> null
+		    |          |            |            |
+		    null       null          |           null
+					    |
+					    child131 -> null
+					    |
+					    null
+    -----------------------------------------------------------------------
+
+    root ('/')
+    |
+    testcase-data -> child1 -> sibling2 -> sibling3 -> sibling4 -> null
+    |               |          |           |           |
+    |             (...)      (...)       (...)        null
+    |
+    test-sibling3 -> test-sibling2 -> test-sibling1 -> test-child0 -> null
+    |                |                   |                |
+    null             null                null         test-child01
+
+
+Figure 3: 附加测试案例数据后的实时设备树结构。
+
+
+聪明的读者会注意到，与先前的结构相比，test-child0节点成为最后一个兄弟姐妹（图2）。
+在连接了第一个test-child0节点之后，又连接了test-sibling1节点，该节点推动子节点
+（即test-child0）成为兄弟姐妹，并使自己成为子节点，如上所述。
+
+如果发现一个重复的节点（即如果一个具有相同full_name属性的节点已经存在于实时树中），
+那么该节点不会被附加，而是通过调用函数update_node_properties()将其属性更新到活
+树的节点中。
+
+
+2.2. 删除测试数据
+-----------------
+
+一旦测试用例执行完，selftest_data_remove被调用，以移除最初连接的设备节点（首先是
+叶子节点被分离，然后向上移动父节点被移除，最后是整个树）。selftest_data_remove()
+调用detach_node_and_children()，使用of_detach_node()将节点从实时设备树上分离。
+
+为了分离一个节点，of_detach_node()要么将给定节点的父节点的子节点指针更新为其同级节
+点，要么根据情况将前一个同级节点附在给定节点的同级节点上。就这样吧。 :)
-- 
GitLab


From d29360ff239cd83aaf74897e3abfa7cd6dc2b06c Mon Sep 17 00:00:00 2001
From: Tang Yizhou <tangyizhou@huawei.com>
Date: Wed, 9 Mar 2022 23:36:59 +0800
Subject: [PATCH 1194/1586] docs/zh_CN: Add sched-stats Chinese translation

Translate scheduler/sched-stats.rst into Chinese.

Signed-off-by: Tang Yizhou <tangyizhou@huawei.com>
Reviewed-by: Yanteng Si <siyanteng@loongson.cn>
Reviewed-by: Alex Shi <alexs@kernel.org>
Link: https://lore.kernel.org/r/20220309153659.24437-1-tangyizhou@huawei.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../translations/zh_CN/scheduler/index.rst    |   3 +-
 .../zh_CN/scheduler/sched-stats.rst           | 156 ++++++++++++++++++
 2 files changed, 157 insertions(+), 2 deletions(-)
 create mode 100644 Documentation/translations/zh_CN/scheduler/sched-stats.rst

diff --git a/Documentation/translations/zh_CN/scheduler/index.rst b/Documentation/translations/zh_CN/scheduler/index.rst
index ab79259802668..c020f83827854 100644
--- a/Documentation/translations/zh_CN/scheduler/index.rst
+++ b/Documentation/translations/zh_CN/scheduler/index.rst
@@ -25,14 +25,13 @@ Linux调度器
     sched-domains
     sched-capacity
     sched-energy
-
+    sched-stats
 
 TODOList:
 
     sched-deadline
     sched-nice-design
     sched-rt-group
-    sched-stats
 
     text_files
 
diff --git a/Documentation/translations/zh_CN/scheduler/sched-stats.rst b/Documentation/translations/zh_CN/scheduler/sched-stats.rst
new file mode 100644
index 0000000000000..1c68c3d1c2834
--- /dev/null
+++ b/Documentation/translations/zh_CN/scheduler/sched-stats.rst
@@ -0,0 +1,156 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/scheduler/sched-stats.rst
+
+:翻译:
+
+  唐艺舟 Tang Yizhou <tangyeechou@gmail.com>
+
+==============
+调度器统计数据
+==============
+
+第15版schedstats去掉了sched_yield的一些计数器：yld_exp_empty，yld_act_empty
+和yld_both_empty。在其它方面和第14版完全相同。
+
+第14版schedstats包括对sched_domains（译注：调度域）的支持，该特性进入内核
+主线2.6.20，不过这一版schedstats与2.6.13-2.6.19内核的版本12的统计数据是完全
+相同的（内核未发布第13版）。有些计数器按每个运行队列统计是更有意义的，其它则
+按每个调度域统计是更有意义的。注意，调度域（以及它们的附属信息）仅在开启
+CONFIG_SMP的机器上是相关的和可用的。
+
+在第14版schedstat中，每个被列出的CPU至少会有一级域统计数据，且很可能有一个
+以上的域。在这个实现中，域没有特别的名字，但是编号最高的域通常在机器上所有的
+CPU上仲裁平衡，而domain0是最紧密聚焦的域，有时仅在一对CPU之间进行平衡。此时，
+没有任何体系结构需要3层以上的域。域统计数据中的第一个字段是一个位图，表明哪些
+CPU受该域的影响。
+
+这些字段是计数器，而且只能递增。使用这些字段的程序将需要从基线观测开始，然后在
+后续每一个观测中计算出计数器的变化。一个能以这种方式处理其中很多字段的perl脚本
+可见
+
+    http://eaglet.pdxhosts.com/rick/linux/schedstat/
+
+请注意，任何这样的脚本都必须是特定于版本的，改变版本的主要原因是输出格式的变化。
+对于那些希望编写自己的脚本的人，可以参考这里描述的各个字段。
+
+CPU统计数据
+-----------
+cpu<N> 1 2 3 4 5 6 7 8 9
+
+第一个字段是sched_yield()的统计数据：
+
+     1) sched_yield()被调用了#次
+
+接下来的三个是schedule()的统计数据：
+
+     2) 这个字段是一个过时的数组过期计数，在O(1)调度器中使用。为了ABI兼容性，
+	我们保留了它，但它总是被设置为0。
+     3) schedule()被调用了#次
+     4) 调用schedule()导致处理器变为空闲了#次
+
+接下来的两个是try_to_wake_up()的统计数据：
+
+     5) try_to_wake_up()被调用了#次
+     6) 调用try_to_wake_up()导致本地CPU被唤醒了#次
+
+接下来的三个统计数据描述了调度延迟：
+
+     7) 本处理器运行任务的总时间，单位是jiffies
+     8) 本处理器任务等待运行的时间，单位是jiffies
+     9) 本CPU运行了#个时间片
+
+域统计数据
+----------
+
+对于每个被描述的CPU，和它相关的每一个调度域均会产生下面一行数据（注意，如果
+CONFIG_SMP没有被定义，那么*没有*调度域被使用，这些行不会出现在输出中）。
+
+domain<N> <cpumask> 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
+
+第一个字段是一个位掩码，表明该域在操作哪些CPU。
+
+接下来的24个字段是load_balance()函数的各个统计数据，按空闲类型分组（空闲，
+繁忙，新空闲）：
+
+
+    1)  当CPU空闲时，load_balance()在这个调度域中被调用了#次
+    2)  当CPU空闲时，load_balance()在这个调度域中被调用，但是发现负载无需
+        均衡#次
+    3)  当CPU空闲时，load_balance()在这个调度域中被调用，试图迁移1个或更多
+        任务且失败了#次
+    4)  当CPU空闲时，load_balance()在这个调度域中被调用，发现不均衡（如果有）
+        #次
+    5)  当CPU空闲时，pull_task()在这个调度域中被调用#次
+    6)  当CPU空闲时，尽管目标任务是热缓存状态，pull_task()依然被调用#次
+    7)  当CPU空闲时，load_balance()在这个调度域中被调用，未能找到更繁忙的
+        队列#次
+    8)  当CPU空闲时，在调度域中找到了更繁忙的队列，但未找到更繁忙的调度组
+        #次
+    9)  当CPU繁忙时，load_balance()在这个调度域中被调用了#次
+    10) 当CPU繁忙时，load_balance()在这个调度域中被调用，但是发现负载无需
+        均衡#次
+    11) 当CPU繁忙时，load_balance()在这个调度域中被调用，试图迁移1个或更多
+        任务且失败了#次
+    12) 当CPU繁忙时，load_balance()在这个调度域中被调用，发现不均衡（如果有）
+        #次
+    13) 当CPU繁忙时，pull_task()在这个调度域中被调用#次
+    14) 当CPU繁忙时，尽管目标任务是热缓存状态，pull_task()依然被调用#次
+    15) 当CPU繁忙时，load_balance()在这个调度域中被调用，未能找到更繁忙的
+        队列#次
+    16) 当CPU繁忙时，在调度域中找到了更繁忙的队列，但未找到更繁忙的调度组
+        #次
+    17) 当CPU新空闲时，load_balance()在这个调度域中被调用了#次
+    18) 当CPU新空闲时，load_balance()在这个调度域中被调用，但是发现负载无需
+        均衡#次
+    19) 当CPU新空闲时，load_balance()在这个调度域中被调用，试图迁移1个或更多
+        任务且失败了#次
+    20) 当CPU新空闲时，load_balance()在这个调度域中被调用，发现不均衡（如果有）
+        #次
+    21) 当CPU新空闲时，pull_task()在这个调度域中被调用#次
+    22) 当CPU新空闲时，尽管目标任务是热缓存状态，pull_task()依然被调用#次
+    23) 当CPU新空闲时，load_balance()在这个调度域中被调用，未能找到更繁忙的
+        队列#次
+    24) 当CPU新空闲时，在调度域中找到了更繁忙的队列，但未找到更繁忙的调度组
+        #次
+
+接下来的3个字段是active_load_balance()函数的各个统计数据：
+
+    25) active_load_balance()被调用了#次
+    26) active_load_balance()被调用，试图迁移1个或更多任务且失败了#次
+    27) active_load_balance()被调用，成功迁移了#次任务
+
+接下来的3个字段是sched_balance_exec()函数的各个统计数据：
+
+    28) sbe_cnt不再被使用
+    29) sbe_balanced不再被使用
+    30) sbe_pushed不再被使用
+
+接下来的3个字段是sched_balance_fork()函数的各个统计数据：
+
+    31) sbf_cnt不再被使用
+    32) sbf_balanced不再被使用
+    33) sbf_pushed不再被使用
+
+接下来的3个字段是try_to_wake_up()函数的各个统计数据：
+
+    34) 在这个调度域中调用try_to_wake_up()唤醒任务时，任务在调度域中一个
+        和上次运行不同的新CPU上运行了#次
+    35) 在这个调度域中调用try_to_wake_up()唤醒任务时，任务被迁移到发生唤醒
+        的CPU次数为#，因为该任务在原CPU是冷缓存状态
+    36) 在这个调度域中调用try_to_wake_up()唤醒任务时，引发被动负载均衡#次
+
+/proc/<pid>/schedstat
+---------------------
+schedstats还添加了一个新的/proc/<pid>/schedstat文件，来提供一些进程级的
+相同信息。这个文件中，有三个字段与该进程相关：
+
+     1) 在CPU上运行花费的时间
+     2) 在运行队列上等待的时间
+     3) 在CPU上运行了#个时间片
+
+可以很容易地编写一个程序，利用这些额外的字段来报告一个特定的进程或一组进程在
+调度器策略下的表现如何。这样的程序的一个简单版本可在下面的链接找到
+
+    http://eaglet.pdxhosts.com/rick/linux/schedstat/v12/latency.c
-- 
GitLab


From 2abc9c246e0548e52985b10440c9ea3e9f65f793 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Mon, 7 Feb 2022 21:24:47 -0800
Subject: [PATCH 1195/1586] KEYS: asymmetric: enforce that sig algo matches key
 algo

Most callers of public_key_verify_signature(), including most indirect
callers via verify_signature() as well as pkcs7_verify_sig_chain(),
don't check that public_key_signature::pkey_algo matches
public_key::pkey_algo.  These should always match.  However, a malicious
signature could intentionally declare an unintended algorithm.  It is
essential that such signatures be rejected outright, or that the
algorithm of the *key* be used -- not the algorithm of the signature as
that would allow attackers to choose the algorithm used.

Currently, public_key_verify_signature() correctly uses the key's
algorithm when deciding which akcipher to allocate.  That's good.
However, it uses the signature's algorithm when deciding whether to do
the first step of SM2, which is incorrect.  Also, v4.19 and older
kernels used the signature's algorithm for the entire process.

Prevent such errors by making public_key_verify_signature() enforce that
the signature's algorithm (if given) matches the key's algorithm.

Also remove two checks of this done by callers, which are now redundant.

Cc: stable@vger.kernel.org
Tested-by: Stefan Berger <stefanb@linux.ibm.com>
Tested-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Signed-off-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Vitaly Chikunov <vt@altlinux.org>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 crypto/asymmetric_keys/pkcs7_verify.c    |  6 ------
 crypto/asymmetric_keys/public_key.c      | 15 +++++++++++++++
 crypto/asymmetric_keys/x509_public_key.c |  6 ------
 3 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/crypto/asymmetric_keys/pkcs7_verify.c b/crypto/asymmetric_keys/pkcs7_verify.c
index d37b187faf9ae..f6321c785714c 100644
--- a/crypto/asymmetric_keys/pkcs7_verify.c
+++ b/crypto/asymmetric_keys/pkcs7_verify.c
@@ -174,12 +174,6 @@ static int pkcs7_find_key(struct pkcs7_message *pkcs7,
 		pr_devel("Sig %u: Found cert serial match X.509[%u]\n",
 			 sinfo->index, certix);
 
-		if (strcmp(x509->pub->pkey_algo, sinfo->sig->pkey_algo) != 0) {
-			pr_warn("Sig %u: X.509 algo and PKCS#7 sig algo don't match\n",
-				sinfo->index);
-			continue;
-		}
-
 		sinfo->signer = x509;
 		return 0;
 	}
diff --git a/crypto/asymmetric_keys/public_key.c b/crypto/asymmetric_keys/public_key.c
index 4fefb219bfdc8..e36213945686f 100644
--- a/crypto/asymmetric_keys/public_key.c
+++ b/crypto/asymmetric_keys/public_key.c
@@ -325,6 +325,21 @@ int public_key_verify_signature(const struct public_key *pkey,
 	BUG_ON(!sig);
 	BUG_ON(!sig->s);
 
+	/*
+	 * If the signature specifies a public key algorithm, it *must* match
+	 * the key's actual public key algorithm.
+	 *
+	 * Small exception: ECDSA signatures don't specify the curve, but ECDSA
+	 * keys do.  So the strings can mismatch slightly in that case:
+	 * "ecdsa-nist-*" for the key, but "ecdsa" for the signature.
+	 */
+	if (sig->pkey_algo) {
+		if (strcmp(pkey->pkey_algo, sig->pkey_algo) != 0 &&
+		    (strncmp(pkey->pkey_algo, "ecdsa-", 6) != 0 ||
+		     strcmp(sig->pkey_algo, "ecdsa") != 0))
+			return -EKEYREJECTED;
+	}
+
 	ret = software_key_determine_akcipher(sig->encoding,
 					      sig->hash_algo,
 					      pkey, alg_name);
diff --git a/crypto/asymmetric_keys/x509_public_key.c b/crypto/asymmetric_keys/x509_public_key.c
index 8c77a297a82d4..91a4ad50dea26 100644
--- a/crypto/asymmetric_keys/x509_public_key.c
+++ b/crypto/asymmetric_keys/x509_public_key.c
@@ -116,12 +116,6 @@ int x509_check_for_self_signed(struct x509_certificate *cert)
 			goto out;
 	}
 
-	ret = -EKEYREJECTED;
-	if (strcmp(cert->pub->pkey_algo, cert->sig->pkey_algo) != 0 &&
-	    (strncmp(cert->pub->pkey_algo, "ecdsa-", 6) != 0 ||
-	     strcmp(cert->sig->pkey_algo, "ecdsa") != 0))
-		goto out;
-
 	ret = public_key_verify_signature(cert->pub, cert->sig);
 	if (ret < 0) {
 		if (ret == -ENOPKG) {
-- 
GitLab


From 590bfb57b2328951d5833979e7ca1d5fde2e609a Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Mon, 7 Feb 2022 21:24:48 -0800
Subject: [PATCH 1196/1586] KEYS: asymmetric: properly validate hash_algo and
 encoding

It is insecure to allow arbitrary hash algorithms and signature
encodings to be used with arbitrary signature algorithms.  Notably,
ECDSA, ECRDSA, and SM2 all sign/verify raw hash values and don't
disambiguate between different hash algorithms like RSA PKCS#1 v1.5
padding does.  Therefore, they need to be restricted to certain sets of
hash algorithms (ideally just one, but in practice small sets are used).
Additionally, the encoding is an integral part of modern signature
algorithms, and is not supposed to vary.

Therefore, tighten the checks of hash_algo and encoding done by
software_key_determine_akcipher().

Also rearrange the parameters to software_key_determine_akcipher() to
put the public_key first, as this is the most important parameter and it
often determines everything else.

Fixes: 299f561a6693 ("x509: Add support for parsing x509 certs with ECDSA keys")
Fixes: 215525639631 ("X.509: support OSCCA SM2-with-SM3 certificate verification")
Fixes: 0d7a78643f69 ("crypto: ecrdsa - add EC-RDSA (GOST 34.10) algorithm")
Cc: stable@vger.kernel.org
Tested-by: Stefan Berger <stefanb@linux.ibm.com>
Tested-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Signed-off-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Vitaly Chikunov <vt@altlinux.org>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 crypto/asymmetric_keys/public_key.c | 111 +++++++++++++++++++---------
 1 file changed, 76 insertions(+), 35 deletions(-)

diff --git a/crypto/asymmetric_keys/public_key.c b/crypto/asymmetric_keys/public_key.c
index e36213945686f..7c9e6be35c30c 100644
--- a/crypto/asymmetric_keys/public_key.c
+++ b/crypto/asymmetric_keys/public_key.c
@@ -60,39 +60,83 @@ static void public_key_destroy(void *payload0, void *payload3)
 }
 
 /*
- * Determine the crypto algorithm name.
+ * Given a public_key, and an encoding and hash_algo to be used for signing
+ * and/or verification with that key, determine the name of the corresponding
+ * akcipher algorithm.  Also check that encoding and hash_algo are allowed.
  */
-static
-int software_key_determine_akcipher(const char *encoding,
-				    const char *hash_algo,
-				    const struct public_key *pkey,
-				    char alg_name[CRYPTO_MAX_ALG_NAME])
+static int
+software_key_determine_akcipher(const struct public_key *pkey,
+				const char *encoding, const char *hash_algo,
+				char alg_name[CRYPTO_MAX_ALG_NAME])
 {
 	int n;
 
-	if (strcmp(encoding, "pkcs1") == 0) {
-		/* The data wangled by the RSA algorithm is typically padded
-		 * and encoded in some manner, such as EMSA-PKCS1-1_5 [RFC3447
-		 * sec 8.2].
+	if (!encoding)
+		return -EINVAL;
+
+	if (strcmp(pkey->pkey_algo, "rsa") == 0) {
+		/*
+		 * RSA signatures usually use EMSA-PKCS1-1_5 [RFC3447 sec 8.2].
+		 */
+		if (strcmp(encoding, "pkcs1") == 0) {
+			if (!hash_algo)
+				n = snprintf(alg_name, CRYPTO_MAX_ALG_NAME,
+					     "pkcs1pad(%s)",
+					     pkey->pkey_algo);
+			else
+				n = snprintf(alg_name, CRYPTO_MAX_ALG_NAME,
+					     "pkcs1pad(%s,%s)",
+					     pkey->pkey_algo, hash_algo);
+			return n >= CRYPTO_MAX_ALG_NAME ? -EINVAL : 0;
+		}
+		if (strcmp(encoding, "raw") != 0)
+			return -EINVAL;
+		/*
+		 * Raw RSA cannot differentiate between different hash
+		 * algorithms.
+		 */
+		if (hash_algo)
+			return -EINVAL;
+	} else if (strncmp(pkey->pkey_algo, "ecdsa", 5) == 0) {
+		if (strcmp(encoding, "x962") != 0)
+			return -EINVAL;
+		/*
+		 * ECDSA signatures are taken over a raw hash, so they don't
+		 * differentiate between different hash algorithms.  That means
+		 * that the verifier should hard-code a specific hash algorithm.
+		 * Unfortunately, in practice ECDSA is used with multiple SHAs,
+		 * so we have to allow all of them and not just one.
 		 */
 		if (!hash_algo)
-			n = snprintf(alg_name, CRYPTO_MAX_ALG_NAME,
-				     "pkcs1pad(%s)",
-				     pkey->pkey_algo);
-		else
-			n = snprintf(alg_name, CRYPTO_MAX_ALG_NAME,
-				     "pkcs1pad(%s,%s)",
-				     pkey->pkey_algo, hash_algo);
-		return n >= CRYPTO_MAX_ALG_NAME ? -EINVAL : 0;
-	}
-
-	if (strcmp(encoding, "raw") == 0 ||
-	    strcmp(encoding, "x962") == 0) {
-		strcpy(alg_name, pkey->pkey_algo);
-		return 0;
+			return -EINVAL;
+		if (strcmp(hash_algo, "sha1") != 0 &&
+		    strcmp(hash_algo, "sha224") != 0 &&
+		    strcmp(hash_algo, "sha256") != 0 &&
+		    strcmp(hash_algo, "sha384") != 0 &&
+		    strcmp(hash_algo, "sha512") != 0)
+			return -EINVAL;
+	} else if (strcmp(pkey->pkey_algo, "sm2") == 0) {
+		if (strcmp(encoding, "raw") != 0)
+			return -EINVAL;
+		if (!hash_algo)
+			return -EINVAL;
+		if (strcmp(hash_algo, "sm3") != 0)
+			return -EINVAL;
+	} else if (strcmp(pkey->pkey_algo, "ecrdsa") == 0) {
+		if (strcmp(encoding, "raw") != 0)
+			return -EINVAL;
+		if (!hash_algo)
+			return -EINVAL;
+		if (strcmp(hash_algo, "streebog256") != 0 &&
+		    strcmp(hash_algo, "streebog512") != 0)
+			return -EINVAL;
+	} else {
+		/* Unknown public key algorithm */
+		return -ENOPKG;
 	}
-
-	return -ENOPKG;
+	if (strscpy(alg_name, pkey->pkey_algo, CRYPTO_MAX_ALG_NAME) < 0)
+		return -EINVAL;
+	return 0;
 }
 
 static u8 *pkey_pack_u32(u8 *dst, u32 val)
@@ -113,9 +157,8 @@ static int software_key_query(const struct kernel_pkey_params *params,
 	u8 *key, *ptr;
 	int ret, len;
 
-	ret = software_key_determine_akcipher(params->encoding,
-					      params->hash_algo,
-					      pkey, alg_name);
+	ret = software_key_determine_akcipher(pkey, params->encoding,
+					      params->hash_algo, alg_name);
 	if (ret < 0)
 		return ret;
 
@@ -179,9 +222,8 @@ static int software_key_eds_op(struct kernel_pkey_params *params,
 
 	pr_devel("==>%s()\n", __func__);
 
-	ret = software_key_determine_akcipher(params->encoding,
-					      params->hash_algo,
-					      pkey, alg_name);
+	ret = software_key_determine_akcipher(pkey, params->encoding,
+					      params->hash_algo, alg_name);
 	if (ret < 0)
 		return ret;
 
@@ -340,9 +382,8 @@ int public_key_verify_signature(const struct public_key *pkey,
 			return -EKEYREJECTED;
 	}
 
-	ret = software_key_determine_akcipher(sig->encoding,
-					      sig->hash_algo,
-					      pkey, alg_name);
+	ret = software_key_determine_akcipher(pkey, sig->encoding,
+					      sig->hash_algo, alg_name);
 	if (ret < 0)
 		return ret;
 
-- 
GitLab


From fb5abce6b2bb5cb3d628aaa63fa821da8c4600f9 Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@HansenPartnership.com>
Date: Mon, 7 Mar 2022 15:58:03 -0500
Subject: [PATCH 1197/1586] tpm: use try_get_ops() in tpm-space.c

As part of the series conversion to remove nested TPM operations:

https://lore.kernel.org/all/20190205224723.19671-1-jarkko.sakkinen@linux.intel.com/

exposure of the chip->tpm_mutex was removed from much of the upper
level code.  In this conversion, tpm2_del_space() was missed.  This
didn't matter much because it's usually called closely after a
converted operation, so there's only a very tiny race window where the
chip can be removed before the space flushing is done which causes a
NULL deref on the mutex.  However, there are reports of this window
being hit in practice, so fix this by converting tpm2_del_space() to
use tpm_try_get_ops(), which performs all the teardown checks before
acquring the mutex.

Cc: stable@vger.kernel.org # 5.4.x
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 drivers/char/tpm/tpm2-space.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/char/tpm/tpm2-space.c b/drivers/char/tpm/tpm2-space.c
index 265ec72b1d816..ffb35f0154c16 100644
--- a/drivers/char/tpm/tpm2-space.c
+++ b/drivers/char/tpm/tpm2-space.c
@@ -58,12 +58,12 @@ int tpm2_init_space(struct tpm_space *space, unsigned int buf_size)
 
 void tpm2_del_space(struct tpm_chip *chip, struct tpm_space *space)
 {
-	mutex_lock(&chip->tpm_mutex);
-	if (!tpm_chip_start(chip)) {
+
+	if (tpm_try_get_ops(chip) == 0) {
 		tpm2_flush_sessions(chip, space);
-		tpm_chip_stop(chip);
+		tpm_put_ops(chip);
 	}
-	mutex_unlock(&chip->tpm_mutex);
+
 	kfree(space->context_buf);
 	kfree(space->session_buf);
 }
-- 
GitLab


From d13732cc0cc920ee58d45a38d8472769171aaa1d Mon Sep 17 00:00:00 2001
From: Jia Yang <jiayang5@huawei.com>
Date: Wed, 9 Mar 2022 19:09:35 +0800
Subject: [PATCH 1198/1586] f2fs: remove unnecessary read for
 F2FS_FITS_IN_INODE

F2FS_FITS_IN_INODE only cares the type of f2fs inode, so there
is no need to read node page of f2fs inode.

Signed-off-by: Jia Yang <jiayang5@huawei.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/file.c | 17 ++++-------------
 1 file changed, 4 insertions(+), 13 deletions(-)

diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index cfdc41f87f5de..7049be29bc2ed 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -812,7 +812,7 @@ int f2fs_getattr(struct user_namespace *mnt_userns, const struct path *path,
 {
 	struct inode *inode = d_inode(path->dentry);
 	struct f2fs_inode_info *fi = F2FS_I(inode);
-	struct f2fs_inode *ri;
+	struct f2fs_inode *ri = NULL;
 	unsigned int flags;
 
 	if (f2fs_has_extra_attr(inode) &&
@@ -2999,7 +2999,7 @@ static int f2fs_ioc_setproject(struct inode *inode, __u32 projid)
 {
 	struct f2fs_inode_info *fi = F2FS_I(inode);
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
-	struct page *ipage;
+	struct f2fs_inode *ri = NULL;
 	kprojid_t kprojid;
 	int err;
 
@@ -3023,17 +3023,8 @@ static int f2fs_ioc_setproject(struct inode *inode, __u32 projid)
 	if (IS_NOQUOTA(inode))
 		return err;
 
-	ipage = f2fs_get_node_page(sbi, inode->i_ino);
-	if (IS_ERR(ipage))
-		return PTR_ERR(ipage);
-
-	if (!F2FS_FITS_IN_INODE(F2FS_INODE(ipage), fi->i_extra_isize,
-								i_projid)) {
-		err = -EOVERFLOW;
-		f2fs_put_page(ipage, 1);
-		return err;
-	}
-	f2fs_put_page(ipage, 1);
+	if (!F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_projid))
+		return -EOVERFLOW;
 
 	err = f2fs_dquot_initialize(inode);
 	if (err)
-- 
GitLab


From 9a564bccb78a76740ea9d75a259942df8143d02c Mon Sep 17 00:00:00 2001
From: Haimin Zhang <tcs_kernel@tencent.com>
Date: Tue, 8 Mar 2022 11:20:28 +0800
Subject: [PATCH 1199/1586] af_key: add __GFP_ZERO flag for
 compose_sadb_supported in function pfkey_register

Add __GFP_ZERO flag for compose_sadb_supported in function pfkey_register
to initialize the buffer of supp_skb to fix a kernel-info-leak issue.
1) Function pfkey_register calls compose_sadb_supported to request
a sk_buff. 2) compose_sadb_supported calls alloc_sbk to allocate
a sk_buff, but it doesn't zero it. 3) If auth_len is greater 0, then
compose_sadb_supported treats the memory as a struct sadb_supported and
begins to initialize. But it just initializes the field sadb_supported_len
and field sadb_supported_exttype without field sadb_supported_reserved.

Reported-by: TCS Robot <tcs_robot@tencent.com>
Signed-off-by: Haimin Zhang <tcs_kernel@tencent.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/key/af_key.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/key/af_key.c b/net/key/af_key.c
index 9bf52a09b5ff3..fd51db3be91c4 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1699,7 +1699,7 @@ static int pfkey_register(struct sock *sk, struct sk_buff *skb, const struct sad
 
 	xfrm_probe_algs();
 
-	supp_skb = compose_sadb_supported(hdr, GFP_KERNEL);
+	supp_skb = compose_sadb_supported(hdr, GFP_KERNEL | __GFP_ZERO);
 	if (!supp_skb) {
 		if (hdr->sadb_msg_satype != SADB_SATYPE_UNSPEC)
 			pfk->registered &= ~(1<<hdr->sadb_msg_satype);
-- 
GitLab


From dc29812dbc873ae00bf19a4b8661984d7cce7a2e Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Thu, 10 Mar 2022 08:34:58 +0000
Subject: [PATCH 1200/1586] irqchip/apple-aic: Fix cpumask allocation for FIQs

An emparassing typo: allocating a pointer instead of the object
pointed to. No harm done, as the pointer is large enough for
what we are using the object for, but still...

Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220310050238.4478-1-guozhengkui@vivo.com
---
 drivers/irqchip/irq-apple-aic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/irqchip/irq-apple-aic.c b/drivers/irqchip/irq-apple-aic.c
index b40199c6625e3..3f1d2f3ccb7fa 100644
--- a/drivers/irqchip/irq-apple-aic.c
+++ b/drivers/irqchip/irq-apple-aic.c
@@ -810,7 +810,7 @@ static void build_fiq_affinity(struct aic_irq_chip *ic, struct device_node *aff)
 	if (WARN_ON(n < 0))
 		return;
 
-	ic->fiq_aff[fiq] = kzalloc(sizeof(ic->fiq_aff[fiq]), GFP_KERNEL);
+	ic->fiq_aff[fiq] = kzalloc(sizeof(*ic->fiq_aff[fiq]), GFP_KERNEL);
 	if (!ic->fiq_aff[fiq])
 		return;
 
-- 
GitLab


From 89eb1681b73ff584c5678f7185a2d8e3070e9b78 Mon Sep 17 00:00:00 2001
From: Hector Martin <marcan@marcan.st>
Date: Thu, 10 Mar 2022 04:21:17 +0900
Subject: [PATCH 1201/1586] PCI: apple: Change MSI handling to handle 4-cell
 AIC fwspec form

AIC2 changes the IRQ fwspec to add a cell. Always use the second-to-last
cell for the MSI handling, so it will work for both AIC1 and AIC2 devices.

Signed-off-by: Hector Martin <marcan@marcan.st>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220309192123.152028-2-marcan@marcan.st
---
 drivers/pci/controller/pcie-apple.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pci/controller/pcie-apple.c b/drivers/pci/controller/pcie-apple.c
index 854d951631129..a2c3c207a04b7 100644
--- a/drivers/pci/controller/pcie-apple.c
+++ b/drivers/pci/controller/pcie-apple.c
@@ -219,7 +219,7 @@ static int apple_msi_domain_alloc(struct irq_domain *domain, unsigned int virq,
 	if (hwirq < 0)
 		return -ENOSPC;
 
-	fwspec.param[1] += hwirq;
+	fwspec.param[fwspec.param_count - 2] += hwirq;
 
 	ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, &fwspec);
 	if (ret)
-- 
GitLab


From 9986c7650eb3d3e9f00def7bc10e9328a047112e Mon Sep 17 00:00:00 2001
From: Evgenii Stepanov <eugenis@google.com>
Date: Wed, 9 Mar 2022 13:59:43 -0800
Subject: [PATCH 1202/1586] docs: sysfs-devices-system-cpu: document "asymm"
 value for mte_tcf_preferred

It was added in commit 766121ba5de3 ("arm64/mte: Add userspace interface
for enabling asymmetric mode").

Signed-off-by: Evgenii Stepanov <eugenis@google.com>
Reviewed-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20220309215943.87831-1-eugenis@google.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 Documentation/ABI/testing/sysfs-devices-system-cpu | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index 61f5676a7429a..2281d5eff4033 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -662,6 +662,7 @@ Description:	Preferred MTE tag checking mode
 
 		================  ==============================================
 		"sync"	  	  Prefer synchronous mode
+		"asymm"	  	  Prefer asymmetric mode
 		"async"	  	  Prefer asynchronous mode
 		================  ==============================================
 
-- 
GitLab


From 2cfdf0b4441aa918c3b18142740c92407b3c35a2 Mon Sep 17 00:00:00 2001
From: Yihao Han <hanyihao@vivo.com>
Date: Thu, 10 Mar 2022 01:48:06 -0800
Subject: [PATCH 1203/1586] spi: rockchip-sfc: fix platform_get_irq.cocci
 warning

Remove dev_err() messages after platform_get_irq*() failures.
platform_get_irq() already prints an error.

Generated by: scripts/coccinelle/api/platform_get_irq.cocci

Signed-off-by: Yihao Han <hanyihao@vivo.com>
Link: https://lore.kernel.org/r/20220310094806.13734-1-hanyihao@vivo.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-rockchip-sfc.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/spi/spi-rockchip-sfc.c b/drivers/spi/spi-rockchip-sfc.c
index a46b385440273..bd87d3c92dd33 100644
--- a/drivers/spi/spi-rockchip-sfc.c
+++ b/drivers/spi/spi-rockchip-sfc.c
@@ -624,10 +624,8 @@ static int rockchip_sfc_probe(struct platform_device *pdev)
 
 	/* Find the irq */
 	ret = platform_get_irq(pdev, 0);
-	if (ret < 0) {
-		dev_err(dev, "Failed to get the irq\n");
+	if (ret < 0)
 		goto err_irq;
-	}
 
 	ret = devm_request_irq(dev, ret, rockchip_sfc_irq_handler,
 			       0, pdev->name, sfc);
-- 
GitLab


From 95932ab2ea07b79cdb33121e2f40ccda9e6a73b5 Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Thu, 10 Mar 2022 15:52:11 +0800
Subject: [PATCH 1204/1586] vhost: allow batching hint without size

Commit e2ae38cf3d91 ("vhost: fix hung thread due to erroneous iotlb
entries") tries to reject the IOTLB message whose size is zero. But
the size is not necessarily meaningful, one example is the batching
hint, so the commit breaks that.

Fixing this be reject zero size message only if the message is used to
update/invalidate the IOTLB.

Fixes: e2ae38cf3d91 ("vhost: fix hung thread due to erroneous iotlb entries")
Reported-by: Eli Cohen <elic@nvidia.com>
Cc: Anirudh Rayabharam <mail@anirudhrb.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Link: https://lore.kernel.org/r/20220310075211.4801-1-jasowang@redhat.com
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Tested-by: Eli Cohen <elic@nvidia.com>
---
 drivers/vhost/vhost.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 082380c03a3e5..1768362115c6b 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -1170,7 +1170,9 @@ ssize_t vhost_chr_write_iter(struct vhost_dev *dev,
 		goto done;
 	}
 
-	if (msg.size == 0) {
+	if ((msg.type == VHOST_IOTLB_UPDATE ||
+	     msg.type == VHOST_IOTLB_INVALIDATE) &&
+	     msg.size == 0) {
 		ret = -EINVAL;
 		goto done;
 	}
-- 
GitLab


From 2757be22c0f4c06d359f802fa1fc57286fa26975 Mon Sep 17 00:00:00 2001
From: Usama Arif <usama.arif@bytedance.com>
Date: Fri, 4 Feb 2022 14:51:13 +0000
Subject: [PATCH 1205/1586] io_uring: remove trace for eventfd

The information on whether eventfd is registered is not very useful and
would result in the tracepoint being enclosed in an rcu_readlock in a
later patch that tries to avoid ring quiesce for registering eventfd.

Suggested-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Usama Arif <usama.arif@bytedance.com>
Link: https://lore.kernel.org/r/20220204145117.1186568-2-usama.arif@bytedance.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c                   |  3 +--
 include/trace/events/io_uring.h | 13 +++++--------
 2 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 4715980e90150..8bddfacc3382a 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -11179,8 +11179,7 @@ SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode,
 	mutex_lock(&ctx->uring_lock);
 	ret = __io_uring_register(ctx, opcode, arg, nr_args);
 	mutex_unlock(&ctx->uring_lock);
-	trace_io_uring_register(ctx, opcode, ctx->nr_user_files, ctx->nr_user_bufs,
-							ctx->cq_ev_fd != NULL, ret);
+	trace_io_uring_register(ctx, opcode, ctx->nr_user_files, ctx->nr_user_bufs, ret);
 out_fput:
 	fdput(f);
 	return ret;
diff --git a/include/trace/events/io_uring.h b/include/trace/events/io_uring.h
index 7346f0164cf49..098beda7601a1 100644
--- a/include/trace/events/io_uring.h
+++ b/include/trace/events/io_uring.h
@@ -57,10 +57,9 @@ TRACE_EVENT(io_uring_create,
  * @opcode:		describes which operation to perform
  * @nr_user_files:	number of registered files
  * @nr_user_bufs:	number of registered buffers
- * @cq_ev_fd:		whether eventfs registered or not
  * @ret:		return code
  *
- * Allows to trace fixed files/buffers/eventfds, that could be registered to
+ * Allows to trace fixed files/buffers, that could be registered to
  * avoid an overhead of getting references to them for every operation. This
  * event, together with io_uring_file_get, can provide a full picture of how
  * much overhead one can reduce via fixing.
@@ -68,16 +67,15 @@ TRACE_EVENT(io_uring_create,
 TRACE_EVENT(io_uring_register,
 
 	TP_PROTO(void *ctx, unsigned opcode, unsigned nr_files,
-			 unsigned nr_bufs, bool eventfd, long ret),
+			 unsigned nr_bufs, long ret),
 
-	TP_ARGS(ctx, opcode, nr_files, nr_bufs, eventfd, ret),
+	TP_ARGS(ctx, opcode, nr_files, nr_bufs, ret),
 
 	TP_STRUCT__entry (
 		__field(  void *,	ctx			)
 		__field(  unsigned,	opcode		)
 		__field(  unsigned,	nr_files	)
 		__field(  unsigned,	nr_bufs		)
-		__field(  bool,		eventfd		)
 		__field(  long,		ret			)
 	),
 
@@ -86,14 +84,13 @@ TRACE_EVENT(io_uring_register,
 		__entry->opcode		= opcode;
 		__entry->nr_files	= nr_files;
 		__entry->nr_bufs	= nr_bufs;
-		__entry->eventfd	= eventfd;
 		__entry->ret		= ret;
 	),
 
 	TP_printk("ring %p, opcode %d, nr_user_files %d, nr_user_bufs %d, "
-			  "eventfd %d, ret %ld",
+			  "ret %ld",
 			  __entry->ctx, __entry->opcode, __entry->nr_files,
-			  __entry->nr_bufs, __entry->eventfd, __entry->ret)
+			  __entry->nr_bufs, __entry->ret)
 );
 
 /**
-- 
GitLab


From 77bc59b498174ea4f120d477d2cd0cf90fc58235 Mon Sep 17 00:00:00 2001
From: Usama Arif <usama.arif@bytedance.com>
Date: Fri, 4 Feb 2022 14:51:14 +0000
Subject: [PATCH 1206/1586] io_uring: avoid ring quiesce while
 registering/unregistering eventfd

This is done by creating a new RCU data structure (io_ev_fd) as part of
io_ring_ctx that holds the eventfd_ctx.

The function io_eventfd_signal is executed under rcu_read_lock with a
single rcu_dereference to io_ev_fd so that if another thread unregisters
the eventfd while io_eventfd_signal is still being executed, the
eventfd_signal for which io_eventfd_signal was called completes
successfully.

The process of registering/unregistering eventfd is already done under
uring_lock so multiple threads won't enter a race condition while
registering/unregistering eventfd.

With the above approach ring quiesce can be avoided which is much more
expensive then using RCU lock. On the system tested, io_uring_register
with IORING_REGISTER_EVENTFD takes less than 1ms with RCU lock, compared
to 15ms before with ring quiesce.

Signed-off-by: Usama Arif <usama.arif@bytedance.com>
Link: https://lore.kernel.org/r/20220204145117.1186568-3-usama.arif@bytedance.com
[axboe: long line fixups]
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 89 +++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 68 insertions(+), 21 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 8bddfacc3382a..6c35e88be4ea5 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -326,6 +326,11 @@ struct io_submit_state {
 	struct blk_plug		plug;
 };
 
+struct io_ev_fd {
+	struct eventfd_ctx	*cq_ev_fd;
+	struct rcu_head		rcu;
+};
+
 struct io_ring_ctx {
 	/* const or read-mostly hot data */
 	struct {
@@ -399,7 +404,7 @@ struct io_ring_ctx {
 	struct {
 		unsigned		cached_cq_tail;
 		unsigned		cq_entries;
-		struct eventfd_ctx	*cq_ev_fd;
+		struct io_ev_fd	__rcu	*io_ev_fd;
 		struct wait_queue_head	cq_wait;
 		unsigned		cq_extra;
 		atomic_t		cq_timeouts;
@@ -1726,13 +1731,36 @@ static inline struct io_uring_cqe *io_get_cqe(struct io_ring_ctx *ctx)
 	return &rings->cqes[tail & mask];
 }
 
-static inline bool io_should_trigger_evfd(struct io_ring_ctx *ctx)
+static void io_eventfd_signal(struct io_ring_ctx *ctx)
 {
-	if (likely(!ctx->cq_ev_fd))
-		return false;
+	struct io_ev_fd *ev_fd;
+
+	/* Return quickly if ctx->io_ev_fd doesn't exist */
+	if (likely(!rcu_dereference_raw(ctx->io_ev_fd)))
+		return;
+
+	rcu_read_lock();
+	/*
+	 * rcu_dereference ctx->io_ev_fd once and use it for both for checking
+	 * and eventfd_signal
+	 */
+	ev_fd = rcu_dereference(ctx->io_ev_fd);
+
+	/*
+	 * Check again if ev_fd exists incase an io_eventfd_unregister call
+	 * completed between the NULL check of ctx->io_ev_fd at the start of
+	 * the function and rcu_read_lock.
+	 */
+	if (unlikely(!ev_fd))
+		goto out;
 	if (READ_ONCE(ctx->rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED)
-		return false;
-	return !ctx->eventfd_async || io_wq_current_is_worker();
+		goto out;
+
+	if (!ctx->eventfd_async || io_wq_current_is_worker())
+		eventfd_signal(ev_fd->cq_ev_fd, 1);
+
+out:
+	rcu_read_unlock();
 }
 
 /*
@@ -1751,8 +1779,7 @@ static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
 	 */
 	if (wq_has_sleeper(&ctx->cq_wait))
 		wake_up_all(&ctx->cq_wait);
-	if (io_should_trigger_evfd(ctx))
-		eventfd_signal(ctx->cq_ev_fd, 1);
+	io_eventfd_signal(ctx);
 }
 
 static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx)
@@ -1764,8 +1791,7 @@ static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx)
 		if (waitqueue_active(&ctx->cq_wait))
 			wake_up_all(&ctx->cq_wait);
 	}
-	if (io_should_trigger_evfd(ctx))
-		eventfd_signal(ctx->cq_ev_fd, 1);
+	io_eventfd_signal(ctx);
 }
 
 /* Returns true if there are no backlogged entries after the flush */
@@ -9361,31 +9387,50 @@ static int __io_sqe_buffers_update(struct io_ring_ctx *ctx,
 
 static int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg)
 {
+	struct io_ev_fd *ev_fd;
 	__s32 __user *fds = arg;
-	int fd;
+	int fd, ret;
 
-	if (ctx->cq_ev_fd)
+	ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
+					lockdep_is_held(&ctx->uring_lock));
+	if (ev_fd)
 		return -EBUSY;
 
 	if (copy_from_user(&fd, fds, sizeof(*fds)))
 		return -EFAULT;
 
-	ctx->cq_ev_fd = eventfd_ctx_fdget(fd);
-	if (IS_ERR(ctx->cq_ev_fd)) {
-		int ret = PTR_ERR(ctx->cq_ev_fd);
+	ev_fd = kmalloc(sizeof(*ev_fd), GFP_KERNEL);
+	if (!ev_fd)
+		return -ENOMEM;
 
-		ctx->cq_ev_fd = NULL;
+	ev_fd->cq_ev_fd = eventfd_ctx_fdget(fd);
+	if (IS_ERR(ev_fd->cq_ev_fd)) {
+		ret = PTR_ERR(ev_fd->cq_ev_fd);
+		kfree(ev_fd);
 		return ret;
 	}
 
-	return 0;
+	rcu_assign_pointer(ctx->io_ev_fd, ev_fd);
+	return ret;
+}
+
+static void io_eventfd_put(struct rcu_head *rcu)
+{
+	struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);
+
+	eventfd_ctx_put(ev_fd->cq_ev_fd);
+	kfree(ev_fd);
 }
 
 static int io_eventfd_unregister(struct io_ring_ctx *ctx)
 {
-	if (ctx->cq_ev_fd) {
-		eventfd_ctx_put(ctx->cq_ev_fd);
-		ctx->cq_ev_fd = NULL;
+	struct io_ev_fd *ev_fd;
+
+	ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
+					lockdep_is_held(&ctx->uring_lock));
+	if (ev_fd) {
+		rcu_assign_pointer(ctx->io_ev_fd, NULL);
+		call_rcu(&ev_fd->rcu, io_eventfd_put);
 		return 0;
 	}
 
@@ -9450,8 +9495,8 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
 		__io_sqe_files_unregister(ctx);
 	if (ctx->rings)
 		__io_cqring_overflow_flush(ctx, true);
-	mutex_unlock(&ctx->uring_lock);
 	io_eventfd_unregister(ctx);
+	mutex_unlock(&ctx->uring_lock);
 	io_destroy_buffers(ctx);
 	if (ctx->sq_creds)
 		put_cred(ctx->sq_creds);
@@ -10968,6 +11013,8 @@ static bool io_register_op_must_quiesce(int op)
 	case IORING_REGISTER_FILES:
 	case IORING_UNREGISTER_FILES:
 	case IORING_REGISTER_FILES_UPDATE:
+	case IORING_REGISTER_EVENTFD:
+	case IORING_UNREGISTER_EVENTFD:
 	case IORING_REGISTER_PROBE:
 	case IORING_REGISTER_PERSONALITY:
 	case IORING_UNREGISTER_PERSONALITY:
-- 
GitLab


From c75312dd592b31427caa88170b61fdc3ae5b2891 Mon Sep 17 00:00:00 2001
From: Usama Arif <usama.arif@bytedance.com>
Date: Fri, 4 Feb 2022 14:51:15 +0000
Subject: [PATCH 1207/1586] io_uring: avoid ring quiesce while registering
 async eventfd

This is done using the RCU data structure (io_ev_fd). eventfd_async is
moved from io_ring_ctx to io_ev_fd which is RCU protected hence avoiding
ring quiesce which is much more expensive than an RCU lock. The place
where eventfd_async is read is already under rcu_read_lock so there is no
extra RCU read-side critical section needed.

Signed-off-by: Usama Arif <usama.arif@bytedance.com>
Link: https://lore.kernel.org/r/20220204145117.1186568-4-usama.arif@bytedance.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 6c35e88be4ea5..961866b24b4f2 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -328,6 +328,7 @@ struct io_submit_state {
 
 struct io_ev_fd {
 	struct eventfd_ctx	*cq_ev_fd;
+	unsigned int		eventfd_async: 1;
 	struct rcu_head		rcu;
 };
 
@@ -340,7 +341,6 @@ struct io_ring_ctx {
 		unsigned int		flags;
 		unsigned int		compat: 1;
 		unsigned int		drain_next: 1;
-		unsigned int		eventfd_async: 1;
 		unsigned int		restricted: 1;
 		unsigned int		off_timeout_used: 1;
 		unsigned int		drain_active: 1;
@@ -1756,7 +1756,7 @@ static void io_eventfd_signal(struct io_ring_ctx *ctx)
 	if (READ_ONCE(ctx->rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED)
 		goto out;
 
-	if (!ctx->eventfd_async || io_wq_current_is_worker())
+	if (!ev_fd->eventfd_async || io_wq_current_is_worker())
 		eventfd_signal(ev_fd->cq_ev_fd, 1);
 
 out:
@@ -9385,7 +9385,8 @@ static int __io_sqe_buffers_update(struct io_ring_ctx *ctx,
 	return done ? done : err;
 }
 
-static int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg)
+static int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg,
+			       unsigned int eventfd_async)
 {
 	struct io_ev_fd *ev_fd;
 	__s32 __user *fds = arg;
@@ -9409,6 +9410,7 @@ static int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg)
 		kfree(ev_fd);
 		return ret;
 	}
+	ev_fd->eventfd_async = eventfd_async;
 
 	rcu_assign_pointer(ctx->io_ev_fd, ev_fd);
 	return ret;
@@ -11014,6 +11016,7 @@ static bool io_register_op_must_quiesce(int op)
 	case IORING_UNREGISTER_FILES:
 	case IORING_REGISTER_FILES_UPDATE:
 	case IORING_REGISTER_EVENTFD:
+	case IORING_REGISTER_EVENTFD_ASYNC:
 	case IORING_UNREGISTER_EVENTFD:
 	case IORING_REGISTER_PROBE:
 	case IORING_REGISTER_PERSONALITY:
@@ -11114,17 +11117,16 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 		ret = io_register_files_update(ctx, arg, nr_args);
 		break;
 	case IORING_REGISTER_EVENTFD:
-	case IORING_REGISTER_EVENTFD_ASYNC:
 		ret = -EINVAL;
 		if (nr_args != 1)
 			break;
-		ret = io_eventfd_register(ctx, arg);
-		if (ret)
+		ret = io_eventfd_register(ctx, arg, 0);
+		break;
+	case IORING_REGISTER_EVENTFD_ASYNC:
+		ret = -EINVAL;
+		if (nr_args != 1)
 			break;
-		if (opcode == IORING_REGISTER_EVENTFD_ASYNC)
-			ctx->eventfd_async = 1;
-		else
-			ctx->eventfd_async = 0;
+		ret = io_eventfd_register(ctx, arg, 1);
 		break;
 	case IORING_UNREGISTER_EVENTFD:
 		ret = -EINVAL;
-- 
GitLab


From ff16cfcfdaafa3c4287be92c6944fb8ea6dc75d1 Mon Sep 17 00:00:00 2001
From: Usama Arif <usama.arif@bytedance.com>
Date: Fri, 4 Feb 2022 14:51:16 +0000
Subject: [PATCH 1208/1586] io_uring: avoid ring quiesce while registering
 restrictions and enabling rings

IORING_SETUP_R_DISABLED prevents submitting requests and so there will be
no requests until IORING_REGISTER_ENABLE_RINGS is called. And
IORING_REGISTER_RESTRICTIONS works only before
IORING_REGISTER_ENABLE_RINGS is called. Hence ring quiesce is not needed
for these opcodes.

Signed-off-by: Usama Arif <usama.arif@bytedance.com>
Link: https://lore.kernel.org/r/20220204145117.1186568-5-usama.arif@bytedance.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 961866b24b4f2..6ff9f67e2a1ee 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -11021,6 +11021,8 @@ static bool io_register_op_must_quiesce(int op)
 	case IORING_REGISTER_PROBE:
 	case IORING_REGISTER_PERSONALITY:
 	case IORING_UNREGISTER_PERSONALITY:
+	case IORING_REGISTER_ENABLE_RINGS:
+	case IORING_REGISTER_RESTRICTIONS:
 	case IORING_REGISTER_FILES2:
 	case IORING_REGISTER_FILES_UPDATE2:
 	case IORING_REGISTER_BUFFERS2:
-- 
GitLab


From 8bb649ee1da321ec3a1bbec048a425c5ea18ea21 Mon Sep 17 00:00:00 2001
From: Usama Arif <usama.arif@bytedance.com>
Date: Fri, 4 Feb 2022 14:51:17 +0000
Subject: [PATCH 1209/1586] io_uring: remove ring quiesce for io_uring_register

None of the opcodes in io_uring_register use ring quiesce anymore. Hence
io_register_op_must_quiesce always returns false and io_ctx_quiesce is
never called.

Signed-off-by: Usama Arif <usama.arif@bytedance.com>
Link: https://lore.kernel.org/r/20220204145117.1186568-6-usama.arif@bytedance.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 83 ---------------------------------------------------
 1 file changed, 83 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 6ff9f67e2a1ee..49bcc5fc688db 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1292,18 +1292,6 @@ static inline unsigned int io_put_kbuf(struct io_kiocb *req)
 	return __io_put_kbuf(req);
 }
 
-static void io_refs_resurrect(struct percpu_ref *ref, struct completion *compl)
-{
-	bool got = percpu_ref_tryget(ref);
-
-	/* already at zero, wait for ->release() */
-	if (!got)
-		wait_for_completion(compl);
-	percpu_ref_resurrect(ref);
-	if (got)
-		percpu_ref_put(ref);
-}
-
 static bool io_match_task(struct io_kiocb *head, struct task_struct *task,
 			  bool cancel_all)
 	__must_hold(&req->ctx->timeout_lock)
@@ -11007,66 +10995,6 @@ err:
 	return ret;
 }
 
-static bool io_register_op_must_quiesce(int op)
-{
-	switch (op) {
-	case IORING_REGISTER_BUFFERS:
-	case IORING_UNREGISTER_BUFFERS:
-	case IORING_REGISTER_FILES:
-	case IORING_UNREGISTER_FILES:
-	case IORING_REGISTER_FILES_UPDATE:
-	case IORING_REGISTER_EVENTFD:
-	case IORING_REGISTER_EVENTFD_ASYNC:
-	case IORING_UNREGISTER_EVENTFD:
-	case IORING_REGISTER_PROBE:
-	case IORING_REGISTER_PERSONALITY:
-	case IORING_UNREGISTER_PERSONALITY:
-	case IORING_REGISTER_ENABLE_RINGS:
-	case IORING_REGISTER_RESTRICTIONS:
-	case IORING_REGISTER_FILES2:
-	case IORING_REGISTER_FILES_UPDATE2:
-	case IORING_REGISTER_BUFFERS2:
-	case IORING_REGISTER_BUFFERS_UPDATE:
-	case IORING_REGISTER_IOWQ_AFF:
-	case IORING_UNREGISTER_IOWQ_AFF:
-	case IORING_REGISTER_IOWQ_MAX_WORKERS:
-		return false;
-	default:
-		return true;
-	}
-}
-
-static __cold int io_ctx_quiesce(struct io_ring_ctx *ctx)
-{
-	long ret;
-
-	percpu_ref_kill(&ctx->refs);
-
-	/*
-	 * Drop uring mutex before waiting for references to exit. If another
-	 * thread is currently inside io_uring_enter() it might need to grab the
-	 * uring_lock to make progress. If we hold it here across the drain
-	 * wait, then we can deadlock. It's safe to drop the mutex here, since
-	 * no new references will come in after we've killed the percpu ref.
-	 */
-	mutex_unlock(&ctx->uring_lock);
-	do {
-		ret = wait_for_completion_interruptible_timeout(&ctx->ref_comp, HZ);
-		if (ret) {
-			ret = min(0L, ret);
-			break;
-		}
-
-		ret = io_run_task_work_sig();
-		io_req_caches_free(ctx);
-	} while (ret >= 0);
-	mutex_lock(&ctx->uring_lock);
-
-	if (ret)
-		io_refs_resurrect(&ctx->refs, &ctx->ref_comp);
-	return ret;
-}
-
 static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 			       void __user *arg, unsigned nr_args)
 	__releases(ctx->uring_lock)
@@ -11090,12 +11018,6 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 			return -EACCES;
 	}
 
-	if (io_register_op_must_quiesce(opcode)) {
-		ret = io_ctx_quiesce(ctx);
-		if (ret)
-			return ret;
-	}
-
 	switch (opcode) {
 	case IORING_REGISTER_BUFFERS:
 		ret = io_sqe_buffers_register(ctx, arg, nr_args, NULL);
@@ -11200,11 +11122,6 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 		break;
 	}
 
-	if (io_register_op_must_quiesce(opcode)) {
-		/* bring the ctx back to life */
-		percpu_ref_reinit(&ctx->refs);
-		reinit_completion(&ctx->ref_comp);
-	}
 	return ret;
 }
 
-- 
GitLab


From f0a4e62bb5343b3b7163bc851cfb4bebfefcc4e7 Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <nathan@kernel.org>
Date: Mon, 7 Feb 2022 09:24:11 -0700
Subject: [PATCH 1210/1586] io_uring: Fix use of uninitialized ret in
 io_eventfd_register()

Clang warns:

  fs/io_uring.c:9396:9: warning: variable 'ret' is uninitialized when used here [-Wuninitialized]
          return ret;
                 ^~~
  fs/io_uring.c:9373:13: note: initialize the variable 'ret' to silence this warning
          int fd, ret;
                     ^
                      = 0
  1 warning generated.

Just return 0 directly and reduce the scope of ret to the if statement,
as that is the only place that it is used, which is how the function was
before the fixes commit.

Fixes: 1a75fac9a0f9 ("io_uring: avoid ring quiesce while registering/unregistering eventfd")
Link: https://github.com/ClangBuiltLinux/linux/issues/1579
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Link: https://lore.kernel.org/r/20220207162410.1013466-1-nathan@kernel.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 49bcc5fc688db..1dce3f6e70311 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -9378,7 +9378,7 @@ static int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg,
 {
 	struct io_ev_fd *ev_fd;
 	__s32 __user *fds = arg;
-	int fd, ret;
+	int fd;
 
 	ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
 					lockdep_is_held(&ctx->uring_lock));
@@ -9394,14 +9394,14 @@ static int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg,
 
 	ev_fd->cq_ev_fd = eventfd_ctx_fdget(fd);
 	if (IS_ERR(ev_fd->cq_ev_fd)) {
-		ret = PTR_ERR(ev_fd->cq_ev_fd);
+		int ret = PTR_ERR(ev_fd->cq_ev_fd);
 		kfree(ev_fd);
 		return ret;
 	}
 	ev_fd->eventfd_async = eventfd_async;
 
 	rcu_assign_pointer(ctx->io_ev_fd, ev_fd);
-	return ret;
+	return 0;
 }
 
 static void io_eventfd_put(struct rcu_head *rcu)
-- 
GitLab


From 42abc95f05bff5180ac40c7ba5726b73c1d5e2f4 Mon Sep 17 00:00:00 2001
From: Hao Xu <haoxu@linux.alibaba.com>
Date: Sun, 6 Feb 2022 17:52:39 +0800
Subject: [PATCH 1211/1586] io-wq: decouple work_list protection from the big
 wqe->lock

wqe->lock is abused, it now protects acct->work_list, hash stuff,
nr_workers, wqe->free_list and so on. Lets first get the work_list out
of the wqe-lock mess by introduce a specific lock for work list. This
is the first step to solve the huge contension between work insertion
and work consumption.
good thing:
  - split locking for bound and unbound work list
  - reduce contension between work_list visit and (worker's)free_list.

For the hash stuff, since there won't be a work with same file in both
bound and unbound work list, thus they won't visit same hash entry. it
works well to use the new lock to protect hash stuff.

Results:
set max_unbound_worker = 4, test with echo-server:
nice -n -15 ./io_uring_echo_server -p 8081 -f -n 1000 -l 16
(-n connection, -l workload)
before this patch:
Samples: 2M of event 'cycles:ppp', Event count (approx.): 1239982111074
Overhead  Command          Shared Object         Symbol
  28.59%  iou-wrk-10021    [kernel.vmlinux]      [k] native_queued_spin_lock_slowpath
   8.89%  io_uring_echo_s  [kernel.vmlinux]      [k] native_queued_spin_lock_slowpath
   6.20%  iou-wrk-10021    [kernel.vmlinux]      [k] _raw_spin_lock
   2.45%  io_uring_echo_s  [kernel.vmlinux]      [k] io_prep_async_work
   2.36%  iou-wrk-10021    [kernel.vmlinux]      [k] _raw_spin_lock_irqsave
   2.29%  iou-wrk-10021    [kernel.vmlinux]      [k] io_worker_handle_work
   1.29%  io_uring_echo_s  [kernel.vmlinux]      [k] io_wqe_enqueue
   1.06%  iou-wrk-10021    [kernel.vmlinux]      [k] io_wqe_worker
   1.06%  io_uring_echo_s  [kernel.vmlinux]      [k] _raw_spin_lock
   1.03%  iou-wrk-10021    [kernel.vmlinux]      [k] __schedule
   0.99%  iou-wrk-10021    [kernel.vmlinux]      [k] tcp_sendmsg_locked

with this patch:
Samples: 1M of event 'cycles:ppp', Event count (approx.): 708446691943
Overhead  Command          Shared Object         Symbol
  16.86%  iou-wrk-10893    [kernel.vmlinux]      [k] native_queued_spin_lock_slowpat
   9.10%  iou-wrk-10893    [kernel.vmlinux]      [k] _raw_spin_lock
   4.53%  io_uring_echo_s  [kernel.vmlinux]      [k] native_queued_spin_lock_slowpat
   2.87%  iou-wrk-10893    [kernel.vmlinux]      [k] io_worker_handle_work
   2.57%  iou-wrk-10893    [kernel.vmlinux]      [k] _raw_spin_lock_irqsave
   2.56%  io_uring_echo_s  [kernel.vmlinux]      [k] io_prep_async_work
   1.82%  io_uring_echo_s  [kernel.vmlinux]      [k] _raw_spin_lock
   1.33%  iou-wrk-10893    [kernel.vmlinux]      [k] io_wqe_worker
   1.26%  io_uring_echo_s  [kernel.vmlinux]      [k] try_to_wake_up

spin_lock failure from 25.59% + 8.89% =  34.48% to 16.86% + 4.53% = 21.39%
TPS is similar, while cpu usage is from almost 400% to 350%

Signed-off-by: Hao Xu <haoxu@linux.alibaba.com>
Link: https://lore.kernel.org/r/20220206095241.121485-2-haoxu@linux.alibaba.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io-wq.c | 96 +++++++++++++++++++++++++++++-------------------------
 1 file changed, 52 insertions(+), 44 deletions(-)

diff --git a/fs/io-wq.c b/fs/io-wq.c
index bb7f161bb19cd..9595616ccaa3c 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -76,6 +76,7 @@ struct io_wqe_acct {
 	unsigned max_workers;
 	int index;
 	atomic_t nr_running;
+	raw_spinlock_t lock;
 	struct io_wq_work_list work_list;
 	unsigned long flags;
 };
@@ -224,12 +225,12 @@ static void io_worker_exit(struct io_worker *worker)
 	if (worker->flags & IO_WORKER_F_FREE)
 		hlist_nulls_del_rcu(&worker->nulls_node);
 	list_del_rcu(&worker->all_list);
-	preempt_disable();
+	raw_spin_unlock(&wqe->lock);
 	io_wqe_dec_running(worker);
 	worker->flags = 0;
+	preempt_disable();
 	current->flags &= ~PF_IO_WORKER;
 	preempt_enable();
-	raw_spin_unlock(&wqe->lock);
 
 	kfree_rcu(worker, rcu);
 	io_worker_ref_put(wqe->wq);
@@ -385,7 +386,6 @@ fail:
 }
 
 static void io_wqe_dec_running(struct io_worker *worker)
-	__must_hold(wqe->lock)
 {
 	struct io_wqe_acct *acct = io_wqe_get_acct(worker);
 	struct io_wqe *wqe = worker->wqe;
@@ -393,13 +393,18 @@ static void io_wqe_dec_running(struct io_worker *worker)
 	if (!(worker->flags & IO_WORKER_F_UP))
 		return;
 
-	if (atomic_dec_and_test(&acct->nr_running) && io_acct_run_queue(acct)) {
-		atomic_inc(&acct->nr_running);
-		atomic_inc(&wqe->wq->worker_refs);
+	if (!atomic_dec_and_test(&acct->nr_running))
+		return;
+	raw_spin_lock(&wqe->lock);
+	if (!io_acct_run_queue(acct)) {
 		raw_spin_unlock(&wqe->lock);
-		io_queue_worker_create(worker, acct, create_worker_cb);
-		raw_spin_lock(&wqe->lock);
+		return;
 	}
+
+	raw_spin_unlock(&wqe->lock);
+	atomic_inc(&acct->nr_running);
+	atomic_inc(&wqe->wq->worker_refs);
+	io_queue_worker_create(worker, acct, create_worker_cb);
 }
 
 /*
@@ -407,11 +412,12 @@ static void io_wqe_dec_running(struct io_worker *worker)
  * it's currently on the freelist
  */
 static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker)
-	__must_hold(wqe->lock)
 {
 	if (worker->flags & IO_WORKER_F_FREE) {
 		worker->flags &= ~IO_WORKER_F_FREE;
+		raw_spin_lock(&wqe->lock);
 		hlist_nulls_del_init_rcu(&worker->nulls_node);
+		raw_spin_unlock(&wqe->lock);
 	}
 }
 
@@ -456,7 +462,7 @@ static bool io_wait_on_hash(struct io_wqe *wqe, unsigned int hash)
 
 static struct io_wq_work *io_get_next_work(struct io_wqe_acct *acct,
 					   struct io_worker *worker)
-	__must_hold(wqe->lock)
+	__must_hold(acct->lock)
 {
 	struct io_wq_work_node *node, *prev;
 	struct io_wq_work *work, *tail;
@@ -498,9 +504,9 @@ static struct io_wq_work *io_get_next_work(struct io_wqe_acct *acct,
 		 * work being added and clearing the stalled bit.
 		 */
 		set_bit(IO_ACCT_STALLED_BIT, &acct->flags);
-		raw_spin_unlock(&wqe->lock);
+		raw_spin_unlock(&acct->lock);
 		unstalled = io_wait_on_hash(wqe, stall_hash);
-		raw_spin_lock(&wqe->lock);
+		raw_spin_lock(&acct->lock);
 		if (unstalled) {
 			clear_bit(IO_ACCT_STALLED_BIT, &acct->flags);
 			if (wq_has_sleeper(&wqe->wq->hash->wait))
@@ -538,7 +544,7 @@ static void io_assign_current_work(struct io_worker *worker,
 static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work);
 
 static void io_worker_handle_work(struct io_worker *worker)
-	__releases(wqe->lock)
+	__releases(acct->lock)
 {
 	struct io_wqe_acct *acct = io_wqe_get_acct(worker);
 	struct io_wqe *wqe = worker->wqe;
@@ -556,6 +562,7 @@ static void io_worker_handle_work(struct io_worker *worker)
 		 * clear the stalled flag.
 		 */
 		work = io_get_next_work(acct, worker);
+		raw_spin_unlock(&acct->lock);
 		if (work) {
 			__io_worker_busy(wqe, worker);
 
@@ -569,10 +576,9 @@ static void io_worker_handle_work(struct io_worker *worker)
 			raw_spin_lock(&worker->lock);
 			worker->next_work = work;
 			raw_spin_unlock(&worker->lock);
-		}
-		raw_spin_unlock(&wqe->lock);
-		if (!work)
+		} else {
 			break;
+		}
 		io_assign_current_work(worker, work);
 		__set_current_state(TASK_RUNNING);
 
@@ -609,7 +615,7 @@ static void io_worker_handle_work(struct io_worker *worker)
 			}
 		} while (work);
 
-		raw_spin_lock(&wqe->lock);
+		raw_spin_lock(&acct->lock);
 	} while (1);
 }
 
@@ -634,11 +640,14 @@ static int io_wqe_worker(void *data)
 
 		set_current_state(TASK_INTERRUPTIBLE);
 loop:
-		raw_spin_lock(&wqe->lock);
+		raw_spin_lock(&acct->lock);
 		if (io_acct_run_queue(acct)) {
 			io_worker_handle_work(worker);
 			goto loop;
+		} else {
+			raw_spin_unlock(&acct->lock);
 		}
+		raw_spin_lock(&wqe->lock);
 		/* timed out, exit unless we're the last worker */
 		if (last_timeout && acct->nr_workers > 1) {
 			acct->nr_workers--;
@@ -663,7 +672,7 @@ loop:
 	}
 
 	if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
-		raw_spin_lock(&wqe->lock);
+		raw_spin_lock(&acct->lock);
 		io_worker_handle_work(worker);
 	}
 
@@ -705,10 +714,7 @@ void io_wq_worker_sleeping(struct task_struct *tsk)
 		return;
 
 	worker->flags &= ~IO_WORKER_F_RUNNING;
-
-	raw_spin_lock(&worker->wqe->lock);
 	io_wqe_dec_running(worker);
-	raw_spin_unlock(&worker->wqe->lock);
 }
 
 static void io_init_new_worker(struct io_wqe *wqe, struct io_worker *worker,
@@ -778,10 +784,12 @@ static void create_worker_cont(struct callback_head *cb)
 				.cancel_all	= true,
 			};
 
+			raw_spin_unlock(&wqe->lock);
 			while (io_acct_cancel_pending_work(wqe, acct, &match))
-				raw_spin_lock(&wqe->lock);
+				;
+		} else {
+			raw_spin_unlock(&wqe->lock);
 		}
-		raw_spin_unlock(&wqe->lock);
 		io_worker_ref_put(wqe->wq);
 		kfree(worker);
 		return;
@@ -914,6 +922,7 @@ static bool io_wq_work_match_item(struct io_wq_work *work, void *data)
 static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
 {
 	struct io_wqe_acct *acct = io_work_get_acct(wqe, work);
+	struct io_cb_cancel_data match;
 	unsigned work_flags = work->flags;
 	bool do_create;
 
@@ -927,10 +936,12 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
 		return;
 	}
 
-	raw_spin_lock(&wqe->lock);
+	raw_spin_lock(&acct->lock);
 	io_wqe_insert_work(wqe, work);
 	clear_bit(IO_ACCT_STALLED_BIT, &acct->flags);
+	raw_spin_unlock(&acct->lock);
 
+	raw_spin_lock(&wqe->lock);
 	rcu_read_lock();
 	do_create = !io_wqe_activate_free_worker(wqe, acct);
 	rcu_read_unlock();
@@ -946,18 +957,18 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
 			return;
 
 		raw_spin_lock(&wqe->lock);
-		/* fatal condition, failed to create the first worker */
-		if (!acct->nr_workers) {
-			struct io_cb_cancel_data match = {
-				.fn		= io_wq_work_match_item,
-				.data		= work,
-				.cancel_all	= false,
-			};
-
-			if (io_acct_cancel_pending_work(wqe, acct, &match))
-				raw_spin_lock(&wqe->lock);
+		if (acct->nr_workers) {
+			raw_spin_unlock(&wqe->lock);
+			return;
 		}
 		raw_spin_unlock(&wqe->lock);
+
+		/* fatal condition, failed to create the first worker */
+		match.fn		= io_wq_work_match_item,
+		match.data		= work,
+		match.cancel_all	= false,
+
+		io_acct_cancel_pending_work(wqe, acct, &match);
 	}
 }
 
@@ -1032,22 +1043,23 @@ static inline void io_wqe_remove_pending(struct io_wqe *wqe,
 static bool io_acct_cancel_pending_work(struct io_wqe *wqe,
 					struct io_wqe_acct *acct,
 					struct io_cb_cancel_data *match)
-	__releases(wqe->lock)
 {
 	struct io_wq_work_node *node, *prev;
 	struct io_wq_work *work;
 
+	raw_spin_lock(&acct->lock);
 	wq_list_for_each(node, prev, &acct->work_list) {
 		work = container_of(node, struct io_wq_work, list);
 		if (!match->fn(work, match->data))
 			continue;
 		io_wqe_remove_pending(wqe, work, prev);
-		raw_spin_unlock(&wqe->lock);
+		raw_spin_unlock(&acct->lock);
 		io_run_cancel(work, wqe);
 		match->nr_pending++;
 		/* not safe to continue after unlock */
 		return true;
 	}
+	raw_spin_unlock(&acct->lock);
 
 	return false;
 }
@@ -1061,7 +1073,6 @@ retry:
 		struct io_wqe_acct *acct = io_get_acct(wqe, i == 0);
 
 		if (io_acct_cancel_pending_work(wqe, acct, match)) {
-			raw_spin_lock(&wqe->lock);
 			if (match->cancel_all)
 				goto retry;
 			break;
@@ -1103,13 +1114,11 @@ enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel,
 	for_each_node(node) {
 		struct io_wqe *wqe = wq->wqes[node];
 
-		raw_spin_lock(&wqe->lock);
 		io_wqe_cancel_pending_work(wqe, &match);
-		if (match.nr_pending && !match.cancel_all) {
-			raw_spin_unlock(&wqe->lock);
+		if (match.nr_pending && !match.cancel_all)
 			return IO_WQ_CANCEL_OK;
-		}
 
+		raw_spin_lock(&wqe->lock);
 		io_wqe_cancel_running_work(wqe, &match);
 		raw_spin_unlock(&wqe->lock);
 		if (match.nr_running && !match.cancel_all)
@@ -1190,6 +1199,7 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
 			acct->index = i;
 			atomic_set(&acct->nr_running, 0);
 			INIT_WQ_LIST(&acct->work_list);
+			raw_spin_lock_init(&acct->lock);
 		}
 		wqe->wq = wq;
 		raw_spin_lock_init(&wqe->lock);
@@ -1282,9 +1292,7 @@ static void io_wq_destroy(struct io_wq *wq)
 			.fn		= io_wq_work_match_all,
 			.cancel_all	= true,
 		};
-		raw_spin_lock(&wqe->lock);
 		io_wqe_cancel_pending_work(wqe, &match);
-		raw_spin_unlock(&wqe->lock);
 		free_cpumask_var(wqe->cpu_mask);
 		kfree(wqe);
 	}
-- 
GitLab


From e13fb1fe1483f6cd6452f25b866ffadf5ee0eff6 Mon Sep 17 00:00:00 2001
From: Hao Xu <haoxu@linux.alibaba.com>
Date: Sun, 6 Feb 2022 17:52:40 +0800
Subject: [PATCH 1212/1586] io-wq: reduce acct->lock crossing functions
 lock/unlock

reduce acct->lock lock and unlock in different functions to make the
code clearer.

Signed-off-by: Hao Xu <haoxu@linux.alibaba.com>
Link: https://lore.kernel.org/r/20220206095241.121485-3-haoxu@linux.alibaba.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io-wq.c | 32 ++++++++++++--------------------
 1 file changed, 12 insertions(+), 20 deletions(-)

diff --git a/fs/io-wq.c b/fs/io-wq.c
index 9595616ccaa3c..f7b7fa396fafc 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -239,10 +239,15 @@ static void io_worker_exit(struct io_worker *worker)
 
 static inline bool io_acct_run_queue(struct io_wqe_acct *acct)
 {
+	bool ret = false;
+
+	raw_spin_lock(&acct->lock);
 	if (!wq_list_empty(&acct->work_list) &&
 	    !test_bit(IO_ACCT_STALLED_BIT, &acct->flags))
-		return true;
-	return false;
+		ret = true;
+	raw_spin_unlock(&acct->lock);
+
+	return ret;
 }
 
 /*
@@ -395,13 +400,9 @@ static void io_wqe_dec_running(struct io_worker *worker)
 
 	if (!atomic_dec_and_test(&acct->nr_running))
 		return;
-	raw_spin_lock(&wqe->lock);
-	if (!io_acct_run_queue(acct)) {
-		raw_spin_unlock(&wqe->lock);
+	if (!io_acct_run_queue(acct))
 		return;
-	}
 
-	raw_spin_unlock(&wqe->lock);
 	atomic_inc(&acct->nr_running);
 	atomic_inc(&wqe->wq->worker_refs);
 	io_queue_worker_create(worker, acct, create_worker_cb);
@@ -544,7 +545,6 @@ static void io_assign_current_work(struct io_worker *worker,
 static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work);
 
 static void io_worker_handle_work(struct io_worker *worker)
-	__releases(acct->lock)
 {
 	struct io_wqe_acct *acct = io_wqe_get_acct(worker);
 	struct io_wqe *wqe = worker->wqe;
@@ -561,6 +561,7 @@ static void io_worker_handle_work(struct io_worker *worker)
 		 * can't make progress, any work completion or insertion will
 		 * clear the stalled flag.
 		 */
+		raw_spin_lock(&acct->lock);
 		work = io_get_next_work(acct, worker);
 		raw_spin_unlock(&acct->lock);
 		if (work) {
@@ -614,8 +615,6 @@ static void io_worker_handle_work(struct io_worker *worker)
 					wake_up(&wq->hash->wait);
 			}
 		} while (work);
-
-		raw_spin_lock(&acct->lock);
 	} while (1);
 }
 
@@ -639,14 +638,9 @@ static int io_wqe_worker(void *data)
 		long ret;
 
 		set_current_state(TASK_INTERRUPTIBLE);
-loop:
-		raw_spin_lock(&acct->lock);
-		if (io_acct_run_queue(acct)) {
+		while (io_acct_run_queue(acct))
 			io_worker_handle_work(worker);
-			goto loop;
-		} else {
-			raw_spin_unlock(&acct->lock);
-		}
+
 		raw_spin_lock(&wqe->lock);
 		/* timed out, exit unless we're the last worker */
 		if (last_timeout && acct->nr_workers > 1) {
@@ -671,10 +665,8 @@ loop:
 		last_timeout = !ret;
 	}
 
-	if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
-		raw_spin_lock(&acct->lock);
+	if (test_bit(IO_WQ_BIT_EXIT, &wq->state))
 		io_worker_handle_work(worker);
-	}
 
 	audit_free(current);
 	io_worker_exit(worker);
-- 
GitLab


From 86127bb18aea7e553cfd0842bcd33a6dc80bfbc8 Mon Sep 17 00:00:00 2001
From: Hao Xu <haoxu@linux.alibaba.com>
Date: Sun, 6 Feb 2022 17:52:41 +0800
Subject: [PATCH 1213/1586] io-wq: use IO_WQ_ACCT_NR rather than hardcoded
 number

It's better to use the defined enum stuff not the hardcoded number to
define array.

Signed-off-by: Hao Xu <haoxu@linux.alibaba.com>
Link: https://lore.kernel.org/r/20220206095241.121485-4-haoxu@linux.alibaba.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io-wq.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/io-wq.c b/fs/io-wq.c
index f7b7fa396fafc..5b93fa67d3464 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -92,7 +92,7 @@ enum {
  */
 struct io_wqe {
 	raw_spinlock_t lock;
-	struct io_wqe_acct acct[2];
+	struct io_wqe_acct acct[IO_WQ_ACCT_NR];
 
 	int node;
 
@@ -1376,7 +1376,7 @@ int io_wq_max_workers(struct io_wq *wq, int *new_count)
 	BUILD_BUG_ON((int) IO_WQ_ACCT_UNBOUND != (int) IO_WQ_UNBOUND);
 	BUILD_BUG_ON((int) IO_WQ_ACCT_NR      != 2);
 
-	for (i = 0; i < 2; i++) {
+	for (i = 0; i < IO_WQ_ACCT_NR; i++) {
 		if (new_count[i] > task_rlimit(current, RLIMIT_NPROC))
 			new_count[i] = task_rlimit(current, RLIMIT_NPROC);
 	}
-- 
GitLab


From d5ec1dfaf59bf1632d7f2114d209bf80bfbd907a Mon Sep 17 00:00:00 2001
From: Stefan Roesch <shr@fb.com>
Date: Mon, 14 Feb 2022 10:04:29 -0800
Subject: [PATCH 1214/1586] io-uring: add __fill_cqe function

This introduces the __fill_cqe function. This is necessary
to correctly issue the io_uring_complete tracepoint.

Signed-off-by: Stefan Roesch <shr@fb.com>
Link: https://lore.kernel.org/r/20220214180430.70572-2-shr@fb.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 1dce3f6e70311..b09c403534159 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1914,13 +1914,11 @@ static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data,
 	return true;
 }
 
-static inline bool __io_fill_cqe(struct io_ring_ctx *ctx, u64 user_data,
+static inline bool __fill_cqe(struct io_ring_ctx *ctx, u64 user_data,
 				 s32 res, u32 cflags)
 {
 	struct io_uring_cqe *cqe;
 
-	trace_io_uring_complete(ctx, user_data, res, cflags);
-
 	/*
 	 * If we can't get a cq entry, userspace overflowed the
 	 * submission (by quite a lot). Increment the overflow count in
@@ -1936,17 +1934,24 @@ static inline bool __io_fill_cqe(struct io_ring_ctx *ctx, u64 user_data,
 	return io_cqring_event_overflow(ctx, user_data, res, cflags);
 }
 
+static inline bool __io_fill_cqe(struct io_kiocb *req, s32 res, u32 cflags)
+{
+	trace_io_uring_complete(req->ctx, req->user_data, res, cflags);
+	return __fill_cqe(req->ctx, req->user_data, res, cflags);
+}
+
 static noinline void io_fill_cqe_req(struct io_kiocb *req, s32 res, u32 cflags)
 {
 	if (!(req->flags & REQ_F_CQE_SKIP))
-		__io_fill_cqe(req->ctx, req->user_data, res, cflags);
+		__io_fill_cqe(req, res, cflags);
 }
 
 static noinline bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data,
 				     s32 res, u32 cflags)
 {
 	ctx->cq_extra++;
-	return __io_fill_cqe(ctx, user_data, res, cflags);
+	trace_io_uring_complete(ctx, user_data, res, cflags);
+	return __fill_cqe(ctx, user_data, res, cflags);
 }
 
 static void __io_req_complete_post(struct io_kiocb *req, s32 res,
@@ -1955,7 +1960,7 @@ static void __io_req_complete_post(struct io_kiocb *req, s32 res,
 	struct io_ring_ctx *ctx = req->ctx;
 
 	if (!(req->flags & REQ_F_CQE_SKIP))
-		__io_fill_cqe(ctx, req->user_data, res, cflags);
+		__io_fill_cqe(req, res, cflags);
 	/*
 	 * If we're the last reference to this request, add to our locked
 	 * free_list cache.
@@ -2544,8 +2549,7 @@ static void __io_submit_flush_completions(struct io_ring_ctx *ctx)
 						    comp_list);
 
 			if (!(req->flags & REQ_F_CQE_SKIP))
-				__io_fill_cqe(ctx, req->user_data, req->result,
-					      req->cflags);
+				__io_fill_cqe(req, req->result, req->cflags);
 		}
 
 		io_commit_cqring(ctx);
@@ -2667,7 +2671,7 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
 		if (unlikely(req->flags & REQ_F_CQE_SKIP))
 			continue;
 
-		__io_fill_cqe(ctx, req->user_data, req->result, io_put_kbuf(req));
+		__io_fill_cqe(req, req->result, io_put_kbuf(req));
 		nr_events++;
 	}
 
-- 
GitLab


From 502c87d65564cbfd65b1621309bcd900390eca81 Mon Sep 17 00:00:00 2001
From: Stefan Roesch <shr@fb.com>
Date: Mon, 14 Feb 2022 10:04:30 -0800
Subject: [PATCH 1215/1586] io-uring: Make tracepoints consistent.

This makes the io-uring tracepoints consistent. Where it makes sense
the tracepoints start with the following four fields:
- context (ring)
- request
- user_data
- opcode.

Signed-off-by: Stefan Roesch <shr@fb.com>
Link: https://lore.kernel.org/r/20220214180430.70572-3-shr@fb.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c                   |  24 +--
 include/trace/events/io_uring.h | 318 +++++++++++++++-----------------
 2 files changed, 166 insertions(+), 176 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index b09c403534159..5ec24b0438f13 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1603,8 +1603,8 @@ static void io_queue_async_work(struct io_kiocb *req, bool *dont_use)
 	if (WARN_ON_ONCE(!same_thread_group(req->task, current)))
 		req->work.flags |= IO_WQ_WORK_CANCEL;
 
-	trace_io_uring_queue_async_work(ctx, io_wq_is_hashed(&req->work), req,
-					&req->work, req->flags);
+	trace_io_uring_queue_async_work(ctx, req, req->user_data, req->opcode, req->flags,
+					&req->work, io_wq_is_hashed(&req->work));
 	io_wq_enqueue(tctx->io_wq, &req->work);
 	if (link)
 		io_queue_linked_timeout(link);
@@ -1936,7 +1936,7 @@ static inline bool __fill_cqe(struct io_ring_ctx *ctx, u64 user_data,
 
 static inline bool __io_fill_cqe(struct io_kiocb *req, s32 res, u32 cflags)
 {
-	trace_io_uring_complete(req->ctx, req->user_data, res, cflags);
+	trace_io_uring_complete(req->ctx, req, req->user_data, res, cflags);
 	return __fill_cqe(req->ctx, req->user_data, res, cflags);
 }
 
@@ -1950,7 +1950,7 @@ static noinline bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data,
 				     s32 res, u32 cflags)
 {
 	ctx->cq_extra++;
-	trace_io_uring_complete(ctx, user_data, res, cflags);
+	trace_io_uring_complete(ctx, NULL, user_data, res, cflags);
 	return __fill_cqe(ctx, user_data, res, cflags);
 }
 
@@ -2202,7 +2202,9 @@ static void io_fail_links(struct io_kiocb *req)
 		nxt = link->link;
 		link->link = NULL;
 
-		trace_io_uring_fail_link(req, link);
+		trace_io_uring_fail_link(req->ctx, req, req->user_data,
+					req->opcode, link);
+
 		if (!ignore_cqes) {
 			link->flags &= ~REQ_F_CQE_SKIP;
 			io_fill_cqe_req(link, res, 0);
@@ -5629,7 +5631,7 @@ static void __io_poll_execute(struct io_kiocb *req, int mask)
 	else
 		req->io_task_work.func = io_apoll_task_func;
 
-	trace_io_uring_task_add(req->ctx, req->opcode, req->user_data, mask);
+	trace_io_uring_task_add(req->ctx, req, req->user_data, req->opcode, mask);
 	io_req_task_work_add(req, false);
 }
 
@@ -5858,7 +5860,7 @@ static int io_arm_poll_handler(struct io_kiocb *req)
 	if (ret || ipt.error)
 		return ret ? IO_APOLL_READY : IO_APOLL_ABORTED;
 
-	trace_io_uring_poll_arm(ctx, req, req->opcode, req->user_data,
+	trace_io_uring_poll_arm(ctx, req, req->user_data, req->opcode,
 				mask, apoll->poll.events);
 	return IO_APOLL_OK;
 }
@@ -6667,7 +6669,7 @@ fail:
 		goto queue;
 	}
 
-	trace_io_uring_defer(ctx, req, req->user_data);
+	trace_io_uring_defer(ctx, req, req->user_data, req->opcode);
 	de->req = req;
 	de->seq = seq;
 	list_add_tail(&de->list, &ctx->defer_list);
@@ -7001,7 +7003,7 @@ static struct file *io_file_get_normal(struct io_ring_ctx *ctx,
 {
 	struct file *file = fget(fd);
 
-	trace_io_uring_file_get(ctx, fd);
+	trace_io_uring_file_get(ctx, req, req->user_data, fd);
 
 	/* we don't allow fixed io_uring files */
 	if (file && unlikely(file->f_op == &io_uring_fops))
@@ -7299,7 +7301,7 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
 
 	ret = io_init_req(ctx, req, sqe);
 	if (unlikely(ret)) {
-		trace_io_uring_req_failed(sqe, ret);
+		trace_io_uring_req_failed(sqe, ctx, req, ret);
 
 		/* fail even hard links since we don't submit */
 		if (link->head) {
@@ -7326,7 +7328,7 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
 	}
 
 	/* don't need @sqe from now on */
-	trace_io_uring_submit_sqe(ctx, req, req->opcode, req->user_data,
+	trace_io_uring_submit_sqe(ctx, req, req->user_data, req->opcode,
 				  req->flags, true,
 				  ctx->flags & IORING_SETUP_SQPOLL);
 
diff --git a/include/trace/events/io_uring.h b/include/trace/events/io_uring.h
index 098beda7601a1..18d4341c581c3 100644
--- a/include/trace/events/io_uring.h
+++ b/include/trace/events/io_uring.h
@@ -29,15 +29,15 @@ TRACE_EVENT(io_uring_create,
 	TP_ARGS(fd, ctx, sq_entries, cq_entries, flags),
 
 	TP_STRUCT__entry (
-		__field(  int,		fd			)
-		__field(  void *,	ctx			)
+		__field(  int,		fd		)
+		__field(  void *,	ctx		)
 		__field(  u32,		sq_entries	)
 		__field(  u32,		cq_entries	)
 		__field(  u32,		flags		)
 	),
 
 	TP_fast_assign(
-		__entry->fd			= fd;
+		__entry->fd		= fd;
 		__entry->ctx		= ctx;
 		__entry->sq_entries	= sq_entries;
 		__entry->cq_entries	= cq_entries;
@@ -72,11 +72,11 @@ TRACE_EVENT(io_uring_register,
 	TP_ARGS(ctx, opcode, nr_files, nr_bufs, ret),
 
 	TP_STRUCT__entry (
-		__field(  void *,	ctx			)
-		__field(  unsigned,	opcode		)
-		__field(  unsigned,	nr_files	)
-		__field(  unsigned,	nr_bufs		)
-		__field(  long,		ret			)
+		__field(  void *,	ctx	)
+		__field(  unsigned,	opcode	)
+		__field(  unsigned,	nr_files)
+		__field(  unsigned,	nr_bufs	)
+		__field(  long,		ret	)
 	),
 
 	TP_fast_assign(
@@ -97,6 +97,8 @@ TRACE_EVENT(io_uring_register,
  * io_uring_file_get - called before getting references to an SQE file
  *
  * @ctx:	pointer to a ring context structure
+ * @req:	pointer to a submitted request
+ * @user_data:	user data associated with the request
  * @fd:		SQE file descriptor
  *
  * Allows to trace out how often an SQE file reference is obtained, which can
@@ -105,59 +107,71 @@ TRACE_EVENT(io_uring_register,
  */
 TRACE_EVENT(io_uring_file_get,
 
-	TP_PROTO(void *ctx, int fd),
+	TP_PROTO(void *ctx, void *req, unsigned long long user_data, int fd),
 
-	TP_ARGS(ctx, fd),
+	TP_ARGS(ctx, req, user_data, fd),
 
 	TP_STRUCT__entry (
-		__field(  void *,	ctx	)
-		__field(  int,		fd	)
+		__field(  void *,	ctx		)
+		__field(  void *,	req		)
+		__field(  u64,		user_data	)
+		__field(  int,		fd		)
 	),
 
 	TP_fast_assign(
-		__entry->ctx	= ctx;
+		__entry->ctx		= ctx;
+		__entry->req		= req;
+		__entry->user_data	= user_data;
 		__entry->fd		= fd;
 	),
 
-	TP_printk("ring %p, fd %d", __entry->ctx, __entry->fd)
+	TP_printk("ring %p, req %p, user_data %llu, fd %d",
+		__entry->ctx, __entry->req, __entry->user_data, __entry->fd)
 );
 
 /**
  * io_uring_queue_async_work - called before submitting a new async work
  *
  * @ctx:	pointer to a ring context structure
- * @hashed:	type of workqueue, hashed or normal
  * @req:	pointer to a submitted request
+ * @user_data:	user data associated with the request
+ * @opcode:	opcode of request
+ * @flags	request flags
  * @work:	pointer to a submitted io_wq_work
+ * @rw:		type of workqueue, hashed or normal
  *
  * Allows to trace asynchronous work submission.
  */
 TRACE_EVENT(io_uring_queue_async_work,
 
-	TP_PROTO(void *ctx, int rw, void * req, struct io_wq_work *work,
-			 unsigned int flags),
+	TP_PROTO(void *ctx, void * req, unsigned long long user_data, u8 opcode,
+		unsigned int flags, struct io_wq_work *work, int rw),
 
-	TP_ARGS(ctx, rw, req, work, flags),
+	TP_ARGS(ctx, req, user_data, flags, opcode, work, rw),
 
 	TP_STRUCT__entry (
-		__field(  void *,			ctx	)
-		__field(  int,				rw	)
-		__field(  void *,			req	)
-		__field(  struct io_wq_work *,		work	)
-		__field(  unsigned int,			flags	)
+		__field(  void *,			ctx		)
+		__field(  void *,			req		)
+		__field(  u64,				user_data	)
+		__field(  u8,				opcode		)
+		__field(  unsigned int,			flags		)
+		__field(  struct io_wq_work *,		work		)
+		__field(  int,				rw		)
 	),
 
 	TP_fast_assign(
-		__entry->ctx	= ctx;
-		__entry->rw	= rw;
-		__entry->req	= req;
-		__entry->work	= work;
-		__entry->flags	= flags;
+		__entry->ctx		= ctx;
+		__entry->req		= req;
+		__entry->user_data	= user_data;
+		__entry->flags		= flags;
+		__entry->opcode		= opcode;
+		__entry->work		= work;
+		__entry->rw		= rw;
 	),
 
-	TP_printk("ring %p, request %p, flags %d, %s queue, work %p",
-			  __entry->ctx, __entry->req, __entry->flags,
-			  __entry->rw ? "hashed" : "normal", __entry->work)
+	TP_printk("ring %p, request %p, user_data %llu, opcode %d, flags %d, %s queue, work %p",
+		__entry->ctx, __entry->req, __entry->user_data, __entry->opcode,
+		__entry->flags, __entry->rw ? "hashed" : "normal", __entry->work)
 );
 
 /**
@@ -166,30 +180,33 @@ TRACE_EVENT(io_uring_queue_async_work,
  * @ctx:	pointer to a ring context structure
  * @req:	pointer to a deferred request
  * @user_data:	user data associated with the request
+ * @opcode:	opcode of request
  *
  * Allows to track deferred requests, to get an insight about what requests are
  * not started immediately.
  */
 TRACE_EVENT(io_uring_defer,
 
-	TP_PROTO(void *ctx, void *req, unsigned long long user_data),
+	TP_PROTO(void *ctx, void *req, unsigned long long user_data, u8 opcode),
 
-	TP_ARGS(ctx, req, user_data),
+	TP_ARGS(ctx, req, user_data, opcode),
 
 	TP_STRUCT__entry (
-		__field(  void *,	ctx		)
-		__field(  void *,	req		)
-		__field(  unsigned long long, data	)
+		__field(  void *,		ctx	)
+		__field(  void *,		req	)
+		__field(  unsigned long long,	data	)
+		__field(  u8,			opcode	)
 	),
 
 	TP_fast_assign(
 		__entry->ctx	= ctx;
 		__entry->req	= req;
 		__entry->data	= user_data;
+		__entry->opcode	= opcode;
 	),
 
-	TP_printk("ring %p, request %p user_data %llu", __entry->ctx,
-			__entry->req, __entry->data)
+	TP_printk("ring %p, request %p, user_data %llu, opcode %d",
+		__entry->ctx, __entry->req, __entry->data, __entry->opcode)
 );
 
 /**
@@ -247,7 +264,7 @@ TRACE_EVENT(io_uring_cqring_wait,
 	),
 
 	TP_fast_assign(
-		__entry->ctx	= ctx;
+		__entry->ctx		= ctx;
 		__entry->min_events	= min_events;
 	),
 
@@ -257,7 +274,10 @@ TRACE_EVENT(io_uring_cqring_wait,
 /**
  * io_uring_fail_link - called before failing a linked request
  *
+ * @ctx:	pointer to a ring context structure
  * @req:	request, which links were cancelled
+ * @user_data:	user data associated with the request
+ * @opcode:	opcode of request
  * @link:	cancelled link
  *
  * Allows to track linked requests cancellation, to see not only that some work
@@ -265,27 +285,36 @@ TRACE_EVENT(io_uring_cqring_wait,
  */
 TRACE_EVENT(io_uring_fail_link,
 
-	TP_PROTO(void *req, void *link),
+	TP_PROTO(void *ctx, void *req, unsigned long long user_data, u8 opcode, void *link),
 
-	TP_ARGS(req, link),
+	TP_ARGS(ctx, req, user_data, opcode, link),
 
 	TP_STRUCT__entry (
-		__field(  void *,	req	)
-		__field(  void *,	link	)
+		__field(  void *,		ctx		)
+		__field(  void *,		req		)
+		__field(  unsigned long long,	user_data	)
+		__field(  u8,			opcode		)
+		__field(  void *,		link		)
 	),
 
 	TP_fast_assign(
-		__entry->req	= req;
-		__entry->link	= link;
+		__entry->ctx		= ctx;
+		__entry->req		= req;
+		__entry->user_data	= user_data;
+		__entry->opcode		= opcode;
+		__entry->link		= link;
 	),
 
-	TP_printk("request %p, link %p", __entry->req, __entry->link)
+	TP_printk("ring %p, request %p, user_data %llu, opcode %d, link %p",
+		__entry->ctx, __entry->req, __entry->user_data, __entry->opcode,
+		__entry->link)
 );
 
 /**
  * io_uring_complete - called when completing an SQE
  *
  * @ctx:		pointer to a ring context structure
+ * @req:		pointer to a submitted request
  * @user_data:		user data associated with the request
  * @res:		result of the request
  * @cflags:		completion flags
@@ -293,12 +322,13 @@ TRACE_EVENT(io_uring_fail_link,
  */
 TRACE_EVENT(io_uring_complete,
 
-	TP_PROTO(void *ctx, u64 user_data, int res, unsigned cflags),
+	TP_PROTO(void *ctx, void *req, u64 user_data, int res, unsigned cflags),
 
-	TP_ARGS(ctx, user_data, res, cflags),
+	TP_ARGS(ctx, req, user_data, res, cflags),
 
 	TP_STRUCT__entry (
 		__field(  void *,	ctx		)
+		__field(  void *,	req		)
 		__field(  u64,		user_data	)
 		__field(  int,		res		)
 		__field(  unsigned,	cflags		)
@@ -306,14 +336,16 @@ TRACE_EVENT(io_uring_complete,
 
 	TP_fast_assign(
 		__entry->ctx		= ctx;
+		__entry->req		= req;
 		__entry->user_data	= user_data;
 		__entry->res		= res;
 		__entry->cflags		= cflags;
 	),
 
-	TP_printk("ring %p, user_data 0x%llx, result %d, cflags %x",
-			  __entry->ctx, (unsigned long long)__entry->user_data,
-			  __entry->res, __entry->cflags)
+	TP_printk("ring %p, req %p, user_data 0x%llx, result %d, cflags %x",
+		__entry->ctx, __entry->req,
+		(unsigned long long)__entry->user_data,
+		__entry->res, __entry->cflags)
 );
 
 /**
@@ -321,8 +353,8 @@ TRACE_EVENT(io_uring_complete,
  *
  * @ctx:		pointer to a ring context structure
  * @req:		pointer to a submitted request
- * @opcode:		opcode of request
  * @user_data:		user data associated with the request
+ * @opcode:		opcode of request
  * @flags		request flags
  * @force_nonblock:	whether a context blocking or not
  * @sq_thread:		true if sq_thread has submitted this SQE
@@ -332,34 +364,34 @@ TRACE_EVENT(io_uring_complete,
  */
 TRACE_EVENT(io_uring_submit_sqe,
 
-	TP_PROTO(void *ctx, void *req, u8 opcode, u64 user_data, u32 flags,
+	TP_PROTO(void *ctx, void *req, unsigned long long user_data, u8 opcode, u32 flags,
 		 bool force_nonblock, bool sq_thread),
 
-	TP_ARGS(ctx, req, opcode, user_data, flags, force_nonblock, sq_thread),
+	TP_ARGS(ctx, req, user_data, opcode, flags, force_nonblock, sq_thread),
 
 	TP_STRUCT__entry (
-		__field(  void *,	ctx		)
-		__field(  void *,	req		)
-		__field(  u8,		opcode		)
-		__field(  u64,		user_data	)
-		__field(  u32,		flags		)
-		__field(  bool,		force_nonblock	)
-		__field(  bool,		sq_thread	)
+		__field(  void *,		ctx		)
+		__field(  void *,		req		)
+		__field(  unsigned long long,	user_data	)
+		__field(  u8,			opcode		)
+		__field(  u32,			flags		)
+		__field(  bool,			force_nonblock	)
+		__field(  bool,			sq_thread	)
 	),
 
 	TP_fast_assign(
 		__entry->ctx		= ctx;
 		__entry->req		= req;
-		__entry->opcode		= opcode;
 		__entry->user_data	= user_data;
+		__entry->opcode		= opcode;
 		__entry->flags		= flags;
 		__entry->force_nonblock	= force_nonblock;
 		__entry->sq_thread	= sq_thread;
 	),
 
-	TP_printk("ring %p, req %p, op %d, data 0x%llx, flags %u, "
+	TP_printk("ring %p, req %p, user_data %llu, opcode %d, flags %u, "
 		  "non block %d, sq_thread %d", __entry->ctx, __entry->req,
-		  __entry->opcode, (unsigned long long)__entry->user_data,
+		  __entry->user_data, __entry->opcode,
 		  __entry->flags, __entry->force_nonblock, __entry->sq_thread)
 );
 
@@ -368,8 +400,8 @@ TRACE_EVENT(io_uring_submit_sqe,
  *
  * @ctx:		pointer to a ring context structure
  * @req:		pointer to the armed request
- * @opcode:		opcode of request
  * @user_data:		user data associated with the request
+ * @opcode:		opcode of request
  * @mask:		request poll events mask
  * @events:		registered events of interest
  *
@@ -378,155 +410,110 @@ TRACE_EVENT(io_uring_submit_sqe,
  */
 TRACE_EVENT(io_uring_poll_arm,
 
-	TP_PROTO(void *ctx, void *req, u8 opcode, u64 user_data,
+	TP_PROTO(void *ctx, void *req, u64 user_data, u8 opcode,
 		 int mask, int events),
 
-	TP_ARGS(ctx, req, opcode, user_data, mask, events),
+	TP_ARGS(ctx, req, user_data, opcode, mask, events),
 
 	TP_STRUCT__entry (
-		__field(  void *,	ctx		)
-		__field(  void *,	req		)
-		__field(  u8,		opcode		)
-		__field(  u64,		user_data	)
-		__field(  int,		mask		)
-		__field(  int,		events		)
+		__field(  void *,		ctx		)
+		__field(  void *,		req		)
+		__field(  unsigned long long,	user_data	)
+		__field(  u8,			opcode		)
+		__field(  int,			mask		)
+		__field(  int,			events		)
 	),
 
 	TP_fast_assign(
 		__entry->ctx		= ctx;
 		__entry->req		= req;
-		__entry->opcode		= opcode;
 		__entry->user_data	= user_data;
+		__entry->opcode		= opcode;
 		__entry->mask		= mask;
 		__entry->events		= events;
 	),
 
-	TP_printk("ring %p, req %p, op %d, data 0x%llx, mask 0x%x, events 0x%x",
-		  __entry->ctx, __entry->req, __entry->opcode,
-		  (unsigned long long) __entry->user_data,
+	TP_printk("ring %p, req %p, user_data %llu, opcode %d, mask 0x%x, events 0x%x",
+		  __entry->ctx, __entry->req, __entry->user_data, __entry->opcode,
 		  __entry->mask, __entry->events)
 );
 
-TRACE_EVENT(io_uring_poll_wake,
-
-	TP_PROTO(void *ctx, u8 opcode, u64 user_data, int mask),
-
-	TP_ARGS(ctx, opcode, user_data, mask),
-
-	TP_STRUCT__entry (
-		__field(  void *,	ctx		)
-		__field(  u8,		opcode		)
-		__field(  u64,		user_data	)
-		__field(  int,		mask		)
-	),
-
-	TP_fast_assign(
-		__entry->ctx		= ctx;
-		__entry->opcode		= opcode;
-		__entry->user_data	= user_data;
-		__entry->mask		= mask;
-	),
-
-	TP_printk("ring %p, op %d, data 0x%llx, mask 0x%x",
-			  __entry->ctx, __entry->opcode,
-			  (unsigned long long) __entry->user_data,
-			  __entry->mask)
-);
-
-TRACE_EVENT(io_uring_task_add,
-
-	TP_PROTO(void *ctx, u8 opcode, u64 user_data, int mask),
-
-	TP_ARGS(ctx, opcode, user_data, mask),
-
-	TP_STRUCT__entry (
-		__field(  void *,	ctx		)
-		__field(  u8,		opcode		)
-		__field(  u64,		user_data	)
-		__field(  int,		mask		)
-	),
-
-	TP_fast_assign(
-		__entry->ctx		= ctx;
-		__entry->opcode		= opcode;
-		__entry->user_data	= user_data;
-		__entry->mask		= mask;
-	),
-
-	TP_printk("ring %p, op %d, data 0x%llx, mask %x",
-			  __entry->ctx, __entry->opcode,
-			  (unsigned long long) __entry->user_data,
-			  __entry->mask)
-);
-
 /*
- * io_uring_task_run - called when task_work_run() executes the poll events
- *                     notification callbacks
+ * io_uring_task_add - called after adding a task
  *
  * @ctx:		pointer to a ring context structure
- * @req:		pointer to the armed request
- * @opcode:		opcode of request
+ * @req:		pointer to request
  * @user_data:		user data associated with the request
+ * @opcode:		opcode of request
+ * @mask:		request poll events mask
  *
- * Allows to track when notified poll events are processed
  */
-TRACE_EVENT(io_uring_task_run,
+TRACE_EVENT(io_uring_task_add,
 
-	TP_PROTO(void *ctx, void *req, u8 opcode, u64 user_data),
+	TP_PROTO(void *ctx, void *req, unsigned long long user_data, u8 opcode, int mask),
 
-	TP_ARGS(ctx, req, opcode, user_data),
+	TP_ARGS(ctx, req, user_data, opcode, mask),
 
 	TP_STRUCT__entry (
-		__field(  void *,	ctx		)
-		__field(  void *,	req		)
-		__field(  u8,		opcode		)
-		__field(  u64,		user_data	)
+		__field(  void *,		ctx		)
+		__field(  void *,		req		)
+		__field(  unsigned long long,	user_data	)
+		__field(  u8,			opcode		)
+		__field(  int,			mask		)
 	),
 
 	TP_fast_assign(
 		__entry->ctx		= ctx;
 		__entry->req		= req;
-		__entry->opcode		= opcode;
 		__entry->user_data	= user_data;
+		__entry->opcode		= opcode;
+		__entry->mask		= mask;
 	),
 
-	TP_printk("ring %p, req %p, op %d, data 0x%llx",
-		  __entry->ctx, __entry->req, __entry->opcode,
-		  (unsigned long long) __entry->user_data)
+	TP_printk("ring %p, req %p, user_data %llu, opcode %d, mask %x",
+		__entry->ctx, __entry->req, __entry->user_data, __entry->opcode,
+		__entry->mask)
 );
 
 /*
  * io_uring_req_failed - called when an sqe is errored dring submission
  *
  * @sqe:		pointer to the io_uring_sqe that failed
+ * @ctx:		pointer to a ring context structure
+ * @req:		pointer to request
  * @error:		error it failed with
  *
  * Allows easier diagnosing of malformed requests in production systems.
  */
 TRACE_EVENT(io_uring_req_failed,
 
-	TP_PROTO(const struct io_uring_sqe *sqe, int error),
+	TP_PROTO(const struct io_uring_sqe *sqe, void *ctx, void *req, int error),
 
-	TP_ARGS(sqe, error),
+	TP_ARGS(sqe, ctx, req, error),
 
 	TP_STRUCT__entry (
-		__field(  u8,	opcode )
-		__field(  u8,	flags )
-		__field(  u8,	ioprio )
-		__field( u64,	off )
-		__field( u64,	addr )
-		__field( u32,	len )
-		__field( u32,	op_flags )
-		__field( u64,	user_data )
-		__field( u16,	buf_index )
-		__field( u16,	personality )
-		__field( u32,	file_index )
-		__field( u64,	pad1 )
-		__field( u64,	pad2 )
-		__field( int,	error )
+		__field(  void *,		ctx		)
+		__field(  void *,		req		)
+		__field(  unsigned long long,	user_data	)
+		__field(  u8,			opcode		)
+		__field(  u8,			flags		)
+		__field(  u8,			ioprio		)
+		__field( u64,			off		)
+		__field( u64,			addr		)
+		__field( u32,			len		)
+		__field( u32,			op_flags	)
+		__field( u16,			buf_index	)
+		__field( u16,			personality	)
+		__field( u32,			file_index	)
+		__field( u64,			pad1		)
+		__field( u64,			pad2		)
+		__field( int,			error		)
 	),
 
 	TP_fast_assign(
+		__entry->ctx		= ctx;
+		__entry->req		= req;
+		__entry->user_data	= sqe->user_data;
 		__entry->opcode		= sqe->opcode;
 		__entry->flags		= sqe->flags;
 		__entry->ioprio		= sqe->ioprio;
@@ -534,7 +521,6 @@ TRACE_EVENT(io_uring_req_failed,
 		__entry->addr		= sqe->addr;
 		__entry->len		= sqe->len;
 		__entry->op_flags	= sqe->rw_flags;
-		__entry->user_data	= sqe->user_data;
 		__entry->buf_index	= sqe->buf_index;
 		__entry->personality	= sqe->personality;
 		__entry->file_index	= sqe->file_index;
@@ -543,13 +529,15 @@ TRACE_EVENT(io_uring_req_failed,
 		__entry->error		= error;
 	),
 
-	TP_printk("op %d, flags=0x%x, prio=%d, off=%llu, addr=%llu, "
-		  "len=%u, rw_flags=0x%x, user_data=0x%llx, buf_index=%d, "
+	TP_printk("ring %p, req %p, user_data %llu, "
+		"op %d, flags=0x%x, prio=%d, off=%llu, addr=%llu, "
+		  "len=%u, rw_flags=0x%x, buf_index=%d, "
 		  "personality=%d, file_index=%d, pad=0x%llx/%llx, error=%d",
+		  __entry->ctx, __entry->req, __entry->user_data,
 		  __entry->opcode, __entry->flags, __entry->ioprio,
 		  (unsigned long long)__entry->off,
 		  (unsigned long long) __entry->addr, __entry->len,
-		  __entry->op_flags, (unsigned long long) __entry->user_data,
+		  __entry->op_flags,
 		  __entry->buf_index, __entry->personality, __entry->file_index,
 		  (unsigned long long) __entry->pad1,
 		  (unsigned long long) __entry->pad2, __entry->error)
-- 
GitLab


From c5020bc8d9295ad4a3e09d858a28b26a25d4afab Mon Sep 17 00:00:00 2001
From: Olivier Langlois <olivier@trillion01.com>
Date: Wed, 16 Feb 2022 14:53:42 -0500
Subject: [PATCH 1216/1586] io_uring: Remove unneeded test in
 io_run_task_work_sig()

Avoid testing TIF_NOTIFY_SIGNAL twice by calling task_sigpending()
directly from io_run_task_work_sig()

Signed-off-by: Olivier Langlois <olivier@trillion01.com>
Link: https://lore.kernel.org/r/bd7c0495f7656e803e5736708591bb665e6eaacd.1645041650.git.olivier@trillion01.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 5ec24b0438f13..8d3454a243b43 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -7704,11 +7704,11 @@ static int io_run_task_work_sig(void)
 {
 	if (io_run_task_work())
 		return 1;
-	if (!signal_pending(current))
-		return 0;
 	if (test_thread_flag(TIF_NOTIFY_SIGNAL))
 		return -ERESTARTSYS;
-	return -EINTR;
+	if (task_sigpending(current))
+		return -EINTR;
+	return 0;
 }
 
 /* when returns >0, the caller should retry */
-- 
GitLab


From af9c45ecebaf1b428306f41421f4bcffe439f735 Mon Sep 17 00:00:00 2001
From: Dylan Yudaken <dylany@fb.com>
Date: Tue, 22 Feb 2022 02:55:01 -0800
Subject: [PATCH 1217/1586] io_uring: remove duplicated calls to io_kiocb_ppos

io_kiocb_ppos is called in both branches, and it seems that the compiler
does not fuse this. Fusing removes a few bytes from loop_rw_iter.

Before:
$ nm -S fs/io_uring.o | grep loop_rw_iter
0000000000002430 0000000000000124 t loop_rw_iter

After:
$ nm -S fs/io_uring.o | grep loop_rw_iter
0000000000002430 000000000000010d t loop_rw_iter

Signed-off-by: Dylan Yudaken <dylany@fb.com>
Reviewed-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 8d3454a243b43..d9c0fd042f176 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -3420,6 +3420,7 @@ static ssize_t loop_rw_iter(int rw, struct io_kiocb *req, struct iov_iter *iter)
 	struct kiocb *kiocb = &req->rw.kiocb;
 	struct file *file = req->file;
 	ssize_t ret = 0;
+	loff_t *ppos;
 
 	/*
 	 * Don't support polled IO through this interface, and we can't
@@ -3432,6 +3433,8 @@ static ssize_t loop_rw_iter(int rw, struct io_kiocb *req, struct iov_iter *iter)
 	    !(kiocb->ki_filp->f_flags & O_NONBLOCK))
 		return -EAGAIN;
 
+	ppos = io_kiocb_ppos(kiocb);
+
 	while (iov_iter_count(iter)) {
 		struct iovec iovec;
 		ssize_t nr;
@@ -3445,10 +3448,10 @@ static ssize_t loop_rw_iter(int rw, struct io_kiocb *req, struct iov_iter *iter)
 
 		if (rw == READ) {
 			nr = file->f_op->read(file, iovec.iov_base,
-					      iovec.iov_len, io_kiocb_ppos(kiocb));
+					      iovec.iov_len, ppos);
 		} else {
 			nr = file->f_op->write(file, iovec.iov_base,
-					       iovec.iov_len, io_kiocb_ppos(kiocb));
+					       iovec.iov_len, ppos);
 		}
 
 		if (nr < 0) {
-- 
GitLab


From d34e1e5b396a0dbaa4a29b7138df662cfb9d8e8e Mon Sep 17 00:00:00 2001
From: Dylan Yudaken <dylany@fb.com>
Date: Tue, 22 Feb 2022 02:55:02 -0800
Subject: [PATCH 1218/1586] io_uring: update kiocb->ki_pos at execution time

Update kiocb->ki_pos at execution time rather than in io_prep_rw().
io_prep_rw() happens before the job is enqueued to a worker and so the
offset might be read multiple times before being executed once.

Ensures that the file position in a set of _linked_ SQEs will be only
obtained after earlier SQEs have completed, and so will include their
incremented file position.

Signed-off-by: Dylan Yudaken <dylany@fb.com>
Reviewed-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 26 ++++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index d9c0fd042f176..d0f3e084da16e 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -3020,14 +3020,6 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 		req->flags |= io_file_get_flags(file) << REQ_F_SUPPORT_NOWAIT_BIT;
 
 	kiocb->ki_pos = READ_ONCE(sqe->off);
-	if (kiocb->ki_pos == -1) {
-		if (!(file->f_mode & FMODE_STREAM)) {
-			req->flags |= REQ_F_CUR_POS;
-			kiocb->ki_pos = file->f_pos;
-		} else {
-			kiocb->ki_pos = 0;
-		}
-	}
 	kiocb->ki_flags = iocb_flags(file);
 	ret = kiocb_set_rw_flags(kiocb, READ_ONCE(sqe->rw_flags));
 	if (unlikely(ret))
@@ -3094,6 +3086,20 @@ static inline void io_rw_done(struct kiocb *kiocb, ssize_t ret)
 	}
 }
 
+static inline void io_kiocb_update_pos(struct io_kiocb *req)
+{
+	struct kiocb *kiocb = &req->rw.kiocb;
+
+	if (kiocb->ki_pos == -1) {
+		if (!(req->file->f_mode & FMODE_STREAM)) {
+			req->flags |= REQ_F_CUR_POS;
+			kiocb->ki_pos = req->file->f_pos;
+		} else {
+			kiocb->ki_pos = 0;
+		}
+	}
+}
+
 static void kiocb_done(struct io_kiocb *req, ssize_t ret,
 		       unsigned int issue_flags)
 {
@@ -3682,6 +3688,8 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
 		kiocb->ki_flags &= ~IOCB_NOWAIT;
 	}
 
+	io_kiocb_update_pos(req);
+
 	ret = rw_verify_area(READ, req->file, io_kiocb_ppos(kiocb), req->result);
 	if (unlikely(ret)) {
 		kfree(iovec);
@@ -3811,6 +3819,8 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
 		kiocb->ki_flags &= ~IOCB_NOWAIT;
 	}
 
+	io_kiocb_update_pos(req);
+
 	ret = rw_verify_area(WRITE, req->file, io_kiocb_ppos(kiocb), req->result);
 	if (unlikely(ret))
 		goto out_free;
-- 
GitLab


From b4aec40015953b65f2f114641e7fd7714c8df8e6 Mon Sep 17 00:00:00 2001
From: Dylan Yudaken <dylany@fb.com>
Date: Tue, 22 Feb 2022 02:55:03 -0800
Subject: [PATCH 1219/1586] io_uring: do not recalculate ppos unnecessarily

There is a slight optimisation to be had by calculating the correct pos
pointer inside io_kiocb_update_pos and then using that later.

It seems code size drops by a bit:
000000000000a1b0 0000000000000400 t io_read
000000000000a5b0 0000000000000319 t io_write

vs
000000000000a1b0 00000000000003f6 t io_read
000000000000a5b0 0000000000000310 t io_write

Signed-off-by: Dylan Yudaken <dylany@fb.com>
Reviewed-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index d0f3e084da16e..c9efb1d32fd28 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -3086,18 +3086,22 @@ static inline void io_rw_done(struct kiocb *kiocb, ssize_t ret)
 	}
 }
 
-static inline void io_kiocb_update_pos(struct io_kiocb *req)
+static inline loff_t *io_kiocb_update_pos(struct io_kiocb *req)
 {
 	struct kiocb *kiocb = &req->rw.kiocb;
+	bool is_stream = req->file->f_mode & FMODE_STREAM;
 
 	if (kiocb->ki_pos == -1) {
-		if (!(req->file->f_mode & FMODE_STREAM)) {
+		if (!is_stream) {
 			req->flags |= REQ_F_CUR_POS;
 			kiocb->ki_pos = req->file->f_pos;
+			return &kiocb->ki_pos;
 		} else {
 			kiocb->ki_pos = 0;
+			return NULL;
 		}
 	}
+	return is_stream ? NULL : &kiocb->ki_pos;
 }
 
 static void kiocb_done(struct io_kiocb *req, ssize_t ret,
@@ -3658,6 +3662,7 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
 	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
 	struct io_async_rw *rw;
 	ssize_t ret, ret2;
+	loff_t *ppos;
 
 	if (!req_has_async_data(req)) {
 		ret = io_import_iovec(READ, req, &iovec, s, issue_flags);
@@ -3688,9 +3693,9 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
 		kiocb->ki_flags &= ~IOCB_NOWAIT;
 	}
 
-	io_kiocb_update_pos(req);
+	ppos = io_kiocb_update_pos(req);
 
-	ret = rw_verify_area(READ, req->file, io_kiocb_ppos(kiocb), req->result);
+	ret = rw_verify_area(READ, req->file, ppos, req->result);
 	if (unlikely(ret)) {
 		kfree(iovec);
 		return ret;
@@ -3789,6 +3794,7 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
 	struct kiocb *kiocb = &req->rw.kiocb;
 	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
 	ssize_t ret, ret2;
+	loff_t *ppos;
 
 	if (!req_has_async_data(req)) {
 		ret = io_import_iovec(WRITE, req, &iovec, s, issue_flags);
@@ -3819,9 +3825,9 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
 		kiocb->ki_flags &= ~IOCB_NOWAIT;
 	}
 
-	io_kiocb_update_pos(req);
+	ppos = io_kiocb_update_pos(req);
 
-	ret = rw_verify_area(WRITE, req->file, io_kiocb_ppos(kiocb), req->result);
+	ret = rw_verify_area(WRITE, req->file, ppos, req->result);
 	if (unlikely(ret))
 		goto out_free;
 
-- 
GitLab


From 63c36549737e8132e89ec6563d26523895ae3121 Mon Sep 17 00:00:00 2001
From: Dylan Yudaken <dylany@fb.com>
Date: Thu, 24 Feb 2022 02:51:57 -0800
Subject: [PATCH 1220/1586] io_uring: documentation fixup

Fix incorrect name reference in comment. ki_filp does not exist in the
struct, but file does.

Signed-off-by: Dylan Yudaken <dylany@fb.com>
Link: https://lore.kernel.org/r/20220224105157.1332353-1-dylany@fb.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index c9efb1d32fd28..f8fd3b2bb30dc 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -830,7 +830,7 @@ enum {
  * NOTE! Each of the iocb union members has the file pointer
  * as the first entry in their struct definition. So you can
  * access the file pointer through any of the sub-structs,
- * or directly as just 'ki_filp' in this struct.
+ * or directly as just 'file' in this struct.
  */
 struct io_kiocb {
 	union {
-- 
GitLab


From e7a6c00dc77aedf27a601738ea509f1caea6d673 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Fri, 4 Mar 2022 08:22:22 -0700
Subject: [PATCH 1221/1586] io_uring: add support for registering ring file
 descriptors

Lots of workloads use multiple threads, in which case the file table is
shared between them. This makes getting and putting the ring file
descriptor for each io_uring_enter(2) system call more expensive, as it
involves an atomic get and put for each call.

Similarly to how we allow registering normal file descriptors to avoid
this overhead, add support for an io_uring_register(2) API that allows
to register the ring fds themselves:

1) IORING_REGISTER_RING_FDS - takes an array of io_uring_rsrc_update
   structs, and registers them with the task.
2) IORING_UNREGISTER_RING_FDS - takes an array of io_uring_src_update
   structs, and unregisters them.

When a ring fd is registered, it is internally represented by an offset.
This offset is returned to the application, and the application then
uses this offset and sets IORING_ENTER_REGISTERED_RING for the
io_uring_enter(2) system call. This works just like using a registered
file descriptor, rather than a real one, in an SQE, where
IOSQE_FIXED_FILE gets set to tell io_uring that we're using an internal
offset/descriptor rather than a real file descriptor.

In initial testing, this provides a nice bump in performance for
threaded applications in real world cases where the batch count (eg
number of requests submitted per io_uring_enter(2) invocation) is low.
In a microbenchmark, submitting NOP requests, we see the following
increases in performance:

Requests per syscall	Baseline	Registered	Increase
----------------------------------------------------------------
1			 ~7030K		 ~8080K		+15%
2			~13120K		~14800K		+13%
4			~22740K		~25300K		+11%

Co-developed-by: Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c                 | 182 +++++++++++++++++++++++++++++++++-
 include/linux/io_uring.h      |   5 +-
 include/uapi/linux/io_uring.h |  13 ++-
 3 files changed, 190 insertions(+), 10 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index f8fd3b2bb30dc..daa3609b742f2 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -466,6 +466,11 @@ struct io_ring_ctx {
 	};
 };
 
+/*
+ * Arbitrary limit, can be raised if need be
+ */
+#define IO_RINGFD_REG_MAX 16
+
 struct io_uring_task {
 	/* submission side */
 	int			cached_refs;
@@ -481,6 +486,7 @@ struct io_uring_task {
 	struct io_wq_work_list	task_list;
 	struct io_wq_work_list	prior_task_list;
 	struct callback_head	task_work;
+	struct file		**registered_rings;
 	bool			task_running;
 };
 
@@ -8788,8 +8794,16 @@ static __cold int io_uring_alloc_task_context(struct task_struct *task,
 	if (unlikely(!tctx))
 		return -ENOMEM;
 
+	tctx->registered_rings = kcalloc(IO_RINGFD_REG_MAX,
+					 sizeof(struct file *), GFP_KERNEL);
+	if (unlikely(!tctx->registered_rings)) {
+		kfree(tctx);
+		return -ENOMEM;
+	}
+
 	ret = percpu_counter_init(&tctx->inflight, 0, GFP_KERNEL);
 	if (unlikely(ret)) {
+		kfree(tctx->registered_rings);
 		kfree(tctx);
 		return ret;
 	}
@@ -8798,6 +8812,7 @@ static __cold int io_uring_alloc_task_context(struct task_struct *task,
 	if (IS_ERR(tctx->io_wq)) {
 		ret = PTR_ERR(tctx->io_wq);
 		percpu_counter_destroy(&tctx->inflight);
+		kfree(tctx->registered_rings);
 		kfree(tctx);
 		return ret;
 	}
@@ -8822,6 +8837,7 @@ void __io_uring_free(struct task_struct *tsk)
 	WARN_ON_ONCE(tctx->io_wq);
 	WARN_ON_ONCE(tctx->cached_refs);
 
+	kfree(tctx->registered_rings);
 	percpu_counter_destroy(&tctx->inflight);
 	kfree(tctx);
 	tsk->io_uring = NULL;
@@ -10043,6 +10059,139 @@ void __io_uring_cancel(bool cancel_all)
 	io_uring_cancel_generic(cancel_all, NULL);
 }
 
+void io_uring_unreg_ringfd(void)
+{
+	struct io_uring_task *tctx = current->io_uring;
+	int i;
+
+	for (i = 0; i < IO_RINGFD_REG_MAX; i++) {
+		if (tctx->registered_rings[i]) {
+			fput(tctx->registered_rings[i]);
+			tctx->registered_rings[i] = NULL;
+		}
+	}
+}
+
+static int io_ring_add_registered_fd(struct io_uring_task *tctx, int fd,
+				     int start, int end)
+{
+	struct file *file;
+	int offset;
+
+	for (offset = start; offset < end; offset++) {
+		offset = array_index_nospec(offset, IO_RINGFD_REG_MAX);
+		if (tctx->registered_rings[offset])
+			continue;
+
+		file = fget(fd);
+		if (!file) {
+			return -EBADF;
+		} else if (file->f_op != &io_uring_fops) {
+			fput(file);
+			return -EOPNOTSUPP;
+		}
+		tctx->registered_rings[offset] = file;
+		return offset;
+	}
+
+	return -EBUSY;
+}
+
+/*
+ * Register a ring fd to avoid fdget/fdput for each io_uring_enter()
+ * invocation. User passes in an array of struct io_uring_rsrc_update
+ * with ->data set to the ring_fd, and ->offset given for the desired
+ * index. If no index is desired, application may set ->offset == -1U
+ * and we'll find an available index. Returns number of entries
+ * successfully processed, or < 0 on error if none were processed.
+ */
+static int io_ringfd_register(struct io_ring_ctx *ctx, void __user *__arg,
+			      unsigned nr_args)
+{
+	struct io_uring_rsrc_update __user *arg = __arg;
+	struct io_uring_rsrc_update reg;
+	struct io_uring_task *tctx;
+	int ret, i;
+
+	if (!nr_args || nr_args > IO_RINGFD_REG_MAX)
+		return -EINVAL;
+
+	mutex_unlock(&ctx->uring_lock);
+	ret = io_uring_add_tctx_node(ctx);
+	mutex_lock(&ctx->uring_lock);
+	if (ret)
+		return ret;
+
+	tctx = current->io_uring;
+	for (i = 0; i < nr_args; i++) {
+		int start, end;
+
+		if (copy_from_user(&reg, &arg[i], sizeof(reg))) {
+			ret = -EFAULT;
+			break;
+		}
+
+		if (reg.offset == -1U) {
+			start = 0;
+			end = IO_RINGFD_REG_MAX;
+		} else {
+			if (reg.offset >= IO_RINGFD_REG_MAX) {
+				ret = -EINVAL;
+				break;
+			}
+			start = reg.offset;
+			end = start + 1;
+		}
+
+		ret = io_ring_add_registered_fd(tctx, reg.data, start, end);
+		if (ret < 0)
+			break;
+
+		reg.offset = ret;
+		if (copy_to_user(&arg[i], &reg, sizeof(reg))) {
+			fput(tctx->registered_rings[reg.offset]);
+			tctx->registered_rings[reg.offset] = NULL;
+			ret = -EFAULT;
+			break;
+		}
+	}
+
+	return i ? i : ret;
+}
+
+static int io_ringfd_unregister(struct io_ring_ctx *ctx, void __user *__arg,
+				unsigned nr_args)
+{
+	struct io_uring_rsrc_update __user *arg = __arg;
+	struct io_uring_task *tctx = current->io_uring;
+	struct io_uring_rsrc_update reg;
+	int ret = 0, i;
+
+	if (!nr_args || nr_args > IO_RINGFD_REG_MAX)
+		return -EINVAL;
+	if (!tctx)
+		return 0;
+
+	for (i = 0; i < nr_args; i++) {
+		if (copy_from_user(&reg, &arg[i], sizeof(reg))) {
+			ret = -EFAULT;
+			break;
+		}
+		if (reg.offset >= IO_RINGFD_REG_MAX) {
+			ret = -EINVAL;
+			break;
+		}
+
+		reg.offset = array_index_nospec(reg.offset, IO_RINGFD_REG_MAX);
+		if (tctx->registered_rings[reg.offset]) {
+			fput(tctx->registered_rings[reg.offset]);
+			tctx->registered_rings[reg.offset] = NULL;
+		}
+	}
+
+	return i ? i : ret;
+}
+
 static void *io_uring_validate_mmap_request(struct file *file,
 					    loff_t pgoff, size_t sz)
 {
@@ -10173,12 +10322,28 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
 	io_run_task_work();
 
 	if (unlikely(flags & ~(IORING_ENTER_GETEVENTS | IORING_ENTER_SQ_WAKEUP |
-			       IORING_ENTER_SQ_WAIT | IORING_ENTER_EXT_ARG)))
+			       IORING_ENTER_SQ_WAIT | IORING_ENTER_EXT_ARG |
+			       IORING_ENTER_REGISTERED_RING)))
 		return -EINVAL;
 
-	f = fdget(fd);
-	if (unlikely(!f.file))
-		return -EBADF;
+	/*
+	 * Ring fd has been registered via IORING_REGISTER_RING_FDS, we
+	 * need only dereference our task private array to find it.
+	 */
+	if (flags & IORING_ENTER_REGISTERED_RING) {
+		struct io_uring_task *tctx = current->io_uring;
+
+		if (!tctx || fd >= IO_RINGFD_REG_MAX)
+			return -EINVAL;
+		fd = array_index_nospec(fd, IO_RINGFD_REG_MAX);
+		f.file = tctx->registered_rings[fd];
+		if (unlikely(!f.file))
+			return -EBADF;
+	} else {
+		f = fdget(fd);
+		if (unlikely(!f.file))
+			return -EBADF;
+	}
 
 	ret = -EOPNOTSUPP;
 	if (unlikely(f.file->f_op != &io_uring_fops))
@@ -10252,7 +10417,8 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
 out:
 	percpu_ref_put(&ctx->refs);
 out_fput:
-	fdput(f);
+	if (!(flags & IORING_ENTER_REGISTERED_RING))
+		fdput(f);
 	return submitted ? submitted : ret;
 }
 
@@ -11142,6 +11308,12 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 			break;
 		ret = io_register_iowq_max_workers(ctx, arg);
 		break;
+	case IORING_REGISTER_RING_FDS:
+		ret = io_ringfd_register(ctx, arg, nr_args);
+		break;
+	case IORING_UNREGISTER_RING_FDS:
+		ret = io_ringfd_unregister(ctx, arg, nr_args);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h
index 649a4d7c241bc..1814e698d8611 100644
--- a/include/linux/io_uring.h
+++ b/include/linux/io_uring.h
@@ -9,11 +9,14 @@
 struct sock *io_uring_get_socket(struct file *file);
 void __io_uring_cancel(bool cancel_all);
 void __io_uring_free(struct task_struct *tsk);
+void io_uring_unreg_ringfd(void);
 
 static inline void io_uring_files_cancel(void)
 {
-	if (current->io_uring)
+	if (current->io_uring) {
+		io_uring_unreg_ringfd();
 		__io_uring_cancel(false);
+	}
 }
 static inline void io_uring_task_cancel(void)
 {
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 787f491f0d2ae..42b2fe84dbcd5 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -257,10 +257,11 @@ struct io_cqring_offsets {
 /*
  * io_uring_enter(2) flags
  */
-#define IORING_ENTER_GETEVENTS	(1U << 0)
-#define IORING_ENTER_SQ_WAKEUP	(1U << 1)
-#define IORING_ENTER_SQ_WAIT	(1U << 2)
-#define IORING_ENTER_EXT_ARG	(1U << 3)
+#define IORING_ENTER_GETEVENTS		(1U << 0)
+#define IORING_ENTER_SQ_WAKEUP		(1U << 1)
+#define IORING_ENTER_SQ_WAIT		(1U << 2)
+#define IORING_ENTER_EXT_ARG		(1U << 3)
+#define IORING_ENTER_REGISTERED_RING	(1U << 4)
 
 /*
  * Passed in for io_uring_setup(2). Copied back with updated info on success
@@ -325,6 +326,10 @@ enum {
 	/* set/get max number of io-wq workers */
 	IORING_REGISTER_IOWQ_MAX_WORKERS	= 19,
 
+	/* register/unregister io_uring fd with the ring */
+	IORING_REGISTER_RING_FDS		= 20,
+	IORING_UNREGISTER_RING_FDS		= 21,
+
 	/* this goes last */
 	IORING_REGISTER_LAST
 };
-- 
GitLab


From cc3cec8367cba76a8ae4c271eba8450f3efc1ba3 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Tue, 8 Mar 2022 17:46:52 -0700
Subject: [PATCH 1222/1586] io_uring: speedup provided buffer handling

In testing high frequency workloads with provided buffers, we spend a
lot of time in allocating and freeing the buffer units themselves.
Rather than repeatedly free and alloc them, add a recycling cache
instead. There are two caches:

- ctx->io_buffers_cache. This is the one we grab from in the submission
  path, and it's protected by ctx->uring_lock. For inline completions,
  we can recycle straight back to this cache and not need any extra
  locking.

- ctx->io_buffers_comp. If we're not under uring_lock, then we use this
  list to recycle buffers. It's protected by the completion_lock.

On adding a new buffer, check io_buffers_cache. If it's empty, check if
we can splice entries from the io_buffers_comp_cache.

This reduces about 5-10% of overhead from provided buffers, bringing it
pretty close to the non-provided path.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 139 ++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 117 insertions(+), 22 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index daa3609b742f2..0f5e999e569f8 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -384,6 +384,7 @@ struct io_ring_ctx {
 		struct list_head	ltimeout_list;
 		struct list_head	cq_overflow_list;
 		struct xarray		io_buffers;
+		struct list_head	io_buffers_cache;
 		struct xarray		personalities;
 		u32			pers_next;
 		unsigned		sq_thread_idle;
@@ -426,6 +427,8 @@ struct io_ring_ctx {
 		struct hlist_head	*cancel_hash;
 		unsigned		cancel_hash_bits;
 		bool			poll_multi_queue;
+
+		struct list_head	io_buffers_comp;
 	} ____cacheline_aligned_in_smp;
 
 	struct io_restriction		restrictions;
@@ -441,6 +444,8 @@ struct io_ring_ctx {
 		struct llist_head		rsrc_put_llist;
 		struct list_head		rsrc_ref_list;
 		spinlock_t			rsrc_ref_lock;
+
+		struct list_head	io_buffers_pages;
 	};
 
 	/* Keep this last, we don't need it for the fast path */
@@ -1278,24 +1283,56 @@ static inline void io_req_set_rsrc_node(struct io_kiocb *req,
 	}
 }
 
-static unsigned int __io_put_kbuf(struct io_kiocb *req)
+static unsigned int __io_put_kbuf(struct io_kiocb *req, struct list_head *list)
 {
 	struct io_buffer *kbuf = req->kbuf;
 	unsigned int cflags;
 
-	cflags = kbuf->bid << IORING_CQE_BUFFER_SHIFT;
-	cflags |= IORING_CQE_F_BUFFER;
+	cflags = IORING_CQE_F_BUFFER | (kbuf->bid << IORING_CQE_BUFFER_SHIFT);
 	req->flags &= ~REQ_F_BUFFER_SELECTED;
-	kfree(kbuf);
+	list_add(&kbuf->list, list);
 	req->kbuf = NULL;
 	return cflags;
 }
 
-static inline unsigned int io_put_kbuf(struct io_kiocb *req)
+static inline unsigned int io_put_kbuf_comp(struct io_kiocb *req)
 {
 	if (likely(!(req->flags & REQ_F_BUFFER_SELECTED)))
 		return 0;
-	return __io_put_kbuf(req);
+	return __io_put_kbuf(req, &req->ctx->io_buffers_comp);
+}
+
+static inline unsigned int io_put_kbuf(struct io_kiocb *req,
+				       unsigned issue_flags)
+{
+	unsigned int cflags;
+
+	if (likely(!(req->flags & REQ_F_BUFFER_SELECTED)))
+		return 0;
+
+	/*
+	 * We can add this buffer back to two lists:
+	 *
+	 * 1) The io_buffers_cache list. This one is protected by the
+	 *    ctx->uring_lock. If we already hold this lock, add back to this
+	 *    list as we can grab it from issue as well.
+	 * 2) The io_buffers_comp list. This one is protected by the
+	 *    ctx->completion_lock.
+	 *
+	 * We migrate buffers from the comp_list to the issue cache list
+	 * when we need one.
+	 */
+	if (issue_flags & IO_URING_F_UNLOCKED) {
+		struct io_ring_ctx *ctx = req->ctx;
+
+		spin_lock(&ctx->completion_lock);
+		cflags = __io_put_kbuf(req, &ctx->io_buffers_comp);
+		spin_unlock(&ctx->completion_lock);
+	} else {
+		cflags = __io_put_kbuf(req, &req->ctx->io_buffers_cache);
+	}
+
+	return cflags;
 }
 
 static bool io_match_task(struct io_kiocb *head, struct task_struct *task,
@@ -1443,6 +1480,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	init_waitqueue_head(&ctx->sqo_sq_wait);
 	INIT_LIST_HEAD(&ctx->sqd_list);
 	INIT_LIST_HEAD(&ctx->cq_overflow_list);
+	INIT_LIST_HEAD(&ctx->io_buffers_cache);
 	init_completion(&ctx->ref_comp);
 	xa_init_flags(&ctx->io_buffers, XA_FLAGS_ALLOC1);
 	xa_init_flags(&ctx->personalities, XA_FLAGS_ALLOC1);
@@ -1451,6 +1489,8 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	spin_lock_init(&ctx->completion_lock);
 	spin_lock_init(&ctx->timeout_lock);
 	INIT_WQ_LIST(&ctx->iopoll_list);
+	INIT_LIST_HEAD(&ctx->io_buffers_pages);
+	INIT_LIST_HEAD(&ctx->io_buffers_comp);
 	INIT_LIST_HEAD(&ctx->defer_list);
 	INIT_LIST_HEAD(&ctx->timeout_list);
 	INIT_LIST_HEAD(&ctx->ltimeout_list);
@@ -2329,7 +2369,8 @@ static void handle_prev_tw_list(struct io_wq_work_node *node,
 		if (likely(*uring_locked))
 			req->io_task_work.func(req, uring_locked);
 		else
-			__io_req_complete_post(req, req->result, io_put_kbuf(req));
+			__io_req_complete_post(req, req->result,
+						io_put_kbuf_comp(req));
 		node = next;
 	} while (node);
 
@@ -2679,7 +2720,7 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
 		if (unlikely(req->flags & REQ_F_CQE_SKIP))
 			continue;
 
-		__io_fill_cqe(req, req->result, io_put_kbuf(req));
+		__io_fill_cqe(req, req->result, io_put_kbuf(req, 0));
 		nr_events++;
 	}
 
@@ -2855,14 +2896,14 @@ static bool __io_complete_rw_common(struct io_kiocb *req, long res)
 
 static inline void io_req_task_complete(struct io_kiocb *req, bool *locked)
 {
-	unsigned int cflags = io_put_kbuf(req);
 	int res = req->result;
 
 	if (*locked) {
-		io_req_complete_state(req, res, cflags);
+		io_req_complete_state(req, res, io_put_kbuf(req, 0));
 		io_req_add_compl_list(req);
 	} else {
-		io_req_complete_post(req, res, cflags);
+		io_req_complete_post(req, res,
+					io_put_kbuf(req, IO_URING_F_UNLOCKED));
 	}
 }
 
@@ -2871,7 +2912,8 @@ static void __io_complete_rw(struct io_kiocb *req, long res,
 {
 	if (__io_complete_rw_common(req, res))
 		return;
-	__io_req_complete(req, issue_flags, req->result, io_put_kbuf(req));
+	__io_req_complete(req, issue_flags, req->result,
+				io_put_kbuf(req, issue_flags));
 }
 
 static void io_complete_rw(struct kiocb *kiocb, long res)
@@ -4518,13 +4560,11 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx, struct io_buffer *buf,
 
 		nxt = list_first_entry(&buf->list, struct io_buffer, list);
 		list_del(&nxt->list);
-		kfree(nxt);
 		if (++i == nbufs)
 			return i;
 		cond_resched();
 	}
 	i++;
-	kfree(buf);
 	xa_erase(&ctx->io_buffers, bgid);
 
 	return i;
@@ -4590,17 +4630,63 @@ static int io_provide_buffers_prep(struct io_kiocb *req,
 	return 0;
 }
 
-static int io_add_buffers(struct io_provide_buf *pbuf, struct io_buffer **head)
+static int io_refill_buffer_cache(struct io_ring_ctx *ctx)
+{
+	struct io_buffer *buf;
+	struct page *page;
+	int bufs_in_page;
+
+	/*
+	 * Completions that don't happen inline (eg not under uring_lock) will
+	 * add to ->io_buffers_comp. If we don't have any free buffers, check
+	 * the completion list and splice those entries first.
+	 */
+	if (!list_empty_careful(&ctx->io_buffers_comp)) {
+		spin_lock(&ctx->completion_lock);
+		if (!list_empty(&ctx->io_buffers_comp)) {
+			list_splice_init(&ctx->io_buffers_comp,
+						&ctx->io_buffers_cache);
+			spin_unlock(&ctx->completion_lock);
+			return 0;
+		}
+		spin_unlock(&ctx->completion_lock);
+	}
+
+	/*
+	 * No free buffers and no completion entries either. Allocate a new
+	 * page worth of buffer entries and add those to our freelist.
+	 */
+	page = alloc_page(GFP_KERNEL_ACCOUNT);
+	if (!page)
+		return -ENOMEM;
+
+	list_add(&page->lru, &ctx->io_buffers_pages);
+
+	buf = page_address(page);
+	bufs_in_page = PAGE_SIZE / sizeof(*buf);
+	while (bufs_in_page) {
+		list_add_tail(&buf->list, &ctx->io_buffers_cache);
+		buf++;
+		bufs_in_page--;
+	}
+
+	return 0;
+}
+
+static int io_add_buffers(struct io_ring_ctx *ctx, struct io_provide_buf *pbuf,
+			  struct io_buffer **head)
 {
 	struct io_buffer *buf;
 	u64 addr = pbuf->addr;
 	int i, bid = pbuf->bid;
 
 	for (i = 0; i < pbuf->nbufs; i++) {
-		buf = kmalloc(sizeof(*buf), GFP_KERNEL_ACCOUNT);
-		if (!buf)
+		if (list_empty(&ctx->io_buffers_cache) &&
+		    io_refill_buffer_cache(ctx))
 			break;
-
+		buf = list_first_entry(&ctx->io_buffers_cache, struct io_buffer,
+					list);
+		list_del(&buf->list);
 		buf->addr = addr;
 		buf->len = min_t(__u32, pbuf->len, MAX_RW_COUNT);
 		buf->bid = bid;
@@ -4632,7 +4718,7 @@ static int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags)
 
 	list = head = xa_load(&ctx->io_buffers, p->bgid);
 
-	ret = io_add_buffers(p, &head);
+	ret = io_add_buffers(ctx, p, &head);
 	if (ret >= 0 && !list) {
 		ret = xa_insert(&ctx->io_buffers, p->bgid, head, GFP_KERNEL);
 		if (ret < 0)
@@ -5229,7 +5315,7 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
 	if (kmsg->free_iov)
 		kfree(kmsg->free_iov);
 	req->flags &= ~REQ_F_NEED_CLEANUP;
-	__io_req_complete(req, issue_flags, ret, io_put_kbuf(req));
+	__io_req_complete(req, issue_flags, ret, io_put_kbuf(req, issue_flags));
 	return 0;
 }
 
@@ -5284,7 +5370,8 @@ static int io_recv(struct io_kiocb *req, unsigned int issue_flags)
 out_free:
 		req_set_fail(req);
 	}
-	__io_req_complete(req, issue_flags, ret, io_put_kbuf(req));
+
+	__io_req_complete(req, issue_flags, ret, io_put_kbuf(req, issue_flags));
 	return 0;
 }
 
@@ -6704,7 +6791,7 @@ fail:
 static void io_clean_op(struct io_kiocb *req)
 {
 	if (req->flags & REQ_F_BUFFER_SELECTED)
-		io_put_kbuf(req);
+		io_put_kbuf_comp(req);
 
 	if (req->flags & REQ_F_NEED_CLEANUP) {
 		switch (req->opcode) {
@@ -9475,6 +9562,14 @@ static void io_destroy_buffers(struct io_ring_ctx *ctx)
 
 	xa_for_each(&ctx->io_buffers, index, buf)
 		__io_remove_buffers(ctx, buf, index, -1U);
+
+	while (!list_empty(&ctx->io_buffers_pages)) {
+		struct page *page;
+
+		page = list_first_entry(&ctx->io_buffers_pages, struct page, lru);
+		list_del_init(&page->lru);
+		__free_page(page);
+	}
 }
 
 static void io_req_caches_free(struct io_ring_ctx *ctx)
-- 
GitLab


From 19e8b701e258701b52b1e6aea99572518d69a2fb Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 9 Mar 2022 14:03:42 -0600
Subject: [PATCH 1223/1586] a.out: Stop building a.out/osf1 support on alpha
 and m68k

There has been repeated discussion on removing a.out support, the most
recent was[1].  Having read through a bunch of the discussion it looks
like no one has see any reason why we need to keep a.out support.

The m68k maintainer has even come out in favor of removing a.out
support[2].

At a practical level with only two rarely used architectures building
a.out support, it gets increasingly hard to test and to care about.
Which means the code will almost certainly bit-rot.

Let's see if anyone cares about a.out support on the last two
architectures that build it, by disabling the build of the support in
Kconfig.  If anyone cares, this can be easily reverted, and we can then
have a discussion about what it is going to take to support a.out
binaries in the long term.

[1] https://lkml.kernel.org/r/20220113160115.5375-1-bp@alien8.de
[2] https://lkml.kernel.org/r/CAMuHMdUbTNNr16YY1TFe=-uRLjg6yGzgw_RqtAFpyhnOMM5Pvw@mail.gmail.com

Cc: Richard Henderson <rth@twiddle.net>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: Matt Turner <mattst88@gmail.com>
Cc: linux-alpha@vger.kernel.org
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: linux-m68k@lists.linux-m68k.org
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/87ilsmdhb5.fsf_-_@email.froward.int.ebiederm.org
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Link: https://lore.kernel.org/lkml/CAMuHMdVLyu6LNONJa1QcMGv__bWSCRvVq9haD7=fOm1k5O3Pnw@mail.gmail.com
---
 arch/alpha/Kconfig | 1 -
 arch/m68k/Kconfig  | 1 -
 2 files changed, 2 deletions(-)

diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index 4e87783c90ad7..14c97acea351c 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -12,7 +12,6 @@ config ALPHA
 	select FORCE_PCI if !ALPHA_JENSEN
 	select PCI_DOMAINS if PCI
 	select PCI_SYSCALL if PCI
-	select HAVE_AOUT
 	select HAVE_ASM_MODVERSIONS
 	select HAVE_PCSPKR_PLATFORM
 	select HAVE_PERF_EVENTS
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 936e1803c7c7d..268b3860d40db 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -17,7 +17,6 @@ config M68K
 	select GENERIC_CPU_DEVICES
 	select GENERIC_IOMAP
 	select GENERIC_IRQ_SHOW
-	select HAVE_AOUT if MMU
 	select HAVE_ASM_MODVERSIONS
 	select HAVE_DEBUG_BUGVERBOSE
 	select HAVE_EFFICIENT_UNALIGNED_ACCESS if !CPU_HAS_NO_UNALIGNED
-- 
GitLab


From 4f57f06ce2186c31c3da52386125dc57b1cd6f96 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Thu, 10 Mar 2022 06:27:26 -0700
Subject: [PATCH 1224/1586] io_uring: add support for IORING_OP_MSG_RING
 command

This adds support for IORING_OP_MSG_RING, which allows an SQE to signal
another ring. That allows either waking up someone waiting on the ring,
or even passing a 64-bit value via the user_data field in the CQE.

sqe->fd must contain the fd of a ring that should receive the CQE.
sqe->off will be propagated to the cqe->user_data on the target ring,
and sqe->len will be propagated to cqe->res. The results CQE will have
IORING_CQE_F_MSG set in its flags, to indicate that this CQE was generated
from a messaging request rather than a SQE issued locally on that ring.
This effectively allows passing a 64-bit and a 32-bit quantify between
the two rings.

This request type has the following request specific error cases:

- -EBADFD. Set if the sqe->fd doesn't point to a file descriptor that is
  of the io_uring type.
- -EOVERFLOW. Set if we were not able to deliver a request to the target
  ring.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c                 | 55 +++++++++++++++++++++++++++++++++++
 include/uapi/linux/io_uring.h |  3 ++
 2 files changed, 58 insertions(+)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 0f5e999e569f8..36b001365a797 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -706,6 +706,12 @@ struct io_hardlink {
 	int				flags;
 };
 
+struct io_msg {
+	struct file			*file;
+	u64 user_data;
+	u32 len;
+};
+
 struct io_async_connect {
 	struct sockaddr_storage		address;
 };
@@ -871,6 +877,7 @@ struct io_kiocb {
 		struct io_mkdir		mkdir;
 		struct io_symlink	symlink;
 		struct io_hardlink	hardlink;
+		struct io_msg		msg;
 	};
 
 	u8				opcode;
@@ -1121,6 +1128,9 @@ static const struct io_op_def io_op_defs[] = {
 	[IORING_OP_MKDIRAT] = {},
 	[IORING_OP_SYMLINKAT] = {},
 	[IORING_OP_LINKAT] = {},
+	[IORING_OP_MSG_RING] = {
+		.needs_file		= 1,
+	},
 };
 
 /* requests with any of those set should undergo io_disarm_next() */
@@ -4322,6 +4332,46 @@ static int io_nop(struct io_kiocb *req, unsigned int issue_flags)
 	return 0;
 }
 
+static int io_msg_ring_prep(struct io_kiocb *req,
+			    const struct io_uring_sqe *sqe)
+{
+	if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index ||
+		     sqe->rw_flags || sqe->splice_fd_in || sqe->buf_index ||
+		     sqe->personality))
+		return -EINVAL;
+
+	if (req->file->f_op != &io_uring_fops)
+		return -EBADFD;
+
+	req->msg.user_data = READ_ONCE(sqe->off);
+	req->msg.len = READ_ONCE(sqe->len);
+	return 0;
+}
+
+static int io_msg_ring(struct io_kiocb *req, unsigned int issue_flags)
+{
+	struct io_ring_ctx *target_ctx;
+	struct io_msg *msg = &req->msg;
+	int ret = -EOVERFLOW;
+	bool filled;
+
+	target_ctx = req->file->private_data;
+
+	spin_lock(&target_ctx->completion_lock);
+	filled = io_fill_cqe_aux(target_ctx, msg->user_data, msg->len,
+					IORING_CQE_F_MSG);
+	io_commit_cqring(target_ctx);
+	spin_unlock(&target_ctx->completion_lock);
+
+	if (filled) {
+		io_cqring_ev_posted(target_ctx);
+		ret = 0;
+	}
+
+	__io_req_complete(req, issue_flags, ret, 0);
+	return 0;
+}
+
 static int io_fsync_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
 	struct io_ring_ctx *ctx = req->ctx;
@@ -6700,6 +6750,8 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 		return io_symlinkat_prep(req, sqe);
 	case IORING_OP_LINKAT:
 		return io_linkat_prep(req, sqe);
+	case IORING_OP_MSG_RING:
+		return io_msg_ring_prep(req, sqe);
 	}
 
 	printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
@@ -6983,6 +7035,9 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
 	case IORING_OP_LINKAT:
 		ret = io_linkat(req, issue_flags);
 		break;
+	case IORING_OP_MSG_RING:
+		ret = io_msg_ring(req, issue_flags);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 42b2fe84dbcd5..8bd4bfdd9a898 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -143,6 +143,7 @@ enum {
 	IORING_OP_MKDIRAT,
 	IORING_OP_SYMLINKAT,
 	IORING_OP_LINKAT,
+	IORING_OP_MSG_RING,
 
 	/* this goes last, obviously */
 	IORING_OP_LAST,
@@ -199,9 +200,11 @@ struct io_uring_cqe {
  *
  * IORING_CQE_F_BUFFER	If set, the upper 16 bits are the buffer ID
  * IORING_CQE_F_MORE	If set, parent SQE will generate more CQE entries
+ * IORING_CQE_F_MSG	If set, CQE was generated with IORING_OP_MSG_RING
  */
 #define IORING_CQE_F_BUFFER		(1U << 0)
 #define IORING_CQE_F_MORE		(1U << 1)
+#define IORING_CQE_F_MSG		(1U << 2)
 
 enum {
 	IORING_CQE_BUFFER_SHIFT		= 16,
-- 
GitLab


From 950e79dd73131a263b2dd7fec521ceafd28d2724 Mon Sep 17 00:00:00 2001
From: Olivier Langlois <olivier@trillion01.com>
Date: Tue, 8 Mar 2022 17:17:21 -0500
Subject: [PATCH 1225/1586] io_uring: minor io_cqring_wait() optimization

Move up the block manipulating the sig variable to execute code
that may encounter an error and exit first before continuing
executing the rest of the function and avoid useless computations

Signed-off-by: Olivier Langlois <olivier@trillion01.com>
Link: https://lore.kernel.org/r/84513f7cc1b1fb31d8f4cb910aee033391d036b4.1646777484.git.olivier@trillion01.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 36b001365a797..3d2d47540e1b2 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -7919,14 +7919,6 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 			break;
 	} while (1);
 
-	if (uts) {
-		struct timespec64 ts;
-
-		if (get_timespec64(&ts, uts))
-			return -EFAULT;
-		timeout = ktime_add_ns(timespec64_to_ktime(ts), ktime_get_ns());
-	}
-
 	if (sig) {
 #ifdef CONFIG_COMPAT
 		if (in_compat_syscall())
@@ -7940,6 +7932,14 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 			return ret;
 	}
 
+	if (uts) {
+		struct timespec64 ts;
+
+		if (get_timespec64(&ts, uts))
+			return -EFAULT;
+		timeout = ktime_add_ns(timespec64_to_ktime(ts), ktime_get_ns());
+	}
+
 	init_waitqueue_func_entry(&iowq.wq, io_wake_function);
 	iowq.wq.private = current;
 	INIT_LIST_HEAD(&iowq.wq.entry);
-- 
GitLab


From adc8682ec69012b68d5ab7123e246d2ad9a6f94b Mon Sep 17 00:00:00 2001
From: Olivier Langlois <olivier@trillion01.com>
Date: Tue, 8 Mar 2022 17:17:26 -0500
Subject: [PATCH 1226/1586] io_uring: Add support for napi_busy_poll

The sqpoll thread can be used for performing the napi busy poll in a
similar way that it does io polling for file systems supporting direct
access bypassing the page cache.

The other way that io_uring can be used for napi busy poll is by
calling io_uring_enter() to get events.

If the user specify a timeout value, it is distributed between polling
and sleeping by using the systemwide setting
/proc/sys/net/core/busy_poll.

The changes have been tested with this program:
https://github.com/lano1106/io_uring_udp_ping

and the result is:
Without sqpoll:
NAPI busy loop disabled:
rtt min/avg/max/mdev = 40.631/42.050/58.667/1.547 us
NAPI busy loop enabled:
rtt min/avg/max/mdev = 30.619/31.753/61.433/1.456 us

With sqpoll:
NAPI busy loop disabled:
rtt min/avg/max/mdev = 42.087/44.438/59.508/1.533 us
NAPI busy loop enabled:
rtt min/avg/max/mdev = 35.779/37.347/52.201/0.924 us

Co-developed-by: Hao Xu <haoxu@linux.alibaba.com>
Signed-off-by: Hao Xu <haoxu@linux.alibaba.com>
Signed-off-by: Olivier Langlois <olivier@trillion01.com>
Link: https://lore.kernel.org/r/810bd9408ffc510ff08269e78dca9df4af0b9e4e.1646777484.git.olivier@trillion01.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 232 +++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 231 insertions(+), 1 deletion(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 3d2d47540e1b2..3d30f7b07677f 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -63,6 +63,7 @@
 #include <net/sock.h>
 #include <net/af_unix.h>
 #include <net/scm.h>
+#include <net/busy_poll.h>
 #include <linux/anon_inodes.h>
 #include <linux/sched/mm.h>
 #include <linux/uaccess.h>
@@ -401,6 +402,11 @@ struct io_ring_ctx {
 	struct list_head	sqd_list;
 
 	unsigned long		check_cq_overflow;
+#ifdef CONFIG_NET_RX_BUSY_POLL
+	/* used to track busy poll napi_id */
+	struct list_head	napi_list;
+	spinlock_t		napi_lock;	/* napi_list lock */
+#endif
 
 	struct {
 		unsigned		cached_cq_tail;
@@ -1513,6 +1519,10 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	INIT_WQ_LIST(&ctx->locked_free_list);
 	INIT_DELAYED_WORK(&ctx->fallback_work, io_fallback_req_func);
 	INIT_WQ_LIST(&ctx->submit_state.compl_reqs);
+#ifdef CONFIG_NET_RX_BUSY_POLL
+	INIT_LIST_HEAD(&ctx->napi_list);
+	spin_lock_init(&ctx->napi_lock);
+#endif
 	return ctx;
 err:
 	kfree(ctx->dummy_ubuf);
@@ -5581,6 +5591,108 @@ IO_NETOP_FN(send);
 IO_NETOP_FN(recv);
 #endif /* CONFIG_NET */
 
+#ifdef CONFIG_NET_RX_BUSY_POLL
+
+#define NAPI_TIMEOUT			(60 * SEC_CONVERSION)
+
+struct napi_entry {
+	struct list_head	list;
+	unsigned int		napi_id;
+	unsigned long		timeout;
+};
+
+/*
+ * Add busy poll NAPI ID from sk.
+ */
+static void io_add_napi(struct file *file, struct io_ring_ctx *ctx)
+{
+	unsigned int napi_id;
+	struct socket *sock;
+	struct sock *sk;
+	struct napi_entry *ne;
+
+	if (!net_busy_loop_on())
+		return;
+
+	sock = sock_from_file(file);
+	if (!sock)
+		return;
+
+	sk = sock->sk;
+	if (!sk)
+		return;
+
+	napi_id = READ_ONCE(sk->sk_napi_id);
+
+	/* Non-NAPI IDs can be rejected */
+	if (napi_id < MIN_NAPI_ID)
+		return;
+
+	spin_lock(&ctx->napi_lock);
+	list_for_each_entry(ne, &ctx->napi_list, list) {
+		if (ne->napi_id == napi_id) {
+			ne->timeout = jiffies + NAPI_TIMEOUT;
+			goto out;
+		}
+	}
+
+	ne = kmalloc(sizeof(*ne), GFP_NOWAIT);
+	if (!ne)
+		goto out;
+
+	ne->napi_id = napi_id;
+	ne->timeout = jiffies + NAPI_TIMEOUT;
+	list_add_tail(&ne->list, &ctx->napi_list);
+out:
+	spin_unlock(&ctx->napi_lock);
+}
+
+static inline void io_check_napi_entry_timeout(struct napi_entry *ne)
+{
+	if (time_after(jiffies, ne->timeout)) {
+		list_del(&ne->list);
+		kfree(ne);
+	}
+}
+
+/*
+ * Busy poll if globally on and supporting sockets found
+ */
+static bool io_napi_busy_loop(struct list_head *napi_list)
+{
+	struct napi_entry *ne, *n;
+
+	list_for_each_entry_safe(ne, n, napi_list, list) {
+		napi_busy_loop(ne->napi_id, NULL, NULL, true,
+			       BUSY_POLL_BUDGET);
+		io_check_napi_entry_timeout(ne);
+	}
+	return !list_empty(napi_list);
+}
+
+static void io_free_napi_list(struct io_ring_ctx *ctx)
+{
+	spin_lock(&ctx->napi_lock);
+	while (!list_empty(&ctx->napi_list)) {
+		struct napi_entry *ne =
+			list_first_entry(&ctx->napi_list, struct napi_entry,
+					 list);
+
+		list_del(&ne->list);
+		kfree(ne);
+	}
+	spin_unlock(&ctx->napi_lock);
+}
+#else
+static inline void io_add_napi(struct file *file, struct io_ring_ctx *ctx)
+{
+}
+
+static inline void io_free_napi_list(struct io_ring_ctx *ctx)
+{
+}
+#endif /* CONFIG_NET_RX_BUSY_POLL */
+
 struct io_poll_table {
 	struct poll_table_struct pt;
 	struct io_kiocb *req;
@@ -5727,6 +5839,7 @@ static int io_poll_check_events(struct io_kiocb *req)
 			if (unlikely(!filled))
 				return -ECANCELED;
 			io_cqring_ev_posted(ctx);
+			io_add_napi(req->file, ctx);
 		} else if (req->result) {
 			return 0;
 		}
@@ -5959,6 +6072,7 @@ static int __io_arm_poll_handler(struct io_kiocb *req,
 		__io_poll_execute(req, mask);
 		return 0;
 	}
+	io_add_napi(req->file, req->ctx);
 
 	/*
 	 * Release ownership. If someone tried to queue a tw while it was
@@ -7706,7 +7820,13 @@ static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries)
 		    !(ctx->flags & IORING_SETUP_R_DISABLED))
 			ret = io_submit_sqes(ctx, to_submit);
 		mutex_unlock(&ctx->uring_lock);
-
+#ifdef CONFIG_NET_RX_BUSY_POLL
+		spin_lock(&ctx->napi_lock);
+		if (!list_empty(&ctx->napi_list) &&
+		    io_napi_busy_loop(&ctx->napi_list))
+			++ret;
+		spin_unlock(&ctx->napi_lock);
+#endif
 		if (to_submit && wq_has_sleeper(&ctx->sqo_sq_wait))
 			wake_up(&ctx->sqo_sq_wait);
 		if (creds)
@@ -7837,6 +7957,9 @@ struct io_wait_queue {
 	struct io_ring_ctx *ctx;
 	unsigned cq_tail;
 	unsigned nr_timeouts;
+#ifdef CONFIG_NET_RX_BUSY_POLL
+	unsigned busy_poll_to;
+#endif
 };
 
 static inline bool io_should_wake(struct io_wait_queue *iowq)
@@ -7898,6 +8021,87 @@ static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx,
 	return 1;
 }
 
+#ifdef CONFIG_NET_RX_BUSY_POLL
+static void io_adjust_busy_loop_timeout(struct timespec64 *ts,
+					struct io_wait_queue *iowq)
+{
+	unsigned busy_poll_to = READ_ONCE(sysctl_net_busy_poll);
+	struct timespec64 pollto = ns_to_timespec64(1000 * (s64)busy_poll_to);
+
+	if (timespec64_compare(ts, &pollto) > 0) {
+		*ts = timespec64_sub(*ts, pollto);
+		iowq->busy_poll_to = busy_poll_to;
+	} else {
+		u64 to = timespec64_to_ns(ts);
+
+		do_div(to, 1000);
+		iowq->busy_poll_to = to;
+		ts->tv_sec = 0;
+		ts->tv_nsec = 0;
+	}
+}
+
+static inline bool io_busy_loop_timeout(unsigned long start_time,
+					unsigned long bp_usec)
+{
+	if (bp_usec) {
+		unsigned long end_time = start_time + bp_usec;
+		unsigned long now = busy_loop_current_time();
+
+		return time_after(now, end_time);
+	}
+	return true;
+}
+
+static bool io_busy_loop_end(void *p, unsigned long start_time)
+{
+	struct io_wait_queue *iowq = p;
+
+	return signal_pending(current) ||
+	       io_should_wake(iowq) ||
+	       io_busy_loop_timeout(start_time, iowq->busy_poll_to);
+}
+
+static void io_blocking_napi_busy_loop(struct list_head *napi_list,
+				       struct io_wait_queue *iowq)
+{
+	unsigned long start_time =
+		list_is_singular(napi_list) ? 0 :
+		busy_loop_current_time();
+
+	do {
+		if (list_is_singular(napi_list)) {
+			struct napi_entry *ne =
+				list_first_entry(napi_list,
+						 struct napi_entry, list);
+
+			napi_busy_loop(ne->napi_id, io_busy_loop_end, iowq,
+				       true, BUSY_POLL_BUDGET);
+			io_check_napi_entry_timeout(ne);
+			break;
+		}
+	} while (io_napi_busy_loop(napi_list) &&
+		 !io_busy_loop_end(iowq, start_time));
+}
+
+static void io_putback_napi_list(struct io_ring_ctx *ctx,
+				 struct list_head *napi_list)
+{
+	struct napi_entry *cne, *lne;
+
+	spin_lock(&ctx->napi_lock);
+	list_for_each_entry(cne, &ctx->napi_list, list)
+		list_for_each_entry(lne, napi_list, list)
+			if (cne->napi_id == lne->napi_id) {
+				list_del(&lne->list);
+				kfree(lne);
+				break;
+			}
+	list_splice(napi_list, &ctx->napi_list);
+	spin_unlock(&ctx->napi_lock);
+}
+#endif /* CONFIG_NET_RX_BUSY_POLL */
+
 /*
  * Wait until events become available, if we don't already have some. The
  * application must reap them itself, as they reside on the shared cq ring.
@@ -7910,6 +8114,9 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 	struct io_rings *rings = ctx->rings;
 	ktime_t timeout = KTIME_MAX;
 	int ret;
+#ifdef CONFIG_NET_RX_BUSY_POLL
+	LIST_HEAD(local_napi_list);
+#endif
 
 	do {
 		io_cqring_overflow_flush(ctx);
@@ -7932,13 +8139,29 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 			return ret;
 	}
 
+#ifdef CONFIG_NET_RX_BUSY_POLL
+	iowq.busy_poll_to = 0;
+	if (!(ctx->flags & IORING_SETUP_SQPOLL)) {
+		spin_lock(&ctx->napi_lock);
+		list_splice_init(&ctx->napi_list, &local_napi_list);
+		spin_unlock(&ctx->napi_lock);
+	}
+#endif
 	if (uts) {
 		struct timespec64 ts;
 
 		if (get_timespec64(&ts, uts))
 			return -EFAULT;
+#ifdef CONFIG_NET_RX_BUSY_POLL
+		if (!list_empty(&local_napi_list))
+			io_adjust_busy_loop_timeout(&ts, &iowq);
+#endif
 		timeout = ktime_add_ns(timespec64_to_ktime(ts), ktime_get_ns());
 	}
+#ifdef CONFIG_NET_RX_BUSY_POLL
+	else if (!list_empty(&local_napi_list))
+		iowq.busy_poll_to = READ_ONCE(sysctl_net_busy_poll);
+#endif
 
 	init_waitqueue_func_entry(&iowq.wq, io_wake_function);
 	iowq.wq.private = current;
@@ -7948,6 +8171,12 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 	iowq.cq_tail = READ_ONCE(ctx->rings->cq.head) + min_events;
 
 	trace_io_uring_cqring_wait(ctx, min_events);
+#ifdef CONFIG_NET_RX_BUSY_POLL
+	if (iowq.busy_poll_to)
+		io_blocking_napi_busy_loop(&local_napi_list, &iowq);
+	if (!list_empty(&local_napi_list))
+		io_putback_napi_list(ctx, &local_napi_list);
+#endif
 	do {
 		/* if we can't even flush overflow, don't wait for more */
 		if (!io_cqring_overflow_flush(ctx)) {
@@ -9709,6 +9938,7 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
 	io_req_caches_free(ctx);
 	if (ctx->hash_map)
 		io_wq_put_hash(ctx->hash_map);
+	io_free_napi_list(ctx);
 	kfree(ctx->cancel_hash);
 	kfree(ctx->dummy_ubuf);
 	kfree(ctx);
-- 
GitLab


From 1b6fe6e0dfecf8c82a64fb87148ad9333fa2f62e Mon Sep 17 00:00:00 2001
From: Stefan Roesch <shr@fb.com>
Date: Fri, 25 Feb 2022 10:53:26 -0800
Subject: [PATCH 1227/1586] io-uring: Make statx API stable

One of the key architectual tenets is to keep the parameters for
io-uring stable. After the call has been submitted, its value can
be changed. Unfortunaltely this is not the case for the current statx
implementation.

IO-Uring change:
This changes replaces the const char * filename pointer in the io_statx
structure with a struct filename *. In addition it also creates the
filename object during the prepare phase.

With this change, the opcode also needs to invoke cleanup, so the
filename object gets freed after processing the request.

fs change:
This replaces the const char* __user filename parameter in the two
functions do_statx and vfs_statx with a struct filename *. In addition
to be able to correctly construct a filename object a new helper
function getname_statx_lookup_flags is introduced. The function makes
sure that do_statx and vfs_statx is invoked with the correct lookup flags.

Signed-off-by: Stefan Roesch <shr@fb.com>
Link: https://lore.kernel.org/r/20220225185326.1373304-2-shr@fb.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/internal.h |  4 +++-
 fs/io_uring.c | 22 ++++++++++++++++++++--
 fs/stat.c     | 49 +++++++++++++++++++++++++++++++++++--------------
 3 files changed, 58 insertions(+), 17 deletions(-)

diff --git a/fs/internal.h b/fs/internal.h
index 8590c973c2f42..56c0477f42153 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -184,7 +184,9 @@ int sb_init_dio_done_wq(struct super_block *sb);
 /*
  * fs/stat.c:
  */
-int do_statx(int dfd, const char __user *filename, unsigned flags,
+
+int getname_statx_lookup_flags(int flags);
+int do_statx(int dfd, struct filename *filename, unsigned int flags,
 	     unsigned int mask, struct statx __user *buffer);
 
 /*
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 3d30f7b07677f..257d659fc25f1 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -664,7 +664,7 @@ struct io_statx {
 	int				dfd;
 	unsigned int			mask;
 	unsigned int			flags;
-	const char __user		*filename;
+	struct filename			*filename;
 	struct statx __user		*buffer;
 };
 
@@ -4913,6 +4913,8 @@ static int io_fadvise(struct io_kiocb *req, unsigned int issue_flags)
 
 static int io_statx_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
+	const char __user *path;
+
 	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
 		return -EINVAL;
 	if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in)
@@ -4922,10 +4924,22 @@ static int io_statx_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 
 	req->statx.dfd = READ_ONCE(sqe->fd);
 	req->statx.mask = READ_ONCE(sqe->len);
-	req->statx.filename = u64_to_user_ptr(READ_ONCE(sqe->addr));
+	path = u64_to_user_ptr(READ_ONCE(sqe->addr));
 	req->statx.buffer = u64_to_user_ptr(READ_ONCE(sqe->addr2));
 	req->statx.flags = READ_ONCE(sqe->statx_flags);
 
+	req->statx.filename = getname_flags(path,
+					getname_statx_lookup_flags(req->statx.flags),
+					NULL);
+
+	if (IS_ERR(req->statx.filename)) {
+		int ret = PTR_ERR(req->statx.filename);
+
+		req->statx.filename = NULL;
+		return ret;
+	}
+
+	req->flags |= REQ_F_NEED_CLEANUP;
 	return 0;
 }
 
@@ -7007,6 +7021,10 @@ static void io_clean_op(struct io_kiocb *req)
 			putname(req->hardlink.oldpath);
 			putname(req->hardlink.newpath);
 			break;
+		case IORING_OP_STATX:
+			if (req->statx.filename)
+				putname(req->statx.filename);
+			break;
 		}
 	}
 	if ((req->flags & REQ_F_POLLED) && req->apoll) {
diff --git a/fs/stat.c b/fs/stat.c
index 28d2020ba1f42..7f734be0e57ec 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -184,6 +184,20 @@ int vfs_fstat(int fd, struct kstat *stat)
 	return error;
 }
 
+int getname_statx_lookup_flags(int flags)
+{
+	int lookup_flags = 0;
+
+	if (!(flags & AT_SYMLINK_NOFOLLOW))
+		lookup_flags |= LOOKUP_FOLLOW;
+	if (!(flags & AT_NO_AUTOMOUNT))
+		lookup_flags |= LOOKUP_AUTOMOUNT;
+	if (flags & AT_EMPTY_PATH)
+		lookup_flags |= LOOKUP_EMPTY;
+
+	return lookup_flags;
+}
+
 /**
  * vfs_statx - Get basic and extra attributes by filename
  * @dfd: A file descriptor representing the base dir for a relative filename
@@ -199,26 +213,19 @@ int vfs_fstat(int fd, struct kstat *stat)
  *
  * 0 will be returned on success, and a -ve error code if unsuccessful.
  */
-static int vfs_statx(int dfd, const char __user *filename, int flags,
+static int vfs_statx(int dfd, struct filename *filename, int flags,
 	      struct kstat *stat, u32 request_mask)
 {
 	struct path path;
-	unsigned lookup_flags = 0;
+	unsigned int lookup_flags = getname_statx_lookup_flags(flags);
 	int error;
 
 	if (flags & ~(AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT | AT_EMPTY_PATH |
 		      AT_STATX_SYNC_TYPE))
 		return -EINVAL;
 
-	if (!(flags & AT_SYMLINK_NOFOLLOW))
-		lookup_flags |= LOOKUP_FOLLOW;
-	if (!(flags & AT_NO_AUTOMOUNT))
-		lookup_flags |= LOOKUP_AUTOMOUNT;
-	if (flags & AT_EMPTY_PATH)
-		lookup_flags |= LOOKUP_EMPTY;
-
 retry:
-	error = user_path_at(dfd, filename, lookup_flags, &path);
+	error = filename_lookup(dfd, filename, lookup_flags, &path, NULL);
 	if (error)
 		goto out;
 
@@ -240,8 +247,15 @@ out:
 int vfs_fstatat(int dfd, const char __user *filename,
 			      struct kstat *stat, int flags)
 {
-	return vfs_statx(dfd, filename, flags | AT_NO_AUTOMOUNT,
-			 stat, STATX_BASIC_STATS);
+	int ret;
+	int statx_flags = flags | AT_NO_AUTOMOUNT;
+	struct filename *name;
+
+	name = getname_flags(filename, getname_statx_lookup_flags(statx_flags), NULL);
+	ret = vfs_statx(dfd, name, statx_flags, stat, STATX_BASIC_STATS);
+	putname(name);
+
+	return ret;
 }
 
 #ifdef __ARCH_WANT_OLD_STAT
@@ -602,7 +616,7 @@ cp_statx(const struct kstat *stat, struct statx __user *buffer)
 	return copy_to_user(buffer, &tmp, sizeof(tmp)) ? -EFAULT : 0;
 }
 
-int do_statx(int dfd, const char __user *filename, unsigned flags,
+int do_statx(int dfd, struct filename *filename, unsigned int flags,
 	     unsigned int mask, struct statx __user *buffer)
 {
 	struct kstat stat;
@@ -636,7 +650,14 @@ SYSCALL_DEFINE5(statx,
 		unsigned int, mask,
 		struct statx __user *, buffer)
 {
-	return do_statx(dfd, filename, flags, mask, buffer);
+	int ret;
+	struct filename *name;
+
+	name = getname_flags(filename, getname_statx_lookup_flags(flags), NULL);
+	ret = do_statx(dfd, name, flags, mask, buffer);
+	putname(name);
+
+	return ret;
 }
 
 #ifdef CONFIG_COMPAT
-- 
GitLab


From 9af177ee3ef14c17ef10893c257fa4e2008a3d74 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Wed, 9 Mar 2022 16:46:07 -0700
Subject: [PATCH 1228/1586] io_uring: retry early for reads if we can poll

Most of the logic in io_read() deals with regular files, and in some ways
it would make sense to split the handling into S_IFREG and others. But
at least for retry, we don't need to bother setting up a bunch of state
just to abort in the loop later. In particular, don't bother forcing
setup of async data for a normal non-vectored read when we don't need it.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 3d30f7b07677f..4d8366bc226f1 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -3773,6 +3773,9 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
 
 	if (ret == -EAGAIN || (req->flags & REQ_F_REISSUE)) {
 		req->flags &= ~REQ_F_REISSUE;
+		/* if we can poll, just do that */
+		if (req->opcode == IORING_OP_READ && file_can_poll(req->file))
+			return -EAGAIN;
 		/* IOPOLL retry should happen for io-wq threads */
 		if (!force_nonblock && !(req->ctx->flags & IORING_SETUP_IOPOLL))
 			goto done;
-- 
GitLab


From 2be2eb02e2f5a096c351e5b70c46cfef259dabcd Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Thu, 10 Mar 2022 09:54:25 -0700
Subject: [PATCH 1229/1586] io_uring: ensure reads re-import for selected
 buffers

If we drop buffers between scheduling a retry, then we need to re-import
when we start the request again.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 4d8366bc226f1..584b36dcd0aa3 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -3737,6 +3737,16 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
 		if (unlikely(ret < 0))
 			return ret;
 	} else {
+		/*
+		 * Safe and required to re-import if we're using provided
+		 * buffers, as we dropped the selected one before retry.
+		 */
+		if (req->flags & REQ_F_BUFFER_SELECT) {
+			ret = io_import_iovec(READ, req, &iovec, s, issue_flags);
+			if (unlikely(ret < 0))
+				return ret;
+		}
+
 		rw = req->async_data;
 		s = &rw->s;
 		/*
-- 
GitLab


From b1c62645758eb438179e3a0769168cb7b0a94d6b Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Wed, 9 Mar 2022 11:27:52 -0700
Subject: [PATCH 1230/1586] io_uring: recycle provided buffers if request goes
 async

If we are using provided buffers, it's less than useful to have a buffer
selected and pinned if a request needs to go async or arms poll for
notification trigger on when we can process it.

Recycle the buffer in those events, so we don't pin it for the duration
of the request.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 584b36dcd0aa3..3145c9cacee09 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -269,6 +269,7 @@ struct io_buffer {
 	__u64 addr;
 	__u32 len;
 	__u16 bid;
+	__u16 bgid;
 };
 
 struct io_restriction {
@@ -1351,6 +1352,36 @@ static inline unsigned int io_put_kbuf(struct io_kiocb *req,
 	return cflags;
 }
 
+static void io_kbuf_recycle(struct io_kiocb *req)
+{
+	struct io_ring_ctx *ctx = req->ctx;
+	struct io_buffer *head, *buf;
+
+	if (likely(!(req->flags & REQ_F_BUFFER_SELECTED)))
+		return;
+
+	lockdep_assert_held(&ctx->uring_lock);
+
+	buf = req->kbuf;
+
+	head = xa_load(&ctx->io_buffers, buf->bgid);
+	if (head) {
+		list_add(&buf->list, &head->list);
+	} else {
+		int ret;
+
+		INIT_LIST_HEAD(&buf->list);
+
+		/* if we fail, just leave buffer attached */
+		ret = xa_insert(&ctx->io_buffers, buf->bgid, buf, GFP_KERNEL);
+		if (unlikely(ret < 0))
+			return;
+	}
+
+	req->flags &= ~REQ_F_BUFFER_SELECTED;
+	req->kbuf = NULL;
+}
+
 static bool io_match_task(struct io_kiocb *head, struct task_struct *task,
 			  bool cancel_all)
 	__must_hold(&req->ctx->timeout_lock)
@@ -4763,6 +4794,7 @@ static int io_add_buffers(struct io_ring_ctx *ctx, struct io_provide_buf *pbuf,
 		buf->addr = addr;
 		buf->len = min_t(__u32, pbuf->len, MAX_RW_COUNT);
 		buf->bid = bid;
+		buf->bgid = pbuf->bgid;
 		addr += pbuf->len;
 		bid++;
 		if (!*head) {
@@ -7395,8 +7427,12 @@ static void io_queue_sqe_arm_apoll(struct io_kiocb *req)
 		 * Queued up for async execution, worker will release
 		 * submit reference when the iocb is actually submitted.
 		 */
+		io_kbuf_recycle(req);
 		io_queue_async_work(req, NULL);
 		break;
+	case IO_APOLL_OK:
+		io_kbuf_recycle(req);
+		break;
 	}
 
 	if (linked_timeout)
-- 
GitLab


From afcf5441b9ff22ac57244cd45ff102ebc2e32d1a Mon Sep 17 00:00:00 2001
From: Dan Li <ashimida@linux.alibaba.com>
Date: Wed, 2 Mar 2022 23:43:23 -0800
Subject: [PATCH 1231/1586] arm64: Add gcc Shadow Call Stack support

Shadow call stacks will be available in GCC >= 12, this patch makes
the corresponding kernel configuration available when compiling
the kernel with the gcc.

Note that the implementation in GCC is slightly different from Clang.
With SCS enabled, functions will only pop x30 once in the epilogue,
like:

   str     x30, [x18], #8
   stp     x29, x30, [sp, #-16]!
   ......
-  ldp     x29, x30, [sp], #16	  //clang
+  ldr     x29, [sp], #16	  //GCC
   ldr     x30, [x18, #-8]!

Link: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=ce09ab17ddd21f73ff2caf6eec3b0ee9b0e1a11e

Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Dan Li <ashimida@linux.alibaba.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20220303074323.86282-1-ashimida@linux.alibaba.com
---
 arch/Kconfig                 | 19 ++++++++++---------
 arch/arm64/Kconfig           |  2 +-
 include/linux/compiler-gcc.h |  4 ++++
 3 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index c5b50bfe31c1a..cabfac22f2fbf 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -599,21 +599,22 @@ config STACKPROTECTOR_STRONG
 config ARCH_SUPPORTS_SHADOW_CALL_STACK
 	bool
 	help
-	  An architecture should select this if it supports Clang's Shadow
-	  Call Stack and implements runtime support for shadow stack
+	  An architecture should select this if it supports the compiler's
+	  Shadow Call Stack and implements runtime support for shadow stack
 	  switching.
 
 config SHADOW_CALL_STACK
-	bool "Clang Shadow Call Stack"
-	depends on CC_IS_CLANG && ARCH_SUPPORTS_SHADOW_CALL_STACK
+	bool "Shadow Call Stack"
+	depends on ARCH_SUPPORTS_SHADOW_CALL_STACK
 	depends on DYNAMIC_FTRACE_WITH_REGS || !FUNCTION_GRAPH_TRACER
 	help
-	  This option enables Clang's Shadow Call Stack, which uses a
-	  shadow stack to protect function return addresses from being
-	  overwritten by an attacker. More information can be found in
-	  Clang's documentation:
+	  This option enables the compiler's Shadow Call Stack, which
+	  uses a shadow stack to protect function return addresses from
+	  being overwritten by an attacker. More information can be found
+	  in the compiler's documentation:
 
-	    https://clang.llvm.org/docs/ShadowCallStack.html
+	  - Clang: https://clang.llvm.org/docs/ShadowCallStack.html
+	  - GCC: https://gcc.gnu.org/onlinedocs/gcc/Instrumentation-Options.html#Instrumentation-Options
 
 	  Note that security guarantees in the kernel differ from the
 	  ones documented for user space. The kernel must store addresses
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index b8ab790555c81..a6733b972212d 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1239,7 +1239,7 @@ config HW_PERF_EVENTS
 config ARCH_HAS_FILTER_PGPROT
 	def_bool y
 
-# Supported by clang >= 7.0
+# Supported by clang >= 7.0 or GCC >= 12.0.0
 config CC_HAVE_SHADOW_CALL_STACK
 	def_bool $(cc-option, -fsanitize=shadow-call-stack -ffixed-x18)
 
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index ccbbd31b3aae5..deff5b308470a 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -97,6 +97,10 @@
 #define KASAN_ABI_VERSION 4
 #endif
 
+#ifdef CONFIG_SHADOW_CALL_STACK
+#define __noscs __attribute__((__no_sanitize__("shadow-call-stack")))
+#endif
+
 #if __has_attribute(__no_sanitize_address__)
 #define __no_sanitize_address __attribute__((no_sanitize_address))
 #else
-- 
GitLab


From 524bb1da785a7ae43dd413cd392b5071c6c367f8 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Sat, 5 Mar 2022 14:02:14 +0300
Subject: [PATCH 1232/1586] PM: core: keep irq flags in
 device_pm_check_callbacks()

The function device_pm_check_callbacks() can be called under the spin
lock (in the reported case it happens from genpd_add_device() ->
dev_pm_domain_set(), when the genpd uses spinlocks rather than mutexes.

However this function uncoditionally uses spin_lock_irq() /
spin_unlock_irq(), thus not preserving the CPU flags. Use the
irqsave/irqrestore instead.

The backtrace for the reference:
[    2.752010] ------------[ cut here ]------------
[    2.756769] raw_local_irq_restore() called with IRQs enabled
[    2.762596] WARNING: CPU: 4 PID: 1 at kernel/locking/irqflag-debug.c:10 warn_bogus_irq_restore+0x34/0x50
[    2.772338] Modules linked in:
[    2.775487] CPU: 4 PID: 1 Comm: swapper/0 Tainted: G S                5.17.0-rc6-00384-ge330d0d82eff-dirty #684
[    2.781384] Freeing initrd memory: 46024K
[    2.785839] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[    2.785841] pc : warn_bogus_irq_restore+0x34/0x50
[    2.785844] lr : warn_bogus_irq_restore+0x34/0x50
[    2.785846] sp : ffff80000805b7d0
[    2.785847] x29: ffff80000805b7d0 x28: 0000000000000000 x27: 0000000000000002
[    2.785850] x26: ffffd40e80930b18 x25: ffff7ee2329192b8 x24: ffff7edfc9f60800
[    2.785853] x23: ffffd40e80930b18 x22: ffffd40e80930d30 x21: ffff7edfc0dffa00
[    2.785856] x20: ffff7edfc09e3768 x19: 0000000000000000 x18: ffffffffffffffff
[    2.845775] x17: 6572206f74206465 x16: 6c696166203a3030 x15: ffff80008805b4f7
[    2.853108] x14: 0000000000000000 x13: ffffd40e809550b0 x12: 00000000000003d8
[    2.860441] x11: 0000000000000148 x10: ffffd40e809550b0 x9 : ffffd40e809550b0
[    2.867774] x8 : 00000000ffffefff x7 : ffffd40e809ad0b0 x6 : ffffd40e809ad0b0
[    2.875107] x5 : 000000000000bff4 x4 : 0000000000000000 x3 : 0000000000000000
[    2.882440] x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff7edfc03a8000
[    2.889774] Call trace:
[    2.892290]  warn_bogus_irq_restore+0x34/0x50
[    2.896770]  _raw_spin_unlock_irqrestore+0x94/0xa0
[    2.901690]  genpd_unlock_spin+0x20/0x30
[    2.905724]  genpd_add_device+0x100/0x2d0
[    2.909850]  __genpd_dev_pm_attach+0xa8/0x23c
[    2.914329]  genpd_dev_pm_attach_by_id+0xc4/0x190
[    2.919167]  genpd_dev_pm_attach_by_name+0x3c/0xd0
[    2.924086]  dev_pm_domain_attach_by_name+0x24/0x30
[    2.929102]  psci_dt_attach_cpu+0x24/0x90
[    2.933230]  psci_cpuidle_probe+0x2d4/0x46c
[    2.937534]  platform_probe+0x68/0xe0
[    2.941304]  really_probe.part.0+0x9c/0x2fc
[    2.945605]  __driver_probe_device+0x98/0x144
[    2.950085]  driver_probe_device+0x44/0x15c
[    2.954385]  __device_attach_driver+0xb8/0x120
[    2.958950]  bus_for_each_drv+0x78/0xd0
[    2.962896]  __device_attach+0xd8/0x180
[    2.966843]  device_initial_probe+0x14/0x20
[    2.971144]  bus_probe_device+0x9c/0xa4
[    2.975092]  device_add+0x380/0x88c
[    2.978679]  platform_device_add+0x114/0x234
[    2.983067]  platform_device_register_full+0x100/0x190
[    2.988344]  psci_idle_init+0x6c/0xb0
[    2.992113]  do_one_initcall+0x74/0x3a0
[    2.996060]  kernel_init_freeable+0x2fc/0x384
[    3.000543]  kernel_init+0x28/0x130
[    3.004132]  ret_from_fork+0x10/0x20
[    3.007817] irq event stamp: 319826
[    3.011404] hardirqs last  enabled at (319825): [<ffffd40e7eda0268>] __up_console_sem+0x78/0x84
[    3.020332] hardirqs last disabled at (319826): [<ffffd40e7fd6d9d8>] el1_dbg+0x24/0x8c
[    3.028458] softirqs last  enabled at (318312): [<ffffd40e7ec90410>] _stext+0x410/0x588
[    3.036678] softirqs last disabled at (318299): [<ffffd40e7ed1bf68>] __irq_exit_rcu+0x158/0x174
[    3.045607] ---[ end trace 0000000000000000 ]---

Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/main.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 04ea92cbd9cfd..08c8a69d7b810 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -2018,7 +2018,9 @@ static bool pm_ops_is_empty(const struct dev_pm_ops *ops)
 
 void device_pm_check_callbacks(struct device *dev)
 {
-	spin_lock_irq(&dev->power.lock);
+	unsigned long flags;
+
+	spin_lock_irqsave(&dev->power.lock, flags);
 	dev->power.no_pm_callbacks =
 		(!dev->bus || (pm_ops_is_empty(dev->bus->pm) &&
 		 !dev->bus->suspend && !dev->bus->resume)) &&
@@ -2027,7 +2029,7 @@ void device_pm_check_callbacks(struct device *dev)
 		(!dev->pm_domain || pm_ops_is_empty(&dev->pm_domain->ops)) &&
 		(!dev->driver || (pm_ops_is_empty(dev->driver->pm) &&
 		 !dev->driver->suspend && !dev->driver->resume));
-	spin_unlock_irq(&dev->power.lock);
+	spin_unlock_irqrestore(&dev->power.lock, flags);
 }
 
 bool dev_pm_skip_suspend(struct device *dev)
-- 
GitLab


From 85750bcd480c74b13661ee2c9db49de500fd2823 Mon Sep 17 00:00:00 2001
From: Lianjie Zhang <zhanglianjie@uniontech.com>
Date: Thu, 10 Mar 2022 22:38:30 +0800
Subject: [PATCH 1233/1586] cpufreq: unify show() and store() naming and use
 __ATTR_XX

Usually, sysfs attributes have .show and .store and their naming
convention is filename_show() and filename_store().

But in cpufreq the naming convention of these functions is
show_filename() and store_filename() which prevents __ATTR_RW() and
__ATTR_RO() from being used in there to simplify code.

Accordingly, change the naming convention of the sysfs .show and
.store methods in cpufreq to follow the one expected by __ATTR_RW()
and __ATTR_RO() and use these macros in that code.

Signed-off-by: Lianjie Zhang <zhanglianjie@uniontech.com>
[ rjw: Subject and changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq_conservative.c | 10 +++++-----
 drivers/cpufreq/cpufreq_governor.c     |  6 +++---
 drivers/cpufreq/cpufreq_governor.h     | 12 +++++-------
 drivers/cpufreq/cpufreq_ondemand.c     | 10 +++++-----
 4 files changed, 18 insertions(+), 20 deletions(-)

diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c
index 08515f7e515fe..b6bd0ff353234 100644
--- a/drivers/cpufreq/cpufreq_conservative.c
+++ b/drivers/cpufreq/cpufreq_conservative.c
@@ -146,7 +146,7 @@ static unsigned int cs_dbs_update(struct cpufreq_policy *policy)
 
 /************************** sysfs interface ************************/
 
-static ssize_t store_sampling_down_factor(struct gov_attr_set *attr_set,
+static ssize_t sampling_down_factor_store(struct gov_attr_set *attr_set,
 					  const char *buf, size_t count)
 {
 	struct dbs_data *dbs_data = to_dbs_data(attr_set);
@@ -161,7 +161,7 @@ static ssize_t store_sampling_down_factor(struct gov_attr_set *attr_set,
 	return count;
 }
 
-static ssize_t store_up_threshold(struct gov_attr_set *attr_set,
+static ssize_t up_threshold_store(struct gov_attr_set *attr_set,
 				  const char *buf, size_t count)
 {
 	struct dbs_data *dbs_data = to_dbs_data(attr_set);
@@ -177,7 +177,7 @@ static ssize_t store_up_threshold(struct gov_attr_set *attr_set,
 	return count;
 }
 
-static ssize_t store_down_threshold(struct gov_attr_set *attr_set,
+static ssize_t down_threshold_store(struct gov_attr_set *attr_set,
 				    const char *buf, size_t count)
 {
 	struct dbs_data *dbs_data = to_dbs_data(attr_set);
@@ -195,7 +195,7 @@ static ssize_t store_down_threshold(struct gov_attr_set *attr_set,
 	return count;
 }
 
-static ssize_t store_ignore_nice_load(struct gov_attr_set *attr_set,
+static ssize_t ignore_nice_load_store(struct gov_attr_set *attr_set,
 				      const char *buf, size_t count)
 {
 	struct dbs_data *dbs_data = to_dbs_data(attr_set);
@@ -220,7 +220,7 @@ static ssize_t store_ignore_nice_load(struct gov_attr_set *attr_set,
 	return count;
 }
 
-static ssize_t store_freq_step(struct gov_attr_set *attr_set, const char *buf,
+static ssize_t freq_step_store(struct gov_attr_set *attr_set, const char *buf,
 			       size_t count)
 {
 	struct dbs_data *dbs_data = to_dbs_data(attr_set);
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index 63f7c219062b9..0d42cf8b88d8a 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -27,7 +27,7 @@ static DEFINE_MUTEX(gov_dbs_data_mutex);
 
 /* Common sysfs tunables */
 /*
- * store_sampling_rate - update sampling rate effective immediately if needed.
+ * sampling_rate_store - update sampling rate effective immediately if needed.
  *
  * If new rate is smaller than the old, simply updating
  * dbs.sampling_rate might not be appropriate. For example, if the
@@ -41,7 +41,7 @@ static DEFINE_MUTEX(gov_dbs_data_mutex);
  * This must be called with dbs_data->mutex held, otherwise traversing
  * policy_dbs_list isn't safe.
  */
-ssize_t store_sampling_rate(struct gov_attr_set *attr_set, const char *buf,
+ssize_t sampling_rate_store(struct gov_attr_set *attr_set, const char *buf,
 			    size_t count)
 {
 	struct dbs_data *dbs_data = to_dbs_data(attr_set);
@@ -80,7 +80,7 @@ ssize_t store_sampling_rate(struct gov_attr_set *attr_set, const char *buf,
 
 	return count;
 }
-EXPORT_SYMBOL_GPL(store_sampling_rate);
+EXPORT_SYMBOL_GPL(sampling_rate_store);
 
 /**
  * gov_update_cpu_data - Update CPU load data.
diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h
index bab8e61403771..a5a0bc3cc23ec 100644
--- a/drivers/cpufreq/cpufreq_governor.h
+++ b/drivers/cpufreq/cpufreq_governor.h
@@ -51,7 +51,7 @@ static inline struct dbs_data *to_dbs_data(struct gov_attr_set *attr_set)
 }
 
 #define gov_show_one(_gov, file_name)					\
-static ssize_t show_##file_name						\
+static ssize_t file_name##_show						\
 (struct gov_attr_set *attr_set, char *buf)				\
 {									\
 	struct dbs_data *dbs_data = to_dbs_data(attr_set);		\
@@ -60,7 +60,7 @@ static ssize_t show_##file_name						\
 }
 
 #define gov_show_one_common(file_name)					\
-static ssize_t show_##file_name						\
+static ssize_t file_name##_show						\
 (struct gov_attr_set *attr_set, char *buf)				\
 {									\
 	struct dbs_data *dbs_data = to_dbs_data(attr_set);		\
@@ -68,12 +68,10 @@ static ssize_t show_##file_name						\
 }
 
 #define gov_attr_ro(_name)						\
-static struct governor_attr _name =					\
-__ATTR(_name, 0444, show_##_name, NULL)
+static struct governor_attr _name = __ATTR_RO(_name)
 
 #define gov_attr_rw(_name)						\
-static struct governor_attr _name =					\
-__ATTR(_name, 0644, show_##_name, store_##_name)
+static struct governor_attr _name = __ATTR_RW(_name)
 
 /* Common to all CPUs of a policy */
 struct policy_dbs_info {
@@ -176,7 +174,7 @@ void od_register_powersave_bias_handler(unsigned int (*f)
 		(struct cpufreq_policy *, unsigned int, unsigned int),
 		unsigned int powersave_bias);
 void od_unregister_powersave_bias_handler(void);
-ssize_t store_sampling_rate(struct gov_attr_set *attr_set, const char *buf,
+ssize_t sampling_rate_store(struct gov_attr_set *attr_set, const char *buf,
 			    size_t count);
 void gov_update_cpu_data(struct dbs_data *dbs_data);
 #endif /* _CPUFREQ_GOVERNOR_H */
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index 6a41ea4729b8a..e8fbf970ff078 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -202,7 +202,7 @@ static unsigned int od_dbs_update(struct cpufreq_policy *policy)
 /************************** sysfs interface ************************/
 static struct dbs_governor od_dbs_gov;
 
-static ssize_t store_io_is_busy(struct gov_attr_set *attr_set, const char *buf,
+static ssize_t io_is_busy_store(struct gov_attr_set *attr_set, const char *buf,
 				size_t count)
 {
 	struct dbs_data *dbs_data = to_dbs_data(attr_set);
@@ -220,7 +220,7 @@ static ssize_t store_io_is_busy(struct gov_attr_set *attr_set, const char *buf,
 	return count;
 }
 
-static ssize_t store_up_threshold(struct gov_attr_set *attr_set,
+static ssize_t up_threshold_store(struct gov_attr_set *attr_set,
 				  const char *buf, size_t count)
 {
 	struct dbs_data *dbs_data = to_dbs_data(attr_set);
@@ -237,7 +237,7 @@ static ssize_t store_up_threshold(struct gov_attr_set *attr_set,
 	return count;
 }
 
-static ssize_t store_sampling_down_factor(struct gov_attr_set *attr_set,
+static ssize_t sampling_down_factor_store(struct gov_attr_set *attr_set,
 					  const char *buf, size_t count)
 {
 	struct dbs_data *dbs_data = to_dbs_data(attr_set);
@@ -265,7 +265,7 @@ static ssize_t store_sampling_down_factor(struct gov_attr_set *attr_set,
 	return count;
 }
 
-static ssize_t store_ignore_nice_load(struct gov_attr_set *attr_set,
+static ssize_t ignore_nice_load_store(struct gov_attr_set *attr_set,
 				      const char *buf, size_t count)
 {
 	struct dbs_data *dbs_data = to_dbs_data(attr_set);
@@ -290,7 +290,7 @@ static ssize_t store_ignore_nice_load(struct gov_attr_set *attr_set,
 	return count;
 }
 
-static ssize_t store_powersave_bias(struct gov_attr_set *attr_set,
+static ssize_t powersave_bias_store(struct gov_attr_set *attr_set,
 				    const char *buf, size_t count)
 {
 	struct dbs_data *dbs_data = to_dbs_data(attr_set);
-- 
GitLab


From e86801b0ff1c5c6d1f78232f7e3b52c0b0631560 Mon Sep 17 00:00:00 2001
From: Ilkka Koskinen <ilkka@os.amperecomputing.com>
Date: Tue, 8 Mar 2022 18:07:49 -0800
Subject: [PATCH 1234/1586] ACPI: tables: Add AGDI to the list of known table
 signatures

Add AGDI to the list of known ACPI table signatures to allow the
kernel to recognize it when upgrading tables via initrd.

Signed-off-by: Ilkka Koskinen <ilkka@os.amperecomputing.com>
Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/tables.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c
index 369eb998c3d1d..ceee808f7f2a2 100644
--- a/drivers/acpi/tables.c
+++ b/drivers/acpi/tables.c
@@ -545,7 +545,7 @@ static const char table_sigs[][ACPI_NAMESEG_SIZE] __initconst = {
 	ACPI_SIG_WDDT, ACPI_SIG_WDRT, ACPI_SIG_DSDT, ACPI_SIG_FADT,
 	ACPI_SIG_PSDT, ACPI_SIG_RSDT, ACPI_SIG_XSDT, ACPI_SIG_SSDT,
 	ACPI_SIG_IORT, ACPI_SIG_NFIT, ACPI_SIG_HMAT, ACPI_SIG_PPTT,
-	ACPI_SIG_NHLT, ACPI_SIG_AEST, ACPI_SIG_CEDT };
+	ACPI_SIG_NHLT, ACPI_SIG_AEST, ACPI_SIG_CEDT, ACPI_SIG_AGDI };
 
 #define ACPI_HEADER_SIZE sizeof(struct acpi_table_header)
 
-- 
GitLab


From a2a591fb76e6f5461dfd04715b69c317e50c43a5 Mon Sep 17 00:00:00 2001
From: Ilkka Koskinen <ilkka@os.amperecomputing.com>
Date: Tue, 8 Mar 2022 18:07:50 -0800
Subject: [PATCH 1235/1586] ACPI: AGDI: Add driver for Arm Generic Diagnostic
 Dump and Reset device

ACPI for Arm Components 1.1 Platform Design Document v1.1 [0] specifices
Arm Generic Diagnostic Device Interface (AGDI). It allows an admin to
issue diagnostic dump and reset via an SDEI event or an interrupt.
This patch implements SDEI path.

[0] https://developer.arm.com/documentation/den0093/latest/

Signed-off-by: Ilkka Koskinen <ilkka@os.amperecomputing.com>
Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Acked-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/arm64/Kconfig  |  10 ++++
 drivers/acpi/arm64/Makefile |   1 +
 drivers/acpi/arm64/agdi.c   | 116 ++++++++++++++++++++++++++++++++++++
 drivers/acpi/bus.c          |   2 +
 include/linux/acpi_agdi.h   |  13 ++++
 5 files changed, 142 insertions(+)
 create mode 100644 drivers/acpi/arm64/agdi.c
 create mode 100644 include/linux/acpi_agdi.h

diff --git a/drivers/acpi/arm64/Kconfig b/drivers/acpi/arm64/Kconfig
index 6dba187f4f2e0..d4a72835f328c 100644
--- a/drivers/acpi/arm64/Kconfig
+++ b/drivers/acpi/arm64/Kconfig
@@ -8,3 +8,13 @@ config ACPI_IORT
 
 config ACPI_GTDT
 	bool
+
+config ACPI_AGDI
+	bool "Arm Generic Diagnostic Dump and Reset Device Interface"
+	depends on ARM_SDE_INTERFACE
+	help
+	  Arm Generic Diagnostic Dump and Reset Device Interface (AGDI) is
+	  a standard that enables issuing a non-maskable diagnostic dump and
+	  reset command.
+
+	  If set, the kernel parses AGDI table and listens for the command.
diff --git a/drivers/acpi/arm64/Makefile b/drivers/acpi/arm64/Makefile
index 66acbe77f46e5..7b9e4045659d9 100644
--- a/drivers/acpi/arm64/Makefile
+++ b/drivers/acpi/arm64/Makefile
@@ -1,4 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_ACPI_AGDI) 	+= agdi.o
 obj-$(CONFIG_ACPI_IORT) 	+= iort.o
 obj-$(CONFIG_ACPI_GTDT) 	+= gtdt.o
 obj-y				+= dma.o
diff --git a/drivers/acpi/arm64/agdi.c b/drivers/acpi/arm64/agdi.c
new file mode 100644
index 0000000000000..4df337d545b73
--- /dev/null
+++ b/drivers/acpi/arm64/agdi.c
@@ -0,0 +1,116 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * This file implements handling of
+ * Arm Generic Diagnostic Dump and Reset Interface table (AGDI)
+ *
+ * Copyright (c) 2022, Ampere Computing LLC
+ */
+
+#define pr_fmt(fmt) "ACPI: AGDI: " fmt
+
+#include <linux/acpi.h>
+#include <linux/arm_sdei.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+
+struct agdi_data {
+	int sdei_event;
+};
+
+static int agdi_sdei_handler(u32 sdei_event, struct pt_regs *regs, void *arg)
+{
+	nmi_panic(regs, "Arm Generic Diagnostic Dump and Reset SDEI event issued");
+	return 0;
+}
+
+static int agdi_sdei_probe(struct platform_device *pdev,
+			   struct agdi_data *adata)
+{
+	int err;
+
+	err = sdei_event_register(adata->sdei_event, agdi_sdei_handler, pdev);
+	if (err) {
+		dev_err(&pdev->dev, "Failed to register for SDEI event %d",
+			adata->sdei_event);
+		return err;
+	}
+
+	err = sdei_event_enable(adata->sdei_event);
+	if (err)  {
+		sdei_event_unregister(adata->sdei_event);
+		dev_err(&pdev->dev, "Failed to enable event %d\n",
+			adata->sdei_event);
+		return err;
+	}
+
+	return 0;
+}
+
+static int agdi_probe(struct platform_device *pdev)
+{
+	struct agdi_data *adata = dev_get_platdata(&pdev->dev);
+
+	if (!adata)
+		return -EINVAL;
+
+	return agdi_sdei_probe(pdev, adata);
+}
+
+static int agdi_remove(struct platform_device *pdev)
+{
+	struct agdi_data *adata = dev_get_platdata(&pdev->dev);
+	int err, i;
+
+	err = sdei_event_disable(adata->sdei_event);
+	if (err)
+		return err;
+
+	for (i = 0; i < 3; i++) {
+		err = sdei_event_unregister(adata->sdei_event);
+		if (err != -EINPROGRESS)
+			break;
+
+		schedule();
+	}
+
+	return err;
+}
+
+static struct platform_driver agdi_driver = {
+	.driver = {
+		.name = "agdi",
+	},
+	.probe = agdi_probe,
+	.remove = agdi_remove,
+};
+
+void __init acpi_agdi_init(void)
+{
+	struct acpi_table_agdi *agdi_table;
+	struct agdi_data pdata;
+	struct platform_device *pdev;
+	acpi_status status;
+
+	status = acpi_get_table(ACPI_SIG_AGDI, 0,
+				(struct acpi_table_header **) &agdi_table);
+	if (ACPI_FAILURE(status))
+		return;
+
+	if (agdi_table->flags & ACPI_AGDI_SIGNALING_MODE) {
+		pr_warn("Interrupt signaling is not supported");
+		goto err_put_table;
+	}
+
+	pdata.sdei_event = agdi_table->sdei_event;
+
+	pdev = platform_device_register_data(NULL, "agdi", 0, &pdata, sizeof(pdata));
+	if (IS_ERR(pdev))
+		goto err_put_table;
+
+	if (platform_driver_register(&agdi_driver))
+		platform_device_unregister(pdev);
+
+err_put_table:
+	acpi_put_table((struct acpi_table_header *)agdi_table);
+}
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index cd374210fb9f7..747a7f98fc1ba 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -26,6 +26,7 @@
 #include <asm/mpspec.h>
 #include <linux/dmi.h>
 #endif
+#include <linux/acpi_agdi.h>
 #include <linux/acpi_iort.h>
 #include <linux/acpi_viot.h>
 #include <linux/pci.h>
@@ -1341,6 +1342,7 @@ static int __init acpi_init(void)
 	acpi_debugger_init();
 	acpi_setup_sb_notify_handler();
 	acpi_viot_init();
+	acpi_agdi_init();
 	return 0;
 }
 
diff --git a/include/linux/acpi_agdi.h b/include/linux/acpi_agdi.h
new file mode 100644
index 0000000000000..f477f0b452fa0
--- /dev/null
+++ b/include/linux/acpi_agdi.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef __ACPI_AGDI_H__
+#define __ACPI_AGDI_H__
+
+#include <linux/acpi.h>
+
+#ifdef CONFIG_ACPI_AGDI
+void __init acpi_agdi_init(void);
+#else
+static inline void acpi_agdi_init(void) {}
+#endif
+#endif /* __ACPI_AGDI_H__ */
-- 
GitLab


From 1132e6de11cfc334b44f609792664f1bc3055c52 Mon Sep 17 00:00:00 2001
From: Ionela Voinescu <ionela.voinescu@arm.com>
Date: Thu, 10 Mar 2022 14:54:49 +0000
Subject: [PATCH 1236/1586] x86, ACPI: rename init_freq_invariance_cppc() to
 arch_init_invariance_cppc()

init_freq_invariance_cppc() was called in acpi_cppc_processor_probe(),
after CPU performance information and controls were populated from the
per-cpu _CPC objects.

But these _CPC objects provide information that helps with both CPU
(u-arch) and frequency invariance. Therefore, change the function name
to a more generic one, while adding the arch_ prefix, as this function
is expected to be defined differently by different architectures.

Signed-off-by: Ionela Voinescu <ionela.voinescu@arm.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Tested-by: Valentin Schneider <valentin.schneider@arm.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 arch/x86/include/asm/topology.h | 2 +-
 drivers/acpi/cppc_acpi.c        | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 9c73d62f72760..c512c647b4fc6 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -227,7 +227,7 @@ static inline void init_freq_invariance(bool secondary, bool cppc_ready)
 
 #ifdef CONFIG_ACPI_CPPC_LIB
 void init_freq_invariance_cppc(void);
-#define init_freq_invariance_cppc init_freq_invariance_cppc
+#define arch_init_invariance_cppc init_freq_invariance_cppc
 
 bool amd_set_max_freq_ratio(u64 *ratio);
 #else
diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c
index 866560cbb082c..bfd142ab4e073 100644
--- a/drivers/acpi/cppc_acpi.c
+++ b/drivers/acpi/cppc_acpi.c
@@ -633,8 +633,8 @@ static bool is_cppc_supported(int revision, int num_ent)
  *  )
  */
 
-#ifndef init_freq_invariance_cppc
-static inline void init_freq_invariance_cppc(void) { }
+#ifndef arch_init_invariance_cppc
+static inline void arch_init_invariance_cppc(void) { }
 #endif
 
 /**
@@ -816,7 +816,7 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr)
 		goto out_free;
 	}
 
-	init_freq_invariance_cppc();
+	arch_init_invariance_cppc();
 
 	kfree(output.pointer);
 	return 0;
-- 
GitLab


From 9924fbb51e0ae30b8d7eec7c1c839d74da9678b3 Mon Sep 17 00:00:00 2001
From: Ionela Voinescu <ionela.voinescu@arm.com>
Date: Thu, 10 Mar 2022 14:54:50 +0000
Subject: [PATCH 1237/1586] arch_topology: obtain cpu capacity using
 information from CPPC

Define topology_init_cpu_capacity_cppc() to use highest performance
values from _CPC objects to obtain and set maximum capacity information
for each CPU. acpi_cppc_processor_probe() is a good point at which to
trigger the initialization of CPU (u-arch) capacity values, as at this
point the highest performance values can be obtained from each CPU's
_CPC objects. Architectures can therefore use this functionality
through arch_init_invariance_cppc().

The performance scale used by CPPC is a unified scale for all CPUs in
the system. Therefore, by obtaining the raw highest performance values
from the _CPC objects, and normalizing them on the [0, 1024] capacity
scale, used by the task scheduler, we obtain the CPU capacity of each
CPU.

While an ACPI Notify(0x85) could alert about a change in the highest
performance value, which should in turn retrigger the CPU capacity
computations, this notification is not currently handled by the ACPI
processor driver. When supported, a call to arch_init_invariance_cppc()
would perform the update.

Signed-off-by: Ionela Voinescu <ionela.voinescu@arm.com>
Acked-by: Sudeep Holla <sudeep.holla@arm.com>
Tested-by: Valentin Schneider <valentin.schneider@arm.com>
Tested-by: Yicong Yang <yangyicong@hisilicon.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/arch_topology.c  | 45 ++++++++++++++++++++++++++++++++---
 include/linux/arch_topology.h |  4 ++++
 2 files changed, 46 insertions(+), 3 deletions(-)

diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
index 976154140f0b0..1d6636ebaac5b 100644
--- a/drivers/base/arch_topology.c
+++ b/drivers/base/arch_topology.c
@@ -339,6 +339,46 @@ bool __init topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu)
 	return !ret;
 }
 
+#ifdef CONFIG_ACPI_CPPC_LIB
+#include <acpi/cppc_acpi.h>
+
+void topology_init_cpu_capacity_cppc(void)
+{
+	struct cppc_perf_caps perf_caps;
+	int cpu;
+
+	if (likely(acpi_disabled || !acpi_cpc_valid()))
+		return;
+
+	raw_capacity = kcalloc(num_possible_cpus(), sizeof(*raw_capacity),
+			       GFP_KERNEL);
+	if (!raw_capacity)
+		return;
+
+	for_each_possible_cpu(cpu) {
+		if (!cppc_get_perf_caps(cpu, &perf_caps) &&
+		    (perf_caps.highest_perf >= perf_caps.nominal_perf) &&
+		    (perf_caps.highest_perf >= perf_caps.lowest_perf)) {
+			raw_capacity[cpu] = perf_caps.highest_perf;
+			pr_debug("cpu_capacity: CPU%d cpu_capacity=%u (raw).\n",
+				 cpu, raw_capacity[cpu]);
+			continue;
+		}
+
+		pr_err("cpu_capacity: CPU%d missing/invalid highest performance.\n", cpu);
+		pr_err("cpu_capacity: partial information: fallback to 1024 for all CPUs\n");
+		goto exit;
+	}
+
+	topology_normalize_cpu_scale();
+	schedule_work(&update_topology_flags_work);
+	pr_debug("cpu_capacity: cpu_capacity initialization done\n");
+
+exit:
+	free_raw_capacity();
+}
+#endif
+
 #ifdef CONFIG_CPU_FREQ
 static cpumask_var_t cpus_to_visit;
 static void parsing_done_workfn(struct work_struct *work);
@@ -387,9 +427,8 @@ static int __init register_cpufreq_notifier(void)
 	int ret;
 
 	/*
-	 * on ACPI-based systems we need to use the default cpu capacity
-	 * until we have the necessary code to parse the cpu capacity, so
-	 * skip registering cpufreq notifier.
+	 * On ACPI-based systems skip registering cpufreq notifier as cpufreq
+	 * information is not needed for cpu capacity initialization.
 	 */
 	if (!acpi_disabled || !raw_capacity)
 		return -EINVAL;
diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h
index cce6136b300a6..58cbe18d825c2 100644
--- a/include/linux/arch_topology.h
+++ b/include/linux/arch_topology.h
@@ -11,6 +11,10 @@
 void topology_normalize_cpu_scale(void);
 int topology_update_cpu_topology(void);
 
+#ifdef CONFIG_ACPI_CPPC_LIB
+void topology_init_cpu_capacity_cppc(void);
+#endif
+
 struct device_node;
 bool topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu);
 
-- 
GitLab


From 82909316caac1ef3320b94c57bd0915aac90b0bd Mon Sep 17 00:00:00 2001
From: Ionela Voinescu <ionela.voinescu@arm.com>
Date: Thu, 10 Mar 2022 14:54:51 +0000
Subject: [PATCH 1238/1586] arm64, topology: enable use of
 init_cpu_capacity_cppc()

Now that the arch topology driver provides a method of setting CPU
capacity values based on information on highest performance from CPPC,
use this functionality on arm64 platforms.

Signed-off-by: Ionela Voinescu <ionela.voinescu@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Tested-by: Valentin Schneider <valentin.schneider@arm.com>
Tested-by: Yicong Yang <yangyicong@hisilicon.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 arch/arm64/include/asm/topology.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h
index f386b90a79c85..9fab663dd2de9 100644
--- a/arch/arm64/include/asm/topology.h
+++ b/arch/arm64/include/asm/topology.h
@@ -24,6 +24,10 @@ void update_freq_counters_refs(void);
 #define arch_scale_freq_capacity topology_get_freq_scale
 #define arch_scale_freq_invariant topology_scale_freq_invariant
 
+#ifdef CONFIG_ACPI_CPPC_LIB
+#define arch_init_invariance_cppc topology_init_cpu_capacity_cppc
+#endif
+
 /* Replace task scheduler's default cpu-invariant accounting */
 #define arch_scale_cpu_capacity topology_get_cpu_scale
 
-- 
GitLab


From bcbb7bf6ccde7cb969a5642879832bc84ebf06a3 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Thu, 10 Mar 2022 12:59:35 -0700
Subject: [PATCH 1239/1586] io_uring: allow submissions to continue on error

By default, io_uring will stop submitting a batch of requests if we run
into an error submitting a request. This isn't strictly necessary, as
the error result is passed out-of-band via a CQE anyway. And it can be
a bit confusing for some applications.

Provide a way to setup a ring that will continue submitting on error,
when the error CQE has been posted.

There's still one case that will break out of submission. If we fail
allocating a request, then we'll still return -ENOMEM. We could in theory
post a CQE for that condition too even if we never got a request. Leave
that for a potential followup.

Reported-by: Dylan Yudaken <dylany@fb.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c                 | 12 +++++++++---
 include/uapi/linux/io_uring.h |  1 +
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 3145c9cacee09..229b31d644ef7 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -7801,8 +7801,14 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
 		}
 		/* will complete beyond this point, count as submitted */
 		submitted++;
-		if (io_submit_sqe(ctx, req, sqe))
-			break;
+		if (io_submit_sqe(ctx, req, sqe)) {
+			/*
+			 * Continue submitting even for sqe failure if the
+			 * ring was setup with IORING_SETUP_SUBMIT_ALL
+			 */
+			if (!(ctx->flags & IORING_SETUP_SUBMIT_ALL))
+				break;
+		}
 	} while (submitted < nr);
 
 	if (unlikely(submitted != nr)) {
@@ -11265,7 +11271,7 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params)
 	if (p.flags & ~(IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL |
 			IORING_SETUP_SQ_AFF | IORING_SETUP_CQSIZE |
 			IORING_SETUP_CLAMP | IORING_SETUP_ATTACH_WQ |
-			IORING_SETUP_R_DISABLED))
+			IORING_SETUP_R_DISABLED | IORING_SETUP_SUBMIT_ALL))
 		return -EINVAL;
 
 	return  io_uring_create(entries, &p, params);
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 8bd4bfdd9a898..d2be4eb220082 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -101,6 +101,7 @@ enum {
 #define IORING_SETUP_CLAMP	(1U << 4)	/* clamp SQ/CQ ring sizes */
 #define IORING_SETUP_ATTACH_WQ	(1U << 5)	/* attach to existing wq */
 #define IORING_SETUP_R_DISABLED	(1U << 6)	/* start with ring disabled */
+#define IORING_SETUP_SUBMIT_ALL	(1U << 7)	/* continue submit on error */
 
 enum {
 	IORING_OP_NOP,
-- 
GitLab


From ab1fd5abb75001d37e1fef5f310951b56f41ad58 Mon Sep 17 00:00:00 2001
From: Hector Martin <marcan@marcan.st>
Date: Thu, 10 Mar 2022 04:21:18 +0900
Subject: [PATCH 1240/1586] dt-bindings: interrupt-controller: apple,aic2: New
 binding for AICv2

This new incompatible revision of the AIC peripheral introduces
multi-die support. This binding is based on apple,aic, but
changes interrupt-cells to add a new die argument.

Also adds a second reg entry to specify the offset of the event
register. Inexplicably, the capability registers allow us to compute
other register offsets, but not this one. This allows us to keep
forward-compatibility with future SoCs that will likely implement
different die counts, thus shifting the event register. Apple also
specify the offset explicitly in their device tree...

Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Hector Martin <marcan@marcan.st>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220309192123.152028-3-marcan@marcan.st
---
 .../interrupt-controller/apple,aic2.yaml      | 98 +++++++++++++++++++
 MAINTAINERS                                   |  2 +-
 2 files changed, 99 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/devicetree/bindings/interrupt-controller/apple,aic2.yaml

diff --git a/Documentation/devicetree/bindings/interrupt-controller/apple,aic2.yaml b/Documentation/devicetree/bindings/interrupt-controller/apple,aic2.yaml
new file mode 100644
index 0000000000000..47a78a167aba7
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/apple,aic2.yaml
@@ -0,0 +1,98 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/interrupt-controller/apple,aic2.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Apple Interrupt Controller 2
+
+maintainers:
+  - Hector Martin <marcan@marcan.st>
+
+description: |
+  The Apple Interrupt Controller 2 is a simple interrupt controller present on
+  Apple ARM SoC platforms starting with t600x (M1 Pro and Max).
+
+  It provides the following features:
+
+  - Level-triggered hardware IRQs wired to SoC blocks
+    - Single mask bit per IRQ
+    - Automatic masking on event delivery (auto-ack)
+    - Software triggering (ORed with hw line)
+  - Automatic prioritization (single event/ack register per CPU, lower IRQs =
+    higher priority)
+  - Automatic masking on ack
+  - Support for multiple dies
+
+  This device also represents the FIQ interrupt sources on platforms using AIC,
+  which do not go through a discrete interrupt controller. It also handles
+  FIQ-based Fast IPIs.
+
+properties:
+  compatible:
+    items:
+      - const: apple,t6000-aic
+      - const: apple,aic2
+
+  interrupt-controller: true
+
+  '#interrupt-cells':
+    const: 4
+    description: |
+      The 1st cell contains the interrupt type:
+        - 0: Hardware IRQ
+        - 1: FIQ
+
+      The 2nd cell contains the die ID.
+
+      The next cell contains the interrupt number.
+        - HW IRQs: interrupt number
+        - FIQs:
+          - 0: physical HV timer
+          - 1: virtual HV timer
+          - 2: physical guest timer
+          - 3: virtual guest timer
+
+      The last cell contains the interrupt flags. This is normally
+      IRQ_TYPE_LEVEL_HIGH (4).
+
+  reg:
+    items:
+      - description: Address and size of the main AIC2 registers.
+      - description: Address and size of the AIC2 Event register.
+
+  reg-names:
+    items:
+      - const: core
+      - const: event
+
+  power-domains:
+    maxItems: 1
+
+required:
+  - compatible
+  - '#interrupt-cells'
+  - interrupt-controller
+  - reg
+  - reg-names
+
+additionalProperties: false
+
+allOf:
+  - $ref: /schemas/interrupt-controller.yaml#
+
+examples:
+  - |
+    soc {
+        #address-cells = <2>;
+        #size-cells = <2>;
+
+        aic: interrupt-controller@28e100000 {
+            compatible = "apple,t6000-aic", "apple,aic2";
+            #interrupt-cells = <4>;
+            interrupt-controller;
+            reg = <0x2 0x8e100000 0x0 0xc000>,
+                  <0x2 0x8e10c000 0x0 0x4>;
+            reg-names = "core", "event";
+        };
+    };
diff --git a/MAINTAINERS b/MAINTAINERS
index 69a2935daf6c4..7a920a2a5c7a1 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1767,7 +1767,7 @@ T:	git https://github.com/AsahiLinux/linux.git
 F:	Documentation/devicetree/bindings/arm/apple.yaml
 F:	Documentation/devicetree/bindings/arm/apple/*
 F:	Documentation/devicetree/bindings/i2c/apple,i2c.yaml
-F:	Documentation/devicetree/bindings/interrupt-controller/apple,aic.yaml
+F:	Documentation/devicetree/bindings/interrupt-controller/apple,*
 F:	Documentation/devicetree/bindings/mailbox/apple,mailbox.yaml
 F:	Documentation/devicetree/bindings/pci/apple,pcie.yaml
 F:	Documentation/devicetree/bindings/pinctrl/apple,pinctrl.yaml
-- 
GitLab


From 2cf68211664acd2e4bdd1fb66697137b30901981 Mon Sep 17 00:00:00 2001
From: Hector Martin <marcan@marcan.st>
Date: Thu, 10 Mar 2022 04:21:19 +0900
Subject: [PATCH 1241/1586] irqchip/apple-aic: Add Fast IPI support

The newer AICv2 present in t600x SoCs does not have legacy IPI support
at all. Since t8103 also supports Fast IPIs, implement support for this
first. The legacy IPI code is left as a fallback, so it can be
potentially used by older SoCs in the future.

The vIPI code is shared; only the IPI firing/acking bits change for Fast
IPIs.

Signed-off-by: Hector Martin <marcan@marcan.st>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220309192123.152028-4-marcan@marcan.st
---
 drivers/irqchip/irq-apple-aic.c | 122 ++++++++++++++++++++++++++++----
 1 file changed, 109 insertions(+), 13 deletions(-)

diff --git a/drivers/irqchip/irq-apple-aic.c b/drivers/irqchip/irq-apple-aic.c
index 38091ebb94033..613e0ebdabdc0 100644
--- a/drivers/irqchip/irq-apple-aic.c
+++ b/drivers/irqchip/irq-apple-aic.c
@@ -24,7 +24,7 @@
  * - Default "this CPU" register view and explicit per-CPU views
  *
  * In addition, this driver also handles FIQs, as these are routed to the same
- * IRQ vector. These are used for Fast IPIs (TODO), the ARMv8 timer IRQs, and
+ * IRQ vector. These are used for Fast IPIs, the ARMv8 timer IRQs, and
  * performance counters (TODO).
  *
  * Implementation notes:
@@ -52,9 +52,11 @@
 #include <linux/irqchip.h>
 #include <linux/irqchip/arm-vgic-info.h>
 #include <linux/irqdomain.h>
+#include <linux/jump_label.h>
 #include <linux/limits.h>
 #include <linux/of_address.h>
 #include <linux/slab.h>
+#include <asm/cputype.h>
 #include <asm/exception.h>
 #include <asm/sysreg.h>
 #include <asm/virt.h>
@@ -106,7 +108,6 @@
 
 /*
  * IMP-DEF sysregs that control FIQ sources
- * Note: sysreg-based IPIs are not supported yet.
  */
 
 /* Core PMC control register */
@@ -155,6 +156,10 @@
 #define SYS_IMP_APL_UPMSR_EL1		sys_reg(3, 7, 15, 6, 4)
 #define UPMSR_IACT			BIT(0)
 
+/* MPIDR fields */
+#define MPIDR_CPU(x)			MPIDR_AFFINITY_LEVEL(x, 0)
+#define MPIDR_CLUSTER(x)		MPIDR_AFFINITY_LEVEL(x, 1)
+
 #define AIC_NR_FIQ		4
 #define AIC_NR_SWIPI		32
 
@@ -173,11 +178,44 @@
 #define AIC_TMR_EL02_PHYS	AIC_TMR_GUEST_PHYS
 #define AIC_TMR_EL02_VIRT	AIC_TMR_GUEST_VIRT
 
+DEFINE_STATIC_KEY_TRUE(use_fast_ipi);
+
+struct aic_info {
+	int version;
+
+	/* Features */
+	bool fast_ipi;
+};
+
+static const struct aic_info aic1_info = {
+	.version	= 1,
+};
+
+static const struct aic_info aic1_fipi_info = {
+	.version	= 1,
+
+	.fast_ipi	= true,
+};
+
+static const struct of_device_id aic_info_match[] = {
+	{
+		.compatible = "apple,t8103-aic",
+		.data = &aic1_fipi_info,
+	},
+	{
+		.compatible = "apple,aic",
+		.data = &aic1_info,
+	},
+	{}
+};
+
 struct aic_irq_chip {
 	void __iomem *base;
 	struct irq_domain *hw_domain;
 	struct irq_domain *ipi_domain;
 	int nr_hw;
+
+	struct aic_info info;
 };
 
 static DEFINE_PER_CPU(uint32_t, aic_fiq_unmasked);
@@ -386,8 +424,12 @@ static void __exception_irq_entry aic_handle_fiq(struct pt_regs *regs)
 	 */
 
 	if (read_sysreg_s(SYS_IMP_APL_IPI_SR_EL1) & IPI_SR_PENDING) {
-		pr_err_ratelimited("Fast IPI fired. Acking.\n");
-		write_sysreg_s(IPI_SR_PENDING, SYS_IMP_APL_IPI_SR_EL1);
+		if (static_branch_likely(&use_fast_ipi)) {
+			aic_handle_ipi(regs);
+		} else {
+			pr_err_ratelimited("Fast IPI fired. Acking.\n");
+			write_sysreg_s(IPI_SR_PENDING, SYS_IMP_APL_IPI_SR_EL1);
+		}
 	}
 
 	if (TIMER_FIRING(read_sysreg(cntp_ctl_el0)))
@@ -563,6 +605,22 @@ static const struct irq_domain_ops aic_irq_domain_ops = {
  * IPI irqchip
  */
 
+static void aic_ipi_send_fast(int cpu)
+{
+	u64 mpidr = cpu_logical_map(cpu);
+	u64 my_mpidr = read_cpuid_mpidr();
+	u64 cluster = MPIDR_CLUSTER(mpidr);
+	u64 idx = MPIDR_CPU(mpidr);
+
+	if (MPIDR_CLUSTER(my_mpidr) == cluster)
+		write_sysreg_s(FIELD_PREP(IPI_RR_CPU, idx),
+			       SYS_IMP_APL_IPI_RR_LOCAL_EL1);
+	else
+		write_sysreg_s(FIELD_PREP(IPI_RR_CPU, idx) | FIELD_PREP(IPI_RR_CLUSTER, cluster),
+			       SYS_IMP_APL_IPI_RR_GLOBAL_EL1);
+	isb();
+}
+
 static void aic_ipi_mask(struct irq_data *d)
 {
 	u32 irq_bit = BIT(irqd_to_hwirq(d));
@@ -588,8 +646,12 @@ static void aic_ipi_unmask(struct irq_data *d)
 	 * If a pending vIPI was unmasked, raise a HW IPI to ourselves.
 	 * No barriers needed here since this is a self-IPI.
 	 */
-	if (atomic_read(this_cpu_ptr(&aic_vipi_flag)) & irq_bit)
-		aic_ic_write(ic, AIC_IPI_SEND, AIC_IPI_SEND_CPU(smp_processor_id()));
+	if (atomic_read(this_cpu_ptr(&aic_vipi_flag)) & irq_bit) {
+		if (static_branch_likely(&use_fast_ipi))
+			aic_ipi_send_fast(smp_processor_id());
+		else
+			aic_ic_write(ic, AIC_IPI_SEND, AIC_IPI_SEND_CPU(smp_processor_id()));
+	}
 }
 
 static void aic_ipi_send_mask(struct irq_data *d, const struct cpumask *mask)
@@ -617,8 +679,12 @@ static void aic_ipi_send_mask(struct irq_data *d, const struct cpumask *mask)
 		smp_mb__after_atomic();
 
 		if (!(pending & irq_bit) &&
-		    (atomic_read(per_cpu_ptr(&aic_vipi_enable, cpu)) & irq_bit))
-			send |= AIC_IPI_SEND_CPU(cpu);
+		    (atomic_read(per_cpu_ptr(&aic_vipi_enable, cpu)) & irq_bit)) {
+			if (static_branch_likely(&use_fast_ipi))
+				aic_ipi_send_fast(cpu);
+			else
+				send |= AIC_IPI_SEND_CPU(cpu);
+		}
 	}
 
 	/*
@@ -650,8 +716,16 @@ static void aic_handle_ipi(struct pt_regs *regs)
 	/*
 	 * Ack the IPI. We need to order this after the AIC event read, but
 	 * that is enforced by normal MMIO ordering guarantees.
+	 *
+	 * For the Fast IPI case, this needs to be ordered before the vIPI
+	 * handling below, so we need to isb();
 	 */
-	aic_ic_write(aic_irqc, AIC_IPI_ACK, AIC_IPI_OTHER);
+	if (static_branch_likely(&use_fast_ipi)) {
+		write_sysreg_s(IPI_SR_PENDING, SYS_IMP_APL_IPI_SR_EL1);
+		isb();
+	} else {
+		aic_ic_write(aic_irqc, AIC_IPI_ACK, AIC_IPI_OTHER);
+	}
 
 	/*
 	 * The mask read does not need to be ordered. Only we can change
@@ -679,7 +753,8 @@ static void aic_handle_ipi(struct pt_regs *regs)
 	 * No ordering needed here; at worst this just changes the timing of
 	 * when the next IPI will be delivered.
 	 */
-	aic_ic_write(aic_irqc, AIC_IPI_MASK_CLR, AIC_IPI_OTHER);
+	if (!static_branch_likely(&use_fast_ipi))
+		aic_ic_write(aic_irqc, AIC_IPI_MASK_CLR, AIC_IPI_OTHER);
 }
 
 static int aic_ipi_alloc(struct irq_domain *d, unsigned int virq,
@@ -776,10 +851,15 @@ static int aic_init_cpu(unsigned int cpu)
 	/*
 	 * Always keep IPIs unmasked at the hardware level (except auto-masking
 	 * by AIC during processing). We manage masks at the vIPI level.
+	 * These registers only exist on AICv1, AICv2 always uses fast IPIs.
 	 */
 	aic_ic_write(aic_irqc, AIC_IPI_ACK, AIC_IPI_SELF | AIC_IPI_OTHER);
-	aic_ic_write(aic_irqc, AIC_IPI_MASK_SET, AIC_IPI_SELF);
-	aic_ic_write(aic_irqc, AIC_IPI_MASK_CLR, AIC_IPI_OTHER);
+	if (static_branch_likely(&use_fast_ipi)) {
+		aic_ic_write(aic_irqc, AIC_IPI_MASK_SET, AIC_IPI_SELF | AIC_IPI_OTHER);
+	} else {
+		aic_ic_write(aic_irqc, AIC_IPI_MASK_SET, AIC_IPI_SELF);
+		aic_ic_write(aic_irqc, AIC_IPI_MASK_CLR, AIC_IPI_OTHER);
+	}
 
 	/* Initialize the local mask state */
 	__this_cpu_write(aic_fiq_unmasked, 0);
@@ -799,6 +879,7 @@ static int __init aic_of_ic_init(struct device_node *node, struct device_node *p
 	void __iomem *regs;
 	u32 info;
 	struct aic_irq_chip *irqc;
+	const struct of_device_id *match;
 
 	regs = of_iomap(node, 0);
 	if (WARN_ON(!regs))
@@ -808,12 +889,24 @@ static int __init aic_of_ic_init(struct device_node *node, struct device_node *p
 	if (!irqc)
 		return -ENOMEM;
 
-	aic_irqc = irqc;
 	irqc->base = regs;
 
+	match = of_match_node(aic_info_match, node);
+	if (!match)
+		return -ENODEV;
+
+	irqc->info = *(struct aic_info *)match->data;
+
+	aic_irqc = irqc;
+
 	info = aic_ic_read(irqc, AIC_INFO);
 	irqc->nr_hw = FIELD_GET(AIC_INFO_NR_HW, info);
 
+	if (irqc->info.fast_ipi)
+		static_branch_enable(&use_fast_ipi);
+	else
+		static_branch_disable(&use_fast_ipi);
+
 	irqc->hw_domain = irq_domain_create_linear(of_node_to_fwnode(node),
 						   irqc->nr_hw + AIC_NR_FIQ,
 						   &aic_irq_domain_ops, irqc);
@@ -845,6 +938,9 @@ static int __init aic_of_ic_init(struct device_node *node, struct device_node *p
 	if (!is_kernel_in_hyp_mode())
 		pr_info("Kernel running in EL1, mapping interrupts");
 
+	if (static_branch_likely(&use_fast_ipi))
+		pr_info("Using Fast IPIs");
+
 	cpuhp_setup_state(CPUHP_AP_IRQ_APPLE_AIC_STARTING,
 			  "irqchip/apple-aic/ipi:starting",
 			  aic_init_cpu, NULL);
-- 
GitLab


From 7c841f5f6fa3f991cb76b96cd2378337a74011b3 Mon Sep 17 00:00:00 2001
From: Hector Martin <marcan@marcan.st>
Date: Thu, 10 Mar 2022 04:21:20 +0900
Subject: [PATCH 1242/1586] irqchip/apple-aic: Switch to irq_domain_create_tree
 and sparse hwirqs

This allows us to directly use the hardware event number as the hwirq
number. Since IRQ events have bit 16 set (type=1), FIQs now move to
starting at hwirq number 0.

This will become more important once multi-die support is introduced in
a later commit.

Signed-off-by: Hector Martin <marcan@marcan.st>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220309192123.152028-5-marcan@marcan.st
---
 drivers/irqchip/irq-apple-aic.c | 71 ++++++++++++++++++---------------
 1 file changed, 39 insertions(+), 32 deletions(-)

diff --git a/drivers/irqchip/irq-apple-aic.c b/drivers/irqchip/irq-apple-aic.c
index 613e0ebdabdc0..96480389195d4 100644
--- a/drivers/irqchip/irq-apple-aic.c
+++ b/drivers/irqchip/irq-apple-aic.c
@@ -68,7 +68,7 @@
  */
 
 #define AIC_INFO		0x0004
-#define AIC_INFO_NR_HW		GENMASK(15, 0)
+#define AIC_INFO_NR_IRQ		GENMASK(15, 0)
 
 #define AIC_CONFIG		0x0010
 
@@ -77,7 +77,8 @@
 #define AIC_EVENT_TYPE		GENMASK(31, 16)
 #define AIC_EVENT_NUM		GENMASK(15, 0)
 
-#define AIC_EVENT_TYPE_HW	1
+#define AIC_EVENT_TYPE_FIQ	0 /* Software use */
+#define AIC_EVENT_TYPE_IRQ	1
 #define AIC_EVENT_TYPE_IPI	4
 #define AIC_EVENT_IPI_OTHER	1
 #define AIC_EVENT_IPI_SELF	2
@@ -160,6 +161,11 @@
 #define MPIDR_CPU(x)			MPIDR_AFFINITY_LEVEL(x, 0)
 #define MPIDR_CLUSTER(x)		MPIDR_AFFINITY_LEVEL(x, 1)
 
+#define AIC_IRQ_HWIRQ(x)	(FIELD_PREP(AIC_EVENT_TYPE, AIC_EVENT_TYPE_IRQ) | \
+				 FIELD_PREP(AIC_EVENT_NUM, x))
+#define AIC_FIQ_HWIRQ(x)	(FIELD_PREP(AIC_EVENT_TYPE, AIC_EVENT_TYPE_FIQ) | \
+				 FIELD_PREP(AIC_EVENT_NUM, x))
+#define AIC_HWIRQ_IRQ(x)	FIELD_GET(AIC_EVENT_NUM, x)
 #define AIC_NR_FIQ		4
 #define AIC_NR_SWIPI		32
 
@@ -213,7 +219,7 @@ struct aic_irq_chip {
 	void __iomem *base;
 	struct irq_domain *hw_domain;
 	struct irq_domain *ipi_domain;
-	int nr_hw;
+	int nr_irq;
 
 	struct aic_info info;
 };
@@ -243,18 +249,22 @@ static void aic_ic_write(struct aic_irq_chip *ic, u32 reg, u32 val)
 
 static void aic_irq_mask(struct irq_data *d)
 {
+	irq_hw_number_t hwirq = irqd_to_hwirq(d);
 	struct aic_irq_chip *ic = irq_data_get_irq_chip_data(d);
 
-	aic_ic_write(ic, AIC_MASK_SET + MASK_REG(irqd_to_hwirq(d)),
-		     MASK_BIT(irqd_to_hwirq(d)));
+	u32 irq = AIC_HWIRQ_IRQ(hwirq);
+
+	aic_ic_write(ic, AIC_MASK_SET + MASK_REG(irq), MASK_BIT(irq));
 }
 
 static void aic_irq_unmask(struct irq_data *d)
 {
+	irq_hw_number_t hwirq = irqd_to_hwirq(d);
 	struct aic_irq_chip *ic = irq_data_get_irq_chip_data(d);
 
-	aic_ic_write(ic, AIC_MASK_CLR + MASK_REG(d->hwirq),
-		     MASK_BIT(irqd_to_hwirq(d)));
+	u32 irq = AIC_HWIRQ_IRQ(hwirq);
+
+	aic_ic_write(ic, AIC_MASK_CLR + MASK_REG(irq), MASK_BIT(irq));
 }
 
 static void aic_irq_eoi(struct irq_data *d)
@@ -281,8 +291,8 @@ static void __exception_irq_entry aic_handle_irq(struct pt_regs *regs)
 		type = FIELD_GET(AIC_EVENT_TYPE, event);
 		irq = FIELD_GET(AIC_EVENT_NUM, event);
 
-		if (type == AIC_EVENT_TYPE_HW)
-			generic_handle_domain_irq(aic_irqc->hw_domain, irq);
+		if (type == AIC_EVENT_TYPE_IRQ)
+			generic_handle_domain_irq(aic_irqc->hw_domain, event);
 		else if (type == AIC_EVENT_TYPE_IPI && irq == 1)
 			aic_handle_ipi(regs);
 		else if (event != 0)
@@ -314,7 +324,7 @@ static int aic_irq_set_affinity(struct irq_data *d,
 	else
 		cpu = cpumask_any_and(mask_val, cpu_online_mask);
 
-	aic_ic_write(ic, AIC_TARGET_CPU + hwirq * 4, BIT(cpu));
+	aic_ic_write(ic, AIC_TARGET_CPU + AIC_HWIRQ_IRQ(hwirq) * 4, BIT(cpu));
 	irq_data_update_effective_affinity(d, cpumask_of(cpu));
 
 	return IRQ_SET_MASK_OK;
@@ -344,9 +354,7 @@ static struct irq_chip aic_chip = {
 
 static unsigned long aic_fiq_get_idx(struct irq_data *d)
 {
-	struct aic_irq_chip *ic = irq_data_get_irq_chip_data(d);
-
-	return irqd_to_hwirq(d) - ic->nr_hw;
+	return AIC_HWIRQ_IRQ(irqd_to_hwirq(d));
 }
 
 static void aic_fiq_set_mask(struct irq_data *d)
@@ -434,11 +442,11 @@ static void __exception_irq_entry aic_handle_fiq(struct pt_regs *regs)
 
 	if (TIMER_FIRING(read_sysreg(cntp_ctl_el0)))
 		generic_handle_domain_irq(aic_irqc->hw_domain,
-					  aic_irqc->nr_hw + AIC_TMR_EL0_PHYS);
+					  AIC_FIQ_HWIRQ(AIC_TMR_EL0_PHYS));
 
 	if (TIMER_FIRING(read_sysreg(cntv_ctl_el0)))
 		generic_handle_domain_irq(aic_irqc->hw_domain,
-					  aic_irqc->nr_hw + AIC_TMR_EL0_VIRT);
+					  AIC_FIQ_HWIRQ(AIC_TMR_EL0_VIRT));
 
 	if (is_kernel_in_hyp_mode()) {
 		uint64_t enabled = read_sysreg_s(SYS_IMP_APL_VM_TMR_FIQ_ENA_EL2);
@@ -446,12 +454,12 @@ static void __exception_irq_entry aic_handle_fiq(struct pt_regs *regs)
 		if ((enabled & VM_TMR_FIQ_ENABLE_P) &&
 		    TIMER_FIRING(read_sysreg_s(SYS_CNTP_CTL_EL02)))
 			generic_handle_domain_irq(aic_irqc->hw_domain,
-						  aic_irqc->nr_hw + AIC_TMR_EL02_PHYS);
+						  AIC_FIQ_HWIRQ(AIC_TMR_EL02_PHYS));
 
 		if ((enabled & VM_TMR_FIQ_ENABLE_V) &&
 		    TIMER_FIRING(read_sysreg_s(SYS_CNTV_CTL_EL02)))
 			generic_handle_domain_irq(aic_irqc->hw_domain,
-						  aic_irqc->nr_hw + AIC_TMR_EL02_VIRT);
+						  AIC_FIQ_HWIRQ(AIC_TMR_EL02_VIRT));
 	}
 
 	if ((read_sysreg_s(SYS_IMP_APL_PMCR0_EL1) & (PMCR0_IMODE | PMCR0_IACT)) ==
@@ -496,9 +504,9 @@ static struct irq_chip fiq_chip = {
 static int aic_irq_domain_map(struct irq_domain *id, unsigned int irq,
 			      irq_hw_number_t hw)
 {
-	struct aic_irq_chip *ic = id->host_data;
+	u32 type = FIELD_GET(AIC_EVENT_TYPE, hw);
 
-	if (hw < ic->nr_hw) {
+	if (type == AIC_EVENT_TYPE_IRQ) {
 		irq_domain_set_info(id, irq, hw, &aic_chip, id->host_data,
 				    handle_fasteoi_irq, NULL, NULL);
 		irqd_set_single_target(irq_desc_get_irq_data(irq_to_desc(irq)));
@@ -523,14 +531,14 @@ static int aic_irq_domain_translate(struct irq_domain *id,
 
 	switch (fwspec->param[0]) {
 	case AIC_IRQ:
-		if (fwspec->param[1] >= ic->nr_hw)
+		if (fwspec->param[1] >= ic->nr_irq)
 			return -EINVAL;
-		*hwirq = fwspec->param[1];
+		*hwirq = AIC_IRQ_HWIRQ(fwspec->param[1]);
 		break;
 	case AIC_FIQ:
 		if (fwspec->param[1] >= AIC_NR_FIQ)
 			return -EINVAL;
-		*hwirq = ic->nr_hw + fwspec->param[1];
+		*hwirq = AIC_FIQ_HWIRQ(fwspec->param[1]);
 
 		/*
 		 * In EL1 the non-redirected registers are the guest's,
@@ -539,10 +547,10 @@ static int aic_irq_domain_translate(struct irq_domain *id,
 		if (!is_kernel_in_hyp_mode()) {
 			switch (fwspec->param[1]) {
 			case AIC_TMR_GUEST_PHYS:
-				*hwirq = ic->nr_hw + AIC_TMR_EL0_PHYS;
+				*hwirq = AIC_FIQ_HWIRQ(AIC_TMR_EL0_PHYS);
 				break;
 			case AIC_TMR_GUEST_VIRT:
-				*hwirq = ic->nr_hw + AIC_TMR_EL0_VIRT;
+				*hwirq = AIC_FIQ_HWIRQ(AIC_TMR_EL0_VIRT);
 				break;
 			case AIC_TMR_HV_PHYS:
 			case AIC_TMR_HV_VIRT:
@@ -900,16 +908,15 @@ static int __init aic_of_ic_init(struct device_node *node, struct device_node *p
 	aic_irqc = irqc;
 
 	info = aic_ic_read(irqc, AIC_INFO);
-	irqc->nr_hw = FIELD_GET(AIC_INFO_NR_HW, info);
+	irqc->nr_irq = FIELD_GET(AIC_INFO_NR_IRQ, info);
 
 	if (irqc->info.fast_ipi)
 		static_branch_enable(&use_fast_ipi);
 	else
 		static_branch_disable(&use_fast_ipi);
 
-	irqc->hw_domain = irq_domain_create_linear(of_node_to_fwnode(node),
-						   irqc->nr_hw + AIC_NR_FIQ,
-						   &aic_irq_domain_ops, irqc);
+	irqc->hw_domain = irq_domain_create_tree(of_node_to_fwnode(node),
+						 &aic_irq_domain_ops, irqc);
 	if (WARN_ON(!irqc->hw_domain)) {
 		iounmap(irqc->base);
 		kfree(irqc);
@@ -928,11 +935,11 @@ static int __init aic_of_ic_init(struct device_node *node, struct device_node *p
 	set_handle_irq(aic_handle_irq);
 	set_handle_fiq(aic_handle_fiq);
 
-	for (i = 0; i < BITS_TO_U32(irqc->nr_hw); i++)
+	for (i = 0; i < BITS_TO_U32(irqc->nr_irq); i++)
 		aic_ic_write(irqc, AIC_MASK_SET + i * 4, U32_MAX);
-	for (i = 0; i < BITS_TO_U32(irqc->nr_hw); i++)
+	for (i = 0; i < BITS_TO_U32(irqc->nr_irq); i++)
 		aic_ic_write(irqc, AIC_SW_CLR + i * 4, U32_MAX);
-	for (i = 0; i < irqc->nr_hw; i++)
+	for (i = 0; i < irqc->nr_irq; i++)
 		aic_ic_write(irqc, AIC_TARGET_CPU + i * 4, 1);
 
 	if (!is_kernel_in_hyp_mode())
@@ -948,7 +955,7 @@ static int __init aic_of_ic_init(struct device_node *node, struct device_node *p
 	vgic_set_kvm_info(&vgic_info);
 
 	pr_info("Initialized with %d IRQs, %d FIQs, %d vIPIs\n",
-		irqc->nr_hw, AIC_NR_FIQ, AIC_NR_SWIPI);
+		irqc->nr_irq, AIC_NR_FIQ, AIC_NR_SWIPI);
 
 	return 0;
 }
-- 
GitLab


From dc97fd6fec009957e81026055fc99a03877ff3b8 Mon Sep 17 00:00:00 2001
From: Hector Martin <marcan@marcan.st>
Date: Thu, 10 Mar 2022 04:21:21 +0900
Subject: [PATCH 1243/1586] irqchip/apple-aic: Dynamically compute register
 offsets

This allows us to support AIC variants with different numbers of IRQs
based on capability registers.

Signed-off-by: Hector Martin <marcan@marcan.st>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220309192123.152028-6-marcan@marcan.st
---
 drivers/irqchip/irq-apple-aic.c | 72 +++++++++++++++++++++++++--------
 1 file changed, 55 insertions(+), 17 deletions(-)

diff --git a/drivers/irqchip/irq-apple-aic.c b/drivers/irqchip/irq-apple-aic.c
index 96480389195d4..4b1ba732f4764 100644
--- a/drivers/irqchip/irq-apple-aic.c
+++ b/drivers/irqchip/irq-apple-aic.c
@@ -64,7 +64,7 @@
 #include <dt-bindings/interrupt-controller/apple-aic.h>
 
 /*
- * AIC registers (MMIO)
+ * AIC v1 registers (MMIO)
  */
 
 #define AIC_INFO		0x0004
@@ -94,16 +94,14 @@
 #define AIC_IPI_SELF		BIT(31)
 
 #define AIC_TARGET_CPU		0x3000
-#define AIC_SW_SET		0x4000
-#define AIC_SW_CLR		0x4080
-#define AIC_MASK_SET		0x4100
-#define AIC_MASK_CLR		0x4180
 
 #define AIC_CPU_IPI_SET(cpu)	(0x5008 + ((cpu) << 7))
 #define AIC_CPU_IPI_CLR(cpu)	(0x500c + ((cpu) << 7))
 #define AIC_CPU_IPI_MASK_SET(cpu) (0x5024 + ((cpu) << 7))
 #define AIC_CPU_IPI_MASK_CLR(cpu) (0x5028 + ((cpu) << 7))
 
+#define AIC_MAX_IRQ		0x400
+
 #define MASK_REG(x)		(4 * ((x) >> 5))
 #define MASK_BIT(x)		BIT((x) & GENMASK(4, 0))
 
@@ -189,17 +187,31 @@ DEFINE_STATIC_KEY_TRUE(use_fast_ipi);
 struct aic_info {
 	int version;
 
+	/* Register offsets */
+	u32 event;
+	u32 target_cpu;
+	u32 sw_set;
+	u32 sw_clr;
+	u32 mask_set;
+	u32 mask_clr;
+
 	/* Features */
 	bool fast_ipi;
 };
 
 static const struct aic_info aic1_info = {
 	.version	= 1,
+
+	.event		= AIC_EVENT,
+	.target_cpu	= AIC_TARGET_CPU,
 };
 
 static const struct aic_info aic1_fipi_info = {
 	.version	= 1,
 
+	.event		= AIC_EVENT,
+	.target_cpu	= AIC_TARGET_CPU,
+
 	.fast_ipi	= true,
 };
 
@@ -219,7 +231,9 @@ struct aic_irq_chip {
 	void __iomem *base;
 	struct irq_domain *hw_domain;
 	struct irq_domain *ipi_domain;
+
 	int nr_irq;
+	int max_irq;
 
 	struct aic_info info;
 };
@@ -254,7 +268,7 @@ static void aic_irq_mask(struct irq_data *d)
 
 	u32 irq = AIC_HWIRQ_IRQ(hwirq);
 
-	aic_ic_write(ic, AIC_MASK_SET + MASK_REG(irq), MASK_BIT(irq));
+	aic_ic_write(ic, ic->info.mask_set + MASK_REG(irq), MASK_BIT(irq));
 }
 
 static void aic_irq_unmask(struct irq_data *d)
@@ -264,7 +278,7 @@ static void aic_irq_unmask(struct irq_data *d)
 
 	u32 irq = AIC_HWIRQ_IRQ(hwirq);
 
-	aic_ic_write(ic, AIC_MASK_CLR + MASK_REG(irq), MASK_BIT(irq));
+	aic_ic_write(ic, ic->info.mask_clr + MASK_REG(irq), MASK_BIT(irq));
 }
 
 static void aic_irq_eoi(struct irq_data *d)
@@ -287,7 +301,7 @@ static void __exception_irq_entry aic_handle_irq(struct pt_regs *regs)
 		 * We cannot use a relaxed read here, as reads from DMA buffers
 		 * need to be ordered after the IRQ fires.
 		 */
-		event = readl(ic->base + AIC_EVENT);
+		event = readl(ic->base + ic->info.event);
 		type = FIELD_GET(AIC_EVENT_TYPE, event);
 		irq = FIELD_GET(AIC_EVENT_NUM, event);
 
@@ -319,12 +333,14 @@ static int aic_irq_set_affinity(struct irq_data *d,
 	struct aic_irq_chip *ic = irq_data_get_irq_chip_data(d);
 	int cpu;
 
+	BUG_ON(!ic->info.target_cpu);
+
 	if (force)
 		cpu = cpumask_first(mask_val);
 	else
 		cpu = cpumask_any_and(mask_val, cpu_online_mask);
 
-	aic_ic_write(ic, AIC_TARGET_CPU + AIC_HWIRQ_IRQ(hwirq) * 4, BIT(cpu));
+	aic_ic_write(ic, ic->info.target_cpu + AIC_HWIRQ_IRQ(hwirq) * 4, BIT(cpu));
 	irq_data_update_effective_affinity(d, cpumask_of(cpu));
 
 	return IRQ_SET_MASK_OK;
@@ -884,8 +900,8 @@ static struct gic_kvm_info vgic_info __initdata = {
 static int __init aic_of_ic_init(struct device_node *node, struct device_node *parent)
 {
 	int i;
+	u32 off;
 	void __iomem *regs;
-	u32 info;
 	struct aic_irq_chip *irqc;
 	const struct of_device_id *match;
 
@@ -907,8 +923,30 @@ static int __init aic_of_ic_init(struct device_node *node, struct device_node *p
 
 	aic_irqc = irqc;
 
-	info = aic_ic_read(irqc, AIC_INFO);
-	irqc->nr_irq = FIELD_GET(AIC_INFO_NR_IRQ, info);
+	switch (irqc->info.version) {
+	case 1: {
+		u32 info;
+
+		info = aic_ic_read(irqc, AIC_INFO);
+		irqc->nr_irq = FIELD_GET(AIC_INFO_NR_IRQ, info);
+		irqc->max_irq = AIC_MAX_IRQ;
+
+		off = irqc->info.target_cpu;
+		off += sizeof(u32) * irqc->max_irq; /* TARGET_CPU */
+
+		break;
+	}
+	}
+
+	irqc->info.sw_set = off;
+	off += sizeof(u32) * (irqc->max_irq >> 5); /* SW_SET */
+	irqc->info.sw_clr = off;
+	off += sizeof(u32) * (irqc->max_irq >> 5); /* SW_CLR */
+	irqc->info.mask_set = off;
+	off += sizeof(u32) * (irqc->max_irq >> 5); /* MASK_SET */
+	irqc->info.mask_clr = off;
+	off += sizeof(u32) * (irqc->max_irq >> 5); /* MASK_CLR */
+	off += sizeof(u32) * (irqc->max_irq >> 5); /* HW_STATE */
 
 	if (irqc->info.fast_ipi)
 		static_branch_enable(&use_fast_ipi);
@@ -936,11 +974,11 @@ static int __init aic_of_ic_init(struct device_node *node, struct device_node *p
 	set_handle_fiq(aic_handle_fiq);
 
 	for (i = 0; i < BITS_TO_U32(irqc->nr_irq); i++)
-		aic_ic_write(irqc, AIC_MASK_SET + i * 4, U32_MAX);
+		aic_ic_write(irqc, irqc->info.mask_set + i * 4, U32_MAX);
 	for (i = 0; i < BITS_TO_U32(irqc->nr_irq); i++)
-		aic_ic_write(irqc, AIC_SW_CLR + i * 4, U32_MAX);
+		aic_ic_write(irqc, irqc->info.sw_clr + i * 4, U32_MAX);
 	for (i = 0; i < irqc->nr_irq; i++)
-		aic_ic_write(irqc, AIC_TARGET_CPU + i * 4, 1);
+		aic_ic_write(irqc, irqc->info.target_cpu + i * 4, 1);
 
 	if (!is_kernel_in_hyp_mode())
 		pr_info("Kernel running in EL1, mapping interrupts");
@@ -954,8 +992,8 @@ static int __init aic_of_ic_init(struct device_node *node, struct device_node *p
 
 	vgic_set_kvm_info(&vgic_info);
 
-	pr_info("Initialized with %d IRQs, %d FIQs, %d vIPIs\n",
-		irqc->nr_irq, AIC_NR_FIQ, AIC_NR_SWIPI);
+	pr_info("Initialized with %d/%d IRQs, %d FIQs, %d vIPIs",
+		irqc->nr_irq, irqc->max_irq, AIC_NR_FIQ, AIC_NR_SWIPI);
 
 	return 0;
 }
-- 
GitLab


From a801f0ee563b8180caf186493806a145a75b4a3c Mon Sep 17 00:00:00 2001
From: Hector Martin <marcan@marcan.st>
Date: Thu, 10 Mar 2022 04:21:22 +0900
Subject: [PATCH 1244/1586] irqchip/apple-aic: Support multiple dies

Multi-die support in AICv2 uses several sets of IRQ registers. Introduce
a die count and compute the register group offset based on the die ID
field of the hwirq number, as reported by the hardware.

Signed-off-by: Hector Martin <marcan@marcan.st>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220309192123.152028-7-marcan@marcan.st
---
 drivers/irqchip/irq-apple-aic.c | 77 +++++++++++++++++++++++----------
 1 file changed, 54 insertions(+), 23 deletions(-)

diff --git a/drivers/irqchip/irq-apple-aic.c b/drivers/irqchip/irq-apple-aic.c
index 4b1ba732f4764..93c622435ba25 100644
--- a/drivers/irqchip/irq-apple-aic.c
+++ b/drivers/irqchip/irq-apple-aic.c
@@ -74,7 +74,8 @@
 
 #define AIC_WHOAMI		0x2000
 #define AIC_EVENT		0x2004
-#define AIC_EVENT_TYPE		GENMASK(31, 16)
+#define AIC_EVENT_DIE		GENMASK(31, 24)
+#define AIC_EVENT_TYPE		GENMASK(23, 16)
 #define AIC_EVENT_NUM		GENMASK(15, 0)
 
 #define AIC_EVENT_TYPE_FIQ	0 /* Software use */
@@ -159,11 +160,13 @@
 #define MPIDR_CPU(x)			MPIDR_AFFINITY_LEVEL(x, 0)
 #define MPIDR_CLUSTER(x)		MPIDR_AFFINITY_LEVEL(x, 1)
 
-#define AIC_IRQ_HWIRQ(x)	(FIELD_PREP(AIC_EVENT_TYPE, AIC_EVENT_TYPE_IRQ) | \
-				 FIELD_PREP(AIC_EVENT_NUM, x))
+#define AIC_IRQ_HWIRQ(die, irq)	(FIELD_PREP(AIC_EVENT_DIE, die) | \
+				 FIELD_PREP(AIC_EVENT_TYPE, AIC_EVENT_TYPE_IRQ) | \
+				 FIELD_PREP(AIC_EVENT_NUM, irq))
 #define AIC_FIQ_HWIRQ(x)	(FIELD_PREP(AIC_EVENT_TYPE, AIC_EVENT_TYPE_FIQ) | \
 				 FIELD_PREP(AIC_EVENT_NUM, x))
 #define AIC_HWIRQ_IRQ(x)	FIELD_GET(AIC_EVENT_NUM, x)
+#define AIC_HWIRQ_DIE(x)	FIELD_GET(AIC_EVENT_DIE, x)
 #define AIC_NR_FIQ		4
 #define AIC_NR_SWIPI		32
 
@@ -195,6 +198,8 @@ struct aic_info {
 	u32 mask_set;
 	u32 mask_clr;
 
+	u32 die_stride;
+
 	/* Features */
 	bool fast_ipi;
 };
@@ -234,6 +239,8 @@ struct aic_irq_chip {
 
 	int nr_irq;
 	int max_irq;
+	int nr_die;
+	int max_die;
 
 	struct aic_info info;
 };
@@ -266,9 +273,10 @@ static void aic_irq_mask(struct irq_data *d)
 	irq_hw_number_t hwirq = irqd_to_hwirq(d);
 	struct aic_irq_chip *ic = irq_data_get_irq_chip_data(d);
 
+	u32 off = AIC_HWIRQ_DIE(hwirq) * ic->info.die_stride;
 	u32 irq = AIC_HWIRQ_IRQ(hwirq);
 
-	aic_ic_write(ic, ic->info.mask_set + MASK_REG(irq), MASK_BIT(irq));
+	aic_ic_write(ic, ic->info.mask_set + off + MASK_REG(irq), MASK_BIT(irq));
 }
 
 static void aic_irq_unmask(struct irq_data *d)
@@ -276,9 +284,10 @@ static void aic_irq_unmask(struct irq_data *d)
 	irq_hw_number_t hwirq = irqd_to_hwirq(d);
 	struct aic_irq_chip *ic = irq_data_get_irq_chip_data(d);
 
+	u32 off = AIC_HWIRQ_DIE(hwirq) * ic->info.die_stride;
 	u32 irq = AIC_HWIRQ_IRQ(hwirq);
 
-	aic_ic_write(ic, ic->info.mask_clr + MASK_REG(irq), MASK_BIT(irq));
+	aic_ic_write(ic, ic->info.mask_clr + off + MASK_REG(irq), MASK_BIT(irq));
 }
 
 static void aic_irq_eoi(struct irq_data *d)
@@ -541,27 +550,41 @@ static int aic_irq_domain_translate(struct irq_domain *id,
 				    unsigned int *type)
 {
 	struct aic_irq_chip *ic = id->host_data;
+	u32 *args;
+	u32 die = 0;
 
-	if (fwspec->param_count != 3 || !is_of_node(fwspec->fwnode))
+	if (fwspec->param_count < 3 || fwspec->param_count > 4 ||
+	    !is_of_node(fwspec->fwnode))
 		return -EINVAL;
 
+	args = &fwspec->param[1];
+
+	if (fwspec->param_count == 4) {
+		die = args[0];
+		args++;
+	}
+
 	switch (fwspec->param[0]) {
 	case AIC_IRQ:
-		if (fwspec->param[1] >= ic->nr_irq)
+		if (die >= ic->nr_die)
 			return -EINVAL;
-		*hwirq = AIC_IRQ_HWIRQ(fwspec->param[1]);
+		if (args[0] >= ic->nr_irq)
+			return -EINVAL;
+		*hwirq = AIC_IRQ_HWIRQ(die, args[0]);
 		break;
 	case AIC_FIQ:
-		if (fwspec->param[1] >= AIC_NR_FIQ)
+		if (die != 0)
+			return -EINVAL;
+		if (args[0] >= AIC_NR_FIQ)
 			return -EINVAL;
-		*hwirq = AIC_FIQ_HWIRQ(fwspec->param[1]);
+		*hwirq = AIC_FIQ_HWIRQ(args[0]);
 
 		/*
 		 * In EL1 the non-redirected registers are the guest's,
 		 * not EL2's, so remap the hwirqs to match.
 		 */
 		if (!is_kernel_in_hyp_mode()) {
-			switch (fwspec->param[1]) {
+			switch (args[0]) {
 			case AIC_TMR_GUEST_PHYS:
 				*hwirq = AIC_FIQ_HWIRQ(AIC_TMR_EL0_PHYS);
 				break;
@@ -580,7 +603,7 @@ static int aic_irq_domain_translate(struct irq_domain *id,
 		return -EINVAL;
 	}
 
-	*type = fwspec->param[2] & IRQ_TYPE_SENSE_MASK;
+	*type = args[1] & IRQ_TYPE_SENSE_MASK;
 
 	return 0;
 }
@@ -899,8 +922,8 @@ static struct gic_kvm_info vgic_info __initdata = {
 
 static int __init aic_of_ic_init(struct device_node *node, struct device_node *parent)
 {
-	int i;
-	u32 off;
+	int i, die;
+	u32 off, start_off;
 	void __iomem *regs;
 	struct aic_irq_chip *irqc;
 	const struct of_device_id *match;
@@ -930,8 +953,9 @@ static int __init aic_of_ic_init(struct device_node *node, struct device_node *p
 		info = aic_ic_read(irqc, AIC_INFO);
 		irqc->nr_irq = FIELD_GET(AIC_INFO_NR_IRQ, info);
 		irqc->max_irq = AIC_MAX_IRQ;
+		irqc->nr_die = irqc->max_die = 1;
 
-		off = irqc->info.target_cpu;
+		off = start_off = irqc->info.target_cpu;
 		off += sizeof(u32) * irqc->max_irq; /* TARGET_CPU */
 
 		break;
@@ -953,6 +977,8 @@ static int __init aic_of_ic_init(struct device_node *node, struct device_node *p
 	else
 		static_branch_disable(&use_fast_ipi);
 
+	irqc->info.die_stride = off - start_off;
+
 	irqc->hw_domain = irq_domain_create_tree(of_node_to_fwnode(node),
 						 &aic_irq_domain_ops, irqc);
 	if (WARN_ON(!irqc->hw_domain)) {
@@ -973,12 +999,17 @@ static int __init aic_of_ic_init(struct device_node *node, struct device_node *p
 	set_handle_irq(aic_handle_irq);
 	set_handle_fiq(aic_handle_fiq);
 
-	for (i = 0; i < BITS_TO_U32(irqc->nr_irq); i++)
-		aic_ic_write(irqc, irqc->info.mask_set + i * 4, U32_MAX);
-	for (i = 0; i < BITS_TO_U32(irqc->nr_irq); i++)
-		aic_ic_write(irqc, irqc->info.sw_clr + i * 4, U32_MAX);
-	for (i = 0; i < irqc->nr_irq; i++)
-		aic_ic_write(irqc, irqc->info.target_cpu + i * 4, 1);
+	off = 0;
+	for (die = 0; die < irqc->nr_die; die++) {
+		for (i = 0; i < BITS_TO_U32(irqc->nr_irq); i++)
+			aic_ic_write(irqc, irqc->info.mask_set + off + i * 4, U32_MAX);
+		for (i = 0; i < BITS_TO_U32(irqc->nr_irq); i++)
+			aic_ic_write(irqc, irqc->info.sw_clr + off + i * 4, U32_MAX);
+		if (irqc->info.target_cpu)
+			for (i = 0; i < irqc->nr_irq; i++)
+				aic_ic_write(irqc, irqc->info.target_cpu + off + i * 4, 1);
+		off += irqc->info.die_stride;
+	}
 
 	if (!is_kernel_in_hyp_mode())
 		pr_info("Kernel running in EL1, mapping interrupts");
@@ -992,8 +1023,8 @@ static int __init aic_of_ic_init(struct device_node *node, struct device_node *p
 
 	vgic_set_kvm_info(&vgic_info);
 
-	pr_info("Initialized with %d/%d IRQs, %d FIQs, %d vIPIs",
-		irqc->nr_irq, irqc->max_irq, AIC_NR_FIQ, AIC_NR_SWIPI);
+	pr_info("Initialized with %d/%d IRQs * %d/%d die(s), %d FIQs, %d vIPIs",
+		irqc->nr_irq, irqc->max_irq, irqc->nr_die, irqc->max_die, AIC_NR_FIQ, AIC_NR_SWIPI);
 
 	return 0;
 }
-- 
GitLab


From 768d4435de2a042f35069c68587d8e6702102248 Mon Sep 17 00:00:00 2001
From: Hector Martin <marcan@marcan.st>
Date: Thu, 10 Mar 2022 04:21:23 +0900
Subject: [PATCH 1245/1586] irqchip/apple-aic: Add support for AICv2

Introduce support for the new AICv2 hardware block in t6000/t6001 SoCs.

It seems these blocks are missing the information required to compute
the event register offset in the capability registers, so we specify
that in the DT as a second reg entry.

Signed-off-by: Hector Martin <marcan@marcan.st>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220309192123.152028-8-marcan@marcan.st
---
 drivers/irqchip/irq-apple-aic.c | 181 ++++++++++++++++++++++++++------
 1 file changed, 148 insertions(+), 33 deletions(-)

diff --git a/drivers/irqchip/irq-apple-aic.c b/drivers/irqchip/irq-apple-aic.c
index 93c622435ba25..ba2dff70dff54 100644
--- a/drivers/irqchip/irq-apple-aic.c
+++ b/drivers/irqchip/irq-apple-aic.c
@@ -103,6 +103,58 @@
 
 #define AIC_MAX_IRQ		0x400
 
+/*
+ * AIC v2 registers (MMIO)
+ */
+
+#define AIC2_VERSION		0x0000
+#define AIC2_VERSION_VER	GENMASK(7, 0)
+
+#define AIC2_INFO1		0x0004
+#define AIC2_INFO1_NR_IRQ	GENMASK(15, 0)
+#define AIC2_INFO1_LAST_DIE	GENMASK(27, 24)
+
+#define AIC2_INFO2		0x0008
+
+#define AIC2_INFO3		0x000c
+#define AIC2_INFO3_MAX_IRQ	GENMASK(15, 0)
+#define AIC2_INFO3_MAX_DIE	GENMASK(27, 24)
+
+#define AIC2_RESET		0x0010
+#define AIC2_RESET_RESET	BIT(0)
+
+#define AIC2_CONFIG		0x0014
+#define AIC2_CONFIG_ENABLE	BIT(0)
+#define AIC2_CONFIG_PREFER_PCPU	BIT(28)
+
+#define AIC2_TIMEOUT		0x0028
+#define AIC2_CLUSTER_PRIO	0x0030
+#define AIC2_DELAY_GROUPS	0x0100
+
+#define AIC2_IRQ_CFG		0x2000
+
+/*
+ * AIC2 registers are laid out like this, starting at AIC2_IRQ_CFG:
+ *
+ * Repeat for each die:
+ *   IRQ_CFG: u32 * MAX_IRQS
+ *   SW_SET: u32 * (MAX_IRQS / 32)
+ *   SW_CLR: u32 * (MAX_IRQS / 32)
+ *   MASK_SET: u32 * (MAX_IRQS / 32)
+ *   MASK_CLR: u32 * (MAX_IRQS / 32)
+ *   HW_STATE: u32 * (MAX_IRQS / 32)
+ *
+ * This is followed by a set of event registers, each 16K page aligned.
+ * The first one is the AP event register we will use. Unfortunately,
+ * the actual implemented die count is not specified anywhere in the
+ * capability registers, so we have to explicitly specify the event
+ * register as a second reg entry in the device tree to remain
+ * forward-compatible.
+ */
+
+#define AIC2_IRQ_CFG_TARGET	GENMASK(3, 0)
+#define AIC2_IRQ_CFG_DELAY_IDX	GENMASK(7, 5)
+
 #define MASK_REG(x)		(4 * ((x) >> 5))
 #define MASK_BIT(x)		BIT((x) & GENMASK(4, 0))
 
@@ -193,6 +245,7 @@ struct aic_info {
 	/* Register offsets */
 	u32 event;
 	u32 target_cpu;
+	u32 irq_cfg;
 	u32 sw_set;
 	u32 sw_clr;
 	u32 mask_set;
@@ -220,6 +273,14 @@ static const struct aic_info aic1_fipi_info = {
 	.fast_ipi	= true,
 };
 
+static const struct aic_info aic2_info = {
+	.version	= 2,
+
+	.irq_cfg	= AIC2_IRQ_CFG,
+
+	.fast_ipi	= true,
+};
+
 static const struct of_device_id aic_info_match[] = {
 	{
 		.compatible = "apple,t8103-aic",
@@ -229,11 +290,16 @@ static const struct of_device_id aic_info_match[] = {
 		.compatible = "apple,aic",
 		.data = &aic1_info,
 	},
+	{
+		.compatible = "apple,aic2",
+		.data = &aic2_info,
+	},
 	{}
 };
 
 struct aic_irq_chip {
 	void __iomem *base;
+	void __iomem *event;
 	struct irq_domain *hw_domain;
 	struct irq_domain *ipi_domain;
 
@@ -310,7 +376,7 @@ static void __exception_irq_entry aic_handle_irq(struct pt_regs *regs)
 		 * We cannot use a relaxed read here, as reads from DMA buffers
 		 * need to be ordered after the IRQ fires.
 		 */
-		event = readl(ic->base + ic->info.event);
+		event = readl(ic->event + ic->info.event);
 		type = FIELD_GET(AIC_EVENT_TYPE, event);
 		irq = FIELD_GET(AIC_EVENT_NUM, event);
 
@@ -373,6 +439,14 @@ static struct irq_chip aic_chip = {
 	.irq_set_type = aic_irq_set_type,
 };
 
+static struct irq_chip aic2_chip = {
+	.name = "AIC2",
+	.irq_mask = aic_irq_mask,
+	.irq_unmask = aic_irq_unmask,
+	.irq_eoi = aic_irq_eoi,
+	.irq_set_type = aic_irq_set_type,
+};
+
 /*
  * FIQ irqchip
  */
@@ -529,10 +603,15 @@ static struct irq_chip fiq_chip = {
 static int aic_irq_domain_map(struct irq_domain *id, unsigned int irq,
 			      irq_hw_number_t hw)
 {
+	struct aic_irq_chip *ic = id->host_data;
 	u32 type = FIELD_GET(AIC_EVENT_TYPE, hw);
+	struct irq_chip *chip = &aic_chip;
+
+	if (ic->info.version == 2)
+		chip = &aic2_chip;
 
 	if (type == AIC_EVENT_TYPE_IRQ) {
-		irq_domain_set_info(id, irq, hw, &aic_chip, id->host_data,
+		irq_domain_set_info(id, irq, hw, chip, id->host_data,
 				    handle_fasteoi_irq, NULL, NULL);
 		irqd_set_single_target(irq_desc_get_irq_data(irq_to_desc(irq)));
 	} else {
@@ -888,24 +967,26 @@ static int aic_init_cpu(unsigned int cpu)
 	/* Commit all of the above */
 	isb();
 
-	/*
-	 * Make sure the kernel's idea of logical CPU order is the same as AIC's
-	 * If we ever end up with a mismatch here, we will have to introduce
-	 * a mapping table similar to what other irqchip drivers do.
-	 */
-	WARN_ON(aic_ic_read(aic_irqc, AIC_WHOAMI) != smp_processor_id());
+	if (aic_irqc->info.version == 1) {
+		/*
+		 * Make sure the kernel's idea of logical CPU order is the same as AIC's
+		 * If we ever end up with a mismatch here, we will have to introduce
+		 * a mapping table similar to what other irqchip drivers do.
+		 */
+		WARN_ON(aic_ic_read(aic_irqc, AIC_WHOAMI) != smp_processor_id());
 
-	/*
-	 * Always keep IPIs unmasked at the hardware level (except auto-masking
-	 * by AIC during processing). We manage masks at the vIPI level.
-	 * These registers only exist on AICv1, AICv2 always uses fast IPIs.
-	 */
-	aic_ic_write(aic_irqc, AIC_IPI_ACK, AIC_IPI_SELF | AIC_IPI_OTHER);
-	if (static_branch_likely(&use_fast_ipi)) {
-		aic_ic_write(aic_irqc, AIC_IPI_MASK_SET, AIC_IPI_SELF | AIC_IPI_OTHER);
-	} else {
-		aic_ic_write(aic_irqc, AIC_IPI_MASK_SET, AIC_IPI_SELF);
-		aic_ic_write(aic_irqc, AIC_IPI_MASK_CLR, AIC_IPI_OTHER);
+		/*
+		 * Always keep IPIs unmasked at the hardware level (except auto-masking
+		 * by AIC during processing). We manage masks at the vIPI level.
+		 * These registers only exist on AICv1, AICv2 always uses fast IPIs.
+		 */
+		aic_ic_write(aic_irqc, AIC_IPI_ACK, AIC_IPI_SELF | AIC_IPI_OTHER);
+		if (static_branch_likely(&use_fast_ipi)) {
+			aic_ic_write(aic_irqc, AIC_IPI_MASK_SET, AIC_IPI_SELF | AIC_IPI_OTHER);
+		} else {
+			aic_ic_write(aic_irqc, AIC_IPI_MASK_SET, AIC_IPI_SELF);
+			aic_ic_write(aic_irqc, AIC_IPI_MASK_CLR, AIC_IPI_OTHER);
+		}
 	}
 
 	/* Initialize the local mask state */
@@ -933,14 +1014,16 @@ static int __init aic_of_ic_init(struct device_node *node, struct device_node *p
 		return -EIO;
 
 	irqc = kzalloc(sizeof(*irqc), GFP_KERNEL);
-	if (!irqc)
+	if (!irqc) {
+		iounmap(regs);
 		return -ENOMEM;
+	}
 
 	irqc->base = regs;
 
 	match = of_match_node(aic_info_match, node);
 	if (!match)
-		return -ENODEV;
+		goto err_unmap;
 
 	irqc->info = *(struct aic_info *)match->data;
 
@@ -958,6 +1041,28 @@ static int __init aic_of_ic_init(struct device_node *node, struct device_node *p
 		off = start_off = irqc->info.target_cpu;
 		off += sizeof(u32) * irqc->max_irq; /* TARGET_CPU */
 
+		irqc->event = irqc->base;
+
+		break;
+	}
+	case 2: {
+		u32 info1, info3;
+
+		info1 = aic_ic_read(irqc, AIC2_INFO1);
+		info3 = aic_ic_read(irqc, AIC2_INFO3);
+
+		irqc->nr_irq = FIELD_GET(AIC2_INFO1_NR_IRQ, info1);
+		irqc->max_irq = FIELD_GET(AIC2_INFO3_MAX_IRQ, info3);
+		irqc->nr_die = FIELD_GET(AIC2_INFO1_LAST_DIE, info1) + 1;
+		irqc->max_die = FIELD_GET(AIC2_INFO3_MAX_DIE, info3);
+
+		off = start_off = irqc->info.irq_cfg;
+		off += sizeof(u32) * irqc->max_irq; /* IRQ_CFG */
+
+		irqc->event = of_iomap(node, 1);
+		if (WARN_ON(!irqc->event))
+			goto err_unmap;
+
 		break;
 	}
 	}
@@ -981,20 +1086,13 @@ static int __init aic_of_ic_init(struct device_node *node, struct device_node *p
 
 	irqc->hw_domain = irq_domain_create_tree(of_node_to_fwnode(node),
 						 &aic_irq_domain_ops, irqc);
-	if (WARN_ON(!irqc->hw_domain)) {
-		iounmap(irqc->base);
-		kfree(irqc);
-		return -ENODEV;
-	}
+	if (WARN_ON(!irqc->hw_domain))
+		goto err_unmap;
 
 	irq_domain_update_bus_token(irqc->hw_domain, DOMAIN_BUS_WIRED);
 
-	if (aic_init_smp(irqc, node)) {
-		irq_domain_remove(irqc->hw_domain);
-		iounmap(irqc->base);
-		kfree(irqc);
-		return -ENODEV;
-	}
+	if (aic_init_smp(irqc, node))
+		goto err_remove_domain;
 
 	set_handle_irq(aic_handle_irq);
 	set_handle_fiq(aic_handle_fiq);
@@ -1011,6 +1109,13 @@ static int __init aic_of_ic_init(struct device_node *node, struct device_node *p
 		off += irqc->info.die_stride;
 	}
 
+	if (irqc->info.version == 2) {
+		u32 config = aic_ic_read(irqc, AIC2_CONFIG);
+
+		config |= AIC2_CONFIG_ENABLE;
+		aic_ic_write(irqc, AIC2_CONFIG, config);
+	}
+
 	if (!is_kernel_in_hyp_mode())
 		pr_info("Kernel running in EL1, mapping interrupts");
 
@@ -1027,6 +1132,16 @@ static int __init aic_of_ic_init(struct device_node *node, struct device_node *p
 		irqc->nr_irq, irqc->max_irq, irqc->nr_die, irqc->max_die, AIC_NR_FIQ, AIC_NR_SWIPI);
 
 	return 0;
+
+err_remove_domain:
+	irq_domain_remove(irqc->hw_domain);
+err_unmap:
+	if (irqc->event && irqc->event != irqc->base)
+		iounmap(irqc->event);
+	iounmap(irqc->base);
+	kfree(irqc);
+	return -ENODEV;
 }
 
-IRQCHIP_DECLARE(apple_m1_aic, "apple,aic", aic_of_ic_init);
+IRQCHIP_DECLARE(apple_aic, "apple,aic", aic_of_ic_init);
+IRQCHIP_DECLARE(apple_aic2, "apple,aic2", aic_of_ic_init);
-- 
GitLab


From 85d9abcd7331566781b93ff46e4bccd4806ef2b2 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Fri, 11 Mar 2022 11:35:27 +0100
Subject: [PATCH 1246/1586] xen/blkfront: speed up purge_persistent_grants()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

purge_persistent_grants() is scanning the grants list for persistent
grants being no longer in use by the backend. When having found such a
grant, it will be set to "invalid" and pushed to the tail of the list.

Instead of pushing it directly to the end of the list, add it first to
a temporary list, avoiding to scan those entries again in the main
list traversal. After having finished the scan, append the temporary
list to the grant list.

Suggested-by: Jan Beulich <jbeulich@suse.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
Link: https://lore.kernel.org/r/20220311103527.12931-1-jgross@suse.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/xen-blkfront.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index ccd0dd0c6b83c..5d04e480f36a7 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -2521,6 +2521,7 @@ static void purge_persistent_grants(struct blkfront_info *info)
 
 	for_each_rinfo(info, rinfo, i) {
 		struct grant *gnt_list_entry, *tmp;
+		LIST_HEAD(grants);
 
 		spin_lock_irqsave(&rinfo->ring_lock, flags);
 
@@ -2539,9 +2540,11 @@ static void purge_persistent_grants(struct blkfront_info *info)
 			gnttab_end_foreign_access(gnt_list_entry->gref, 0, 0UL);
 			rinfo->persistent_gnts_c--;
 			gnt_list_entry->gref = GRANT_INVALID_REF;
-			list_add_tail(&gnt_list_entry->node, &rinfo->grants);
+			list_add_tail(&gnt_list_entry->node, &grants);
 		}
 
+		list_splice_tail(&grants, &rinfo->grants);
+
 		spin_unlock_irqrestore(&rinfo->ring_lock, flags);
 	}
 }
-- 
GitLab


From 1889421a891ff439b25495011b8b75f81660abca Mon Sep 17 00:00:00 2001
From: Kuldeep Singh <singh.kuldeep87k@gmail.com>
Date: Wed, 9 Mar 2022 22:48:47 +0530
Subject: [PATCH 1247/1586] spi: Update clock-names property for arm pl022

PL022 has two input clocks named sspclk and apb_pclk. Current schema
refers to two notations of sspclk which are indeed same and thus one can
be dropped. Update clock-names property to reflect the same.

Signed-off-by: Kuldeep Singh <singh.kuldeep87k@gmail.com>
Link: https://lore.kernel.org/r/20220309171847.5345-1-singh.kuldeep87k@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 Documentation/devicetree/bindings/spi/spi-pl022.yaml | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/Documentation/devicetree/bindings/spi/spi-pl022.yaml b/Documentation/devicetree/bindings/spi/spi-pl022.yaml
index 6d633728fc2be..bda45ff3d2942 100644
--- a/Documentation/devicetree/bindings/spi/spi-pl022.yaml
+++ b/Documentation/devicetree/bindings/spi/spi-pl022.yaml
@@ -38,9 +38,7 @@ properties:
 
   clock-names:
     items:
-      - enum:
-          - SSPCLK
-          - sspclk
+      - const: sspclk
       - const: apb_pclk
 
   pl022,autosuspend-delay:
-- 
GitLab


From 5f9a62ff7d2808c7b56c0ec90f3b7eae5872afe6 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Sun, 6 Feb 2022 12:25:47 -0500
Subject: [PATCH 1248/1586] NFSD: Remove CONFIG_NFSD_V3

Eventually support for NFSv2 in the Linux NFS server is to be
deprecated and then removed.

However, NFSv2 is the "always supported" version that is available
as soon as CONFIG_NFSD is set.  Before NFSv2 support can be removed,
we need to choose a different "always supported" version.

This patch removes CONFIG_NFSD_V3 so that NFSv3 is always supported,
as NFSv2 is today. When NFSv2 support is removed, NFSv3 will become
the only "always supported" NFS version.

The defconfigs still need to be updated to remove CONFIG_NFSD_V3=y.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/Kconfig       |  2 +-
 fs/nfsd/Kconfig  | 12 +-----------
 fs/nfsd/Makefile |  3 +--
 fs/nfsd/nfsfh.c  |  4 ----
 fs/nfsd/nfsfh.h  | 20 --------------------
 fs/nfsd/nfssvc.c |  2 --
 fs/nfsd/vfs.c    |  9 ---------
 fs/nfsd/vfs.h    |  2 --
 8 files changed, 3 insertions(+), 51 deletions(-)

diff --git a/fs/Kconfig b/fs/Kconfig
index 6c7dc1387beb0..90fdb62545e0b 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -344,7 +344,7 @@ config LOCKD
 
 config LOCKD_V4
 	bool
-	depends on NFSD_V3 || NFS_V3
+	depends on NFSD || NFS_V3
 	depends on FILE_LOCKING
 	default y
 
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index 3d1d17256a91c..f6a2fd3015e75 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -35,18 +35,9 @@ config NFSD_V2_ACL
 	bool
 	depends on NFSD
 
-config NFSD_V3
-	bool "NFS server support for NFS version 3"
-	depends on NFSD
-	help
-	  This option enables support in your system's NFS server for
-	  version 3 of the NFS protocol (RFC 1813).
-
-	  If unsure, say Y.
-
 config NFSD_V3_ACL
 	bool "NFS server support for the NFSv3 ACL protocol extension"
-	depends on NFSD_V3
+	depends on NFSD
 	select NFSD_V2_ACL
 	help
 	  Solaris NFS servers support an auxiliary NFSv3 ACL protocol that
@@ -70,7 +61,6 @@ config NFSD_V3_ACL
 config NFSD_V4
 	bool "NFS server support for NFS version 4"
 	depends on NFSD && PROC_FS
-	select NFSD_V3
 	select FS_POSIX_ACL
 	select SUNRPC_GSS
 	select CRYPTO
diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile
index 3f0983e93a998..805c06d5f1b4b 100644
--- a/fs/nfsd/Makefile
+++ b/fs/nfsd/Makefile
@@ -12,9 +12,8 @@ nfsd-y			+= trace.o
 
 nfsd-y 			+= nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \
 			   export.o auth.o lockd.o nfscache.o nfsxdr.o \
-			   stats.o filecache.o
+			   stats.o filecache.o nfs3proc.o nfs3xdr.o
 nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o
-nfsd-$(CONFIG_NFSD_V3)	+= nfs3proc.o nfs3xdr.o
 nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o
 nfsd-$(CONFIG_NFSD_V4)	+= nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \
 			   nfs4acl.o nfs4callback.o nfs4recover.o
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 145208bcb9bd4..c29baa03dfafd 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -611,8 +611,6 @@ out_negative:
 	return nfserr_serverfault;
 }
 
-#ifdef CONFIG_NFSD_V3
-
 /**
  * fh_fill_pre_attrs - Fill in pre-op attributes
  * @fhp: file handle to be updated
@@ -673,8 +671,6 @@ void fh_fill_post_attrs(struct svc_fh *fhp)
 			nfsd4_change_attribute(&fhp->fh_post_attr, inode);
 }
 
-#endif /* CONFIG_NFSD_V3 */
-
 /*
  * Release a file handle.
  */
diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
index 434930d8a946e..fb9d358a267e5 100644
--- a/fs/nfsd/nfsfh.h
+++ b/fs/nfsd/nfsfh.h
@@ -90,7 +90,6 @@ typedef struct svc_fh {
 						 * operation
 						 */
 	int			fh_flags;	/* FH flags */
-#ifdef CONFIG_NFSD_V3
 	bool			fh_post_saved;	/* post-op attrs saved */
 	bool			fh_pre_saved;	/* pre-op attrs saved */
 
@@ -107,7 +106,6 @@ typedef struct svc_fh {
 	/* Post-op attributes saved in fh_unlock */
 	struct kstat		fh_post_attr;	/* full attrs after operation */
 	u64			fh_post_change; /* nfsv4 change; see above */
-#endif /* CONFIG_NFSD_V3 */
 } svc_fh;
 #define NFSD4_FH_FOREIGN (1<<0)
 #define SET_FH_FLAG(c, f) ((c)->fh_flags |= (f))
@@ -283,8 +281,6 @@ static inline u32 knfsd_fh_hash(const struct knfsd_fh *fh)
 }
 #endif
 
-#ifdef CONFIG_NFSD_V3
-
 /**
  * fh_clear_pre_post_attrs - Reset pre/post attributes
  * @fhp: file handle to be updated
@@ -327,22 +323,6 @@ static inline u64 nfsd4_change_attribute(struct kstat *stat,
 extern void fh_fill_pre_attrs(struct svc_fh *fhp);
 extern void fh_fill_post_attrs(struct svc_fh *fhp);
 
-#else /* !CONFIG_NFSD_V3 */
-
-static inline void fh_clear_pre_post_attrs(struct svc_fh *fhp)
-{
-}
-
-static inline void fh_fill_pre_attrs(struct svc_fh *fhp)
-{
-}
-
-static inline void fh_fill_post_attrs(struct svc_fh *fhp)
-{
-}
-
-#endif /* !CONFIG_NFSD_V3 */
-
 
 /*
  * Lock a file handle/inode
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 5abbe5d1c77ff..4bb5baa17040f 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -117,9 +117,7 @@ static struct svc_stat	nfsd_acl_svcstats = {
 
 static const struct svc_version *nfsd_version[] = {
 	[2] = &nfsd_version2,
-#if defined(CONFIG_NFSD_V3)
 	[3] = &nfsd_version3,
-#endif
 #if defined(CONFIG_NFSD_V4)
 	[4] = &nfsd_version4,
 #endif
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 91600e71be191..166eb0ba3e713 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -32,9 +32,7 @@
 #include <linux/writeback.h>
 #include <linux/security.h>
 
-#ifdef CONFIG_NFSD_V3
 #include "xdr3.h"
-#endif /* CONFIG_NFSD_V3 */
 
 #ifdef CONFIG_NFSD_V4
 #include "../internal.h"
@@ -608,7 +606,6 @@ __be32 nfsd4_vfs_fallocate(struct svc_rqst *rqstp, struct svc_fh *fhp,
 }
 #endif /* defined(CONFIG_NFSD_V4) */
 
-#ifdef CONFIG_NFSD_V3
 /*
  * Check server access rights to a file system object
  */
@@ -720,7 +717,6 @@ nfsd_access(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *access, u32 *suppor
  out:
 	return error;
 }
-#endif /* CONFIG_NFSD_V3 */
 
 int nfsd_open_break_lease(struct inode *inode, int access)
 {
@@ -1113,7 +1109,6 @@ out:
 	return err;
 }
 
-#ifdef CONFIG_NFSD_V3
 /**
  * nfsd_commit - Commit pending writes to stable storage
  * @rqstp: RPC request being processed
@@ -1190,7 +1185,6 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, u64 offset,
 out:
 	return err;
 }
-#endif /* CONFIG_NFSD_V3 */
 
 static __be32
 nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp,
@@ -1380,8 +1374,6 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
 					rdev, resfhp);
 }
 
-#ifdef CONFIG_NFSD_V3
-
 /*
  * NFSv3 and NFSv4 version of nfsd_create
  */
@@ -1547,7 +1539,6 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	err = nfserrno(host_err);
 	goto out;
 }
-#endif /* CONFIG_NFSD_V3 */
 
 /*
  * Read a symlink. On entry, *lenp must contain the maximum path length that
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index 2c43d10e3cab4..ccb87b2864f64 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -68,7 +68,6 @@ __be32		nfsd_create_locked(struct svc_rqst *, struct svc_fh *,
 __be32		nfsd_create(struct svc_rqst *, struct svc_fh *,
 				char *name, int len, struct iattr *attrs,
 				int type, dev_t rdev, struct svc_fh *res);
-#ifdef CONFIG_NFSD_V3
 __be32		nfsd_access(struct svc_rqst *, struct svc_fh *, u32 *, u32 *);
 __be32		do_nfsd_create(struct svc_rqst *, struct svc_fh *,
 				char *name, int len, struct iattr *attrs,
@@ -76,7 +75,6 @@ __be32		do_nfsd_create(struct svc_rqst *, struct svc_fh *,
 				u32 *verifier, bool *truncp, bool *created);
 __be32		nfsd_commit(struct svc_rqst *rqst, struct svc_fh *fhp,
 				u64 offset, u32 count, __be32 *verf);
-#endif /* CONFIG_NFSD_V3 */
 #ifdef CONFIG_NFSD_V4
 __be32		nfsd_getxattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
 			    char *name, void **bufp, int *lenp);
-- 
GitLab


From f3e4080edd2754abbdc3daf9881b1f6e168c6669 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 14 Feb 2022 11:12:09 -0500
Subject: [PATCH 1249/1586] arch: Remove references to CONFIG_NFSD_V3 in the
 default configs

CONFIG_NFSD_V3 has been removed. NFSD support for NFSv3 can no
longer be disabled.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 arch/alpha/configs/defconfig                | 1 -
 arch/arm/configs/davinci_all_defconfig      | 1 -
 arch/arm/configs/ezx_defconfig              | 1 -
 arch/arm/configs/imote2_defconfig           | 1 -
 arch/arm/configs/integrator_defconfig       | 1 -
 arch/arm/configs/iop32x_defconfig           | 1 -
 arch/arm/configs/keystone_defconfig         | 1 -
 arch/arm/configs/lart_defconfig             | 1 -
 arch/arm/configs/netwinder_defconfig        | 1 -
 arch/arm/configs/versatile_defconfig        | 1 -
 arch/arm/configs/viper_defconfig            | 1 -
 arch/arm/configs/zeus_defconfig             | 1 -
 arch/ia64/configs/zx1_defconfig             | 1 -
 arch/m68k/configs/amiga_defconfig           | 1 -
 arch/m68k/configs/apollo_defconfig          | 1 -
 arch/m68k/configs/atari_defconfig           | 1 -
 arch/m68k/configs/bvme6000_defconfig        | 1 -
 arch/m68k/configs/hp300_defconfig           | 1 -
 arch/m68k/configs/mac_defconfig             | 1 -
 arch/m68k/configs/multi_defconfig           | 1 -
 arch/m68k/configs/mvme147_defconfig         | 1 -
 arch/m68k/configs/mvme16x_defconfig         | 1 -
 arch/m68k/configs/q40_defconfig             | 1 -
 arch/m68k/configs/sun3_defconfig            | 1 -
 arch/m68k/configs/sun3x_defconfig           | 1 -
 arch/mips/configs/cobalt_defconfig          | 1 -
 arch/mips/configs/decstation_64_defconfig   | 1 -
 arch/mips/configs/decstation_defconfig      | 1 -
 arch/mips/configs/decstation_r4k_defconfig  | 1 -
 arch/mips/configs/ip22_defconfig            | 1 -
 arch/mips/configs/ip32_defconfig            | 1 -
 arch/mips/configs/jazz_defconfig            | 1 -
 arch/mips/configs/malta_defconfig           | 1 -
 arch/mips/configs/malta_kvm_defconfig       | 1 -
 arch/mips/configs/maltaup_xpa_defconfig     | 1 -
 arch/mips/configs/rm200_defconfig           | 1 -
 arch/mips/configs/tb0219_defconfig          | 1 -
 arch/mips/configs/tb0226_defconfig          | 1 -
 arch/mips/configs/tb0287_defconfig          | 1 -
 arch/mips/configs/workpad_defconfig         | 1 -
 arch/parisc/configs/generic-32bit_defconfig | 1 -
 arch/powerpc/configs/linkstation_defconfig  | 1 -
 arch/powerpc/configs/mvme5100_defconfig     | 1 -
 arch/sh/configs/ap325rxa_defconfig          | 1 -
 arch/sh/configs/ecovec24_defconfig          | 1 -
 arch/sh/configs/landisk_defconfig           | 1 -
 arch/sh/configs/sdk7780_defconfig           | 1 -
 arch/sh/configs/se7724_defconfig            | 1 -
 arch/sh/configs/sh03_defconfig              | 1 -
 arch/sh/configs/sh7785lcr_32bit_defconfig   | 1 -
 arch/sh/configs/titan_defconfig             | 1 -
 51 files changed, 51 deletions(-)

diff --git a/arch/alpha/configs/defconfig b/arch/alpha/configs/defconfig
index 7f1ca30b115bd..7e93369308800 100644
--- a/arch/alpha/configs/defconfig
+++ b/arch/alpha/configs/defconfig
@@ -62,7 +62,6 @@ CONFIG_TMPFS=y
 CONFIG_NFS_FS=m
 CONFIG_NFS_V3=y
 CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
 CONFIG_NLS_CODEPAGE_437=y
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_KERNEL=y
diff --git a/arch/arm/configs/davinci_all_defconfig b/arch/arm/configs/davinci_all_defconfig
index e849367c05662..b58d45a03607e 100644
--- a/arch/arm/configs/davinci_all_defconfig
+++ b/arch/arm/configs/davinci_all_defconfig
@@ -258,7 +258,6 @@ CONFIG_MINIX_FS=m
 CONFIG_NFS_FS=y
 CONFIG_ROOT_NFS=y
 CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
 CONFIG_NLS_CODEPAGE_437=y
 CONFIG_NLS_ASCII=m
 CONFIG_NLS_ISO8859_1=y
diff --git a/arch/arm/configs/ezx_defconfig b/arch/arm/configs/ezx_defconfig
index ec84d80096b1c..0788a892e160b 100644
--- a/arch/arm/configs/ezx_defconfig
+++ b/arch/arm/configs/ezx_defconfig
@@ -309,7 +309,6 @@ CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
 CONFIG_NFS_V3_ACL=y
 CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
 CONFIG_NFSD_V3_ACL=y
 CONFIG_SMB_FS=m
 CONFIG_CIFS=m
diff --git a/arch/arm/configs/imote2_defconfig b/arch/arm/configs/imote2_defconfig
index 6db871d4e0775..015b7ef237dee 100644
--- a/arch/arm/configs/imote2_defconfig
+++ b/arch/arm/configs/imote2_defconfig
@@ -283,7 +283,6 @@ CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
 CONFIG_NFS_V3_ACL=y
 CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
 CONFIG_NFSD_V3_ACL=y
 CONFIG_SMB_FS=m
 CONFIG_CIFS=m
diff --git a/arch/arm/configs/integrator_defconfig b/arch/arm/configs/integrator_defconfig
index 4dfe321a79f6d..5b485722ccf97 100644
--- a/arch/arm/configs/integrator_defconfig
+++ b/arch/arm/configs/integrator_defconfig
@@ -79,7 +79,6 @@ CONFIG_CRAMFS=y
 CONFIG_NFS_FS=y
 CONFIG_ROOT_NFS=y
 CONFIG_NFSD=y
-CONFIG_NFSD_V3=y
 CONFIG_NLS_CODEPAGE_437=y
 CONFIG_NLS_ISO8859_1=y
 CONFIG_MAGIC_SYSRQ=y
diff --git a/arch/arm/configs/iop32x_defconfig b/arch/arm/configs/iop32x_defconfig
index 18a21faa834cb..de2cc6759cae5 100644
--- a/arch/arm/configs/iop32x_defconfig
+++ b/arch/arm/configs/iop32x_defconfig
@@ -94,7 +94,6 @@ CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
 CONFIG_ROOT_NFS=y
 CONFIG_NFSD=y
-CONFIG_NFSD_V3=y
 CONFIG_PARTITION_ADVANCED=y
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_KERNEL=y
diff --git a/arch/arm/configs/keystone_defconfig b/arch/arm/configs/keystone_defconfig
index 33c917df7b329..b1bcb858216b9 100644
--- a/arch/arm/configs/keystone_defconfig
+++ b/arch/arm/configs/keystone_defconfig
@@ -213,7 +213,6 @@ CONFIG_NFS_FS=y
 CONFIG_NFS_V3_ACL=y
 CONFIG_ROOT_NFS=y
 CONFIG_NFSD=y
-CONFIG_NFSD_V3=y
 CONFIG_NFSD_V3_ACL=y
 CONFIG_NLS_CODEPAGE_437=y
 CONFIG_NLS_ISO8859_1=y
diff --git a/arch/arm/configs/lart_defconfig b/arch/arm/configs/lart_defconfig
index b6ddb98843263..2dfa33d7dca39 100644
--- a/arch/arm/configs/lart_defconfig
+++ b/arch/arm/configs/lart_defconfig
@@ -59,7 +59,6 @@ CONFIG_CRAMFS=m
 CONFIG_NFS_FS=m
 CONFIG_NFS_V3=y
 CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
 CONFIG_NLS=y
 CONFIG_NLS_CODEPAGE_437=m
 CONFIG_NLS_CODEPAGE_850=m
diff --git a/arch/arm/configs/netwinder_defconfig b/arch/arm/configs/netwinder_defconfig
index 2e3b20ef0db15..c3c171cec91bb 100644
--- a/arch/arm/configs/netwinder_defconfig
+++ b/arch/arm/configs/netwinder_defconfig
@@ -71,7 +71,6 @@ CONFIG_NFS_V3=y
 CONFIG_NFS_V4=y
 CONFIG_ROOT_NFS=y
 CONFIG_NFSD=y
-CONFIG_NFSD_V3=y
 CONFIG_SMB_FS=y
 CONFIG_PARTITION_ADVANCED=y
 CONFIG_NLS_CODEPAGE_437=y
diff --git a/arch/arm/configs/versatile_defconfig b/arch/arm/configs/versatile_defconfig
index d06aa64e05a1a..c2d79a67f81f4 100644
--- a/arch/arm/configs/versatile_defconfig
+++ b/arch/arm/configs/versatile_defconfig
@@ -86,7 +86,6 @@ CONFIG_ROMFS_FS=y
 CONFIG_NFS_FS=y
 CONFIG_ROOT_NFS=y
 CONFIG_NFSD=y
-CONFIG_NFSD_V3=y
 CONFIG_NLS_CODEPAGE_850=m
 CONFIG_NLS_ISO8859_1=m
 CONFIG_MAGIC_SYSRQ=y
diff --git a/arch/arm/configs/viper_defconfig b/arch/arm/configs/viper_defconfig
index 989599ce53008..c28539bfd1283 100644
--- a/arch/arm/configs/viper_defconfig
+++ b/arch/arm/configs/viper_defconfig
@@ -145,7 +145,6 @@ CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
 CONFIG_ROOT_NFS=y
 CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
 CONFIG_PARTITION_ADVANCED=y
 CONFIG_NLS_CODEPAGE_437=m
 CONFIG_NLS_CODEPAGE_850=m
diff --git a/arch/arm/configs/zeus_defconfig b/arch/arm/configs/zeus_defconfig
index d3b98c4d225be..25bb6995f1055 100644
--- a/arch/arm/configs/zeus_defconfig
+++ b/arch/arm/configs/zeus_defconfig
@@ -160,7 +160,6 @@ CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
 CONFIG_ROOT_NFS=y
 CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
 CONFIG_PARTITION_ADVANCED=y
 CONFIG_NLS_CODEPAGE_437=m
 CONFIG_NLS_CODEPAGE_850=m
diff --git a/arch/ia64/configs/zx1_defconfig b/arch/ia64/configs/zx1_defconfig
index 629cb9cdf723b..851d8594cdb82 100644
--- a/arch/ia64/configs/zx1_defconfig
+++ b/arch/ia64/configs/zx1_defconfig
@@ -106,7 +106,6 @@ CONFIG_HUGETLBFS=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V4=y
 CONFIG_NFSD=y
-CONFIG_NFSD_V3=y
 CONFIG_NLS_CODEPAGE_437=y
 CONFIG_NLS_CODEPAGE_737=y
 CONFIG_NLS_CODEPAGE_775=y
diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig
index bc9952f8be667..894ab9d20cacf 100644
--- a/arch/m68k/configs/amiga_defconfig
+++ b/arch/m68k/configs/amiga_defconfig
@@ -500,7 +500,6 @@ CONFIG_NFS_V4=m
 CONFIG_NFS_SWAP=y
 CONFIG_ROOT_NFS=y
 CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
 CONFIG_CIFS=m
 # CONFIG_CIFS_STATS2 is not set
 # CONFIG_CIFS_DEBUG is not set
diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig
index a77269c6e5bac..11e7d58ff8f45 100644
--- a/arch/m68k/configs/apollo_defconfig
+++ b/arch/m68k/configs/apollo_defconfig
@@ -457,7 +457,6 @@ CONFIG_NFS_V4=m
 CONFIG_NFS_SWAP=y
 CONFIG_ROOT_NFS=y
 CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
 CONFIG_CIFS=m
 # CONFIG_CIFS_STATS2 is not set
 # CONFIG_CIFS_DEBUG is not set
diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig
index 7a74efa6b9a1a..dd033659d319a 100644
--- a/arch/m68k/configs/atari_defconfig
+++ b/arch/m68k/configs/atari_defconfig
@@ -478,7 +478,6 @@ CONFIG_NFS_V4=m
 CONFIG_NFS_SWAP=y
 CONFIG_ROOT_NFS=y
 CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
 CONFIG_CIFS=m
 # CONFIG_CIFS_STATS2 is not set
 # CONFIG_CIFS_DEBUG is not set
diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig
index a5323bf2eb333..326092e3adc60 100644
--- a/arch/m68k/configs/bvme6000_defconfig
+++ b/arch/m68k/configs/bvme6000_defconfig
@@ -450,7 +450,6 @@ CONFIG_NFS_V4=m
 CONFIG_NFS_SWAP=y
 CONFIG_ROOT_NFS=y
 CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
 CONFIG_CIFS=m
 # CONFIG_CIFS_STATS2 is not set
 # CONFIG_CIFS_DEBUG is not set
diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig
index 5e80aa0869d54..7583a2d750231 100644
--- a/arch/m68k/configs/hp300_defconfig
+++ b/arch/m68k/configs/hp300_defconfig
@@ -459,7 +459,6 @@ CONFIG_NFS_V4=m
 CONFIG_NFS_SWAP=y
 CONFIG_ROOT_NFS=y
 CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
 CONFIG_CIFS=m
 # CONFIG_CIFS_STATS2 is not set
 # CONFIG_CIFS_DEBUG is not set
diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig
index e84326a3f62db..fac22f70c8cb7 100644
--- a/arch/m68k/configs/mac_defconfig
+++ b/arch/m68k/configs/mac_defconfig
@@ -480,7 +480,6 @@ CONFIG_NFS_V4=m
 CONFIG_NFS_SWAP=y
 CONFIG_ROOT_NFS=y
 CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
 CONFIG_CIFS=m
 # CONFIG_CIFS_STATS2 is not set
 # CONFIG_CIFS_DEBUG is not set
diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig
index 337552f433390..ee430aa473bb9 100644
--- a/arch/m68k/configs/multi_defconfig
+++ b/arch/m68k/configs/multi_defconfig
@@ -565,7 +565,6 @@ CONFIG_NFS_V4=m
 CONFIG_NFS_SWAP=y
 CONFIG_ROOT_NFS=y
 CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
 CONFIG_CIFS=m
 # CONFIG_CIFS_STATS2 is not set
 # CONFIG_CIFS_DEBUG is not set
diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig
index 7b688f7d272a2..c26e9d196094c 100644
--- a/arch/m68k/configs/mvme147_defconfig
+++ b/arch/m68k/configs/mvme147_defconfig
@@ -449,7 +449,6 @@ CONFIG_NFS_V4=m
 CONFIG_NFS_SWAP=y
 CONFIG_ROOT_NFS=y
 CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
 CONFIG_CIFS=m
 # CONFIG_CIFS_STATS2 is not set
 # CONFIG_CIFS_DEBUG is not set
diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig
index 7c2cb31d63dd8..f00a1e3060eda 100644
--- a/arch/m68k/configs/mvme16x_defconfig
+++ b/arch/m68k/configs/mvme16x_defconfig
@@ -450,7 +450,6 @@ CONFIG_NFS_V4=m
 CONFIG_NFS_SWAP=y
 CONFIG_ROOT_NFS=y
 CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
 CONFIG_CIFS=m
 # CONFIG_CIFS_STATS2 is not set
 # CONFIG_CIFS_DEBUG is not set
diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig
index ca43897af26de..e054e81dde59f 100644
--- a/arch/m68k/configs/q40_defconfig
+++ b/arch/m68k/configs/q40_defconfig
@@ -467,7 +467,6 @@ CONFIG_NFS_V4=m
 CONFIG_NFS_SWAP=y
 CONFIG_ROOT_NFS=y
 CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
 CONFIG_CIFS=m
 # CONFIG_CIFS_STATS2 is not set
 # CONFIG_CIFS_DEBUG is not set
diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig
index e3d515f37144a..62678b037d0d6 100644
--- a/arch/m68k/configs/sun3_defconfig
+++ b/arch/m68k/configs/sun3_defconfig
@@ -452,7 +452,6 @@ CONFIG_NFS_V4=m
 CONFIG_NFS_SWAP=y
 CONFIG_ROOT_NFS=y
 CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
 CONFIG_CIFS=m
 # CONFIG_CIFS_STATS2 is not set
 # CONFIG_CIFS_DEBUG is not set
diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig
index d601606c969b8..113f4c60caf79 100644
--- a/arch/m68k/configs/sun3x_defconfig
+++ b/arch/m68k/configs/sun3x_defconfig
@@ -451,7 +451,6 @@ CONFIG_NFS_V4=m
 CONFIG_NFS_SWAP=y
 CONFIG_ROOT_NFS=y
 CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
 CONFIG_CIFS=m
 # CONFIG_CIFS_STATS2 is not set
 # CONFIG_CIFS_DEBUG is not set
diff --git a/arch/mips/configs/cobalt_defconfig b/arch/mips/configs/cobalt_defconfig
index c6a652ad34f7b..e835730ea7fa4 100644
--- a/arch/mips/configs/cobalt_defconfig
+++ b/arch/mips/configs/cobalt_defconfig
@@ -69,6 +69,5 @@ CONFIG_CONFIGFS_FS=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3_ACL=y
 CONFIG_NFSD=y
-CONFIG_NFSD_V3=y
 CONFIG_NFSD_V3_ACL=y
 CONFIG_LIBCRC32C=y
diff --git a/arch/mips/configs/decstation_64_defconfig b/arch/mips/configs/decstation_64_defconfig
index e2ed105f8c97c..0021427a1bbe7 100644
--- a/arch/mips/configs/decstation_64_defconfig
+++ b/arch/mips/configs/decstation_64_defconfig
@@ -159,7 +159,6 @@ CONFIG_NFS_V3_ACL=y
 CONFIG_NFS_SWAP=y
 CONFIG_ROOT_NFS=y
 CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
 CONFIG_NFSD_V3_ACL=y
 # CONFIG_RPCSEC_GSS_KRB5 is not set
 CONFIG_NLS_ISO8859_8=m
diff --git a/arch/mips/configs/decstation_defconfig b/arch/mips/configs/decstation_defconfig
index 7e987d6f5e344..7a97a0818ce42 100644
--- a/arch/mips/configs/decstation_defconfig
+++ b/arch/mips/configs/decstation_defconfig
@@ -154,7 +154,6 @@ CONFIG_NFS_V3_ACL=y
 CONFIG_NFS_SWAP=y
 CONFIG_ROOT_NFS=y
 CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
 CONFIG_NFSD_V3_ACL=y
 # CONFIG_RPCSEC_GSS_KRB5 is not set
 CONFIG_NLS_ISO8859_8=m
diff --git a/arch/mips/configs/decstation_r4k_defconfig b/arch/mips/configs/decstation_r4k_defconfig
index 6df5f6f2ac8eb..a0643363526d7 100644
--- a/arch/mips/configs/decstation_r4k_defconfig
+++ b/arch/mips/configs/decstation_r4k_defconfig
@@ -154,7 +154,6 @@ CONFIG_NFS_V3_ACL=y
 CONFIG_NFS_SWAP=y
 CONFIG_ROOT_NFS=y
 CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
 CONFIG_NFSD_V3_ACL=y
 # CONFIG_RPCSEC_GSS_KRB5 is not set
 CONFIG_NLS_ISO8859_8=m
diff --git a/arch/mips/configs/ip22_defconfig b/arch/mips/configs/ip22_defconfig
index 21a1168ae301f..70a4ba90f4917 100644
--- a/arch/mips/configs/ip22_defconfig
+++ b/arch/mips/configs/ip22_defconfig
@@ -269,7 +269,6 @@ CONFIG_UFS_FS=m
 CONFIG_NFS_FS=m
 CONFIG_NFS_V3_ACL=y
 CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
 CONFIG_NFSD_V3_ACL=y
 CONFIG_CIFS=m
 CONFIG_CIFS_UPCALL=y
diff --git a/arch/mips/configs/ip32_defconfig b/arch/mips/configs/ip32_defconfig
index 1ae48f7d9ddd2..74020aa3440b1 100644
--- a/arch/mips/configs/ip32_defconfig
+++ b/arch/mips/configs/ip32_defconfig
@@ -112,7 +112,6 @@ CONFIG_CONFIGFS_FS=y
 CONFIG_NFS_FS=y
 CONFIG_ROOT_NFS=y
 CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
 CONFIG_CIFS=m
 CONFIG_NLS=y
 CONFIG_NLS_CODEPAGE_437=m
diff --git a/arch/mips/configs/jazz_defconfig b/arch/mips/configs/jazz_defconfig
index 8c223035921f2..843f360da5f2b 100644
--- a/arch/mips/configs/jazz_defconfig
+++ b/arch/mips/configs/jazz_defconfig
@@ -92,5 +92,4 @@ CONFIG_TMPFS=y
 CONFIG_UFS_FS=m
 CONFIG_NFS_FS=m
 CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
 CONFIG_CIFS=m
diff --git a/arch/mips/configs/malta_defconfig b/arch/mips/configs/malta_defconfig
index 3321bb5769445..1c220d152a50c 100644
--- a/arch/mips/configs/malta_defconfig
+++ b/arch/mips/configs/malta_defconfig
@@ -363,7 +363,6 @@ CONFIG_UFS_FS=m
 CONFIG_NFS_FS=y
 CONFIG_ROOT_NFS=y
 CONFIG_NFSD=y
-CONFIG_NFSD_V3=y
 CONFIG_NLS_CODEPAGE_437=m
 CONFIG_NLS_CODEPAGE_737=m
 CONFIG_NLS_CODEPAGE_775=m
diff --git a/arch/mips/configs/malta_kvm_defconfig b/arch/mips/configs/malta_kvm_defconfig
index 009b30372226b..b5ba08d7ab575 100644
--- a/arch/mips/configs/malta_kvm_defconfig
+++ b/arch/mips/configs/malta_kvm_defconfig
@@ -371,7 +371,6 @@ CONFIG_UFS_FS=m
 CONFIG_NFS_FS=y
 CONFIG_ROOT_NFS=y
 CONFIG_NFSD=y
-CONFIG_NFSD_V3=y
 CONFIG_NLS_CODEPAGE_437=m
 CONFIG_NLS_CODEPAGE_737=m
 CONFIG_NLS_CODEPAGE_775=m
diff --git a/arch/mips/configs/maltaup_xpa_defconfig b/arch/mips/configs/maltaup_xpa_defconfig
index e214e136101cd..8d58653f1b4ed 100644
--- a/arch/mips/configs/maltaup_xpa_defconfig
+++ b/arch/mips/configs/maltaup_xpa_defconfig
@@ -370,7 +370,6 @@ CONFIG_UFS_FS=m
 CONFIG_NFS_FS=y
 CONFIG_ROOT_NFS=y
 CONFIG_NFSD=y
-CONFIG_NFSD_V3=y
 CONFIG_NLS_CODEPAGE_437=m
 CONFIG_NLS_CODEPAGE_737=m
 CONFIG_NLS_CODEPAGE_775=m
diff --git a/arch/mips/configs/rm200_defconfig b/arch/mips/configs/rm200_defconfig
index 3dc2da2bee0df..7d6f235e8ccbc 100644
--- a/arch/mips/configs/rm200_defconfig
+++ b/arch/mips/configs/rm200_defconfig
@@ -354,7 +354,6 @@ CONFIG_SYSV_FS=m
 CONFIG_UFS_FS=m
 CONFIG_NFS_FS=m
 CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
 CONFIG_CIFS=m
 CONFIG_CODA_FS=m
 CONFIG_AFS_FS=m
diff --git a/arch/mips/configs/tb0219_defconfig b/arch/mips/configs/tb0219_defconfig
index 6547f84750b57..c56d8ab14ba67 100644
--- a/arch/mips/configs/tb0219_defconfig
+++ b/arch/mips/configs/tb0219_defconfig
@@ -72,6 +72,5 @@ CONFIG_ROMFS_FS=m
 CONFIG_NFS_FS=y
 CONFIG_ROOT_NFS=y
 CONFIG_NFSD=y
-CONFIG_NFSD_V3=y
 CONFIG_CMDLINE_BOOL=y
 CONFIG_CMDLINE="cca=3 mem=64M console=ttyVR0,115200 ip=any root=/dev/nfs"
diff --git a/arch/mips/configs/tb0226_defconfig b/arch/mips/configs/tb0226_defconfig
index 7e099f7c2286e..6e1423428f022 100644
--- a/arch/mips/configs/tb0226_defconfig
+++ b/arch/mips/configs/tb0226_defconfig
@@ -67,6 +67,5 @@ CONFIG_ROMFS_FS=m
 CONFIG_NFS_FS=y
 CONFIG_ROOT_NFS=y
 CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
 CONFIG_CMDLINE_BOOL=y
 CONFIG_CMDLINE="cca=3 mem=32M console=ttyVR0,115200"
diff --git a/arch/mips/configs/tb0287_defconfig b/arch/mips/configs/tb0287_defconfig
index 0d881dd862c06..cf65a0879eceb 100644
--- a/arch/mips/configs/tb0287_defconfig
+++ b/arch/mips/configs/tb0287_defconfig
@@ -77,7 +77,6 @@ CONFIG_ROMFS_FS=m
 CONFIG_NFS_FS=y
 CONFIG_ROOT_NFS=y
 CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
 CONFIG_FONTS=y
 CONFIG_FONT_8x8=y
 CONFIG_FONT_8x16=y
diff --git a/arch/mips/configs/workpad_defconfig b/arch/mips/configs/workpad_defconfig
index 4798dc86c9cea..7e16da0bde8c2 100644
--- a/arch/mips/configs/workpad_defconfig
+++ b/arch/mips/configs/workpad_defconfig
@@ -63,6 +63,5 @@ CONFIG_TMPFS=y
 CONFIG_TMPFS_POSIX_ACL=y
 CONFIG_NFS_FS=m
 CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
 CONFIG_CMDLINE_BOOL=y
 CONFIG_CMDLINE="console=ttyVR0,19200 ide0=0x170,0x376,49 mem=16M"
diff --git a/arch/parisc/configs/generic-32bit_defconfig b/arch/parisc/configs/generic-32bit_defconfig
index 53061cb2cf7f0..a5fee10d76ee6 100644
--- a/arch/parisc/configs/generic-32bit_defconfig
+++ b/arch/parisc/configs/generic-32bit_defconfig
@@ -210,7 +210,6 @@ CONFIG_TMPFS_XATTR=y
 CONFIG_NFS_FS=m
 # CONFIG_NFS_V2 is not set
 CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
 CONFIG_CIFS=m
 CONFIG_CIFS_XATTR=y
 CONFIG_CIFS_POSIX=y
diff --git a/arch/powerpc/configs/linkstation_defconfig b/arch/powerpc/configs/linkstation_defconfig
index d4be64f190ffb..fa707de761bed 100644
--- a/arch/powerpc/configs/linkstation_defconfig
+++ b/arch/powerpc/configs/linkstation_defconfig
@@ -120,7 +120,6 @@ CONFIG_NFS_V3_ACL=y
 CONFIG_NFS_V4=y
 CONFIG_ROOT_NFS=y
 CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
 CONFIG_CIFS=m
 CONFIG_NLS_CODEPAGE_437=m
 CONFIG_NLS_CODEPAGE_932=m
diff --git a/arch/powerpc/configs/mvme5100_defconfig b/arch/powerpc/configs/mvme5100_defconfig
index 1fed6be95d538..d1c7fd5bf34b3 100644
--- a/arch/powerpc/configs/mvme5100_defconfig
+++ b/arch/powerpc/configs/mvme5100_defconfig
@@ -101,7 +101,6 @@ CONFIG_NFS_V3_ACL=y
 CONFIG_NFS_V4=y
 CONFIG_ROOT_NFS=y
 CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
 CONFIG_CIFS=m
 CONFIG_NLS=y
 CONFIG_NLS_CODEPAGE_437=m
diff --git a/arch/sh/configs/ap325rxa_defconfig b/arch/sh/configs/ap325rxa_defconfig
index 5193b3e099b9f..4d83576b89c66 100644
--- a/arch/sh/configs/ap325rxa_defconfig
+++ b/arch/sh/configs/ap325rxa_defconfig
@@ -93,7 +93,6 @@ CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
 CONFIG_ROOT_NFS=y
 CONFIG_NFSD=y
-CONFIG_NFSD_V3=y
 CONFIG_NLS_CODEPAGE_437=y
 CONFIG_NLS_CODEPAGE_932=y
 CONFIG_NLS_ISO8859_1=y
diff --git a/arch/sh/configs/ecovec24_defconfig b/arch/sh/configs/ecovec24_defconfig
index 03cb916819faf..d90d29d444691 100644
--- a/arch/sh/configs/ecovec24_defconfig
+++ b/arch/sh/configs/ecovec24_defconfig
@@ -123,7 +123,6 @@ CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
 CONFIG_ROOT_NFS=y
 CONFIG_NFSD=y
-CONFIG_NFSD_V3=y
 CONFIG_NLS_CODEPAGE_437=y
 CONFIG_NLS_CODEPAGE_932=y
 CONFIG_NLS_ISO8859_1=y
diff --git a/arch/sh/configs/landisk_defconfig b/arch/sh/configs/landisk_defconfig
index e6c5ddf070c00..492a0a2e0e367 100644
--- a/arch/sh/configs/landisk_defconfig
+++ b/arch/sh/configs/landisk_defconfig
@@ -108,7 +108,6 @@ CONFIG_UFS_FS=m
 CONFIG_NFS_FS=m
 CONFIG_NFS_V3=y
 CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
 CONFIG_SMB_FS=m
 CONFIG_NLS_CODEPAGE_437=y
 CONFIG_NLS_CODEPAGE_932=y
diff --git a/arch/sh/configs/sdk7780_defconfig b/arch/sh/configs/sdk7780_defconfig
index 6c719ab4332ac..7d6d323598480 100644
--- a/arch/sh/configs/sdk7780_defconfig
+++ b/arch/sh/configs/sdk7780_defconfig
@@ -120,7 +120,6 @@ CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
 CONFIG_ROOT_NFS=y
 CONFIG_NFSD=y
-CONFIG_NFSD_V3=y
 CONFIG_NLS_CODEPAGE_437=y
 CONFIG_NLS_ASCII=y
 CONFIG_NLS_ISO8859_1=y
diff --git a/arch/sh/configs/se7724_defconfig b/arch/sh/configs/se7724_defconfig
index a26f7f1841c7a..d817df7cc4a72 100644
--- a/arch/sh/configs/se7724_defconfig
+++ b/arch/sh/configs/se7724_defconfig
@@ -122,7 +122,6 @@ CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
 CONFIG_ROOT_NFS=y
 CONFIG_NFSD=y
-CONFIG_NFSD_V3=y
 CONFIG_NLS_CODEPAGE_437=y
 CONFIG_NLS_CODEPAGE_932=y
 CONFIG_NLS_ISO8859_1=y
diff --git a/arch/sh/configs/sh03_defconfig b/arch/sh/configs/sh03_defconfig
index ff502683132e9..9fcf68b22dba9 100644
--- a/arch/sh/configs/sh03_defconfig
+++ b/arch/sh/configs/sh03_defconfig
@@ -75,7 +75,6 @@ CONFIG_NFS_V3=y
 CONFIG_NFS_V4=y
 CONFIG_ROOT_NFS=y
 CONFIG_NFSD=y
-CONFIG_NFSD_V3=y
 CONFIG_PARTITION_ADVANCED=y
 CONFIG_NLS_CODEPAGE_437=m
 CONFIG_NLS_CODEPAGE_737=m
diff --git a/arch/sh/configs/sh7785lcr_32bit_defconfig b/arch/sh/configs/sh7785lcr_32bit_defconfig
index 5c725c75fcef7..7eb3c10f28ad2 100644
--- a/arch/sh/configs/sh7785lcr_32bit_defconfig
+++ b/arch/sh/configs/sh7785lcr_32bit_defconfig
@@ -129,7 +129,6 @@ CONFIG_NFS_V3=y
 CONFIG_NFS_V4=y
 CONFIG_ROOT_NFS=y
 CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
 CONFIG_NLS_CODEPAGE_437=y
 CONFIG_NLS_CODEPAGE_932=y
 CONFIG_NLS_ISO8859_1=y
diff --git a/arch/sh/configs/titan_defconfig b/arch/sh/configs/titan_defconfig
index cd5c58916c65a..73a0d68b0de6c 100644
--- a/arch/sh/configs/titan_defconfig
+++ b/arch/sh/configs/titan_defconfig
@@ -239,7 +239,6 @@ CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
 CONFIG_ROOT_NFS=y
 CONFIG_NFSD=y
-CONFIG_NFSD_V3=y
 CONFIG_SMB_FS=m
 CONFIG_CIFS=m
 CONFIG_PARTITION_ADVANCED=y
-- 
GitLab


From 35aff0678f99b0623bb72d50112de9e163a19559 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Wed, 16 Feb 2022 11:26:06 -0500
Subject: [PATCH 1250/1586] NFSD: Clean up _lm_ operation names

The common practice is to name function instances the same as the
method names, but with a uniquifying prefix. Commit aef9583b234a
("NFSD: Get reference of lockowner when coping file_lock") missed
this -- the new function names should both have been of the form
"nfsd4_lm_*".

Before more lock manager operations are added in NFSD, rename these
two functions for consistency.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfs4state.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 32063733443d4..f932bcd724217 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -6526,7 +6526,7 @@ nfs4_transform_lock_offset(struct file_lock *lock)
 }
 
 static fl_owner_t
-nfsd4_fl_get_owner(fl_owner_t owner)
+nfsd4_lm_get_owner(fl_owner_t owner)
 {
 	struct nfs4_lockowner *lo = (struct nfs4_lockowner *)owner;
 
@@ -6535,7 +6535,7 @@ nfsd4_fl_get_owner(fl_owner_t owner)
 }
 
 static void
-nfsd4_fl_put_owner(fl_owner_t owner)
+nfsd4_lm_put_owner(fl_owner_t owner)
 {
 	struct nfs4_lockowner *lo = (struct nfs4_lockowner *)owner;
 
@@ -6570,8 +6570,8 @@ nfsd4_lm_notify(struct file_lock *fl)
 
 static const struct lock_manager_operations nfsd_posix_mng_ops  = {
 	.lm_notify = nfsd4_lm_notify,
-	.lm_get_owner = nfsd4_fl_get_owner,
-	.lm_put_owner = nfsd4_fl_put_owner,
+	.lm_get_owner = nfsd4_lm_get_owner,
+	.lm_put_owner = nfsd4_lm_put_owner,
 };
 
 static inline void
-- 
GitLab


From 50719bf3442dd6cd05159e9c98d020b3919ce978 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 14 Feb 2022 15:30:13 -0500
Subject: [PATCH 1251/1586] NFSD: Fix nfsd_breaker_owns_lease() return values

These have been incorrect since the function was introduced.

A proper kerneldoc comment is added since this function, though
static, is part of an external interface.

Reported-by: Dai Ngo <dai.ngo@oracle.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfs4state.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index f932bcd724217..234e852fcdfad 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4711,6 +4711,14 @@ nfsd_break_deleg_cb(struct file_lock *fl)
 	return ret;
 }
 
+/**
+ * nfsd_breaker_owns_lease - Check if lease conflict was resolved
+ * @fl: Lock state to check
+ *
+ * Return values:
+ *   %true: Lease conflict was resolved
+ *   %false: Lease conflict was not resolved.
+ */
 static bool nfsd_breaker_owns_lease(struct file_lock *fl)
 {
 	struct nfs4_delegation *dl = fl->fl_owner;
@@ -4718,11 +4726,11 @@ static bool nfsd_breaker_owns_lease(struct file_lock *fl)
 	struct nfs4_client *clp;
 
 	if (!i_am_nfsd())
-		return NULL;
+		return false;
 	rqst = kthread_data(current);
 	/* Note rq_prog == NFS_ACL_PROGRAM is also possible: */
 	if (rqst->rq_prog != NFS_PROGRAM || rqst->rq_vers < 4)
-		return NULL;
+		return false;
 	clp = *(rqst->rq_lease_breaker);
 	return dl->dl_stid.sc_client == clp;
 }
-- 
GitLab


From 9d6647762b9c6b555bc83d97d7c93be6057a990f Mon Sep 17 00:00:00 2001
From: Dai Ngo <dai.ngo@oracle.com>
Date: Sat, 12 Feb 2022 10:12:52 -0800
Subject: [PATCH 1252/1586] fs/lock: documentation cleanup. Replace
 inode->i_lock with flc_lock.

Update lock usage of lock_manager_operations' functions to reflect
the changes in commit 6109c85037e5 ("locks: add a dedicated spinlock
to protect i_flctx lists").

Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
---
 Documentation/filesystems/locking.rst | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst
index 3f9b1497ebb8d..aaca0b6018193 100644
--- a/Documentation/filesystems/locking.rst
+++ b/Documentation/filesystems/locking.rst
@@ -438,13 +438,13 @@ prototypes::
 locking rules:
 
 ======================	=============	=================	=========
-ops			inode->i_lock	blocked_lock_lock	may block
+ops			   flc_lock  	blocked_lock_lock	may block
 ======================	=============	=================	=========
-lm_notify:		yes		yes			no
+lm_notify:		no      	yes			no
 lm_grant:		no		no			no
 lm_break:		yes		no			no
 lm_change		yes		no			no
-lm_breaker_owns_lease:	no		no			no
+lm_breaker_owns_lease:	yes     	no			no
 ======================	=============	=================	=========
 
 buffer_head
-- 
GitLab


From ba900534f807f0b327c92d5141c85d2313e2d55c Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Fri, 4 Mar 2022 09:40:05 -0800
Subject: [PATCH 1253/1586] f2fs: don't get FREEZE lock in f2fs_evict_inode in
 frozen fs

Let's purge inode cache in order to avoid the below deadlock.

[freeze test]                         shrinkder
freeze_super
 - pwercpu_down_write(SB_FREEZE_FS)
                                       - super_cache_scan
                                         - down_read(&sb->s_umount)
                                           - prune_icache_sb
                                            - dispose_list
                                             - evict
                                              - f2fs_evict_inode
thaw_super
 - down_write(&sb->s_umount);
                                              - __percpu_down_read(SB_FREEZE_FS)

Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 Documentation/ABI/testing/sysfs-fs-f2fs | 1 +
 fs/f2fs/debug.c                         | 1 +
 fs/f2fs/f2fs.h                          | 1 +
 fs/f2fs/inode.c                         | 6 ++++--
 fs/f2fs/super.c                         | 4 ++++
 5 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index 58bf0dc83712d..5a5f3c5445f66 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -458,6 +458,7 @@ Description:	Show status of f2fs superblock in real time.
 		0x800  SBI_QUOTA_SKIP_FLUSH  skip flushing quota in current CP
 		0x1000 SBI_QUOTA_NEED_REPAIR quota file may be corrupted
 		0x2000 SBI_IS_RESIZEFS       resizefs is in process
+		0x4000 SBI_IS_FREEZING       freefs is in process
 		====== ===================== =================================
 
 What:		/sys/fs/f2fs/<disk>/ckpt_thread_ioprio
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 9a13902c77026..cba5eab24595f 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -338,6 +338,7 @@ static char *s_flag[] = {
 	[SBI_QUOTA_SKIP_FLUSH]	= " quota_skip_flush",
 	[SBI_QUOTA_NEED_REPAIR]	= " quota_need_repair",
 	[SBI_IS_RESIZEFS]	= " resizefs",
+	[SBI_IS_FREEZING]	= " freezefs",
 };
 
 static int stat_show(struct seq_file *s, void *v)
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 68d791ec8b278..da729f53daa8a 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1293,6 +1293,7 @@ enum {
 	SBI_QUOTA_SKIP_FLUSH,			/* skip flushing quota in current CP */
 	SBI_QUOTA_NEED_REPAIR,			/* quota file may be corrupted */
 	SBI_IS_RESIZEFS,			/* resizefs is in process */
+	SBI_IS_FREEZING,			/* freezefs is in process */
 };
 
 enum {
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index ab8e0c06c78c4..71f232dcf3c20 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -778,7 +778,8 @@ void f2fs_evict_inode(struct inode *inode)
 	f2fs_remove_ino_entry(sbi, inode->i_ino, UPDATE_INO);
 	f2fs_remove_ino_entry(sbi, inode->i_ino, FLUSH_INO);
 
-	sb_start_intwrite(inode->i_sb);
+	if (!is_sbi_flag_set(sbi, SBI_IS_FREEZING))
+		sb_start_intwrite(inode->i_sb);
 	set_inode_flag(inode, FI_NO_ALLOC);
 	i_size_write(inode, 0);
 retry:
@@ -809,7 +810,8 @@ retry:
 		if (dquot_initialize_needed(inode))
 			set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
 	}
-	sb_end_intwrite(inode->i_sb);
+	if (!is_sbi_flag_set(sbi, SBI_IS_FREEZING))
+		sb_end_intwrite(inode->i_sb);
 no_delete:
 	dquot_drop(inode);
 
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 8e3840973077a..4b570b5c26748 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1663,11 +1663,15 @@ static int f2fs_freeze(struct super_block *sb)
 	/* ensure no checkpoint required */
 	if (!llist_empty(&F2FS_SB(sb)->cprc_info.issue_list))
 		return -EINVAL;
+
+	/* to avoid deadlock on f2fs_evict_inode->SB_FREEZE_FS */
+	set_sbi_flag(F2FS_SB(sb), SBI_IS_FREEZING);
 	return 0;
 }
 
 static int f2fs_unfreeze(struct super_block *sb)
 {
+	clear_sbi_flag(F2FS_SB(sb), SBI_IS_FREEZING);
 	return 0;
 }
 
-- 
GitLab


From 5b2050718d095cd3242d1f42aaaea3a2fec8e6f0 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Fri, 11 Mar 2022 10:21:43 -0700
Subject: [PATCH 1254/1586] block: ensure plug merging checks the correct queue
 at least once

Song reports that a RAID rebuild workload runs much slower recently,
and it is seeing a lot less merging than it did previously. The reason
is that a previous commit reduced the amount of work we do for plug
merging. RAID rebuild interleaves requests between disks, so a last-entry
check in plug merging always misses a merge opportunity since we always
find a different disk than what we are looking for.

Modify the logic such that it's still a one-hit cache, but ensure that
we check enough to find the right target before giving up.

Fixes: d38a9c04c0d5 ("block: only check previous entry for plug merge attempt")
Reported-and-tested-by: Song Liu <song@kernel.org>
Reviewed-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-merge.c | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/block/blk-merge.c b/block/blk-merge.c
index f5255991b773c..8d8177f71ebdb 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -1087,12 +1087,20 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
 	if (!plug || rq_list_empty(plug->mq_list))
 		return false;
 
-	/* check the previously added entry for a quick merge attempt */
-	rq = rq_list_peek(&plug->mq_list);
-	if (rq->q == q) {
-		if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) ==
-				BIO_MERGE_OK)
-			return true;
+	rq_list_for_each(&plug->mq_list, rq) {
+		if (rq->q == q) {
+			if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) ==
+			    BIO_MERGE_OK)
+				return true;
+			break;
+		}
+
+		/*
+		 * Only keep iterating plug list for merges if we have multiple
+		 * queues
+		 */
+		if (!plug->multiple_queues)
+			break;
 	}
 	return false;
 }
-- 
GitLab


From 26fed4ac4eab09c27fbae1859696cc38f0536407 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Fri, 11 Mar 2022 10:24:17 -0700
Subject: [PATCH 1255/1586] block: flush plug based on hardware and software
 queue order

We used to sort the plug list if we had multiple queues before dispatching
requests to the IO scheduler. This usually isn't needed, but for certain
workloads that interleave requests to disks, it's a less efficient to
process the plug list one-by-one if everything is interleaved.

Don't sort the list, but skip through it and flush out entries that have
the same target at the same time.

Fixes: df87eb0fce8f ("block: get rid of plug list sorting")
Reported-and-tested-by: Song Liu <song@kernel.org>
Reviewed-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c | 59 ++++++++++++++++++++++++--------------------------
 1 file changed, 28 insertions(+), 31 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 862d91c6112e5..213bb5979bed8 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2573,13 +2573,36 @@ static void __blk_mq_flush_plug_list(struct request_queue *q,
 	q->mq_ops->queue_rqs(&plug->mq_list);
 }
 
+static void blk_mq_dispatch_plug_list(struct blk_plug *plug, bool from_sched)
+{
+	struct blk_mq_hw_ctx *this_hctx = NULL;
+	struct blk_mq_ctx *this_ctx = NULL;
+	struct request *requeue_list = NULL;
+	unsigned int depth = 0;
+	LIST_HEAD(list);
+
+	do {
+		struct request *rq = rq_list_pop(&plug->mq_list);
+
+		if (!this_hctx) {
+			this_hctx = rq->mq_hctx;
+			this_ctx = rq->mq_ctx;
+		} else if (this_hctx != rq->mq_hctx || this_ctx != rq->mq_ctx) {
+			rq_list_add(&requeue_list, rq);
+			continue;
+		}
+		list_add_tail(&rq->queuelist, &list);
+		depth++;
+	} while (!rq_list_empty(plug->mq_list));
+
+	plug->mq_list = requeue_list;
+	trace_block_unplug(this_hctx->queue, depth, !from_sched);
+	blk_mq_sched_insert_requests(this_hctx, this_ctx, &list, from_sched);
+}
+
 void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 {
-	struct blk_mq_hw_ctx *this_hctx;
-	struct blk_mq_ctx *this_ctx;
 	struct request *rq;
-	unsigned int depth;
-	LIST_HEAD(list);
 
 	if (rq_list_empty(plug->mq_list))
 		return;
@@ -2615,35 +2638,9 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 			return;
 	}
 
-	this_hctx = NULL;
-	this_ctx = NULL;
-	depth = 0;
 	do {
-		rq = rq_list_pop(&plug->mq_list);
-
-		if (!this_hctx) {
-			this_hctx = rq->mq_hctx;
-			this_ctx = rq->mq_ctx;
-		} else if (this_hctx != rq->mq_hctx || this_ctx != rq->mq_ctx) {
-			trace_block_unplug(this_hctx->queue, depth,
-						!from_schedule);
-			blk_mq_sched_insert_requests(this_hctx, this_ctx,
-						&list, from_schedule);
-			depth = 0;
-			this_hctx = rq->mq_hctx;
-			this_ctx = rq->mq_ctx;
-
-		}
-
-		list_add(&rq->queuelist, &list);
-		depth++;
+		blk_mq_dispatch_plug_list(plug, from_schedule);
 	} while (!rq_list_empty(plug->mq_list));
-
-	if (!list_empty(&list)) {
-		trace_block_unplug(this_hctx->queue, depth, !from_schedule);
-		blk_mq_sched_insert_requests(this_hctx, this_ctx, &list,
-						from_schedule);
-	}
 }
 
 void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
-- 
GitLab


From f839e5f1cef36ce268950c387129b1bfefdaebc9 Mon Sep 17 00:00:00 2001
From: John David Anglin <dave.anglin@bell.net>
Date: Fri, 11 Mar 2022 17:22:54 +0000
Subject: [PATCH 1256/1586] parisc: Fix non-access data TLB cache flush faults

When a page is not present, we get non-access data TLB faults from
the fdc and fic instructions in flush_user_dcache_range_asm and
flush_user_icache_range_asm. When these occur, the cache line is
not invalidated and potentially we get memory corruption. The
problem was hidden by the nullification of the flush instructions.

These faults also affect performance. With pa8800/pa8900 processors,
there will be 32 faults per 4 KB page since the cache line is 128
bytes.  There will be more faults with earlier processors.

The problem is fixed by using flush_cache_pages(). It does the flush
using a tmp alias mapping.

The flush_cache_pages() call in flush_cache_range() flushed too
large a range.

V2: Remove unnecessary preempt_disable() and preempt_enable() calls.

Signed-off-by: John David Anglin <dave.anglin@bell.net>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 arch/parisc/kernel/cache.c | 28 +---------------------------
 1 file changed, 1 insertion(+), 27 deletions(-)

diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index 94150b91c96fb..bce71cefe5724 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -558,15 +558,6 @@ static void flush_cache_pages(struct vm_area_struct *vma, struct mm_struct *mm,
 	}
 }
 
-static void flush_user_cache_tlb(struct vm_area_struct *vma,
-				 unsigned long start, unsigned long end)
-{
-	flush_user_dcache_range_asm(start, end);
-	if (vma->vm_flags & VM_EXEC)
-		flush_user_icache_range_asm(start, end);
-	flush_tlb_range(vma, start, end);
-}
-
 void flush_cache_mm(struct mm_struct *mm)
 {
 	struct vm_area_struct *vma;
@@ -581,17 +572,8 @@ void flush_cache_mm(struct mm_struct *mm)
 		return;
 	}
 
-	preempt_disable();
-	if (mm->context == mfsp(3)) {
-		for (vma = mm->mmap; vma; vma = vma->vm_next)
-			flush_user_cache_tlb(vma, vma->vm_start, vma->vm_end);
-		preempt_enable();
-		return;
-	}
-
 	for (vma = mm->mmap; vma; vma = vma->vm_next)
 		flush_cache_pages(vma, mm, vma->vm_start, vma->vm_end);
-	preempt_enable();
 }
 
 void flush_cache_range(struct vm_area_struct *vma,
@@ -605,15 +587,7 @@ void flush_cache_range(struct vm_area_struct *vma,
 		return;
 	}
 
-	preempt_disable();
-	if (vma->vm_mm->context == mfsp(3)) {
-		flush_user_cache_tlb(vma, start, end);
-		preempt_enable();
-		return;
-	}
-
-	flush_cache_pages(vma, vma->vm_mm, vma->vm_start, vma->vm_end);
-	preempt_enable();
+	flush_cache_pages(vma, vma->vm_mm, start, end);
 }
 
 void
-- 
GitLab


From e00b0a2ab8ec019c344e53bfc76e31c18bb587b7 Mon Sep 17 00:00:00 2001
From: John David Anglin <dave.anglin@bell.net>
Date: Wed, 9 Mar 2022 21:14:36 +0000
Subject: [PATCH 1257/1586] parisc: Fix handling off probe non-access faults

Currently, the parisc kernel does not fully support non-access TLB
fault handling for probe instructions. In the fast path, we set the
target register to zero if it is not a shadowed register. The slow
path is not implemented, so we call do_page_fault. The architecture
indicates that non-access faults should not cause a page fault from
disk.

This change adds to code to provide non-access fault support for
probe instructions. It also modifies the handling of faults on
userspace so that if the address lies in a valid VMA and the access
type matches that for the VMA, the probe target register is set to
one. Otherwise, the target register is set to zero.

This was done to make probe instructions more useful for userspace.
Probe instructions are not very useful if they set the target register
to zero whenever a page is not present in memory. Nominally, the
purpose of the probe instruction is determine whether read or write
access to a given address is allowed.

This fixes a problem in function pointer comparison noticed in the
glibc testsuite (stdio-common/tst-vfprintf-user-type). The same
problem is likely in glibc (_dl_lookup_address).

V2 adds flush and lpa instruction support to handle_nadtlb_fault.

Signed-off-by: John David Anglin <dave.anglin@bell.net>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 arch/parisc/include/asm/traps.h |  1 +
 arch/parisc/kernel/traps.c      |  2 +
 arch/parisc/mm/fault.c          | 89 +++++++++++++++++++++++++++++++++
 3 files changed, 92 insertions(+)

diff --git a/arch/parisc/include/asm/traps.h b/arch/parisc/include/asm/traps.h
index 34619f010c631..0ccdb738a9a36 100644
--- a/arch/parisc/include/asm/traps.h
+++ b/arch/parisc/include/asm/traps.h
@@ -18,6 +18,7 @@ unsigned long parisc_acctyp(unsigned long code, unsigned int inst);
 const char *trap_name(unsigned long code);
 void do_page_fault(struct pt_regs *regs, unsigned long code,
 		unsigned long address);
+int handle_nadtlb_fault(struct pt_regs *regs);
 #endif
 
 #endif
diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c
index b6fdebddc8e99..39576a9245c7f 100644
--- a/arch/parisc/kernel/traps.c
+++ b/arch/parisc/kernel/traps.c
@@ -662,6 +662,8 @@ void notrace handle_interruption(int code, struct pt_regs *regs)
 			 by hand. Technically we need to emulate:
 			 fdc,fdce,pdc,"fic,4f",prober,probeir,probew, probeiw
 		*/
+		if (code == 17 && handle_nadtlb_fault(regs))
+			return;
 		fault_address = regs->ior;
 		fault_space = regs->isr;
 		break;
diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c
index e9eabf8f14d7e..f114e102aaf21 100644
--- a/arch/parisc/mm/fault.c
+++ b/arch/parisc/mm/fault.c
@@ -425,3 +425,92 @@ out_of_memory:
 	}
 	pagefault_out_of_memory();
 }
+
+/* Handle non-access data TLB miss faults.
+ *
+ * For probe instructions, accesses to userspace are considered allowed
+ * if they lie in a valid VMA and the access type matches. We are not
+ * allowed to handle MM faults here so there may be situations where an
+ * actual access would fail even though a probe was successful.
+ */
+int
+handle_nadtlb_fault(struct pt_regs *regs)
+{
+	unsigned long insn = regs->iir;
+	int breg, treg, xreg, val = 0;
+	struct vm_area_struct *vma, *prev_vma;
+	struct task_struct *tsk;
+	struct mm_struct *mm;
+	unsigned long address;
+	unsigned long acc_type;
+
+	switch (insn & 0x380) {
+	case 0x280:
+		/* FDC instruction */
+		fallthrough;
+	case 0x380:
+		/* PDC and FIC instructions */
+		if (printk_ratelimit()) {
+			pr_warn("BUG: nullifying cache flush/purge instruction\n");
+			show_regs(regs);
+		}
+		if (insn & 0x20) {
+			/* Base modification */
+			breg = (insn >> 21) & 0x1f;
+			xreg = (insn >> 16) & 0x1f;
+			if (breg && xreg)
+				regs->gr[breg] += regs->gr[xreg];
+		}
+		regs->gr[0] |= PSW_N;
+		return 1;
+
+	case 0x180:
+		/* PROBE instruction */
+		treg = insn & 0x1f;
+		if (regs->isr) {
+			tsk = current;
+			mm = tsk->mm;
+			if (mm) {
+				/* Search for VMA */
+				address = regs->ior;
+				mmap_read_lock(mm);
+				vma = find_vma_prev(mm, address, &prev_vma);
+				mmap_read_unlock(mm);
+
+				/*
+				 * Check if access to the VMA is okay.
+				 * We don't allow for stack expansion.
+				 */
+				acc_type = (insn & 0x40) ? VM_WRITE : VM_READ;
+				if (vma
+				    && address >= vma->vm_start
+				    && (vma->vm_flags & acc_type) == acc_type)
+					val = 1;
+			}
+		}
+		if (treg)
+			regs->gr[treg] = val;
+		regs->gr[0] |= PSW_N;
+		return 1;
+
+	case 0x300:
+		/* LPA instruction */
+		if (insn & 0x20) {
+			/* Base modification */
+			breg = (insn >> 21) & 0x1f;
+			xreg = (insn >> 16) & 0x1f;
+			if (breg && xreg)
+				regs->gr[breg] += regs->gr[xreg];
+		}
+		treg = insn & 0x1f;
+		if (treg)
+			regs->gr[treg] = 0;
+		regs->gr[0] |= PSW_N;
+		return 1;
+
+	default:
+		break;
+	}
+
+	return 0;
+}
-- 
GitLab


From 14615ecccb8b0022589b7a7841cdfdf96319724b Mon Sep 17 00:00:00 2001
From: John David Anglin <dave.anglin@bell.net>
Date: Wed, 9 Mar 2022 21:23:39 +0000
Subject: [PATCH 1258/1586] parisc: Simplify fast path for non-access data TLB
 faults

With the latest cache fix for non-access faults and the support for
non-access faults (code 17) in handle_interruption, we can remove
the fast path emulation for fdc, fic, pdc, lpa, probe and probei
instructions.

Signed-off-by: John David Anglin <dave.anglin@bell.net>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 arch/parisc/kernel/entry.S | 72 +++-----------------------------------
 1 file changed, 5 insertions(+), 67 deletions(-)

diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S
index 6e9cdb2698628..ecf50159359e3 100644
--- a/arch/parisc/kernel/entry.S
+++ b/arch/parisc/kernel/entry.S
@@ -1288,74 +1288,12 @@ nadtlb_check_alias_20:
 nadtlb_emulate:
 
 	/*
-	 * Non access misses can be caused by fdc,fic,pdc,lpa,probe and
-	 * probei instructions. We don't want to fault for these
-	 * instructions (not only does it not make sense, it can cause
-	 * deadlocks, since some flushes are done with the mmap
-	 * semaphore held). If the translation doesn't exist, we can't
-	 * insert a translation, so have to emulate the side effects
-	 * of the instruction. Since we don't insert a translation
-	 * we can get a lot of faults during a flush loop, so it makes
-	 * sense to try to do it here with minimum overhead. We only
-	 * emulate fdc,fic,pdc,probew,prober instructions whose base 
-	 * and index registers are not shadowed. We defer everything 
-	 * else to the "slow" path.
+	 * Non-access misses can be caused by fdc,fic,pdc,lpa,probe and
+	 * probei instructions. The kernel no longer faults doing flushes.
+	 * Use of lpa and probe instructions is rare. Given the issue
+	 * with shadow registers, we defer everything to the "slow" path.
 	 */
-
-	mfctl           %cr19,%r9 /* Get iir */
-
-	/* PA 2.0 Arch Ref. Book pg 382 has a good description of the insn bits.
-	   Checks for fdc,fdce,pdc,"fic,4f",prober,probeir,probew, probeiw */
-
-	/* Checks for fdc,fdce,pdc,"fic,4f" only */
-	ldi             0x280,%r16
-	and             %r9,%r16,%r17
-	cmpb,<>,n       %r16,%r17,nadtlb_probe_check
-	bb,>=,n         %r9,26,nadtlb_nullify  /* m bit not set, just nullify */
-	BL		get_register,%r25
-	extrw,u         %r9,15,5,%r8           /* Get index register # */
-	cmpib,COND(=),n        -1,%r1,nadtlb_fault    /* have to use slow path */
-	copy            %r1,%r24
-	BL		get_register,%r25
-	extrw,u         %r9,10,5,%r8           /* Get base register # */
-	cmpib,COND(=),n        -1,%r1,nadtlb_fault    /* have to use slow path */
-	BL		set_register,%r25
-	add,l           %r1,%r24,%r1           /* doesn't affect c/b bits */
-
-nadtlb_nullify:
-	mfctl           %ipsw,%r8
-	ldil            L%PSW_N,%r9
-	or              %r8,%r9,%r8            /* Set PSW_N */
-	mtctl           %r8,%ipsw
-
-	rfir
-	nop
-
-	/* 
-		When there is no translation for the probe address then we
-		must nullify the insn and return zero in the target register.
-		This will indicate to the calling code that it does not have 
-		write/read privileges to this address.
-
-		This should technically work for prober and probew in PA 1.1,
-		and also probe,r and probe,w in PA 2.0
-
-		WARNING: USE ONLY NON-SHADOW REGISTERS WITH PROBE INSN!
-		THE SLOW-PATH EMULATION HAS NOT BEEN WRITTEN YET.
-
-	*/
-nadtlb_probe_check:
-	ldi             0x80,%r16
-	and             %r9,%r16,%r17
-	cmpb,<>,n       %r16,%r17,nadtlb_fault /* Must be probe,[rw]*/
-	BL              get_register,%r25      /* Find the target register */
-	extrw,u         %r9,31,5,%r8           /* Get target register */
-	cmpib,COND(=),n        -1,%r1,nadtlb_fault    /* have to use slow path */
-	BL		set_register,%r25
-	copy            %r0,%r1                /* Write zero to target register */
-	b nadtlb_nullify                       /* Nullify return insn */
-	nop
-
+	b,n		nadtlb_fault
 
 #ifdef CONFIG_64BIT
 itlb_miss_20w:
-- 
GitLab


From df24e1783e6e0eb3dc0e3ba5a8df3bb0cc537408 Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Wed, 8 Dec 2021 11:06:52 +0100
Subject: [PATCH 1259/1586] parisc: Add vDSO support

Add minimal vDSO support, which provides the signal trampoline helpers,
but none of the userspace syscall helpers like time wrappers.

The big benefit of this vDSO implementation is, that we now don't need
an executeable stack any longer. PA-RISC is one of the last
architectures where an executeable stack was needed in oder to implement
the signal trampolines by putting assembly instructions on the stack
which then gets executed. Instead the kernel will provide the relevant
code in the vDSO page and only put the pointers to the signal
information on the stack.

By dropping the need for executable stacks we avoid running into issues
with applications which want non executable stacks for security reasons.
Additionally, alternative stacks on memory areas without exec
permissions are supported too.

This code is based on an initial implementation by Randolph Chung from 2006:
https://lore.kernel.org/linux-parisc/4544A34A.6080700@tausq.org/

I did the porting and lifted the code to current code base. Dave fixed
the unwind code so that gdb and glibc are able to backtrace through the
code. An additional patch to gdb will be pushed upstream by Dave.

Signed-off-by: Helge Deller <deller@gmx.de>
Signed-off-by: Dave Anglin <dave.anglin@bell.net>
Cc: Randolph Chung <randolph@tausq.org>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 arch/parisc/Kconfig                           |   1 +
 arch/parisc/Makefile                          |  30 +++
 arch/parisc/include/asm/elf.h                 |  15 ++
 arch/parisc/include/asm/mmu.h                 |   6 +-
 arch/parisc/include/asm/mmu_context.h         |  16 +-
 arch/parisc/include/asm/pgtable.h             |   7 +-
 arch/parisc/include/asm/processor.h           |   2 +-
 arch/parisc/include/asm/rt_sigframe.h         |  10 +-
 arch/parisc/include/asm/tlbflush.h            |   2 +-
 arch/parisc/include/asm/unistd.h              |   4 -
 arch/parisc/include/asm/vdso.h                |  24 ++
 arch/parisc/include/uapi/asm/auxvec.h         |   8 +
 arch/parisc/kernel/Makefile                   |   5 +
 arch/parisc/kernel/asm-offsets.c              |   9 +
 arch/parisc/kernel/cache.c                    |   6 +-
 arch/parisc/kernel/signal.c                   | 225 ++++++++----------
 arch/parisc/kernel/signal32.h                 |  19 +-
 arch/parisc/kernel/vdso.c                     | 122 ++++++++++
 arch/parisc/kernel/vdso32/Makefile            |  53 +++++
 arch/parisc/kernel/vdso32/gen_vdso_offsets.sh |  15 ++
 arch/parisc/kernel/vdso32/note.S              |  26 ++
 arch/parisc/kernel/vdso32/restart_syscall.S   |  32 +++
 arch/parisc/kernel/vdso32/sigtramp.S          | 195 +++++++++++++++
 arch/parisc/kernel/vdso32/vdso32.lds.S        | 111 +++++++++
 arch/parisc/kernel/vdso32/vdso32_wrapper.S    |  14 ++
 arch/parisc/kernel/vdso64/Makefile            |  48 ++++
 arch/parisc/kernel/vdso64/gen_vdso_offsets.sh |  15 ++
 arch/parisc/kernel/vdso64/note.S              |   2 +
 arch/parisc/kernel/vdso64/restart_syscall.S   |   3 +
 arch/parisc/kernel/vdso64/sigtramp.S          | 166 +++++++++++++
 arch/parisc/kernel/vdso64/vdso64.lds.S        | 109 +++++++++
 arch/parisc/kernel/vdso64/vdso64_wrapper.S    |  14 ++
 32 files changed, 1149 insertions(+), 165 deletions(-)
 create mode 100644 arch/parisc/include/asm/vdso.h
 create mode 100644 arch/parisc/include/uapi/asm/auxvec.h
 create mode 100644 arch/parisc/kernel/vdso.c
 create mode 100644 arch/parisc/kernel/vdso32/Makefile
 create mode 100755 arch/parisc/kernel/vdso32/gen_vdso_offsets.sh
 create mode 100644 arch/parisc/kernel/vdso32/note.S
 create mode 100644 arch/parisc/kernel/vdso32/restart_syscall.S
 create mode 100644 arch/parisc/kernel/vdso32/sigtramp.S
 create mode 100644 arch/parisc/kernel/vdso32/vdso32.lds.S
 create mode 100644 arch/parisc/kernel/vdso32/vdso32_wrapper.S
 create mode 100644 arch/parisc/kernel/vdso64/Makefile
 create mode 100755 arch/parisc/kernel/vdso64/gen_vdso_offsets.sh
 create mode 100644 arch/parisc/kernel/vdso64/note.S
 create mode 100644 arch/parisc/kernel/vdso64/restart_syscall.S
 create mode 100644 arch/parisc/kernel/vdso64/sigtramp.S
 create mode 100644 arch/parisc/kernel/vdso64/vdso64.lds.S
 create mode 100644 arch/parisc/kernel/vdso64/vdso64_wrapper.S

diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 43c1c880def6c..f65268fd28caf 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -10,6 +10,7 @@ config PARISC
 	select ARCH_HAS_ELF_RANDOMIZE
 	select ARCH_HAS_STRICT_KERNEL_RWX
 	select ARCH_HAS_UBSAN_SANITIZE_ALL
+	select ARCH_HAS_PTE_SPECIAL
 	select ARCH_NO_SG_CHAIN
 	select ARCH_SUPPORTS_HUGETLBFS if PA20
 	select ARCH_SUPPORTS_MEMORY_FAILURE
diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile
index 82d77f4b0d083..6d7361364dadc 100644
--- a/arch/parisc/Makefile
+++ b/arch/parisc/Makefile
@@ -44,6 +44,16 @@ endif
 
 export LD_BFD
 
+# Set default 32 bits cross compilers for vdso
+CC_ARCHES_32 = hppa hppa2.0 hppa1.1
+CC_SUFFIXES  = linux linux-gnu unknown-linux-gnu
+CROSS32_COMPILE := $(call cc-cross-prefix, \
+	$(foreach a,$(CC_ARCHES_32), \
+	$(foreach s,$(CC_SUFFIXES),$(a)-$(s)-)))
+CROSS32CC := $(CROSS32_COMPILE)gcc
+export CROSS32CC
+
+# Set default cross compiler for kernel build
 ifdef cross_compiling
 	ifeq ($(CROSS_COMPILE),)
 		CC_SUFFIXES = linux linux-gnu unknown-linux-gnu
@@ -163,6 +173,26 @@ vmlinuz: vmlinux
 	@$(KGZIP) -cf -9 $< > $@
 endif
 
+ifeq ($(KBUILD_EXTMOD),)
+# We need to generate vdso-offsets.h before compiling certain files in kernel/.
+# In order to do that, we should use the archprepare target, but we can't since
+# asm-offsets.h is included in some files used to generate vdso-offsets.h, and
+# asm-offsets.h is built in prepare0, for which archprepare is a dependency.
+# Therefore we need to generate the header after prepare0 has been made, hence
+# this hack.
+prepare: vdso_prepare
+vdso_prepare: prepare0
+	$(if $(CONFIG_64BIT),$(Q)$(MAKE) \
+		$(build)=arch/parisc/kernel/vdso64 include/generated/vdso64-offsets.h)
+	$(Q)$(MAKE) $(build)=arch/parisc/kernel/vdso32 include/generated/vdso32-offsets.h
+endif
+
+PHONY += vdso_install
+
+vdso_install:
+	$(Q)$(MAKE) $(build)=arch/parisc/kernel/vdso $@
+	$(if $(CONFIG_COMPAT_VDSO), \
+		$(Q)$(MAKE) $(build)=arch/parisc/kernel/vdso32 $@)
 install:
 	$(CONFIG_SHELL) $(srctree)/arch/parisc/install.sh \
 			$(KERNELRELEASE) vmlinux System.map "$(INSTALL_PATH)"
diff --git a/arch/parisc/include/asm/elf.h b/arch/parisc/include/asm/elf.h
index 3bd465a27791c..cc426d365892b 100644
--- a/arch/parisc/include/asm/elf.h
+++ b/arch/parisc/include/asm/elf.h
@@ -359,4 +359,19 @@ struct mm_struct;
 extern unsigned long arch_randomize_brk(struct mm_struct *);
 #define arch_randomize_brk arch_randomize_brk
 
+
+#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1
+struct linux_binprm;
+extern int arch_setup_additional_pages(struct linux_binprm *bprm,
+					int executable_stack);
+#define VDSO_AUX_ENT(a, b) NEW_AUX_ENT(a, b)
+#define VDSO_CURRENT_BASE current->mm->context.vdso_base
+
+#define ARCH_DLINFO						\
+do {								\
+	if (VDSO_CURRENT_BASE) {				\
+		NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_CURRENT_BASE);\
+	}							\
+} while (0)
+
 #endif
diff --git a/arch/parisc/include/asm/mmu.h b/arch/parisc/include/asm/mmu.h
index 3fb70a601d5c9..44fd062b62ede 100644
--- a/arch/parisc/include/asm/mmu.h
+++ b/arch/parisc/include/asm/mmu.h
@@ -2,7 +2,9 @@
 #ifndef _PARISC_MMU_H_
 #define _PARISC_MMU_H_
 
-/* On parisc, we store the space id here */
-typedef unsigned long mm_context_t;
+typedef struct {
+	unsigned long space_id;
+	unsigned long vdso_base;
+} mm_context_t;
 
 #endif /* _PARISC_MMU_H_ */
diff --git a/arch/parisc/include/asm/mmu_context.h b/arch/parisc/include/asm/mmu_context.h
index 726257648d9f5..e788e995bd7a6 100644
--- a/arch/parisc/include/asm/mmu_context.h
+++ b/arch/parisc/include/asm/mmu_context.h
@@ -20,7 +20,7 @@ init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 {
 	BUG_ON(atomic_read(&mm->mm_users) != 1);
 
-	mm->context = alloc_sid();
+	mm->context.space_id = alloc_sid();
 	return 0;
 }
 
@@ -28,22 +28,22 @@ init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 static inline void
 destroy_context(struct mm_struct *mm)
 {
-	free_sid(mm->context);
-	mm->context = 0;
+	free_sid(mm->context.space_id);
+	mm->context.space_id = 0;
 }
 
 static inline unsigned long __space_to_prot(mm_context_t context)
 {
 #if SPACEID_SHIFT == 0
-	return context << 1;
+	return context.space_id << 1;
 #else
-	return context >> (SPACEID_SHIFT - 1);
+	return context.space_id >> (SPACEID_SHIFT - 1);
 #endif
 }
 
 static inline void load_context(mm_context_t context)
 {
-	mtsp(context, 3);
+	mtsp(context.space_id, 3);
 	mtctl(__space_to_prot(context), 8);
 }
 
@@ -89,8 +89,8 @@ static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next)
 
 	BUG_ON(next == &init_mm); /* Should never happen */
 
-	if (next->context == 0)
-	    next->context = alloc_sid();
+	if (next->context.space_id == 0)
+		next->context.space_id = alloc_sid();
 
 	switch_mm(prev,next,current);
 }
diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
index 3e7cf882639fb..f0393f00c2ff8 100644
--- a/arch/parisc/include/asm/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
@@ -70,7 +70,7 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
 	unsigned long flags;
 
 	purge_tlb_start(flags);
-	mtsp(mm->context, 1);
+	mtsp(mm->context.space_id, 1);
 	pdtlb(addr);
 	pitlb(addr);
 	purge_tlb_end(flags);
@@ -219,9 +219,10 @@ extern void __update_cache(pte_t pte);
 #define _PAGE_PRESENT  (1 << xlate_pabit(_PAGE_PRESENT_BIT))
 #define _PAGE_HUGE     (1 << xlate_pabit(_PAGE_HPAGE_BIT))
 #define _PAGE_USER     (1 << xlate_pabit(_PAGE_USER_BIT))
+#define _PAGE_SPECIAL  (_PAGE_DMB)
 
 #define _PAGE_TABLE	(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | _PAGE_DIRTY | _PAGE_ACCESSED)
-#define _PAGE_CHG_MASK	(PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY)
+#define _PAGE_CHG_MASK	(PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_SPECIAL)
 #define _PAGE_KERNEL_RO	(_PAGE_PRESENT | _PAGE_READ | _PAGE_DIRTY | _PAGE_ACCESSED)
 #define _PAGE_KERNEL_EXEC	(_PAGE_KERNEL_RO | _PAGE_EXEC)
 #define _PAGE_KERNEL_RWX	(_PAGE_KERNEL_EXEC | _PAGE_WRITE)
@@ -348,6 +349,7 @@ static inline void pud_clear(pud_t *pud) {
 static inline int pte_dirty(pte_t pte)		{ return pte_val(pte) & _PAGE_DIRTY; }
 static inline int pte_young(pte_t pte)		{ return pte_val(pte) & _PAGE_ACCESSED; }
 static inline int pte_write(pte_t pte)		{ return pte_val(pte) & _PAGE_WRITE; }
+static inline int pte_special(pte_t pte)	{ return pte_val(pte) & _PAGE_SPECIAL; }
 
 static inline pte_t pte_mkclean(pte_t pte)	{ pte_val(pte) &= ~_PAGE_DIRTY; return pte; }
 static inline pte_t pte_mkold(pte_t pte)	{ pte_val(pte) &= ~_PAGE_ACCESSED; return pte; }
@@ -355,6 +357,7 @@ static inline pte_t pte_wrprotect(pte_t pte)	{ pte_val(pte) &= ~_PAGE_WRITE; ret
 static inline pte_t pte_mkdirty(pte_t pte)	{ pte_val(pte) |= _PAGE_DIRTY; return pte; }
 static inline pte_t pte_mkyoung(pte_t pte)	{ pte_val(pte) |= _PAGE_ACCESSED; return pte; }
 static inline pte_t pte_mkwrite(pte_t pte)	{ pte_val(pte) |= _PAGE_WRITE; return pte; }
+static inline pte_t pte_mkspecial(pte_t pte)	{ pte_val(pte) |= _PAGE_SPECIAL; return pte; }
 
 /*
  * Huge pte definitions.
diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h
index 3a3d054384082..0063642127954 100644
--- a/arch/parisc/include/asm/processor.h
+++ b/arch/parisc/include/asm/processor.h
@@ -236,7 +236,7 @@ on downward growing arches, it looks like this:
 
 #define start_thread(regs, new_pc, new_sp) do {		\
 	elf_addr_t *sp = (elf_addr_t *)new_sp;		\
-	__u32 spaceid = (__u32)current->mm->context;	\
+	__u32 spaceid = (__u32)current->mm->context.space_id;	\
 	elf_addr_t pc = (elf_addr_t)new_pc | 3;		\
 	elf_caddr_t *argv = (elf_caddr_t *)bprm->exec + 1;	\
 							\
diff --git a/arch/parisc/include/asm/rt_sigframe.h b/arch/parisc/include/asm/rt_sigframe.h
index 2b3010ade00e7..bb7fb41533275 100644
--- a/arch/parisc/include/asm/rt_sigframe.h
+++ b/arch/parisc/include/asm/rt_sigframe.h
@@ -2,16 +2,8 @@
 #ifndef _ASM_PARISC_RT_SIGFRAME_H
 #define _ASM_PARISC_RT_SIGFRAME_H
 
-#define SIGRETURN_TRAMP 4
-#define SIGRESTARTBLOCK_TRAMP 5 
-#define TRAMP_SIZE (SIGRETURN_TRAMP + SIGRESTARTBLOCK_TRAMP)
-
 struct rt_sigframe {
-	/* XXX: Must match trampoline size in arch/parisc/kernel/signal.c 
-	        Secondary to that it must protect the ERESTART_RESTARTBLOCK
-		trampoline we left on the stack (we were bad and didn't 
-		change sp so we could run really fast.) */
-	unsigned int tramp[TRAMP_SIZE];
+	unsigned int tramp[2]; /* holds original return address */
 	struct siginfo info;
 	struct ucontext uc;
 };
diff --git a/arch/parisc/include/asm/tlbflush.h b/arch/parisc/include/asm/tlbflush.h
index c5ded01d45be0..5ffd7c17f593f 100644
--- a/arch/parisc/include/asm/tlbflush.h
+++ b/arch/parisc/include/asm/tlbflush.h
@@ -17,7 +17,7 @@ int __flush_tlb_range(unsigned long sid,
 	unsigned long start, unsigned long end);
 
 #define flush_tlb_range(vma, start, end) \
-	__flush_tlb_range((vma)->vm_mm->context, start, end)
+	__flush_tlb_range((vma)->vm_mm->context.space_id, start, end)
 
 #define flush_tlb_kernel_range(start, end) \
 	__flush_tlb_range(0, start, end)
diff --git a/arch/parisc/include/asm/unistd.h b/arch/parisc/include/asm/unistd.h
index cd438e4150f65..7708a5806f09b 100644
--- a/arch/parisc/include/asm/unistd.h
+++ b/arch/parisc/include/asm/unistd.h
@@ -63,10 +63,6 @@
 		);							\
 		__sys_res = (long)__res;				\
 	}								\
-	if ( (unsigned long)__sys_res >= (unsigned long)-4095 ){	\
-		errno = -__sys_res;		        		\
-		__sys_res = -1;						\
-	}								\
 	__sys_res;							\
 })
 
diff --git a/arch/parisc/include/asm/vdso.h b/arch/parisc/include/asm/vdso.h
new file mode 100644
index 0000000000000..ef8206193f82f
--- /dev/null
+++ b/arch/parisc/include/asm/vdso.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PARISC_VDSO_H__
+#define __PARISC_VDSO_H__
+
+#ifndef __ASSEMBLY__
+
+#ifdef CONFIG_64BIT
+#include <generated/vdso64-offsets.h>
+#endif
+#include <generated/vdso32-offsets.h>
+
+#define VDSO64_SYMBOL(tsk, name) ((tsk)->mm->context.vdso_base + (vdso64_offset_##name))
+#define VDSO32_SYMBOL(tsk, name) ((tsk)->mm->context.vdso_base + (vdso32_offset_##name))
+
+extern struct vdso_data *vdso_data;
+
+#endif /* __ASSEMBLY __ */
+
+/* Default link addresses for the vDSOs */
+#define VDSO_LBASE	0
+
+#define VDSO_VERSION_STRING	LINUX_5.18
+
+#endif /* __PARISC_VDSO_H__ */
diff --git a/arch/parisc/include/uapi/asm/auxvec.h b/arch/parisc/include/uapi/asm/auxvec.h
new file mode 100644
index 0000000000000..90d2aa699cf3e
--- /dev/null
+++ b/arch/parisc/include/uapi/asm/auxvec.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_PARISC_AUXVEC_H
+#define _UAPI_PARISC_AUXVEC_H
+
+/* The vDSO location. */
+#define AT_SYSINFO_EHDR		33
+
+#endif /* _UAPI_PARISC_AUXVEC_H */
diff --git a/arch/parisc/kernel/Makefile b/arch/parisc/kernel/Makefile
index 8fb819bbbb171..d579243edc2fb 100644
--- a/arch/parisc/kernel/Makefile
+++ b/arch/parisc/kernel/Makefile
@@ -39,3 +39,8 @@ obj-$(CONFIG_KGDB)			+= kgdb.o
 obj-$(CONFIG_KPROBES)			+= kprobes.o
 obj-$(CONFIG_KEXEC_CORE)		+= kexec.o relocate_kernel.o
 obj-$(CONFIG_KEXEC_FILE)		+= kexec_file.o
+
+# vdso
+obj-y			+= vdso.o
+obj-$(CONFIG_64BIT)	+= vdso64/
+obj-y			+= vdso32/
diff --git a/arch/parisc/kernel/asm-offsets.c b/arch/parisc/kernel/asm-offsets.c
index 2a83ef36d2164..2673d57eeb008 100644
--- a/arch/parisc/kernel/asm-offsets.c
+++ b/arch/parisc/kernel/asm-offsets.c
@@ -26,7 +26,11 @@
 #include <asm/ptrace.h>
 #include <asm/processor.h>
 #include <asm/pdc.h>
+#include <uapi/asm/sigcontext.h>
+#include <asm/ucontext.h>
+#include <asm/rt_sigframe.h>
 #include <linux/uaccess.h>
+#include "signal32.h"
 
 /* Add FRAME_SIZE to the size x and align it to y. All definitions
  * that use align_frame will include space for a frame.
@@ -218,6 +222,11 @@ int main(void)
 	DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
 	DEFINE(TI_PRE_COUNT, offsetof(struct task_struct, thread_info.preempt_count));
 	BLANK();
+	DEFINE(ASM_SIGFRAME_SIZE, PARISC_RT_SIGFRAME_SIZE);
+	DEFINE(SIGFRAME_CONTEXT_REGS, offsetof(struct rt_sigframe, uc.uc_mcontext) - PARISC_RT_SIGFRAME_SIZE);
+	DEFINE(ASM_SIGFRAME_SIZE32, PARISC_RT_SIGFRAME_SIZE32);
+	DEFINE(SIGFRAME_CONTEXT_REGS32, offsetof(struct compat_rt_sigframe, uc.uc_mcontext) - PARISC_RT_SIGFRAME_SIZE32);
+	BLANK();
 	DEFINE(ICACHE_BASE, offsetof(struct pdc_cache_info, ic_base));
 	DEFINE(ICACHE_STRIDE, offsetof(struct pdc_cache_info, ic_stride));
 	DEFINE(ICACHE_COUNT, offsetof(struct pdc_cache_info, ic_count));
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index bce71cefe5724..00dc82658856e 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -566,7 +566,7 @@ void flush_cache_mm(struct mm_struct *mm)
 	   rp3440, etc.  So, avoid it if the mm isn't too big.  */
 	if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) &&
 	    mm_total_size(mm) >= parisc_cache_flush_threshold) {
-		if (mm->context)
+		if (mm->context.space_id)
 			flush_tlb_all();
 		flush_cache_all();
 		return;
@@ -581,7 +581,7 @@ void flush_cache_range(struct vm_area_struct *vma,
 {
 	if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) &&
 	    end - start >= parisc_cache_flush_threshold) {
-		if (vma->vm_mm->context)
+		if (vma->vm_mm->context.space_id)
 			flush_tlb_range(vma, start, end);
 		flush_cache_all();
 		return;
@@ -594,7 +594,7 @@ void
 flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long pfn)
 {
 	if (pfn_valid(pfn)) {
-		if (likely(vma->vm_mm->context)) {
+		if (likely(vma->vm_mm->context.space_id)) {
 			flush_tlb_page(vma, vmaddr);
 			__flush_cache_page(vma, vmaddr, PFN_PHYS(pfn));
 		} else {
diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c
index 46b1050640b80..24443908f9057 100644
--- a/arch/parisc/kernel/signal.c
+++ b/arch/parisc/kernel/signal.c
@@ -1,16 +1,13 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- *  linux/arch/parisc/kernel/signal.c: Architecture-specific signal
- *  handling support.
+ *  PA-RISC architecture-specific signal handling support.
  *
  *  Copyright (C) 2000 David Huggins-Daines <dhd@debian.org>
  *  Copyright (C) 2000 Linuxcare, Inc.
+ *  Copyright (C) 2000-2022 Helge Deller <deller@gmx.de>
+ *  Copyright (C) 2022 John David Anglin <dave.anglin@bell.net>
  *
  *  Based on the ia64, i386, and alpha versions.
- *
- *  Like the IA-64, we are a recent enough port (we are *starting*
- *  with glibc2.2) that we do not need to support the old non-realtime
- *  Linux signals.  Therefore we don't.
  */
 
 #include <linux/sched.h>
@@ -32,6 +29,7 @@
 #include <linux/uaccess.h>
 #include <asm/cacheflush.h>
 #include <asm/asm-offsets.h>
+#include <asm/vdso.h>
 
 #ifdef CONFIG_COMPAT
 #include "signal32.h"
@@ -59,14 +57,6 @@
  * Do a signal return - restore sigcontext.
  */
 
-/* Trampoline for calling rt_sigreturn() */
-#define INSN_LDI_R25_0	 0x34190000 /* ldi  0,%r25 (in_syscall=0) */
-#define INSN_LDI_R25_1	 0x34190002 /* ldi  1,%r25 (in_syscall=1) */
-#define INSN_LDI_R20	 0x3414015a /* ldi  __NR_rt_sigreturn,%r20 */
-#define INSN_BLE_SR2_R0  0xe4008200 /* be,l 0x100(%sr2,%r0),%sr0,%r31 */
-/* For debugging */
-#define INSN_DIE_HORRIBLY 0x68000ccc /* stw %r0,0x666(%sr0,%r0) */
-
 static long
 restore_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs)
 {
@@ -77,9 +67,9 @@ restore_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs)
 	err |= __copy_from_user(regs->iaoq, sc->sc_iaoq, sizeof(regs->iaoq));
 	err |= __copy_from_user(regs->iasq, sc->sc_iasq, sizeof(regs->iasq));
 	err |= __get_user(regs->sar, &sc->sc_sar);
-	DBG(2,"restore_sigcontext: iaoq is %#lx / %#lx\n",
-			regs->iaoq[0],regs->iaoq[1]);
-	DBG(2,"restore_sigcontext: r28 is %ld\n", regs->gr[28]);
+	DBG(2, "%s: iaoq is %#lx / %#lx\n",
+			__func__, regs->iaoq[0], regs->iaoq[1]);
+	DBG(2, "%s: r28 is %ld\n", __func__, regs->gr[28]);
 	return err;
 }
 
@@ -102,7 +92,7 @@ sys_rt_sigreturn(struct pt_regs *regs, int in_syscall)
 	/* Unwind the user stack to get the rt_sigframe structure. */
 	frame = (struct rt_sigframe __user *)
 		(usp - sigframe_size);
-	DBG(2,"sys_rt_sigreturn: frame is %p\n", frame);
+	DBG(2, "%s: frame is %p pid %d\n", __func__, frame, task_pid_nr(current));
 
 	regs->orig_r28 = 1; /* no restarts for sigreturn */
 
@@ -110,7 +100,6 @@ sys_rt_sigreturn(struct pt_regs *regs, int in_syscall)
 	compat_frame = (struct compat_rt_sigframe __user *)frame;
 	
 	if (is_compat_task()) {
-		DBG(2,"sys_rt_sigreturn: ELF32 process.\n");
 		if (get_compat_sigset(&set, &compat_frame->uc.uc_sigmask))
 			goto give_sigsegv;
 	} else
@@ -125,25 +114,25 @@ sys_rt_sigreturn(struct pt_regs *regs, int in_syscall)
 	/* Good thing we saved the old gr[30], eh? */
 #ifdef CONFIG_64BIT
 	if (is_compat_task()) {
-		DBG(1,"sys_rt_sigreturn: compat_frame->uc.uc_mcontext 0x%p\n",
-				&compat_frame->uc.uc_mcontext);
+		DBG(1, "%s: compat_frame->uc.uc_mcontext 0x%p\n",
+				__func__, &compat_frame->uc.uc_mcontext);
 // FIXME: Load upper half from register file
 		if (restore_sigcontext32(&compat_frame->uc.uc_mcontext, 
 					&compat_frame->regs, regs))
 			goto give_sigsegv;
-		DBG(1,"sys_rt_sigreturn: usp %#08lx stack 0x%p\n", 
-				usp, &compat_frame->uc.uc_stack);
+		DBG(1, "%s: usp %#08lx stack 0x%p\n",
+				__func__, usp, &compat_frame->uc.uc_stack);
 		if (compat_restore_altstack(&compat_frame->uc.uc_stack))
 			goto give_sigsegv;
 	} else
 #endif
 	{
-		DBG(1,"sys_rt_sigreturn: frame->uc.uc_mcontext 0x%p\n",
-				&frame->uc.uc_mcontext);
+		DBG(1, "%s: frame->uc.uc_mcontext 0x%p\n",
+				__func__, &frame->uc.uc_mcontext);
 		if (restore_sigcontext(&frame->uc.uc_mcontext, regs))
 			goto give_sigsegv;
-		DBG(1,"sys_rt_sigreturn: usp %#08lx stack 0x%p\n", 
-				usp, &frame->uc.uc_stack);
+		DBG(1, "%s: usp %#08lx stack 0x%p\n",
+				__func__, usp, &frame->uc.uc_stack);
 		if (restore_altstack(&frame->uc.uc_stack))
 			goto give_sigsegv;
 	}
@@ -155,14 +144,11 @@ sys_rt_sigreturn(struct pt_regs *regs, int in_syscall)
 	 */
 	if (in_syscall)
 		regs->gr[31] = regs->iaoq[0];
-#if DEBUG_SIG
-	DBG(1,"sys_rt_sigreturn: returning to %#lx, DUMPING REGS:\n", regs->iaoq[0]);
-	show_regs(regs);
-#endif
+
 	return;
 
 give_sigsegv:
-	DBG(1,"sys_rt_sigreturn: Sending SIGSEGV\n");
+	DBG(1, "%s: Sending SIGSEGV\n", __func__);
 	force_sig(SIGSEGV);
 	return;
 }
@@ -177,15 +163,15 @@ get_sigframe(struct k_sigaction *ka, unsigned long sp, size_t frame_size)
 	/*FIXME: ELF32 vs. ELF64 has different frame_size, but since we
 	  don't use the parameter it doesn't matter */
 
-	DBG(1,"get_sigframe: ka = %#lx, sp = %#lx, frame_size = %#lx\n",
-			(unsigned long)ka, sp, frame_size);
+	DBG(1, "%s: ka = %#lx, sp = %#lx, frame_size = %zu\n",
+			__func__, (unsigned long)ka, sp, frame_size);
 	
 	/* Align alternate stack and reserve 64 bytes for the signal
 	   handler's frame marker.  */
 	if ((ka->sa.sa_flags & SA_ONSTACK) != 0 && ! sas_ss_flags(sp))
 		sp = (current->sas_ss_sp + 0x7f) & ~0x3f; /* Stacks grow up! */
 
-	DBG(1,"get_sigframe: Returning sp = %#lx\n", (unsigned long)sp);
+	DBG(1, "%s: Returning sp = %#lx\n", __func__, (unsigned long)sp);
 	return (void __user *) sp; /* Stacks grow up.  Fun. */
 }
 
@@ -205,20 +191,20 @@ setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, int in_sysc
 		err |= __put_user(regs->gr[31]+4, &sc->sc_iaoq[1]);
 		err |= __put_user(regs->sr[3], &sc->sc_iasq[0]);
 		err |= __put_user(regs->sr[3], &sc->sc_iasq[1]);
-		DBG(1,"setup_sigcontext: iaoq %#lx / %#lx (in syscall)\n",
-			regs->gr[31], regs->gr[31]+4);
+		DBG(1, "%s: iaoq %#lx / %#lx (in syscall)\n",
+			__func__, regs->gr[31], regs->gr[31]+4);
 	} else {
 		err |= __copy_to_user(sc->sc_iaoq, regs->iaoq, sizeof(regs->iaoq));
 		err |= __copy_to_user(sc->sc_iasq, regs->iasq, sizeof(regs->iasq));
-		DBG(1,"setup_sigcontext: iaoq %#lx / %#lx (not in syscall)\n", 
-			regs->iaoq[0], regs->iaoq[1]);
+		DBG(1, "%s: iaoq %#lx / %#lx (not in syscall)\n",
+			__func__, regs->iaoq[0], regs->iaoq[1]);
 	}
 
 	err |= __put_user(flags, &sc->sc_flags);
 	err |= __copy_to_user(sc->sc_gr, regs->gr, sizeof(regs->gr));
 	err |= __copy_to_user(sc->sc_fr, regs->fr, sizeof(regs->fr));
 	err |= __put_user(regs->sar, &sc->sc_sar);
-	DBG(1,"setup_sigcontext: r28 is %ld\n", regs->gr[28]);
+	DBG(1, "%s: r28 is %ld\n", __func__, regs->gr[28]);
 
 	return err;
 }
@@ -230,7 +216,7 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs,
 	struct rt_sigframe __user *frame;
 	unsigned long rp, usp;
 	unsigned long haddr, sigframe_size;
-	unsigned long start, end;
+	unsigned long start;
 	int err = 0;
 #ifdef CONFIG_64BIT
 	struct compat_rt_sigframe __user * compat_frame;
@@ -247,8 +233,7 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs,
 #endif
 	frame = get_sigframe(&ksig->ka, usp, sigframe_size);
 
-	DBG(1,"SETUP_RT_FRAME: START\n");
-	DBG(1,"setup_rt_frame: frame %p info %p\n", frame, ksig->info);
+	DBG(1, "%s: frame %p info %p\n", __func__, frame, &ksig->info);
 
 	start = (unsigned long) frame;
 	if (start >= user_addr_max() - sigframe_size)
@@ -259,11 +244,12 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs,
 	compat_frame = (struct compat_rt_sigframe __user *)frame;
 	
 	if (is_compat_task()) {
-		DBG(1,"setup_rt_frame: frame->info = 0x%p\n", &compat_frame->info);
+		DBG(1, "%s: frame->info = 0x%p\n", __func__, &compat_frame->info);
 		err |= copy_siginfo_to_user32(&compat_frame->info, &ksig->info);
 		err |= __compat_save_altstack( &compat_frame->uc.uc_stack, regs->gr[30]);
-		DBG(1,"setup_rt_frame: frame->uc = 0x%p\n", &compat_frame->uc);
-		DBG(1,"setup_rt_frame: frame->uc.uc_mcontext = 0x%p\n", &compat_frame->uc.uc_mcontext);
+		DBG(1, "%s: frame->uc = 0x%p\n", __func__, &compat_frame->uc);
+		DBG(1, "%s: frame->uc.uc_mcontext = 0x%p\n",
+			__func__, &compat_frame->uc.uc_mcontext);
 		err |= setup_sigcontext32(&compat_frame->uc.uc_mcontext, 
 					&compat_frame->regs, regs, in_syscall);
 		err |= put_compat_sigset(&compat_frame->uc.uc_sigmask, set,
@@ -271,11 +257,12 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs,
 	} else
 #endif
 	{	
-		DBG(1,"setup_rt_frame: frame->info = 0x%p\n", &frame->info);
+		DBG(1, "%s: frame->info = 0x%p\n", __func__, &frame->info);
 		err |= copy_siginfo_to_user(&frame->info, &ksig->info);
 		err |= __save_altstack(&frame->uc.uc_stack, regs->gr[30]);
-		DBG(1,"setup_rt_frame: frame->uc = 0x%p\n", &frame->uc);
-		DBG(1,"setup_rt_frame: frame->uc.uc_mcontext = 0x%p\n", &frame->uc.uc_mcontext);
+		DBG(1, "%s: frame->uc = 0x%p\n", __func__, &frame->uc);
+		DBG(1, "%s: frame->uc.uc_mcontext = 0x%p\n",
+			__func__, &frame->uc.uc_mcontext);
 		err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, in_syscall);
 		/* FIXME: Should probably be converted as well for the compat case */
 		err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
@@ -284,32 +271,15 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs,
 	if (err)
 		return -EFAULT;
 
-	/* Set up to return from userspace.  If provided, use a stub
-	   already in userspace. The first words of tramp are used to
-	   save the previous sigrestartblock trampoline that might be
-	   on the stack. We start the sigreturn trampoline at 
-	   SIGRESTARTBLOCK_TRAMP+X. */
-	err |= __put_user(in_syscall ? INSN_LDI_R25_1 : INSN_LDI_R25_0,
-			&frame->tramp[SIGRESTARTBLOCK_TRAMP+0]);
-	err |= __put_user(INSN_LDI_R20, 
-			&frame->tramp[SIGRESTARTBLOCK_TRAMP+1]);
-	err |= __put_user(INSN_BLE_SR2_R0, 
-			&frame->tramp[SIGRESTARTBLOCK_TRAMP+2]);
-	err |= __put_user(INSN_NOP, &frame->tramp[SIGRESTARTBLOCK_TRAMP+3]);
-
-	start = (unsigned long) &frame->tramp[0];
-	end = (unsigned long) &frame->tramp[TRAMP_SIZE];
-	flush_user_dcache_range_asm(start, end);
-	flush_user_icache_range_asm(start, end);
-
-	/* TRAMP Words 0-4, Length 5 = SIGRESTARTBLOCK_TRAMP
-	 * TRAMP Words 5-9, Length 4 = SIGRETURN_TRAMP
-	 * So the SIGRETURN_TRAMP is at the end of SIGRESTARTBLOCK_TRAMP
-	 */
-	rp = (unsigned long) &frame->tramp[SIGRESTARTBLOCK_TRAMP];
+#ifdef CONFIG_64BIT
+	if (!is_compat_task())
+		rp = VDSO64_SYMBOL(current, sigtramp_rt);
+	else
+#endif
+		rp = VDSO32_SYMBOL(current, sigtramp_rt);
 
-	if (err)
-		return -EFAULT;
+	if (in_syscall)
+		rp += 4*4; /* skip 4 instructions and start at ldi 1,%r25 */
 
 	haddr = A(ksig->ka.sa.sa_handler);
 	/* The sa_handler may be a pointer to a function descriptor */
@@ -340,8 +310,8 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs,
 		
 		haddr = fdesc.addr;
 		regs->gr[19] = fdesc.gp;
-		DBG(1,"setup_rt_frame: 64 bit signal, exe=%#lx, r19=%#lx, in_syscall=%d\n",
-		     haddr, regs->gr[19], in_syscall);
+		DBG(1, "%s: 64 bit signal, exe=%#lx, r19=%#lx, in_syscall=%d\n",
+		     __func__, haddr, regs->gr[19], in_syscall);
 	}
 #endif
 
@@ -351,7 +321,7 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs,
 		regs->gr[31] = haddr;
 #ifdef CONFIG_64BIT
 		if (!test_thread_flag(TIF_32BIT))
-			sigframe_size |= 1;
+			sigframe_size |= 1; /* XXX ???? */
 #endif
 	} else {
 		unsigned long psw = USER_PSW;
@@ -373,11 +343,11 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs,
 		}
 
 		regs->gr[0] = psw;
-		regs->iaoq[0] = haddr | 3;
+		regs->iaoq[0] = haddr | PRIV_USER;
 		regs->iaoq[1] = regs->iaoq[0] + 4;
 	}
 
-	regs->gr[2]  = rp;                /* userland return pointer */
+	regs->gr[2]  = rp;			/* userland return pointer */
 	regs->gr[26] = ksig->sig;               /* signal number */
 	
 #ifdef CONFIG_64BIT
@@ -391,15 +361,15 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs,
 		regs->gr[24] = A(&frame->uc);   /* ucontext pointer */
 	}
 	
-	DBG(1,"setup_rt_frame: making sigreturn frame: %#lx + %#lx = %#lx\n",
+	DBG(1, "%s: making sigreturn frame: %#lx + %#lx = %#lx\n", __func__,
 	       regs->gr[30], sigframe_size,
 	       regs->gr[30] + sigframe_size);
 	/* Raise the user stack pointer to make a proper call frame. */
 	regs->gr[30] = (A(frame) + sigframe_size);
 
 
-	DBG(1,"setup_rt_frame: sig deliver (%s,%d) frame=0x%p sp=%#lx iaoq=%#lx/%#lx rp=%#lx\n",
-	       current->comm, current->pid, frame, regs->gr[30],
+	DBG(1, "%s: sig deliver (%s,%d) frame=0x%p sp=%#lx iaoq=%#lx/%#lx rp=%#lx\n",
+	       __func__, current->comm, current->pid, frame, regs->gr[30],
 	       regs->iaoq[0], regs->iaoq[1], rp);
 
 	return 0;
@@ -415,8 +385,8 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs, int in_syscall)
 	int ret;
 	sigset_t *oldset = sigmask_to_save();
 
-	DBG(1,"handle_signal: sig=%ld, ka=%p, info=%p, oldset=%p, regs=%p\n",
-	       ksig->sig, ksig->ka, ksig->info, oldset, regs);
+	DBG(1, "%s: sig=%d, ka=%p, info=%p, oldset=%p, regs=%p\n",
+	       __func__, ksig->sig, &ksig->ka, &ksig->info, oldset, regs);
 	
 	/* Set up the stack frame */
 	ret = setup_rt_frame(ksig, oldset, regs, in_syscall);
@@ -424,8 +394,8 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs, int in_syscall)
 	signal_setup_done(ret, ksig, test_thread_flag(TIF_SINGLESTEP) ||
 			  test_thread_flag(TIF_BLOCKSTEP));
 
-	DBG(1,KERN_DEBUG "do_signal: Exit (success), regs->gr[28] = %ld\n",
-		regs->gr[28]);
+	DBG(1, "%s: Exit (success), regs->gr[28] = %ld\n",
+		__func__, regs->gr[28]);
 }
 
 /*
@@ -483,21 +453,27 @@ syscall_restart(struct pt_regs *regs, struct k_sigaction *ka)
 	if (regs->orig_r28)
 		return;
 	regs->orig_r28 = 1; /* no more restarts */
+
+	DBG(1, "%s:  orig_r28 = %ld  pid %d  r20 %ld\n",
+		__func__, regs->orig_r28, task_pid_nr(current), regs->gr[20]);
+
 	/* Check the return code */
 	switch (regs->gr[28]) {
 	case -ERESTART_RESTARTBLOCK:
 	case -ERESTARTNOHAND:
-		DBG(1,"ERESTARTNOHAND: returning -EINTR\n");
+		DBG(1, "%s: ERESTARTNOHAND: returning -EINTR\n", __func__);
 		regs->gr[28] = -EINTR;
 		break;
 	case -ERESTARTSYS:
 		if (!(ka->sa.sa_flags & SA_RESTART)) {
-			DBG(1,"ERESTARTSYS: putting -EINTR\n");
+			DBG(1, "%s: ERESTARTSYS: putting -EINTR pid %d\n",
+				__func__, task_pid_nr(current));
 			regs->gr[28] = -EINTR;
 			break;
 		}
 		fallthrough;
 	case -ERESTARTNOINTR:
+		DBG(1, "%s: %ld\n", __func__, regs->gr[28]);
 		check_syscallno_in_delay_branch(regs);
 		break;
 	}
@@ -509,50 +485,52 @@ insert_restart_trampoline(struct pt_regs *regs)
 	if (regs->orig_r28)
 		return;
 	regs->orig_r28 = 1; /* no more restarts */
-	switch(regs->gr[28]) {
+
+	DBG(2, "%s: gr28 = %ld pid %d\n",
+		__func__, regs->gr[28], task_pid_nr(current));
+
+	switch (regs->gr[28]) {
 	case -ERESTART_RESTARTBLOCK: {
 		/* Restart the system call - no handlers present */
 		unsigned int *usp = (unsigned int *)regs->gr[30];
-		unsigned long start = (unsigned long) &usp[2];
-		unsigned long end  = (unsigned long) &usp[5];
+		unsigned long rp;
 		long err = 0;
 
 		/* check that we don't exceed the stack */
 		if (A(&usp[0]) >= user_addr_max() - 5 * sizeof(int))
 			return;
 
-		/* Setup a trampoline to restart the syscall
-		 * with __NR_restart_syscall
+		/* Call trampoline in vdso to restart the syscall
+		 * with __NR_restart_syscall.
+		 * Original return addresses are on stack like this:
 		 *
 		 *  0: <return address (orig r31)>
 		 *  4: <2nd half for 64-bit>
-		 *  8: ldw 0(%sp), %r31
-		 * 12: be 0x100(%sr2, %r0)
-		 * 16: ldi __NR_restart_syscall, %r20
 		 */
 #ifdef CONFIG_64BIT
-		err |= put_user(regs->gr[31] >> 32, &usp[0]);
-		err |= put_user(regs->gr[31] & 0xffffffff, &usp[1]);
-		err |= put_user(0x0fc010df, &usp[2]);
-#else
-		err |= put_user(regs->gr[31], &usp[0]);
-		err |= put_user(0x0fc0109f, &usp[2]);
+		if (!is_compat_task()) {
+			err |= put_user(regs->gr[31] >> 32, &usp[0]);
+			err |= put_user(regs->gr[31] & 0xffffffff, &usp[1]);
+			rp = VDSO64_SYMBOL(current, restart_syscall);
+		} else
 #endif
-		err |= put_user(0xe0008200, &usp[3]);
-		err |= put_user(0x34140000, &usp[4]);
-
+		{
+			err |= put_user(regs->gr[31], &usp[0]);
+			rp = VDSO32_SYMBOL(current, restart_syscall);
+		}
 		WARN_ON(err);
 
-		/* flush data/instruction cache for new insns */
-		flush_user_dcache_range_asm(start, end);
-		flush_user_icache_range_asm(start, end);
-
-		regs->gr[31] = regs->gr[30] + 8;
+		regs->gr[31] = rp;
+		DBG(1, "%s: ERESTART_RESTARTBLOCK\n", __func__);
 		return;
 	}
+	case -EINTR:
+		/* ok, was handled before and should be returned. */
+		break;
 	case -ERESTARTNOHAND:
 	case -ERESTARTSYS:
 	case -ERESTARTNOINTR:
+		DBG(1, "%s: Type %ld\n", __func__, regs->gr[28]);
 		check_syscallno_in_delay_branch(regs);
 		return;
 	default:
@@ -567,30 +545,35 @@ insert_restart_trampoline(struct pt_regs *regs)
  * registers).  As noted below, the syscall number gets restored for
  * us due to the magic of delayed branching.
  */
-asmlinkage void
-do_signal(struct pt_regs *regs, long in_syscall)
+static void do_signal(struct pt_regs *regs, long in_syscall)
 {
 	struct ksignal ksig;
+	int restart_syscall;
+	bool has_handler;
 
-	DBG(1,"\ndo_signal: regs=0x%p, sr7 %#lx, in_syscall=%d\n",
-	       regs, regs->sr[7], in_syscall);
+	has_handler = get_signal(&ksig);
 
-	if (get_signal(&ksig)) {
-		DBG(3,"do_signal: signr = %d, regs->gr[28] = %ld\n", signr, regs->gr[28]);
+	restart_syscall = 0;
+	if (in_syscall)
+		restart_syscall = 1;
+
+	if (has_handler) {
 		/* Restart a system call if necessary. */
-		if (in_syscall)
+		if (restart_syscall)
 			syscall_restart(regs, &ksig.ka);
 
 		handle_signal(&ksig, regs, in_syscall);
+		DBG(1, "%s: Handled signal pid %d\n",
+			__func__, task_pid_nr(current));
 		return;
 	}
 
-	/* Did we come from a system call? */
-	if (in_syscall)
+	/* Do we need to restart the system call? */
+	if (restart_syscall)
 		insert_restart_trampoline(regs);
 	
-	DBG(1,"do_signal: Exit (not delivered), regs->gr[28] = %ld\n", 
-		regs->gr[28]);
+	DBG(1, "%s: Exit (not delivered), regs->gr[28] = %ld  orig_r28 = %ld  pid %d\n",
+		__func__, regs->gr[28], regs->orig_r28, task_pid_nr(current));
 
 	restore_saved_sigmask();
 }
diff --git a/arch/parisc/kernel/signal32.h b/arch/parisc/kernel/signal32.h
index f166250f2d064..c03eb1ed4c534 100644
--- a/arch/parisc/kernel/signal32.h
+++ b/arch/parisc/kernel/signal32.h
@@ -36,21 +36,12 @@ struct compat_regfile {
         compat_int_t rf_sar;
 };
 
-#define COMPAT_SIGRETURN_TRAMP 4
-#define COMPAT_SIGRESTARTBLOCK_TRAMP 5
-#define COMPAT_TRAMP_SIZE (COMPAT_SIGRETURN_TRAMP + \
-				COMPAT_SIGRESTARTBLOCK_TRAMP)
-
 struct compat_rt_sigframe {
-        /* XXX: Must match trampoline size in arch/parisc/kernel/signal.c
-                Secondary to that it must protect the ERESTART_RESTARTBLOCK
-                trampoline we left on the stack (we were bad and didn't
-                change sp so we could run really fast.) */
-        compat_uint_t tramp[COMPAT_TRAMP_SIZE];
-        compat_siginfo_t info;
-        struct compat_ucontext uc;
-        /* Hidden location of truncated registers, *must* be last. */
-        struct compat_regfile regs;
+	unsigned int tramp[2]; /* holds original return address */
+	compat_siginfo_t info;
+	struct compat_ucontext uc;
+	/* Hidden location of truncated registers, *must* be last. */
+	struct compat_regfile regs;
 };
 
 /*
diff --git a/arch/parisc/kernel/vdso.c b/arch/parisc/kernel/vdso.c
new file mode 100644
index 0000000000000..63dc44c4c246b
--- /dev/null
+++ b/arch/parisc/kernel/vdso.c
@@ -0,0 +1,122 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  Copyright (c) 2022 Helge Deller <deller@gmx.de>
+ *
+ *  based on arch/s390/kernel/vdso.c which is
+ *  Copyright IBM Corp. 2008
+ *  Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/elf.h>
+#include <linux/timekeeper_internal.h>
+#include <linux/compat.h>
+#include <linux/nsproxy.h>
+#include <linux/time_namespace.h>
+#include <linux/random.h>
+
+#include <asm/pgtable.h>
+#include <asm/page.h>
+#include <asm/sections.h>
+#include <asm/vdso.h>
+#include <asm/cacheflush.h>
+
+extern char vdso32_start, vdso32_end;
+extern char vdso64_start, vdso64_end;
+
+static int vdso_mremap(const struct vm_special_mapping *sm,
+		       struct vm_area_struct *vma)
+{
+	current->mm->context.vdso_base = vma->vm_start;
+	return 0;
+}
+
+#ifdef CONFIG_64BIT
+static struct vm_special_mapping vdso64_mapping = {
+	.name = "[vdso]",
+	.mremap = vdso_mremap,
+};
+#endif
+
+static struct vm_special_mapping vdso32_mapping = {
+	.name = "[vdso]",
+	.mremap = vdso_mremap,
+};
+
+/*
+ * This is called from binfmt_elf, we create the special vma for the
+ * vDSO and insert it into the mm struct tree
+ */
+int arch_setup_additional_pages(struct linux_binprm *bprm,
+				int executable_stack)
+{
+
+	unsigned long vdso_text_start, vdso_text_len, map_base;
+	struct vm_special_mapping *vdso_mapping;
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	int rc;
+
+	if (mmap_write_lock_killable(mm))
+		return -EINTR;
+
+#ifdef CONFIG_64BIT
+	if (!is_compat_task()) {
+		vdso_text_len = &vdso64_end - &vdso64_start;
+		vdso_mapping = &vdso64_mapping;
+	} else
+#endif
+	{
+		vdso_text_len = &vdso32_end - &vdso32_start;
+		vdso_mapping = &vdso32_mapping;
+	}
+
+	map_base = mm->mmap_base;
+	if (current->flags & PF_RANDOMIZE)
+		map_base -= (get_random_int() & 0x1f) * PAGE_SIZE;
+
+	vdso_text_start = get_unmapped_area(NULL, map_base, vdso_text_len, 0, 0);
+
+	/* VM_MAYWRITE for COW so gdb can set breakpoints */
+	vma = _install_special_mapping(mm, vdso_text_start, vdso_text_len,
+				       VM_READ|VM_EXEC|
+				       VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
+				       vdso_mapping);
+	if (IS_ERR(vma)) {
+		do_munmap(mm, vdso_text_start, PAGE_SIZE, NULL);
+		rc = PTR_ERR(vma);
+	} else {
+		current->mm->context.vdso_base = vdso_text_start;
+		rc = 0;
+	}
+
+	mmap_write_unlock(mm);
+	return rc;
+}
+
+static struct page ** __init vdso_setup_pages(void *start, void *end)
+{
+	int pages = (end - start) >> PAGE_SHIFT;
+	struct page **pagelist;
+	int i;
+
+	pagelist = kcalloc(pages + 1, sizeof(struct page *), GFP_KERNEL);
+	if (!pagelist)
+		panic("%s: Cannot allocate page list for VDSO", __func__);
+	for (i = 0; i < pages; i++)
+		pagelist[i] = virt_to_page(start + i * PAGE_SIZE);
+	return pagelist;
+}
+
+static int __init vdso_init(void)
+{
+#ifdef CONFIG_64BIT
+	vdso64_mapping.pages = vdso_setup_pages(&vdso64_start, &vdso64_end);
+#endif
+	if (IS_ENABLED(CONFIG_COMPAT) || !IS_ENABLED(CONFIG_64BIT))
+		vdso32_mapping.pages = vdso_setup_pages(&vdso32_start, &vdso32_end);
+	return 0;
+}
+arch_initcall(vdso_init);
diff --git a/arch/parisc/kernel/vdso32/Makefile b/arch/parisc/kernel/vdso32/Makefile
new file mode 100644
index 0000000000000..85b1c6d261d12
--- /dev/null
+++ b/arch/parisc/kernel/vdso32/Makefile
@@ -0,0 +1,53 @@
+# List of files in the vdso, has to be asm only for now
+
+obj-vdso32 = note.o sigtramp.o restart_syscall.o
+
+# Build rules
+
+targets := $(obj-vdso32) vdso32.so
+obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32))
+
+ccflags-y := -shared -fno-common -fbuiltin -mno-fast-indirect-calls -O2 -mno-long-calls
+#  -march=1.1 -mschedule=7100LC
+ccflags-y += -nostdlib -Wl,-soname=linux-vdso32.so.1 \
+		$(call ld-option, -Wl$(comma)--hash-style=sysv)
+asflags-y := -D__VDSO32__ -s
+
+KBUILD_AFLAGS += -DBUILD_VDSO
+KBUILD_CFLAGS += -DBUILD_VDSO -DDISABLE_BRANCH_PROFILING
+
+VDSO_LIBGCC := $(shell $(CROSS32CC) -print-libgcc-file-name)
+
+obj-y += vdso32_wrapper.o
+extra-y += vdso32.lds
+CPPFLAGS_vdso32.lds += -P -C  #  -U$(ARCH)
+
+$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so FORCE
+
+# Force dependency (incbin is bad)
+# link rule for the .so file, .lds has to be first
+$(obj)/vdso32.so: $(src)/vdso32.lds $(obj-vdso32) $(obj-cvdso32) $(VDSO_LIBGCC)
+	$(call if_changed,vdso32ld)
+
+# assembly rules for the .S files
+$(obj-vdso32): %.o: %.S FORCE
+	$(call if_changed_dep,vdso32as)
+
+$(obj-cvdso32): %.o: %.c FORCE
+	$(call if_changed_dep,vdso32cc)
+
+# actual build commands
+quiet_cmd_vdso32ld = VDSO32L $@
+      cmd_vdso32ld = $(CROSS32CC) $(c_flags) -Wl,-T $^ -o $@
+quiet_cmd_vdso32as = VDSO32A $@
+      cmd_vdso32as = $(CROSS32CC) $(a_flags) -c -o $@ $<
+quiet_cmd_vdso32cc = VDSO32C $@
+      cmd_vdso32cc = $(CROSS32CC) $(c_flags) -c -fPIC -mno-fast-indirect-calls -o $@ $<
+
+# Generate VDSO offsets using helper script
+gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh
+quiet_cmd_vdsosym = VDSOSYM $@
+	cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@
+
+include/generated/vdso32-offsets.h: $(obj)/vdso32.so FORCE
+	$(call if_changed,vdsosym)
diff --git a/arch/parisc/kernel/vdso32/gen_vdso_offsets.sh b/arch/parisc/kernel/vdso32/gen_vdso_offsets.sh
new file mode 100755
index 0000000000000..da39d6cff7f16
--- /dev/null
+++ b/arch/parisc/kernel/vdso32/gen_vdso_offsets.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+#
+# Match symbols in the DSO that look like VDSO_*; produce a header file
+# of constant offsets into the shared object.
+#
+# Doing this inside the Makefile will break the $(filter-out) function,
+# causing Kbuild to rebuild the vdso-offsets header file every time.
+#
+# Inspired by arm64 version.
+#
+
+LC_ALL=C
+sed -n 's/\([0-9a-f]*\) . __kernel_\(.*\)/\#define vdso32_offset_\2\t0x\1/p'
diff --git a/arch/parisc/kernel/vdso32/note.S b/arch/parisc/kernel/vdso32/note.S
new file mode 100644
index 0000000000000..bb350918bebd1
--- /dev/null
+++ b/arch/parisc/kernel/vdso32/note.S
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
+ * Here we can supply some information useful to userland.
+ */
+
+#include <linux/uts.h>
+#include <linux/version.h>
+
+#define ASM_ELF_NOTE_BEGIN(name, flags, vendor, type)			      \
+	.section name, flags;						      \
+	.balign 4;							      \
+	.long 1f - 0f;		/* name length */			      \
+	.long 3f - 2f;		/* data length */			      \
+	.long type;		/* note type */				      \
+0:	.asciz vendor;		/* vendor name */			      \
+1:	.balign 4;							      \
+2:
+
+#define ASM_ELF_NOTE_END						      \
+3:	.balign 4;		/* pad out section */			      \
+	.previous
+
+	ASM_ELF_NOTE_BEGIN(".note.kernel-version", "a", UTS_SYSNAME, 0)
+	.long LINUX_VERSION_CODE
+	ASM_ELF_NOTE_END
diff --git a/arch/parisc/kernel/vdso32/restart_syscall.S b/arch/parisc/kernel/vdso32/restart_syscall.S
new file mode 100644
index 0000000000000..7e82008d7e40c
--- /dev/null
+++ b/arch/parisc/kernel/vdso32/restart_syscall.S
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Syscall restart trampoline for 32 and 64 bits processes.
+ *
+ * Copyright (C) 2018-2022 Helge Deller <deller@gmx.de>
+ * Copyright (C) 2022 John David Anglin <dave.anglin@bell.net>
+ */
+
+#include <asm/unistd.h>
+#include <asm/vdso.h>
+
+#include <linux/linkage.h>
+
+	.text
+
+ENTRY_CFI(__kernel_restart_syscall)
+	/*
+	 * Setup a trampoline to restart the syscall
+	 * with __NR_restart_syscall
+	 */
+
+	/* load return pointer */
+#if defined(__VDSO64__)
+	ldd	0(%sp), %r31
+#elif defined(__VDSO32__)
+	ldw	0(%sp), %r31
+#endif
+
+	be	0x100(%sr2, %r0)
+	ldi	__NR_restart_syscall, %r20
+
+ENDPROC_CFI(__kernel_restart_syscall)
diff --git a/arch/parisc/kernel/vdso32/sigtramp.S b/arch/parisc/kernel/vdso32/sigtramp.S
new file mode 100644
index 0000000000000..192da7077869c
--- /dev/null
+++ b/arch/parisc/kernel/vdso32/sigtramp.S
@@ -0,0 +1,195 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Signal trampolines for 32 bit processes.
+ *
+ * Copyright (C) 2006 Randolph Chung <tausq@debian.org>
+ * Copyright (C) 2018-2022 Helge Deller <deller@gmx.de>
+ * Copyright (C) 2022 John David Anglin <dave.anglin@bell.net>
+ */
+#include <asm/unistd.h>
+#include <linux/linkage.h>
+#include <generated/asm-offsets.h>
+
+	.text
+
+/* Gdb expects the trampoline is on the stack and the pc is offset from
+   a 64-byte boundary by 0, 4 or 5 instructions. Since the vdso trampoline
+   is not on the stack, we need a new variant with different offsets and
+   data to tell gdb where to find the signal context on the stack.
+
+   Here we put the offset to the context data at the start of the trampoline
+   region and offset the first trampoline by 2 instructions. Please do
+   not change the trampoline as the code in gdb depends on the following
+   instruction sequence exactly.
+ */
+	.align 64
+	.word SIGFRAME_CONTEXT_REGS32
+
+/* The nop here is a hack. The dwarf2 unwind routines subtract 1 from
+   the return address to get an address in the middle of the presumed
+   call instruction. Since we don't have a call here, we artifically
+   extend the range covered by the unwind info by adding a nop before
+   the real start.
+ */
+	nop
+
+	.globl __kernel_sigtramp_rt
+	.type __kernel_sigtramp_rt, @function
+__kernel_sigtramp_rt:
+	.proc
+	.callinfo FRAME=ASM_SIGFRAME_SIZE32,CALLS,SAVE_RP
+	.entry
+
+.Lsigrt_start = . - 4
+0:	ldi	0, %r25			/* (in_syscall=0) */
+	ldi  __NR_rt_sigreturn, %r20
+	ble  0x100(%sr2, %r0)
+	nop
+
+1:	ldi	1, %r25			/* (in_syscall=1) */
+	ldi  __NR_rt_sigreturn, %r20
+	ble  0x100(%sr2, %r0)
+	nop
+.Lsigrt_end:
+	.exit
+	.procend
+	.size __kernel_sigtramp_rt,.-__kernel_sigtramp_rt
+
+
+	.section .eh_frame,"a",@progbits
+
+/* This is where the mcontext_t struct can be found on the stack.  */
+#define PTREGS SIGFRAME_CONTEXT_REGS32	/* 32-bit process offset is -672 */
+
+/* Register REGNO can be found at offset OFS of the mcontext_t structure. */
+	.macro rsave regno,ofs
+	.byte 0x05		/* DW_CFA_offset_extended */
+	.uleb128 \regno;	/*   regno */
+	.uleb128 \ofs		/*   factored offset */
+	.endm
+
+.Lcie:
+	.long .Lcie_end - .Lcie_start
+.Lcie_start:
+	.long 0			/* CIE ID */
+	.byte 1			/* Version number */
+	.stringz "zRS"		/* NUL-terminated augmentation string */
+	.uleb128 4		/* Code alignment factor */
+	.sleb128 4		/* Data alignment factor */
+	.byte 89		/* Return address register column, iaoq[0] */
+	.uleb128 1		/* Augmentation value length */
+	.byte 0x1b		/* DW_EH_PE_pcrel | DW_EH_PE_sdata4. */
+	.byte 0x0f		/* DW_CFA_def_cfa_expresion */
+	.uleb128 9f - 1f	/*   length */
+1:
+	.byte 0x8e		/*   DW_OP_breg30 */
+	.sleb128 PTREGS
+9:
+	.balign 4
+.Lcie_end:
+
+	.long .Lfde0_end - .Lfde0_start
+.Lfde0_start:
+	.long .Lfde0_start - .Lcie	/* CIE pointer. */
+	.long .Lsigrt_start - .		/* PC start, length */
+	.long .Lsigrt_end - .Lsigrt_start
+	.uleb128 0			/* Augmentation */
+
+	/* General registers */
+	rsave  1,  2
+	rsave  2,  3
+	rsave  3,  4
+	rsave  4,  5
+	rsave  5,  6
+	rsave  6,  7
+	rsave  7,  8
+	rsave  8,  9
+	rsave  9, 10
+	rsave 10, 11
+	rsave 11, 12
+	rsave 12, 13
+	rsave 13, 14
+	rsave 14, 15
+	rsave 15, 16
+	rsave 16, 17
+	rsave 17, 18
+	rsave 18, 19
+	rsave 19, 20
+	rsave 20, 21
+	rsave 21, 22
+	rsave 22, 23
+	rsave 23, 24
+	rsave 24, 25
+	rsave 25, 26
+	rsave 26, 27
+	rsave 27, 28
+	rsave 28, 29
+	rsave 29, 30
+	rsave 30, 31
+	rsave 31, 32
+
+	/* Floating-point registers */
+	rsave 32, 42
+	rsave 33, 43
+	rsave 34, 44
+	rsave 35, 45
+	rsave 36, 46
+	rsave 37, 47
+	rsave 38, 48
+	rsave 39, 49
+	rsave 40, 50
+	rsave 41, 51
+	rsave 42, 52
+	rsave 43, 53
+	rsave 44, 54
+	rsave 45, 55
+	rsave 46, 56
+	rsave 47, 57
+	rsave 48, 58
+	rsave 49, 59
+	rsave 50, 60
+	rsave 51, 61
+	rsave 52, 62
+	rsave 53, 63
+	rsave 54, 64
+	rsave 55, 65
+	rsave 56, 66
+	rsave 57, 67
+	rsave 58, 68
+	rsave 59, 69
+	rsave 60, 70
+	rsave 61, 71
+	rsave 62, 72
+	rsave 63, 73
+	rsave 64, 74
+	rsave 65, 75
+	rsave 66, 76
+	rsave 67, 77
+	rsave 68, 78
+	rsave 69, 79
+	rsave 70, 80
+	rsave 71, 81
+	rsave 72, 82
+	rsave 73, 83
+	rsave 74, 84
+	rsave 75, 85
+	rsave 76, 86
+	rsave 77, 87
+	rsave 78, 88
+	rsave 79, 89
+	rsave 80, 90
+	rsave 81, 91
+	rsave 82, 92
+	rsave 83, 93
+	rsave 84, 94
+	rsave 85, 95
+	rsave 86, 96
+	rsave 87, 97
+
+	/* SAR register */
+	rsave 88, 102
+
+	/* iaoq[0] return address register */
+	rsave 89, 100
+	.balign 4
+.Lfde0_end:
diff --git a/arch/parisc/kernel/vdso32/vdso32.lds.S b/arch/parisc/kernel/vdso32/vdso32.lds.S
new file mode 100644
index 0000000000000..d4aff3af5262e
--- /dev/null
+++ b/arch/parisc/kernel/vdso32/vdso32.lds.S
@@ -0,0 +1,111 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * This is the infamous ld script for the 32 bits vdso library
+ */
+#include <asm/vdso.h>
+#include <asm/page.h>
+
+/* Default link addresses for the vDSOs */
+OUTPUT_FORMAT("elf32-hppa-linux")
+OUTPUT_ARCH(hppa)
+ENTRY(_start)
+
+SECTIONS
+{
+  . = VDSO_LBASE + SIZEOF_HEADERS;
+  .hash           : { *(.hash) }			:text
+  .gnu.hash       : { *(.gnu.hash) }
+  .dynsym         : { *(.dynsym) }
+  .dynstr         : { *(.dynstr) }
+  .gnu.version    : { *(.gnu.version) }
+  .gnu.version_d  : { *(.gnu.version_d) }
+  .gnu.version_r  : { *(.gnu.version_r) }
+
+  .note		  : { *(.note.*) } 			:text	:note
+
+  . = ALIGN (16);
+  .text :
+  {
+    *(.text .stub .text.* .gnu.linkonce.t.*)
+  }
+  PROVIDE (__etext = .);
+  PROVIDE (_etext = .);
+  PROVIDE (etext = .);
+
+  /* Other stuff is appended to the text segment: */
+  .rodata		: { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+  .rodata1		: { *(.rodata1) }
+  .rodata2		: { *(.data.rel.ro) }
+
+  .eh_frame_hdr		: { *(.eh_frame_hdr) }		:text	:eh_frame_hdr
+  .eh_frame		: { KEEP (*(.eh_frame)) }	:text
+  .gcc_except_table	: { *(.gcc_except_table) }
+  .fixup		: { *(.fixup) }
+
+  .dynamic		: { *(.dynamic) }		:text	:dynamic
+  .plt : { *(.plt) }
+  .got : { *(.got) }
+
+  _end = .;
+  __end = .;
+  PROVIDE (end = .);
+
+
+  /* Stabs debugging sections are here too
+   */
+  .stab 0 : { *(.stab) }
+  .stabstr 0 : { *(.stabstr) }
+  .stab.excl 0 : { *(.stab.excl) }
+  .stab.exclstr 0 : { *(.stab.exclstr) }
+  .stab.index 0 : { *(.stab.index) }
+  .stab.indexstr 0 : { *(.stab.indexstr) }
+  .comment 0 : { *(.comment) }
+  .debug 0 : { *(.debug) }
+  .line 0 : { *(.line) }
+
+  .debug_srcinfo 0 : { *(.debug_srcinfo) }
+  .debug_sfnames 0 : { *(.debug_sfnames) }
+
+  .debug_aranges 0 : { *(.debug_aranges) }
+  .debug_pubnames 0 : { *(.debug_pubnames) }
+
+  .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+  .debug_abbrev 0 : { *(.debug_abbrev) }
+  .debug_line 0 : { *(.debug_line) }
+  .debug_frame 0 : { *(.debug_frame) }
+  .debug_str 0 : { *(.debug_str) }
+  .debug_loc 0 : { *(.debug_loc) }
+  .debug_macinfo 0 : { *(.debug_macinfo) }
+
+  .debug_weaknames 0 : { *(.debug_weaknames) }
+  .debug_funcnames 0 : { *(.debug_funcnames) }
+  .debug_typenames 0 : { *(.debug_typenames) }
+  .debug_varnames 0 : { *(.debug_varnames) }
+
+  /DISCARD/ : { *(.note.GNU-stack) }
+  /DISCARD/ : { *(.data .data.* .gnu.linkonce.d.* .sdata*) }
+  /DISCARD/ : { *(.bss .sbss .dynbss .dynsbss) }
+}
+
+
+PHDRS
+{
+  text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */
+  note PT_NOTE FLAGS(4); /* PF_R */
+  dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
+  eh_frame_hdr PT_GNU_EH_FRAME;
+}
+
+
+/*
+ * This controls what symbols we export from the DSO.
+ */
+VERSION
+{
+  VDSO_VERSION_STRING {
+    global:
+	__kernel_sigtramp_rt32;
+	__kernel_restart_syscall32;
+    local: *;
+  };
+}
diff --git a/arch/parisc/kernel/vdso32/vdso32_wrapper.S b/arch/parisc/kernel/vdso32/vdso32_wrapper.S
new file mode 100644
index 0000000000000..5ac06093e8ec2
--- /dev/null
+++ b/arch/parisc/kernel/vdso32/vdso32_wrapper.S
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
+#include <asm/page.h>
+
+	__PAGE_ALIGNED_DATA
+
+	.globl vdso32_start, vdso32_end
+	.balign PAGE_SIZE
+vdso32_start:
+	.incbin "arch/parisc/kernel/vdso32/vdso32.so"
+	.balign PAGE_SIZE
+vdso32_end:
+
+	.previous
diff --git a/arch/parisc/kernel/vdso64/Makefile b/arch/parisc/kernel/vdso64/Makefile
new file mode 100644
index 0000000000000..a30f5ec5eb4bf
--- /dev/null
+++ b/arch/parisc/kernel/vdso64/Makefile
@@ -0,0 +1,48 @@
+# List of files in the vdso, has to be asm only for now
+
+obj-vdso64 = note.o sigtramp.o restart_syscall.o
+
+# Build rules
+
+targets := $(obj-vdso64) vdso64.so
+obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64))
+
+
+ccflags-y := -shared -fno-common -fno-builtin
+ccflags-y += -nostdlib -Wl,-soname=linux-vdso64.so.1 \
+		$(call ld-option, -Wl$(comma)--hash-style=sysv)
+asflags-y := -D__VDSO64__ -s
+
+KBUILD_AFLAGS += -DBUILD_VDSO
+KBUILD_CFLAGS += -DBUILD_VDSO -DDISABLE_BRANCH_PROFILING
+
+VDSO_LIBGCC := $(shell $(CC) -print-libgcc-file-name)
+
+obj-y += vdso64_wrapper.o
+extra-y += vdso64.lds
+CPPFLAGS_vdso64.lds += -P -C -U$(ARCH)
+
+$(obj)/vdso64_wrapper.o : $(obj)/vdso64.so FORCE
+
+# Force dependency (incbin is bad)
+# link rule for the .so file, .lds has to be first
+$(obj)/vdso64.so: $(src)/vdso64.lds $(obj-vdso64) $(VDSO_LIBGCC)
+	$(call if_changed,vdso64ld)
+
+# assembly rules for the .S files
+$(obj-vdso64): %.o: %.S FORCE
+	$(call if_changed_dep,vdso64as)
+
+# actual build commands
+quiet_cmd_vdso64ld = VDSO64L $@
+      cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $^ -o $@
+quiet_cmd_vdso64as = VDSO64A $@
+      cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $<
+
+# Generate VDSO offsets using helper script
+gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh
+quiet_cmd_vdsosym = VDSOSYM $@
+	cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@
+
+include/generated/vdso64-offsets.h: $(obj)/vdso64.so FORCE
+	$(call if_changed,vdsosym)
diff --git a/arch/parisc/kernel/vdso64/gen_vdso_offsets.sh b/arch/parisc/kernel/vdso64/gen_vdso_offsets.sh
new file mode 100755
index 0000000000000..37f05cb38dad4
--- /dev/null
+++ b/arch/parisc/kernel/vdso64/gen_vdso_offsets.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+#
+# Match symbols in the DSO that look like VDSO_*; produce a header file
+# of constant offsets into the shared object.
+#
+# Doing this inside the Makefile will break the $(filter-out) function,
+# causing Kbuild to rebuild the vdso-offsets header file every time.
+#
+# Inspired by arm64 version.
+#
+
+LC_ALL=C
+sed -n 's/\([0-9a-f]*\) . __kernel_\(.*\)/\#define vdso64_offset_\2\t0x\1/p'
diff --git a/arch/parisc/kernel/vdso64/note.S b/arch/parisc/kernel/vdso64/note.S
new file mode 100644
index 0000000000000..bd1fa23597d62
--- /dev/null
+++ b/arch/parisc/kernel/vdso64/note.S
@@ -0,0 +1,2 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include "../vdso32/note.S"
diff --git a/arch/parisc/kernel/vdso64/restart_syscall.S b/arch/parisc/kernel/vdso64/restart_syscall.S
new file mode 100644
index 0000000000000..83004451f6b17
--- /dev/null
+++ b/arch/parisc/kernel/vdso64/restart_syscall.S
@@ -0,0 +1,3 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include "../vdso32/restart_syscall.S"
diff --git a/arch/parisc/kernel/vdso64/sigtramp.S b/arch/parisc/kernel/vdso64/sigtramp.S
new file mode 100644
index 0000000000000..66a6d2b241e1a
--- /dev/null
+++ b/arch/parisc/kernel/vdso64/sigtramp.S
@@ -0,0 +1,166 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Signal trampolines for 64 bit processes.
+ *
+ * Copyright (C) 2006 Randolph Chung <tausq@debian.org>
+ * Copyright (C) 2018-2022 Helge Deller <deller@gmx.de>
+ * Copyright (C) 2022 John David Anglin <dave.anglin@bell.net>
+ */
+#include <asm/unistd.h>
+#include <linux/linkage.h>
+#include <generated/asm-offsets.h>
+
+	.text
+
+/* Gdb expects the trampoline is on the stack and the pc is offset from
+   a 64-byte boundary by 0, 4 or 5 instructions. Since the vdso trampoline
+   is not on the stack, we need a new variant with different offsets and
+   data to tell gdb where to find the signal context on the stack.
+
+   Here we put the offset to the context data at the start of the trampoline
+   region and offset the first trampoline by 2 instructions. Please do
+   not change the trampoline as the code in gdb depends on the following
+   instruction sequence exactly.
+ */
+	.align 64
+	.word SIGFRAME_CONTEXT_REGS
+
+/* The nop here is a hack. The dwarf2 unwind routines subtract 1 from
+   the return address to get an address in the middle of the presumed
+   call instruction. Since we don't have a call here, we artifically
+   extend the range covered by the unwind info by adding a nop before
+   the real start.
+ */
+	nop
+
+	.globl __kernel_sigtramp_rt
+	.type __kernel_sigtramp_rt, @function
+__kernel_sigtramp_rt:
+	.proc
+	.callinfo FRAME=ASM_SIGFRAME_SIZE,CALLS,SAVE_RP
+	.entry
+
+.Lsigrt_start = . - 4
+0:	ldi	0, %r25			/* (in_syscall=0) */
+	ldi  __NR_rt_sigreturn, %r20
+	ble  0x100(%sr2, %r0)
+	nop
+
+1:	ldi	1, %r25			/* (in_syscall=1) */
+	ldi  __NR_rt_sigreturn, %r20
+	ble  0x100(%sr2, %r0)
+	nop
+.Lsigrt_end:
+	.exit
+	.procend
+	.size __kernel_sigtramp_rt,.-__kernel_sigtramp_rt
+
+	.section .eh_frame,"a",@progbits
+
+/* This is where the mcontext_t struct can be found on the stack.  */
+#define PTREGS SIGFRAME_CONTEXT_REGS	/* 64-bit process offset is -720 */
+
+/* Register REGNO can be found at offset OFS of the mcontext_t structure. */
+	.macro rsave regno,ofs
+	.byte 0x05		/* DW_CFA_offset_extended */
+	.uleb128 \regno;	/*   regno */
+	.uleb128 \ofs		/*   factored offset */
+	.endm
+
+.Lcie:
+	.long .Lcie_end - .Lcie_start
+.Lcie_start:
+	.long 0			/* CIE ID */
+	.byte 1			/* Version number */
+	.stringz "zRS"		/* NUL-terminated augmentation string */
+	.uleb128 4		/* Code alignment factor */
+	.sleb128 8		/* Data alignment factor */
+	.byte 61		/* Return address register column, iaoq[0] */
+	.uleb128 1		/* Augmentation value length */
+	.byte 0x1b		/* DW_EH_PE_pcrel | DW_EH_PE_sdata4. */
+	.byte 0x0f		/* DW_CFA_def_cfa_expresion */
+	.uleb128 9f - 1f	/*   length */
+1:
+	.byte 0x8e		/*   DW_OP_breg30 */
+	.sleb128 PTREGS
+9:
+	.balign 8
+.Lcie_end:
+
+	.long .Lfde0_end - .Lfde0_start
+.Lfde0_start:
+	.long .Lfde0_start - .Lcie	/* CIE pointer. */
+	.long .Lsigrt_start - .		/* PC start, length */
+	.long .Lsigrt_end - .Lsigrt_start
+	.uleb128 0			/* Augmentation */
+
+	/* General registers */
+	rsave  1,  2
+	rsave  2,  3
+	rsave  3,  4
+	rsave  4,  5
+	rsave  5,  6
+	rsave  6,  7
+	rsave  7,  8
+	rsave  8,  9
+	rsave  9, 10
+	rsave 10, 11
+	rsave 11, 12
+	rsave 12, 13
+	rsave 13, 14
+	rsave 14, 15
+	rsave 15, 16
+	rsave 16, 17
+	rsave 17, 18
+	rsave 18, 19
+	rsave 19, 20
+	rsave 20, 21
+	rsave 21, 22
+	rsave 22, 23
+	rsave 23, 24
+	rsave 24, 25
+	rsave 25, 26
+	rsave 26, 27
+	rsave 27, 28
+	rsave 28, 29
+	rsave 29, 30
+	rsave 30, 31
+	rsave 31, 32
+
+	/* Floating-point registers */
+	rsave 32, 36
+	rsave 33, 37
+	rsave 34, 38
+	rsave 35, 39
+	rsave 36, 40
+	rsave 37, 41
+	rsave 38, 42
+	rsave 39, 43
+	rsave 40, 44
+	rsave 41, 45
+	rsave 42, 46
+	rsave 43, 47
+	rsave 44, 48
+	rsave 45, 49
+	rsave 46, 50
+	rsave 47, 51
+	rsave 48, 52
+	rsave 49, 53
+	rsave 50, 54
+	rsave 51, 55
+	rsave 52, 56
+	rsave 53, 57
+	rsave 54, 58
+	rsave 55, 59
+	rsave 56, 60
+	rsave 57, 61
+	rsave 58, 62
+	rsave 59, 63
+
+	/* SAR register */
+	rsave 60, 67
+
+	/* iaoq[0] return address register */
+	rsave 61, 65
+	.balign 8
+.Lfde0_end:
diff --git a/arch/parisc/kernel/vdso64/vdso64.lds.S b/arch/parisc/kernel/vdso64/vdso64.lds.S
new file mode 100644
index 0000000000000..de1fb4b19286b
--- /dev/null
+++ b/arch/parisc/kernel/vdso64/vdso64.lds.S
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * This is the infamous ld script for the 64 bits vdso library
+ */
+#include <asm/vdso.h>
+
+/* Default link addresses for the vDSOs */
+OUTPUT_FORMAT("elf64-hppa-linux")
+OUTPUT_ARCH(hppa:hppa2.0w)
+ENTRY(_start)
+
+SECTIONS
+{
+  . = VDSO_LBASE + SIZEOF_HEADERS;
+  .hash           : { *(.hash) }			:text
+  .gnu.hash       : { *(.gnu.hash) }
+  .dynsym         : { *(.dynsym) }
+  .dynstr         : { *(.dynstr) }
+  .gnu.version    : { *(.gnu.version) }
+  .gnu.version_d  : { *(.gnu.version_d) }
+  .gnu.version_r  : { *(.gnu.version_r) }
+
+  .note		  : { *(.note.*) } 			:text	:note
+
+  . = ALIGN (16);
+  .text :
+  {
+    *(.text .stub .text.* .gnu.linkonce.t.*)
+  }
+  PROVIDE (__etext = .);
+  PROVIDE (_etext = .);
+  PROVIDE (etext = .);
+
+  /* Other stuff is appended to the text segment: */
+  .rodata		: { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+  .rodata1		: { *(.rodata1) }
+
+  .eh_frame_hdr		: { *(.eh_frame_hdr) }		:text	:eh_frame_hdr
+  .eh_frame		: { KEEP (*(.eh_frame)) }	:text
+  .gcc_except_table	: { *(.gcc_except_table) }
+  .fixup		: { *(.fixup) }
+
+  .dynamic		: { *(.dynamic) }		:text	:dynamic
+  .plt : { *(.plt) }
+  .got : { *(.got) }
+
+  _end = .;
+  __end = .;
+  PROVIDE (end = .);
+
+
+  /* Stabs debugging sections are here too
+   */
+  .stab 0 : { *(.stab) }
+  .stabstr 0 : { *(.stabstr) }
+  .stab.excl 0 : { *(.stab.excl) }
+  .stab.exclstr 0 : { *(.stab.exclstr) }
+  .stab.index 0 : { *(.stab.index) }
+  .stab.indexstr 0 : { *(.stab.indexstr) }
+  .comment 0 : { *(.comment) }
+  .debug 0 : { *(.debug) }
+  .line 0 : { *(.line) }
+
+  .debug_srcinfo 0 : { *(.debug_srcinfo) }
+  .debug_sfnames 0 : { *(.debug_sfnames) }
+
+  .debug_aranges 0 : { *(.debug_aranges) }
+  .debug_pubnames 0 : { *(.debug_pubnames) }
+
+  .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+  .debug_abbrev 0 : { *(.debug_abbrev) }
+  .debug_line 0 : { *(.debug_line) }
+  .debug_frame 0 : { *(.debug_frame) }
+  .debug_str 0 : { *(.debug_str) }
+  .debug_loc 0 : { *(.debug_loc) }
+  .debug_macinfo 0 : { *(.debug_macinfo) }
+
+  .debug_weaknames 0 : { *(.debug_weaknames) }
+  .debug_funcnames 0 : { *(.debug_funcnames) }
+  .debug_typenames 0 : { *(.debug_typenames) }
+  .debug_varnames 0 : { *(.debug_varnames) }
+
+  /DISCARD/ : { *(.note.GNU-stack) }
+  /DISCARD/ : { *(.data .data.* .gnu.linkonce.d.* .sdata*) }
+  /DISCARD/ : { *(.bss .sbss .dynbss .dynsbss) }
+}
+
+
+PHDRS
+{
+  text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */
+  note PT_NOTE FLAGS(4); /* PF_R */
+  dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
+  eh_frame_hdr PT_GNU_EH_FRAME;
+}
+
+
+/*
+ * This controls what symbols we export from the DSO.
+ */
+VERSION
+{
+  VDSO_VERSION_STRING {
+    global:
+	__kernel_sigtramp_rt64;
+	__kernel_restart_syscall64;
+    local: *;
+  };
+}
diff --git a/arch/parisc/kernel/vdso64/vdso64_wrapper.S b/arch/parisc/kernel/vdso64/vdso64_wrapper.S
new file mode 100644
index 0000000000000..66f929482d3d4
--- /dev/null
+++ b/arch/parisc/kernel/vdso64/vdso64_wrapper.S
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
+#include <asm/page.h>
+
+	__PAGE_ALIGNED_DATA
+
+	.globl vdso64_start, vdso64_end
+	.balign PAGE_SIZE
+vdso64_start:
+	.incbin "arch/parisc/kernel/vdso64/vdso64.so"
+	.balign PAGE_SIZE
+vdso64_end:
+
+	.previous
-- 
GitLab


From 9c379c65241707e44072139d782bc2dfec9b4ab3 Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Thu, 13 Jan 2022 16:35:53 +0100
Subject: [PATCH 1260/1586] video/fbdev/stifb: Implement the stifb_fillrect()
 function

The stifb driver (for Artist/HCRX graphics on PA-RISC) was missing
the fillrect function.
Tested on a 715/64 PA-RISC machine and in qemu.

Signed-off-by: Helge Deller <deller@gmx.de>
---
 drivers/video/fbdev/stifb.c | 45 +++++++++++++++++++++++++++++++++++--
 1 file changed, 43 insertions(+), 2 deletions(-)

diff --git a/drivers/video/fbdev/stifb.c b/drivers/video/fbdev/stifb.c
index 265865610edc6..bebb2eea6448b 100644
--- a/drivers/video/fbdev/stifb.c
+++ b/drivers/video/fbdev/stifb.c
@@ -1041,6 +1041,47 @@ stifb_copyarea(struct fb_info *info, const struct fb_copyarea *area)
 	SETUP_FB(fb);
 }
 
+#define ARTIST_VRAM_SIZE			0x000804
+#define ARTIST_VRAM_SRC				0x000808
+#define ARTIST_VRAM_SIZE_TRIGGER_WINFILL	0x000a04
+#define ARTIST_VRAM_DEST_TRIGGER_BLOCKMOVE	0x000b00
+#define ARTIST_SRC_BM_ACCESS			0x018008
+#define ARTIST_FGCOLOR				0x018010
+#define ARTIST_BGCOLOR				0x018014
+#define ARTIST_BITMAP_OP			0x01801c
+
+static void
+stifb_fillrect(struct fb_info *info, const struct fb_fillrect *rect)
+{
+	struct stifb_info *fb = container_of(info, struct stifb_info, info);
+
+	if (rect->rop != ROP_COPY)
+		return cfb_fillrect(info, rect);
+
+	SETUP_HW(fb);
+
+	if (fb->info.var.bits_per_pixel == 32) {
+		WRITE_WORD(0xBBA0A000, fb, REG_10);
+
+		NGLE_REALLY_SET_IMAGE_PLANEMASK(fb, 0xffffffff);
+	} else {
+		WRITE_WORD(fb->id == S9000_ID_HCRX ? 0x13a02000 : 0x13a01000, fb, REG_10);
+
+		NGLE_REALLY_SET_IMAGE_PLANEMASK(fb, 0xff);
+	}
+
+	WRITE_WORD(0x03000300, fb, ARTIST_BITMAP_OP);
+	WRITE_WORD(0x2ea01000, fb, ARTIST_SRC_BM_ACCESS);
+	NGLE_QUICK_SET_DST_BM_ACCESS(fb, 0x2ea01000);
+	NGLE_REALLY_SET_IMAGE_FG_COLOR(fb, rect->color);
+	WRITE_WORD(0, fb, ARTIST_BGCOLOR);
+
+	NGLE_SET_DSTXY(fb, (rect->dx << 16) | (rect->dy));
+	SET_LENXY_START_RECFILL(fb, (rect->width << 16) | (rect->height));
+
+	SETUP_FB(fb);
+}
+
 static void __init
 stifb_init_display(struct stifb_info *fb)
 {
@@ -1105,7 +1146,7 @@ static const struct fb_ops stifb_ops = {
 	.owner		= THIS_MODULE,
 	.fb_setcolreg	= stifb_setcolreg,
 	.fb_blank	= stifb_blank,
-	.fb_fillrect	= cfb_fillrect,
+	.fb_fillrect	= stifb_fillrect,
 	.fb_copyarea	= stifb_copyarea,
 	.fb_imageblit	= cfb_imageblit,
 };
@@ -1297,7 +1338,7 @@ static int __init stifb_init_fb(struct sti_struct *sti, int bpp_pref)
 		goto out_err0;
 	}
 	info->screen_size = fix->smem_len;
-	info->flags = FBINFO_DEFAULT | FBINFO_HWACCEL_COPYAREA;
+	info->flags = FBINFO_HWACCEL_COPYAREA | FBINFO_HWACCEL_FILLRECT;
 	info->pseudo_palette = &fb->pseudo_palette;
 
 	/* This has to be done !!! */
-- 
GitLab


From b9f50eea4f277943842e82bc4d73f912a2f165c6 Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Tue, 15 Feb 2022 20:56:27 +0100
Subject: [PATCH 1261/1586] parisc: Always use the self-extracting kernel
 feature

This patch drops the CONFIG_PARISC_SELF_EXTRACT option.

The palo boot loader is able to decompress a kernel which was compressed
with gzip. That possibility was useful when the Linux kernel
self-extracting feature wasn't implemented yet.

Beside the fact that the self-extracting feature offers much better
compression rates, we do support self-extracting kernels already since
kernel v4.14, so now it's really time to get rid of that old option and
always use the self-extractor.

Signed-off-by: Helge Deller <deller@gmx.de>
---
 arch/parisc/Kconfig  | 12 ------------
 arch/parisc/Makefile |  9 ---------
 2 files changed, 21 deletions(-)

diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index f65268fd28caf..fbb114049a6b2 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -260,18 +260,6 @@ config PARISC_PAGE_SIZE_64KB
 
 endchoice
 
-config PARISC_SELF_EXTRACT
-	bool "Build kernel as self-extracting executable"
-	default y
-	help
-	  Say Y if you want to build the parisc kernel as a kind of
-	  self-extracting executable.
-
-	  If you say N here, the kernel will be compressed with gzip
-	  which can be loaded by the palo bootloader directly too.
-
-	  If you don't know what to do here, say Y.
-
 config SMP
 	bool "Symmetric multi-processing support"
 	help
diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile
index 6d7361364dadc..2a9387a93592e 100644
--- a/arch/parisc/Makefile
+++ b/arch/parisc/Makefile
@@ -15,12 +15,8 @@
 # Mike Shaver, Helge Deller and Martin K. Petersen
 #
 
-ifdef CONFIG_PARISC_SELF_EXTRACT
 boot := arch/parisc/boot
 KBUILD_IMAGE := $(boot)/bzImage
-else
-KBUILD_IMAGE := vmlinuz
-endif
 
 NM		= sh $(srctree)/arch/parisc/nm
 CHECKFLAGS	+= -D__hppa__=1
@@ -165,13 +161,8 @@ Image: vmlinux
 bzImage: vmlinux
 	$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
 
-ifdef CONFIG_PARISC_SELF_EXTRACT
 vmlinuz: bzImage
 	$(OBJCOPY) $(boot)/bzImage $@
-else
-vmlinuz: vmlinux
-	@$(KGZIP) -cf -9 $< > $@
-endif
 
 ifeq ($(KBUILD_EXTMOD),)
 # We need to generate vdso-offsets.h before compiling certain files in kernel/.
-- 
GitLab


From 46b4016f7d3b1c16c5eefcdf16dd32e84f4dbb60 Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Thu, 17 Feb 2022 21:29:22 +0100
Subject: [PATCH 1262/1586] parisc: Add defines for various space register

Provide defines for space registers (SR_KERNEL, SR_USER, ...) which
should be used instead of hardcoding the values.

Signed-off-by: Helge Deller <deller@gmx.de>
---
 arch/parisc/include/asm/assembly.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/arch/parisc/include/asm/assembly.h b/arch/parisc/include/asm/assembly.h
index 6369082c6c746..ea0cb318b13d2 100644
--- a/arch/parisc/include/asm/assembly.h
+++ b/arch/parisc/include/asm/assembly.h
@@ -47,6 +47,12 @@
 #define PRIV_USER	3
 #define PRIV_KERNEL	0
 
+/* Space register used inside kernel */
+#define SR_KERNEL	0
+#define SR_TEMP1	1
+#define SR_TEMP2	2
+#define SR_USER		3
+
 #ifdef __ASSEMBLY__
 
 #ifdef CONFIG_64BIT
-- 
GitLab


From 5613a930857ecfdb5f670992d55d2e7373d21d1b Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Wed, 16 Feb 2022 23:11:20 +0100
Subject: [PATCH 1263/1586] parisc: Use SR_USER and SR_KERNEL in get_user() and
 put_user()

Instead of hardcoding the space registers as strings, use the SR_USER
and SR_KERNEL constants to form the space register in the access
functions.

Signed-off-by: Helge Deller <deller@gmx.de>
---
 arch/parisc/include/asm/uaccess.h | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h
index 123d5f16cd9db..b3eb4541e441c 100644
--- a/arch/parisc/include/asm/uaccess.h
+++ b/arch/parisc/include/asm/uaccess.h
@@ -79,18 +79,18 @@ struct exception_table_entry {
 
 #define __get_user(val, ptr)				\
 ({							\
-	__get_user_internal("%%sr3,", val, ptr);	\
+	__get_user_internal(SR_USER, val, ptr);	\
 })
 
 #define __get_user_asm(sr, val, ldx, ptr)		\
 {							\
 	register long __gu_val;				\
 							\
-	__asm__("1: " ldx " 0(" sr "%2),%0\n"		\
+	__asm__("1: " ldx " 0(%%sr%2,%3),%0\n"		\
 		"9:\n"					\
 		ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b)	\
 		: "=r"(__gu_val), "+r"(__gu_err)        \
-		: "r"(ptr));				\
+		: "i"(sr), "r"(ptr));			\
 							\
 	(val) = (__force __typeof__(*(ptr))) __gu_val;	\
 }
@@ -100,7 +100,7 @@ struct exception_table_entry {
 {							\
 	type __z;					\
 	long __err;					\
-	__err = __get_user_internal("%%sr0,", __z, (type *)(src)); \
+	__err = __get_user_internal(SR_KERNEL, __z, (type *)(src)); \
 	if (unlikely(__err))				\
 		goto err_label;				\
 	else						\
@@ -118,13 +118,13 @@ struct exception_table_entry {
 	} __gu_tmp;					\
 							\
 	__asm__("   copy %%r0,%R0\n"			\
-		"1: ldw 0(" sr "%2),%0\n"		\
-		"2: ldw 4(" sr "%2),%R0\n"		\
+		"1: ldw 0(%%sr%2,%3),%0\n"		\
+		"2: ldw 4(%%sr%2,%3),%R0\n"		\
 		"9:\n"					\
 		ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b)	\
 		ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b)	\
 		: "=&r"(__gu_tmp.l), "+r"(__gu_err)	\
-		: "r"(ptr));				\
+		: "i"(sr), "r"(ptr));			\
 							\
 	(val) = __gu_tmp.t;				\
 }
@@ -151,14 +151,14 @@ struct exception_table_entry {
 ({								\
 	__typeof__(&*(ptr)) __ptr = ptr;			\
 	__typeof__(*(__ptr)) __x = (__typeof__(*(__ptr)))(x);	\
-	__put_user_internal("%%sr3,", __x, __ptr);		\
+	__put_user_internal(SR_USER, __x, __ptr);		\
 })
 
 #define __put_kernel_nofault(dst, src, type, err_label)		\
 {								\
 	type __z = *(type *)(src);				\
 	long __err;						\
-	__err = __put_user_internal("%%sr0,", __z, (type *)(dst)); \
+	__err = __put_user_internal(SR_KERNEL, __z, (type *)(dst)); \
 	if (unlikely(__err))					\
 		goto err_label;					\
 }
@@ -178,24 +178,24 @@ struct exception_table_entry {
 
 #define __put_user_asm(sr, stx, x, ptr)				\
 	__asm__ __volatile__ (					\
-		"1: " stx " %2,0(" sr "%1)\n"			\
+		"1: " stx " %1,0(%%sr%2,%3)\n"			\
 		"9:\n"						\
 		ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b)		\
 		: "+r"(__pu_err)				\
-		: "r"(ptr), "r"(x))
+		: "r"(x), "i"(sr), "r"(ptr))
 
 
 #if !defined(CONFIG_64BIT)
 
 #define __put_user_asm64(sr, __val, ptr) do {			\
 	__asm__ __volatile__ (					\
-		"1: stw %2,0(" sr "%1)\n"			\
-		"2: stw %R2,4(" sr "%1)\n"			\
+		"1: stw %1,0(%%sr%2,%3)\n"			\
+		"2: stw %R1,4(%%sr%2,%3)\n"			\
 		"9:\n"						\
 		ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b)		\
 		ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b)		\
 		: "+r"(__pu_err)				\
-		: "r"(ptr), "r"(__val));			\
+		: "r"(__val), "i"(sr), "r"(ptr));		\
 } while (0)
 
 #endif /* !defined(CONFIG_64BIT) */
-- 
GitLab


From 360bd6c658076f9264760364fe95e6e78cbd188a Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Thu, 17 Feb 2022 15:26:27 +0100
Subject: [PATCH 1264/1586] parisc: Use constants to encode the space registers
 like SR_KERNEL

Use the provided space register constants instead of hardcoded values.

Signed-off-by: Helge Deller <deller@gmx.de>
---
 arch/parisc/include/asm/cache.h         | 11 ++++-------
 arch/parisc/include/asm/mmu_context.h   |  2 +-
 arch/parisc/include/asm/pgtable.h       |  6 +++---
 arch/parisc/include/asm/special_insns.h |  4 ++--
 arch/parisc/kernel/cache.c              |  8 ++++----
 arch/parisc/kernel/pci-dma.c            |  4 ++--
 arch/parisc/lib/memcpy.c                | 16 ++++++++--------
 7 files changed, 24 insertions(+), 27 deletions(-)

diff --git a/arch/parisc/include/asm/cache.h b/arch/parisc/include/asm/cache.h
index d53e9e27dba00..5032e758594e9 100644
--- a/arch/parisc/include/asm/cache.h
+++ b/arch/parisc/include/asm/cache.h
@@ -39,16 +39,13 @@ extern int icache_stride;
 extern struct pdc_cache_info cache_info;
 void parisc_setup_cache_timing(void);
 
-#define pdtlb(addr)	asm volatile("pdtlb 0(%%sr1,%0)" \
+#define pdtlb(sr, addr)	asm volatile("pdtlb 0(%%sr%0,%1)" \
 			ALTERNATIVE(ALT_COND_NO_SMP, INSN_PxTLB) \
-			: : "r" (addr) : "memory")
-#define pitlb(addr)	asm volatile("pitlb 0(%%sr1,%0)" \
+			: : "i"(sr), "r" (addr) : "memory")
+#define pitlb(sr, addr)	asm volatile("pitlb 0(%%sr%0,%1)" \
 			ALTERNATIVE(ALT_COND_NO_SMP, INSN_PxTLB) \
 			ALTERNATIVE(ALT_COND_NO_SPLIT_TLB, INSN_NOP) \
-			: : "r" (addr) : "memory")
-#define pdtlb_kernel(addr)  asm volatile("pdtlb 0(%0)"   \
-			ALTERNATIVE(ALT_COND_NO_SMP, INSN_PxTLB) \
-			: : "r" (addr) : "memory")
+			: : "i"(sr), "r" (addr) : "memory")
 
 #define asm_io_fdc(addr) asm volatile("fdc %%r0(%0)" \
 			ALTERNATIVE(ALT_COND_NO_DCACHE, INSN_NOP) \
diff --git a/arch/parisc/include/asm/mmu_context.h b/arch/parisc/include/asm/mmu_context.h
index e788e995bd7a6..c9187fe836a31 100644
--- a/arch/parisc/include/asm/mmu_context.h
+++ b/arch/parisc/include/asm/mmu_context.h
@@ -43,7 +43,7 @@ static inline unsigned long __space_to_prot(mm_context_t context)
 
 static inline void load_context(mm_context_t context)
 {
-	mtsp(context.space_id, 3);
+	mtsp(context.space_id, SR_USER);
 	mtctl(__space_to_prot(context), 8);
 }
 
diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
index f0393f00c2ff8..7dff736936d00 100644
--- a/arch/parisc/include/asm/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
@@ -70,9 +70,9 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
 	unsigned long flags;
 
 	purge_tlb_start(flags);
-	mtsp(mm->context.space_id, 1);
-	pdtlb(addr);
-	pitlb(addr);
+	mtsp(mm->context.space_id, SR_TEMP1);
+	pdtlb(SR_TEMP1, addr);
+	pitlb(SR_TEMP1, addr);
 	purge_tlb_end(flags);
 }
 
diff --git a/arch/parisc/include/asm/special_insns.h b/arch/parisc/include/asm/special_insns.h
index 16ee41e77174f..41b3ddbd344ce 100644
--- a/arch/parisc/include/asm/special_insns.h
+++ b/arch/parisc/include/asm/special_insns.h
@@ -55,8 +55,8 @@ static inline void set_eiem(unsigned long val)
 #define mfsp(reg)	({		\
 	unsigned long cr;		\
 	__asm__ __volatile__(		\
-		"mfsp " #reg ",%0" :	\
-		 "=r" (cr)		\
+		"mfsp %%sr%1,%0"	\
+		: "=r" (cr) : "i"(reg)	\
 	);				\
 	cr;				\
 })
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index 00dc82658856e..59c2d079f1789 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -457,7 +457,7 @@ void flush_kernel_dcache_page_addr(void *addr)
 
 	flush_kernel_dcache_page_asm(addr);
 	purge_tlb_start(flags);
-	pdtlb_kernel(addr);
+	pdtlb(SR_KERNEL, addr);
 	purge_tlb_end(flags);
 }
 EXPORT_SYMBOL(flush_kernel_dcache_page_addr);
@@ -496,9 +496,9 @@ int __flush_tlb_range(unsigned long sid, unsigned long start,
 	   but cause a purge request to be broadcast to other TLBs.  */
 	while (start < end) {
 		purge_tlb_start(flags);
-		mtsp(sid, 1);
-		pdtlb(start);
-		pitlb(start);
+		mtsp(sid, SR_TEMP1);
+		pdtlb(SR_TEMP1, start);
+		pitlb(SR_TEMP1, start);
 		purge_tlb_end(flags);
 		start += PAGE_SIZE;
 	}
diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c
index 36a57aa38e87e..160996f2198ef 100644
--- a/arch/parisc/kernel/pci-dma.c
+++ b/arch/parisc/kernel/pci-dma.c
@@ -91,7 +91,7 @@ static inline int map_pte_uncached(pte_t * pte,
 			printk(KERN_ERR "map_pte_uncached: page already exists\n");
 		purge_tlb_start(flags);
 		set_pte(pte, __mk_pte(*paddr_ptr, PAGE_KERNEL_UNC));
-		pdtlb_kernel(orig_vaddr);
+		pdtlb(SR_KERNEL, orig_vaddr);
 		purge_tlb_end(flags);
 		vaddr += PAGE_SIZE;
 		orig_vaddr += PAGE_SIZE;
@@ -175,7 +175,7 @@ static inline void unmap_uncached_pte(pmd_t * pmd, unsigned long vaddr,
 
 		pte_clear(&init_mm, vaddr, pte);
 		purge_tlb_start(flags);
-		pdtlb_kernel(orig_vaddr);
+		pdtlb(SR_KERNEL, orig_vaddr);
 		purge_tlb_end(flags);
 		vaddr += PAGE_SIZE;
 		orig_vaddr += PAGE_SIZE;
diff --git a/arch/parisc/lib/memcpy.c b/arch/parisc/lib/memcpy.c
index ea70a0e083213..5fc0c852c84c8 100644
--- a/arch/parisc/lib/memcpy.c
+++ b/arch/parisc/lib/memcpy.c
@@ -13,8 +13,8 @@
 #include <linux/compiler.h>
 #include <linux/uaccess.h>
 
-#define get_user_space() (uaccess_kernel() ? 0 : mfsp(3))
-#define get_kernel_space() (0)
+#define get_user_space()	mfsp(SR_USER)
+#define get_kernel_space()	SR_KERNEL
 
 /* Returns 0 for success, otherwise, returns number of bytes not transferred. */
 extern unsigned long pa_memcpy(void *dst, const void *src,
@@ -23,8 +23,8 @@ extern unsigned long pa_memcpy(void *dst, const void *src,
 unsigned long raw_copy_to_user(void __user *dst, const void *src,
 			       unsigned long len)
 {
-	mtsp(get_kernel_space(), 1);
-	mtsp(get_user_space(), 2);
+	mtsp(get_kernel_space(), SR_TEMP1);
+	mtsp(get_user_space(), SR_TEMP2);
 	return pa_memcpy((void __force *)dst, src, len);
 }
 EXPORT_SYMBOL(raw_copy_to_user);
@@ -32,16 +32,16 @@ EXPORT_SYMBOL(raw_copy_to_user);
 unsigned long raw_copy_from_user(void *dst, const void __user *src,
 			       unsigned long len)
 {
-	mtsp(get_user_space(), 1);
-	mtsp(get_kernel_space(), 2);
+	mtsp(get_user_space(), SR_TEMP1);
+	mtsp(get_kernel_space(), SR_TEMP2);
 	return pa_memcpy(dst, (void __force *)src, len);
 }
 EXPORT_SYMBOL(raw_copy_from_user);
 
 void * memcpy(void * dst,const void *src, size_t count)
 {
-	mtsp(get_kernel_space(), 1);
-	mtsp(get_kernel_space(), 2);
+	mtsp(get_kernel_space(), SR_TEMP1);
+	mtsp(get_kernel_space(), SR_TEMP2);
 	pa_memcpy(dst, src, count);
 	return dst;
 }
-- 
GitLab


From 8278cc16266326f23c05f94d748569faa81319bb Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Thu, 17 Feb 2022 13:12:39 +0100
Subject: [PATCH 1265/1586] parisc: Reduce code size by optimizing
 get_current() function calls

The get_current() code uses the mfctl() macro to get the pointer to the
current task struct from %cr30. The problem with the mfctl() macro is,
that it is marked volatile which is basically correct, because mfctl()
is used to get e.g. the current internal timer or interrupt flags as
well.

But specifically the task struct pointer (%cr30) doesn't change over
time when the kernel executes code for a task.

So, by dropping the volatile when retrieving %cr30 the compiler is now
able to get this value only once and optimize the generated code a lot.

A bloat-o-meter comparism shows that this patch saves ~5kB kernel code
on a 32-bit kernel and ~6kB kernel code on a 64-bit kernel.

Signed-off-by: Helge Deller <deller@gmx.de>
---
 arch/parisc/include/asm/current.h | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/arch/parisc/include/asm/current.h b/arch/parisc/include/asm/current.h
index 568b739e42afa..dc7aea07c3f38 100644
--- a/arch/parisc/include/asm/current.h
+++ b/arch/parisc/include/asm/current.h
@@ -2,14 +2,16 @@
 #ifndef _ASM_PARISC_CURRENT_H
 #define _ASM_PARISC_CURRENT_H
 
-#include <asm/special_insns.h>
-
 #ifndef __ASSEMBLY__
 struct task_struct;
 
 static __always_inline struct task_struct *get_current(void)
 {
-	return (struct task_struct *) mfctl(30);
+	struct task_struct *ts;
+
+	/* do not use mfctl() macro as it is marked volatile */
+	asm( "mfctl %%cr30,%0" : "=r" (ts) );
+	return ts;
 }
 
 #define current get_current()
-- 
GitLab


From d1434e03b2913c28d85e429eea20c53993fbe1af Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Sat, 19 Feb 2022 00:53:05 +0100
Subject: [PATCH 1266/1586] parisc/unaligned: Use EFAULT fixup handler in
 unaligned handlers

Convert the inline assembly code to use the automatic EFAULT exception
handler. With that the fixup code can be dropped.

The other change is to allow double-word only when a 64-bit kernel is
used instead of depending on CONFIG_PA20.

Signed-off-by: Helge Deller <deller@gmx.de>
---
 arch/parisc/kernel/unaligned.c | 160 ++++++++++++---------------------
 1 file changed, 55 insertions(+), 105 deletions(-)

diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c
index 286cec4d86d7b..857dcc790122a 100644
--- a/arch/parisc/kernel/unaligned.c
+++ b/arch/parisc/kernel/unaligned.c
@@ -31,13 +31,6 @@
 #define RFMT "%08lx"
 #endif
 
-#define FIXUP_BRANCH(lbl) \
-	"\tldil L%%" #lbl ", %%r1\n"			\
-	"\tldo R%%" #lbl "(%%r1), %%r1\n"		\
-	"\tbv,n %%r0(%%r1)\n"
-/* If you use FIXUP_BRANCH, then you must list this clobber */
-#define FIXUP_BRANCH_CLOBBER "r1"
-
 /* 1111 1100 0000 0000 0001 0011 1100 0000 */
 #define OPCODE1(a,b,c)	((a)<<26|(b)<<12|(c)<<6) 
 #define OPCODE2(a,b)	((a)<<26|(b)<<1)
@@ -114,7 +107,6 @@
 #define IM14(i) IM((i),14)
 
 #define ERR_NOTHANDLED	-1
-#define ERR_PAGEFAULT	-2
 
 int unaligned_enabled __read_mostly = 1;
 
@@ -122,7 +114,7 @@ static int emulate_ldh(struct pt_regs *regs, int toreg)
 {
 	unsigned long saddr = regs->ior;
 	unsigned long val = 0;
-	int ret;
+	ASM_EXCEPTIONTABLE_VAR(ret);
 
 	DPRINTF("load " RFMT ":" RFMT " to r%d for 2 bytes\n", 
 		regs->isr, regs->ior, toreg);
@@ -132,17 +124,12 @@ static int emulate_ldh(struct pt_regs *regs, int toreg)
 "1:	ldbs	0(%%sr1,%3), %%r20\n"
 "2:	ldbs	1(%%sr1,%3), %0\n"
 "	depw	%%r20, 23, 24, %0\n"
-"	copy	%%r0, %1\n"
 "3:	\n"
-"	.section .fixup,\"ax\"\n"
-"4:	ldi	-2, %1\n"
-	FIXUP_BRANCH(3b)
-"	.previous\n"
-	ASM_EXCEPTIONTABLE_ENTRY(1b, 4b)
-	ASM_EXCEPTIONTABLE_ENTRY(2b, 4b)
-	: "=r" (val), "=r" (ret)
+	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b)
+	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b)
+	: "=r" (val), "+r" (ret)
 	: "0" (val), "r" (saddr), "r" (regs->isr)
-	: "r20", FIXUP_BRANCH_CLOBBER );
+	: "r20" );
 
 	DPRINTF("val = 0x" RFMT "\n", val);
 
@@ -156,7 +143,7 @@ static int emulate_ldw(struct pt_regs *regs, int toreg, int flop)
 {
 	unsigned long saddr = regs->ior;
 	unsigned long val = 0;
-	int ret;
+	ASM_EXCEPTIONTABLE_VAR(ret);
 
 	DPRINTF("load " RFMT ":" RFMT " to r%d for 4 bytes\n", 
 		regs->isr, regs->ior, toreg);
@@ -170,17 +157,12 @@ static int emulate_ldw(struct pt_regs *regs, int toreg, int flop)
 "	subi	32,%%r19,%%r19\n"
 "	mtctl	%%r19,11\n"
 "	vshd	%0,%%r20,%0\n"
-"	copy	%%r0, %1\n"
 "3:	\n"
-"	.section .fixup,\"ax\"\n"
-"4:	ldi	-2, %1\n"
-	FIXUP_BRANCH(3b)
-"	.previous\n"
-	ASM_EXCEPTIONTABLE_ENTRY(1b, 4b)
-	ASM_EXCEPTIONTABLE_ENTRY(2b, 4b)
-	: "=r" (val), "=r" (ret)
+	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b)
+	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b)
+	: "=r" (val), "+r" (ret)
 	: "0" (val), "r" (saddr), "r" (regs->isr)
-	: "r19", "r20", FIXUP_BRANCH_CLOBBER );
+	: "r19", "r20" );
 
 	DPRINTF("val = 0x" RFMT "\n", val);
 
@@ -195,16 +177,15 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop)
 {
 	unsigned long saddr = regs->ior;
 	__u64 val = 0;
-	int ret;
+	ASM_EXCEPTIONTABLE_VAR(ret);
 
 	DPRINTF("load " RFMT ":" RFMT " to r%d for 8 bytes\n", 
 		regs->isr, regs->ior, toreg);
-#ifdef CONFIG_PA20
 
-#ifndef CONFIG_64BIT
-	if (!flop)
-		return -1;
-#endif
+	if (!IS_ENABLED(CONFIG_64BIT) && !flop)
+		return ERR_NOTHANDLED;
+
+#ifdef CONFIG_64BIT
 	__asm__ __volatile__  (
 "	depd,z	%3,60,3,%%r19\n"		/* r19=(ofs&7)*8 */
 "	mtsp	%4, %%sr1\n"
@@ -214,17 +195,12 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop)
 "	subi	64,%%r19,%%r19\n"
 "	mtsar	%%r19\n"
 "	shrpd	%0,%%r20,%%sar,%0\n"
-"	copy	%%r0, %1\n"
 "3:	\n"
-"	.section .fixup,\"ax\"\n"
-"4:	ldi	-2, %1\n"
-	FIXUP_BRANCH(3b)
-"	.previous\n"
-	ASM_EXCEPTIONTABLE_ENTRY(1b,4b)
-	ASM_EXCEPTIONTABLE_ENTRY(2b,4b)
-	: "=r" (val), "=r" (ret)
+	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b)
+	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b)
+	: "=r" (val), "+r" (ret)
 	: "0" (val), "r" (saddr), "r" (regs->isr)
-	: "r19", "r20", FIXUP_BRANCH_CLOBBER );
+	: "r19", "r20" );
 #else
     {
 	unsigned long valh=0,vall=0;
@@ -239,18 +215,13 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop)
 "	mtsar	%%r19\n"
 "	vshd	%0,%1,%0\n"
 "	vshd	%1,%%r20,%1\n"
-"	copy	%%r0, %2\n"
 "4:	\n"
-"	.section .fixup,\"ax\"\n"
-"5:	ldi	-2, %2\n"
-	FIXUP_BRANCH(4b)
-"	.previous\n"
-	ASM_EXCEPTIONTABLE_ENTRY(1b,5b)
-	ASM_EXCEPTIONTABLE_ENTRY(2b,5b)
-	ASM_EXCEPTIONTABLE_ENTRY(3b,5b)
-	: "=r" (valh), "=r" (vall), "=r" (ret)
+	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 4b)
+	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 4b)
+	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 4b)
+	: "=r" (valh), "=r" (vall), "+r" (ret)
 	: "0" (valh), "1" (vall), "r" (saddr), "r" (regs->isr)
-	: "r19", "r20", FIXUP_BRANCH_CLOBBER );
+	: "r19", "r20" );
 	val=((__u64)valh<<32)|(__u64)vall;
     }
 #endif
@@ -268,7 +239,7 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop)
 static int emulate_sth(struct pt_regs *regs, int frreg)
 {
 	unsigned long val = regs->gr[frreg];
-	int ret;
+	ASM_EXCEPTIONTABLE_VAR(ret);
 
 	if (!frreg)
 		val = 0;
@@ -281,17 +252,12 @@ static int emulate_sth(struct pt_regs *regs, int frreg)
 "	extrw,u %1, 23, 8, %%r19\n"
 "1:	stb %1, 1(%%sr1, %2)\n"
 "2:	stb %%r19, 0(%%sr1, %2)\n"
-"	copy	%%r0, %0\n"
 "3:	\n"
-"	.section .fixup,\"ax\"\n"
-"4:	ldi	-2, %0\n"
-	FIXUP_BRANCH(3b)
-"	.previous\n"
-	ASM_EXCEPTIONTABLE_ENTRY(1b,4b)
-	ASM_EXCEPTIONTABLE_ENTRY(2b,4b)
-	: "=r" (ret)
+	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b)
+	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b)
+	: "+r" (ret)
 	: "r" (val), "r" (regs->ior), "r" (regs->isr)
-	: "r19", FIXUP_BRANCH_CLOBBER );
+	: "r19" );
 
 	return ret;
 }
@@ -299,7 +265,7 @@ static int emulate_sth(struct pt_regs *regs, int frreg)
 static int emulate_stw(struct pt_regs *regs, int frreg, int flop)
 {
 	unsigned long val;
-	int ret;
+	ASM_EXCEPTIONTABLE_VAR(ret);
 
 	if (flop)
 		val = ((__u32*)(regs->fr))[frreg];
@@ -328,24 +294,19 @@ static int emulate_stw(struct pt_regs *regs, int frreg, int flop)
 "	or	%%r1, %%r21, %%r21\n"
 "	stw	%%r20,0(%%sr1,%2)\n"
 "	stw	%%r21,4(%%sr1,%2)\n"
-"	copy	%%r0, %0\n"
 "3:	\n"
-"	.section .fixup,\"ax\"\n"
-"4:	ldi	-2, %0\n"
-	FIXUP_BRANCH(3b)
-"	.previous\n"
-	ASM_EXCEPTIONTABLE_ENTRY(1b,4b)
-	ASM_EXCEPTIONTABLE_ENTRY(2b,4b)
-	: "=r" (ret)
+	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b)
+	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b)
+	: "+r" (ret)
 	: "r" (val), "r" (regs->ior), "r" (regs->isr)
-	: "r19", "r20", "r21", "r22", "r1", FIXUP_BRANCH_CLOBBER );
+	: "r19", "r20", "r21", "r22", "r1" );
 
 	return ret;
 }
 static int emulate_std(struct pt_regs *regs, int frreg, int flop)
 {
 	__u64 val;
-	int ret;
+	ASM_EXCEPTIONTABLE_VAR(ret);
 
 	if (flop)
 		val = regs->fr[frreg];
@@ -357,11 +318,10 @@ static int emulate_std(struct pt_regs *regs, int frreg, int flop)
 	DPRINTF("store r%d (0x%016llx) to " RFMT ":" RFMT " for 8 bytes\n", frreg, 
 		val,  regs->isr, regs->ior);
 
-#ifdef CONFIG_PA20
-#ifndef CONFIG_64BIT
-	if (!flop)
-		return -1;
-#endif
+	if (!IS_ENABLED(CONFIG_64BIT) && !flop)
+		return ERR_NOTHANDLED;
+
+#ifdef CONFIG_64BIT
 	__asm__ __volatile__ (
 "	mtsp %3, %%sr1\n"
 "	depd,z	%2, 60, 3, %%r19\n"
@@ -378,19 +338,14 @@ static int emulate_std(struct pt_regs *regs, int frreg, int flop)
 "	or	%%r1, %%r21, %%r21\n"
 "3:	std	%%r20,0(%%sr1,%2)\n"
 "4:	std	%%r21,8(%%sr1,%2)\n"
-"	copy	%%r0, %0\n"
 "5:	\n"
-"	.section .fixup,\"ax\"\n"
-"6:	ldi	-2, %0\n"
-	FIXUP_BRANCH(5b)
-"	.previous\n"
-	ASM_EXCEPTIONTABLE_ENTRY(1b,6b)
-	ASM_EXCEPTIONTABLE_ENTRY(2b,6b)
-	ASM_EXCEPTIONTABLE_ENTRY(3b,6b)
-	ASM_EXCEPTIONTABLE_ENTRY(4b,6b)
-	: "=r" (ret)
+	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 5b)
+	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 5b)
+	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 5b)
+	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(4b, 5b)
+	: "+r" (ret)
 	: "r" (val), "r" (regs->ior), "r" (regs->isr)
-	: "r19", "r20", "r21", "r22", "r1", FIXUP_BRANCH_CLOBBER );
+	: "r19", "r20", "r21", "r22", "r1" );
 #else
     {
 	unsigned long valh=(val>>32),vall=(val&0xffffffffl);
@@ -412,20 +367,15 @@ static int emulate_std(struct pt_regs *regs, int frreg, int flop)
 "3:	stw	%1,0(%%sr1,%3)\n"
 "4:	stw	%%r1,4(%%sr1,%3)\n"
 "5:	stw	%2,8(%%sr1,%3)\n"
-"	copy	%%r0, %0\n"
 "6:	\n"
-"	.section .fixup,\"ax\"\n"
-"7:	ldi	-2, %0\n"
-	FIXUP_BRANCH(6b)
-"	.previous\n"
-	ASM_EXCEPTIONTABLE_ENTRY(1b,7b)
-	ASM_EXCEPTIONTABLE_ENTRY(2b,7b)
-	ASM_EXCEPTIONTABLE_ENTRY(3b,7b)
-	ASM_EXCEPTIONTABLE_ENTRY(4b,7b)
-	ASM_EXCEPTIONTABLE_ENTRY(5b,7b)
-	: "=r" (ret)
+	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 6b)
+	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 6b)
+	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 6b)
+	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(4b, 6b)
+	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(5b, 6b)
+	: "+r" (ret)
 	: "r" (valh), "r" (vall), "r" (regs->ior), "r" (regs->isr)
-	: "r19", "r20", "r21", "r1", FIXUP_BRANCH_CLOBBER );
+	: "r19", "r20", "r21", "r1" );
     }
 #endif
 
@@ -547,7 +497,7 @@ void handle_unaligned(struct pt_regs *regs)
 		ret = emulate_stw(regs, R2(regs->iir),0);
 		break;
 
-#ifdef CONFIG_PA20
+#ifdef CONFIG_64BIT
 	case OPCODE_LDD_I:
 	case OPCODE_LDDA_I:
 	case OPCODE_LDD_S:
@@ -606,7 +556,7 @@ void handle_unaligned(struct pt_regs *regs)
 		flop=1;
 		ret = emulate_std(regs, R2(regs->iir),1);
 		break;
-#ifdef CONFIG_PA20
+#ifdef CONFIG_64BIT
 	case OPCODE_LDD_L:
 		ret = emulate_ldd(regs, R2(regs->iir),0);
 		break;
@@ -673,7 +623,7 @@ void handle_unaligned(struct pt_regs *regs)
 		printk(KERN_CRIT "Unaligned handler failed, ret = %d\n", ret);
 		die_if_kernel("Unaligned data reference", regs, 28);
 
-		if (ret == ERR_PAGEFAULT)
+		if (ret == -EFAULT)
 		{
 			force_sig_fault(SIGSEGV, SEGV_MAPERR,
 					(void __user *)regs->ior);
-- 
GitLab


From f85b2af1f046e4a91e94eba54f425683183cdc62 Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Wed, 23 Feb 2022 17:39:15 +0100
Subject: [PATCH 1267/1586] parisc/unaligned: Rewrite inline assembly of
 emulate_ldh()

Convert to use real temp variables instead of clobbering processor
registers.

Signed-off-by: Helge Deller <deller@gmx.de>
---
 arch/parisc/kernel/unaligned.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c
index 857dcc790122a..b923bc135d293 100644
--- a/arch/parisc/kernel/unaligned.c
+++ b/arch/parisc/kernel/unaligned.c
@@ -113,7 +113,7 @@ int unaligned_enabled __read_mostly = 1;
 static int emulate_ldh(struct pt_regs *regs, int toreg)
 {
 	unsigned long saddr = regs->ior;
-	unsigned long val = 0;
+	unsigned long val = 0, temp1;
 	ASM_EXCEPTIONTABLE_VAR(ret);
 
 	DPRINTF("load " RFMT ":" RFMT " to r%d for 2 bytes\n", 
@@ -121,15 +121,14 @@ static int emulate_ldh(struct pt_regs *regs, int toreg)
 
 	__asm__ __volatile__  (
 "	mtsp	%4, %%sr1\n"
-"1:	ldbs	0(%%sr1,%3), %%r20\n"
+"1:	ldbs	0(%%sr1,%3), %2\n"
 "2:	ldbs	1(%%sr1,%3), %0\n"
-"	depw	%%r20, 23, 24, %0\n"
+"	depw	%2, 23, 24, %0\n"
 "3:	\n"
 	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b)
 	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b)
-	: "=r" (val), "+r" (ret)
-	: "0" (val), "r" (saddr), "r" (regs->isr)
-	: "r20" );
+	: "+r" (val), "+r" (ret), "=&r" (temp1)
+	: "r" (saddr), "r" (regs->isr) );
 
 	DPRINTF("val = 0x" RFMT "\n", val);
 
-- 
GitLab


From e8aa7b17fe41c4485da4c96184a375e5b40129c8 Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Wed, 23 Feb 2022 17:42:19 +0100
Subject: [PATCH 1268/1586] parisc/unaligned: Rewrite inline assembly of
 emulate_ldw()

Convert to use real temp variables instead of clobbering processor
registers.

Signed-off-by: Helge Deller <deller@gmx.de>
---
 arch/parisc/kernel/unaligned.c | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c
index b923bc135d293..e8bc0cd8140c4 100644
--- a/arch/parisc/kernel/unaligned.c
+++ b/arch/parisc/kernel/unaligned.c
@@ -141,27 +141,26 @@ static int emulate_ldh(struct pt_regs *regs, int toreg)
 static int emulate_ldw(struct pt_regs *regs, int toreg, int flop)
 {
 	unsigned long saddr = regs->ior;
-	unsigned long val = 0;
+	unsigned long val = 0, temp1, temp2;
 	ASM_EXCEPTIONTABLE_VAR(ret);
 
 	DPRINTF("load " RFMT ":" RFMT " to r%d for 4 bytes\n", 
 		regs->isr, regs->ior, toreg);
 
 	__asm__ __volatile__  (
-"	zdep	%3,28,2,%%r19\n"		/* r19=(ofs&3)*8 */
-"	mtsp	%4, %%sr1\n"
-"	depw	%%r0,31,2,%3\n"
-"1:	ldw	0(%%sr1,%3),%0\n"
-"2:	ldw	4(%%sr1,%3),%%r20\n"
-"	subi	32,%%r19,%%r19\n"
-"	mtctl	%%r19,11\n"
-"	vshd	%0,%%r20,%0\n"
+"	zdep	%4,28,2,%2\n"		/* r19=(ofs&3)*8 */
+"	mtsp	%5, %%sr1\n"
+"	depw	%%r0,31,2,%4\n"
+"1:	ldw	0(%%sr1,%4),%0\n"
+"2:	ldw	4(%%sr1,%4),%3\n"
+"	subi	32,%4,%2\n"
+"	mtctl	%2,11\n"
+"	vshd	%0,%3,%0\n"
 "3:	\n"
 	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b)
 	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b)
-	: "=r" (val), "+r" (ret)
-	: "0" (val), "r" (saddr), "r" (regs->isr)
-	: "r19", "r20" );
+	: "+r" (val), "+r" (ret), "=&r" (temp1), "=&r" (temp2)
+	: "r" (saddr), "r" (regs->isr) );
 
 	DPRINTF("val = 0x" RFMT "\n", val);
 
-- 
GitLab


From 427c1073a2a14fe38ef0fc98d3635be51d7f7818 Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Wed, 23 Feb 2022 17:43:36 +0100
Subject: [PATCH 1269/1586] parisc/unaligned: Rewrite 32-bit inline assembly of
 emulate_ldd()

Convert to use real temp variables instead of clobbering processor
registers.

Signed-off-by: Helge Deller <deller@gmx.de>
---
 arch/parisc/kernel/unaligned.c | 28 +++++++++++++---------------
 1 file changed, 13 insertions(+), 15 deletions(-)

diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c
index e8bc0cd8140c4..8fbe000b33e70 100644
--- a/arch/parisc/kernel/unaligned.c
+++ b/arch/parisc/kernel/unaligned.c
@@ -201,26 +201,24 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop)
 	: "r19", "r20" );
 #else
     {
-	unsigned long valh=0,vall=0;
+	unsigned long shift, temp1;
 	__asm__ __volatile__  (
-"	zdep	%5,29,2,%%r19\n"		/* r19=(ofs&3)*8 */
-"	mtsp	%6, %%sr1\n"
-"	dep	%%r0,31,2,%5\n"
-"1:	ldw	0(%%sr1,%5),%0\n"
-"2:	ldw	4(%%sr1,%5),%1\n"
-"3:	ldw	8(%%sr1,%5),%%r20\n"
-"	subi	32,%%r19,%%r19\n"
-"	mtsar	%%r19\n"
-"	vshd	%0,%1,%0\n"
-"	vshd	%1,%%r20,%1\n"
+"	zdep	%2,29,2,%3\n"		/* r19=(ofs&3)*8 */
+"	mtsp	%5, %%sr1\n"
+"	dep	%%r0,31,2,%2\n"
+"1:	ldw	0(%%sr1,%2),%0\n"
+"2:	ldw	4(%%sr1,%2),%R0\n"
+"3:	ldw	8(%%sr1,%2),%4\n"
+"	subi	32,%3,%3\n"
+"	mtsar	%3\n"
+"	vshd	%0,%R0,%0\n"
+"	vshd	%R0,%4,%R0\n"
 "4:	\n"
 	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 4b)
 	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 4b)
 	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 4b)
-	: "=r" (valh), "=r" (vall), "+r" (ret)
-	: "0" (valh), "1" (vall), "r" (saddr), "r" (regs->isr)
-	: "r19", "r20" );
-	val=((__u64)valh<<32)|(__u64)vall;
+	: "+r" (val), "+r" (ret), "+r" (saddr), "=&r" (shift), "=&r" (temp1)
+	: "r" (regs->isr) );
     }
 #endif
 
-- 
GitLab


From 3029ce31af158631feffdab067e99bb22b7369c4 Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Wed, 23 Feb 2022 17:45:10 +0100
Subject: [PATCH 1270/1586] parisc/unaligned: Rewrite 32-bit inline assembly of
 emulate_sth()

Convert to use real temp variables instead of clobbering processor
registers.

Signed-off-by: Helge Deller <deller@gmx.de>
---
 arch/parisc/kernel/unaligned.c | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c
index 8fbe000b33e70..0d6e90d6f4975 100644
--- a/arch/parisc/kernel/unaligned.c
+++ b/arch/parisc/kernel/unaligned.c
@@ -234,7 +234,7 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop)
 
 static int emulate_sth(struct pt_regs *regs, int frreg)
 {
-	unsigned long val = regs->gr[frreg];
+	unsigned long val = regs->gr[frreg], temp1;
 	ASM_EXCEPTIONTABLE_VAR(ret);
 
 	if (!frreg)
@@ -244,16 +244,15 @@ static int emulate_sth(struct pt_regs *regs, int frreg)
 		val, regs->isr, regs->ior);
 
 	__asm__ __volatile__ (
-"	mtsp %3, %%sr1\n"
-"	extrw,u %1, 23, 8, %%r19\n"
-"1:	stb %1, 1(%%sr1, %2)\n"
-"2:	stb %%r19, 0(%%sr1, %2)\n"
+"	mtsp %4, %%sr1\n"
+"	extrw,u %2, 23, 8, %1\n"
+"1:	stb %1, 0(%%sr1, %3)\n"
+"2:	stb %2, 1(%%sr1, %3)\n"
 "3:	\n"
 	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b)
 	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b)
-	: "+r" (ret)
-	: "r" (val), "r" (regs->ior), "r" (regs->isr)
-	: "r19" );
+	: "+r" (ret), "=&r" (temp1)
+	: "r" (val), "r" (regs->ior), "r" (regs->isr) );
 
 	return ret;
 }
-- 
GitLab


From e5e9e7f222e5b3da5bc441d315d1ce7b000ed1be Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Thu, 24 Feb 2022 20:15:34 +0100
Subject: [PATCH 1271/1586] parisc/unaligned: Enhance user-space visible output

Userspace is up to now limited to 32-bit, so it's sufficient to print
only 32-bit values when showing pointer addresses.

Signed-off-by: Helge Deller <deller@gmx.de>
---
 arch/parisc/kernel/unaligned.c | 36 ++++++++++------------------------
 1 file changed, 10 insertions(+), 26 deletions(-)

diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c
index 0d6e90d6f4975..ed1e88a74dc42 100644
--- a/arch/parisc/kernel/unaligned.c
+++ b/arch/parisc/kernel/unaligned.c
@@ -3,14 +3,11 @@
  *    Unaligned memory access handler
  *
  *    Copyright (C) 2001 Randolph Chung <tausq@debian.org>
+ *    Copyright (C) 2022 Helge Deller <deller@gmx.de>
  *    Significantly tweaked by LaMont Jones <lamont@debian.org>
  */
 
-#include <linux/jiffies.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
 #include <linux/sched/signal.h>
-#include <linux/sched/debug.h>
 #include <linux/signal.h>
 #include <linux/ratelimit.h>
 #include <linux/uaccess.h>
@@ -25,11 +22,7 @@
 #define DPRINTF(fmt, args...)
 #endif
 
-#ifdef CONFIG_64BIT
-#define RFMT "%016lx"
-#else
-#define RFMT "%08lx"
-#endif
+#define RFMT "%#08lx"
 
 /* 1111 1100 0000 0000 0001 0011 1100 0000 */
 #define OPCODE1(a,b,c)	((a)<<26|(b)<<12|(c)<<6) 
@@ -130,7 +123,7 @@ static int emulate_ldh(struct pt_regs *regs, int toreg)
 	: "+r" (val), "+r" (ret), "=&r" (temp1)
 	: "r" (saddr), "r" (regs->isr) );
 
-	DPRINTF("val = 0x" RFMT "\n", val);
+	DPRINTF("val = " RFMT "\n", val);
 
 	if (toreg)
 		regs->gr[toreg] = val;
@@ -162,7 +155,7 @@ static int emulate_ldw(struct pt_regs *regs, int toreg, int flop)
 	: "+r" (val), "+r" (ret), "=&r" (temp1), "=&r" (temp2)
 	: "r" (saddr), "r" (regs->isr) );
 
-	DPRINTF("val = 0x" RFMT "\n", val);
+	DPRINTF("val = " RFMT "\n", val);
 
 	if (flop)
 		((__u32*)(regs->fr))[toreg] = val;
@@ -240,7 +233,7 @@ static int emulate_sth(struct pt_regs *regs, int frreg)
 	if (!frreg)
 		val = 0;
 
-	DPRINTF("store r%d (0x" RFMT ") to " RFMT ":" RFMT " for 2 bytes\n", frreg, 
+	DPRINTF("store r%d (" RFMT ") to " RFMT ":" RFMT " for 2 bytes\n", frreg,
 		val, regs->isr, regs->ior);
 
 	__asm__ __volatile__ (
@@ -269,7 +262,7 @@ static int emulate_stw(struct pt_regs *regs, int frreg, int flop)
 	else
 		val = 0;
 
-	DPRINTF("store r%d (0x" RFMT ") to " RFMT ":" RFMT " for 4 bytes\n", frreg, 
+	DPRINTF("store r%d (" RFMT ") to " RFMT ":" RFMT " for 4 bytes\n", frreg,
 		val, regs->isr, regs->ior);
 
 
@@ -383,7 +376,6 @@ void handle_unaligned(struct pt_regs *regs)
 	unsigned long newbase = R1(regs->iir)?regs->gr[R1(regs->iir)]:0;
 	int modify = 0;
 	int ret = ERR_NOTHANDLED;
-	register int flop=0;	/* true if this is a flop */
 
 	__inc_irq_stat(irq_unaligned_count);
 
@@ -395,10 +387,10 @@ void handle_unaligned(struct pt_regs *regs)
 
 		if (!(current->thread.flags & PARISC_UAC_NOPRINT) &&
 			__ratelimit(&ratelimit)) {
-			char buf[256];
-			sprintf(buf, "%s(%d): unaligned access to 0x" RFMT " at ip=0x" RFMT "\n",
-				current->comm, task_pid_nr(current), regs->ior, regs->iaoq[0]);
-			printk(KERN_WARNING "%s", buf);
+			printk(KERN_WARNING "%s(%d): unaligned access to " RFMT
+				" at ip " RFMT " (iir " RFMT ")\n",
+				current->comm, task_pid_nr(current), regs->ior,
+				regs->iaoq[0], regs->iir);
 #ifdef DEBUG_UNALIGNED
 			show_regs(regs);
 #endif		
@@ -510,13 +502,11 @@ void handle_unaligned(struct pt_regs *regs)
 	case OPCODE_FLDWS:
 	case OPCODE_FLDWXR:
 	case OPCODE_FLDWSR:
-		flop=1;
 		ret = emulate_ldw(regs,FR3(regs->iir),1);
 		break;
 
 	case OPCODE_FLDDX:
 	case OPCODE_FLDDS:
-		flop=1;
 		ret = emulate_ldd(regs,R3(regs->iir),1);
 		break;
 
@@ -524,13 +514,11 @@ void handle_unaligned(struct pt_regs *regs)
 	case OPCODE_FSTWS:
 	case OPCODE_FSTWXR:
 	case OPCODE_FSTWSR:
-		flop=1;
 		ret = emulate_stw(regs,FR3(regs->iir),1);
 		break;
 
 	case OPCODE_FSTDX:
 	case OPCODE_FSTDS:
-		flop=1;
 		ret = emulate_std(regs,R3(regs->iir),1);
 		break;
 
@@ -544,11 +532,9 @@ void handle_unaligned(struct pt_regs *regs)
 	switch (regs->iir & OPCODE2_MASK)
 	{
 	case OPCODE_FLDD_L:
-		flop=1;
 		ret = emulate_ldd(regs,R2(regs->iir),1);
 		break;
 	case OPCODE_FSTD_L:
-		flop=1;
 		ret = emulate_std(regs, R2(regs->iir),1);
 		break;
 #ifdef CONFIG_64BIT
@@ -563,7 +549,6 @@ void handle_unaligned(struct pt_regs *regs)
 	switch (regs->iir & OPCODE3_MASK)
 	{
 	case OPCODE_FLDW_L:
-		flop=1;
 		ret = emulate_ldw(regs, R2(regs->iir), 1);
 		break;
 	case OPCODE_LDW_M:
@@ -571,7 +556,6 @@ void handle_unaligned(struct pt_regs *regs)
 		break;
 
 	case OPCODE_FSTW_L:
-		flop=1;
 		ret = emulate_stw(regs, R2(regs->iir),1);
 		break;
 	case OPCODE_STW_M:
-- 
GitLab


From a58e9d0984e8dad53f17ec73ae3c1cc7f8d88151 Mon Sep 17 00:00:00 2001
From: John David Anglin <dave.anglin@bell.net>
Date: Fri, 11 Mar 2022 18:59:12 +0000
Subject: [PATCH 1272/1586] parisc: Increase parisc_cache_flush_threshold
 setting

In testing the "Fix non-access data TLB cache flush faults" change,
I noticed a significant improvement in glibc build and check times.
This led me to investigate the parisc_cache_flush_threshold setting.
It determines when we switch from line flushing to whole cache flushing.

It turned out that the parisc_cache_flush_threshold setting on
mako and mako2 machines (PA8800 and PA8900 processors) was way too
small. Adjusting this setting provided almost a factor two improvement
in the glibc build and check time.

Signed-off-by: John David Anglin <dave.anglin@bell.net>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 arch/parisc/kernel/cache.c | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index 59c2d079f1789..81f36d6407f0c 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -388,7 +388,7 @@ void __init parisc_setup_cache_timing(void)
 {
 	unsigned long rangetime, alltime;
 	unsigned long size;
-	unsigned long threshold;
+	unsigned long threshold, threshold2;
 
 	alltime = mfctl(16);
 	flush_data_cache();
@@ -403,8 +403,20 @@ void __init parisc_setup_cache_timing(void)
 		alltime, size, rangetime);
 
 	threshold = L1_CACHE_ALIGN(size * alltime / rangetime);
-	if (threshold > cache_info.dc_size)
-		threshold = cache_info.dc_size;
+
+	/*
+	 * The threshold computed above isn't very reliable since the
+	 * flush times depend greatly on the percentage of dirty lines
+	 * in the flush range. Further, the whole cache time doesn't
+	 * include the time to refill lines that aren't in the mm/vma
+	 * being flushed. By timing glibc build and checks on mako cpus,
+	 * the following formula seems to work reasonably well. The
+	 * value from the timing calculation is too small, and increases
+	 * build and check times by almost a factor two.
+	 */
+	threshold2 = cache_info.dc_size * num_online_cpus();
+	if (threshold2 > threshold)
+		threshold = threshold2;
 	if (threshold)
 		parisc_cache_flush_threshold = threshold;
 	printk(KERN_INFO "Cache flush threshold set to %lu KiB\n",
-- 
GitLab


From 54fc9851c0e0bec8012deaa87fe540d6e1739ac2 Mon Sep 17 00:00:00 2001
From: Shawn Guo <shawn.guo@linaro.org>
Date: Tue, 8 Mar 2022 16:05:33 +0800
Subject: [PATCH 1273/1586] dt-bindings: interrupt-controller: Add Qualcomm MPM
 support

It adds DT binding support for Qualcomm MPM interrupt controller.

Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220308080534.3384532-2-shawn.guo@linaro.org
---
 .../interrupt-controller/qcom,mpm.yaml        | 96 +++++++++++++++++++
 1 file changed, 96 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/interrupt-controller/qcom,mpm.yaml

diff --git a/Documentation/devicetree/bindings/interrupt-controller/qcom,mpm.yaml b/Documentation/devicetree/bindings/interrupt-controller/qcom,mpm.yaml
new file mode 100644
index 0000000000000..509d20c091af8
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/qcom,mpm.yaml
@@ -0,0 +1,96 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/interrupt-controller/qcom,mpm.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcom MPM Interrupt Controller
+
+maintainers:
+  - Shawn Guo <shawn.guo@linaro.org>
+
+description:
+  Qualcomm Technologies Inc. SoCs based on the RPM architecture have a
+  MSM Power Manager (MPM) that is in always-on domain. In addition to managing
+  resources during sleep, the hardware also has an interrupt controller that
+  monitors the interrupts when the system is asleep, wakes up the APSS when
+  one of these interrupts occur and replays it to GIC interrupt controller
+  after GIC becomes operational.
+
+allOf:
+  - $ref: /schemas/interrupt-controller.yaml#
+
+properties:
+  compatible:
+    items:
+      - const: qcom,mpm
+
+  reg:
+    maxItems: 1
+    description:
+      Specifies the base address and size of vMPM registers in RPM MSG RAM.
+
+  interrupts:
+    maxItems: 1
+    description:
+      Specify the IRQ used by RPM to wakeup APSS.
+
+  mboxes:
+    maxItems: 1
+    description:
+      Specify the mailbox used to notify RPM for writing vMPM registers.
+
+  interrupt-controller: true
+
+  '#interrupt-cells':
+    const: 2
+    description:
+      The first cell is the MPM pin number for the interrupt, and the second
+      is the trigger type.
+
+  qcom,mpm-pin-count:
+    description:
+      Specify the total MPM pin count that a SoC supports.
+    $ref: /schemas/types.yaml#/definitions/uint32
+
+  qcom,mpm-pin-map:
+    description:
+      A set of MPM pin numbers and the corresponding GIC SPIs.
+    $ref: /schemas/types.yaml#/definitions/uint32-matrix
+    items:
+      items:
+        - description: MPM pin number
+        - description: GIC SPI number for the MPM pin
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - mboxes
+  - interrupt-controller
+  - '#interrupt-cells'
+  - qcom,mpm-pin-count
+  - qcom,mpm-pin-map
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    mpm: interrupt-controller@45f01b8 {
+        compatible = "qcom,mpm";
+        interrupts = <GIC_SPI 197 IRQ_TYPE_EDGE_RISING>;
+        reg = <0x45f01b8 0x1000>;
+        mboxes = <&apcs_glb 1>;
+        interrupt-controller;
+        #interrupt-cells = <2>;
+        interrupt-parent = <&intc>;
+        qcom,mpm-pin-count = <96>;
+        qcom,mpm-pin-map = <2 275>,
+                           <5 296>,
+                           <12 422>,
+                           <24 79>,
+                           <86 183>,
+                           <90 260>,
+                           <91 260>;
+    };
-- 
GitLab


From a6199bb514d8a63f61c2a22c1f912376e14d0fb2 Mon Sep 17 00:00:00 2001
From: Shawn Guo <shawn.guo@linaro.org>
Date: Tue, 8 Mar 2022 16:05:34 +0800
Subject: [PATCH 1274/1586] irqchip: Add Qualcomm MPM controller driver

Qualcomm SoCs based on the RPM architecture have a MSM Power Manager (MPM)
in always-on domain. In addition to managing resources during sleep, the
hardware also has an interrupt controller that monitors the interrupts
when the system is asleep, wakes up the APSS when one of these interrupts
occur and replays it to GIC after it becomes operational.

It adds an irqchip driver for this interrupt controller, and here are
some notes about it.

- For given SoC, a fixed number of MPM pins are supported, e.g. 96 pins
  on QCM2290.  Each of these MPM pins can be either a MPM_GIC pin or
  a MPM_GPIO pin. The mapping between MPM_GIC pin and GIC interrupt
  is defined by SoC, as well as the mapping between MPM_GPIO pin and
  GPIO number.  The former mapping is retrieved from device tree, while
  the latter is defined in TLMM pinctrl driver.

- The power domain (PD) .power_off hook is used to notify RPM that APSS
  is about to power collapse.  This requires MPM PD be the parent PD of
  CPU cluster.

- When SoC gets awake from sleep mode, the driver will receive an
  interrupt from RPM, so that it can replay interrupt for particular
  polarity.

Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220308080534.3384532-3-shawn.guo@linaro.org
---
 drivers/irqchip/Kconfig        |   8 +
 drivers/irqchip/Makefile       |   1 +
 drivers/irqchip/irq-qcom-mpm.c | 461 +++++++++++++++++++++++++++++++++
 3 files changed, 470 insertions(+)
 create mode 100644 drivers/irqchip/irq-qcom-mpm.c

diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index 7038957f4a77a..680d2fcf26863 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -430,6 +430,14 @@ config QCOM_PDC
 	  Power Domain Controller driver to manage and configure wakeup
 	  IRQs for Qualcomm Technologies Inc (QTI) mobile chips.
 
+config QCOM_MPM
+	tristate "QCOM MPM"
+	depends on ARCH_QCOM
+	select IRQ_DOMAIN_HIERARCHY
+	help
+	  MSM Power Manager driver to manage and configure wakeup
+	  IRQs for Qualcomm Technologies Inc (QTI) mobile chips.
+
 config CSKY_MPINTC
 	bool
 	depends on CSKY
diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
index c1f611cbfbf88..1f8990f812f1c 100644
--- a/drivers/irqchip/Makefile
+++ b/drivers/irqchip/Makefile
@@ -94,6 +94,7 @@ obj-$(CONFIG_MESON_IRQ_GPIO)		+= irq-meson-gpio.o
 obj-$(CONFIG_GOLDFISH_PIC) 		+= irq-goldfish-pic.o
 obj-$(CONFIG_NDS32)			+= irq-ativic32.o
 obj-$(CONFIG_QCOM_PDC)			+= qcom-pdc.o
+obj-$(CONFIG_QCOM_MPM)			+= irq-qcom-mpm.o
 obj-$(CONFIG_CSKY_MPINTC)		+= irq-csky-mpintc.o
 obj-$(CONFIG_CSKY_APB_INTC)		+= irq-csky-apb-intc.o
 obj-$(CONFIG_RISCV_INTC)		+= irq-riscv-intc.o
diff --git a/drivers/irqchip/irq-qcom-mpm.c b/drivers/irqchip/irq-qcom-mpm.c
new file mode 100644
index 0000000000000..eea5a753618c5
--- /dev/null
+++ b/drivers/irqchip/irq-qcom-mpm.c
@@ -0,0 +1,461 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021, Linaro Limited
+ * Copyright (c) 2010-2020, The Linux Foundation. All rights reserved.
+ */
+
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/irqchip.h>
+#include <linux/irqdomain.h>
+#include <linux/mailbox_client.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/pm_domain.h>
+#include <linux/slab.h>
+#include <linux/soc/qcom/irq.h>
+#include <linux/spinlock.h>
+
+/*
+ * This is the driver for Qualcomm MPM (MSM Power Manager) interrupt controller,
+ * which is commonly found on Qualcomm SoCs built on the RPM architecture.
+ * Sitting in always-on domain, MPM monitors the wakeup interrupts when SoC is
+ * asleep, and wakes up the AP when one of those interrupts occurs.  This driver
+ * doesn't directly access physical MPM registers though.  Instead, the access
+ * is bridged via a piece of internal memory (SRAM) that is accessible to both
+ * AP and RPM.  This piece of memory is called 'vMPM' in the driver.
+ *
+ * When SoC is awake, the vMPM is owned by AP and the register setup by this
+ * driver all happens on vMPM.  When AP is about to get power collapsed, the
+ * driver sends a mailbox notification to RPM, which will take over the vMPM
+ * ownership and dump vMPM into physical MPM registers.  On wakeup, AP is woken
+ * up by a MPM pin/interrupt, and RPM will copy STATUS registers into vMPM.
+ * Then AP start owning vMPM again.
+ *
+ * vMPM register map:
+ *
+ *    31                              0
+ *    +--------------------------------+
+ *    |            TIMER0              | 0x00
+ *    +--------------------------------+
+ *    |            TIMER1              | 0x04
+ *    +--------------------------------+
+ *    |            ENABLE0             | 0x08
+ *    +--------------------------------+
+ *    |              ...               | ...
+ *    +--------------------------------+
+ *    |            ENABLEn             |
+ *    +--------------------------------+
+ *    |          FALLING_EDGE0         |
+ *    +--------------------------------+
+ *    |              ...               |
+ *    +--------------------------------+
+ *    |            STATUSn             |
+ *    +--------------------------------+
+ *
+ *    n = DIV_ROUND_UP(pin_cnt, 32)
+ *
+ */
+
+#define MPM_REG_ENABLE		0
+#define MPM_REG_FALLING_EDGE	1
+#define MPM_REG_RISING_EDGE	2
+#define MPM_REG_POLARITY	3
+#define MPM_REG_STATUS		4
+
+/* MPM pin map to GIC hwirq */
+struct mpm_gic_map {
+	int pin;
+	irq_hw_number_t hwirq;
+};
+
+struct qcom_mpm_priv {
+	void __iomem *base;
+	raw_spinlock_t lock;
+	struct mbox_client mbox_client;
+	struct mbox_chan *mbox_chan;
+	struct mpm_gic_map *maps;
+	unsigned int map_cnt;
+	unsigned int reg_stride;
+	struct irq_domain *domain;
+	struct generic_pm_domain genpd;
+};
+
+static u32 qcom_mpm_read(struct qcom_mpm_priv *priv, unsigned int reg,
+			 unsigned int index)
+{
+	unsigned int offset = (reg * priv->reg_stride + index + 2) * 4;
+
+	return readl_relaxed(priv->base + offset);
+}
+
+static void qcom_mpm_write(struct qcom_mpm_priv *priv, unsigned int reg,
+			   unsigned int index, u32 val)
+{
+	unsigned int offset = (reg * priv->reg_stride + index + 2) * 4;
+
+	writel_relaxed(val, priv->base + offset);
+
+	/* Ensure the write is completed */
+	wmb();
+}
+
+static void qcom_mpm_enable_irq(struct irq_data *d, bool en)
+{
+	struct qcom_mpm_priv *priv = d->chip_data;
+	int pin = d->hwirq;
+	unsigned int index = pin / 32;
+	unsigned int shift = pin % 32;
+	unsigned long flags, val;
+
+	raw_spin_lock_irqsave(&priv->lock, flags);
+
+	val = qcom_mpm_read(priv, MPM_REG_ENABLE, index);
+	__assign_bit(shift, &val, en);
+	qcom_mpm_write(priv, MPM_REG_ENABLE, index, val);
+
+	raw_spin_unlock_irqrestore(&priv->lock, flags);
+}
+
+static void qcom_mpm_mask(struct irq_data *d)
+{
+	qcom_mpm_enable_irq(d, false);
+
+	if (d->parent_data)
+		irq_chip_mask_parent(d);
+}
+
+static void qcom_mpm_unmask(struct irq_data *d)
+{
+	qcom_mpm_enable_irq(d, true);
+
+	if (d->parent_data)
+		irq_chip_unmask_parent(d);
+}
+
+static void mpm_set_type(struct qcom_mpm_priv *priv, bool set, unsigned int reg,
+			 unsigned int index, unsigned int shift)
+{
+	unsigned long flags, val;
+
+	raw_spin_lock_irqsave(&priv->lock, flags);
+
+	val = qcom_mpm_read(priv, reg, index);
+	__assign_bit(shift, &val, set);
+	qcom_mpm_write(priv, reg, index, val);
+
+	raw_spin_unlock_irqrestore(&priv->lock, flags);
+}
+
+static int qcom_mpm_set_type(struct irq_data *d, unsigned int type)
+{
+	struct qcom_mpm_priv *priv = d->chip_data;
+	int pin = d->hwirq;
+	unsigned int index = pin / 32;
+	unsigned int shift = pin % 32;
+
+	if (type & IRQ_TYPE_EDGE_RISING)
+		mpm_set_type(priv, true, MPM_REG_RISING_EDGE, index, shift);
+	else
+		mpm_set_type(priv, false, MPM_REG_RISING_EDGE, index, shift);
+
+	if (type & IRQ_TYPE_EDGE_FALLING)
+		mpm_set_type(priv, true, MPM_REG_FALLING_EDGE, index, shift);
+	else
+		mpm_set_type(priv, false, MPM_REG_FALLING_EDGE, index, shift);
+
+	if (type & IRQ_TYPE_LEVEL_HIGH)
+		mpm_set_type(priv, true, MPM_REG_POLARITY, index, shift);
+	else
+		mpm_set_type(priv, false, MPM_REG_POLARITY, index, shift);
+
+	if (!d->parent_data)
+		return 0;
+
+	if (type & IRQ_TYPE_EDGE_BOTH)
+		type = IRQ_TYPE_EDGE_RISING;
+
+	if (type & IRQ_TYPE_LEVEL_MASK)
+		type = IRQ_TYPE_LEVEL_HIGH;
+
+	return irq_chip_set_type_parent(d, type);
+}
+
+static struct irq_chip qcom_mpm_chip = {
+	.name			= "mpm",
+	.irq_eoi		= irq_chip_eoi_parent,
+	.irq_mask		= qcom_mpm_mask,
+	.irq_unmask		= qcom_mpm_unmask,
+	.irq_retrigger		= irq_chip_retrigger_hierarchy,
+	.irq_set_type		= qcom_mpm_set_type,
+	.irq_set_affinity	= irq_chip_set_affinity_parent,
+	.flags			= IRQCHIP_MASK_ON_SUSPEND |
+				  IRQCHIP_SKIP_SET_WAKE,
+};
+
+static struct mpm_gic_map *get_mpm_gic_map(struct qcom_mpm_priv *priv, int pin)
+{
+	struct mpm_gic_map *maps = priv->maps;
+	int i;
+
+	for (i = 0; i < priv->map_cnt; i++) {
+		if (maps[i].pin == pin)
+			return &maps[i];
+	}
+
+	return NULL;
+}
+
+static int qcom_mpm_alloc(struct irq_domain *domain, unsigned int virq,
+			  unsigned int nr_irqs, void *data)
+{
+	struct qcom_mpm_priv *priv = domain->host_data;
+	struct irq_fwspec *fwspec = data;
+	struct irq_fwspec parent_fwspec;
+	struct mpm_gic_map *map;
+	irq_hw_number_t pin;
+	unsigned int type;
+	int  ret;
+
+	ret = irq_domain_translate_twocell(domain, fwspec, &pin, &type);
+	if (ret)
+		return ret;
+
+	ret = irq_domain_set_hwirq_and_chip(domain, virq, pin,
+					    &qcom_mpm_chip, priv);
+	if (ret)
+		return ret;
+
+	map = get_mpm_gic_map(priv, pin);
+	if (map == NULL)
+		return irq_domain_disconnect_hierarchy(domain->parent, virq);
+
+	if (type & IRQ_TYPE_EDGE_BOTH)
+		type = IRQ_TYPE_EDGE_RISING;
+
+	if (type & IRQ_TYPE_LEVEL_MASK)
+		type = IRQ_TYPE_LEVEL_HIGH;
+
+	parent_fwspec.fwnode = domain->parent->fwnode;
+	parent_fwspec.param_count = 3;
+	parent_fwspec.param[0] = 0;
+	parent_fwspec.param[1] = map->hwirq;
+	parent_fwspec.param[2] = type;
+
+	return irq_domain_alloc_irqs_parent(domain, virq, nr_irqs,
+					    &parent_fwspec);
+}
+
+static const struct irq_domain_ops qcom_mpm_ops = {
+	.alloc		= qcom_mpm_alloc,
+	.free		= irq_domain_free_irqs_common,
+	.translate	= irq_domain_translate_twocell,
+};
+
+/* Triggered by RPM when system resumes from deep sleep */
+static irqreturn_t qcom_mpm_handler(int irq, void *dev_id)
+{
+	struct qcom_mpm_priv *priv = dev_id;
+	unsigned long enable, pending;
+	irqreturn_t ret = IRQ_NONE;
+	unsigned long flags;
+	int i, j;
+
+	for (i = 0; i < priv->reg_stride; i++) {
+		raw_spin_lock_irqsave(&priv->lock, flags);
+		enable = qcom_mpm_read(priv, MPM_REG_ENABLE, i);
+		pending = qcom_mpm_read(priv, MPM_REG_STATUS, i);
+		pending &= enable;
+		raw_spin_unlock_irqrestore(&priv->lock, flags);
+
+		for_each_set_bit(j, &pending, 32) {
+			unsigned int pin = 32 * i + j;
+			struct irq_desc *desc = irq_resolve_mapping(priv->domain, pin);
+			struct irq_data *d = &desc->irq_data;
+
+			if (!irqd_is_level_type(d))
+				irq_set_irqchip_state(d->irq,
+						IRQCHIP_STATE_PENDING, true);
+			ret = IRQ_HANDLED;
+		}
+	}
+
+	return ret;
+}
+
+static int mpm_pd_power_off(struct generic_pm_domain *genpd)
+{
+	struct qcom_mpm_priv *priv = container_of(genpd, struct qcom_mpm_priv,
+						  genpd);
+	int i, ret;
+
+	for (i = 0; i < priv->reg_stride; i++)
+		qcom_mpm_write(priv, MPM_REG_STATUS, i, 0);
+
+	/* Notify RPM to write vMPM into HW */
+	ret = mbox_send_message(priv->mbox_chan, NULL);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+static bool gic_hwirq_is_mapped(struct mpm_gic_map *maps, int cnt, u32 hwirq)
+{
+	int i;
+
+	for (i = 0; i < cnt; i++)
+		if (maps[i].hwirq == hwirq)
+			return true;
+
+	return false;
+}
+
+static int qcom_mpm_init(struct device_node *np, struct device_node *parent)
+{
+	struct platform_device *pdev = of_find_device_by_node(np);
+	struct device *dev = &pdev->dev;
+	struct irq_domain *parent_domain;
+	struct generic_pm_domain *genpd;
+	struct qcom_mpm_priv *priv;
+	unsigned int pin_cnt;
+	int i, irq;
+	int ret;
+
+	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	ret = of_property_read_u32(np, "qcom,mpm-pin-count", &pin_cnt);
+	if (ret) {
+		dev_err(dev, "failed to read qcom,mpm-pin-count: %d\n", ret);
+		return ret;
+	}
+
+	priv->reg_stride = DIV_ROUND_UP(pin_cnt, 32);
+
+	ret = of_property_count_u32_elems(np, "qcom,mpm-pin-map");
+	if (ret < 0) {
+		dev_err(dev, "failed to read qcom,mpm-pin-map: %d\n", ret);
+		return ret;
+	}
+
+	if (ret % 2) {
+		dev_err(dev, "invalid qcom,mpm-pin-map\n");
+		return -EINVAL;
+	}
+
+	priv->map_cnt = ret / 2;
+	priv->maps = devm_kcalloc(dev, priv->map_cnt, sizeof(*priv->maps),
+				  GFP_KERNEL);
+	if (!priv->maps)
+		return -ENOMEM;
+
+	for (i = 0; i < priv->map_cnt; i++) {
+		u32 pin, hwirq;
+
+		of_property_read_u32_index(np, "qcom,mpm-pin-map", i * 2, &pin);
+		of_property_read_u32_index(np, "qcom,mpm-pin-map", i * 2 + 1, &hwirq);
+
+		if (gic_hwirq_is_mapped(priv->maps, i, hwirq)) {
+			dev_warn(dev, "failed to map pin %d as GIC hwirq %d is already mapped\n",
+				 pin, hwirq);
+			continue;
+		}
+
+		priv->maps[i].pin = pin;
+		priv->maps[i].hwirq = hwirq;
+	}
+
+	raw_spin_lock_init(&priv->lock);
+
+	priv->base = devm_platform_ioremap_resource(pdev, 0);
+	if (!priv->base)
+		return PTR_ERR(priv->base);
+
+	for (i = 0; i < priv->reg_stride; i++) {
+		qcom_mpm_write(priv, MPM_REG_ENABLE, i, 0);
+		qcom_mpm_write(priv, MPM_REG_FALLING_EDGE, i, 0);
+		qcom_mpm_write(priv, MPM_REG_RISING_EDGE, i, 0);
+		qcom_mpm_write(priv, MPM_REG_POLARITY, i, 0);
+		qcom_mpm_write(priv, MPM_REG_STATUS, i, 0);
+	}
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return irq;
+
+	genpd = &priv->genpd;
+	genpd->flags = GENPD_FLAG_IRQ_SAFE;
+	genpd->power_off = mpm_pd_power_off;
+
+	genpd->name = devm_kasprintf(dev, GFP_KERNEL, "%s", dev_name(dev));
+	if (!genpd->name)
+		return -ENOMEM;
+
+	ret = pm_genpd_init(genpd, NULL, false);
+	if (ret) {
+		dev_err(dev, "failed to init genpd: %d\n", ret);
+		return ret;
+	}
+
+	ret = of_genpd_add_provider_simple(np, genpd);
+	if (ret) {
+		dev_err(dev, "failed to add genpd provider: %d\n", ret);
+		goto remove_genpd;
+	}
+
+	priv->mbox_client.dev = dev;
+	priv->mbox_chan = mbox_request_channel(&priv->mbox_client, 0);
+	if (IS_ERR(priv->mbox_chan)) {
+		ret = PTR_ERR(priv->mbox_chan);
+		dev_err(dev, "failed to acquire IPC channel: %d\n", ret);
+		return ret;
+	}
+
+	parent_domain = irq_find_host(parent);
+	if (!parent_domain) {
+		dev_err(dev, "failed to find MPM parent domain\n");
+		ret = -ENXIO;
+		goto free_mbox;
+	}
+
+	priv->domain = irq_domain_create_hierarchy(parent_domain,
+				IRQ_DOMAIN_FLAG_QCOM_MPM_WAKEUP, pin_cnt,
+				of_node_to_fwnode(np), &qcom_mpm_ops, priv);
+	if (!priv->domain) {
+		dev_err(dev, "failed to create MPM domain\n");
+		ret = -ENOMEM;
+		goto free_mbox;
+	}
+
+	irq_domain_update_bus_token(priv->domain, DOMAIN_BUS_WAKEUP);
+
+	ret = devm_request_irq(dev, irq, qcom_mpm_handler, IRQF_NO_SUSPEND,
+			       "qcom_mpm", priv);
+	if (ret) {
+		dev_err(dev, "failed to request irq: %d\n", ret);
+		goto remove_domain;
+	}
+
+	return 0;
+
+remove_domain:
+	irq_domain_remove(priv->domain);
+free_mbox:
+	mbox_free_channel(priv->mbox_chan);
+remove_genpd:
+	pm_genpd_remove(genpd);
+	return ret;
+}
+
+IRQCHIP_PLATFORM_DRIVER_BEGIN(qcom_mpm)
+IRQCHIP_MATCH("qcom,mpm", qcom_mpm_init)
+IRQCHIP_PLATFORM_DRIVER_END(qcom_mpm)
+MODULE_DESCRIPTION("Qualcomm Technologies, Inc. MSM Power Manager");
+MODULE_LICENSE("GPL v2");
-- 
GitLab


From 6f22931bbf7546d7a4bac35830b2918b4642b0e3 Mon Sep 17 00:00:00 2001
From: xu xin <xu.xin16@zte.com.cn>
Date: Thu, 10 Mar 2022 23:10:11 +0800
Subject: [PATCH 1275/1586] Add Chinese translation for vm/ksm.rst

Translate Documentation/vm/ksm.rst into Chinese.
Update Documentation/translations/zh_CN/vm/index.rst.

Reviewed-by: Yang Yang <yang.yang29@zte.com.cn>
Reviewed-by: Alex Shi <alexs@kernel.org>
Reviewed-by: Yanteng Si <siyanteng@loongson.cn>
Signed-off-by: xu xin <xu.xin16@zte.com.cn>
Signed-off-by: Yanteng Si <siyanteng@loongson.cn>
Link: https://lore.kernel.org/r/ceb82d6458cd79bc3b7060199db0c3518adc3b8b.1646899056.git.siyanteng@loongson.cn
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/translations/zh_CN/vm/index.rst |  1 +
 Documentation/translations/zh_CN/vm/ksm.rst   | 70 +++++++++++++++++++
 2 files changed, 71 insertions(+)
 create mode 100644 Documentation/translations/zh_CN/vm/ksm.rst

diff --git a/Documentation/translations/zh_CN/vm/index.rst b/Documentation/translations/zh_CN/vm/index.rst
index 2f9834eb9475c..a1d2f0356cc1b 100644
--- a/Documentation/translations/zh_CN/vm/index.rst
+++ b/Documentation/translations/zh_CN/vm/index.rst
@@ -26,6 +26,7 @@ TODO:待引用文档集被翻译完毕后请及时修改此处）
    damon/index
    free_page_reporting
    highmem
+   ksm
 
 TODOLIST:
 * arch_pgtable_helpers
diff --git a/Documentation/translations/zh_CN/vm/ksm.rst b/Documentation/translations/zh_CN/vm/ksm.rst
new file mode 100644
index 0000000000000..83b0c73984da1
--- /dev/null
+++ b/Documentation/translations/zh_CN/vm/ksm.rst
@@ -0,0 +1,70 @@
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/vm/ksm.rst
+
+:翻译:
+
+   徐鑫 xu xin <xu.xin16@zte.com.cn>
+
+============
+内核同页合并
+============
+
+KSM 是一种节省内存的数据去重功能，由CONFIG_KSM=y启用，并在2.6.32版本时被添加
+到Linux内核。详见 ``mm/ksm.c`` 的实现，以及http://lwn.net/Articles/306704和
+https://lwn.net/Articles/330589
+
+KSM的用户空间的接口在Documentation/translations/zh_CN/admin-guide/mm/ksm.rst
+文档中有描述。
+
+设计
+====
+
+概述
+----
+
+概述内容请见mm/ksm.c文档中的“DOC: Overview”
+
+逆映射
+------
+KSM维护着稳定树中的KSM页的逆映射信息。
+
+当KSM页面的共享数小于 ``max_page_sharing`` 的虚拟内存区域(VMAs)时，则代表了
+KSM页的稳定树其中的节点指向了一个rmap_item结构体类型的列表。同时，这个KSM页
+的 ``page->mapping`` 指向了该稳定树节点。
+
+如果共享数超过了阈值，KSM将给稳定树添加第二个维度。稳定树就变成链接一个或多
+个稳定树"副本"的"链"。每个副本都保留KSM页的逆映射信息，其中 ``page->mapping``
+指向该"副本"。
+
+每个链以及链接到该链中的所有"副本"强制不变的是，它们代表了相同的写保护内存
+内容，尽管任中一个"副本"是由同一片内存区的不同的KSM复制页所指向的。
+
+这样一来，相比与无限的逆映射链表，稳定树的查找计算复杂性不受影响。但在稳定树
+本身中不能有重复的KSM页面内容仍然是强制要求。
+
+由 ``max_page_sharing`` 强制决定的数据去重限制是必要的，以此来避免虚拟内存
+rmap链表变得过大。rmap的遍历具有O(N)的复杂度，其中N是共享页面的rmap_项（即
+虚拟映射）的数量，而这个共享页面的节点数量又被 ``max_page_sharing`` 所限制。
+因此，这有效地将线性O(N)计算复杂度从rmap遍历中分散到不同的KSM页面上。ksmd进
+程在稳定节点"链"上的遍历也是O(N)，但这个N是稳定树"副本"的数量，而不是rmap项
+的数量，因此它对ksmd性能没有显著影响。实际上，最佳稳定树"副本"的候选节点将
+保留在"副本"列表的开头。
+
+``max_page_sharing`` 的值设置得高了会促使更快的内存合并（因为将有更少的稳定
+树副本排队进入稳定节点chain->hlist）和更高的数据去重系数，但代价是在交换、压
+缩、NUMA平衡和页面迁移过程中可能导致KSM页的最大rmap遍历速度较慢。
+
+``stable_node_dups/stable_node_chains`` 的比值还受 ``max_page_sharing`` 调控
+的影响，高比值可能意味着稳定节点dup中存在碎片，这可以通过在ksmd中引入碎片算
+法来解决，该算法将rmap项从一个稳定节点dup重定位到另一个稳定节点dup，以便释放
+那些仅包含极少rmap项的稳定节点"dup"，但这可能会增加ksmd进程的CPU使用率，并可
+能会减慢应用程序在KSM页面上的只读计算。
+
+KSM会定期扫描稳定节点"链"中链接的所有稳定树"副本"，以便删减过时了的稳定节点。
+这种扫描的频率由 ``stable_node_chains_prune_millisecs`` 这个sysfs 接口定义。
+
+参考
+====
+内核代码请见mm/ksm.c。
+涉及的函数(mm_slot  ksm_scan  stable_node  rmap_item)。
-- 
GitLab


From f0abc76d7b8c2e1229bb5fee6f6bfffc01033b18 Mon Sep 17 00:00:00 2001
From: xu xin <xu.xin16@zte.com.cn>
Date: Thu, 10 Mar 2022 23:10:12 +0800
Subject: [PATCH 1276/1586] zh_CN: Add translations for admin-guide/mm/ksm.rst

Translate Documentation/admin-guide/mm/ksm.rst into Chinese.

Reviewed-by: Yang Yang <yang.yang29@zte.com.cn>
Reviewed-by: Alex Shi <alexs@kernel.org>
Reviewed-by: Yanteng Si <siyanteng@loongson.cn>
Signed-off-by: xu xin <xu.xin16@zte.com.cn>
Signed-off-by: Yanteng Si <siyanteng@loongson.cn>
Link: https://lore.kernel.org/r/f987a3a2cbffaad64f6e2377a5e393d9afbb099c.1646899056.git.siyanteng@loongson.cn
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../translations/zh_CN/admin-guide/mm/ksm.rst | 148 ++++++++++++++++++
 1 file changed, 148 insertions(+)
 create mode 100644 Documentation/translations/zh_CN/admin-guide/mm/ksm.rst

diff --git a/Documentation/translations/zh_CN/admin-guide/mm/ksm.rst b/Documentation/translations/zh_CN/admin-guide/mm/ksm.rst
new file mode 100644
index 0000000000000..4829156ef1aed
--- /dev/null
+++ b/Documentation/translations/zh_CN/admin-guide/mm/ksm.rst
@@ -0,0 +1,148 @@
+.. include:: ../../disclaimer-zh_CN.rst
+
+:Original: Documentation/admin-guide/mm/ksm.rst
+
+:翻译:
+
+  徐鑫 xu xin <xu.xin16@zte.com.cn>
+
+
+============
+内核同页合并
+============
+
+
+概述
+====
+
+KSM是一种能节省内存的数据去重功能，由CONFIG_KSM=y启用，并在2.6.32版本时被添
+加到Linux内核。详见 ``mm/ksm.c`` 的实现，以及http://lwn.net/Articles/306704
+和https://lwn.net/Articles/330589
+
+KSM最初目的是为了与KVM（即著名的内核共享内存）一起使用而开发的，通过共享虚拟机
+之间的公共数据，将更多虚拟机放入物理内存。但它对于任何会生成多个相同数据实例的
+应用程序都是很有用的。
+
+KSM的守护进程ksmd会定期扫描那些已注册的用户内存区域，查找内容相同的页面，这些
+页面可以被单个写保护页面替换（如果进程以后想要更新其内容，将自动复制）。使用：
+引用:`sysfs intraface  <ksm_sysfs>` 接口来配置KSM守护程序在单个过程中所扫描的页
+数以及两个过程之间的间隔时间。
+
+KSM只合并匿名（私有）页面，从不合并页缓存（文件）页面。KSM的合并页面最初只能被
+锁定在内核内存中，但现在可以就像其他用户页面一样被换出（但当它们被交换回来时共
+享会被破坏: ksmd必须重新发现它们的身份并再次合并）。
+
+以madvise控制KSM
+================
+
+KSM仅在特定的地址空间区域时运行，即应用程序通过使用如下所示的madvise(2)系统调
+用来请求某块地址成为可能的合并候选者的地址空间::
+
+    int madvise(addr, length, MADV_MERGEABLE)
+
+应用程序当然也可以通过调用::
+
+    int madvise(addr, length, MADV_UNMERGEABLE)
+
+来取消该请求，并恢复为非共享页面：此时KSM将去除合并在该范围内的任何合并页。注意：
+这个去除合并的调用可能突然需要的内存量超过实际可用的内存量-那么可能会出现EAGAIN
+失败，但更可能会唤醒OOM killer。
+
+如果KSM未被配置到正在运行的内核中，则madvise MADV_MERGEABLE 和 MADV_UNMERGEABLE
+的调用只会以EINVAL 失败。如果正在运行的内核是用CONFIG_KSM=y方式构建的，那么这些
+调用通常会成功：即使KSM守护程序当前没有运行，MADV_MERGEABLE 仍然会在KSM守护程序
+启动时注册范围，即使该范围不能包含KSM实际可以合并的任何页面，即使MADV_UNMERGEABLE
+应用于从未标记为MADV_MERGEABLE的范围。
+
+如果一块内存区域必须被拆分为至少一个新的MADV_MERGEABLE区域或MADV_UNMERGEABLE区域，
+当该进程将超过 ``vm.max_map_count`` 的设定，则madvise可能返回ENOMEM。（请参阅文档
+Documentation/admin-guide/sysctl/vm.rst）。
+
+与其他madvise调用一样，它们在用户地址空间的映射区域上使用：如果指定的范围包含未
+映射的间隙（尽管在中间的映射区域工作），它们将报告ENOMEM，如果没有足够的内存用于
+内部结构，则可能会因EAGAIN而失败。
+
+KSM守护进程sysfs接口
+====================
+
+KSM守护进程可以由``/sys/kernel/mm/ksm/`` 中的sysfs文件控制，所有人都可以读取，但
+只能由root用户写入。各接口解释如下：
+
+
+pages_to_scan
+        ksmd进程进入睡眠前要扫描的页数。
+        例如， ``echo 100 > /sys/kernel/mm/ksm/pages_to_scan``
+
+        默认值：100（该值被选择用于演示目的）
+
+sleep_millisecs
+        ksmd在下次扫描前应休眠多少毫秒
+        例如， ``echo 20 > /sys/kernel/mm/ksm/sleep_millisecs``
+
+        默认值：20（该值被选择用于演示目的）
+
+merge_across_nodes
+        指定是否可以合并来自不同NUMA节点的页面。当设置为0时，ksm仅合并在物理上位
+        于同一NUMA节点的内存区域中的页面。这降低了访问共享页面的延迟。在有明显的
+        NUMA距离上，具有更多节点的系统可能受益于设置该值为0时的更低延迟。而对于
+        需要对内存使用量最小化的较小系统来说，设置该值为1（默认设置）则可能会受
+        益于更大共享页面。在决定使用哪种设置之前，您可能希望比较系统在每种设置下
+        的性能。 ``merge_across_nodes`` 仅当系统中没有ksm共享页面时，才能被更改设
+        置：首先将接口`run` 设置为2从而对页进行去合并，然后在修改
+        ``merge_across_nodes`` 后再将‘run’又设置为1，以根据新设置来重新合并。
+
+        默认值：1（如早期的发布版本一样合并跨站点）
+
+run
+        * 设置为0可停止ksmd运行，但保留合并页面，
+        * 设置为1可运行ksmd，例如， ``echo 1 > /sys/kernel/mm/ksm/run`` ，
+        * 设置为2可停止ksmd运行，并且对所有目前已合并的页进行去合并，但保留可合并
+          区域以供下次运行。
+
+        默认值：0（必须设置为1才能激活KSM，除非禁用了CONFIG_SYSFS）
+
+use_zero_pages
+        指定是否应当特殊处理空页（即那些仅含zero的已分配页）。当该值设置为1时，
+        空页与内核零页合并，而不是像通常情况下那样空页自身彼此合并。这可以根据
+        工作负载的不同，在具有着色零页的架构上可以提高性能。启用此设置时应小心，
+        因为它可能会降低某些工作负载的KSM性能，比如，当待合并的候选页面的校验和
+        与空页面的校验和恰好匹配的时候。此设置可随时更改，仅对那些更改后再合并
+        的页面有效。
+
+        默认值：0（如同早期版本的KSM正常表现）
+
+max_page_sharing
+        单个KSM页面允许的最大共享站点数。这将强制执行重复数据消除限制，以避免涉
+        及遍历共享KSM页面的虚拟映射的虚拟内存操作的高延迟。最小值为2，因为新创
+        建的KSM页面将至少有两个共享者。该值越高，KSM合并内存的速度越快，去重
+        因子也越高，但是对于任何给定的KSM页面，虚拟映射的最坏情况遍历的速度也会
+        越慢。减慢了这种遍历速度就意味着在交换、压缩、NUMA平衡和页面迁移期间，
+        某些虚拟内存操作将有更高的延迟，从而降低这些虚拟内存操作调用者的响应能力。
+        其他任务如果不涉及执行虚拟映射遍历的VM操作，其任务调度延迟不受此参数的影
+        响，因为这些遍历本身是调度友好的。
+
+stable_node_chains_prune_millisecs
+        指定KSM检查特定页面的元数据的频率（即那些达到过时信息数据去重限制标准的
+        页面）单位是毫秒。较小的毫秒值将以更低的延迟来释放KSM元数据，但它们将使
+        ksmd在扫描期间使用更多CPU。如果还没有一个KSM页面达到 ``max_page_sharing``
+        标准，那就没有什么用。
+
+KSM与MADV_MERGEABLE的工作有效性体现于 ``/sys/kernel/mm/ksm/`` 路径下的接口：
+
+pages_shared
+        表示多少共享页正在被使用
+pages_sharing
+        表示还有多少站点正在共享这些共享页，即节省了多少
+pages_unshared
+        表示有多少页是唯一的，但被反复检查以进行合并
+pages_volatile
+        表示有多少页因变化太快而无法放在tree中
+full_scans
+        表示所有可合并区域已扫描多少次
+stable_node_chains
+        达到 ``max_page_sharing`` 限制的KSM页数
+stable_node_dups
+        重复的KSM页数
+
+比值 ``pages_sharing/pages_shared`` 的最大值受限制于 ``max_page_sharing``
+的设定。要想增加该比值，则相应地要增加 ``max_page_sharing`` 的值。
-- 
GitLab


From 72e1bfd1b8e9d7648ffea85eeefbc1d784182acb Mon Sep 17 00:00:00 2001
From: xu xin <xu.xin16@zte.com.cn>
Date: Thu, 10 Mar 2022 23:10:13 +0800
Subject: [PATCH 1277/1586] zh_CN: Add translation for admin-guide/mm/index.rst

Translate Documentation/admin-guide/mm/index.rst into Chinese.
Update Documentation/admin-guide/index.rst.

Reviewed-by: Yang Yang <yang.yang29@zte.com.cn>
Reviewed-by: Alex Shi <alexs@kernel.org>
Reviewed-by: Yanteng Si <siyanteng@loongson.cn>
Signed-off-by: xu xin <xu.xin16@zte.com.cn>
Signed-off-by: Yanteng Si <siyanteng@loongson.cn>
Link: https://lore.kernel.org/r/2d695dac05efc012b99fbc7525be65a421c7de03.1646899056.git.siyanteng@loongson.cn
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../translations/zh_CN/admin-guide/index.rst  |  2 +-
 .../zh_CN/admin-guide/mm/index.rst            | 49 +++++++++++++++++++
 2 files changed, 50 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/translations/zh_CN/admin-guide/mm/index.rst

diff --git a/Documentation/translations/zh_CN/admin-guide/index.rst b/Documentation/translations/zh_CN/admin-guide/index.rst
index 548e57f4b3f15..763055f161ddc 100644
--- a/Documentation/translations/zh_CN/admin-guide/index.rst
+++ b/Documentation/translations/zh_CN/admin-guide/index.rst
@@ -69,6 +69,7 @@ Todolist:
    lockup-watchdogs
    unicode
    sysrq
+   mm/index
 
 Todolist:
 
@@ -105,7 +106,6 @@ Todolist:
    LSM/index
    md
    media/index
-   mm/index
    module-signing
    mono
    namespaces/index
diff --git a/Documentation/translations/zh_CN/admin-guide/mm/index.rst b/Documentation/translations/zh_CN/admin-guide/mm/index.rst
new file mode 100644
index 0000000000000..44f66a45859dc
--- /dev/null
+++ b/Documentation/translations/zh_CN/admin-guide/mm/index.rst
@@ -0,0 +1,49 @@
+.. include:: ../../disclaimer-zh_CN.rst
+
+:Original:   Documentation/admin-guide/mm/index.rst
+
+:翻译:
+
+  徐鑫 xu xin <xu.xin16@zte.com.cn>
+
+
+========
+内存管理
+========
+
+Linux内存管理子系统，顾名思义，是负责系统中的内存管理。它包括了虚拟内存与请求
+分页的实现，内核内部结构和用户空间程序的内存分配、将文件映射到进程地址空间以
+及许多其他很酷的事情。
+
+Linux内存管理是一个具有许多可配置设置的复杂系统, 且这些设置中的大多数都可以通
+过 ``/proc`` 文件系统获得，并且可以使用 ``sysctl`` 进行查询和调整。这些API接
+口被描述在Documentation/admin-guide/sysctl/vm.rst文件和 `man 5 proc`_ 中。
+
+.. _man 5 proc: http://man7.org/linux/man-pages/man5/proc.5.html
+
+Linux内存管理有它自己的术语，如果你还不熟悉它，请考虑阅读下面参考：
+:ref:`Documentation/admin-guide/mm/concepts.rst <mm_concepts>`.
+
+在此目录下，我们详细描述了如何与Linux内存管理中的各种机制交互。
+
+.. toctree::
+   :maxdepth: 1
+
+   ksm
+
+Todolist:
+* concepts
+* cma_debugfs
+* damon/index
+* hugetlbpage
+* idle_page_tracking
+* memory-hotplug
+* nommu-mmap
+* numa_memory_policy
+* numaperf
+* pagemap
+* soft-dirty
+* swap_numa
+* transhuge
+* userfaultfd
+* zswap
-- 
GitLab


From bad0eb76dccade4f5a601944986b3e647b7cd443 Mon Sep 17 00:00:00 2001
From: Yanteng Si <siyanteng01@gmail.com>
Date: Thu, 10 Mar 2022 23:20:48 +0800
Subject: [PATCH 1278/1586] docs/zh_CN: Refactoring the admin-guide directory
 index

The Todolist in the html document looks a mess, now give it a nice looking format.

Signed-off-by: Yanteng Si <siyanteng@loongson.cn>
Reviewed-by: Alex Shi <alexs@kernel.org>
Link: https://lore.kernel.org/r/d410408ec13d6e9cff97da50a13d793a428e05cf.1646899089.git.siyanteng@loongson.cn
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../translations/zh_CN/admin-guide/index.rst  | 122 +++++++++---------
 1 file changed, 61 insertions(+), 61 deletions(-)

diff --git a/Documentation/translations/zh_CN/admin-guide/index.rst b/Documentation/translations/zh_CN/admin-guide/index.rst
index 763055f161ddc..be535ffaf4b0b 100644
--- a/Documentation/translations/zh_CN/admin-guide/index.rst
+++ b/Documentation/translations/zh_CN/admin-guide/index.rst
@@ -20,15 +20,15 @@ Linux 内核用户和管理员指南
 
 Todolist:
 
-   kernel-parameters
-   devices
-   sysctl/index
+*   kernel-parameters
+*   devices
+*   sysctl/index
 
 本节介绍CPU漏洞及其缓解措施。
 
 Todolist:
 
-   hw-vuln/index
+*   hw-vuln/index
 
 下面的一组文档，针对的是试图跟踪问题和bug的用户。
 
@@ -44,18 +44,18 @@ Todolist:
 
 Todolist:
 
-   reporting-bugs
-   ramoops
-   dynamic-debug-howto
-   kdump/index
-   perf/index
+*   reporting-bugs
+*   ramoops
+*   dynamic-debug-howto
+*   kdump/index
+*   perf/index
 
 这是应用程序开发人员感兴趣的章节的开始。可以在这里找到涵盖内核ABI各个
 方面的文档。
 
 Todolist:
 
-   sysfs-rules
+*   sysfs-rules
 
 本手册的其余部分包括各种指南，介绍如何根据您的喜好配置内核的特定行为。
 
@@ -73,57 +73,57 @@ Todolist:
 
 Todolist:
 
-   acpi/index
-   aoe/index
-   auxdisplay/index
-   bcache
-   binderfs
-   binfmt-misc
-   blockdev/index
-   bootconfig
-   braille-console
-   btmrvl
-   cgroup-v1/index
-   cgroup-v2
-   cifs/index
-   dell_rbu
-   device-mapper/index
-   edid
-   efi-stub
-   ext4
-   nfs/index
-   gpio/index
-   highuid
-   hw_random
-   initrd
-   iostats
-   java
-   jfs
-   kernel-per-CPU-kthreads
-   laptops/index
-   lcd-panel-cgram
-   ldm
-   LSM/index
-   md
-   media/index
-   module-signing
-   mono
-   namespaces/index
-   numastat
-   parport
-   perf-security
-   pm/index
-   pnp
-   rapidio
-   ras
-   rtc
-   serial-console
-   svga
-   thunderbolt
-   ufs
-   vga-softcursor
-   video-output
-   xfs
+*   acpi/index
+*   aoe/index
+*   auxdisplay/index
+*   bcache
+*   binderfs
+*   binfmt-misc
+*   blockdev/index
+*   bootconfig
+*   braille-console
+*   btmrvl
+*   cgroup-v1/index
+*   cgroup-v2
+*   cifs/index
+*   dell_rbu
+*   device-mapper/index
+*   edid
+*   efi-stub
+*   ext4
+*   nfs/index
+*   gpio/index
+*   highuid
+*   hw_random
+*   initrd
+*   iostats
+*   java
+*   jfs
+*   kernel-per-CPU-kthreads
+*   laptops/index
+*   lcd-panel-cgram
+*   ldm
+*   LSM/index
+*   md
+*   media/index
+*   module-signing
+*   mono
+*   namespaces/index
+*   numastat
+*   parport
+*   perf-security
+*   pm/index
+*   pnp
+*   rapidio
+*   ras
+*   rtc
+*   serial-console
+*   svga
+*   thunderbolt
+*   ufs
+*   vga-softcursor
+*   video-output
+*   xfs
 
 .. only::  subproject and html
 
-- 
GitLab


From 496cc140279b4517a23f4534e468ec9c66283f4b Mon Sep 17 00:00:00 2001
From: Yanteng Si <siyanteng01@gmail.com>
Date: Thu, 10 Mar 2022 23:20:49 +0800
Subject: [PATCH 1279/1586] docs/zh_CN: add admin-guide damon index translation

Translate .../admin-guide/mm/damon/index.rst into Chinese.

Signed-off-by: Yanteng Si <siyanteng@loongson.cn>
Reviewed-by: Alex Shi <alexs@kernel.org>
Link: https://lore.kernel.org/r/0251f09dc926972068329b87b0563dd432849497.1646899089.git.siyanteng@loongson.cn
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../zh_CN/admin-guide/mm/damon/index.rst      | 26 +++++++++++++++++++
 .../zh_CN/admin-guide/mm/index.rst            |  2 +-
 2 files changed, 27 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/translations/zh_CN/admin-guide/mm/damon/index.rst

diff --git a/Documentation/translations/zh_CN/admin-guide/mm/damon/index.rst b/Documentation/translations/zh_CN/admin-guide/mm/damon/index.rst
new file mode 100644
index 0000000000000..4581e1bc25062
--- /dev/null
+++ b/Documentation/translations/zh_CN/admin-guide/mm/damon/index.rst
@@ -0,0 +1,26 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: ../../../disclaimer-zh_CN.rst
+
+:Original: Documentation/admin-guide/mm/damon/index.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:校译:
+
+============
+监测数据访问
+============
+
+:doc:`DAMON </vm/damon/index>` 允许轻量级的数据访问监测。使用DAMON，
+用户可以分析他们系统的内存访问模式，并优化它们。
+
+.. toctree::
+   :maxdepth: 2
+
+Todolist:
+
+*   start
+*   usage
+*   reclaim
diff --git a/Documentation/translations/zh_CN/admin-guide/mm/index.rst b/Documentation/translations/zh_CN/admin-guide/mm/index.rst
index 44f66a45859dc..702271c5b6839 100644
--- a/Documentation/translations/zh_CN/admin-guide/mm/index.rst
+++ b/Documentation/translations/zh_CN/admin-guide/mm/index.rst
@@ -29,12 +29,12 @@ Linux内存管理有它自己的术语，如果你还不熟悉它，请考虑阅
 .. toctree::
    :maxdepth: 1
 
+   damon/index
    ksm
 
 Todolist:
 * concepts
 * cma_debugfs
-* damon/index
 * hugetlbpage
 * idle_page_tracking
 * memory-hotplug
-- 
GitLab


From 94b140b085972eccd557f8cf5e4944e5c1486418 Mon Sep 17 00:00:00 2001
From: Yanteng Si <siyanteng01@gmail.com>
Date: Thu, 10 Mar 2022 23:20:50 +0800
Subject: [PATCH 1280/1586] docs/zh_CN: add admin-guide damon start translation

Translate Documentation/admin-guide/mm/damon/start.rst into Chinese.

Signed-off-by: Yanteng Si <siyanteng@loongson.cn>
Reviewed-by: Alex Shi <alexs@kernel.org>
Link: https://lore.kernel.org/r/e6e328be018cbf5f9105adfdad56c951acbb8c8f.1646899089.git.siyanteng@loongson.cn
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../zh_CN/admin-guide/mm/damon/index.rst      |   4 +-
 .../zh_CN/admin-guide/mm/damon/start.rst      | 132 ++++++++++++++++++
 2 files changed, 135 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/translations/zh_CN/admin-guide/mm/damon/start.rst

diff --git a/Documentation/translations/zh_CN/admin-guide/mm/damon/index.rst b/Documentation/translations/zh_CN/admin-guide/mm/damon/index.rst
index 4581e1bc25062..4aa122f375e3f 100644
--- a/Documentation/translations/zh_CN/admin-guide/mm/damon/index.rst
+++ b/Documentation/translations/zh_CN/admin-guide/mm/damon/index.rst
@@ -19,8 +19,10 @@
 .. toctree::
    :maxdepth: 2
 
+   start
+
 Todolist:
 
-*   start
+
 *   usage
 *   reclaim
diff --git a/Documentation/translations/zh_CN/admin-guide/mm/damon/start.rst b/Documentation/translations/zh_CN/admin-guide/mm/damon/start.rst
new file mode 100644
index 0000000000000..67d1b49481dc9
--- /dev/null
+++ b/Documentation/translations/zh_CN/admin-guide/mm/damon/start.rst
@@ -0,0 +1,132 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: ../../../disclaimer-zh_CN.rst
+
+:Original: Documentation/admin-guide/mm/damon/start.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:校译:
+
+========
+入门指南
+========
+
+本文通过演示DAMON的默认用户空间工具，简要地介绍了如何使用DAMON。请注意，为了简洁
+起见，本文档只描述了它的部分功能。更多细节请参考该工具的使用文档。
+`doc <https://github.com/awslabs/damo/blob/next/USAGE.md>`_ .
+
+
+前提条件
+========
+
+内核
+----
+
+首先，你要确保你当前系统中跑的内核构建时选定了这个功能选项 ``CONFIG_DAMON_*=y``.
+
+
+用户空间工具
+------------
+
+在演示中，我们将使用DAMON的默认用户空间工具，称为DAMON Operator（DAMO）。它可以在
+https://github.com/awslabs/damo找到。下面的例子假设DAMO在你的$PATH上。当然，但
+这并不是强制性的。
+
+因为DAMO使用的是DAMON的debugfs接口(详情请参考 :doc:`usage` 中的使用方法) 你应该
+确保debugfs被挂载。手动挂载它，如下所示::
+
+    # mount -t debugfs none /sys/kernel/debug/
+
+或者在你的 ``/etc/fstab`` 文件中添加以下一行，这样你的系统就可以在启动时自动挂载
+debugfs了::
+
+    debugfs /sys/kernel/debug debugfs defaults 0 0
+
+
+记录数据访问模式
+================
+
+下面的命令记录了一个程序的内存访问模式，并将监测结果保存到文件中。 ::
+
+    $ git clone https://github.com/sjp38/masim
+    $ cd masim; make; ./masim ./configs/zigzag.cfg &
+    $ sudo damo record -o damon.data $(pidof masim)
+
+命令的前两行下载了一个人工内存访问生成器程序并在后台运行。生成器将重复地逐一访问两个
+100 MiB大小的内存区域。你可以用你的真实工作负载来代替它。最后一行要求 ``damo`` 将
+访问模式记录在 ``damon.data`` 文件中。
+
+
+将记录的模式可视化
+==================
+
+你可以在heatmap中直观地看到这种模式，显示哪个内存区域（X轴）何时被访问（Y轴）以及访
+问的频率（数字）。::
+
+    $ sudo damo report heats --heatmap stdout
+    22222222222222222222222222222222222222211111111111111111111111111111111111111100
+    44444444444444444444444444444444444444434444444444444444444444444444444444443200
+    44444444444444444444444444444444444444433444444444444444444444444444444444444200
+    33333333333333333333333333333333333333344555555555555555555555555555555555555200
+    33333333333333333333333333333333333344444444444444444444444444444444444444444200
+    22222222222222222222222222222222222223355555555555555555555555555555555555555200
+    00000000000000000000000000000000000000288888888888888888888888888888888888888400
+    00000000000000000000000000000000000000288888888888888888888888888888888888888400
+    33333333333333333333333333333333333333355555555555555555555555555555555555555200
+    88888888888888888888888888888888888888600000000000000000000000000000000000000000
+    88888888888888888888888888888888888888600000000000000000000000000000000000000000
+    33333333333333333333333333333333333333444444444444444444444444444444444444443200
+    00000000000000000000000000000000000000288888888888888888888888888888888888888400
+    [...]
+    # access_frequency:  0  1  2  3  4  5  6  7  8  9
+    # x-axis: space (139728247021568-139728453431248: 196.848 MiB)
+    # y-axis: time (15256597248362-15326899978162: 1 m 10.303 s)
+    # resolution: 80x40 (2.461 MiB and 1.758 s for each character)
+
+你也可以直观地看到工作集的大小分布，按大小排序。::
+
+    $ sudo damo report wss --range 0 101 10
+    # <percentile> <wss>
+    # target_id     18446632103789443072
+    # avr:  107.708 MiB
+      0             0 B |                                                           |
+     10      95.328 MiB |****************************                               |
+     20      95.332 MiB |****************************                               |
+     30      95.340 MiB |****************************                               |
+     40      95.387 MiB |****************************                               |
+     50      95.387 MiB |****************************                               |
+     60      95.398 MiB |****************************                               |
+     70      95.398 MiB |****************************                               |
+     80      95.504 MiB |****************************                               |
+     90     190.703 MiB |*********************************************************  |
+    100     196.875 MiB |***********************************************************|
+
+在上述命令中使用 ``--sortby`` 选项，可以显示工作集的大小是如何按时间顺序变化的。::
+
+    $ sudo damo report wss --range 0 101 10 --sortby time
+    # <percentile> <wss>
+    # target_id     18446632103789443072
+    # avr:  107.708 MiB
+      0       3.051 MiB |                                                           |
+     10     190.703 MiB |***********************************************************|
+     20      95.336 MiB |*****************************                              |
+     30      95.328 MiB |*****************************                              |
+     40      95.387 MiB |*****************************                              |
+     50      95.332 MiB |*****************************                              |
+     60      95.320 MiB |*****************************                              |
+     70      95.398 MiB |*****************************                              |
+     80      95.398 MiB |*****************************                              |
+     90      95.340 MiB |*****************************                              |
+    100      95.398 MiB |*****************************                              |
+
+
+数据访问模式感知的内存管理
+==========================
+
+以下三个命令使每一个大小>=4K的内存区域在你的工作负载中没有被访问>=60秒，就会被换掉。 ::
+
+    $ echo "#min-size max-size min-acc max-acc min-age max-age action" > test_scheme
+    $ echo "4K        max      0       0       60s     max     pageout" >> test_scheme
+    $ damo schemes -c test_scheme <pid of your workload>
-- 
GitLab


From 93b51a1019471839fbd528e39550cccbe000070b Mon Sep 17 00:00:00 2001
From: Yanteng Si <siyanteng01@gmail.com>
Date: Thu, 10 Mar 2022 23:20:51 +0800
Subject: [PATCH 1281/1586] docs/zh_CN: add damon usage translation

Translate .../admin-guide/mm/damon/usage.rst into Chinese.

Signed-off-by: Yanteng Si <siyanteng@loongson.cn>
Reviewed-by: Alex Shi <alexs@kernel.org>
Link: https://lore.kernel.org/r/431f1c2a158c61a6556f58048cb54961ab7a8790.1646899089.git.siyanteng@loongson.cn
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../zh_CN/admin-guide/mm/damon/index.rst      |   2 +-
 .../zh_CN/admin-guide/mm/damon/usage.rst      | 286 ++++++++++++++++++
 2 files changed, 287 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/translations/zh_CN/admin-guide/mm/damon/usage.rst

diff --git a/Documentation/translations/zh_CN/admin-guide/mm/damon/index.rst b/Documentation/translations/zh_CN/admin-guide/mm/damon/index.rst
index 4aa122f375e3f..bd1f1a551f0fa 100644
--- a/Documentation/translations/zh_CN/admin-guide/mm/damon/index.rst
+++ b/Documentation/translations/zh_CN/admin-guide/mm/damon/index.rst
@@ -20,9 +20,9 @@
    :maxdepth: 2
 
    start
+   usage
 
 Todolist:
 
 
-*   usage
 *   reclaim
diff --git a/Documentation/translations/zh_CN/admin-guide/mm/damon/usage.rst b/Documentation/translations/zh_CN/admin-guide/mm/damon/usage.rst
new file mode 100644
index 0000000000000..5d75333472163
--- /dev/null
+++ b/Documentation/translations/zh_CN/admin-guide/mm/damon/usage.rst
@@ -0,0 +1,286 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: ../../../disclaimer-zh_CN.rst
+
+:Original: Documentation/admin-guide/mm/damon/usage.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:校译:
+
+========
+详细用法
+========
+
+DAMON 为不同的用户提供了下面三种接口。
+
+- *DAMON用户空间工具。*
+  `这 <https://github.com/awslabs/damo>`_ 为有这特权的人， 如系统管理员，希望有一个刚好
+  可以工作的人性化界面。
+  使用它，用户可以以人性化的方式使用DAMON的主要功能。不过，它可能不会为特殊情况进行高度调整。
+  它同时支持虚拟和物理地址空间的监测。更多细节，请参考它的 `使用文档
+  <https://github.com/awslabs/damo/blob/next/USAGE.md>`_。
+- *debugfs接口。*
+  :ref:`这 <debugfs_interface>` 是为那些希望更高级的使用DAMON的特权用户空间程序员准备的。
+  使用它，用户可以通过读取和写入特殊的debugfs文件来使用DAMON的主要功能。因此，你可以编写和使
+  用你个性化的DAMON debugfs包装程序，代替你读/写debugfs文件。  `DAMON用户空间工具
+  <https://github.com/awslabs/damo>`_ 就是这种程序的一个例子  它同时支持虚拟和物理地址
+  空间的监测。注意，这个界面只提供简单的监测结果 :ref:`统计 <damos_stats>`。对于详细的监测
+  结果，DAMON提供了一个:ref:`跟踪点 <tracepoint>`。
+
+- *内核空间编程接口。*
+  :doc:`This </vm/damon/api>` 这是为内核空间程序员准备的。使用它，用户可以通过为你编写内
+  核空间的DAMON应用程序，最灵活有效地利用DAMON的每一个功能。你甚至可以为各种地址空间扩展DAMON。
+  详细情况请参考接口 :doc:`文件 </vm/damon/api>`。
+
+
+debugfs接口
+===========
+
+DAMON导出了八个文件, ``attrs``, ``target_ids``, ``init_regions``,
+``schemes``, ``monitor_on``, ``kdamond_pid``, ``mk_contexts`` 和
+``rm_contexts`` under its debugfs directory, ``<debugfs>/damon/``.
+
+
+属性
+----
+
+用户可以通过读取和写入 ``attrs`` 文件获得和设置 ``采样间隔`` 、 ``聚集间隔`` 、 ``区域更新间隔``
+以及监测目标区域的最小/最大数量。要详细了解监测属性，请参考 `:doc:/vm/damon/design` 。例如，
+下面的命令将这些值设置为5ms、100ms、1000ms、10和1000，然后再次检查::
+
+    # cd <debugfs>/damon
+    # echo 5000 100000 1000000 10 1000 > attrs
+    # cat attrs
+    5000 100000 1000000 10 1000
+
+
+目标ID
+------
+
+一些类型的地址空间支持多个监测目标。例如，虚拟内存地址空间的监测可以有多个进程作为监测目标。用户
+可以通过写入目标的相关id值来设置目标，并通过读取 ``target_ids`` 文件来获得当前目标的id。在监
+测虚拟地址空间的情况下，这些值应该是监测目标进程的pid。例如，下面的命令将pid为42和4242的进程设
+为监测目标，并再次检查::
+
+    # cd <debugfs>/damon
+    # echo 42 4242 > target_ids
+    # cat target_ids
+    42 4242
+
+用户还可以通过在文件中写入一个特殊的关键字 "paddr\n" 来监测系统的物理内存地址空间。因为物理地
+址空间监测不支持多个目标，读取文件会显示一个假值，即 ``42`` ，如下图所示::
+
+    # cd <debugfs>/damon
+    # echo paddr > target_ids
+    # cat target_ids
+    42
+
+请注意，设置目标ID并不启动监测。
+
+
+初始监测目标区域
+----------------
+
+在虚拟地址空间监测的情况下，DAMON自动设置和更新监测的目标区域，这样就可以覆盖目标进程的整个
+内存映射。然而，用户可能希望将监测区域限制在特定的地址范围内，如堆、栈或特定的文件映射区域。
+或者，一些用户可以知道他们工作负载的初始访问模式，因此希望为“自适应区域调整”设置最佳初始区域。
+
+相比之下，DAMON在物理内存监测的情况下不会自动设置和更新监测目标区域。因此，用户应该自己设置
+监测目标区域。
+
+在这种情况下，用户可以通过在 ``init_regions`` 文件中写入适当的值，明确地设置他们想要的初
+始监测目标区域。输入的每一行应代表一个区域，形式如下::
+
+    <target idx> <start address> <end address>
+
+目标idx应该是 ``target_ids`` 文件中目标的索引，从 ``0`` 开始，区域应该按照地址顺序传递。
+例如，下面的命令将设置几个地址范围， ``1-100`` 和 ``100-200`` 作为pid 42的初始监测目标
+区域，这是 ``target_ids`` 中的第一个（索引 ``0`` ），另外几个地址范围， ``20-40`` 和
+``50-100`` 作为pid 4242的地址，这是 ``target_ids`` 中的第二个（索引 ``1`` ）::
+
+    # cd <debugfs>/damon
+    # cat target_ids
+    42 4242
+    # echo "0   1       100
+            0   100     200
+            1   20      40
+            1   50      100" > init_regions
+
+请注意，这只是设置了初始的监测目标区域。在虚拟内存监测的情况下，DAMON会在一个 ``区域更新间隔``
+后自动更新区域的边界。因此，在这种情况下，如果用户不希望更新的话，应该把 ``区域的更新间隔`` 设
+置得足够大。
+
+
+方案
+----
+
+对于通常的基于DAMON的数据访问感知的内存管理优化，用户只是希望系统对特定访问模式的内存区域应用内
+存管理操作。DAMON从用户那里接收这种形式化的操作方案，并将这些方案应用到目标进程中。
+
+用户可以通过读取和写入 ``scheme`` debugfs文件来获得和设置这些方案。读取该文件还可以显示每个
+方案的统计数据。在文件中，每一个方案都应该在每一行中以下列形式表示出来::
+
+    <target access pattern> <action> <quota> <watermarks>
+
+你可以通过简单地在文件中写入一个空字符串来禁用方案。
+
+目标访问模式
+~~~~~~~~~~~~
+
+``<目标访问模式>`` 是由三个范围构成的，形式如下::
+
+    min-size max-size min-acc max-acc min-age max-age
+
+具体来说，区域大小的字节数（ `min-size` 和 `max-size` ），访问频率的每聚合区间的监测访问次
+数（ `min-acc` 和 `max-acc` ），区域年龄的聚合区间数（ `min-age` 和 `max-age` ）都被指定。
+请注意，这些范围是封闭区间。
+
+动作
+~~~~
+
+``<action>`` 是一个预定义的内存管理动作的整数，DAMON将应用于具有目标访问模式的区域。支持
+的数字和它们的含义如下::
+
+ - 0: Call ``madvise()`` for the region with ``MADV_WILLNEED``
+ - 1: Call ``madvise()`` for the region with ``MADV_COLD``
+ - 2: Call ``madvise()`` for the region with ``MADV_PAGEOUT``
+ - 3: Call ``madvise()`` for the region with ``MADV_HUGEPAGE``
+ - 4: Call ``madvise()`` for the region with ``MADV_NOHUGEPAGE``
+ - 5: Do nothing but count the statistics
+
+配额
+~~~~
+
+每个 ``动作`` 的最佳 ``目标访问模式`` 取决于工作负载，所以不容易找到。更糟糕的是，将某个
+动作的方案设置得过于激进会导致严重的开销。为了避免这种开销，用户可以通过下面表格中的 ``<quota>``
+来限制方案的时间和大小配额::
+
+    <ms> <sz> <reset interval> <priority weights>
+
+这使得DAMON在 ``<reset interval>`` 毫秒内，尽量只用 ``<ms>`` 毫秒的时间对 ``目标访
+问模式`` 的内存区域应用动作，并在 ``<reset interval>`` 内只对最多<sz>字节的内存区域应
+用动作。将 ``<ms>`` 和 ``<sz>`` 都设置为零，可以禁用配额限制。
+
+当预计超过配额限制时，DAMON会根据 ``目标访问模式`` 的大小、访问频率和年龄，对发现的内存
+区域进行优先排序。为了实现个性化的优先级，用户可以在 ``<优先级权重>`` 中设置这三个属性的
+权重，具体形式如下::
+
+    <size weight> <access frequency weight> <age weight>
+
+水位
+~~~~
+
+有些方案需要根据系统特定指标的当前值来运行，如自由内存比率。对于这种情况，用户可以为该条
+件指定水位。::
+
+    <metric> <check interval> <high mark> <middle mark> <low mark>
+
+``<metric>`` 是一个预定义的整数，用于要检查的度量。支持的数字和它们的含义如下。
+
+ - 0: 忽视水位
+ - 1: 系统空闲内存率 (千分比)
+
+每隔 ``<检查间隔>`` 微秒检查一次公制的值。
+
+如果该值高于 ``<高标>`` 或低于 ``<低标>`` ，该方案被停用。如果该值低于 ``<中标>`` ，
+该方案将被激活。
+
+统计数据
+~~~~~~~~
+
+它还统计每个方案被尝试应用的区域的总数量和字节数，每个方案被成功应用的区域的两个数量，以
+及超过配额限制的总数量。这些统计数据可用于在线分析或调整方案。
+
+统计数据可以通过读取方案文件来显示。读取该文件将显示你在每一行中输入的每个 ``方案`` ，
+统计的五个数字将被加在每一行的末尾。
+
+例子
+~~~~
+
+下面的命令应用了一个方案：”如果一个大小为[4KiB, 8KiB]的内存区域在[10, 20]的聚合时间
+间隔内显示出每一个聚合时间间隔[0, 5]的访问量，请分页出该区域。对于分页，每秒最多只能使
+用10ms，而且每秒分页不能超过1GiB。在这一限制下，首先分页出具有较长年龄的内存区域。另外，
+每5秒钟检查一次系统的可用内存率，当可用内存率低于50%时开始监测和分页，但如果可用内存率
+大于60%，或低于30%，则停止监测“::
+
+    # cd <debugfs>/damon
+    # scheme="4096 8192  0 5    10 20    2"  # target access pattern and action
+    # scheme+=" 10 $((1024*1024*1024)) 1000" # quotas
+    # scheme+=" 0 0 100"                     # prioritization weights
+    # scheme+=" 1 5000000 600 500 300"       # watermarks
+    # echo "$scheme" > schemes
+
+
+开关
+----
+
+除非你明确地启动监测，否则如上所述的文件设置不会产生效果。你可以通过写入和读取 ``monitor_on``
+文件来启动、停止和检查监测的当前状态。写入 ``on`` 该文件可以启动对有属性的目标的监测。写入
+``off`` 该文件则停止这些目标。如果每个目标进程被终止，DAMON也会停止。下面的示例命令开启、关
+闭和检查DAMON的状态::
+
+    # cd <debugfs>/damon
+    # echo on > monitor_on
+    # echo off > monitor_on
+    # cat monitor_on
+    off
+
+请注意，当监测开启时，你不能写到上述的debugfs文件。如果你在DAMON运行时写到这些文件，将会返
+回一个错误代码，如 ``-EBUSY`` 。
+
+
+监测线程PID
+-----------
+
+DAMON通过一个叫做kdamond的内核线程来进行请求监测。你可以通过读取 ``kdamond_pid`` 文件获
+得该线程的 ``pid`` 。当监测被 ``关闭`` 时，读取该文件不会返回任何信息::
+
+    # cd <debugfs>/damon
+    # cat monitor_on
+    off
+    # cat kdamond_pid
+    none
+    # echo on > monitor_on
+    # cat kdamond_pid
+    18594
+
+
+使用多个监测线程
+----------------
+
+每个监测上下文都会创建一个 ``kdamond`` 线程。你可以使用 ``mk_contexts`` 和 ``rm_contexts``
+文件为多个 ``kdamond`` 需要的用例创建和删除监测上下文。
+
+将新上下文的名称写入 ``mk_contexts`` 文件，在 ``DAMON debugfs`` 目录上创建一个该名称的目录。
+该目录将有该上下文的 ``DAMON debugfs`` 文件::
+
+    # cd <debugfs>/damon
+    # ls foo
+    # ls: cannot access 'foo': No such file or directory
+    # echo foo > mk_contexts
+    # ls foo
+    # attrs  init_regions  kdamond_pid  schemes  target_ids
+
+如果不再需要上下文，你可以通过把上下文的名字放到 ``rm_contexts`` 文件中来删除它和相应的目录::
+
+    # echo foo > rm_contexts
+    # ls foo
+    # ls: cannot access 'foo': No such file or directory
+
+注意， ``mk_contexts`` 、 ``rm_contexts`` 和 ``monitor_on`` 文件只在根目录下。
+
+
+监测结果的监测点
+================
+
+DAMON通过一个tracepoint ``damon:damon_aggregated`` 提供监测结果.  当监测开启时，你可
+以记录追踪点事件，并使用追踪点支持工具如perf显示结果。比如说::
+
+    # echo on > monitor_on
+    # perf record -e damon:damon_aggregated &
+    # sleep 5
+    # kill 9 $(pidof perf)
+    # echo off > monitor_on
+    # perf script
-- 
GitLab


From ffce1439fe31590fdbacf17b638f2d62efebf02d Mon Sep 17 00:00:00 2001
From: Yanteng Si <siyanteng01@gmail.com>
Date: Thu, 10 Mar 2022 23:20:52 +0800
Subject: [PATCH 1282/1586] docs/zh_CN: add damon reclaim translation

Translate .../admin-guide/mm/damon/reclaim.rst into Chinese.

Signed-off-by: Yanteng Si <siyanteng@loongson.cn>
Reviewed-by: Alex Shi <alexs@kernel.org>
Link: https://lore.kernel.org/r/0a436fa78814bb0a7b9c2f3049e544b1e1802560.1646899089.git.siyanteng@loongson.cn
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../zh_CN/admin-guide/mm/damon/index.rst      |   4 +-
 .../zh_CN/admin-guide/mm/damon/reclaim.rst    | 232 ++++++++++++++++++
 2 files changed, 234 insertions(+), 2 deletions(-)
 create mode 100644 Documentation/translations/zh_CN/admin-guide/mm/damon/reclaim.rst

diff --git a/Documentation/translations/zh_CN/admin-guide/mm/damon/index.rst b/Documentation/translations/zh_CN/admin-guide/mm/damon/index.rst
index bd1f1a551f0fa..0c8276109fc0b 100644
--- a/Documentation/translations/zh_CN/admin-guide/mm/damon/index.rst
+++ b/Documentation/translations/zh_CN/admin-guide/mm/damon/index.rst
@@ -21,8 +21,8 @@
 
    start
    usage
+   reclaim
+
 
-Todolist:
 
 
-*   reclaim
diff --git a/Documentation/translations/zh_CN/admin-guide/mm/damon/reclaim.rst b/Documentation/translations/zh_CN/admin-guide/mm/damon/reclaim.rst
new file mode 100644
index 0000000000000..9e541578f38d3
--- /dev/null
+++ b/Documentation/translations/zh_CN/admin-guide/mm/damon/reclaim.rst
@@ -0,0 +1,232 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: ../../../disclaimer-zh_CN.rst
+
+:Original: Documentation/admin-guide/mm/damon/reclaim.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:校译:
+
+===============
+基于DAMON的回收
+===============
+
+基于DAMON的回收（DAMON_RECLAIM）是一个静态的内核模块，旨在用于轻度内存压力下的主动和轻
+量级的回收。它的目的不是取代基于LRU列表的页面回收，而是有选择地用于不同程度的内存压力和要
+求。
+
+哪些地方需要主动回收？
+======================
+
+在一般的内存超量使用（over-committed systems，虚拟化相关术语）的系统上，主动回收冷页
+有助于节省内存和减少延迟高峰，这些延迟是由直接回收进程或kswapd的CPU消耗引起的，同时只产
+生最小的性能下降 [1]_ [2]_ 。
+
+基于空闲页报告 [3]_ 的内存过度承诺的虚拟化系统就是很好的例子。在这样的系统中，客户机
+向主机报告他们的空闲内存，而主机则将报告的内存重新分配给其他客户。因此，系统的内存得到了充
+分的利用。然而，客户可能不那么节省内存，主要是因为一些内核子系统和用户空间应用程序被设计为
+使用尽可能多的内存。然后，客户机可能只向主机报告少量的内存是空闲的，导致系统的内存利用率下降。
+在客户中运行主动回收可以缓解这个问题。
+
+它是如何工作的？
+================
+
+DAMON_RECLAIM找到在特定时间内没有被访问的内存区域并分页。为了避免它在分页操作中消耗过多
+的CPU，可以配置一个速度限制。在这个速度限制下，它首先分页出那些没有被访问过的内存区域。系
+统管理员还可以配置在什么情况下这个方案应该自动激活和停用三个内存压力水位。
+
+接口: 模块参数
+==============
+
+要使用这个功能，你首先要确保你的系统运行在一个以 ``CONFIG_DAMON_RECLAIM=y`` 构建的内
+核上。
+
+为了让系统管理员启用或禁用它，并为给定的系统进行调整，DAMON_RECLAIM利用了模块参数。也就
+是说，你可以把 ``damon_reclaim.<parameter>=<value>`` 放在内核启动命令行上，或者把
+适当的值写入 ``/sys/modules/damon_reclaim/parameters/<parameter>`` 文件。
+
+注意，除 ``启用`` 外的参数值只在DAMON_RECLAIM启动时应用。因此，如果你想在运行时应用新
+的参数值，而DAMON_RECLAIM已经被启用，你应该通过 ``启用`` 的参数文件禁用和重新启用它。
+在重新启用之前，应将新的参数值写入适当的参数值中。
+
+下面是每个参数的描述。
+
+enable
+------
+
+启用或禁用DAMON_RECLAIM。
+
+你可以通过把这个参数的值设置为 ``Y`` 来启用DAMON_RCLAIM，把它设置为 ``N`` 可以禁用
+DAMON_RECLAIM。注意，由于基于水位的激活条件，DAMON_RECLAIM不能进行真正的监测和回收。
+这一点请参考下面关于水位参数的描述。
+
+min_age
+-------
+
+识别冷内存区域的时间阈值，单位是微秒。
+
+如果一个内存区域在这个时间或更长的时间内没有被访问，DAMON_RECLAIM会将该区域识别为冷的，
+并回收它。
+
+默认为120秒。
+
+quota_ms
+--------
+
+回收的时间限制，以毫秒为单位。
+
+DAMON_RECLAIM 试图在一个时间窗口（quota_reset_interval_ms）内只使用到这个时间，以
+尝试回收冷页。这可以用来限制DAMON_RECLAIM的CPU消耗。如果该值为零，则该限制被禁用。
+
+默认为10ms。
+
+quota_sz
+--------
+
+回收的内存大小限制，单位为字节。
+
+DAMON_RECLAIM 收取在一个时间窗口（quota_reset_interval_ms）内试图回收的内存量，并
+使其不超过这个限制。这可以用来限制CPU和IO的消耗。如果该值为零，则限制被禁用。
+
+默认情况下是128 MiB。
+
+quota_reset_interval_ms
+-----------------------
+
+时间/大小配额收取重置间隔，单位为毫秒。
+
+时间（quota_ms）和大小（quota_sz）的配额的目标重置间隔。也就是说，DAMON_RECLAIM在
+尝试回收‘不’超过quota_ms毫秒或quota_sz字节的内存。
+
+默认为1秒。
+
+wmarks_interval
+---------------
+
+当DAMON_RECLAIM被启用但由于其水位规则而不活跃时，在检查水位之前的最小等待时间。
+
+wmarks_high
+-----------
+
+高水位的可用内存率（每千字节）。
+
+如果系统的可用内存（以每千字节为单位）高于这个数值，DAMON_RECLAIM就会变得不活跃，所以
+它什么也不做，只是定期检查水位。
+
+wmarks_mid
+----------
+
+中间水位的可用内存率（每千字节）。
+
+如果系统的空闲内存（以每千字节为单位）在这个和低水位线之间，DAMON_RECLAIM就会被激活，
+因此开始监测和回收。
+
+wmarks_low
+----------
+
+低水位的可用内存率（每千字节）。
+
+如果系统的空闲内存（以每千字节为单位）低于这个数值，DAMON_RECLAIM就会变得不活跃，所以
+它除了定期检查水位外什么都不做。在这种情况下，系统会退回到基于LRU列表的页面粒度回收逻辑。
+
+sample_interval
+---------------
+
+监测的采样间隔，单位是微秒。
+
+DAMON用于监测冷内存的采样间隔。更多细节请参考DAMON文档 (:doc:`usage`) 。
+
+aggr_interval
+-------------
+
+监测的聚集间隔，单位是微秒。
+
+DAMON对冷内存监测的聚集间隔。更多细节请参考DAMON文档 (:doc:`usage`)。
+
+min_nr_regions
+--------------
+
+监测区域的最小数量。
+
+DAMON用于冷内存监测的最小监测区域数。这可以用来设置监测质量的下限。但是，设
+置的太高可能会导致监测开销的增加。更多细节请参考DAMON文档 (:doc:`usage`) 。
+
+max_nr_regions
+--------------
+
+监测区域的最大数量。
+
+DAMON用于冷内存监测的最大监测区域数。这可以用来设置监测开销的上限值。但是，
+设置得太低可能会导致监测质量不好。更多细节请参考DAMON文档 (:doc:`usage`) 。
+
+monitor_region_start
+--------------------
+
+目标内存区域的物理地址起点。
+
+DAMON_RECLAIM将对其进行工作的内存区域的起始物理地址。也就是说，DAMON_RECLAIM
+将在这个区域中找到冷的内存区域并进行回收。默认情况下，该区域使用最大系统内存区。
+
+monitor_region_end
+------------------
+
+目标内存区域的结束物理地址。
+
+DAMON_RECLAIM将对其进行工作的内存区域的末端物理地址。也就是说，DAMON_RECLAIM将
+在这个区域内找到冷的内存区域并进行回收。默认情况下，该区域使用最大系统内存区。
+
+kdamond_pid
+-----------
+
+DAMON线程的PID。
+
+如果DAMON_RECLAIM被启用，这将成为工作线程的PID。否则，为-1。
+
+nr_reclaim_tried_regions
+------------------------
+
+试图通过DAMON_RECLAIM回收的内存区域的数量。
+
+bytes_reclaim_tried_regions
+---------------------------
+
+试图通过DAMON_RECLAIM回收的内存区域的总字节数。
+
+nr_reclaimed_regions
+--------------------
+
+通过DAMON_RECLAIM成功回收的内存区域的数量。
+
+bytes_reclaimed_regions
+-----------------------
+
+通过DAMON_RECLAIM成功回收的内存区域的总字节数。
+
+nr_quota_exceeds
+----------------
+
+超过时间/空间配额限制的次数。
+
+例子
+====
+
+下面的运行示例命令使DAMON_RECLAIM找到30秒或更长时间没有访问的内存区域并“回收”？
+为了避免DAMON_RECLAIM在分页操作中消耗过多的CPU时间，回收被限制在每秒1GiB以内。
+它还要求DAMON_RECLAIM在系统的可用内存率超过50%时不做任何事情，但如果它低于40%时
+就开始真正的工作。如果DAMON_RECLAIM没有取得进展，因此空闲内存率低于20%，它会要求
+DAMON_RECLAIM再次什么都不做，这样我们就可以退回到基于LRU列表的页面粒度回收了::
+
+    # cd /sys/modules/damon_reclaim/parameters
+    # echo 30000000 > min_age
+    # echo $((1 * 1024 * 1024 * 1024)) > quota_sz
+    # echo 1000 > quota_reset_interval_ms
+    # echo 500 > wmarks_high
+    # echo 400 > wmarks_mid
+    # echo 200 > wmarks_low
+    # echo Y > enabled
+
+.. [1] https://research.google/pubs/pub48551/
+.. [2] https://lwn.net/Articles/787611/
+.. [3] https://www.kernel.org/doc/html/latest/vm/free_page_reporting.html
-- 
GitLab


From 89ee9301ac16d848d8dccf378d6ab3266fa80f62 Mon Sep 17 00:00:00 2001
From: Vincent Whitchurch <vincent.whitchurch@axis.com>
Date: Thu, 10 Mar 2022 13:42:30 +0100
Subject: [PATCH 1283/1586] docs: UML: Mention telnetd for port channel

It is not obvious from the documentation that using the "port" channel
for the console requires telnetd to be installed (see port_connection()
in arch/um/drivers/port_user.c).  Mention this, and the fact that UML
will not boot until a client connects.

Signed-off-by: Vincent Whitchurch <vincent.whitchurch@axis.com>
Acked-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Link: https://lore.kernel.org/r/20220310124230.3069354-1-vincent.whitchurch@axis.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/virt/uml/user_mode_linux_howto_v2.rst | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/Documentation/virt/uml/user_mode_linux_howto_v2.rst b/Documentation/virt/uml/user_mode_linux_howto_v2.rst
index 2cafd3c3c6cbe..d5ad96c795f6b 100644
--- a/Documentation/virt/uml/user_mode_linux_howto_v2.rst
+++ b/Documentation/virt/uml/user_mode_linux_howto_v2.rst
@@ -664,7 +664,11 @@ one is input, the second one output.
 * The fd channel - use file descriptor numbers for input/output. Example:
   ``con1=fd:0,fd:1.``
 
-* The port channel - listen on TCP port number. Example: ``con1=port:4321``
+* The port channel - start a telnet server on TCP port number. Example:
+  ``con1=port:4321``.  The host must have /usr/sbin/in.telnetd (usually part of
+  a telnetd package) and the port-helper from the UML utilities (see the
+  information for the xterm channel below).  UML will not boot until a client
+  connects.
 
 * The pty and pts channels - use system pty/pts.
 
-- 
GitLab


From 33f588f8be651b968d058b67ea5e509ed9dd4e55 Mon Sep 17 00:00:00 2001
From: Wan Jiabing <wanjiabing@vivo.com>
Date: Fri, 4 Mar 2022 18:03:15 +0800
Subject: [PATCH 1284/1586] docs: serial: fix a reference file name in
 driver.rst

Fix the following 'make refcheckdocs' warning:
Warning: Documentation/driver-api/serial/driver.rst references a file
that doesn't exist: Documentation/driver-api/serial/tty.rst

Signed-off-by: Wan Jiabing <wanjiabing@vivo.com>
Reviewed-by: Jiri Slaby <jirislaby@kernel.org>
Link: https://lore.kernel.org/r/20220304100315.6732-1-wanjiabing@vivo.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/driver-api/serial/driver.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/driver-api/serial/driver.rst b/Documentation/driver-api/serial/driver.rst
index 31bd4e16fb1f2..06ec04ba086fd 100644
--- a/Documentation/driver-api/serial/driver.rst
+++ b/Documentation/driver-api/serial/driver.rst
@@ -311,7 +311,7 @@ hardware.
 	This call must not sleep
 
   set_ldisc(port,termios)
-	Notifier for discipline change. See Documentation/driver-api/serial/tty.rst.
+	Notifier for discipline change. See Documentation/tty/tty_ldisc.rst.
 
 	Locking: caller holds tty_port->mutex
 
-- 
GitLab


From ff1368763bb488783820c6ba68d73c03c562a9b7 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sun, 30 Jan 2022 16:36:37 -0800
Subject: [PATCH 1285/1586] Docs: ktap: add code-block type

Fix multiple "code-block::" warnings by adding "none" as the type of
code-block. Mends these warnings:

Documentation/dev-tools/ktap.rst:71: WARNING: Error in "code-block" directive:
1 argument(s) required, 0 supplied.
Documentation/dev-tools/ktap.rst:120: WARNING: Error in "code-block" directive:
1 argument(s) required, 0 supplied.
Documentation/dev-tools/ktap.rst:126: WARNING: Error in "code-block" directive:
1 argument(s) required, 0 supplied.
Documentation/dev-tools/ktap.rst:132: WARNING: Error in "code-block" directive:
1 argument(s) required, 0 supplied.
Documentation/dev-tools/ktap.rst:139: WARNING: Error in "code-block" directive:
1 argument(s) required, 0 supplied.
Documentation/dev-tools/ktap.rst:145: WARNING: Error in "code-block" directive:
1 argument(s) required, 0 supplied.
Documentation/dev-tools/ktap.rst:195: WARNING: Error in "code-block" directive:
1 argument(s) required, 0 supplied.
Documentation/dev-tools/ktap.rst:208: WARNING: Error in "code-block" directive:
1 argument(s) required, 0 supplied.
Documentation/dev-tools/ktap.rst:238: WARNING: Error in "code-block" directive:
1 argument(s) required, 0 supplied.

Fixes: a32fa6b2e8b4 ("Documentation: dev-tools: Add KTAP specification")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Rae Moar <rmoar@google.com>
Cc: David Gow <davidgow@google.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: linux-doc@vger.kernel.org
Reviewed-by: Frank Rowand <frank.rowand@sony.com>
Reviewed-by: David Gow <davidgow@google.com>
Link: https://lore.kernel.org/r/20220131003637.14274-1-rdunlap@infradead.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/dev-tools/ktap.rst | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/Documentation/dev-tools/ktap.rst b/Documentation/dev-tools/ktap.rst
index d7fe05de40b44..5ee735c6687fd 100644
--- a/Documentation/dev-tools/ktap.rst
+++ b/Documentation/dev-tools/ktap.rst
@@ -68,7 +68,7 @@ Test case result lines
 Test case result lines indicate the final status of a test.
 They are required and must have the format:
 
-.. code-block::
+.. code-block:: none
 
 	<result> <number> [<description>][ # [<directive>] [<diagnostic data>]]
 
@@ -117,32 +117,32 @@ separator.
 
 Example result lines include:
 
-.. code-block::
+.. code-block:: none
 
 	ok 1 test_case_name
 
 The test "test_case_name" passed.
 
-.. code-block::
+.. code-block:: none
 
 	not ok 1 test_case_name
 
 The test "test_case_name" failed.
 
-.. code-block::
+.. code-block:: none
 
 	ok 1 test # SKIP necessary dependency unavailable
 
 The test "test" was SKIPPED with the diagnostic message "necessary dependency
 unavailable".
 
-.. code-block::
+.. code-block:: none
 
 	not ok 1 test # TIMEOUT 30 seconds
 
 The test "test" timed out, with diagnostic data "30 seconds".
 
-.. code-block::
+.. code-block:: none
 
 	ok 5 check return code # rcode=0
 
@@ -202,7 +202,7 @@ allowed to be either indented or not indented.
 
 An example of a test with two nested subtests:
 
-.. code-block::
+.. code-block:: none
 
 	KTAP version 1
 	1..1
@@ -215,7 +215,7 @@ An example of a test with two nested subtests:
 
 An example format with multiple levels of nested testing:
 
-.. code-block::
+.. code-block:: none
 
 	KTAP version 1
 	1..2
@@ -250,7 +250,7 @@ nested version line, uses a line of the form
 
 Example KTAP output
 --------------------
-.. code-block::
+.. code-block:: none
 
 	KTAP version 1
 	1..1
-- 
GitLab


From 5e34af4142ffe68f01c8a9acae83300f8911e20c Mon Sep 17 00:00:00 2001
From: Tadeusz Struk <tadeusz.struk@linaro.org>
Date: Thu, 10 Mar 2022 15:25:38 -0800
Subject: [PATCH 1286/1586] net: ipv6: fix skb_over_panic in __ip6_append_data

Syzbot found a kernel bug in the ipv6 stack:
LINK: https://syzkaller.appspot.com/bug?id=205d6f11d72329ab8d62a610c44c5e7e25415580
The reproducer triggers it by sending a crafted message via sendmmsg()
call, which triggers skb_over_panic, and crashes the kernel:

skbuff: skb_over_panic: text:ffffffff84647fb4 len:65575 put:65575
head:ffff888109ff0000 data:ffff888109ff0088 tail:0x100af end:0xfec0
dev:<NULL>

Update the check that prevents an invalid packet with MTU equal
to the fregment header size to eat up all the space for payload.

The reproducer can be found here:
LINK: https://syzkaller.appspot.com/text?tag=ReproC&x=1648c83fb00000

Reported-by: syzbot+e223cf47ec8ae183f2a0@syzkaller.appspotmail.com
Signed-off-by: Tadeusz Struk <tadeusz.struk@linaro.org>
Acked-by: Willem de Bruijn <willemb@google.com>
Link: https://lore.kernel.org/r/20220310232538.1044947-1-tadeusz.struk@linaro.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/ipv6/ip6_output.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 4788f6b370539..194832663d856 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1476,8 +1476,8 @@ static int __ip6_append_data(struct sock *sk,
 		      sizeof(struct frag_hdr) : 0) +
 		     rt->rt6i_nfheader_len;
 
-	if (mtu < fragheaderlen ||
-	    ((mtu - fragheaderlen) & ~7) + fragheaderlen < sizeof(struct frag_hdr))
+	if (mtu <= fragheaderlen ||
+	    ((mtu - fragheaderlen) & ~7) + fragheaderlen <= sizeof(struct frag_hdr))
 		goto emsgsize;
 
 	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
-- 
GitLab


From 46b348fd2d81a341b15fb3f3f986204b038f5c42 Mon Sep 17 00:00:00 2001
From: Niels Dossche <dossche.niels@gmail.com>
Date: Fri, 11 Mar 2022 00:27:08 +0100
Subject: [PATCH 1287/1586] alx: acquire mutex for alx_reinit in alx_change_mtu

alx_reinit has a lockdep assertion that the alx->mtx mutex must be held.
alx_reinit is called from two places: alx_reset and alx_change_mtu.
alx_reset does acquire alx->mtx before calling alx_reinit.
alx_change_mtu does not acquire this mutex, nor do its callers or any
path towards alx_change_mtu.
Acquire the mutex in alx_change_mtu.

The issue was introduced when the fine-grained locking was introduced
to the code to replace the RTNL. The same commit also introduced the
lockdep assertion.

Fixes: 4a5fe57e7751 ("alx: use fine-grained locking instead of RTNL")
Signed-off-by: Niels Dossche <dossche.niels@gmail.com>
Link: https://lore.kernel.org/r/20220310232707.44251-1-dossche.niels@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/atheros/alx/main.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c
index 4ad3fc72e74e3..a89b93cb4e26d 100644
--- a/drivers/net/ethernet/atheros/alx/main.c
+++ b/drivers/net/ethernet/atheros/alx/main.c
@@ -1181,8 +1181,11 @@ static int alx_change_mtu(struct net_device *netdev, int mtu)
 	alx->hw.mtu = mtu;
 	alx->rxbuf_size = max(max_frame, ALX_DEF_RXBUF_SIZE);
 	netdev_update_features(netdev);
-	if (netif_running(netdev))
+	if (netif_running(netdev)) {
+		mutex_lock(&alx->mtx);
 		alx_reinit(alx);
+		mutex_unlock(&alx->mtx);
+	}
 	return 0;
 }
 
-- 
GitLab


From 8e6ed963763fe21429eabfc76c69ce2b0163a3dd Mon Sep 17 00:00:00 2001
From: Jiyong Park <jiyong@google.com>
Date: Fri, 11 Mar 2022 11:00:16 +0900
Subject: [PATCH 1288/1586] vsock: each transport cycles only on its own
 sockets

When iterating over sockets using vsock_for_each_connected_socket, make
sure that a transport filters out sockets that don't belong to the
transport.

There actually was an issue caused by this; in a nested VM
configuration, destroying the nested VM (which often involves the
closing of /dev/vhost-vsock if there was h2g connections to the nested
VM) kills not only the h2g connections, but also all existing g2h
connections to the (outmost) host which are totally unrelated.

Tested: Executed the following steps on Cuttlefish (Android running on a
VM) [1]: (1) Enter into an `adb shell` session - to have a g2h
connection inside the VM, (2) open and then close /dev/vhost-vsock by
`exec 3< /dev/vhost-vsock && exec 3<&-`, (3) observe that the adb
session is not reset.

[1] https://android.googlesource.com/device/google/cuttlefish/

Fixes: c0cfa2d8a788 ("vsock: add multi-transports support")
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jiyong Park <jiyong@google.com>
Link: https://lore.kernel.org/r/20220311020017.1509316-1-jiyong@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/vhost/vsock.c            | 3 ++-
 include/net/af_vsock.h           | 3 ++-
 net/vmw_vsock/af_vsock.c         | 9 +++++++--
 net/vmw_vsock/virtio_transport.c | 7 +++++--
 net/vmw_vsock/vmci_transport.c   | 5 ++++-
 5 files changed, 20 insertions(+), 7 deletions(-)

diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index 37f0b4274113c..e6c9d41db1de7 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -753,7 +753,8 @@ static int vhost_vsock_dev_release(struct inode *inode, struct file *file)
 
 	/* Iterating over all connections for all CIDs to find orphans is
 	 * inefficient.  Room for improvement here. */
-	vsock_for_each_connected_socket(vhost_vsock_reset_orphans);
+	vsock_for_each_connected_socket(&vhost_transport.transport,
+					vhost_vsock_reset_orphans);
 
 	/* Don't check the owner, because we are in the release path, so we
 	 * need to stop the vsock device in any case.
diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index ab207677e0a8b..f742e50207fbd 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -205,7 +205,8 @@ struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr);
 struct sock *vsock_find_connected_socket(struct sockaddr_vm *src,
 					 struct sockaddr_vm *dst);
 void vsock_remove_sock(struct vsock_sock *vsk);
-void vsock_for_each_connected_socket(void (*fn)(struct sock *sk));
+void vsock_for_each_connected_socket(struct vsock_transport *transport,
+				     void (*fn)(struct sock *sk));
 int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk);
 bool vsock_find_cid(unsigned int cid);
 
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 38baeb189d4e6..f04abf662ec6c 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -334,7 +334,8 @@ void vsock_remove_sock(struct vsock_sock *vsk)
 }
 EXPORT_SYMBOL_GPL(vsock_remove_sock);
 
-void vsock_for_each_connected_socket(void (*fn)(struct sock *sk))
+void vsock_for_each_connected_socket(struct vsock_transport *transport,
+				     void (*fn)(struct sock *sk))
 {
 	int i;
 
@@ -343,8 +344,12 @@ void vsock_for_each_connected_socket(void (*fn)(struct sock *sk))
 	for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++) {
 		struct vsock_sock *vsk;
 		list_for_each_entry(vsk, &vsock_connected_table[i],
-				    connected_table)
+				    connected_table) {
+			if (vsk->transport != transport)
+				continue;
+
 			fn(sk_vsock(vsk));
+		}
 	}
 
 	spin_unlock_bh(&vsock_table_lock);
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index fb3302fff6279..5afc194a58bbd 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -24,6 +24,7 @@
 static struct workqueue_struct *virtio_vsock_workqueue;
 static struct virtio_vsock __rcu *the_virtio_vsock;
 static DEFINE_MUTEX(the_virtio_vsock_mutex); /* protects the_virtio_vsock */
+static struct virtio_transport virtio_transport; /* forward declaration */
 
 struct virtio_vsock {
 	struct virtio_device *vdev;
@@ -384,7 +385,8 @@ static void virtio_vsock_event_handle(struct virtio_vsock *vsock,
 	switch (le32_to_cpu(event->id)) {
 	case VIRTIO_VSOCK_EVENT_TRANSPORT_RESET:
 		virtio_vsock_update_guest_cid(vsock);
-		vsock_for_each_connected_socket(virtio_vsock_reset_sock);
+		vsock_for_each_connected_socket(&virtio_transport.transport,
+						virtio_vsock_reset_sock);
 		break;
 	}
 }
@@ -662,7 +664,8 @@ static void virtio_vsock_remove(struct virtio_device *vdev)
 	synchronize_rcu();
 
 	/* Reset all connected sockets when the device disappear */
-	vsock_for_each_connected_socket(virtio_vsock_reset_sock);
+	vsock_for_each_connected_socket(&virtio_transport.transport,
+					virtio_vsock_reset_sock);
 
 	/* Stop all work handlers to make sure no one is accessing the device,
 	 * so we can safely call virtio_reset_device().
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 7aef34e32bdf8..b17dc9745188e 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -75,6 +75,8 @@ static u32 vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID;
 
 static int PROTOCOL_OVERRIDE = -1;
 
+static struct vsock_transport vmci_transport; /* forward declaration */
+
 /* Helper function to convert from a VMCI error code to a VSock error code. */
 
 static s32 vmci_transport_error_to_vsock_error(s32 vmci_error)
@@ -882,7 +884,8 @@ static void vmci_transport_qp_resumed_cb(u32 sub_id,
 					 const struct vmci_event_data *e_data,
 					 void *client_data)
 {
-	vsock_for_each_connected_socket(vmci_transport_handle_detach);
+	vsock_for_each_connected_socket(&vmci_transport,
+					vmci_transport_handle_detach);
 }
 
 static void vmci_transport_recv_pkt_work(struct work_struct *work)
-- 
GitLab


From f3b6a41eb2bbdf545a42e54d637c34f4b1fdf5b9 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Sat, 12 Mar 2022 06:50:13 -0700
Subject: [PATCH 1289/1586] io_uring: remove duplicated member check for
 io_msg_ring_prep()

Julia and the kernel test robot report that the prep handling for this
command inadvertently checks one field twice:

fs/io_uring.c:4338:42-56: duplicated argument to && or ||

Get rid of it.

Reported-by: kernel test robot <lkp@intel.com>
Reported-by: Julia Lawall <julia.lawall@lip6.fr>
Fixes: 4f57f06ce218 ("io_uring: add support for IORING_OP_MSG_RING command")
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 229b31d644ef7..299154efcd8a0 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -4389,9 +4389,8 @@ static int io_nop(struct io_kiocb *req, unsigned int issue_flags)
 static int io_msg_ring_prep(struct io_kiocb *req,
 			    const struct io_uring_sqe *sqe)
 {
-	if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index ||
-		     sqe->rw_flags || sqe->splice_fd_in || sqe->buf_index ||
-		     sqe->personality))
+	if (unlikely(sqe->addr || sqe->ioprio || sqe->rw_flags ||
+		     sqe->splice_fd_in || sqe->buf_index || sqe->personality))
 		return -EINVAL;
 
 	if (req->file->f_op != &io_uring_fops)
-- 
GitLab


From ed5f85d4229010235eab1e3d9acf6970d9304963 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sat, 12 Mar 2022 11:05:46 +0100
Subject: [PATCH 1290/1586] netfilter: nf_tables: disable register tracking

The register tracking infrastructure is incomplete, it might lead to
generating incorrect ruleset bytecode, disable it by now given we are
late in the release process.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index c86748b3873b2..d71a33ae39b35 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -8260,6 +8260,12 @@ void nf_tables_trans_destroy_flush_work(void)
 }
 EXPORT_SYMBOL_GPL(nf_tables_trans_destroy_flush_work);
 
+static bool nft_expr_reduce(struct nft_regs_track *track,
+			    const struct nft_expr *expr)
+{
+	return false;
+}
+
 static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *chain)
 {
 	const struct nft_expr *expr, *last;
@@ -8307,8 +8313,7 @@ static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *cha
 		nft_rule_for_each_expr(expr, last, rule) {
 			track.cur = expr;
 
-			if (expr->ops->reduce &&
-			    expr->ops->reduce(&track, expr)) {
+			if (nft_expr_reduce(&track, expr)) {
 				expr = track.cur;
 				continue;
 			}
-- 
GitLab


From 6f98a4bfee72c22f50aedb39fb761567969865fe Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Mon, 7 Feb 2022 17:19:24 +0100
Subject: [PATCH 1291/1586] random: block in /dev/urandom

This topic has come up countless times, and usually doesn't go anywhere.
This time I thought I'd bring it up with a slightly narrower focus,
updated for some developments over the last three years: we finally can
make /dev/urandom always secure, in light of the fact that our RNG is
now always seeded.

Ever since Linus' 50ee7529ec45 ("random: try to actively add entropy
rather than passively wait for it"), the RNG does a haveged-style jitter
dance around the scheduler, in order to produce entropy (and credit it)
for the case when we're stuck in wait_for_random_bytes(). How ever you
feel about the Linus Jitter Dance is beside the point: it's been there
for three years and usually gets the RNG initialized in a second or so.

As a matter of fact, this is what happens currently when people use
getrandom(). It's already there and working, and most people have been
using it for years without realizing.

So, given that the kernel has grown this mechanism for seeding itself
from nothing, and that this procedure happens pretty fast, maybe there's
no point any longer in having /dev/urandom give insecure bytes. In the
past we didn't want the boot process to deadlock, which was
understandable. But now, in the worst case, a second goes by, and the
problem is resolved. It seems like maybe we're finally at a point when
we can get rid of the infamous "urandom read hole".

The one slight drawback is that the Linus Jitter Dance relies on random_
get_entropy() being implemented. The first lines of try_to_generate_
entropy() are:

	stack.now = random_get_entropy();
	if (stack.now == random_get_entropy())
		return;

On most platforms, random_get_entropy() is simply aliased to get_cycles().
The number of machines without a cycle counter or some other
implementation of random_get_entropy() in 2022, which can also run a
mainline kernel, and at the same time have a both broken and out of date
userspace that relies on /dev/urandom never blocking at boot is thought
to be exceedingly low. And to be clear: those museum pieces without
cycle counters will continue to run Linux just fine, and even
/dev/urandom will be operable just like before; the RNG just needs to be
seeded first through the usual means, which should already be the case
now.

On systems that really do want unseeded randomness, we already offer
getrandom(GRND_INSECURE), which is in use by, e.g., systemd for seeding
their hash tables at boot. Nothing in this commit would affect
GRND_INSECURE, and it remains the means of getting those types of random
numbers.

This patch goes a long way toward eliminating a long overdue userspace
crypto footgun. After several decades of endless user confusion, we will
finally be able to say, "use any single one of our random interfaces and
you'll be fine. They're all the same. It doesn't matter." And that, I
think, is really something. Finally all of those blog posts and
disagreeing forums and contradictory articles will all become correct
about whatever they happened to recommend, and along with it, a whole
class of vulnerabilities eliminated.

With very minimal downside, we're finally in a position where we can
make this change.

Cc: Dinh Nguyen <dinguyen@kernel.org>
Cc: Nick Hu <nickhu@andestech.com>
Cc: Max Filippov <jcmvbkbc@gmail.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Guo Ren <guoren@kernel.org>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Joshua Kinard <kumba@gentoo.org>
Cc: David Laight <David.Laight@aculab.com>
Cc: Dominik Brodowski <linux@dominikbrodowski.net>
Cc: Eric Biggers <ebiggers@google.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Lennart Poettering <mzxreary@0pointer.de>
Cc: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/char/mem.c     |  2 +-
 drivers/char/random.c  | 72 ++++++++++--------------------------------
 include/linux/random.h |  2 +-
 3 files changed, 19 insertions(+), 57 deletions(-)

diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index cc296f0823bd0..9f586025dbe62 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -707,7 +707,7 @@ static const struct memdev {
 	 [5] = { "zero", 0666, &zero_fops, FMODE_NOWAIT },
 	 [7] = { "full", 0666, &full_fops, 0 },
 	 [8] = { "random", 0666, &random_fops, 0 },
-	 [9] = { "urandom", 0666, &urandom_fops, 0 },
+	 [9] = { "urandom", 0666, &random_fops, 0 },
 #ifdef CONFIG_PRINTK
 	[11] = { "kmsg", 0644, &kmsg_fops, 0 },
 #endif
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 8171c3bbf460f..9831797e0699d 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -89,17 +89,14 @@ static LIST_HEAD(random_ready_list);
 /* Control how we warn userspace. */
 static struct ratelimit_state unseeded_warning =
 	RATELIMIT_STATE_INIT("warn_unseeded_randomness", HZ, 3);
-static struct ratelimit_state urandom_warning =
-	RATELIMIT_STATE_INIT("warn_urandom_randomness", HZ, 3);
 static int ratelimit_disable __read_mostly;
 module_param_named(ratelimit_disable, ratelimit_disable, int, 0644);
 MODULE_PARM_DESC(ratelimit_disable, "Disable random ratelimit suppression");
 
 /*
  * Returns whether or not the input pool has been seeded and thus guaranteed
- * to supply cryptographically secure random numbers. This applies to: the
- * /dev/urandom device, the get_random_bytes function, and the get_random_{u32,
- * ,u64,int,long} family of functions.
+ * to supply cryptographically secure random numbers. This applies to
+ * get_random_bytes() and get_random_{u32,u64,int,long}().
  *
  * Returns: true if the input pool has been seeded.
  *          false if the input pool has not been seeded.
@@ -115,10 +112,10 @@ static void try_to_generate_entropy(void);
 
 /*
  * Wait for the input pool to be seeded and thus guaranteed to supply
- * cryptographically secure random numbers. This applies to: the /dev/urandom
- * device, the get_random_bytes function, and the get_random_{u32,u64,int,long}
- * family of functions. Using any of these functions without first calling
- * this function forfeits the guarantee of security.
+ * cryptographically secure random numbers. This applies to
+ * get_random_bytes() and get_random_{u32,u64,int,long}(). Using any
+ * of these functions without first calling this function means that
+ * the returned numbers might not be cryptographically secure.
  *
  * Returns: 0 if the input pool has been seeded.
  *          -ERESTARTSYS if the function was interrupted by a signal.
@@ -256,10 +253,10 @@ static void _warn_unseeded_randomness(const char *func_name, void *caller, void
  *	unsigned long get_random_long()
  *
  * These interfaces will return the requested number of random bytes
- * into the given buffer or as a return value. This is equivalent to
- * a read from /dev/urandom. The integer family of functions may be
- * higher performance for one-off random integers, because they do a
- * bit of buffering.
+ * into the given buffer or as a return value. The returned numbers are
+ * the same as those of getrandom(0). The integer family of functions may
+ * be higher performance for one-off random integers, because they do a
+ * bit of buffering and do not invoke reseeding.
  *
  *********************************************************************/
 
@@ -336,11 +333,6 @@ static void crng_reseed(void)
 				  unseeded_warning.missed);
 			unseeded_warning.missed = 0;
 		}
-		if (urandom_warning.missed) {
-			pr_notice("%d urandom warning(s) missed due to ratelimiting\n",
-				  urandom_warning.missed);
-			urandom_warning.missed = 0;
-		}
 	}
 }
 
@@ -993,10 +985,8 @@ int __init rand_initialize(void)
 		pr_notice("crng init done (trusting CPU's manufacturer)\n");
 	}
 
-	if (ratelimit_disable) {
-		urandom_warning.interval = 0;
+	if (ratelimit_disable)
 		unseeded_warning.interval = 0;
-	}
 	return 0;
 }
 
@@ -1386,20 +1376,16 @@ static void try_to_generate_entropy(void)
  * getrandom(2) is the primary modern interface into the RNG and should
  * be used in preference to anything else.
  *
- * Reading from /dev/random has the same functionality as calling
- * getrandom(2) with flags=0. In earlier versions, however, it had
- * vastly different semantics and should therefore be avoided, to
- * prevent backwards compatibility issues.
- *
- * Reading from /dev/urandom has the same functionality as calling
- * getrandom(2) with flags=GRND_INSECURE. Because it does not block
- * waiting for the RNG to be ready, it should not be used.
+ * Reading from /dev/random and /dev/urandom both have the same effect
+ * as calling getrandom(2) with flags=0. (In earlier versions, however,
+ * they each had different semantics.)
  *
  * Writing to either /dev/random or /dev/urandom adds entropy to
  * the input pool but does not credit it.
  *
- * Polling on /dev/random indicates when the RNG is initialized, on
- * the read side, and when it wants new entropy, on the write side.
+ * Polling on /dev/random or /dev/urandom indicates when the RNG
+ * is initialized, on the read side, and when it wants new entropy,
+ * on the write side.
  *
  * Both /dev/random and /dev/urandom have the same set of ioctls for
  * adding entropy, getting the entropy count, zeroing the count, and
@@ -1484,21 +1470,6 @@ static ssize_t random_write(struct file *file, const char __user *buffer,
 	return (ssize_t)count;
 }
 
-static ssize_t urandom_read(struct file *file, char __user *buf, size_t nbytes,
-			    loff_t *ppos)
-{
-	static int maxwarn = 10;
-
-	if (!crng_ready() && maxwarn > 0) {
-		maxwarn--;
-		if (__ratelimit(&urandom_warning))
-			pr_notice("%s: uninitialized urandom read (%zd bytes read)\n",
-				  current->comm, nbytes);
-	}
-
-	return get_random_bytes_user(buf, nbytes);
-}
-
 static ssize_t random_read(struct file *file, char __user *buf, size_t nbytes,
 			   loff_t *ppos)
 {
@@ -1585,15 +1556,6 @@ const struct file_operations random_fops = {
 	.llseek = noop_llseek,
 };
 
-const struct file_operations urandom_fops = {
-	.read = urandom_read,
-	.write = random_write,
-	.unlocked_ioctl = random_ioctl,
-	.compat_ioctl = compat_ptr_ioctl,
-	.fasync = random_fasync,
-	.llseek = noop_llseek,
-};
-
 
 /********************************************************************
  *
diff --git a/include/linux/random.h b/include/linux/random.h
index 6148b8d1ccf34..725a4d08c0a05 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -44,7 +44,7 @@ extern void del_random_ready_callback(struct random_ready_callback *rdy);
 extern size_t __must_check get_random_bytes_arch(void *buf, size_t nbytes);
 
 #ifndef MODULE
-extern const struct file_operations random_fops, urandom_fops;
+extern const struct file_operations random_fops;
 #endif
 
 u32 get_random_u32(void);
-- 
GitLab


From d0efdf35a6a71d307a250199af6fce122a7c7e11 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Mon, 28 Feb 2022 13:57:57 +0100
Subject: [PATCH 1292/1586] random: give sysctl_random_min_urandom_seed a more
 sensible value

This isn't used by anything or anywhere, but we can't delete it due to
compatibility. So at least give it the correct value of what it's
supposed to be instead of a garbage one.

Cc: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/char/random.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 9831797e0699d..612e45e880770 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1581,7 +1581,7 @@ const struct file_operations random_fops = {
  *   to avoid breaking old userspaces, but writing to it does not
  *   change any behavior of the RNG.
  *
- * - urandom_min_reseed_secs - fixed to the meaningless value "60".
+ * - urandom_min_reseed_secs - fixed to the value CRNG_RESEED_INTERVAL.
  *   It is writable to avoid breaking old userspaces, but writing
  *   to it does not change any behavior of the RNG.
  *
@@ -1591,7 +1591,7 @@ const struct file_operations random_fops = {
 
 #include <linux/sysctl.h>
 
-static int sysctl_random_min_urandom_seed = 60;
+static int sysctl_random_min_urandom_seed = CRNG_RESEED_INTERVAL / HZ;
 static int sysctl_random_write_wakeup_bits = POOL_MIN_BITS;
 static int sysctl_poolsize = POOL_BITS;
 static u8 sysctl_bootid[UUID_SIZE];
-- 
GitLab


From 77553cf8f44863b31da242cf24671d76ddb61597 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Mon, 28 Feb 2022 14:00:52 +0100
Subject: [PATCH 1293/1586] random: don't let 644 read-only sysctls be written
 to

We leave around these old sysctls for compatibility, and we keep them
"writable" for compatibility, but even after writing, we should keep
reporting the same value. This is consistent with how userspaces tend to
use sysctl_random_write_wakeup_bits, writing to it, and then later
reading from it and using the value.

Cc: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/char/random.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 612e45e880770..ede97649c5dd8 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1631,6 +1631,13 @@ static int proc_do_uuid(struct ctl_table *table, int write, void *buffer,
 	return proc_dostring(&fake_table, 0, buffer, lenp, ppos);
 }
 
+/* The same as proc_dointvec, but writes don't change anything. */
+static int proc_do_rointvec(struct ctl_table *table, int write, void *buffer,
+			    size_t *lenp, loff_t *ppos)
+{
+	return write ? 0 : proc_dointvec(table, 0, buffer, lenp, ppos);
+}
+
 static struct ctl_table random_table[] = {
 	{
 		.procname	= "poolsize",
@@ -1651,14 +1658,14 @@ static struct ctl_table random_table[] = {
 		.data		= &sysctl_random_write_wakeup_bits,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= proc_do_rointvec,
 	},
 	{
 		.procname	= "urandom_min_reseed_secs",
 		.data		= &sysctl_random_min_urandom_seed,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= proc_do_rointvec,
 	},
 	{
 		.procname	= "boot_id",
-- 
GitLab


From ae099e8e98fb01395228628be5a4661e3bd86fe4 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Wed, 23 Feb 2022 13:43:44 +0100
Subject: [PATCH 1294/1586] random: add mechanism for VM forks to reinitialize
 crng

When a VM forks, we must immediately mix in additional information to
the stream of random output so that two forks or a rollback don't
produce the same stream of random numbers, which could have catastrophic
cryptographic consequences. This commit adds a simple API, add_vmfork_
randomness(), for that, by force reseeding the crng.

This has the added benefit of also draining the entropy pool and setting
its timer back, so that any old entropy that was there prior -- which
could have already been used by a different fork, or generally gone
stale -- does not contribute to the accounting of the next 256 bits.

Cc: Dominik Brodowski <linux@dominikbrodowski.net>
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: Jann Horn <jannh@google.com>
Cc: Eric Biggers <ebiggers@google.com>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/char/random.c  | 50 +++++++++++++++++++++++++++++-------------
 include/linux/random.h |  1 +
 2 files changed, 36 insertions(+), 15 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index ede97649c5dd8..2b323f6bd96c3 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -286,14 +286,14 @@ static DEFINE_PER_CPU(struct crng, crngs) = {
 };
 
 /* Used by crng_reseed() to extract a new seed from the input pool. */
-static bool drain_entropy(void *buf, size_t nbytes);
+static bool drain_entropy(void *buf, size_t nbytes, bool force);
 
 /*
  * This extracts a new crng key from the input pool, but only if there is a
- * sufficient amount of entropy available, in order to mitigate bruteforcing
- * of newly added bits.
+ * sufficient amount of entropy available or force is true, in order to
+ * mitigate bruteforcing of newly added bits.
  */
-static void crng_reseed(void)
+static void crng_reseed(bool force)
 {
 	unsigned long flags;
 	unsigned long next_gen;
@@ -301,7 +301,7 @@ static void crng_reseed(void)
 	bool finalize_init = false;
 
 	/* Only reseed if we can, to prevent brute forcing a small amount of new bits. */
-	if (!drain_entropy(key, sizeof(key)))
+	if (!drain_entropy(key, sizeof(key), force))
 		return;
 
 	/*
@@ -398,7 +398,7 @@ static void crng_make_state(u32 chacha_state[CHACHA_STATE_WORDS],
 	 * in turn bumps the generation counter that we check below.
 	 */
 	if (unlikely(time_after(jiffies, READ_ONCE(base_crng.birth) + CRNG_RESEED_INTERVAL)))
-		crng_reseed();
+		crng_reseed(false);
 
 	local_lock_irqsave(&crngs.lock, flags);
 	crng = raw_cpu_ptr(&crngs);
@@ -763,10 +763,10 @@ EXPORT_SYMBOL(get_random_bytes_arch);
  *
  * Finally, extract entropy via these two, with the latter one
  * setting the entropy count to zero and extracting only if there
- * is POOL_MIN_BITS entropy credited prior:
+ * is POOL_MIN_BITS entropy credited prior or force is true:
  *
  *     static void extract_entropy(void *buf, size_t nbytes)
- *     static bool drain_entropy(void *buf, size_t nbytes)
+ *     static bool drain_entropy(void *buf, size_t nbytes, bool force)
  *
  **********************************************************************/
 
@@ -824,7 +824,7 @@ static void credit_entropy_bits(size_t nbits)
 	} while (cmpxchg(&input_pool.entropy_count, orig, entropy_count) != orig);
 
 	if (crng_init < 2 && entropy_count >= POOL_MIN_BITS)
-		crng_reseed();
+		crng_reseed(false);
 }
 
 /*
@@ -874,16 +874,16 @@ static void extract_entropy(void *buf, size_t nbytes)
 }
 
 /*
- * First we make sure we have POOL_MIN_BITS of entropy in the pool, and then we
- * set the entropy count to zero (but don't actually touch any data). Only then
- * can we extract a new key with extract_entropy().
+ * First we make sure we have POOL_MIN_BITS of entropy in the pool unless force
+ * is true, and then we set the entropy count to zero (but don't actually touch
+ * any data). Only then can we extract a new key with extract_entropy().
  */
-static bool drain_entropy(void *buf, size_t nbytes)
+static bool drain_entropy(void *buf, size_t nbytes, bool force)
 {
 	unsigned int entropy_count;
 	do {
 		entropy_count = READ_ONCE(input_pool.entropy_count);
-		if (entropy_count < POOL_MIN_BITS)
+		if (!force && entropy_count < POOL_MIN_BITS)
 			return false;
 	} while (cmpxchg(&input_pool.entropy_count, entropy_count, 0) != entropy_count);
 	extract_entropy(buf, nbytes);
@@ -907,6 +907,7 @@ static bool drain_entropy(void *buf, size_t nbytes)
  *	void add_hwgenerator_randomness(const void *buffer, size_t count,
  *					size_t entropy);
  *	void add_bootloader_randomness(const void *buf, size_t size);
+ *	void add_vmfork_randomness(const void *unique_vm_id, size_t size);
  *	void add_interrupt_randomness(int irq);
  *
  * add_device_randomness() adds data to the input pool that
@@ -938,6 +939,10 @@ static bool drain_entropy(void *buf, size_t nbytes)
  * add_device_randomness(), depending on whether or not the configuration
  * option CONFIG_RANDOM_TRUST_BOOTLOADER is set.
  *
+ * add_vmfork_randomness() adds a unique (but not necessarily secret) ID
+ * representing the current instance of a VM to the pool, without crediting,
+ * and then force-reseeds the crng so that it takes effect immediately.
+ *
  * add_interrupt_randomness() uses the interrupt timing as random
  * inputs to the entropy pool. Using the cycle counters and the irq source
  * as inputs, it feeds the input pool roughly once a second or after 64
@@ -1163,6 +1168,21 @@ void add_bootloader_randomness(const void *buf, size_t size)
 }
 EXPORT_SYMBOL_GPL(add_bootloader_randomness);
 
+/*
+ * Handle a new unique VM ID, which is unique, not secret, so we
+ * don't credit it, but we do immediately force a reseed after so
+ * that it's used by the crng posthaste.
+ */
+void add_vmfork_randomness(const void *unique_vm_id, size_t size)
+{
+	add_device_randomness(unique_vm_id, size);
+	if (crng_ready()) {
+		crng_reseed(true);
+		pr_notice("crng reseeded due to virtual machine fork\n");
+	}
+}
+EXPORT_SYMBOL_GPL(add_vmfork_randomness);
+
 struct fast_pool {
 	union {
 		u32 pool32[4];
@@ -1534,7 +1554,7 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
 			return -EPERM;
 		if (crng_init < 2)
 			return -ENODATA;
-		crng_reseed();
+		crng_reseed(false);
 		return 0;
 	default:
 		return -EINVAL;
diff --git a/include/linux/random.h b/include/linux/random.h
index 725a4d08c0a05..117468f3a92ee 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -34,6 +34,7 @@ extern void add_input_randomness(unsigned int type, unsigned int code,
 extern void add_interrupt_randomness(int irq) __latent_entropy;
 extern void add_hwgenerator_randomness(const void *buffer, size_t count,
 				       size_t entropy);
+extern void add_vmfork_randomness(const void *unique_vm_id, size_t size);
 
 extern void get_random_bytes(void *buf, size_t nbytes);
 extern int wait_for_random_bytes(void);
-- 
GitLab


From d273845ecb0e0626842782a4497f0c5876139ec3 Mon Sep 17 00:00:00 2001
From: Alexander Graf <graf@amazon.com>
Date: Fri, 25 Feb 2022 16:55:52 +0100
Subject: [PATCH 1295/1586] ACPI: allow longer device IDs

We create a list of ACPI "PNP" IDs which contains _HID, _CID, and CLS
entries of the respective devices. However, when making structs for
matching, we squeeze those IDs into acpi_device_id, which only has 9
bytes space to store the identifier. The subsystem actually captures the
full length of the IDs, and the modalias has the full length, but this
struct we use for matching is limited. It originally had 16 bytes, but
was changed to only have 9 in 6543becf26ff ("mod/file2alias: make
modalias generation safe for cross compiling"), presumably on the theory
that it would match the ACPI spec so it didn't matter.

Unfortunately, while most people adhere to the ACPI specs, Microsoft
decided that its VM Generation Counter device [1] should only be
identifiable by _CID with a value of "VM_Gen_Counter", which is longer
than 9 characters.

To allow device drivers to match identifiers that exceed the 9 byte
limit, this simply ups the length to 16, just like it was before the
aforementioned commit. Empirical testing indicates that this
doesn't actually increase vmlinux size on 64-bit, because the ulong in
the same struct caused there to be 7 bytes of padding anyway, and when
doing a s/M/Y/g i386_defconfig build, the bzImage only increased by
0.0055%, so negligible.

This patch is a prerequisite to add support for VMGenID in Linux, the
subsequent patch in this series. It has been confirmed to also work on
the udev/modalias side in userspace.

[1] https://download.microsoft.com/download/3/1/C/31CFC307-98CA-4CA5-914C-D9772691E214/VirtualMachineGenerationID.docx

Signed-off-by: Alexander Graf <graf@amazon.com>
Co-developed-by: Jason A. Donenfeld <Jason@zx2c4.com>
[Jason: reworked commit message a bit, went with len=16 approach.]
Cc: Mika Westerberg <mika.westerberg@linux.intel.com>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Len Brown <lenb@kernel.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Acked-by: Hans de Goede <hdegoede@redhat.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 include/linux/mod_devicetable.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index 4bb71979a8fd1..5da5d990ff58a 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -211,7 +211,7 @@ struct css_device_id {
 	kernel_ulong_t driver_data;
 };
 
-#define ACPI_ID_LEN	9
+#define ACPI_ID_LEN	16
 
 struct acpi_device_id {
 	__u8 id[ACPI_ID_LEN];
-- 
GitLab


From af6b54e2b5baa54c844573b6d49cc91157bcdd7e Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Wed, 23 Feb 2022 13:46:24 +0100
Subject: [PATCH 1296/1586] virt: vmgenid: notify RNG of VM fork and supply
 generation ID
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

VM Generation ID is a feature from Microsoft, described at
<https://go.microsoft.com/fwlink/?LinkId=260709>, and supported by
Hyper-V and QEMU. Its usage is described in Microsoft's RNG whitepaper,
<https://aka.ms/win10rng>, as:

    If the OS is running in a VM, there is a problem that most
    hypervisors can snapshot the state of the machine and later rewind
    the VM state to the saved state. This results in the machine running
    a second time with the exact same RNG state, which leads to serious
    security problems.  To reduce the window of vulnerability, Windows
    10 on a Hyper-V VM will detect when the VM state is reset, retrieve
    a unique (not random) value from the hypervisor, and reseed the root
    RNG with that unique value.  This does not eliminate the
    vulnerability, but it greatly reduces the time during which the RNG
    system will produce the same outputs as it did during a previous
    instantiation of the same VM state.

Linux has the same issue, and given that vmgenid is supported already by
multiple hypervisors, we can implement more or less the same solution.
So this commit wires up the vmgenid ACPI notification to the RNG's newly
added add_vmfork_randomness() function.

It can be used from qemu via the `-device vmgenid,guid=auto` parameter.
After setting that, use `savevm` in the monitor to save the VM state,
then quit QEMU, start it again, and use `loadvm`. That will trigger this
driver's notify function, which hands the new UUID to the RNG. This is
described in <https://git.qemu.org/?p=qemu.git;a=blob;f=docs/specs/vmgenid.txt>.
And there are hooks for this in libvirt as well, described in
<https://libvirt.org/formatdomain.html#general-metadata>.

Note, however, that the treatment of this as a UUID is considered to be
an accidental QEMU nuance, per
<https://github.com/libguestfs/virt-v2v/blob/master/docs/vm-generation-id-across-hypervisors.txt>,
so this driver simply treats these bytes as an opaque 128-bit binary
blob, as per the spec. This doesn't really make a difference anyway,
considering that's how it ends up when handed to the RNG in the end.

Cc: Alexander Graf <graf@amazon.com>
Cc: Adrian Catangiu <adrian@parity.io>
Cc: Daniel P. Berrangé <berrange@redhat.com>
Cc: Dominik Brodowski <linux@dominikbrodowski.net>
Cc: Wei Yongjun <weiyongjun1@huawei.com>
Tested-by: Souradeep Chakrabarti <souradch.linux@gmail.com> # With Hyper-V's virtual hardware
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Laszlo Ersek <lersek@redhat.com>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 MAINTAINERS            |   1 +
 drivers/virt/Kconfig   |  11 +++++
 drivers/virt/Makefile  |   1 +
 drivers/virt/vmgenid.c | 100 +++++++++++++++++++++++++++++++++++++++++
 4 files changed, 113 insertions(+)
 create mode 100644 drivers/virt/vmgenid.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 777cd6fa2b3d6..a10997e15146a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -16211,6 +16211,7 @@ M:	Jason A. Donenfeld <Jason@zx2c4.com>
 T:	git https://git.kernel.org/pub/scm/linux/kernel/git/crng/random.git
 S:	Maintained
 F:	drivers/char/random.c
+F:	drivers/virt/vmgenid.c
 
 RAPIDIO SUBSYSTEM
 M:	Matt Porter <mporter@kernel.crashing.org>
diff --git a/drivers/virt/Kconfig b/drivers/virt/Kconfig
index 8061e8ef449f0..121b9293c7375 100644
--- a/drivers/virt/Kconfig
+++ b/drivers/virt/Kconfig
@@ -13,6 +13,17 @@ menuconfig VIRT_DRIVERS
 
 if VIRT_DRIVERS
 
+config VMGENID
+	tristate "Virtual Machine Generation ID driver"
+	default y
+	depends on ACPI
+	help
+	  Say Y here to use the hypervisor-provided Virtual Machine Generation ID
+	  to reseed the RNG when the VM is cloned. This is highly recommended if
+	  you intend to do any rollback / cloning / snapshotting of VMs.
+
+	  Prefer Y to M so that this protection is activated very early.
+
 config FSL_HV_MANAGER
 	tristate "Freescale hypervisor management driver"
 	depends on FSL_SOC
diff --git a/drivers/virt/Makefile b/drivers/virt/Makefile
index 3e272ea60cd9f..108d0ffcc9aa9 100644
--- a/drivers/virt/Makefile
+++ b/drivers/virt/Makefile
@@ -4,6 +4,7 @@
 #
 
 obj-$(CONFIG_FSL_HV_MANAGER)	+= fsl_hypervisor.o
+obj-$(CONFIG_VMGENID)		+= vmgenid.o
 obj-y				+= vboxguest/
 
 obj-$(CONFIG_NITRO_ENCLAVES)	+= nitro_enclaves/
diff --git a/drivers/virt/vmgenid.c b/drivers/virt/vmgenid.c
new file mode 100644
index 0000000000000..0ae1a39f2e281
--- /dev/null
+++ b/drivers/virt/vmgenid.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ *
+ * The "Virtual Machine Generation ID" is exposed via ACPI and changes when a
+ * virtual machine forks or is cloned. This driver exists for shepherding that
+ * information to random.c.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/acpi.h>
+#include <linux/random.h>
+
+ACPI_MODULE_NAME("vmgenid");
+
+enum { VMGENID_SIZE = 16 };
+
+struct vmgenid_state {
+	u8 *next_id;
+	u8 this_id[VMGENID_SIZE];
+};
+
+static int vmgenid_add(struct acpi_device *device)
+{
+	struct acpi_buffer parsed = { ACPI_ALLOCATE_BUFFER };
+	struct vmgenid_state *state;
+	union acpi_object *obj;
+	phys_addr_t phys_addr;
+	acpi_status status;
+	int ret = 0;
+
+	state = devm_kmalloc(&device->dev, sizeof(*state), GFP_KERNEL);
+	if (!state)
+		return -ENOMEM;
+
+	status = acpi_evaluate_object(device->handle, "ADDR", NULL, &parsed);
+	if (ACPI_FAILURE(status)) {
+		ACPI_EXCEPTION((AE_INFO, status, "Evaluating ADDR"));
+		return -ENODEV;
+	}
+	obj = parsed.pointer;
+	if (!obj || obj->type != ACPI_TYPE_PACKAGE || obj->package.count != 2 ||
+	    obj->package.elements[0].type != ACPI_TYPE_INTEGER ||
+	    obj->package.elements[1].type != ACPI_TYPE_INTEGER) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	phys_addr = (obj->package.elements[0].integer.value << 0) |
+		    (obj->package.elements[1].integer.value << 32);
+	state->next_id = devm_memremap(&device->dev, phys_addr, VMGENID_SIZE, MEMREMAP_WB);
+	if (IS_ERR(state->next_id)) {
+		ret = PTR_ERR(state->next_id);
+		goto out;
+	}
+
+	memcpy(state->this_id, state->next_id, sizeof(state->this_id));
+	add_device_randomness(state->this_id, sizeof(state->this_id));
+
+	device->driver_data = state;
+
+out:
+	ACPI_FREE(parsed.pointer);
+	return ret;
+}
+
+static void vmgenid_notify(struct acpi_device *device, u32 event)
+{
+	struct vmgenid_state *state = acpi_driver_data(device);
+	u8 old_id[VMGENID_SIZE];
+
+	memcpy(old_id, state->this_id, sizeof(old_id));
+	memcpy(state->this_id, state->next_id, sizeof(state->this_id));
+	if (!memcmp(old_id, state->this_id, sizeof(old_id)))
+		return;
+	add_vmfork_randomness(state->this_id, sizeof(state->this_id));
+}
+
+static const struct acpi_device_id vmgenid_ids[] = {
+	{ "VM_GEN_COUNTER", 0 },
+	{ }
+};
+
+static struct acpi_driver vmgenid_driver = {
+	.name = "vmgenid",
+	.ids = vmgenid_ids,
+	.owner = THIS_MODULE,
+	.ops = {
+		.add = vmgenid_add,
+		.notify = vmgenid_notify
+	}
+};
+
+module_acpi_driver(vmgenid_driver);
+
+MODULE_DEVICE_TABLE(acpi, vmgenid_ids);
+MODULE_DESCRIPTION("Virtual Machine Generation ID");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
-- 
GitLab


From a4107d34f960df99ca07fa8eb022425a804f59f3 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Tue, 1 Mar 2022 15:14:04 +0100
Subject: [PATCH 1297/1586] random: do not export add_vmfork_randomness()
 unless needed

Since add_vmfork_randomness() is only called from vmgenid.o, we can
guard it in CONFIG_VMGENID, similarly to how we do with
add_disk_randomness() and CONFIG_BLOCK. If we ever have multiple things
calling into add_vmfork_randomness(), we can add another shared Kconfig
symbol for that, but for now, this is good enough. Even though
add_vmfork_randomess() is a pretty small function, removing it means
that there are only calls to crng_reseed(false) and none to
crng_reseed(true), which means the compiler can constant propagate the
false, removing branches from crng_reseed() and its descendants.

Additionally, we don't even need the symbol to be exported if
CONFIG_VMGENID is not a module, so conditionalize that too.

Cc: Dominik Brodowski <linux@dominikbrodowski.net>
Cc: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/char/random.c  | 4 ++++
 include/linux/random.h | 2 ++
 2 files changed, 6 insertions(+)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 2b323f6bd96c3..bc2a4f7e36554 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1168,6 +1168,7 @@ void add_bootloader_randomness(const void *buf, size_t size)
 }
 EXPORT_SYMBOL_GPL(add_bootloader_randomness);
 
+#if IS_ENABLED(CONFIG_VMGENID)
 /*
  * Handle a new unique VM ID, which is unique, not secret, so we
  * don't credit it, but we do immediately force a reseed after so
@@ -1181,7 +1182,10 @@ void add_vmfork_randomness(const void *unique_vm_id, size_t size)
 		pr_notice("crng reseeded due to virtual machine fork\n");
 	}
 }
+#if IS_MODULE(CONFIG_VMGENID)
 EXPORT_SYMBOL_GPL(add_vmfork_randomness);
+#endif
+#endif
 
 struct fast_pool {
 	union {
diff --git a/include/linux/random.h b/include/linux/random.h
index 117468f3a92ee..f209f1a788996 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -34,7 +34,9 @@ extern void add_input_randomness(unsigned int type, unsigned int code,
 extern void add_interrupt_randomness(int irq) __latent_entropy;
 extern void add_hwgenerator_randomness(const void *buffer, size_t count,
 				       size_t entropy);
+#if IS_ENABLED(CONFIG_VMGENID)
 extern void add_vmfork_randomness(const void *unique_vm_id, size_t size);
+#endif
 
 extern void get_random_bytes(void *buf, size_t nbytes);
 extern int wait_for_random_bytes(void);
-- 
GitLab


From 5acd35487dc911541672b3ffc322851769c32a56 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Tue, 1 Mar 2022 20:03:49 +0100
Subject: [PATCH 1298/1586] random: replace custom notifier chain with standard
 one

We previously rolled our own randomness readiness notifier, which only
has two users in the whole kernel. Replace this with a more standard
atomic notifier block that serves the same purpose with less code. Also
unexport the symbols, because no modules use it, only unconditional
builtins. The only drawback is that it's possible for a notification
handler returning the "stop" code to prevent further processing, but
given that there are only two users, and that we're unexporting this
anyway, that doesn't seem like a significant drawback for the
simplification we receive here.

Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/char/random.c  | 67 ++++++++++++------------------------------
 include/linux/random.h | 10 ++-----
 lib/random32.c         | 12 ++++----
 lib/vsprintf.c         | 10 ++++---
 4 files changed, 35 insertions(+), 64 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index bc2a4f7e36554..aa7bc9a3a864a 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -83,8 +83,8 @@ static int crng_init = 0;
 /* Various types of waiters for crng_init->2 transition. */
 static DECLARE_WAIT_QUEUE_HEAD(crng_init_wait);
 static struct fasync_struct *fasync;
-static DEFINE_SPINLOCK(random_ready_list_lock);
-static LIST_HEAD(random_ready_list);
+static DEFINE_SPINLOCK(random_ready_chain_lock);
+static RAW_NOTIFIER_HEAD(random_ready_chain);
 
 /* Control how we warn userspace. */
 static struct ratelimit_state unseeded_warning =
@@ -144,72 +144,43 @@ EXPORT_SYMBOL(wait_for_random_bytes);
  *
  * returns: 0 if callback is successfully added
  *	    -EALREADY if pool is already initialised (callback not called)
- *	    -ENOENT if module for callback is not alive
  */
-int add_random_ready_callback(struct random_ready_callback *rdy)
+int register_random_ready_notifier(struct notifier_block *nb)
 {
-	struct module *owner;
 	unsigned long flags;
-	int err = -EALREADY;
+	int ret = -EALREADY;
 
 	if (crng_ready())
-		return err;
-
-	owner = rdy->owner;
-	if (!try_module_get(owner))
-		return -ENOENT;
-
-	spin_lock_irqsave(&random_ready_list_lock, flags);
-	if (crng_ready())
-		goto out;
-
-	owner = NULL;
-
-	list_add(&rdy->list, &random_ready_list);
-	err = 0;
-
-out:
-	spin_unlock_irqrestore(&random_ready_list_lock, flags);
-
-	module_put(owner);
+		return ret;
 
-	return err;
+	spin_lock_irqsave(&random_ready_chain_lock, flags);
+	if (!crng_ready())
+		ret = raw_notifier_chain_register(&random_ready_chain, nb);
+	spin_unlock_irqrestore(&random_ready_chain_lock, flags);
+	return ret;
 }
-EXPORT_SYMBOL(add_random_ready_callback);
 
 /*
  * Delete a previously registered readiness callback function.
  */
-void del_random_ready_callback(struct random_ready_callback *rdy)
+int unregister_random_ready_notifier(struct notifier_block *nb)
 {
 	unsigned long flags;
-	struct module *owner = NULL;
-
-	spin_lock_irqsave(&random_ready_list_lock, flags);
-	if (!list_empty(&rdy->list)) {
-		list_del_init(&rdy->list);
-		owner = rdy->owner;
-	}
-	spin_unlock_irqrestore(&random_ready_list_lock, flags);
+	int ret;
 
-	module_put(owner);
+	spin_lock_irqsave(&random_ready_chain_lock, flags);
+	ret = raw_notifier_chain_unregister(&random_ready_chain, nb);
+	spin_unlock_irqrestore(&random_ready_chain_lock, flags);
+	return ret;
 }
-EXPORT_SYMBOL(del_random_ready_callback);
 
 static void process_random_ready_list(void)
 {
 	unsigned long flags;
-	struct random_ready_callback *rdy, *tmp;
 
-	spin_lock_irqsave(&random_ready_list_lock, flags);
-	list_for_each_entry_safe(rdy, tmp, &random_ready_list, list) {
-		struct module *owner = rdy->owner;
-
-		list_del_init(&rdy->list);
-		rdy->func(rdy);
-		module_put(owner);
-	}
-	spin_unlock_irqrestore(&random_ready_list_lock, flags);
+	spin_lock_irqsave(&random_ready_chain_lock, flags);
+	raw_notifier_call_chain(&random_ready_chain, 0, NULL);
+	spin_unlock_irqrestore(&random_ready_chain_lock, flags);
 }
 
 #define warn_unseeded_randomness(previous) \
diff --git a/include/linux/random.h b/include/linux/random.h
index f209f1a788996..fab1ab0563b48 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -10,11 +10,7 @@
 
 #include <uapi/linux/random.h>
 
-struct random_ready_callback {
-	struct list_head list;
-	void (*func)(struct random_ready_callback *rdy);
-	struct module *owner;
-};
+struct notifier_block;
 
 extern void add_device_randomness(const void *, size_t);
 extern void add_bootloader_randomness(const void *, size_t);
@@ -42,8 +38,8 @@ extern void get_random_bytes(void *buf, size_t nbytes);
 extern int wait_for_random_bytes(void);
 extern int __init rand_initialize(void);
 extern bool rng_is_initialized(void);
-extern int add_random_ready_callback(struct random_ready_callback *rdy);
-extern void del_random_ready_callback(struct random_ready_callback *rdy);
+extern int register_random_ready_notifier(struct notifier_block *nb);
+extern int unregister_random_ready_notifier(struct notifier_block *nb);
 extern size_t __must_check get_random_bytes_arch(void *buf, size_t nbytes);
 
 #ifndef MODULE
diff --git a/lib/random32.c b/lib/random32.c
index 3c19820796d04..976632003ec65 100644
--- a/lib/random32.c
+++ b/lib/random32.c
@@ -551,9 +551,11 @@ static void prandom_reseed(struct timer_list *unused)
  * To avoid worrying about whether it's safe to delay that interrupt
  * long enough to seed all CPUs, just schedule an immediate timer event.
  */
-static void prandom_timer_start(struct random_ready_callback *unused)
+static int prandom_timer_start(struct notifier_block *nb,
+			       unsigned long action, void *data)
 {
 	mod_timer(&seed_timer, jiffies);
+	return 0;
 }
 
 #ifdef CONFIG_RANDOM32_SELFTEST
@@ -617,13 +619,13 @@ core_initcall(prandom32_state_selftest);
  */
 static int __init prandom_init_late(void)
 {
-	static struct random_ready_callback random_ready = {
-		.func = prandom_timer_start
+	static struct notifier_block random_ready = {
+		.notifier_call = prandom_timer_start
 	};
-	int ret = add_random_ready_callback(&random_ready);
+	int ret = register_random_ready_notifier(&random_ready);
 
 	if (ret == -EALREADY) {
-		prandom_timer_start(&random_ready);
+		prandom_timer_start(&random_ready, 0, NULL);
 		ret = 0;
 	}
 	return ret;
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 3b8129dd374cd..36574a806a812 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -757,14 +757,16 @@ static void enable_ptr_key_workfn(struct work_struct *work)
 
 static DECLARE_WORK(enable_ptr_key_work, enable_ptr_key_workfn);
 
-static void fill_random_ptr_key(struct random_ready_callback *unused)
+static int fill_random_ptr_key(struct notifier_block *nb,
+			       unsigned long action, void *data)
 {
 	/* This may be in an interrupt handler. */
 	queue_work(system_unbound_wq, &enable_ptr_key_work);
+	return 0;
 }
 
-static struct random_ready_callback random_ready = {
-	.func = fill_random_ptr_key
+static struct notifier_block random_ready = {
+	.notifier_call = fill_random_ptr_key
 };
 
 static int __init initialize_ptr_random(void)
@@ -778,7 +780,7 @@ static int __init initialize_ptr_random(void)
 		return 0;
 	}
 
-	ret = add_random_ready_callback(&random_ready);
+	ret = register_random_ready_notifier(&random_ready);
 	if (!ret) {
 		return 0;
 	} else if (ret == -EALREADY) {
-- 
GitLab


From f3c2682bad7bc6033c837e9c66e5af881fe8d465 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Tue, 1 Mar 2022 20:22:39 +0100
Subject: [PATCH 1299/1586] random: provide notifier for VM fork

Drivers such as WireGuard need to learn when VMs fork in order to clear
sessions. This commit provides a simple notifier_block for that, with a
register and unregister function. When no VM fork detection is compiled
in, this turns into a no-op, similar to how the power notifier works.

Cc: Dominik Brodowski <linux@dominikbrodowski.net>
Cc: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/char/random.c  | 15 +++++++++++++++
 include/linux/random.h |  5 +++++
 2 files changed, 20 insertions(+)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index aa7bc9a3a864a..d9ca441c14bd7 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1140,6 +1140,8 @@ void add_bootloader_randomness(const void *buf, size_t size)
 EXPORT_SYMBOL_GPL(add_bootloader_randomness);
 
 #if IS_ENABLED(CONFIG_VMGENID)
+static BLOCKING_NOTIFIER_HEAD(vmfork_chain);
+
 /*
  * Handle a new unique VM ID, which is unique, not secret, so we
  * don't credit it, but we do immediately force a reseed after so
@@ -1152,10 +1154,23 @@ void add_vmfork_randomness(const void *unique_vm_id, size_t size)
 		crng_reseed(true);
 		pr_notice("crng reseeded due to virtual machine fork\n");
 	}
+	blocking_notifier_call_chain(&vmfork_chain, 0, NULL);
 }
 #if IS_MODULE(CONFIG_VMGENID)
 EXPORT_SYMBOL_GPL(add_vmfork_randomness);
 #endif
+
+int register_random_vmfork_notifier(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_register(&vmfork_chain, nb);
+}
+EXPORT_SYMBOL_GPL(register_random_vmfork_notifier);
+
+int unregister_random_vmfork_notifier(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_unregister(&vmfork_chain, nb);
+}
+EXPORT_SYMBOL_GPL(unregister_random_vmfork_notifier);
 #endif
 
 struct fast_pool {
diff --git a/include/linux/random.h b/include/linux/random.h
index fab1ab0563b48..c0baffe7afb1d 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -32,6 +32,11 @@ extern void add_hwgenerator_randomness(const void *buffer, size_t count,
 				       size_t entropy);
 #if IS_ENABLED(CONFIG_VMGENID)
 extern void add_vmfork_randomness(const void *unique_vm_id, size_t size);
+extern int register_random_vmfork_notifier(struct notifier_block *nb);
+extern int unregister_random_vmfork_notifier(struct notifier_block *nb);
+#else
+static inline int register_random_vmfork_notifier(struct notifier_block *nb) { return 0; }
+static inline int unregister_random_vmfork_notifier(struct notifier_block *nb) { return 0; }
 #endif
 
 extern void get_random_bytes(void *buf, size_t nbytes);
-- 
GitLab


From 2d6919c3205b141ba85fb733b2a67937ff85dc7f Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Tue, 1 Mar 2022 23:26:55 +0100
Subject: [PATCH 1300/1586] wireguard: device: clear keys on VM fork

When a virtual machine forks, it's important that WireGuard clear
existing sessions so that different plaintexts are not transmitted using
the same key+nonce, which can result in catastrophic cryptographic
failure. To accomplish this, we simply hook into the newly added vmfork
notifier.

As a bonus, it turns out that, like the vmfork registration function,
the PM registration function is stubbed out when CONFIG_PM_SLEEP is not
set, so we can actually just remove the maze of ifdefs, which makes it
really quite clean to support both notifiers at once.

Cc: Dominik Brodowski <linux@dominikbrodowski.net>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Theodore Ts'o <tytso@mit.edu>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/net/wireguard/device.c | 38 ++++++++++++++++++++++++----------
 1 file changed, 27 insertions(+), 11 deletions(-)

diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c
index a46067c38bf5d..0fad1331303c0 100644
--- a/drivers/net/wireguard/device.c
+++ b/drivers/net/wireguard/device.c
@@ -59,9 +59,7 @@ out:
 	return ret;
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int wg_pm_notification(struct notifier_block *nb, unsigned long action,
-			      void *data)
+static int wg_pm_notification(struct notifier_block *nb, unsigned long action, void *data)
 {
 	struct wg_device *wg;
 	struct wg_peer *peer;
@@ -92,7 +90,24 @@ static int wg_pm_notification(struct notifier_block *nb, unsigned long action,
 }
 
 static struct notifier_block pm_notifier = { .notifier_call = wg_pm_notification };
-#endif
+
+static int wg_vm_notification(struct notifier_block *nb, unsigned long action, void *data)
+{
+	struct wg_device *wg;
+	struct wg_peer *peer;
+
+	rtnl_lock();
+	list_for_each_entry(wg, &device_list, device_list) {
+		mutex_lock(&wg->device_update_lock);
+		list_for_each_entry(peer, &wg->peer_list, peer_list)
+			wg_noise_expire_current_peer_keypairs(peer);
+		mutex_unlock(&wg->device_update_lock);
+	}
+	rtnl_unlock();
+	return 0;
+}
+
+static struct notifier_block vm_notifier = { .notifier_call = wg_vm_notification };
 
 static int wg_stop(struct net_device *dev)
 {
@@ -424,16 +439,18 @@ int __init wg_device_init(void)
 {
 	int ret;
 
-#ifdef CONFIG_PM_SLEEP
 	ret = register_pm_notifier(&pm_notifier);
 	if (ret)
 		return ret;
-#endif
 
-	ret = register_pernet_device(&pernet_ops);
+	ret = register_random_vmfork_notifier(&vm_notifier);
 	if (ret)
 		goto error_pm;
 
+	ret = register_pernet_device(&pernet_ops);
+	if (ret)
+		goto error_vm;
+
 	ret = rtnl_link_register(&link_ops);
 	if (ret)
 		goto error_pernet;
@@ -442,10 +459,10 @@ int __init wg_device_init(void)
 
 error_pernet:
 	unregister_pernet_device(&pernet_ops);
+error_vm:
+	unregister_random_vmfork_notifier(&vm_notifier);
 error_pm:
-#ifdef CONFIG_PM_SLEEP
 	unregister_pm_notifier(&pm_notifier);
-#endif
 	return ret;
 }
 
@@ -453,8 +470,7 @@ void wg_device_uninit(void)
 {
 	rtnl_link_unregister(&link_ops);
 	unregister_pernet_device(&pernet_ops);
-#ifdef CONFIG_PM_SLEEP
+	unregister_random_vmfork_notifier(&vm_notifier);
 	unregister_pm_notifier(&pm_notifier);
-#endif
 	rcu_barrier();
 }
-- 
GitLab


From f5eab0e2db4f881fb2b62b3fdad5b9be673dd7ae Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Fri, 11 Feb 2022 14:58:44 +0100
Subject: [PATCH 1301/1586] random: use SipHash as interrupt entropy
 accumulator

The current fast_mix() function is a piece of classic mailing list
crypto, where it just sort of sprung up by an anonymous author without a
lot of real analysis of what precisely it was accomplishing. As an ARX
permutation alone, there are some easily searchable differential trails
in it, and as a means of preventing malicious interrupts, it completely
fails, since it xors new data into the entire state every time. It can't
really be analyzed as a random permutation, because it clearly isn't,
and it can't be analyzed as an interesting linear algebraic structure
either, because it's also not that. There really is very little one can
say about it in terms of entropy accumulation. It might diffuse bits,
some of the time, maybe, we hope, I guess. But for the most part, it
fails to accomplish anything concrete.

As a reminder, the simple goal of add_interrupt_randomness() is to
simply accumulate entropy until ~64 interrupts have elapsed, and then
dump it into the main input pool, which uses a cryptographic hash.

It would be nice to have something cryptographically strong in the
interrupt handler itself, in case a malicious interrupt compromises a
per-cpu fast pool within the 64 interrupts / 1 second window, and then
inside of that same window somehow can control its return address and
cycle counter, even if that's a bit far fetched. However, with a very
CPU-limited budget, actually doing that remains an active research
project (and perhaps there'll be something useful for Linux to come out
of it). And while the abundance of caution would be nice, this isn't
*currently* the security model, and we don't yet have a fast enough
solution to make it our security model. Plus there's not exactly a
pressing need to do that. (And for the avoidance of doubt, the actual
cluster of 64 accumulated interrupts still gets dumped into our
cryptographically secure input pool.)

So, for now we are going to stick with the existing interrupt security
model, which assumes that each cluster of 64 interrupt data samples is
mostly non-malicious and not colluding with an infoleaker. With this as
our goal, we have a few more choices, simply aiming to accumulate
entropy, while discarding the least amount of it.

We know from <https://eprint.iacr.org/2019/198> that random oracles,
instantiated as computational hash functions, make good entropy
accumulators and extractors, which is the justification for using
BLAKE2s in the main input pool. As mentioned, we don't have that luxury
here, but we also don't have the same security model requirements,
because we're assuming that there aren't malicious inputs. A
pseudorandom function instance can approximately behave like a random
oracle, provided that the key is uniformly random. But since we're not
concerned with malicious inputs, we can pick a fixed key, which is not
secret, knowing that "nature" won't interact with a sufficiently chosen
fixed key by accident. So we pick a PRF with a fixed initial key, and
accumulate into it continuously, dumping the result every 64 interrupts
into our cryptographically secure input pool.

For this, we make use of SipHash-1-x on 64-bit and HalfSipHash-1-x on
32-bit, which are already in use in the kernel's hsiphash family of
functions and achieve the same performance as the function they replace.
It would be nice to do two rounds, but we don't exactly have the CPU
budget handy for that, and one round alone is already sufficient.

As mentioned, we start with a fixed initial key (zeros is fine), and
allow SipHash's symmetry breaking constants to turn that into a useful
starting point. Also, since we're dumping the result (or half of it on
64-bit so as to tax our hash function the same amount on all platforms)
into the cryptographically secure input pool, there's no point in
finalizing SipHash's output, since it'll wind up being finalized by
something much stronger. This means that all we need to do is use the
ordinary round function word-by-word, as normal SipHash does.
Simplified, the flow is as follows:

Initialize:

    siphash_state_t state;
    siphash_init(&state, key={0, 0, 0, 0});

Update (accumulate) on interrupt:

    siphash_update(&state, interrupt_data_and_timing);

Dump into input pool after 64 interrupts:

    blake2s_update(&input_pool, &state, sizeof(state) / 2);

The result of all of this is that the security model is unchanged from
before -- we assume non-malicious inputs -- yet we now implement that
model with a stronger argument. I would like to emphasize, again, that
the purpose of this commit is to improve the existing design, by making
it analyzable, without changing any fundamental assumptions. There may
well be value down the road in changing up the existing design, using
something cryptographically strong, or simply using a ring buffer of
samples rather than having a fast_mix() at all, or changing which and
how much data we collect each interrupt so that we can use something
linear, or a variety of other ideas. This commit does not invalidate the
potential for those in the future.

For example, in the future, if we're able to characterize the data we're
collecting on each interrupt, we may be able to inch toward information
theoretic accumulators. <https://eprint.iacr.org/2021/523> shows that `s
= ror32(s, 7) ^ x` and `s = ror64(s, 19) ^ x` make very good
accumulators for 2-monotone distributions, which would apply to
timestamp counters, like random_get_entropy() or jiffies, but would not
apply to our current combination of the two values, or to the various
function addresses and register values we mix in. Alternatively,
<https://eprint.iacr.org/2021/1002> shows that max-period linear
functions with no non-trivial invariant subspace make good extractors,
used in the form `s = f(s) ^ x`. However, this only works if the input
data is both identical and independent, and obviously a collection of
address values and counters fails; so it goes with theoretical papers.
Future directions here may involve trying to characterize more precisely
what we actually need to collect in the interrupt handler, and building
something specific around that.

However, as mentioned, the morass of data we're gathering at the
interrupt handler presently defies characterization, and so we use
SipHash for now, which works well and performs well.

Cc: Theodore Ts'o <tytso@mit.edu>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Jean-Philippe Aumasson <jeanphilippe.aumasson@gmail.com>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/char/random.c | 94 +++++++++++++++++++++++++------------------
 1 file changed, 55 insertions(+), 39 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index d9ca441c14bd7..50973a074d2d5 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1174,48 +1174,51 @@ EXPORT_SYMBOL_GPL(unregister_random_vmfork_notifier);
 #endif
 
 struct fast_pool {
-	union {
-		u32 pool32[4];
-		u64 pool64[2];
-	};
 	struct work_struct mix;
+	unsigned long pool[4];
 	unsigned long last;
 	unsigned int count;
 	u16 reg_idx;
 };
 
+static DEFINE_PER_CPU(struct fast_pool, irq_randomness) = {
+#ifdef CONFIG_64BIT
+	/* SipHash constants */
+	.pool = { 0x736f6d6570736575UL, 0x646f72616e646f6dUL,
+		  0x6c7967656e657261UL, 0x7465646279746573UL }
+#else
+	/* HalfSipHash constants */
+	.pool = { 0, 0, 0x6c796765U, 0x74656462U }
+#endif
+};
+
 /*
- * This is a fast mixing routine used by the interrupt randomness
- * collector. It's hardcoded for an 128 bit pool and assumes that any
- * locks that might be needed are taken by the caller.
+ * This is [Half]SipHash-1-x, starting from an empty key. Because
+ * the key is fixed, it assumes that its inputs are non-malicious,
+ * and therefore this has no security on its own. s represents the
+ * 128 or 256-bit SipHash state, while v represents a 128-bit input.
  */
-static void fast_mix(u32 pool[4])
+static void fast_mix(unsigned long s[4], const unsigned long *v)
 {
-	u32 a = pool[0],	b = pool[1];
-	u32 c = pool[2],	d = pool[3];
-
-	a += b;			c += d;
-	b = rol32(b, 6);	d = rol32(d, 27);
-	d ^= a;			b ^= c;
-
-	a += b;			c += d;
-	b = rol32(b, 16);	d = rol32(d, 14);
-	d ^= a;			b ^= c;
-
-	a += b;			c += d;
-	b = rol32(b, 6);	d = rol32(d, 27);
-	d ^= a;			b ^= c;
-
-	a += b;			c += d;
-	b = rol32(b, 16);	d = rol32(d, 14);
-	d ^= a;			b ^= c;
+	size_t i;
 
-	pool[0] = a;  pool[1] = b;
-	pool[2] = c;  pool[3] = d;
+	for (i = 0; i < 16 / sizeof(long); ++i) {
+		s[3] ^= v[i];
+#ifdef CONFIG_64BIT
+		s[0] += s[1]; s[1] = rol64(s[1], 13); s[1] ^= s[0]; s[0] = rol64(s[0], 32);
+		s[2] += s[3]; s[3] = rol64(s[3], 16); s[3] ^= s[2];
+		s[0] += s[3]; s[3] = rol64(s[3], 21); s[3] ^= s[0];
+		s[2] += s[1]; s[1] = rol64(s[1], 17); s[1] ^= s[2]; s[2] = rol64(s[2], 32);
+#else
+		s[0] += s[1]; s[1] = rol32(s[1],  5); s[1] ^= s[0]; s[0] = rol32(s[0], 16);
+		s[2] += s[3]; s[3] = rol32(s[3],  8); s[3] ^= s[2];
+		s[0] += s[3]; s[3] = rol32(s[3],  7); s[3] ^= s[0];
+		s[2] += s[1]; s[1] = rol32(s[1], 13); s[1] ^= s[2]; s[2] = rol32(s[2], 16);
+#endif
+		s[0] ^= v[i];
+	}
 }
 
-static DEFINE_PER_CPU(struct fast_pool, irq_randomness);
-
 #ifdef CONFIG_SMP
 /*
  * This function is called when the CPU has just come online, with
@@ -1257,7 +1260,15 @@ static unsigned long get_reg(struct fast_pool *f, struct pt_regs *regs)
 static void mix_interrupt_randomness(struct work_struct *work)
 {
 	struct fast_pool *fast_pool = container_of(work, struct fast_pool, mix);
-	u32 pool[4];
+	/*
+	 * The size of the copied stack pool is explicitly 16 bytes so that we
+	 * tax mix_pool_byte()'s compression function the same amount on all
+	 * platforms. This means on 64-bit we copy half the pool into this,
+	 * while on 32-bit we copy all of it. The entropy is supposed to be
+	 * sufficiently dispersed between bits that in the sponge-like
+	 * half case, on average we don't wind up "losing" some.
+	 */
+	u8 pool[16];
 
 	/* Check to see if we're running on the wrong CPU due to hotplug. */
 	local_irq_disable();
@@ -1270,7 +1281,7 @@ static void mix_interrupt_randomness(struct work_struct *work)
 	 * Copy the pool to the stack so that the mixer always has a
 	 * consistent view, before we reenable irqs again.
 	 */
-	memcpy(pool, fast_pool->pool32, sizeof(pool));
+	memcpy(pool, fast_pool->pool, sizeof(pool));
 	fast_pool->count = 0;
 	fast_pool->last = jiffies;
 	local_irq_enable();
@@ -1294,25 +1305,30 @@ void add_interrupt_randomness(int irq)
 	struct fast_pool *fast_pool = this_cpu_ptr(&irq_randomness);
 	struct pt_regs *regs = get_irq_regs();
 	unsigned int new_count;
+	union {
+		u32 u32[4];
+		u64 u64[2];
+		unsigned long longs[16 / sizeof(long)];
+	} irq_data;
 
 	if (cycles == 0)
 		cycles = get_reg(fast_pool, regs);
 
 	if (sizeof(cycles) == 8)
-		fast_pool->pool64[0] ^= cycles ^ rol64(now, 32) ^ irq;
+		irq_data.u64[0] = cycles ^ rol64(now, 32) ^ irq;
 	else {
-		fast_pool->pool32[0] ^= cycles ^ irq;
-		fast_pool->pool32[1] ^= now;
+		irq_data.u32[0] = cycles ^ irq;
+		irq_data.u32[1] = now;
 	}
 
 	if (sizeof(unsigned long) == 8)
-		fast_pool->pool64[1] ^= regs ? instruction_pointer(regs) : _RET_IP_;
+		irq_data.u64[1] = regs ? instruction_pointer(regs) : _RET_IP_;
 	else {
-		fast_pool->pool32[2] ^= regs ? instruction_pointer(regs) : _RET_IP_;
-		fast_pool->pool32[3] ^= get_reg(fast_pool, regs);
+		irq_data.u32[2] = regs ? instruction_pointer(regs) : _RET_IP_;
+		irq_data.u32[3] = get_reg(fast_pool, regs);
 	}
 
-	fast_mix(fast_pool->pool32);
+	fast_mix(fast_pool->pool, irq_data.longs);
 	new_count = ++fast_pool->count;
 
 	if (new_count & MIX_INFLIGHT)
-- 
GitLab


From a96cfe2d427064325ecbf56df8816c6b871ec285 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Tue, 8 Mar 2022 11:20:17 -0700
Subject: [PATCH 1302/1586] random: make consistent usage of crng_ready()

Rather than sometimes checking `crng_init < 2`, we should always use the
crng_ready() macro, so that should we change anything later, it's
consistent. Additionally, that macro already has a likely() around it,
which means we don't need to open code our own likely() and unlikely()
annotations.

Cc: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/char/random.c | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 50973a074d2d5..19a602c69f2fc 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -122,18 +122,13 @@ static void try_to_generate_entropy(void);
  */
 int wait_for_random_bytes(void)
 {
-	if (likely(crng_ready()))
-		return 0;
-
-	do {
+	while (!crng_ready()) {
 		int ret;
 		ret = wait_event_interruptible_timeout(crng_init_wait, crng_ready(), HZ);
 		if (ret)
 			return ret > 0 ? 0 : ret;
-
 		try_to_generate_entropy();
-	} while (!crng_ready());
-
+	}
 	return 0;
 }
 EXPORT_SYMBOL(wait_for_random_bytes);
@@ -288,7 +283,7 @@ static void crng_reseed(bool force)
 		++next_gen;
 	WRITE_ONCE(base_crng.generation, next_gen);
 	WRITE_ONCE(base_crng.birth, jiffies);
-	if (crng_init < 2) {
+	if (!crng_ready()) {
 		crng_init = 2;
 		finalize_init = true;
 	}
@@ -351,7 +346,7 @@ static void crng_make_state(u32 chacha_state[CHACHA_STATE_WORDS],
 	 * ready, we do fast key erasure with the base_crng directly, because
 	 * this is what crng_pre_init_inject() mutates during early init.
 	 */
-	if (unlikely(!crng_ready())) {
+	if (!crng_ready()) {
 		bool ready;
 
 		spin_lock_irqsave(&base_crng.lock, flags);
@@ -794,7 +789,7 @@ static void credit_entropy_bits(size_t nbits)
 		entropy_count = min_t(unsigned int, POOL_BITS, orig + add);
 	} while (cmpxchg(&input_pool.entropy_count, orig, entropy_count) != orig);
 
-	if (crng_init < 2 && entropy_count >= POOL_MIN_BITS)
+	if (!crng_ready() && entropy_count >= POOL_MIN_BITS)
 		crng_reseed(false);
 }
 
@@ -956,7 +951,7 @@ int __init rand_initialize(void)
 	extract_entropy(base_crng.key, sizeof(base_crng.key));
 	++base_crng.generation;
 
-	if (arch_init && trust_cpu && crng_init < 2) {
+	if (arch_init && trust_cpu && !crng_ready()) {
 		crng_init = 2;
 		pr_notice("crng init done (trusting CPU's manufacturer)\n");
 	}
@@ -1558,7 +1553,7 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
 	case RNDRESEEDCRNG:
 		if (!capable(CAP_SYS_ADMIN))
 			return -EPERM;
-		if (crng_init < 2)
+		if (!crng_ready())
 			return -ENODATA;
 		crng_reseed(false);
 		return 0;
-- 
GitLab


From 27b38686a3bb601db48901dbc4e2fc5d77ffa2c1 Mon Sep 17 00:00:00 2001
From: Ojaswin Mujoo <ojaswin@linux.ibm.com>
Date: Tue, 8 Mar 2022 15:22:00 +0530
Subject: [PATCH 1303/1586] ext4: make mb_optimize_scan option work with
 set/unset mount cmd

After moving to the new mount API, mb_optimize_scan mount option
handling was not working as expected due to the parsed value always
being overwritten by default. Refactor and fix this to the expected
behavior described below:

*  mb_optimize_scan=1 - On
*  mb_optimize_scan=0 - Off
*  mb_optimize_scan not passed - On if no. of BGs > threshold else off
*  Remounts retain previous value unless we explicitly pass the option
   with a new value

Fixes: cebe85d570cf ("ext4: switch to the new mount api")
Cc: stable@kernel.org
Reported-by: Ritesh Harjani <riteshh@linux.ibm.com>
Signed-off-by: Ojaswin Mujoo <ojaswin@linux.ibm.com>
Reviewed-by: Ritesh Harjani <riteshh@linux.ibm.com>
Link: https://lore.kernel.org/r/c98970fe99f26718586d02e942f293300fb48ef3.1646732698.git.ojaswin@linux.ibm.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/super.c | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 1e5f4994fe576..4120a743be521 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2021,12 +2021,12 @@ static int ext4_set_test_dummy_encryption(struct super_block *sb, char *arg)
 #define EXT4_SPEC_s_commit_interval		(1 << 16)
 #define EXT4_SPEC_s_fc_debug_max_replay		(1 << 17)
 #define EXT4_SPEC_s_sb_block			(1 << 18)
+#define EXT4_SPEC_mb_optimize_scan		(1 << 19)
 
 struct ext4_fs_context {
 	char		*s_qf_names[EXT4_MAXQUOTAS];
 	char		*test_dummy_enc_arg;
 	int		s_jquota_fmt;	/* Format of quota to use */
-	int		mb_optimize_scan;
 #ifdef CONFIG_EXT4_DEBUG
 	int s_fc_debug_max_replay;
 #endif
@@ -2464,12 +2464,17 @@ static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param)
 			ctx_clear_mount_opt(ctx, m->mount_opt);
 		return 0;
 	case Opt_mb_optimize_scan:
-		if (result.int_32 != 0 && result.int_32 != 1) {
+		if (result.int_32 == 1) {
+			ctx_set_mount_opt2(ctx, EXT4_MOUNT2_MB_OPTIMIZE_SCAN);
+			ctx->spec |= EXT4_SPEC_mb_optimize_scan;
+		} else if (result.int_32 == 0) {
+			ctx_clear_mount_opt2(ctx, EXT4_MOUNT2_MB_OPTIMIZE_SCAN);
+			ctx->spec |= EXT4_SPEC_mb_optimize_scan;
+		} else {
 			ext4_msg(NULL, KERN_WARNING,
 				 "mb_optimize_scan should be set to 0 or 1.");
 			return -EINVAL;
 		}
-		ctx->mb_optimize_scan = result.int_32;
 		return 0;
 	}
 
@@ -4398,7 +4403,6 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
 
 	/* Set defaults for the variables that will be set during parsing */
 	ctx->journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
-	ctx->mb_optimize_scan = DEFAULT_MB_OPTIMIZE_SCAN;
 
 	sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
 	sbi->s_sectors_written_start =
@@ -5349,12 +5353,12 @@ no_journal:
 	 * turned off by passing "mb_optimize_scan=0". This can also be
 	 * turned on forcefully by passing "mb_optimize_scan=1".
 	 */
-	if (ctx->mb_optimize_scan == 1)
-		set_opt2(sb, MB_OPTIMIZE_SCAN);
-	else if (ctx->mb_optimize_scan == 0)
-		clear_opt2(sb, MB_OPTIMIZE_SCAN);
-	else if (sbi->s_groups_count >= MB_DEFAULT_LINEAR_SCAN_THRESHOLD)
-		set_opt2(sb, MB_OPTIMIZE_SCAN);
+	if (!(ctx->spec & EXT4_SPEC_mb_optimize_scan)) {
+		if (sbi->s_groups_count >= MB_DEFAULT_LINEAR_SCAN_THRESHOLD)
+			set_opt2(sb, MB_OPTIMIZE_SCAN);
+		else
+			clear_opt2(sb, MB_OPTIMIZE_SCAN);
+	}
 
 	err = ext4_mb_init(sb);
 	if (err) {
-- 
GitLab


From 077d0c2c78df6f7260cdd015a991327efa44d8ad Mon Sep 17 00:00:00 2001
From: Ojaswin Mujoo <ojaswin@linux.ibm.com>
Date: Tue, 8 Mar 2022 15:22:01 +0530
Subject: [PATCH 1304/1586] ext4: make mb_optimize_scan performance mount
 option work with extents

Currently mb_optimize_scan scan feature which improves filesystem
performance heavily (when FS is fragmented), seems to be not working
with files with extents (ext4 by default has files with extents).

This patch fixes that and makes mb_optimize_scan feature work
for files with extents.

Below are some performance numbers obtained when allocating a 10M and 100M
file with and w/o this patch on a filesytem with no 1M contiguous block.

<perf numbers>
===============
Workload: dd if=/dev/urandom of=test conv=fsync bs=1M count=10/100

Time taken
=====================================================
no.     Size   without-patch     with-patch    Diff(%)
1       10M      0m8.401s         0m5.623s     33.06%
2       100M     1m40.465s        1m14.737s    25.6%

<debug stats>
=============
w/o patch:
  mballoc:
    reqs: 17056
    success: 11407
    groups_scanned: 13643
    cr0_stats:
            hits: 37
            groups_considered: 9472
            useless_loops: 36
            bad_suggestions: 0
    cr1_stats:
            hits: 11418
            groups_considered: 908560
            useless_loops: 1894
            bad_suggestions: 0
    cr2_stats:
            hits: 1873
            groups_considered: 6913
            useless_loops: 21
    cr3_stats:
            hits: 21
            groups_considered: 5040
            useless_loops: 21
    extents_scanned: 417364
            goal_hits: 3707
            2^n_hits: 37
            breaks: 1873
            lost: 0
    buddies_generated: 239/240
    buddies_time_used: 651080
    preallocated: 705
    discarded: 478

with patch:
  mballoc:
    reqs: 12768
    success: 11305
    groups_scanned: 12768
    cr0_stats:
            hits: 1
            groups_considered: 18
            useless_loops: 0
            bad_suggestions: 0
    cr1_stats:
            hits: 5829
            groups_considered: 50626
            useless_loops: 0
            bad_suggestions: 0
    cr2_stats:
            hits: 6938
            groups_considered: 580363
            useless_loops: 0
    cr3_stats:
            hits: 0
            groups_considered: 0
            useless_loops: 0
    extents_scanned: 309059
            goal_hits: 0
            2^n_hits: 1
            breaks: 1463
            lost: 0
    buddies_generated: 239/240
    buddies_time_used: 791392
    preallocated: 673
    discarded: 446

Fixes: 196e402 (ext4: improve cr 0 / cr 1 group scanning)
Cc: stable@kernel.org
Reported-by: Geetika Moolchandani <Geetika.Moolchandani1@ibm.com>
Reported-by: Nageswara R Sastry <rnsastry@linux.ibm.com>
Suggested-by: Ritesh Harjani <riteshh@linux.ibm.com>
Signed-off-by: Ojaswin Mujoo <ojaswin@linux.ibm.com>
Link: https://lore.kernel.org/r/fc9a48f7f8dcfc83891a8b21f6dd8cdf056ed810.1646732698.git.ojaswin@linux.ibm.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/mballoc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 3546fa27d0034..252c168454c7f 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -1000,7 +1000,7 @@ static inline int should_optimize_scan(struct ext4_allocation_context *ac)
 		return 0;
 	if (ac->ac_criteria >= 2)
 		return 0;
-	if (ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS))
+	if (!ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS))
 		return 0;
 	return 1;
 }
-- 
GitLab


From 688b0d8536e0e937f608a93cb6909e14389a0c45 Mon Sep 17 00:00:00 2001
From: lianzhi chang <changlianzhi@uniontech.com>
Date: Thu, 10 Mar 2022 09:44:15 +0800
Subject: [PATCH 1305/1586] doc: fixed a typo in ext4 documentation

The unit of file system size should be TiB, not PiB

Signed-off-by: lianzhi chang <changlianzhi@uniontech.com>
Link: https://lore.kernel.org/r/20220310014415.29937-1-changlianzhi@uniontech.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 Documentation/filesystems/ext4/blocks.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/filesystems/ext4/blocks.rst b/Documentation/filesystems/ext4/blocks.rst
index bd722ecd92d6f..b0f80ea87c90e 100644
--- a/Documentation/filesystems/ext4/blocks.rst
+++ b/Documentation/filesystems/ext4/blocks.rst
@@ -39,7 +39,7 @@ For 32-bit filesystems, limits are as follows:
      - 4TiB
      - 8TiB
      - 16TiB
-     - 256PiB
+     - 256TiB
    * - Blocks Per Block Group
      - 8,192
      - 16,384
-- 
GitLab


From 2bb8dd401a4f96973d6a9de4218d7f01fbd497ee Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 10 Mar 2022 11:18:32 +0100
Subject: [PATCH 1306/1586] ext4: warn when dirtying page w/o buffers in
 data=journal mode

Recently I've got a report of BUG_ON trigerring during transaction
commit in ext4_journalled_writepage_callback() because we've spotted a
dirty page without buffers. Add WARN_ON_ONCE to
ext4_journalled_set_page_dirty() to catch the problematic condition
earlier where we have better chance of understanding which code path is
creating dirty data without preparing the page properly. Also update the
comment with current information when we are at it.

Signed-off-by: Jan Kara <jack@suse.cz>
Link: https://lore.kernel.org/r/20220310101832.5645-1-jack@suse.cz
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/inode.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 531a94f48637c..26218088f63b3 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3566,10 +3566,11 @@ const struct iomap_ops ext4_iomap_report_ops = {
 };
 
 /*
- * Pages can be marked dirty completely asynchronously from ext4's journalling
- * activity.  By filemap_sync_pte(), try_to_unmap_one(), etc.  We cannot do
- * much here because ->set_page_dirty is called under VFS locks.  The page is
- * not necessarily locked.
+ * Whenever the page is being dirtied, corresponding buffers should already be
+ * attached to the transaction (we take care of this in ext4_page_mkwrite() and
+ * ext4_write_begin()). However we cannot move buffers to dirty transaction
+ * lists here because ->set_page_dirty is called under VFS locks and the page
+ * is not necessarily locked.
  *
  * We cannot just dirty the page and leave attached buffers clean, because the
  * buffers' dirty state is "definitive".  We cannot just set the buffers dirty
@@ -3580,6 +3581,7 @@ const struct iomap_ops ext4_iomap_report_ops = {
  */
 static int ext4_journalled_set_page_dirty(struct page *page)
 {
+	WARN_ON_ONCE(!page_has_buffers(page));
 	SetPageChecked(page);
 	return __set_page_dirty_nobuffers(page);
 }
-- 
GitLab


From c864ccd182d6ff2730a0f5b636c6b7c48f6f4f7f Mon Sep 17 00:00:00 2001
From: Ritesh Harjani <riteshh@linux.ibm.com>
Date: Sat, 12 Mar 2022 11:09:46 +0530
Subject: [PATCH 1307/1586] ext4: remove unused enum EXT4_FC_COMMIT_FAILED

Below commit removed all references of EXT4_FC_COMMIT_FAILED.
commit 0915e464cb274 ("ext4: simplify updating of fast commit stats")

Just remove it since it is not used anymore.

Signed-off-by: Ritesh Harjani <riteshh@linux.ibm.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
Link: https://lore.kernel.org/r/c941357e476be07a1138c7319ca5faab7fb80fc6.1647057583.git.riteshh@linux.ibm.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/fast_commit.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fs/ext4/fast_commit.h b/fs/ext4/fast_commit.h
index 02afa52e8e41b..80414dcba6e1b 100644
--- a/fs/ext4/fast_commit.h
+++ b/fs/ext4/fast_commit.h
@@ -93,7 +93,6 @@ enum {
 	EXT4_FC_REASON_RENAME_DIR,
 	EXT4_FC_REASON_FALLOC_RANGE,
 	EXT4_FC_REASON_INODE_JOURNAL_DATA,
-	EXT4_FC_COMMIT_FAILED,
 	EXT4_FC_REASON_MAX
 };
 
-- 
GitLab


From 7af1974af0a9ba8a8ed2e3e947d87dd4d9a78d27 Mon Sep 17 00:00:00 2001
From: Ritesh Harjani <riteshh@linux.ibm.com>
Date: Sat, 12 Mar 2022 11:09:47 +0530
Subject: [PATCH 1308/1586] ext4: fix ext4_fc_stats trace point

ftrace's __print_symbolic() requires that any enum values used in the
symbol to string translation table be wrapped in a TRACE_DEFINE_ENUM
so that the enum value can be decoded from the ftrace ring buffer by
user space tooling.

This patch also fixes few other problems found in this trace point.
e.g. dereferencing structures in TP_printk which should not be done
at any cost.

Also to avoid checkpatch warnings, this patch removes those
whitespaces/tab stops issues.

Cc: stable@kernel.org
Fixes: aa75f4d3daae ("ext4: main fast-commit commit path")
Reported-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Ritesh Harjani <riteshh@linux.ibm.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Reviewed-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
Link: https://lore.kernel.org/r/b4b9691414c35c62e570b723e661c80674169f9a.1647057583.git.riteshh@linux.ibm.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 include/trace/events/ext4.h | 78 +++++++++++++++++++++++--------------
 1 file changed, 49 insertions(+), 29 deletions(-)

diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index 19e957b7f9410..1a0b7030f72a3 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -95,6 +95,17 @@ TRACE_DEFINE_ENUM(ES_REFERENCED_B);
 	{ FALLOC_FL_COLLAPSE_RANGE,	"COLLAPSE_RANGE"},	\
 	{ FALLOC_FL_ZERO_RANGE,		"ZERO_RANGE"})
 
+TRACE_DEFINE_ENUM(EXT4_FC_REASON_XATTR);
+TRACE_DEFINE_ENUM(EXT4_FC_REASON_CROSS_RENAME);
+TRACE_DEFINE_ENUM(EXT4_FC_REASON_JOURNAL_FLAG_CHANGE);
+TRACE_DEFINE_ENUM(EXT4_FC_REASON_NOMEM);
+TRACE_DEFINE_ENUM(EXT4_FC_REASON_SWAP_BOOT);
+TRACE_DEFINE_ENUM(EXT4_FC_REASON_RESIZE);
+TRACE_DEFINE_ENUM(EXT4_FC_REASON_RENAME_DIR);
+TRACE_DEFINE_ENUM(EXT4_FC_REASON_FALLOC_RANGE);
+TRACE_DEFINE_ENUM(EXT4_FC_REASON_INODE_JOURNAL_DATA);
+TRACE_DEFINE_ENUM(EXT4_FC_REASON_MAX);
+
 #define show_fc_reason(reason)						\
 	__print_symbolic(reason,					\
 		{ EXT4_FC_REASON_XATTR,		"XATTR"},		\
@@ -2723,41 +2734,50 @@ TRACE_EVENT(ext4_fc_commit_stop,
 
 #define FC_REASON_NAME_STAT(reason)					\
 	show_fc_reason(reason),						\
-	__entry->sbi->s_fc_stats.fc_ineligible_reason_count[reason]
+	__entry->fc_ineligible_rc[reason]
 
 TRACE_EVENT(ext4_fc_stats,
-	    TP_PROTO(struct super_block *sb),
-
-	    TP_ARGS(sb),
+	TP_PROTO(struct super_block *sb),
 
-	    TP_STRUCT__entry(
-		    __field(dev_t, dev)
-		    __field(struct ext4_sb_info *, sbi)
-		    __field(int, count)
-		    ),
+	TP_ARGS(sb),
 
-	    TP_fast_assign(
-		    __entry->dev = sb->s_dev;
-		    __entry->sbi = EXT4_SB(sb);
-		    ),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__array(unsigned int, fc_ineligible_rc, EXT4_FC_REASON_MAX)
+		__field(unsigned long, fc_commits)
+		__field(unsigned long, fc_ineligible_commits)
+		__field(unsigned long, fc_numblks)
+	),
 
-	    TP_printk("dev %d:%d fc ineligible reasons:\n"
-		      "%s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d; "
-		      "num_commits:%ld, ineligible: %ld, numblks: %ld",
-		      MAJOR(__entry->dev), MINOR(__entry->dev),
-		      FC_REASON_NAME_STAT(EXT4_FC_REASON_XATTR),
-		      FC_REASON_NAME_STAT(EXT4_FC_REASON_CROSS_RENAME),
-		      FC_REASON_NAME_STAT(EXT4_FC_REASON_JOURNAL_FLAG_CHANGE),
-		      FC_REASON_NAME_STAT(EXT4_FC_REASON_NOMEM),
-		      FC_REASON_NAME_STAT(EXT4_FC_REASON_SWAP_BOOT),
-		      FC_REASON_NAME_STAT(EXT4_FC_REASON_RESIZE),
-		      FC_REASON_NAME_STAT(EXT4_FC_REASON_RENAME_DIR),
-		      FC_REASON_NAME_STAT(EXT4_FC_REASON_FALLOC_RANGE),
-		      FC_REASON_NAME_STAT(EXT4_FC_REASON_INODE_JOURNAL_DATA),
-		      __entry->sbi->s_fc_stats.fc_num_commits,
-		      __entry->sbi->s_fc_stats.fc_ineligible_commits,
-		      __entry->sbi->s_fc_stats.fc_numblks)
+	TP_fast_assign(
+		int i;
 
+		__entry->dev = sb->s_dev;
+		for (i = 0; i < EXT4_FC_REASON_MAX; i++) {
+			__entry->fc_ineligible_rc[i] =
+				EXT4_SB(sb)->s_fc_stats.fc_ineligible_reason_count[i];
+		}
+		__entry->fc_commits = EXT4_SB(sb)->s_fc_stats.fc_num_commits;
+		__entry->fc_ineligible_commits =
+			EXT4_SB(sb)->s_fc_stats.fc_ineligible_commits;
+		__entry->fc_numblks = EXT4_SB(sb)->s_fc_stats.fc_numblks;
+	),
+
+	TP_printk("dev %d,%d fc ineligible reasons:\n"
+		  "%s:%u, %s:%u, %s:%u, %s:%u, %s:%u, %s:%u, %s:%u, %s:%u, %s:%u "
+		  "num_commits:%lu, ineligible: %lu, numblks: %lu",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  FC_REASON_NAME_STAT(EXT4_FC_REASON_XATTR),
+		  FC_REASON_NAME_STAT(EXT4_FC_REASON_CROSS_RENAME),
+		  FC_REASON_NAME_STAT(EXT4_FC_REASON_JOURNAL_FLAG_CHANGE),
+		  FC_REASON_NAME_STAT(EXT4_FC_REASON_NOMEM),
+		  FC_REASON_NAME_STAT(EXT4_FC_REASON_SWAP_BOOT),
+		  FC_REASON_NAME_STAT(EXT4_FC_REASON_RESIZE),
+		  FC_REASON_NAME_STAT(EXT4_FC_REASON_RENAME_DIR),
+		  FC_REASON_NAME_STAT(EXT4_FC_REASON_FALLOC_RANGE),
+		  FC_REASON_NAME_STAT(EXT4_FC_REASON_INODE_JOURNAL_DATA),
+		  __entry->fc_commits, __entry->fc_ineligible_commits,
+		  __entry->fc_numblks)
 );
 
 #define DEFINE_TRACE_DENTRY_EVENT(__type)				\
-- 
GitLab


From 8cb5a30372ef5cf2b1d258fce1711d80f834740a Mon Sep 17 00:00:00 2001
From: Ritesh Harjani <riteshh@linux.ibm.com>
Date: Sat, 12 Mar 2022 11:09:48 +0530
Subject: [PATCH 1309/1586] ext4: convert ext4_fc_track_dentry type events to
 use event class

One should use DECLARE_EVENT_CLASS for similar event types instead of
defining TRACE_EVENT for each event type. This is helpful in reducing
the text section footprint for e.g. [1]

[1]: https://lwn.net/Articles/381064/

Signed-off-by: Ritesh Harjani <riteshh@linux.ibm.com>
Reviewed-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://lore.kernel.org/r/a019cb46219ef4b30e4d98d7ced7d8819a2fc61d.1647057583.git.riteshh@linux.ibm.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 include/trace/events/ext4.h | 56 ++++++++++++++++++++-----------------
 1 file changed, 31 insertions(+), 25 deletions(-)

diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index 1a0b7030f72a3..3d48fcb629876 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -2780,33 +2780,39 @@ TRACE_EVENT(ext4_fc_stats,
 		  __entry->fc_numblks)
 );
 
-#define DEFINE_TRACE_DENTRY_EVENT(__type)				\
-	TRACE_EVENT(ext4_fc_track_##__type,				\
-	    TP_PROTO(struct inode *inode, struct dentry *dentry, int ret), \
-									\
-	    TP_ARGS(inode, dentry, ret),				\
-									\
-	    TP_STRUCT__entry(						\
-		    __field(dev_t, dev)					\
-		    __field(int, ino)					\
-		    __field(int, error)					\
-		    ),							\
-									\
-	    TP_fast_assign(						\
-		    __entry->dev = inode->i_sb->s_dev;			\
-		    __entry->ino = inode->i_ino;			\
-		    __entry->error = ret;				\
-		    ),							\
-									\
-	    TP_printk("dev %d:%d, inode %d, error %d, fc_%s",		\
-		      MAJOR(__entry->dev), MINOR(__entry->dev),		\
-		      __entry->ino, __entry->error,			\
-		      #__type)						\
+DECLARE_EVENT_CLASS(ext4_fc_track_dentry,
+
+	TP_PROTO(struct inode *inode, struct dentry *dentry, int ret),
+
+	TP_ARGS(inode, dentry, ret),
+
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(ino_t, i_ino)
+		__field(int, error)
+	),
+
+	TP_fast_assign(
+		__entry->dev = inode->i_sb->s_dev;
+		__entry->i_ino = inode->i_ino;
+		__entry->error = ret;
+	),
+
+	TP_printk("dev %d,%d, ino %lu, error %d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->i_ino, __entry->error
 	)
+);
+
+#define DEFINE_EVENT_CLASS_DENTRY(__type)				\
+DEFINE_EVENT(ext4_fc_track_dentry, ext4_fc_track_##__type,		\
+	TP_PROTO(struct inode *inode, struct dentry *dentry, int ret),	\
+	TP_ARGS(inode, dentry, ret)					\
+)
 
-DEFINE_TRACE_DENTRY_EVENT(create);
-DEFINE_TRACE_DENTRY_EVENT(link);
-DEFINE_TRACE_DENTRY_EVENT(unlink);
+DEFINE_EVENT_CLASS_DENTRY(create);
+DEFINE_EVENT_CLASS_DENTRY(link);
+DEFINE_EVENT_CLASS_DENTRY(unlink);
 
 TRACE_EVENT(ext4_fc_track_inode,
 	    TP_PROTO(struct inode *inode, int ret),
-- 
GitLab


From 7f142440847480838e0c4b3092f24455cec111a7 Mon Sep 17 00:00:00 2001
From: Ritesh Harjani <riteshh@linux.ibm.com>
Date: Sat, 12 Mar 2022 11:09:49 +0530
Subject: [PATCH 1310/1586] ext4: do not call FC trace event in
 ext4_fc_commit() if FS does not support FC

This just puts trace_ext4_fc_commit_start(sb) & ktime_get()
for measuring FC commit time, after the check of whether sb
supports JOURNAL_FAST_COMMIT or not.

Signed-off-by: Ritesh Harjani <riteshh@linux.ibm.com>
Reviewed-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://lore.kernel.org/r/d53cf3e535924ec0a1eb41a560e96561b0727e7a.1647057583.git.riteshh@linux.ibm.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/fast_commit.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
index 5ac594e034024..55d33f296caed 100644
--- a/fs/ext4/fast_commit.c
+++ b/fs/ext4/fast_commit.c
@@ -1165,13 +1165,13 @@ int ext4_fc_commit(journal_t *journal, tid_t commit_tid)
 	int status = EXT4_FC_STATUS_OK, fc_bufs_before = 0;
 	ktime_t start_time, commit_time;
 
+	if (!test_opt2(sb, JOURNAL_FAST_COMMIT))
+		return jbd2_complete_transaction(journal, commit_tid);
+
 	trace_ext4_fc_commit_start(sb);
 
 	start_time = ktime_get();
 
-	if (!test_opt2(sb, JOURNAL_FAST_COMMIT))
-		return jbd2_complete_transaction(journal, commit_tid);
-
 restart_fc:
 	ret = jbd2_fc_begin_commit(journal, commit_tid);
 	if (ret == -EALREADY) {
-- 
GitLab


From 7a7ff644aeaf071d433caffb3b8ea57354b55bd3 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Tue, 8 Mar 2022 23:32:34 -0700
Subject: [PATCH 1311/1586] random: reseed more often immediately after booting

In order to chip away at the "premature first" problem, we augment our
existing entropy accounting with more frequent reseedings at boot.

The idea is that at boot, we're getting entropy from various places, and
we're not very sure which of early boot entropy is good and which isn't.
Even when we're crediting the entropy, we're still not totally certain
that it's any good. Since boot is the one time (aside from a compromise)
that we have zero entropy, it's important that we shepherd entropy into
the crng fairly often.

At the same time, we don't want a "premature next" problem, whereby an
attacker can brute force individual bits of added entropy. In lieu of
going full-on Fortuna (for now), we can pick a simpler strategy of just
reseeding more often during the first 5 minutes after boot. This is
still bounded by the 256-bit entropy credit requirement, so we'll skip a
reseeding if we haven't reached that, but in case entropy /is/ coming
in, this ensures that it makes its way into the crng rather rapidly
during these early stages.

Ordinarily we reseed if the previous reseeding is 300 seconds old. This
commit changes things so that for the first 600 seconds of boot time, we
reseed if the previous reseeding is uptime / 2 seconds old. That means
that we'll reseed at the very least double the uptime of the previous
reseeding.

Cc: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/char/random.c | 28 +++++++++++++++++++++++++---
 1 file changed, 25 insertions(+), 3 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 19a602c69f2fc..defdba110d1df 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -327,6 +327,28 @@ static void crng_fast_key_erasure(u8 key[CHACHA_KEY_SIZE],
 	memzero_explicit(first_block, sizeof(first_block));
 }
 
+/*
+ * Return whether the crng seed is considered to be sufficiently
+ * old that a reseeding might be attempted. This happens if the last
+ * reseeding was CRNG_RESEED_INTERVAL ago, or during early boot, at
+ * an interval proportional to the uptime.
+ */
+static bool crng_has_old_seed(void)
+{
+	static bool early_boot = true;
+	unsigned long interval = CRNG_RESEED_INTERVAL;
+
+	if (unlikely(READ_ONCE(early_boot))) {
+		time64_t uptime = ktime_get_seconds();
+		if (uptime >= CRNG_RESEED_INTERVAL / HZ * 2)
+			WRITE_ONCE(early_boot, false);
+		else
+			interval = max_t(unsigned int, 5 * HZ,
+					 (unsigned int)uptime / 2 * HZ);
+	}
+	return time_after(jiffies, READ_ONCE(base_crng.birth) + interval);
+}
+
 /*
  * This function returns a ChaCha state that you may use for generating
  * random data. It also returns up to 32 bytes on its own of random data
@@ -360,10 +382,10 @@ static void crng_make_state(u32 chacha_state[CHACHA_STATE_WORDS],
 	}
 
 	/*
-	 * If the base_crng is more than 5 minutes old, we reseed, which
-	 * in turn bumps the generation counter that we check below.
+	 * If the base_crng is old enough, we try to reseed, which in turn
+	 * bumps the generation counter that we check below.
 	 */
-	if (unlikely(time_after(jiffies, READ_ONCE(base_crng.birth) + CRNG_RESEED_INTERVAL)))
+	if (unlikely(crng_has_old_seed()))
 		crng_reseed(false);
 
 	local_lock_irqsave(&crngs.lock, flags);
-- 
GitLab


From 3e504d2026eb6c8762cd6040ae57db166516824a Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Tue, 8 Mar 2022 10:12:16 -0700
Subject: [PATCH 1312/1586] random: check for signal and try earlier when
 generating entropy

Rather than waiting a full second in an interruptable waiter before
trying to generate entropy, try to generate entropy first and wait
second. While waiting one second might give an extra second for getting
entropy from elsewhere, we're already pretty late in the init process
here, and whatever else is generating entropy will still continue to
contribute. This has implications on signal handling: we call
try_to_generate_entropy() from wait_for_random_bytes(), and
wait_for_random_bytes() always uses wait_event_interruptible_timeout()
when waiting, since it's called by userspace code in restartable
contexts, where signals can pend. Since try_to_generate_entropy() now
runs first, if a signal is pending, it's necessary for
try_to_generate_entropy() to check for signals, since it won't hit the
wait until after try_to_generate_entropy() has returned. And even before
this change, when entering a busy loop in try_to_generate_entropy(), we
should have been checking to see if any signals are pending, so that a
process doesn't get stuck in that loop longer than expected.

Cc: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/char/random.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index defdba110d1df..0bdefada7453a 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -124,10 +124,11 @@ int wait_for_random_bytes(void)
 {
 	while (!crng_ready()) {
 		int ret;
+
+		try_to_generate_entropy();
 		ret = wait_event_interruptible_timeout(crng_init_wait, crng_ready(), HZ);
 		if (ret)
 			return ret > 0 ? 0 : ret;
-		try_to_generate_entropy();
 	}
 	return 0;
 }
@@ -1398,7 +1399,7 @@ static void try_to_generate_entropy(void)
 		return;
 
 	timer_setup_on_stack(&stack.timer, entropy_timer, 0);
-	while (!crng_ready()) {
+	while (!crng_ready() && !signal_pending(current)) {
 		if (!timer_pending(&stack.timer))
 			mod_timer(&stack.timer, jiffies + 1);
 		mix_pool_bytes(&stack.cycles, sizeof(stack.cycles));
-- 
GitLab


From 40ce1121c1d76daf9048a86e36c83e469281b9fd Mon Sep 17 00:00:00 2001
From: Jocelyn Falempe <jfalempe@redhat.com>
Date: Tue, 8 Mar 2022 18:43:21 +0100
Subject: [PATCH 1313/1586] drm/mgag200: Fix PLL setup for g200wb and g200ew

commit f86c3ed55920 ("drm/mgag200: Split PLL setup into compute and
 update functions") introduced a regression for g200wb and g200ew.
The PLLs are not set up properly, and VGA screen stays
black, or displays "out of range" message.

MGA1064_WB_PIX_PLLC_N/M/P was mistakenly replaced with
MGA1064_PIX_PLLC_N/M/P which have different addresses.

Patch tested on a Dell T310 with g200wb

Fixes: f86c3ed55920 ("drm/mgag200: Split PLL setup into compute and update functions")
Cc: stable@vger.kernel.org
Signed-off-by: Jocelyn Falempe <jfalempe@redhat.com>
Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20220308174321.225606-1-jfalempe@redhat.com
---
 drivers/gpu/drm/mgag200/mgag200_pll.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/mgag200/mgag200_pll.c b/drivers/gpu/drm/mgag200/mgag200_pll.c
index e9ae22b4f8138..52be08b744ade 100644
--- a/drivers/gpu/drm/mgag200/mgag200_pll.c
+++ b/drivers/gpu/drm/mgag200/mgag200_pll.c
@@ -404,9 +404,9 @@ mgag200_pixpll_update_g200wb(struct mgag200_pll *pixpll, const struct mgag200_pl
 		udelay(50);
 
 		/* program pixel pll register */
-		WREG_DAC(MGA1064_PIX_PLLC_N, xpixpllcn);
-		WREG_DAC(MGA1064_PIX_PLLC_M, xpixpllcm);
-		WREG_DAC(MGA1064_PIX_PLLC_P, xpixpllcp);
+		WREG_DAC(MGA1064_WB_PIX_PLLC_N, xpixpllcn);
+		WREG_DAC(MGA1064_WB_PIX_PLLC_M, xpixpllcm);
+		WREG_DAC(MGA1064_WB_PIX_PLLC_P, xpixpllcp);
 
 		udelay(50);
 
-- 
GitLab


From a680b1832ced3b5fa7c93484248fd221ea0d614b Mon Sep 17 00:00:00 2001
From: Brian Masney <bmasney@redhat.com>
Date: Thu, 10 Mar 2022 18:24:59 -0500
Subject: [PATCH 1314/1586] crypto: qcom-rng - ensure buffer for generate is
 completely filled
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The generate function in struct rng_alg expects that the destination
buffer is completely filled if the function returns 0. qcom_rng_read()
can run into a situation where the buffer is partially filled with
randomness and the remaining part of the buffer is zeroed since
qcom_rng_generate() doesn't check the return value. This issue can
be reproduced by running the following from libkcapi:

    kcapi-rng -b 9000000 > OUTFILE

The generated OUTFILE will have three huge sections that contain all
zeros, and this is caused by the code where the test
'val & PRNG_STATUS_DATA_AVAIL' fails.

Let's fix this issue by ensuring that qcom_rng_read() always returns
with a full buffer if the function returns success. Let's also have
qcom_rng_generate() return the correct value.

Here's some statistics from the ent project
(https://www.fourmilab.ch/random/) that shows information about the
quality of the generated numbers:

    $ ent -c qcom-random-before
    Value Char Occurrences Fraction
      0           606748   0.067416
      1            33104   0.003678
      2            33001   0.003667
    ...
    253   �        32883   0.003654
    254   �        33035   0.003671
    255   �        33239   0.003693

    Total:       9000000   1.000000

    Entropy = 7.811590 bits per byte.

    Optimum compression would reduce the size
    of this 9000000 byte file by 2 percent.

    Chi square distribution for 9000000 samples is 9329962.81, and
    randomly would exceed this value less than 0.01 percent of the
    times.

    Arithmetic mean value of data bytes is 119.3731 (127.5 = random).
    Monte Carlo value for Pi is 3.197293333 (error 1.77 percent).
    Serial correlation coefficient is 0.159130 (totally uncorrelated =
    0.0).

Without this patch, the results of the chi-square test is 0.01%, and
the numbers are certainly not random according to ent's project page.
The results improve with this patch:

    $ ent -c qcom-random-after
    Value Char Occurrences Fraction
      0            35432   0.003937
      1            35127   0.003903
      2            35424   0.003936
    ...
    253   �        35201   0.003911
    254   �        34835   0.003871
    255   �        35368   0.003930

    Total:       9000000   1.000000

    Entropy = 7.999979 bits per byte.

    Optimum compression would reduce the size
    of this 9000000 byte file by 0 percent.

    Chi square distribution for 9000000 samples is 258.77, and randomly
    would exceed this value 42.24 percent of the times.

    Arithmetic mean value of data bytes is 127.5006 (127.5 = random).
    Monte Carlo value for Pi is 3.141277333 (error 0.01 percent).
    Serial correlation coefficient is 0.000468 (totally uncorrelated =
    0.0).

This change was tested on a Nexus 5 phone (msm8974 SoC).

Signed-off-by: Brian Masney <bmasney@redhat.com>
Fixes: ceec5f5b5988 ("crypto: qcom-rng - Add Qcom prng driver")
Cc: stable@vger.kernel.org # 4.19+
Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Reviewed-by: Andrew Halaney <ahalaney@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/qcom-rng.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/drivers/crypto/qcom-rng.c b/drivers/crypto/qcom-rng.c
index 99ba8d51d1020..11f30fd48c141 100644
--- a/drivers/crypto/qcom-rng.c
+++ b/drivers/crypto/qcom-rng.c
@@ -8,6 +8,7 @@
 #include <linux/clk.h>
 #include <linux/crypto.h>
 #include <linux/io.h>
+#include <linux/iopoll.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/platform_device.h>
@@ -43,16 +44,19 @@ static int qcom_rng_read(struct qcom_rng *rng, u8 *data, unsigned int max)
 {
 	unsigned int currsize = 0;
 	u32 val;
+	int ret;
 
 	/* read random data from hardware */
 	do {
-		val = readl_relaxed(rng->base + PRNG_STATUS);
-		if (!(val & PRNG_STATUS_DATA_AVAIL))
-			break;
+		ret = readl_poll_timeout(rng->base + PRNG_STATUS, val,
+					 val & PRNG_STATUS_DATA_AVAIL,
+					 200, 10000);
+		if (ret)
+			return ret;
 
 		val = readl_relaxed(rng->base + PRNG_DATA_OUT);
 		if (!val)
-			break;
+			return -EINVAL;
 
 		if ((max - currsize) >= WORD_SZ) {
 			memcpy(data, &val, WORD_SZ);
@@ -61,11 +65,10 @@ static int qcom_rng_read(struct qcom_rng *rng, u8 *data, unsigned int max)
 		} else {
 			/* copy only remaining bytes */
 			memcpy(data, &val, max - currsize);
-			break;
 		}
 	} while (currsize < max);
 
-	return currsize;
+	return 0;
 }
 
 static int qcom_rng_generate(struct crypto_rng *tfm,
@@ -87,7 +90,7 @@ static int qcom_rng_generate(struct crypto_rng *tfm,
 	mutex_unlock(&rng->lock);
 	clk_disable_unprepare(rng->clk);
 
-	return 0;
+	return ret;
 }
 
 static int qcom_rng_seed(struct crypto_rng *tfm, const u8 *seed,
-- 
GitLab


From 7f0f1f3ef62ed7a40e30aff28115bd94c4211d1d Mon Sep 17 00:00:00 2001
From: Miaoqian Lin <linmq006@gmail.com>
Date: Mon, 7 Mar 2022 11:07:24 +0000
Subject: [PATCH 1315/1586] hwrng: nomadik - Change clk_disable to
 clk_disable_unprepare

The corresponding API for clk_prepare_enable is clk_disable_unprepare,
other than clk_disable_unprepare.

Fix this by changing clk_disable to clk_disable_unprepare.

Fixes: beca35d05cc2 ("hwrng: nomadik - use clk_prepare_enable()")
Signed-off-by: Miaoqian Lin <linmq006@gmail.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/nomadik-rng.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/char/hw_random/nomadik-rng.c b/drivers/char/hw_random/nomadik-rng.c
index 67947a19aa225..e8f9621e79541 100644
--- a/drivers/char/hw_random/nomadik-rng.c
+++ b/drivers/char/hw_random/nomadik-rng.c
@@ -65,14 +65,14 @@ static int nmk_rng_probe(struct amba_device *dev, const struct amba_id *id)
 out_release:
 	amba_release_regions(dev);
 out_clk:
-	clk_disable(rng_clk);
+	clk_disable_unprepare(rng_clk);
 	return ret;
 }
 
 static void nmk_rng_remove(struct amba_device *dev)
 {
 	amba_release_regions(dev);
-	clk_disable(rng_clk);
+	clk_disable_unprepare(rng_clk);
 }
 
 static const struct amba_id nmk_rng_ids[] = {
-- 
GitLab


From c6ded03bcac9dde5bc7b4a1121efa050a28118b0 Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Tue, 8 Mar 2022 15:55:37 +0800
Subject: [PATCH 1316/1586] crypto: dh - Remove the unused function
 dh_safe_prime_dh_alg()

Fix the following W=1 kernel warnings:

crypto/dh.c:311:31: warning: unused function 'dh_safe_prime_dh_alg'
[-Wunused-function]

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/dh.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/crypto/dh.c b/crypto/dh.c
index cca289477485e..4406aeb1ff616 100644
--- a/crypto/dh.c
+++ b/crypto/dh.c
@@ -308,12 +308,6 @@ static inline struct dh_safe_prime_instance_ctx *dh_safe_prime_instance_ctx(
 	return kpp_instance_ctx(kpp_alg_instance(tfm));
 }
 
-static inline struct kpp_alg *dh_safe_prime_dh_alg(
-	struct dh_safe_prime_tfm_ctx *ctx)
-{
-	return crypto_kpp_alg(ctx->dh_tfm);
-}
-
 static int dh_safe_prime_init_tfm(struct crypto_kpp *tfm)
 {
 	struct dh_safe_prime_instance_ctx *inst_ctx =
-- 
GitLab


From a5997f1ae84af1f6e57bb19787ab1c658db0ef37 Mon Sep 17 00:00:00 2001
From: Longfang Liu <liulongfang@huawei.com>
Date: Wed, 9 Mar 2022 10:52:44 +0800
Subject: [PATCH 1317/1586] MAINTAINERS: update HPRE/SEC2/TRNG driver
 maintainers list

Zaibo moved projects and is not looking into crypto stuff.
I am responsible for checking the patches of these modules.
so the maintainers list needs to be updated.

I take care of HPRE, Qian Weili take care of TRNG,
Ye Kai and me take care of SEC2.

Signed-off-by: Longfang Liu <liulongfang@huawei.com>
Signed-off-by: Kai Ye <yekai12@huawei.com>
Signed-off-by: Weili Qian <qianweili@huawei.com>
Signed-off-by: Zaibo Xu <xuzaibo@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 MAINTAINERS | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 6ee244105f5fc..e4400d4346bbd 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8635,7 +8635,7 @@ S:	Maintained
 F:	drivers/gpio/gpio-hisi.c
 
 HISILICON HIGH PERFORMANCE RSA ENGINE DRIVER (HPRE)
-M:	Zaibo Xu <xuzaibo@huawei.com>
+M:	Longfang Liu <liulongfang@huawei.com>
 L:	linux-crypto@vger.kernel.org
 S:	Maintained
 F:	Documentation/ABI/testing/debugfs-hisi-hpre
@@ -8715,8 +8715,8 @@ F:	Documentation/devicetree/bindings/scsi/hisilicon-sas.txt
 F:	drivers/scsi/hisi_sas/
 
 HISILICON SECURITY ENGINE V2 DRIVER (SEC2)
-M:	Zaibo Xu <xuzaibo@huawei.com>
 M:	Kai Ye <yekai13@huawei.com>
+M:	Longfang Liu <liulongfang@huawei.com>
 L:	linux-crypto@vger.kernel.org
 S:	Maintained
 F:	Documentation/ABI/testing/debugfs-hisi-sec
@@ -8747,7 +8747,7 @@ F:	Documentation/devicetree/bindings/mfd/hisilicon,hi6421-spmi-pmic.yaml
 F:	drivers/mfd/hi6421-spmi-pmic.c
 
 HISILICON TRUE RANDOM NUMBER GENERATOR V2 SUPPORT
-M:	Zaibo Xu <xuzaibo@huawei.com>
+M:	Weili Qian <qianweili@huawei.com>
 S:	Maintained
 F:	drivers/crypto/hisilicon/trng/trng.c
 
-- 
GitLab


From 0e03b8fd29363f2df44e2a7a176d486de550757a Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 9 Mar 2022 15:20:01 +1200
Subject: [PATCH 1318/1586] crypto: xilinx - Turn SHA into a tristate and allow
 COMPILE_TEST

This patch turns the new SHA driver into a tristate and also allows
compile testing.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/Kconfig | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 597559ec2057e..7b2d138bc83e0 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -809,8 +809,8 @@ config CRYPTO_DEV_ZYNQMP_AES
 	  for AES algorithms.
 
 config CRYPTO_DEV_ZYNQMP_SHA3
-	bool "Support for Xilinx ZynqMP SHA3 hardware accelerator"
-	depends on ZYNQMP_FIRMWARE
+	tristate "Support for Xilinx ZynqMP SHA3 hardware accelerator"
+	depends on ZYNQMP_FIRMWARE || COMPILE_TEST
 	select CRYPTO_SHA3
 	help
 	  Xilinx ZynqMP has SHA3 engine used for secure hash calculation.
-- 
GitLab


From 5e6a7d1eb834be23243c48a964ced67ab0b6e9ce Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.de>
Date: Wed, 9 Mar 2022 14:29:00 +0100
Subject: [PATCH 1319/1586] nvme-multipath: use vmalloc for ANA log buffer

The ANA log buffer can get really large, as it depends on the
controller configuration. So to avoid an out-of-memory issue
during scanning use kvmalloc() instead of the kmalloc().

Signed-off-by: Hannes Reinecke <hare@suse.de>
Tested-by: Daniel Wagner <dwagner@suse.de>
Signed-off-by: Daniel Wagner <dwagner@suse.de>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/multipath.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index f8bf6606eb2fc..3ddc1ba66f0d2 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -5,6 +5,7 @@
 
 #include <linux/backing-dev.h>
 #include <linux/moduleparam.h>
+#include <linux/vmalloc.h>
 #include <trace/events/block.h>
 #include "nvme.h"
 
@@ -898,7 +899,7 @@ int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
 	if (ana_log_size > ctrl->ana_log_size) {
 		nvme_mpath_stop(ctrl);
 		nvme_mpath_uninit(ctrl);
-		ctrl->ana_log_buf = kmalloc(ana_log_size, GFP_KERNEL);
+		ctrl->ana_log_buf = kvmalloc(ana_log_size, GFP_KERNEL);
 		if (!ctrl->ana_log_buf)
 			return -ENOMEM;
 	}
@@ -915,7 +916,7 @@ out_uninit:
 
 void nvme_mpath_uninit(struct nvme_ctrl *ctrl)
 {
-	kfree(ctrl->ana_log_buf);
+	kvfree(ctrl->ana_log_buf);
 	ctrl->ana_log_buf = NULL;
 	ctrl->ana_log_size = 0;
 }
-- 
GitLab


From 8f31dded50d927974a5a696196052ed53f1baf66 Mon Sep 17 00:00:00 2001
From: Guoqing Jiang <guoqing.jiang@linux.dev>
Date: Wed, 9 Mar 2022 14:02:28 +0800
Subject: [PATCH 1320/1586] nvme-multipath: call bio_io_error in
 nvme_ns_head_submit_bio

Use bio_io_error() here since bio_io_error does the same thing.

Signed-off-by: Guoqing Jiang <guoqing.jiang@linux.dev>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/multipath.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 3ddc1ba66f0d2..d13b81cd6225c 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -387,8 +387,7 @@ static void nvme_ns_head_submit_bio(struct bio *bio)
 	} else {
 		dev_warn_ratelimited(dev, "no available path - failing I/O\n");
 
-		bio->bi_status = BLK_STS_IOERR;
-		bio_endio(bio);
+		bio_io_error(bio);
 	}
 
 	srcu_read_unlock(&head->srcu, srcu_idx);
-- 
GitLab


From 462b8b2d84975758e5b74fe832bfe8145eef403f Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <kch@nvidia.com>
Date: Tue, 22 Feb 2022 19:36:56 -0800
Subject: [PATCH 1321/1586] nvme-tcp: don't initialize ret variable

No point in initializing ret variable to 0 in nvme_tcp_start_io_queue()
since it gets overwritten by a call to nvme_tcp_start_queue().

Signed-off-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/tcp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 6cbcc8b4daaf9..bb03ae5ef11f8 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -1674,7 +1674,7 @@ static void nvme_tcp_stop_io_queues(struct nvme_ctrl *ctrl)
 
 static int nvme_tcp_start_io_queues(struct nvme_ctrl *ctrl)
 {
-	int i, ret = 0;
+	int i, ret;
 
 	for (i = 1; i < ctrl->queue_count; i++) {
 		ret = nvme_tcp_start_queue(ctrl, i);
-- 
GitLab


From a387935c241d2517c22546f1206a77a856a40541 Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <kch@nvidia.com>
Date: Tue, 22 Feb 2022 19:36:57 -0800
Subject: [PATCH 1322/1586] nvme-tcp: don't fold the line

The call to nvme_tcp_alloc_queue() fits perfectly in one line without
exceeding 80 char limit for the line.

Signed-off-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/tcp.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index bb03ae5ef11f8..babbc14a4b76a 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -1714,8 +1714,7 @@ static int __nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl)
 	int i, ret;
 
 	for (i = 1; i < ctrl->queue_count; i++) {
-		ret = nvme_tcp_alloc_queue(ctrl, i,
-				ctrl->sqsize + 1);
+		ret = nvme_tcp_alloc_queue(ctrl, i, ctrl->sqsize + 1);
 		if (ret)
 			goto out_free_queues;
 	}
-- 
GitLab


From 841aee4d75f18fdfb53935080b03de0c65e9b92c Mon Sep 17 00:00:00 2001
From: Chris Leech <cleech@redhat.com>
Date: Tue, 15 Feb 2022 18:22:49 -0800
Subject: [PATCH 1323/1586] nvme-tcp: lockdep: annotate in-kernel sockets

Put NVMe/TCP sockets in their own class to avoid some lockdep warnings.
Sockets created by nvme-tcp are not exposed to user-space, and will not
trigger certain code paths that the general socket API exposes.

Lockdep complains about a circular dependency between the socket and
filesystem locks, because setsockopt can trigger a page fault with a
socket lock held, but nvme-tcp sends requests on the socket while file
system locks are held.

  ======================================================
  WARNING: possible circular locking dependency detected
  5.15.0-rc3 #1 Not tainted
  ------------------------------------------------------
  fio/1496 is trying to acquire lock:
  (sk_lock-AF_INET){+.+.}-{0:0}, at: tcp_sendpage+0x23/0x80

  but task is already holding lock:
  (&xfs_dir_ilock_class/5){+.+.}-{3:3}, at: xfs_ilock+0xcf/0x290 [xfs]

  which lock already depends on the new lock.

  other info that might help us debug this:

  chain exists of:
   sk_lock-AF_INET --> sb_internal --> &xfs_dir_ilock_class/5

  Possible unsafe locking scenario:

        CPU0                    CPU1
        ----                    ----
   lock(&xfs_dir_ilock_class/5);
                                lock(sb_internal);
                                lock(&xfs_dir_ilock_class/5);
   lock(sk_lock-AF_INET);

  *** DEADLOCK ***

  6 locks held by fio/1496:
   #0: (sb_writers#13){.+.+}-{0:0}, at: path_openat+0x9fc/0xa20
   #1: (&inode->i_sb->s_type->i_mutex_dir_key){++++}-{3:3}, at: path_openat+0x296/0xa20
   #2: (sb_internal){.+.+}-{0:0}, at: xfs_trans_alloc_icreate+0x41/0xd0 [xfs]
   #3: (&xfs_dir_ilock_class/5){+.+.}-{3:3}, at: xfs_ilock+0xcf/0x290 [xfs]
   #4: (hctx->srcu){....}-{0:0}, at: hctx_lock+0x51/0xd0
   #5: (&queue->send_mutex){+.+.}-{3:3}, at: nvme_tcp_queue_rq+0x33e/0x380 [nvme_tcp]

This annotation lets lockdep analyze nvme-tcp controlled sockets
independently of what the user-space sockets API does.

Link: https://lore.kernel.org/linux-nvme/CAHj4cs9MDYLJ+q+2_GXUK9HxFizv2pxUryUR0toX974M040z7g@mail.gmail.com/

Signed-off-by: Chris Leech <cleech@redhat.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/tcp.c | 40 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index babbc14a4b76a..10fc45d95b867 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -30,6 +30,44 @@ static int so_priority;
 module_param(so_priority, int, 0644);
 MODULE_PARM_DESC(so_priority, "nvme tcp socket optimize priority");
 
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+/* lockdep can detect a circular dependency of the form
+ *   sk_lock -> mmap_lock (page fault) -> fs locks -> sk_lock
+ * because dependencies are tracked for both nvme-tcp and user contexts. Using
+ * a separate class prevents lockdep from conflating nvme-tcp socket use with
+ * user-space socket API use.
+ */
+static struct lock_class_key nvme_tcp_sk_key[2];
+static struct lock_class_key nvme_tcp_slock_key[2];
+
+static void nvme_tcp_reclassify_socket(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
+
+	if (WARN_ON_ONCE(!sock_allow_reclassification(sk)))
+		return;
+
+	switch (sk->sk_family) {
+	case AF_INET:
+		sock_lock_init_class_and_name(sk, "slock-AF_INET-NVME",
+					      &nvme_tcp_slock_key[0],
+					      "sk_lock-AF_INET-NVME",
+					      &nvme_tcp_sk_key[0]);
+		break;
+	case AF_INET6:
+		sock_lock_init_class_and_name(sk, "slock-AF_INET6-NVME",
+					      &nvme_tcp_slock_key[1],
+					      "sk_lock-AF_INET6-NVME",
+					      &nvme_tcp_sk_key[1]);
+		break;
+	default:
+		WARN_ON_ONCE(1);
+	}
+}
+#else
+static void nvme_tcp_reclassify_socket(struct socket *sock) { }
+#endif
+
 enum nvme_tcp_send_state {
 	NVME_TCP_SEND_CMD_PDU = 0,
 	NVME_TCP_SEND_H2C_PDU,
@@ -1427,6 +1465,8 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
 		goto err_destroy_mutex;
 	}
 
+	nvme_tcp_reclassify_socket(queue->sock);
+
 	/* Single syn retry */
 	tcp_sock_set_syncnt(queue->sock->sk, 1);
 
-- 
GitLab


From b2fb99e42598ba5d1ac045fb39a2b38fd80dd8d2 Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <kch@nvidia.com>
Date: Tue, 8 Mar 2022 21:59:25 -0800
Subject: [PATCH 1324/1586] nvmet-fc: fix kernel-doc warning for
 nvmet_fc_register_targetport

This fixes following kernel-doc warning :-

drivers/nvme/target/fc.c:1365: warning: expecting prototype for nvme_fc_register_targetport(). Prototype was for nvmet_fc_register_targetport() instead

Signed-off-by: Chaitanya Kulkarni <kch@nvidia.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/fc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index cb013516784c1..56c2bdfd8e3bf 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -1341,7 +1341,7 @@ nvmet_fc_portentry_rebind_tgt(struct nvmet_fc_tgtport *tgtport)
 }
 
 /**
- * nvme_fc_register_targetport - transport entry point called by an
+ * nvmet_fc_register_targetport - transport entry point called by an
  *                              LLDD to register the existence of a local
  *                              NVME subystem FC port.
  * @pinfo:     pointer to information about the port to be registered
-- 
GitLab


From 0acb8231ebae5175260a7da853f525f2730cc095 Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <kch@nvidia.com>
Date: Tue, 8 Mar 2022 21:59:26 -0800
Subject: [PATCH 1325/1586] nvmet-fc: fix kernel-doc warning for
 nvmet_fc_unregister_targetport

This fixes following kernel-doc warning:-

drivers/nvme/target/fc.c:1619: warning: expecting prototype for nvme_fc_unregister_targetport(). Prototype was for nvmet_fc_unregister_targetport() instead

Signed-off-by: Chaitanya Kulkarni <kch@nvidia.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/fc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index 56c2bdfd8e3bf..de90001fc5c4c 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -1604,7 +1604,7 @@ nvmet_fc_delete_ctrl(struct nvmet_ctrl *ctrl)
 }
 
 /**
- * nvme_fc_unregister_targetport - transport entry point called by an
+ * nvmet_fc_unregister_targetport - transport entry point called by an
  *                              LLDD to deregister/remove a previously
  *                              registered a local NVME subsystem FC port.
  * @target_port: pointer to the (registered) target port that is to be
-- 
GitLab


From a8adf0cddc230be98da0e68bea846e28686dcb60 Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <kch@nvidia.com>
Date: Tue, 8 Mar 2022 21:59:27 -0800
Subject: [PATCH 1326/1586] nvmet-rdma: fix kernel-doc warning for
 nvmet_rdma_device_removal

This fixes following kernel-doc warning:-

drivers/nvme/target/rdma.c:1722: warning: expecting prototype for nvme_rdma_device_removal(). Prototype was for nvmet_rdma_device_removal() instead

Signed-off-by: Chaitanya Kulkarni <kch@nvidia.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/rdma.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 00656754e3314..2446d0918a41a 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -1703,7 +1703,7 @@ static void nvmet_rdma_queue_connect_fail(struct rdma_cm_id *cm_id,
 }
 
 /**
- * nvme_rdma_device_removal() - Handle RDMA device removal
+ * nvmet_rdma_device_removal() - Handle RDMA device removal
  * @cm_id:	rdma_cm id, used for nvmet port
  * @queue:      nvmet rdma queue (cm id qp_context)
  *
-- 
GitLab


From 73d77c53ff342bfa23daedf4475cdd06618e447b Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <kch@nvidia.com>
Date: Tue, 8 Mar 2022 14:56:57 -0800
Subject: [PATCH 1327/1586] nvmet: don't fold lines

Don't fold line that can fit into 80 char limit. No functional change
in this patch.

Signed-off-by: Chaitanya Kulkarni <kch@nvidia.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/configfs.c | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index 091a0ca16361c..0f5c680c851a4 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -93,10 +93,9 @@ CONFIGFS_ATTR(nvmet_, addr_adrfam);
 static ssize_t nvmet_addr_portid_show(struct config_item *item,
 		char *page)
 {
-	struct nvmet_port *port = to_nvmet_port(item);
+	__le16 portid = to_nvmet_port(item)->disc_addr.portid;
 
-	return snprintf(page, PAGE_SIZE, "%d\n",
-			le16_to_cpu(port->disc_addr.portid));
+	return snprintf(page, PAGE_SIZE, "%d\n", le16_to_cpu(portid));
 }
 
 static ssize_t nvmet_addr_portid_store(struct config_item *item,
@@ -124,8 +123,7 @@ static ssize_t nvmet_addr_traddr_show(struct config_item *item,
 {
 	struct nvmet_port *port = to_nvmet_port(item);
 
-	return snprintf(page, PAGE_SIZE, "%s\n",
-			port->disc_addr.traddr);
+	return snprintf(page, PAGE_SIZE, "%s\n", port->disc_addr.traddr);
 }
 
 static ssize_t nvmet_addr_traddr_store(struct config_item *item,
@@ -199,8 +197,7 @@ static ssize_t nvmet_addr_trsvcid_show(struct config_item *item,
 {
 	struct nvmet_port *port = to_nvmet_port(item);
 
-	return snprintf(page, PAGE_SIZE, "%s\n",
-			port->disc_addr.trsvcid);
+	return snprintf(page, PAGE_SIZE, "%s\n", port->disc_addr.trsvcid);
 }
 
 static ssize_t nvmet_addr_trsvcid_store(struct config_item *item,
@@ -1236,8 +1233,7 @@ CONFIGFS_ATTR(nvmet_subsys_, attr_model);
 static ssize_t nvmet_subsys_attr_discovery_nqn_show(struct config_item *item,
 			char *page)
 {
-	return snprintf(page, PAGE_SIZE, "%s\n",
-			nvmet_disc_subsys->subsysnqn);
+	return snprintf(page, PAGE_SIZE, "%s\n", nvmet_disc_subsys->subsysnqn);
 }
 
 static ssize_t nvmet_subsys_attr_discovery_nqn_store(struct config_item *item,
-- 
GitLab


From 98152eb70fffad910ee7c511b110afe98f162fc5 Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <kch@nvidia.com>
Date: Tue, 8 Mar 2022 14:56:58 -0800
Subject: [PATCH 1328/1586] nvmet: use snprintf() with PAGE_SIZE in configfs

Instead of using sprintf, use snprintf with buffer size limited to
PAGE_SIZE just like what we have for the rest of the file.

Signed-off-by: Chaitanya Kulkarni <kch@nvidia.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/configfs.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index 0f5c680c851a4..ac801fa1641bc 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -60,10 +60,11 @@ static ssize_t nvmet_addr_adrfam_show(struct config_item *item, char *page)
 
 	for (i = 1; i < ARRAY_SIZE(nvmet_addr_family); i++) {
 		if (nvmet_addr_family[i].type == adrfam)
-			return sprintf(page, "%s\n", nvmet_addr_family[i].name);
+			return snprintf(page, PAGE_SIZE, "%s\n",
+					nvmet_addr_family[i].name);
 	}
 
-	return sprintf(page, "\n");
+	return snprintf(page, PAGE_SIZE, "\n");
 }
 
 static ssize_t nvmet_addr_adrfam_store(struct config_item *item,
@@ -160,10 +161,11 @@ static ssize_t nvmet_addr_treq_show(struct config_item *item, char *page)
 
 	for (i = 0; i < ARRAY_SIZE(nvmet_addr_treq); i++) {
 		if (treq == nvmet_addr_treq[i].type)
-			return sprintf(page, "%s\n", nvmet_addr_treq[i].name);
+			return snprintf(page, PAGE_SIZE, "%s\n",
+					nvmet_addr_treq[i].name);
 	}
 
-	return sprintf(page, "\n");
+	return snprintf(page, PAGE_SIZE, "\n");
 }
 
 static ssize_t nvmet_addr_treq_store(struct config_item *item,
@@ -281,7 +283,8 @@ static ssize_t nvmet_addr_trtype_show(struct config_item *item,
 
 	for (i = 0; i < ARRAY_SIZE(nvmet_transport); i++) {
 		if (port->disc_addr.trtype == nvmet_transport[i].type)
-			return sprintf(page, "%s\n", nvmet_transport[i].name);
+			return snprintf(page, PAGE_SIZE,
+					"%s\n", nvmet_transport[i].name);
 	}
 
 	return sprintf(page, "\n");
-- 
GitLab


From e981bc74aefc6a177b50c16cfa7023599799cf74 Mon Sep 17 00:00:00 2001
From: Claudiu Beznea <claudiu.beznea@microchip.com>
Date: Fri, 11 Mar 2022 13:17:16 +0200
Subject: [PATCH 1329/1586] net: dsa: microchip: add spi_device_id tables

Add spi_device_id tables to avoid logs like "SPI driver ksz9477-switch
has no spi_device_id".

Signed-off-by: Claudiu Beznea <claudiu.beznea@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/microchip/ksz8795_spi.c | 11 +++++++++++
 drivers/net/dsa/microchip/ksz9477_spi.c | 12 ++++++++++++
 2 files changed, 23 insertions(+)

diff --git a/drivers/net/dsa/microchip/ksz8795_spi.c b/drivers/net/dsa/microchip/ksz8795_spi.c
index 866767b70d65b..b0a7dee27ffc9 100644
--- a/drivers/net/dsa/microchip/ksz8795_spi.c
+++ b/drivers/net/dsa/microchip/ksz8795_spi.c
@@ -124,12 +124,23 @@ static const struct of_device_id ksz8795_dt_ids[] = {
 };
 MODULE_DEVICE_TABLE(of, ksz8795_dt_ids);
 
+static const struct spi_device_id ksz8795_spi_ids[] = {
+	{ "ksz8765" },
+	{ "ksz8794" },
+	{ "ksz8795" },
+	{ "ksz8863" },
+	{ "ksz8873" },
+	{ },
+};
+MODULE_DEVICE_TABLE(spi, ksz8795_spi_ids);
+
 static struct spi_driver ksz8795_spi_driver = {
 	.driver = {
 		.name	= "ksz8795-switch",
 		.owner	= THIS_MODULE,
 		.of_match_table = of_match_ptr(ksz8795_dt_ids),
 	},
+	.id_table = ksz8795_spi_ids,
 	.probe	= ksz8795_spi_probe,
 	.remove	= ksz8795_spi_remove,
 	.shutdown = ksz8795_spi_shutdown,
diff --git a/drivers/net/dsa/microchip/ksz9477_spi.c b/drivers/net/dsa/microchip/ksz9477_spi.c
index e3cb0e6c9f6f2..43addeabfc259 100644
--- a/drivers/net/dsa/microchip/ksz9477_spi.c
+++ b/drivers/net/dsa/microchip/ksz9477_spi.c
@@ -98,12 +98,24 @@ static const struct of_device_id ksz9477_dt_ids[] = {
 };
 MODULE_DEVICE_TABLE(of, ksz9477_dt_ids);
 
+static const struct spi_device_id ksz9477_spi_ids[] = {
+	{ "ksz9477" },
+	{ "ksz9897" },
+	{ "ksz9893" },
+	{ "ksz9563" },
+	{ "ksz8563" },
+	{ "ksz9567" },
+	{ },
+};
+MODULE_DEVICE_TABLE(spi, ksz9477_spi_ids);
+
 static struct spi_driver ksz9477_spi_driver = {
 	.driver = {
 		.name	= "ksz9477-switch",
 		.owner	= THIS_MODULE,
 		.of_match_table = of_match_ptr(ksz9477_dt_ids),
 	},
+	.id_table = ksz9477_spi_ids,
 	.probe	= ksz9477_spi_probe,
 	.remove	= ksz9477_spi_remove,
 	.shutdown = ksz9477_spi_shutdown,
-- 
GitLab


From 4db4075f92af2b28f415fc979ab626e6b37d67b6 Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Thu, 10 Mar 2022 11:49:00 +0100
Subject: [PATCH 1330/1586] esp6: fix check on ipv6_skip_exthdr's return value

Commit 5f9c55c8066b ("ipv6: check return value of ipv6_skip_exthdr")
introduced an incorrect check, which leads to all ESP packets over
either TCPv6 or UDPv6 encapsulation being dropped. In this particular
case, offset is negative, since skb->data points to the ESP header in
the following chain of headers, while skb->network_header points to
the IPv6 header:

    IPv6 | ext | ... | ext | UDP | ESP | ...

That doesn't seem to be a problem, especially considering that if we
reach esp6_input_done2, we're guaranteed to have a full set of headers
available (otherwise the packet would have been dropped earlier in the
stack). However, it means that the return value will (intentionally)
be negative. We can make the test more specific, as the expected
return value of ipv6_skip_exthdr will be the (negated) size of either
a UDP header, or a TCP header with possible options.

In the future, we should probably either make ipv6_skip_exthdr
explicitly accept negative offsets (and adjust its return value for
error cases), or make ipv6_skip_exthdr only take non-negative
offsets (and audit all callers).

Fixes: 5f9c55c8066b ("ipv6: check return value of ipv6_skip_exthdr")
Reported-by: Xiumei Mu <xmu@redhat.com>
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/ipv6/esp6.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index b0ffbcd5432d6..55d604c9b3b3e 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -812,8 +812,7 @@ int esp6_input_done2(struct sk_buff *skb, int err)
 		struct tcphdr *th;
 
 		offset = ipv6_skip_exthdr(skb, offset, &nexthdr, &frag_off);
-
-		if (offset < 0) {
+		if (offset == -1) {
 			err = -EINVAL;
 			goto out;
 		}
-- 
GitLab


From c816d705b9dbc5ca870bb23e49aa9715da8384b9 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Tue, 4 Jan 2022 12:53:41 +0000
Subject: [PATCH 1331/1586] btrfs: remove write and wait of struct walk_control

The ->write and ->wait fields of struct walk_control, used for log trees,
are not used since 2008, more specifically since commit d0c803c4049c5c
("Btrfs: Record dirty pages tree-log pages in an extent_io tree") and
since commit d0c803c4049c5c ("Btrfs: Record dirty pages tree-log pages in
an extent_io tree"). So just remove them, along with the function
btrfs_write_tree_block(), which is also not used anymore after removing
the ->write member.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/tree-log.c | 28 +++++-----------------------
 1 file changed, 5 insertions(+), 23 deletions(-)

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 6bc8834ac8f7d..be4f45eda751e 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -270,12 +270,6 @@ void btrfs_end_log_trans(struct btrfs_root *root)
 	}
 }
 
-static int btrfs_write_tree_block(struct extent_buffer *buf)
-{
-	return filemap_fdatawrite_range(buf->pages[0]->mapping, buf->start,
-					buf->start + buf->len - 1);
-}
-
 static void btrfs_wait_tree_block_writeback(struct extent_buffer *buf)
 {
 	filemap_fdatawait_range(buf->pages[0]->mapping,
@@ -294,16 +288,6 @@ struct walk_control {
 	 */
 	int free;
 
-	/* should we write out the extent buffer?  This is used
-	 * while flushing the log tree to disk during a sync
-	 */
-	int write;
-
-	/* should we wait for the extent buffer io to finish?  Also used
-	 * while flushing the log tree to disk for a sync
-	 */
-	int wait;
-
 	/* pin only walk, we record which extents on disk belong to the
 	 * log trees
 	 */
@@ -354,17 +338,15 @@ static int process_one_buffer(struct btrfs_root *log,
 			return ret;
 	}
 
-	if (wc->pin)
+	if (wc->pin) {
 		ret = btrfs_pin_extent_for_log_replay(wc->trans, eb->start,
 						      eb->len);
+		if (ret)
+			return ret;
 
-	if (!ret && btrfs_buffer_uptodate(eb, gen, 0)) {
-		if (wc->pin && btrfs_header_level(eb) == 0)
+		if (btrfs_buffer_uptodate(eb, gen, 0) &&
+		    btrfs_header_level(eb) == 0)
 			ret = btrfs_exclude_logged_extents(eb);
-		if (wc->write)
-			btrfs_write_tree_block(eb);
-		if (wc->wait)
-			btrfs_wait_tree_block_writeback(eb);
 	}
 	return ret;
 }
-- 
GitLab


From dc408ccdf01d3d37ce65aca5dfea0be71d0cc6ec Mon Sep 17 00:00:00 2001
From: Sahil Kang <sahil.kang@asilaycomputing.com>
Date: Wed, 5 Jan 2022 00:30:06 -0800
Subject: [PATCH 1332/1586] btrfs: reuse existing pointers from btrfs_ioctl

btrfs_ioctl already contains pointers to the inode and btrfs_root
structs, so we can pass them into the subfunctions instead of the
toplevel struct file.

Signed-off-by: Sahil Kang <sahil.kang@asilaycomputing.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ioctl.c | 28 +++++++++++-----------------
 1 file changed, 11 insertions(+), 17 deletions(-)

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 8d47ec5fc4f44..70a08ef9c5ae4 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -440,10 +440,8 @@ void btrfs_exclop_balance(struct btrfs_fs_info *fs_info,
 	}
 }
 
-static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
+static int btrfs_ioctl_getversion(struct inode *inode, int __user *arg)
 {
-	struct inode *inode = file_inode(file);
-
 	return put_user(inode->i_generation, arg);
 }
 
@@ -2871,25 +2869,22 @@ out:
 	return ret;
 }
 
-static noinline int btrfs_ioctl_ino_lookup(struct file *file,
+static noinline int btrfs_ioctl_ino_lookup(struct btrfs_root *root,
 					   void __user *argp)
 {
 	struct btrfs_ioctl_ino_lookup_args *args;
-	struct inode *inode;
 	int ret = 0;
 
 	args = memdup_user(argp, sizeof(*args));
 	if (IS_ERR(args))
 		return PTR_ERR(args);
 
-	inode = file_inode(file);
-
 	/*
 	 * Unprivileged query to obtain the containing subvolume root id. The
 	 * path is reset so it's consistent with btrfs_search_path_in_tree.
 	 */
 	if (args->treeid == 0)
-		args->treeid = BTRFS_I(inode)->root->root_key.objectid;
+		args->treeid = root->root_key.objectid;
 
 	if (args->objectid == BTRFS_FIRST_FREE_OBJECTID) {
 		args->name[0] = 0;
@@ -2901,7 +2896,7 @@ static noinline int btrfs_ioctl_ino_lookup(struct file *file,
 		goto out;
 	}
 
-	ret = btrfs_search_path_in_tree(BTRFS_I(inode)->root->fs_info,
+	ret = btrfs_search_path_in_tree(root->fs_info,
 					args->treeid, args->objectid,
 					args->name);
 
@@ -3077,7 +3072,8 @@ out_free:
  * Return ROOT_REF information of the subvolume containing this inode
  * except the subvolume name.
  */
-static int btrfs_ioctl_get_subvol_rootref(struct file *file, void __user *argp)
+static int btrfs_ioctl_get_subvol_rootref(struct btrfs_root *subvol_root,
+					  void __user *argp)
 {
 	struct btrfs_ioctl_get_subvol_rootref_args *rootrefs;
 	struct btrfs_root_ref *rref;
@@ -3085,7 +3081,6 @@ static int btrfs_ioctl_get_subvol_rootref(struct file *file, void __user *argp)
 	struct btrfs_path *path;
 	struct btrfs_key key;
 	struct extent_buffer *leaf;
-	struct inode *inode;
 	u64 objectid;
 	int slot;
 	int ret;
@@ -3101,9 +3096,8 @@ static int btrfs_ioctl_get_subvol_rootref(struct file *file, void __user *argp)
 		return PTR_ERR(rootrefs);
 	}
 
-	inode = file_inode(file);
-	root = BTRFS_I(inode)->root->fs_info->tree_root;
-	objectid = BTRFS_I(inode)->root->root_key.objectid;
+	root = subvol_root->fs_info->tree_root;
+	objectid = subvol_root->root_key.objectid;
 
 	key.objectid = objectid;
 	key.type = BTRFS_ROOT_REF_KEY;
@@ -5194,7 +5188,7 @@ long btrfs_ioctl(struct file *file, unsigned int
 
 	switch (cmd) {
 	case FS_IOC_GETVERSION:
-		return btrfs_ioctl_getversion(file, argp);
+		return btrfs_ioctl_getversion(inode, argp);
 	case FS_IOC_GETFSLABEL:
 		return btrfs_ioctl_get_fslabel(fs_info, argp);
 	case FS_IOC_SETFSLABEL:
@@ -5242,7 +5236,7 @@ long btrfs_ioctl(struct file *file, unsigned int
 	case BTRFS_IOC_TREE_SEARCH_V2:
 		return btrfs_ioctl_tree_search_v2(file, argp);
 	case BTRFS_IOC_INO_LOOKUP:
-		return btrfs_ioctl_ino_lookup(file, argp);
+		return btrfs_ioctl_ino_lookup(root, argp);
 	case BTRFS_IOC_INO_PATHS:
 		return btrfs_ioctl_ino_to_path(root, argp);
 	case BTRFS_IOC_LOGICAL_INO:
@@ -5321,7 +5315,7 @@ long btrfs_ioctl(struct file *file, unsigned int
 	case BTRFS_IOC_GET_SUBVOL_INFO:
 		return btrfs_ioctl_get_subvol_info(file, argp);
 	case BTRFS_IOC_GET_SUBVOL_ROOTREF:
-		return btrfs_ioctl_get_subvol_rootref(file, argp);
+		return btrfs_ioctl_get_subvol_rootref(root, argp);
 	case BTRFS_IOC_INO_LOOKUP_USER:
 		return btrfs_ioctl_ino_lookup_user(file, argp);
 	case FS_IOC_ENABLE_VERITY:
-- 
GitLab


From a450a4af74331706b51650a0d826720a4da27428 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Wed, 15 Dec 2021 12:19:58 +0000
Subject: [PATCH 1333/1586] btrfs: don't log unnecessary boundary keys when
 logging directory

Before we start to log dir index keys from a leaf, we check if there is a
previous index key, which normally is at the end of a leaf that was not
changed in the current transaction. Then we log that key and set the start
of logged range (item of type BTRFS_DIR_LOG_INDEX_KEY) to the offset of
that key. This is to ensure that if there were deleted index keys between
that key and the first key we are going to log, those deletions are
replayed in case we need to replay to the log after a power failure.
However we really don't need to log that previous key, we can just set the
start of the logged range to that key's offset plus 1. This achieves the
same and avoids logging one dir index key.

The same logic is performed when we finish logging the index keys of a
leaf and we find that the next leaf has index keys and was not changed in
the current transaction. We are logging the first key of that next leaf
and use its offset as the end of range we log. This is just to ensure that
if there were deleted index keys between the last index key we logged and
the first key of that next leaf, those index keys are deleted if we end
up replaying the log. However that is not necessary, we can avoid logging
that first index key of the next leaf and instead set the end of the
logged range to match the offset of that index key minus 1.

So avoid logging those index keys at the boundaries and adjust the start
and end offsets of the logged ranges as described above.

This patch is part of a patchset comprised of the following patches:

  1/4 btrfs: don't log unnecessary boundary keys when logging directory
  2/4 btrfs: put initial index value of a directory in a constant
  3/4 btrfs: stop copying old dir items when logging a directory
  4/4 btrfs: stop trying to log subdirectories created in past transactions

Performance test results are listed in the changelog of patch 3/4.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/tree-log.c | 39 +++++++++++++++++++++------------------
 1 file changed, 21 insertions(+), 18 deletions(-)

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index be4f45eda751e..8386e418b75a9 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3906,17 +3906,18 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
 	ret = btrfs_previous_item(root, path, ino, BTRFS_DIR_INDEX_KEY);
 	if (ret == 0) {
 		struct btrfs_key tmp;
+
 		btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]);
-		if (tmp.type == BTRFS_DIR_INDEX_KEY) {
-			first_offset = tmp.offset;
-			ret = overwrite_item(trans, log, dst_path,
-					     path->nodes[0], path->slots[0],
-					     &tmp);
-			if (ret) {
-				err = ret;
-				goto done;
-			}
-		}
+		/*
+		 * The dir index key before the first one we found that needs to
+		 * be logged might be in a previous leaf, and there might be a
+		 * gap between these keys, meaning that we had deletions that
+		 * happened. So the key range item we log (key type
+		 * BTRFS_DIR_LOG_INDEX_KEY) must cover a range that starts at the
+		 * previous key's offset plus 1, so that those deletes are replayed.
+		 */
+		if (tmp.type == BTRFS_DIR_INDEX_KEY)
+			first_offset = tmp.offset + 1;
 	}
 	btrfs_release_path(path);
 
@@ -3964,14 +3965,16 @@ search:
 			goto done;
 		}
 		if (btrfs_header_generation(path->nodes[0]) != trans->transid) {
-			ctx->last_dir_item_offset = min_key.offset;
-			ret = overwrite_item(trans, log, dst_path,
-					     path->nodes[0], path->slots[0],
-					     &min_key);
-			if (ret)
-				err = ret;
-			else
-				last_offset = min_key.offset;
+			/*
+			 * The next leaf was not changed in the current transaction
+			 * and has at least one dir index key.
+			 * We check for the next key because there might have been
+			 * one or more deletions between the last key we logged and
+			 * that next key. So the key range item we log (key type
+			 * BTRFS_DIR_LOG_INDEX_KEY) must end at the next key's
+			 * offset minus 1, so that those deletes are replayed.
+			 */
+			last_offset = min_key.offset - 1;
 			goto done;
 		}
 		if (need_resched()) {
-- 
GitLab


From 528ee697126fddaff448897c2d649bd756153c79 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Wed, 15 Dec 2021 12:19:59 +0000
Subject: [PATCH 1334/1586] btrfs: put initial index value of a directory in a
 constant

At btrfs_set_inode_index_count() we refer twice to the number 2 as the
initial index value for a directory (when it's empty), with a proper
comment explaining the reason for that value. In the next patch I'll
have to use that magic value in the directory logging code, so put
the value in a #define at btrfs_inode.h, to avoid hardcoding the
magic value again at tree-log.c.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/btrfs_inode.h | 12 ++++++++++--
 fs/btrfs/inode.c       | 10 ++--------
 2 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index b3e46aabc3d86..6d4f42b0fdcef 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -13,6 +13,13 @@
 #include "ordered-data.h"
 #include "delayed-inode.h"
 
+/*
+ * Since we search a directory based on f_pos (struct dir_context::pos) we have
+ * to start at 2 since '.' and '..' have f_pos of 0 and 1 respectively, so
+ * everybody else has to start at 2 (see btrfs_real_readdir() and dir_emit_dots()).
+ */
+#define BTRFS_DIR_START_INDEX 2
+
 /*
  * ordered_data_close is set by truncate when a file that used
  * to have good data has been truncated to zero.  When it is set
@@ -173,8 +180,9 @@ struct btrfs_inode {
 	u64 disk_i_size;
 
 	/*
-	 * if this is a directory then index_cnt is the counter for the index
-	 * number for new files that are created
+	 * If this is a directory then index_cnt is the counter for the index
+	 * number for new files that are created. For an empty directory, this
+	 * must be initialized to BTRFS_DIR_START_INDEX.
 	 */
 	u64 index_cnt;
 
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 5bbea5ec31fc5..5c7aeb588313a 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -5971,14 +5971,8 @@ static int btrfs_set_inode_index_count(struct btrfs_inode *inode)
 		goto out;
 	ret = 0;
 
-	/*
-	 * MAGIC NUMBER EXPLANATION:
-	 * since we search a directory based on f_pos we have to start at 2
-	 * since '.' and '..' have f_pos of 0 and 1 respectively, so everybody
-	 * else has to start at 2
-	 */
 	if (path->slots[0] == 0) {
-		inode->index_cnt = 2;
+		inode->index_cnt = BTRFS_DIR_START_INDEX;
 		goto out;
 	}
 
@@ -5989,7 +5983,7 @@ static int btrfs_set_inode_index_count(struct btrfs_inode *inode)
 
 	if (found_key.objectid != btrfs_ino(inode) ||
 	    found_key.type != BTRFS_DIR_INDEX_KEY) {
-		inode->index_cnt = 2;
+		inode->index_cnt = BTRFS_DIR_START_INDEX;
 		goto out;
 	}
 
-- 
GitLab


From 732d591a5d6c12478a14083011f59dce6a1e48d4 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Wed, 15 Dec 2021 12:20:00 +0000
Subject: [PATCH 1335/1586] btrfs: stop copying old dir items when logging a
 directory

When logging a directory, we go over every leaf of the subvolume tree that
was changed in the current transaction and copy all its dir index keys to
the log tree.

That includes copying dir index keys created in past transactions. This is
done mostly for simplicity, as after logging the keys we log an item that
specifies the start and end ranges of the keys we logged. That item is
then used during log replay to figure out which keys need to be deleted -
every key in that range that we find in the subvolume tree and is not in
the log tree, needs to be deleted.

Now that we log only dir index keys, and not dir item keys anymore, when
we remove dentries from a directory (due to unlink and rename operations),
we can get entire leaves that we changed only for deleting old dir index
keys, or that have few dir index keys that are new - this is due to the
fact that the offset for new index keys comes from a monotonically
increasing counter.

We can avoid logging dir index keys from past transactions, and in order
to track the deletions, only log range items (BTRFS_DIR_LOG_INDEX_KEY key
type) when we find gaps between consecutive index keys. This massively
reduces the amount of logged metadata when we have deleted directory
entries, even if it's a small percentage of the total number of entries.
The reduction comes from both less items that are logged and instead of
logging many dir index items (struct btrfs_dir_item), which have a size
of 30 bytes plus a file name, we typically log just a few range items
(struct btrfs_dir_log_item), which take only 8 bytes each.

Even if no entries were deleted from a directory and only new entries
were added, we typically still get a reduction on the amount of logged
metadata, because it's very likely the first leaf that got the new
dir index entries also has several old dir index entries.

So change the logging logic to not log dir index keys created in past
transactions and log a range item for every gap it finds between each
pair of consecutive index keys, to ensure deletions are tracked and
replayed on log replay.

This patch is part of a patchset comprised of the following patches:

 1/4 btrfs: don't log unnecessary boundary keys when logging directory
 2/4 btrfs: put initial index value of a directory in a constant
 3/4 btrfs: stop copying old dir items when logging a directory
 4/4 btrfs: stop trying to log subdirectories created in past transactions

The following test was run on a branch without this patchset and on a
branch with the first three patches applied:

  $ cat test.sh
  #!/bin/bash

  DEV=/dev/nvme0n1
  MNT=/mnt/nvme0n1

  NUM_FILES=1000000
  NUM_FILE_DELETES=10000

  MKFS_OPTIONS="-O no-holes -R free-space-tree"
  MOUNT_OPTIONS="-o ssd"

  mkfs.btrfs -f $MKFS_OPTIONS $DEV
  mount $MOUNT_OPTIONS $DEV $MNT

  mkdir $MNT/testdir
  for ((i = 1; i <= $NUM_FILES; i++)); do
      echo -n > $MNT/testdir/file_$i
  done

  sync

  del_inc=$(( $NUM_FILES / $NUM_FILE_DELETES ))
  for ((i = 1; i <= $NUM_FILES; i += $del_inc)); do
      rm -f $MNT/testdir/file_$i
  done

  start=$(date +%s%N)
  xfs_io -c "fsync" $MNT/testdir
  end=$(date +%s%N)

  dur=$(( (end - start) / 1000000 ))
  echo "dir fsync took $dur ms after deleting $NUM_FILE_DELETES files"
  echo

  umount $MNT

The test was run on a non-debug kernel (Debian's default kernel config),
and the results were the following for various values of NUM_FILES and
NUM_FILE_DELETES:

** before, NUM_FILES = 1 000 000, NUM_FILE_DELETES = 10 000 **

dir fsync took 585 ms after deleting 10000 files

** after, NUM_FILES = 1 000 000, NUM_FILE_DELETES = 10 000 **

dir fsync took 34 ms after deleting 10000 files   (-94.2%)

** before, NUM_FILES = 100 000, NUM_FILE_DELETES = 1 000 **

dir fsync took 50 ms after deleting 1000 files

** after, NUM_FILES = 100 000, NUM_FILE_DELETES = 1 000 **

dir fsync took 7 ms after deleting 1000 files    (-86.0%)

** before, NUM_FILES = 10 000, NUM_FILE_DELETES = 100 **

dir fsync took 9 ms after deleting 100 files

** after, NUM_FILES = 10 000, NUM_FILE_DELETES = 100 **

dir fsync took 5 ms after deleting 100 files     (-44.4%)

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/tree-log.c | 83 +++++++++++++++++++++++++++++----------------
 1 file changed, 53 insertions(+), 30 deletions(-)

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 8386e418b75a9..a367d62572c1f 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3725,7 +3725,8 @@ static int process_dir_items_leaf(struct btrfs_trans_handle *trans,
 				  struct btrfs_inode *inode,
 				  struct btrfs_path *path,
 				  struct btrfs_path *dst_path,
-				  struct btrfs_log_ctx *ctx)
+				  struct btrfs_log_ctx *ctx,
+				  u64 *last_old_dentry_offset)
 {
 	struct btrfs_root *log = inode->root->log_root;
 	struct extent_buffer *src = path->nodes[0];
@@ -3738,6 +3739,7 @@ static int process_dir_items_leaf(struct btrfs_trans_handle *trans,
 	int i;
 
 	for (i = path->slots[0]; i < nritems; i++) {
+		struct btrfs_dir_item *di;
 		struct btrfs_key key;
 		int ret;
 
@@ -3748,7 +3750,34 @@ static int process_dir_items_leaf(struct btrfs_trans_handle *trans,
 			break;
 		}
 
+		di = btrfs_item_ptr(src, i, struct btrfs_dir_item);
 		ctx->last_dir_item_offset = key.offset;
+
+		/*
+		 * Skip ranges of items that consist only of dir item keys created
+		 * in past transactions. However if we find a gap, we must log a
+		 * dir index range item for that gap, so that index keys in that
+		 * gap are deleted during log replay.
+		 */
+		if (btrfs_dir_transid(src, di) < trans->transid) {
+			if (key.offset > *last_old_dentry_offset + 1) {
+				ret = insert_dir_log_key(trans, log, dst_path,
+						 ino, *last_old_dentry_offset + 1,
+						 key.offset - 1);
+				/*
+				 * -EEXIST should never happen because when we
+				 * log a directory in full mode (LOG_INODE_ALL)
+				 * we drop all BTRFS_DIR_LOG_INDEX_KEY keys from
+				 * the log tree.
+				 */
+				ASSERT(ret != -EEXIST);
+				if (ret < 0)
+					return ret;
+			}
+
+			*last_old_dentry_offset = key.offset;
+			continue;
+		}
 		/*
 		 * We must make sure that when we log a directory entry, the
 		 * corresponding inode, after log replay, has a matching link
@@ -3772,14 +3801,10 @@ static int process_dir_items_leaf(struct btrfs_trans_handle *trans,
 		 * resulting in -ENOTEMPTY errors.
 		 */
 		if (!ctx->log_new_dentries) {
-			struct btrfs_dir_item *di;
 			struct btrfs_key di_key;
 
-			di = btrfs_item_ptr(src, i, struct btrfs_dir_item);
 			btrfs_dir_item_key_to_cpu(src, di, &di_key);
-			if ((btrfs_dir_transid(src, di) == trans->transid ||
-			     btrfs_dir_type(src, di) == BTRFS_FT_DIR) &&
-			    di_key.type != BTRFS_ROOT_ITEM_KEY)
+			if (di_key.type != BTRFS_ROOT_ITEM_KEY)
 				ctx->log_new_dentries = true;
 		}
 
@@ -3860,7 +3885,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
 	struct btrfs_root *log = root->log_root;
 	int err = 0;
 	int ret;
-	u64 first_offset = min_offset;
+	u64 last_old_dentry_offset = min_offset - 1;
 	u64 last_offset = (u64)-1;
 	u64 ino = btrfs_ino(inode);
 
@@ -3894,10 +3919,11 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
 		 */
 		if (ret == 0) {
 			struct btrfs_key tmp;
+
 			btrfs_item_key_to_cpu(path->nodes[0], &tmp,
 					      path->slots[0]);
 			if (tmp.type == BTRFS_DIR_INDEX_KEY)
-				first_offset = max(min_offset, tmp.offset) + 1;
+				last_old_dentry_offset = tmp.offset;
 		}
 		goto done;
 	}
@@ -3917,7 +3943,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
 		 * previous key's offset plus 1, so that those deletes are replayed.
 		 */
 		if (tmp.type == BTRFS_DIR_INDEX_KEY)
-			first_offset = tmp.offset + 1;
+			last_old_dentry_offset = tmp.offset;
 	}
 	btrfs_release_path(path);
 
@@ -3939,7 +3965,8 @@ search:
 	 * from our directory
 	 */
 	while (1) {
-		ret = process_dir_items_leaf(trans, inode, path, dst_path, ctx);
+		ret = process_dir_items_leaf(trans, inode, path, dst_path, ctx,
+					     &last_old_dentry_offset);
 		if (ret != 0) {
 			if (ret < 0)
 				err = ret;
@@ -3990,13 +4017,21 @@ done:
 	if (err == 0) {
 		*last_offset_ret = last_offset;
 		/*
-		 * insert the log range keys to indicate where the log
-		 * is valid
+		 * In case the leaf was changed in the current transaction but
+		 * all its dir items are from a past transaction, the last item
+		 * in the leaf is a dir item and there's no gap between that last
+		 * dir item and the first one on the next leaf (which did not
+		 * change in the current transaction), then we don't need to log
+		 * a range, last_old_dentry_offset is == to last_offset.
 		 */
-		ret = insert_dir_log_key(trans, log, path, ino, first_offset,
-					 last_offset);
-		if (ret)
-			err = ret;
+		ASSERT(last_old_dentry_offset <= last_offset);
+		if (last_old_dentry_offset < last_offset) {
+			ret = insert_dir_log_key(trans, log, path, ino,
+						 last_old_dentry_offset + 1,
+						 last_offset);
+			if (ret)
+				err = ret;
+		}
 	}
 	return err;
 }
@@ -4038,7 +4073,7 @@ static noinline int log_directory_changes(struct btrfs_trans_handle *trans,
 	if (inode->logged_trans != trans->transid)
 		inode->last_dir_index_offset = (u64)-1;
 
-	min_key = 0;
+	min_key = BTRFS_DIR_START_INDEX;
 	max_key = 0;
 	ctx->last_dir_item_offset = inode->last_dir_index_offset;
 
@@ -5911,7 +5946,6 @@ static int log_new_dir_dentries(struct btrfs_trans_handle *trans,
 				struct btrfs_log_ctx *ctx)
 {
 	struct btrfs_fs_info *fs_info = root->fs_info;
-	struct btrfs_root *log = root->log_root;
 	struct btrfs_path *path;
 	LIST_HEAD(dir_list);
 	struct btrfs_dir_list *dir_elem;
@@ -5953,7 +5987,7 @@ static int log_new_dir_dentries(struct btrfs_trans_handle *trans,
 		min_key.offset = 0;
 again:
 		btrfs_release_path(path);
-		ret = btrfs_search_forward(log, &min_key, path, trans->transid);
+		ret = btrfs_search_forward(root, &min_key, path, trans->transid);
 		if (ret < 0) {
 			goto next_dir_inode;
 		} else if (ret > 0) {
@@ -5961,7 +5995,6 @@ again:
 			goto next_dir_inode;
 		}
 
-process_leaf:
 		leaf = path->nodes[0];
 		nritems = btrfs_header_nritems(leaf);
 		for (i = path->slots[0]; i < nritems; i++) {
@@ -6018,16 +6051,6 @@ process_leaf:
 			}
 			break;
 		}
-		if (i == nritems) {
-			ret = btrfs_next_leaf(log, path);
-			if (ret < 0) {
-				goto next_dir_inode;
-			} else if (ret > 0) {
-				ret = 0;
-				goto next_dir_inode;
-			}
-			goto process_leaf;
-		}
 		if (min_key.offset < (u64)-1) {
 			min_key.offset++;
 			goto again;
-- 
GitLab


From de6bc7f59896e888d388a2b469807a4d4b4ab55e Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Wed, 15 Dec 2021 12:20:01 +0000
Subject: [PATCH 1336/1586] btrfs: stop trying to log subdirectories created in
 past transactions

When logging a directory we are trying to log subdirectories that were
changed in the current transaction and created in a past transaction.
This type of behaviour was introduced by commit 2f2ff0ee5e4303 ("Btrfs:
fix metadata inconsistencies after directory fsync"), to fix some metadata
inconsistencies that in the meanwhile no longer need this behaviour due to
numerous other changes that happened throughout the years.

This behaviour, besides not needed anymore, it's also undesirable because:

1) It's not reliable because it's only triggered for the directories
   of dentries (dir items) that happen to be present on a leaf that
   was changed in the current transaction. If a dentry that points to
   a directory resides on a leaf that was not changed in the current
   transaction, then it's not logged, as at log_dir_items() and
   log_new_dir_dentries() we use btrfs_search_forward();

2) It's not required by posix or any standard, it's undefined territory.
   The only way to guarantee a subdirectory is logged, it to explicitly
   fsync it;

Making the behaviour guaranteed would require scanning all directory
items, check which point to a directory, and then fsync each subdirectory
which was modified in the current transaction. This could be very
expensive for large directories with many subdirectories and/or large
subdirectories.

So remove that obsolete logic.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/tree-log.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index a367d62572c1f..a0364f16e7908 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -6012,8 +6012,7 @@ again:
 
 			di = btrfs_item_ptr(leaf, i, struct btrfs_dir_item);
 			type = btrfs_dir_type(leaf, di);
-			if (btrfs_dir_transid(leaf, di) < trans->transid &&
-			    type != BTRFS_FT_DIR)
+			if (btrfs_dir_transid(leaf, di) < trans->transid)
 				continue;
 			btrfs_dir_item_key_to_cpu(leaf, di, &di_key);
 			if (di_key.type == BTRFS_ROOT_ITEM_KEY)
-- 
GitLab


From ff37c89f94be14b0e22a532d1e6d57187bfd5bb8 Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Tue, 11 Jan 2022 18:00:26 +0200
Subject: [PATCH 1337/1586] btrfs: move missing device handling in a dedicate
 function

This simplifies the code flow in read_one_chunk and makes error handling
when handling missing devices a bit simpler by reducing it to a single
check if something went wrong. No functional changes.

Reviewed-by: Su Yue <l@damenly.su>
Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/volumes.c | 38 ++++++++++++++++++++++++--------------
 1 file changed, 24 insertions(+), 14 deletions(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index b07d382d53a86..76b6957c619be 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -7060,6 +7060,27 @@ static void warn_32bit_meta_chunk(struct btrfs_fs_info *fs_info,
 }
 #endif
 
+static struct btrfs_device *handle_missing_device(struct btrfs_fs_info *fs_info,
+						  u64 devid, u8 *uuid)
+{
+	struct btrfs_device *dev;
+
+	if (!btrfs_test_opt(fs_info, DEGRADED)) {
+		btrfs_report_missing_device(fs_info, devid, uuid, true);
+		return ERR_PTR(-ENOENT);
+	}
+
+	dev = add_missing_dev(fs_info->fs_devices, devid, uuid);
+	if (IS_ERR(dev)) {
+		btrfs_err(fs_info, "failed to init missing device %llu: %ld",
+			  devid, PTR_ERR(dev));
+		return dev;
+	}
+	btrfs_report_missing_device(fs_info, devid, uuid, false);
+
+	return dev;
+}
+
 static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf,
 			  struct btrfs_chunk *chunk)
 {
@@ -7147,28 +7168,17 @@ static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf,
 				   BTRFS_UUID_SIZE);
 		args.uuid = uuid;
 		map->stripes[i].dev = btrfs_find_device(fs_info->fs_devices, &args);
-		if (!map->stripes[i].dev &&
-		    !btrfs_test_opt(fs_info, DEGRADED)) {
-			free_extent_map(em);
-			btrfs_report_missing_device(fs_info, devid, uuid, true);
-			return -ENOENT;
-		}
 		if (!map->stripes[i].dev) {
-			map->stripes[i].dev =
-				add_missing_dev(fs_info->fs_devices, devid,
-						uuid);
+			map->stripes[i].dev = handle_missing_device(fs_info,
+								    devid, uuid);
 			if (IS_ERR(map->stripes[i].dev)) {
 				free_extent_map(em);
-				btrfs_err(fs_info,
-					"failed to init missing dev %llu: %ld",
-					devid, PTR_ERR(map->stripes[i].dev));
 				return PTR_ERR(map->stripes[i].dev);
 			}
-			btrfs_report_missing_device(fs_info, devid, uuid, false);
 		}
+
 		set_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
 				&(map->stripes[i].dev->dev_state));
-
 	}
 
 	write_lock(&map_tree->lock);
-- 
GitLab


From 9ad1230533efb6d9f5865a351eca8f0617cf5b74 Mon Sep 17 00:00:00 2001
From: Sahil Kang <sahil.kang@asilaycomputing.com>
Date: Sat, 15 Jan 2022 18:48:47 -0800
Subject: [PATCH 1338/1586] btrfs: reuse existing inode from btrfs_ioctl

btrfs_ioctl extracts inode from file so we can pass that into the
callbacks.

Signed-off-by: Sahil Kang <sahil.kang@asilaycomputing.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ioctl.c | 41 ++++++++++++++++-------------------------
 fs/btrfs/send.c  |  4 ++--
 fs/btrfs/send.h  |  2 +-
 3 files changed, 19 insertions(+), 28 deletions(-)

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 70a08ef9c5ae4..dbb0a8dad770a 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2227,10 +2227,9 @@ free_args:
 	return ret;
 }
 
-static noinline int btrfs_ioctl_subvol_getflags(struct file *file,
+static noinline int btrfs_ioctl_subvol_getflags(struct inode *inode,
 						void __user *arg)
 {
-	struct inode *inode = file_inode(file);
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	int ret = 0;
@@ -2560,12 +2559,11 @@ err:
 	return ret;
 }
 
-static noinline int btrfs_ioctl_tree_search(struct file *file,
-					   void __user *argp)
+static noinline int btrfs_ioctl_tree_search(struct inode *inode,
+					    void __user *argp)
 {
 	struct btrfs_ioctl_search_args __user *uargs;
 	struct btrfs_ioctl_search_key sk;
-	struct inode *inode;
 	int ret;
 	size_t buf_size;
 
@@ -2579,7 +2577,6 @@ static noinline int btrfs_ioctl_tree_search(struct file *file,
 
 	buf_size = sizeof(uargs->buf);
 
-	inode = file_inode(file);
 	ret = search_ioctl(inode, &sk, &buf_size, uargs->buf);
 
 	/*
@@ -2594,12 +2591,11 @@ static noinline int btrfs_ioctl_tree_search(struct file *file,
 	return ret;
 }
 
-static noinline int btrfs_ioctl_tree_search_v2(struct file *file,
+static noinline int btrfs_ioctl_tree_search_v2(struct inode *inode,
 					       void __user *argp)
 {
 	struct btrfs_ioctl_search_args_v2 __user *uarg;
 	struct btrfs_ioctl_search_args_v2 args;
-	struct inode *inode;
 	int ret;
 	size_t buf_size;
 	const size_t buf_limit = SZ_16M;
@@ -2618,7 +2614,6 @@ static noinline int btrfs_ioctl_tree_search_v2(struct file *file,
 	if (buf_size > buf_limit)
 		buf_size = buf_limit;
 
-	inode = file_inode(file);
 	ret = search_ioctl(inode, &args.key, &buf_size,
 			   (char __user *)(&uarg->buf[0]));
 	if (ret == 0 && copy_to_user(&uarg->key, &args.key, sizeof(args.key)))
@@ -2952,7 +2947,7 @@ static int btrfs_ioctl_ino_lookup_user(struct file *file, void __user *argp)
 }
 
 /* Get the subvolume information in BTRFS_ROOT_ITEM and BTRFS_ROOT_BACKREF */
-static int btrfs_ioctl_get_subvol_info(struct file *file, void __user *argp)
+static int btrfs_ioctl_get_subvol_info(struct inode *inode, void __user *argp)
 {
 	struct btrfs_ioctl_get_subvol_info_args *subvol_info;
 	struct btrfs_fs_info *fs_info;
@@ -2964,7 +2959,6 @@ static int btrfs_ioctl_get_subvol_info(struct file *file, void __user *argp)
 	struct extent_buffer *leaf;
 	unsigned long item_off;
 	unsigned long item_len;
-	struct inode *inode;
 	int slot;
 	int ret = 0;
 
@@ -2978,7 +2972,6 @@ static int btrfs_ioctl_get_subvol_info(struct file *file, void __user *argp)
 		return -ENOMEM;
 	}
 
-	inode = file_inode(file);
 	fs_info = BTRFS_I(inode)->root->fs_info;
 
 	/* Get root_item of inode's subvolume */
@@ -3072,12 +3065,11 @@ out_free:
  * Return ROOT_REF information of the subvolume containing this inode
  * except the subvolume name.
  */
-static int btrfs_ioctl_get_subvol_rootref(struct btrfs_root *subvol_root,
+static int btrfs_ioctl_get_subvol_rootref(struct btrfs_root *root,
 					  void __user *argp)
 {
 	struct btrfs_ioctl_get_subvol_rootref_args *rootrefs;
 	struct btrfs_root_ref *rref;
-	struct btrfs_root *root;
 	struct btrfs_path *path;
 	struct btrfs_key key;
 	struct extent_buffer *leaf;
@@ -3096,14 +3088,13 @@ static int btrfs_ioctl_get_subvol_rootref(struct btrfs_root *subvol_root,
 		return PTR_ERR(rootrefs);
 	}
 
-	root = subvol_root->fs_info->tree_root;
-	objectid = subvol_root->root_key.objectid;
-
+	objectid = root->root_key.objectid;
 	key.objectid = objectid;
 	key.type = BTRFS_ROOT_REF_KEY;
 	key.offset = rootrefs->min_treeid;
 	found = 0;
 
+	root = root->fs_info->tree_root;
 	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
 	if (ret < 0) {
 		goto out;
@@ -5143,7 +5134,7 @@ out_drop_write:
 	return ret;
 }
 
-static int _btrfs_ioctl_send(struct file *file, void __user *argp, bool compat)
+static int _btrfs_ioctl_send(struct inode *inode, void __user *argp, bool compat)
 {
 	struct btrfs_ioctl_send_args *arg;
 	int ret;
@@ -5173,7 +5164,7 @@ static int _btrfs_ioctl_send(struct file *file, void __user *argp, bool compat)
 		if (IS_ERR(arg))
 			return PTR_ERR(arg);
 	}
-	ret = btrfs_ioctl_send(file, arg);
+	ret = btrfs_ioctl_send(inode, arg);
 	kfree(arg);
 	return ret;
 }
@@ -5208,7 +5199,7 @@ long btrfs_ioctl(struct file *file, unsigned int
 	case BTRFS_IOC_SNAP_DESTROY_V2:
 		return btrfs_ioctl_snap_destroy(file, argp, true);
 	case BTRFS_IOC_SUBVOL_GETFLAGS:
-		return btrfs_ioctl_subvol_getflags(file, argp);
+		return btrfs_ioctl_subvol_getflags(inode, argp);
 	case BTRFS_IOC_SUBVOL_SETFLAGS:
 		return btrfs_ioctl_subvol_setflags(file, argp);
 	case BTRFS_IOC_DEFAULT_SUBVOL:
@@ -5232,9 +5223,9 @@ long btrfs_ioctl(struct file *file, unsigned int
 	case BTRFS_IOC_BALANCE:
 		return btrfs_ioctl_balance(file, NULL);
 	case BTRFS_IOC_TREE_SEARCH:
-		return btrfs_ioctl_tree_search(file, argp);
+		return btrfs_ioctl_tree_search(inode, argp);
 	case BTRFS_IOC_TREE_SEARCH_V2:
-		return btrfs_ioctl_tree_search_v2(file, argp);
+		return btrfs_ioctl_tree_search_v2(inode, argp);
 	case BTRFS_IOC_INO_LOOKUP:
 		return btrfs_ioctl_ino_lookup(root, argp);
 	case BTRFS_IOC_INO_PATHS:
@@ -5283,10 +5274,10 @@ long btrfs_ioctl(struct file *file, unsigned int
 		return btrfs_ioctl_set_received_subvol_32(file, argp);
 #endif
 	case BTRFS_IOC_SEND:
-		return _btrfs_ioctl_send(file, argp, false);
+		return _btrfs_ioctl_send(inode, argp, false);
 #if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
 	case BTRFS_IOC_SEND_32:
-		return _btrfs_ioctl_send(file, argp, true);
+		return _btrfs_ioctl_send(inode, argp, true);
 #endif
 	case BTRFS_IOC_GET_DEV_STATS:
 		return btrfs_ioctl_get_dev_stats(fs_info, argp);
@@ -5313,7 +5304,7 @@ long btrfs_ioctl(struct file *file, unsigned int
 	case BTRFS_IOC_SET_FEATURES:
 		return btrfs_ioctl_set_features(file, argp);
 	case BTRFS_IOC_GET_SUBVOL_INFO:
-		return btrfs_ioctl_get_subvol_info(file, argp);
+		return btrfs_ioctl_get_subvol_info(inode, argp);
 	case BTRFS_IOC_GET_SUBVOL_ROOTREF:
 		return btrfs_ioctl_get_subvol_rootref(root, argp);
 	case BTRFS_IOC_INO_LOOKUP_USER:
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 201eb2628aea1..2845febb711b7 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -7477,10 +7477,10 @@ static void dedupe_in_progress_warn(const struct btrfs_root *root)
 		      root->root_key.objectid, root->dedupe_in_progress);
 }
 
-long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
+long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg)
 {
 	int ret = 0;
-	struct btrfs_root *send_root = BTRFS_I(file_inode(mnt_file))->root;
+	struct btrfs_root *send_root = BTRFS_I(inode)->root;
 	struct btrfs_fs_info *fs_info = send_root->fs_info;
 	struct btrfs_root *clone_root;
 	struct send_ctx *sctx = NULL;
diff --git a/fs/btrfs/send.h b/fs/btrfs/send.h
index 23bcefc84e49a..08602fdd600a6 100644
--- a/fs/btrfs/send.h
+++ b/fs/btrfs/send.h
@@ -126,7 +126,7 @@ enum {
 #define BTRFS_SEND_A_MAX (__BTRFS_SEND_A_MAX - 1)
 
 #ifdef __KERNEL__
-long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg);
+long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg);
 #endif
 
 #endif
-- 
GitLab


From bef16b52987b3cd18222109e2490b2f48018cd95 Mon Sep 17 00:00:00 2001
From: Anand Jain <anand.jain@oracle.com>
Date: Mon, 17 Jan 2022 23:50:39 +0800
Subject: [PATCH 1339/1586] btrfs: simplify fs_devices member access in
 btrfs_init_dev_replace_tgtdev

In btrfs_init_dev_replace_tgtdev() we dereference fs_info to get
fs_devices many times, instead save a point to the fs_devices.

Signed-off-by: Anand Jain <anand.jain@oracle.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/dev-replace.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 62b9651ea6629..1bd1bcee95c97 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -243,6 +243,7 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
 				  struct btrfs_device *srcdev,
 				  struct btrfs_device **device_out)
 {
+	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
 	struct btrfs_device *device;
 	struct block_device *bdev;
 	struct rcu_string *name;
@@ -271,7 +272,7 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
 
 	sync_blockdev(bdev);
 
-	list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) {
+	list_for_each_entry(device, &fs_devices->devices, dev_list) {
 		if (device->bdev == bdev) {
 			btrfs_err(fs_info,
 				  "target device is in the filesystem!");
@@ -320,17 +321,17 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
 	device->mode = FMODE_EXCL;
 	device->dev_stats_valid = 1;
 	set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE);
-	device->fs_devices = fs_info->fs_devices;
+	device->fs_devices = fs_devices;
 
 	ret = btrfs_get_dev_zone_info(device, false);
 	if (ret)
 		goto error;
 
-	mutex_lock(&fs_info->fs_devices->device_list_mutex);
-	list_add(&device->dev_list, &fs_info->fs_devices->devices);
-	fs_info->fs_devices->num_devices++;
-	fs_info->fs_devices->open_devices++;
-	mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+	mutex_lock(&fs_devices->device_list_mutex);
+	list_add(&device->dev_list, &fs_devices->devices);
+	fs_devices->num_devices++;
+	fs_devices->open_devices++;
+	mutex_unlock(&fs_devices->device_list_mutex);
 
 	*device_out = device;
 	return 0;
-- 
GitLab


From 770c79fb65506fc7c16459855c3839429f46cb32 Mon Sep 17 00:00:00 2001
From: Anand Jain <anand.jain@oracle.com>
Date: Wed, 12 Jan 2022 13:05:59 +0800
Subject: [PATCH 1340/1586] btrfs: harden identification of a stale device

Identifying and removing the stale device from the fs_uuids list is done
by btrfs_free_stale_devices().  btrfs_free_stale_devices() in turn
depends on device_path_matched() to check if the device appears in more
than one btrfs_device structure.

The matching of the device happens by its path, the device path. However,
when device mapper is in use, the dm device paths are nothing but a link
to the actual block device, which leads to the device_path_matched()
failing to match.

Fix this by matching the dev_t as provided by lookup_bdev() instead of
plain string compare of the device paths.

Reported-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/volumes.c | 45 ++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 38 insertions(+), 7 deletions(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 76b6957c619be..188e2a61402d0 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -534,15 +534,48 @@ error:
 	return ret;
 }
 
-static bool device_path_matched(const char *path, struct btrfs_device *device)
+/*
+ * Check if the device in the path matches the device in the given struct device.
+ *
+ * Returns:
+ *   true  If it is the same device.
+ *   false If it is not the same device or on error.
+ */
+static bool device_matched(const struct btrfs_device *device, const char *path)
 {
-	int found;
+	char *device_name;
+	dev_t dev_old;
+	dev_t dev_new;
+	int ret;
+
+	/*
+	 * If we are looking for a device with the matching dev_t, then skip
+	 * device without a name (a missing device).
+	 */
+	if (!device->name)
+		return false;
+
+	device_name = kzalloc(BTRFS_PATH_NAME_MAX, GFP_KERNEL);
+	if (!device_name)
+		return false;
 
 	rcu_read_lock();
-	found = strcmp(rcu_str_deref(device->name), path);
+	scnprintf(device_name, BTRFS_PATH_NAME_MAX, "%s", rcu_str_deref(device->name));
 	rcu_read_unlock();
 
-	return found == 0;
+	ret = lookup_bdev(device_name, &dev_old);
+	kfree(device_name);
+	if (ret)
+		return false;
+
+	ret = lookup_bdev(path, &dev_new);
+	if (ret)
+		return false;
+
+	if (dev_old == dev_new)
+		return true;
+
+	return false;
 }
 
 /*
@@ -575,9 +608,7 @@ static int btrfs_free_stale_devices(const char *path,
 					 &fs_devices->devices, dev_list) {
 			if (skip_device && skip_device == device)
 				continue;
-			if (path && !device->name)
-				continue;
-			if (path && !device_path_matched(path, device))
+			if (path && !device_matched(device, path))
 				continue;
 			if (fs_devices->opened) {
 				/* for an already deleted device return 0 */
-- 
GitLab


From 16cab91a0c8fea50a03ef9c49ca829e6c0c4cf66 Mon Sep 17 00:00:00 2001
From: Anand Jain <anand.jain@oracle.com>
Date: Wed, 12 Jan 2022 13:06:00 +0800
Subject: [PATCH 1341/1586] btrfs: match stale devices by dev_t

After the commit "btrfs: harden identification of the stale device", we
don't have to match the device path anymore. Instead, we match the dev_t.
So pass in the dev_t instead of the device path, in the call chain
btrfs_forget_devices()->btrfs_free_stale_devices().

Signed-off-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/super.c   |  8 ++++++-
 fs/btrfs/volumes.c | 56 +++++++++++++++++++++++++---------------------
 fs/btrfs/volumes.h |  2 +-
 3 files changed, 39 insertions(+), 27 deletions(-)

diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 4d947ba32da9d..1f4cda3745dc2 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -2383,6 +2383,7 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
 {
 	struct btrfs_ioctl_vol_args *vol;
 	struct btrfs_device *device = NULL;
+	dev_t devt = 0;
 	int ret = -ENOTTY;
 
 	if (!capable(CAP_SYS_ADMIN))
@@ -2402,7 +2403,12 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
 		mutex_unlock(&uuid_mutex);
 		break;
 	case BTRFS_IOC_FORGET_DEV:
-		ret = btrfs_forget_devices(vol->name);
+		if (vol->name[0] != 0) {
+			ret = lookup_bdev(vol->name, &devt);
+			if (ret)
+				break;
+		}
+		ret = btrfs_forget_devices(devt);
 		break;
 	case BTRFS_IOC_DEVICES_READY:
 		mutex_lock(&uuid_mutex);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 188e2a61402d0..b47dd13877666 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -541,11 +541,10 @@ error:
  *   true  If it is the same device.
  *   false If it is not the same device or on error.
  */
-static bool device_matched(const struct btrfs_device *device, const char *path)
+static bool device_matched(const struct btrfs_device *device, dev_t dev_new)
 {
 	char *device_name;
 	dev_t dev_old;
-	dev_t dev_new;
 	int ret;
 
 	/*
@@ -568,29 +567,26 @@ static bool device_matched(const struct btrfs_device *device, const char *path)
 	if (ret)
 		return false;
 
-	ret = lookup_bdev(path, &dev_new);
-	if (ret)
-		return false;
-
 	if (dev_old == dev_new)
 		return true;
 
 	return false;
 }
 
-/*
- *  Search and remove all stale (devices which are not mounted) devices.
+/**
+ *  Search and remove all stale devices (which are not mounted).
  *  When both inputs are NULL, it will search and release all stale devices.
- *  path:	Optional. When provided will it release all unmounted devices
- *		matching this path only.
- *  skip_dev:	Optional. Will skip this device when searching for the stale
+ *
+ *  @devt:	Optional. When provided will it release all unmounted devices
+ *		matching this devt only.
+ *  @skip_device:  Optional. Will skip this device when searching for the stale
  *		devices.
- *  Return:	0 for success or if @path is NULL.
- * 		-EBUSY if @path is a mounted device.
- * 		-ENOENT if @path does not match any device in the list.
+ *
+ *  Return:	0 for success or if @devt is 0.
+ *		-EBUSY if @devt is a mounted device.
+ *		-ENOENT if @devt does not match any device in the list.
  */
-static int btrfs_free_stale_devices(const char *path,
-				     struct btrfs_device *skip_device)
+static int btrfs_free_stale_devices(dev_t devt, struct btrfs_device *skip_device)
 {
 	struct btrfs_fs_devices *fs_devices, *tmp_fs_devices;
 	struct btrfs_device *device, *tmp_device;
@@ -598,7 +594,7 @@ static int btrfs_free_stale_devices(const char *path,
 
 	lockdep_assert_held(&uuid_mutex);
 
-	if (path)
+	if (devt)
 		ret = -ENOENT;
 
 	list_for_each_entry_safe(fs_devices, tmp_fs_devices, &fs_uuids, fs_list) {
@@ -608,11 +604,11 @@ static int btrfs_free_stale_devices(const char *path,
 					 &fs_devices->devices, dev_list) {
 			if (skip_device && skip_device == device)
 				continue;
-			if (path && !device_matched(device, path))
+			if (devt && !device_matched(device, devt))
 				continue;
 			if (fs_devices->opened) {
 				/* for an already deleted device return 0 */
-				if (path && ret != 0)
+				if (devt && ret != 0)
 					ret = -EBUSY;
 				break;
 			}
@@ -1362,12 +1358,12 @@ static struct btrfs_super_block *btrfs_read_disk_super(struct block_device *bdev
 	return disk_super;
 }
 
-int btrfs_forget_devices(const char *path)
+int btrfs_forget_devices(dev_t devt)
 {
 	int ret;
 
 	mutex_lock(&uuid_mutex);
-	ret = btrfs_free_stale_devices(strlen(path) ? path : NULL, NULL);
+	ret = btrfs_free_stale_devices(devt, NULL);
 	mutex_unlock(&uuid_mutex);
 
 	return ret;
@@ -1416,9 +1412,15 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, fmode_t flags,
 	}
 
 	device = device_list_add(path, disk_super, &new_device_added);
-	if (!IS_ERR(device)) {
-		if (new_device_added)
-			btrfs_free_stale_devices(path, device);
+	if (!IS_ERR(device) && new_device_added) {
+		dev_t devt;
+
+		/*
+		 * It is ok to ignore if we fail to free the stale device (if
+		 * any). As there is nothing much that can be done about it.
+		 */
+		if (lookup_bdev(path, &devt) == 0)
+			btrfs_free_stale_devices(devt, device);
 	}
 
 	btrfs_release_disk_super(disk_super);
@@ -2650,6 +2652,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 	int ret = 0;
 	bool seeding_dev = false;
 	bool locked = false;
+	dev_t devt;
 
 	if (sb_rdonly(sb) && !fs_devices->seeding)
 		return -EROFS;
@@ -2842,10 +2845,13 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 	 * Now that we have written a new super block to this device, check all
 	 * other fs_devices list if device_path alienates any other scanned
 	 * device.
+	 * Skip forget_deivces if lookup_bdev() fails as there is nothing much
+	 * that can be done about it.
 	 * We can ignore the return value as it typically returns -EINVAL and
 	 * only succeeds if the device was an alien.
 	 */
-	btrfs_forget_devices(device_path);
+	if (lookup_bdev(device_path, &devt) == 0)
+		btrfs_forget_devices(devt);
 
 	/* Update ctime/mtime for blkid or udev */
 	update_dev_time(device_path);
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 005c9e2a491a1..38b898bc5f0eb 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -505,7 +505,7 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
 		       fmode_t flags, void *holder);
 struct btrfs_device *btrfs_scan_one_device(const char *path,
 					   fmode_t flags, void *holder);
-int btrfs_forget_devices(const char *path);
+int btrfs_forget_devices(dev_t devt);
 void btrfs_close_devices(struct btrfs_fs_devices *fs_devices);
 void btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices);
 void btrfs_assign_next_active_device(struct btrfs_device *device,
-- 
GitLab


From 4889bc05a96e039b055bbd11438c40bf956f057b Mon Sep 17 00:00:00 2001
From: Anand Jain <anand.jain@oracle.com>
Date: Wed, 12 Jan 2022 13:06:01 +0800
Subject: [PATCH 1342/1586] btrfs: add device major-minor info in the struct
 btrfs_device

Internally it is common to use the major-minor number to identify a
device and, at a few locations in btrfs, we use the major-minor number
to match the device.

So when we identify a new btrfs device through device add or device
replace or device-scan/ready save the device's major-minor (dev_t) in the
struct btrfs_device so that we don't have to call lookup_bdev() again.

Signed-off-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/dev-replace.c |  3 +++
 fs/btrfs/volumes.c     | 40 +++++++++++++++-------------------------
 fs/btrfs/volumes.h     |  5 +++++
 3 files changed, 23 insertions(+), 25 deletions(-)

diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 1bd1bcee95c97..71fd99b482832 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -303,6 +303,9 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
 		goto error;
 	}
 	rcu_assign_pointer(device->name, name);
+	ret = lookup_bdev(device_path, &device->devt);
+	if (ret)
+		goto error;
 
 	set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
 	device->generation = 0;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index b47dd13877666..ebe655eb8b4df 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -808,11 +808,17 @@ static noinline struct btrfs_device *device_list_add(const char *path,
 	struct rcu_string *name;
 	u64 found_transid = btrfs_super_generation(disk_super);
 	u64 devid = btrfs_stack_device_id(&disk_super->dev_item);
+	dev_t path_devt;
+	int error;
 	bool has_metadata_uuid = (btrfs_super_incompat_flags(disk_super) &
 		BTRFS_FEATURE_INCOMPAT_METADATA_UUID);
 	bool fsid_change_in_progress = (btrfs_super_flags(disk_super) &
 					BTRFS_SUPER_FLAG_CHANGING_FSID_V2);
 
+	error = lookup_bdev(path, &path_devt);
+	if (error)
+		return ERR_PTR(error);
+
 	if (fsid_change_in_progress) {
 		if (!has_metadata_uuid)
 			fs_devices = find_fsid_inprogress(disk_super);
@@ -895,6 +901,7 @@ static noinline struct btrfs_device *device_list_add(const char *path,
 			return ERR_PTR(-ENOMEM);
 		}
 		rcu_assign_pointer(device->name, name);
+		device->devt = path_devt;
 
 		list_add_rcu(&device->dev_list, &fs_devices->devices);
 		fs_devices->num_devices++;
@@ -957,16 +964,7 @@ static noinline struct btrfs_device *device_list_add(const char *path,
 		 * make sure it's the same device if the device is mounted
 		 */
 		if (device->bdev) {
-			int error;
-			dev_t path_dev;
-
-			error = lookup_bdev(path, &path_dev);
-			if (error) {
-				mutex_unlock(&fs_devices->device_list_mutex);
-				return ERR_PTR(error);
-			}
-
-			if (device->bdev->bd_dev != path_dev) {
+			if (device->devt != path_devt) {
 				mutex_unlock(&fs_devices->device_list_mutex);
 				/*
 				 * device->fs_info may not be reliable here, so
@@ -999,6 +997,7 @@ static noinline struct btrfs_device *device_list_add(const char *path,
 			fs_devices->missing_devices--;
 			clear_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state);
 		}
+		device->devt = path_devt;
 	}
 
 	/*
@@ -1412,16 +1411,8 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, fmode_t flags,
 	}
 
 	device = device_list_add(path, disk_super, &new_device_added);
-	if (!IS_ERR(device) && new_device_added) {
-		dev_t devt;
-
-		/*
-		 * It is ok to ignore if we fail to free the stale device (if
-		 * any). As there is nothing much that can be done about it.
-		 */
-		if (lookup_bdev(path, &devt) == 0)
-			btrfs_free_stale_devices(devt, device);
-	}
+	if (!IS_ERR(device) && new_device_added)
+		btrfs_free_stale_devices(device->devt, device);
 
 	btrfs_release_disk_super(disk_super);
 
@@ -2652,7 +2643,6 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 	int ret = 0;
 	bool seeding_dev = false;
 	bool locked = false;
-	dev_t devt;
 
 	if (sb_rdonly(sb) && !fs_devices->seeding)
 		return -EROFS;
@@ -2702,6 +2692,9 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 
 	device->fs_info = fs_info;
 	device->bdev = bdev;
+	ret = lookup_bdev(device_path, &device->devt);
+	if (ret)
+		goto error_free_device;
 
 	ret = btrfs_get_dev_zone_info(device, false);
 	if (ret)
@@ -2845,13 +2838,10 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 	 * Now that we have written a new super block to this device, check all
 	 * other fs_devices list if device_path alienates any other scanned
 	 * device.
-	 * Skip forget_deivces if lookup_bdev() fails as there is nothing much
-	 * that can be done about it.
 	 * We can ignore the return value as it typically returns -EINVAL and
 	 * only succeeds if the device was an alien.
 	 */
-	if (lookup_bdev(device_path, &devt) == 0)
-		btrfs_forget_devices(devt);
+	btrfs_forget_devices(device->devt);
 
 	/* Update ctime/mtime for blkid or udev */
 	update_dev_time(device_path);
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 38b898bc5f0eb..bd297f23d19e7 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -72,6 +72,11 @@ struct btrfs_device {
 	/* the mode sent to blkdev_get */
 	fmode_t mode;
 
+	/*
+	 * Device's major-minor number. Must be set even if the device is not
+	 * opened (bdev == NULL), unless the device is missing.
+	 */
+	dev_t devt;
 	unsigned long dev_state;
 	blk_status_t last_flush_error;
 
-- 
GitLab


From 330a5bf4551784f2f0baef27f2c78550e40fc8a0 Mon Sep 17 00:00:00 2001
From: Anand Jain <anand.jain@oracle.com>
Date: Wed, 12 Jan 2022 13:06:02 +0800
Subject: [PATCH 1343/1586] btrfs: use dev_t to match device in device_matched

Commit "btrfs: add device major-minor info in the struct btrfs_device"
saved the device major-minor number in the struct btrfs_device upon
discovering it.

So no need to lookup_bdev() again just match, which means
device_matched() can go away.

Signed-off-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/volumes.c | 41 +----------------------------------------
 1 file changed, 1 insertion(+), 40 deletions(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index ebe655eb8b4df..1e1120ed9f8c6 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -534,45 +534,6 @@ error:
 	return ret;
 }
 
-/*
- * Check if the device in the path matches the device in the given struct device.
- *
- * Returns:
- *   true  If it is the same device.
- *   false If it is not the same device or on error.
- */
-static bool device_matched(const struct btrfs_device *device, dev_t dev_new)
-{
-	char *device_name;
-	dev_t dev_old;
-	int ret;
-
-	/*
-	 * If we are looking for a device with the matching dev_t, then skip
-	 * device without a name (a missing device).
-	 */
-	if (!device->name)
-		return false;
-
-	device_name = kzalloc(BTRFS_PATH_NAME_MAX, GFP_KERNEL);
-	if (!device_name)
-		return false;
-
-	rcu_read_lock();
-	scnprintf(device_name, BTRFS_PATH_NAME_MAX, "%s", rcu_str_deref(device->name));
-	rcu_read_unlock();
-
-	ret = lookup_bdev(device_name, &dev_old);
-	kfree(device_name);
-	if (ret)
-		return false;
-
-	if (dev_old == dev_new)
-		return true;
-
-	return false;
-}
-
 /**
  *  Search and remove all stale devices (which are not mounted).
  *  When both inputs are NULL, it will search and release all stale devices.
@@ -604,7 +565,7 @@ static int btrfs_free_stale_devices(dev_t devt, struct btrfs_device *skip_device
 					 &fs_devices->devices, dev_list) {
 			if (skip_device && skip_device == device)
 				continue;
-			if (devt && !device_matched(device, devt))
+			if (devt && devt != device->devt)
 				continue;
 			if (fs_devices->opened) {
 				/* for an already deleted device return 0 */
-- 
GitLab


From 823f8e5c1f06a050af61137f171d1f1966b0982c Mon Sep 17 00:00:00 2001
From: Anand Jain <anand.jain@oracle.com>
Date: Thu, 13 Jan 2022 12:44:10 +0800
Subject: [PATCH 1344/1586] btrfs: cleanup temporary variables when finding
 rotational device status

The pointer to struct request_queue is used only to get device type
rotating or the non-rotating. So use it directly.

Signed-off-by: Anand Jain <anand.jain@oracle.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/volumes.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 1e1120ed9f8c6..a91e51b0ca811 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -602,7 +602,6 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
 			struct btrfs_device *device, fmode_t flags,
 			void *holder)
 {
-	struct request_queue *q;
 	struct block_device *bdev;
 	struct btrfs_super_block *disk_super;
 	u64 devid;
@@ -644,8 +643,7 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
 			set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
 	}
 
-	q = bdev_get_queue(bdev);
-	if (!blk_queue_nonrot(q))
+	if (!blk_queue_nonrot(bdev_get_queue(bdev)))
 		fs_devices->rotating = true;
 
 	device->bdev = bdev;
@@ -2591,7 +2589,6 @@ error:
 int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path)
 {
 	struct btrfs_root *root = fs_info->dev_root;
-	struct request_queue *q;
 	struct btrfs_trans_handle *trans;
 	struct btrfs_device *device;
 	struct block_device *bdev;
@@ -2667,7 +2664,6 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 		goto error_free_zone;
 	}
 
-	q = bdev_get_queue(bdev);
 	set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
 	device->generation = trans->transid;
 	device->io_width = fs_info->sectorsize;
@@ -2715,7 +2711,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 
 	atomic64_add(device->total_bytes, &fs_info->free_chunk_space);
 
-	if (!blk_queue_nonrot(q))
+	if (!blk_queue_nonrot(bdev_get_queue(bdev)))
 		fs_devices->rotating = true;
 
 	orig_super_total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
-- 
GitLab


From c4bf190999953c26070a59172ed01b995521c5ed Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Fri, 21 Jan 2022 19:43:51 +0800
Subject: [PATCH 1345/1586] btrfs: zoned: remove redundant initialization of
 to_add

to_add is being initialized to len but this is never read as to_add is
overwritten later on. Remove the redundant initialization.

Cleans up the following clang-analyzer warning:

fs/btrfs/extent-tree.c:2769:8: warning: Value stored to 'to_add' during
its initialization is never read [clang-analyzer-deadcode.DeadStores].

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent-tree.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 96427b1ecac3e..b7b49b4eb68db 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2766,12 +2766,11 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
 		spin_unlock(&cache->lock);
 		if (!readonly && return_free_space &&
 		    global_rsv->space_info == space_info) {
-			u64 to_add = len;
-
 			spin_lock(&global_rsv->lock);
 			if (!global_rsv->full) {
-				to_add = min(len, global_rsv->size -
-					     global_rsv->reserved);
+				u64 to_add = min(len, global_rsv->size -
+						      global_rsv->reserved);
+
 				global_rsv->reserved += to_add;
 				btrfs_space_info_update_bytes_may_use(fs_info,
 						space_info, to_add);
-- 
GitLab


From 5c07c53f2d273bda075cacf3d5e41a59cf634462 Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Fri, 21 Jan 2022 19:42:24 +0800
Subject: [PATCH 1346/1586] btrfs: scrub: remove redundant initialization of
 increment

increment is being initialized to map->stripe_len but this is never
read as increment is overwritten later on. Remove the redundant
initialization.

Cleans up the following clang-analyzer warning:

fs/btrfs/scrub.c:3193:6: warning: Value stored to 'increment' during its
initialization is never read [clang-analyzer-deadcode.DeadStores].

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/scrub.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 2e9a322773f28..11089568b2879 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -3190,7 +3190,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
 	u64 generation;
 	int mirror_num;
 	struct btrfs_key key;
-	u64 increment = map->stripe_len;
+	u64 increment;
 	u64 offset;
 	u64 extent_logical;
 	u64 extent_physical;
-- 
GitLab


From db5df254120004471e1c957957ab2f1e612dcbd6 Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Thu, 13 Jan 2022 17:16:18 +0200
Subject: [PATCH 1347/1586] btrfs: move QUOTA_ENABLED check to
 rescan_should_stop from btrfs_qgroup_rescan_worker

Instead of having 2 places that short circuit the qgroup leaf scan have
everything in the qgroup_rescan_leaf function. In addition to that, also
ensure that the inconsistent qgroup flag is set when rescan_should_stop
returns true. This both retains the old behavior when -EINTR was set in
the body of the loop and at the same time also extends this behavior
when scanning is interrupted due to remount or unmount operations.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/qgroup.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 30d42ea655ce9..4a9bde1e2c50f 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -3268,7 +3268,8 @@ out:
 static bool rescan_should_stop(struct btrfs_fs_info *fs_info)
 {
 	return btrfs_fs_closing(fs_info) ||
-		test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
+		test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state) ||
+		!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
 }
 
 static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
@@ -3298,11 +3299,9 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
 			err = PTR_ERR(trans);
 			break;
 		}
-		if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
-			err = -EINTR;
-		} else {
-			err = qgroup_rescan_leaf(trans, path);
-		}
+
+		err = qgroup_rescan_leaf(trans, path);
+
 		if (err > 0)
 			btrfs_commit_transaction(trans);
 		else
@@ -3316,7 +3315,7 @@ out:
 	if (err > 0 &&
 	    fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) {
 		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
-	} else if (err < 0) {
+	} else if (err < 0 || stopped) {
 		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
 	}
 	mutex_unlock(&fs_info->qgroup_rescan_lock);
-- 
GitLab


From 0292ecf19b353e255be93afa133955c3cd78f784 Mon Sep 17 00:00:00 2001
From: Minghao Chi <chi.minghao@zte.com.cn>
Date: Tue, 11 Jan 2022 01:57:16 +0000
Subject: [PATCH 1348/1586] btrfs: send: remove redundant ret variable in
 fs_path_copy

Return value from fs_path_add_path() directly instead of taking this in
another redundant variable.

Reported-by: Zeal Robot <zealci@zte.com.cn>
Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Minghao Chi <chi.minghao@zte.com.cn>
Signed-off-by: CGEL ZTE <cgel.zte@gmail.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/send.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 2845febb711b7..7d16429372740 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -528,17 +528,12 @@ out:
 
 static int fs_path_copy(struct fs_path *p, struct fs_path *from)
 {
-	int ret;
-
 	p->reversed = from->reversed;
 	fs_path_reset(p);
 
-	ret = fs_path_add_path(p, from);
-
-	return ret;
+	return fs_path_add_path(p, from);
 }
 
-
 static void fs_path_unreverse(struct fs_path *p)
 {
 	char *tmp;
-- 
GitLab


From 839061fe88beacb911bbb9412d18db3e61160e4f Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Thu, 20 Jan 2022 11:00:06 +0000
Subject: [PATCH 1349/1586] btrfs: add helper to delete a dir entry from a log
 tree

Move the code that finds and deletes a logged dir entry out of
btrfs_del_dir_entries_in_log() into a helper function. This new helper
function will be used by another patch in the same series, and serves
to avoid having duplicated logic.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/tree-log.c | 70 +++++++++++++++++++++++++--------------------
 1 file changed, 39 insertions(+), 31 deletions(-)

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index a0364f16e7908..2f4f34924c697 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3508,6 +3508,41 @@ static bool inode_logged(struct btrfs_trans_handle *trans,
 	return false;
 }
 
+/*
+ * Delete a directory entry from the log if it exists.
+ *
+ * Returns < 0 on error
+ *           1 if the entry does not exists
+ *           0 if the entry existed and was successfully deleted
+ */
+static int del_logged_dentry(struct btrfs_trans_handle *trans,
+			     struct btrfs_root *log,
+			     struct btrfs_path *path,
+			     u64 dir_ino,
+			     const char *name, int name_len,
+			     u64 index)
+{
+	struct btrfs_dir_item *di;
+
+	/*
+	 * We only log dir index items of a directory, so we don't need to look
+	 * for dir item keys.
+	 */
+	di = btrfs_lookup_dir_index_item(trans, log, path, dir_ino,
+					 index, name, name_len, -1);
+	if (IS_ERR(di))
+		return PTR_ERR(di);
+	else if (!di)
+		return 1;
+
+	/*
+	 * We do not need to update the size field of the directory's
+	 * inode item because on log replay we update the field to reflect
+	 * all existing entries in the directory (see overwrite_item()).
+	 */
+	return btrfs_delete_one_dir_name(trans, log, path, di);
+}
+
 /*
  * If both a file and directory are logged, and unlinks or renames are
  * mixed in, we have a few interesting corners:
@@ -3534,12 +3569,8 @@ void btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
 				  const char *name, int name_len,
 				  struct btrfs_inode *dir, u64 index)
 {
-	struct btrfs_root *log;
-	struct btrfs_dir_item *di;
 	struct btrfs_path *path;
 	int ret;
-	int err = 0;
-	u64 dir_ino = btrfs_ino(dir);
 
 	if (!inode_logged(trans, dir))
 		return;
@@ -3550,41 +3581,18 @@ void btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
 
 	mutex_lock(&dir->log_mutex);
 
-	log = root->log_root;
 	path = btrfs_alloc_path();
 	if (!path) {
-		err = -ENOMEM;
+		ret = -ENOMEM;
 		goto out_unlock;
 	}
 
-	/*
-	 * We only log dir index items of a directory, so we don't need to look
-	 * for dir item keys.
-	 */
-	di = btrfs_lookup_dir_index_item(trans, log, path, dir_ino,
-					 index, name, name_len, -1);
-	if (IS_ERR(di)) {
-		err = PTR_ERR(di);
-		goto fail;
-	}
-	if (di) {
-		ret = btrfs_delete_one_dir_name(trans, log, path, di);
-		if (ret) {
-			err = ret;
-			goto fail;
-		}
-	}
-
-	/*
-	 * We do not need to update the size field of the directory's inode item
-	 * because on log replay we update the field to reflect all existing
-	 * entries in the directory (see overwrite_item()).
-	 */
-fail:
+	ret = del_logged_dentry(trans, root->log_root, path, btrfs_ino(dir),
+				name, name_len, index);
 	btrfs_free_path(path);
 out_unlock:
 	mutex_unlock(&dir->log_mutex);
-	if (err < 0)
+	if (ret < 0)
 		btrfs_set_log_full_commit(trans);
 	btrfs_end_log_trans(root);
 }
-- 
GitLab


From d5f5bd546552a94eefd68c42f40f778c40a89d2c Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Thu, 20 Jan 2022 11:00:07 +0000
Subject: [PATCH 1350/1586] btrfs: pass the dentry to btrfs_log_new_name()
 instead of the inode

In the next patch in the series, there will be the need to access the old
name, and its length, of an inode when logging the inode during a rename.
So instead of passing the inode to btrfs_log_new_name() pass the dentry,
because from the dentry we can get the inode, the name and its length.

This will avoid passing 3 new parameters to btrfs_log_new_name() in the
next patch - the name, its length and an index number. This way we end
up passing only 1 new parameter, the index number.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c    |  8 ++++----
 fs/btrfs/tree-log.c | 19 +++++++++++++++----
 fs/btrfs/tree-log.h |  2 +-
 3 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 5c7aeb588313a..8a172e76aadbb 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6531,7 +6531,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
 				goto fail;
 		}
 		d_instantiate(dentry, inode);
-		btrfs_log_new_name(trans, BTRFS_I(inode), NULL, parent);
+		btrfs_log_new_name(trans, old_dentry, NULL, parent);
 	}
 
 fail:
@@ -9211,13 +9211,13 @@ static int btrfs_rename_exchange(struct inode *old_dir,
 		BTRFS_I(new_inode)->dir_index = new_idx;
 
 	if (root_log_pinned) {
-		btrfs_log_new_name(trans, BTRFS_I(old_inode), BTRFS_I(old_dir),
+		btrfs_log_new_name(trans, old_dentry, BTRFS_I(old_dir),
 				   new_dentry->d_parent);
 		btrfs_end_log_trans(root);
 		root_log_pinned = false;
 	}
 	if (dest_log_pinned) {
-		btrfs_log_new_name(trans, BTRFS_I(new_inode), BTRFS_I(new_dir),
+		btrfs_log_new_name(trans, new_dentry, BTRFS_I(new_dir),
 				   old_dentry->d_parent);
 		btrfs_end_log_trans(dest);
 		dest_log_pinned = false;
@@ -9498,7 +9498,7 @@ static int btrfs_rename(struct user_namespace *mnt_userns,
 		BTRFS_I(old_inode)->dir_index = index;
 
 	if (log_pinned) {
-		btrfs_log_new_name(trans, BTRFS_I(old_inode), BTRFS_I(old_dir),
+		btrfs_log_new_name(trans, old_dentry, BTRFS_I(old_dir),
 				   new_dentry->d_parent);
 		btrfs_end_log_trans(root);
 		log_pinned = false;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 2f4f34924c697..0bb0b0439615f 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -6788,14 +6788,25 @@ void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans,
 	mutex_unlock(&dir->log_mutex);
 }
 
-/*
- * Call this after adding a new name for a file and it will properly
- * update the log to reflect the new name.
+/**
+ * Update the log after adding a new name for an inode.
+ *
+ * @trans:              Transaction handle.
+ * @old_dentry:         The dentry associated with the old name and the old
+ *                      parent directory.
+ * @old_dir:            The inode of the previous parent directory for the case
+ *                      of a rename. For a link operation, it must be NULL.
+ * @parent:             The dentry associated with the directory under which the
+ *                      new name is located.
+ *
+ * Call this after adding a new name for an inode, as a result of a link or
+ * rename operation, and it will properly update the log to reflect the new name.
  */
 void btrfs_log_new_name(struct btrfs_trans_handle *trans,
-			struct btrfs_inode *inode, struct btrfs_inode *old_dir,
+			struct dentry *old_dentry, struct btrfs_inode *old_dir,
 			struct dentry *parent)
 {
+	struct btrfs_inode *inode = BTRFS_I(d_inode(old_dentry));
 	struct btrfs_log_ctx ctx;
 
 	/*
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h
index f6811c3df38a6..e69411f308edb 100644
--- a/fs/btrfs/tree-log.h
+++ b/fs/btrfs/tree-log.h
@@ -86,7 +86,7 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
 void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans,
 				   struct btrfs_inode *dir);
 void btrfs_log_new_name(struct btrfs_trans_handle *trans,
-			struct btrfs_inode *inode, struct btrfs_inode *old_dir,
+			struct dentry *old_dentry, struct btrfs_inode *old_dir,
 			struct dentry *parent);
 
 #endif
-- 
GitLab


From 88d2beec7e53fc500a5ac99beb254e6079d03543 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Thu, 20 Jan 2022 11:00:08 +0000
Subject: [PATCH 1351/1586] btrfs: avoid logging all directory changes during
 renames

When doing a rename of a file, if the file or its old parent directory
were logged before, we log the new name of the file and then make sure
we log the old parent directory, to ensure that after a log replay the
old name of the file is deleted and the new name added.

The logging of the old parent directory can take some time, because it
will scan all leaves modified in the current transaction, check which
directory entries were already logged, copy the ones that were not
logged before, etc. In this rename context all we need to do is make
sure that the old name of the file is deleted on log replay, so instead
of triggering a directory log operation, we can just delete the old
directory entry from the log if it's there, or in case it isn't there,
just log a range item to signal log replay that the old name must be
deleted. So change btrfs_log_new_name() to do that.

This scenario is actually not uncommon to trigger, and recently on a
5.15 kernel, an openSUSE Tumbleweed user reported package installations
and upgrades, with the zypper tool, were often taking a long time to
complete, much more than usual. With strace it could be observed that
zypper was spending over 99% of its time on rename operations, and then
with further analysis we checked that directory logging was happening
too frequently and causing high latencies for the rename operations.
Taking into account that installation/upgrade of some of these packages
needed about a few thousand file renames, the slowdown was very noticeable
for the user.

The issue was caused indirectly due to an excessive number of inode
evictions on a 5.15 kernel, about 100x more compared to a 5.13, 5.14
or a 5.16-rc8 kernel. After an inode eviction we can't tell for sure,
in an efficient way, if an inode was previously logged in the current
transaction, so we are pessimistic and assume it was, because in case
it was we need to update the logged inode. More details on that in one
of the patches in the same series (subject "btrfs: avoid inode logging
during rename and link when possible"). Either way, in case the parent
directory was logged before, we currently do more work then necessary
during a rename, and this change minimizes that amount of work.

The following script mimics part of what a package installation/upgrade
with zypper does, which is basically renaming a lot of files, in some
directory under /usr, to a name with a suffix of "-RPMDELETE":

  $ cat test.sh
  #!/bin/bash

  DEV=/dev/nvme0n1
  MNT=/mnt/nvme0n1

  NUM_FILES=10000

  mkfs.btrfs -f $DEV
  mount $DEV $MNT

  mkdir $MNT/testdir

  for ((i = 1; i <= $NUM_FILES; i++)); do
      echo -n > $MNT/testdir/file_$i
  done

  sync

  # Do some change to testdir and fsync it.
  echo -n > $MNT/testdir/file_$((NUM_FILES + 1))
  xfs_io -c "fsync" $MNT/testdir

  echo "Renaming $NUM_FILES files..."
  start=$(date +%s%N)
  for ((i = 1; i <= $NUM_FILES; i++)); do
      mv $MNT/testdir/file_$i $MNT/testdir/file_$i-RPMDELETE
  done
  end=$(date +%s%N)

  dur=$(( (end - start) / 1000000 ))
  echo "Renames took $dur milliseconds"

  umount $MNT

Testing this change on box using a non-debug kernel (Debian's default
kernel config) gave the following results:

NUM_FILES=10000, before this patch: 27399 ms
NUM_FILES=10000, after this patch:   9093 ms (-66.8%)

NUM_FILES=5000, before this patch:   9241 ms
NUM_FILES=5000, after this patch:    4642 ms (-49.8%)

NUM_FILES=2000, before this patch:   2550 ms
NUM_FILES=2000, after this patch:    1788 ms (-29.9%)

NUM_FILES=1000, before this patch:   1088 ms
NUM_FILES=1000, after this patch:     905 ms (-16.9%)

Link: https://bugzilla.opensuse.org/show_bug.cgi?id=1193549
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c    | 33 ++++++++++++++++------
 fs/btrfs/tree-log.c | 67 +++++++++++++++++++++++++++++++++++----------
 fs/btrfs/tree-log.h |  2 +-
 3 files changed, 78 insertions(+), 24 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 8a172e76aadbb..50656edad939b 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -66,6 +66,11 @@ struct btrfs_dio_data {
 	struct extent_changeset *data_reserved;
 };
 
+struct btrfs_rename_ctx {
+	/* Output field. Stores the index number of the old directory entry. */
+	u64 index;
+};
+
 static const struct inode_operations btrfs_dir_inode_operations;
 static const struct inode_operations btrfs_symlink_inode_operations;
 static const struct inode_operations btrfs_special_inode_operations;
@@ -4062,7 +4067,8 @@ int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
 static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
 				struct btrfs_inode *dir,
 				struct btrfs_inode *inode,
-				const char *name, int name_len)
+				const char *name, int name_len,
+				struct btrfs_rename_ctx *rename_ctx)
 {
 	struct btrfs_root *root = dir->root;
 	struct btrfs_fs_info *fs_info = root->fs_info;
@@ -4118,6 +4124,9 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
 		goto err;
 	}
 skip_backref:
+	if (rename_ctx)
+		rename_ctx->index = index;
+
 	ret = btrfs_delete_delayed_dir_index(trans, dir, index);
 	if (ret) {
 		btrfs_abort_transaction(trans, ret);
@@ -4158,7 +4167,7 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
 		       const char *name, int name_len)
 {
 	int ret;
-	ret = __btrfs_unlink_inode(trans, dir, inode, name, name_len);
+	ret = __btrfs_unlink_inode(trans, dir, inode, name, name_len, NULL);
 	if (!ret) {
 		drop_nlink(&inode->vfs_inode);
 		ret = btrfs_update_inode(trans, inode->root, inode);
@@ -6531,7 +6540,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
 				goto fail;
 		}
 		d_instantiate(dentry, inode);
-		btrfs_log_new_name(trans, old_dentry, NULL, parent);
+		btrfs_log_new_name(trans, old_dentry, NULL, 0, parent);
 	}
 
 fail:
@@ -9024,6 +9033,8 @@ static int btrfs_rename_exchange(struct inode *old_dir,
 	struct inode *new_inode = new_dentry->d_inode;
 	struct inode *old_inode = old_dentry->d_inode;
 	struct timespec64 ctime = current_time(old_inode);
+	struct btrfs_rename_ctx old_rename_ctx;
+	struct btrfs_rename_ctx new_rename_ctx;
 	u64 old_ino = btrfs_ino(BTRFS_I(old_inode));
 	u64 new_ino = btrfs_ino(BTRFS_I(new_inode));
 	u64 old_idx = 0;
@@ -9164,7 +9175,8 @@ static int btrfs_rename_exchange(struct inode *old_dir,
 		ret = __btrfs_unlink_inode(trans, BTRFS_I(old_dir),
 					   BTRFS_I(old_dentry->d_inode),
 					   old_dentry->d_name.name,
-					   old_dentry->d_name.len);
+					   old_dentry->d_name.len,
+					   &old_rename_ctx);
 		if (!ret)
 			ret = btrfs_update_inode(trans, root, BTRFS_I(old_inode));
 	}
@@ -9180,7 +9192,8 @@ static int btrfs_rename_exchange(struct inode *old_dir,
 		ret = __btrfs_unlink_inode(trans, BTRFS_I(new_dir),
 					   BTRFS_I(new_dentry->d_inode),
 					   new_dentry->d_name.name,
-					   new_dentry->d_name.len);
+					   new_dentry->d_name.len,
+					   &new_rename_ctx);
 		if (!ret)
 			ret = btrfs_update_inode(trans, dest, BTRFS_I(new_inode));
 	}
@@ -9212,13 +9225,13 @@ static int btrfs_rename_exchange(struct inode *old_dir,
 
 	if (root_log_pinned) {
 		btrfs_log_new_name(trans, old_dentry, BTRFS_I(old_dir),
-				   new_dentry->d_parent);
+				   old_rename_ctx.index, new_dentry->d_parent);
 		btrfs_end_log_trans(root);
 		root_log_pinned = false;
 	}
 	if (dest_log_pinned) {
 		btrfs_log_new_name(trans, new_dentry, BTRFS_I(new_dir),
-				   old_dentry->d_parent);
+				   new_rename_ctx.index, old_dentry->d_parent);
 		btrfs_end_log_trans(dest);
 		dest_log_pinned = false;
 	}
@@ -9324,6 +9337,7 @@ static int btrfs_rename(struct user_namespace *mnt_userns,
 	struct btrfs_root *dest = BTRFS_I(new_dir)->root;
 	struct inode *new_inode = d_inode(new_dentry);
 	struct inode *old_inode = d_inode(old_dentry);
+	struct btrfs_rename_ctx rename_ctx;
 	u64 index = 0;
 	int ret;
 	int ret2;
@@ -9455,7 +9469,8 @@ static int btrfs_rename(struct user_namespace *mnt_userns,
 		ret = __btrfs_unlink_inode(trans, BTRFS_I(old_dir),
 					BTRFS_I(d_inode(old_dentry)),
 					old_dentry->d_name.name,
-					old_dentry->d_name.len);
+					old_dentry->d_name.len,
+					&rename_ctx);
 		if (!ret)
 			ret = btrfs_update_inode(trans, root, BTRFS_I(old_inode));
 	}
@@ -9499,7 +9514,7 @@ static int btrfs_rename(struct user_namespace *mnt_userns,
 
 	if (log_pinned) {
 		btrfs_log_new_name(trans, old_dentry, BTRFS_I(old_dir),
-				   new_dentry->d_parent);
+				   rename_ctx.index, new_dentry->d_parent);
 		btrfs_end_log_trans(root);
 		log_pinned = false;
 	}
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 0bb0b0439615f..44719e9c48f30 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -6796,6 +6796,9 @@ void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans,
  *                      parent directory.
  * @old_dir:            The inode of the previous parent directory for the case
  *                      of a rename. For a link operation, it must be NULL.
+ * @old_dir_index:      The index number associated with the old name, meaningful
+ *                      only for rename operations (when @old_dir is not NULL).
+ *                      Ignored for link operations.
  * @parent:             The dentry associated with the directory under which the
  *                      new name is located.
  *
@@ -6804,7 +6807,7 @@ void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans,
  */
 void btrfs_log_new_name(struct btrfs_trans_handle *trans,
 			struct dentry *old_dentry, struct btrfs_inode *old_dir,
-			struct dentry *parent)
+			u64 old_dir_index, struct dentry *parent)
 {
 	struct btrfs_inode *inode = BTRFS_I(d_inode(old_dentry));
 	struct btrfs_log_ctx ctx;
@@ -6826,20 +6829,56 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
 
 	/*
 	 * If we are doing a rename (old_dir is not NULL) from a directory that
-	 * was previously logged, make sure the next log attempt on the directory
-	 * is not skipped and logs the inode again. This is because the log may
-	 * not currently be authoritative for a range including the old
-	 * BTRFS_DIR_INDEX_KEY key, so we want to make sure after a log replay we
-	 * do not end up with both the new and old dentries around (in case the
-	 * inode is a directory we would have a directory with two hard links and
-	 * 2 inode references for different parents). The next log attempt of
-	 * old_dir will happen at btrfs_log_all_parents(), called through
-	 * btrfs_log_inode_parent() below, because we have previously set
-	 * inode->last_unlink_trans to the current transaction ID, either here or
-	 * at btrfs_record_unlink_dir() in case the inode is a directory.
+	 * was previously logged, make sure that on log replay we get the old
+	 * dir entry deleted. This is needed because we will also log the new
+	 * name of the renamed inode, so we need to make sure that after log
+	 * replay we don't end up with both the new and old dir entries existing.
 	 */
-	if (old_dir)
-		old_dir->logged_trans = 0;
+	if (old_dir && old_dir->logged_trans == trans->transid) {
+		struct btrfs_root *log = old_dir->root->log_root;
+		struct btrfs_path *path;
+		int ret;
+
+		ASSERT(old_dir_index >= BTRFS_DIR_START_INDEX);
+
+		path = btrfs_alloc_path();
+		if (!path) {
+			btrfs_set_log_full_commit(trans);
+			return;
+		}
+
+		/*
+		 * Other concurrent task might be logging the old directory,
+		 * as it can be triggered when logging other inode that had or
+		 * still has a dentry in the old directory. So take the old
+		 * directory's log_mutex to prevent getting an -EEXIST when
+		 * logging a key to record the deletion, or having that other
+		 * task logging the old directory get an -EEXIST if it attempts
+		 * to log the same key after we just did it. In both cases that
+		 * would result in falling back to a transaction commit.
+		 */
+		mutex_lock(&old_dir->log_mutex);
+		ret = del_logged_dentry(trans, log, path, btrfs_ino(old_dir),
+					old_dentry->d_name.name,
+					old_dentry->d_name.len, old_dir_index);
+		if (ret > 0) {
+			/*
+			 * The dentry does not exist in the log, so record its
+			 * deletion.
+			 */
+			btrfs_release_path(path);
+			ret = insert_dir_log_key(trans, log, path,
+						 btrfs_ino(old_dir),
+						 old_dir_index, old_dir_index);
+		}
+		mutex_unlock(&old_dir->log_mutex);
+
+		btrfs_free_path(path);
+		if (ret < 0) {
+			btrfs_set_log_full_commit(trans);
+			return;
+		}
+	}
 
 	btrfs_init_log_ctx(&ctx, &inode->vfs_inode);
 	ctx.logging_new_name = true;
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h
index e69411f308edb..f1acb7fa944cf 100644
--- a/fs/btrfs/tree-log.h
+++ b/fs/btrfs/tree-log.h
@@ -87,6 +87,6 @@ void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans,
 				   struct btrfs_inode *dir);
 void btrfs_log_new_name(struct btrfs_trans_handle *trans,
 			struct dentry *old_dentry, struct btrfs_inode *old_dir,
-			struct dentry *parent);
+			u64 old_dir_index, struct dentry *parent);
 
 #endif
-- 
GitLab


From 259c4b96d78dda8477a3ac21d6b3cf0eb9f75c8b Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Thu, 20 Jan 2022 11:00:09 +0000
Subject: [PATCH 1352/1586] btrfs: stop doing unnecessary log updates during a
 rename

During a rename, we call __btrfs_unlink_inode(), which will call
btrfs_del_inode_ref_in_log() and btrfs_del_dir_entries_in_log(), in order
to remove an inode reference and a directory entry from the log. These
are necessary when __btrfs_unlink_inode() is called from the unlink path,
but not necessary when it's called from a rename context, because:

1) For the btrfs_del_inode_ref_in_log() call, it's pointless to delete the
   inode reference related to the old name, because later in the rename
   path we call btrfs_log_new_name(), which will drop all inode references
   from the log and copy all inode references from the subvolume tree to
   the log tree. So we are doing one unnecessary btree operation which
   adds additional latency and lock contention in case there are other
   tasks accessing the log tree;

2) For the btrfs_del_dir_entries_in_log() call, we are now doing the
   equivalent at btrfs_log_new_name() since the previous patch in the
   series, that has the subject "btrfs: avoid logging all directory
   changes during renames". In fact, having __btrfs_unlink_inode() call
   this function not only adds additional latency and lock contention due
   to the extra btree operation, but also can make btrfs_log_new_name()
   unnecessarily log a range item to track the deletion of the old name,
   since it has no way to known that the directory entry related to the
   old name was previously logged and already deleted by
   __btrfs_unlink_inode() through its call to
   btrfs_del_dir_entries_in_log().

So skip those calls at __btrfs_unlink_inode() when we are doing a rename.
Skipping them also allows us now to reduce the duration of time we are
pinning a log transaction during renames, which is always beneficial as
it's not delaying so much other tasks trying to sync the log tree, in
particular we end up not holding the log transaction pinned while adding
the new name (adding inode ref, directory entry, etc).

This change is part of a patchset comprised of the following patches:

  1/5 btrfs: add helper to delete a dir entry from a log tree
  2/5 btrfs: pass the dentry to btrfs_log_new_name() instead of the inode
  3/5 btrfs: avoid logging all directory changes during renames
  4/5 btrfs: stop doing unnecessary log updates during a rename
  5/5 btrfs: avoid inode logging during rename and link when possible

Just like the previous patch in the series, "btrfs: avoid logging all
directory changes during renames", the following script mimics part of
what a package installation/upgrade with zypper does, which is basically
renaming a lot of files, in some directory under /usr, to a name with a
suffix of "-RPMDELETE":

  $ cat test.sh
  #!/bin/bash

  DEV=/dev/nvme0n1
  MNT=/mnt/nvme0n1

  NUM_FILES=10000

  mkfs.btrfs -f $DEV
  mount $DEV $MNT

  mkdir $MNT/testdir

  for ((i = 1; i <= $NUM_FILES; i++)); do
      echo -n > $MNT/testdir/file_$i
  done

  sync

  # Do some change to testdir and fsync it.
  echo -n > $MNT/testdir/file_$((NUM_FILES + 1))
  xfs_io -c "fsync" $MNT/testdir

  echo "Renaming $NUM_FILES files..."
  start=$(date +%s%N)
  for ((i = 1; i <= $NUM_FILES; i++)); do
      mv $MNT/testdir/file_$i $MNT/testdir/file_$i-RPMDELETE
  done
  end=$(date +%s%N)

  dur=$(( (end - start) / 1000000 ))
  echo "Renames took $dur milliseconds"

  umount $MNT

Testing this change on box a using a non-debug kernel (Debian's default
kernel config) gave the following results:

NUM_FILES=10000, before patchset:                   27399 ms
NUM_FILES=10000, after patches 1/5 to 3/5 applied:   9093 ms (-66.8%)
NUM_FILES=10000, after patches 1/5 to 4/5 applied:   9016 ms (-67.1%)

NUM_FILES=5000, before patchset:                     9241 ms
NUM_FILES=5000, after patches 1/5 to 3/5 applied:    4642 ms (-49.8%)
NUM_FILES=5000, after patches 1/5 to 4/5 applied:    4553 ms (-50.7%)

NUM_FILES=2000, before patchset:                     2550 ms
NUM_FILES=2000, after patches 1/5 to 3/5 applied:    1788 ms (-29.9%)
NUM_FILES=2000, after patches 1/5 to 4/5 applied:    1767 ms (-30.7%)

NUM_FILES=1000, before patchset:                     1088 ms
NUM_FILES=1000, after patches 1/5 to 3/5 applied:     905 ms (-16.9%)
NUM_FILES=1000, after patches 1/5 to 4/5 applied:     883 ms (-18.8%)

The next patch in the series (5/5), also contains dbench results after
applying to whole patchset.

Link: https://bugzilla.opensuse.org/show_bug.cgi?id=1193549
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c    | 140 ++++++++++----------------------------------
 fs/btrfs/tree-log.c |  34 ++++++++---
 2 files changed, 59 insertions(+), 115 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 50656edad939b..9e21fbcbac6d2 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4133,9 +4133,18 @@ skip_backref:
 		goto err;
 	}
 
-	btrfs_del_inode_ref_in_log(trans, root, name, name_len, inode,
-				   dir_ino);
-	btrfs_del_dir_entries_in_log(trans, root, name, name_len, dir, index);
+	/*
+	 * If we are in a rename context, we don't need to update anything in the
+	 * log. That will be done later during the rename by btrfs_log_new_name().
+	 * Besides that, doing it here would only cause extra unncessary btree
+	 * operations on the log tree, increasing latency for applications.
+	 */
+	if (!rename_ctx) {
+		btrfs_del_inode_ref_in_log(trans, root, name, name_len, inode,
+					   dir_ino);
+		btrfs_del_dir_entries_in_log(trans, root, name, name_len, dir,
+					     index);
+	}
 
 	/*
 	 * If we have a pending delayed iput we could end up with the final iput
@@ -9041,8 +9050,6 @@ static int btrfs_rename_exchange(struct inode *old_dir,
 	u64 new_idx = 0;
 	int ret;
 	int ret2;
-	bool root_log_pinned = false;
-	bool dest_log_pinned = false;
 	bool need_abort = false;
 
 	/*
@@ -9145,29 +9152,6 @@ static int btrfs_rename_exchange(struct inode *old_dir,
 				BTRFS_I(new_inode), 1);
 	}
 
-	/*
-	 * Now pin the logs of the roots. We do it to ensure that no other task
-	 * can sync the logs while we are in progress with the rename, because
-	 * that could result in an inconsistency in case any of the inodes that
-	 * are part of this rename operation were logged before.
-	 *
-	 * We pin the logs even if at this precise moment none of the inodes was
-	 * logged before. This is because right after we checked for that, some
-	 * other task fsyncing some other inode not involved with this rename
-	 * operation could log that one of our inodes exists.
-	 *
-	 * We don't need to pin the logs before the above calls to
-	 * btrfs_insert_inode_ref(), since those don't ever need to change a log.
-	 */
-	if (old_ino != BTRFS_FIRST_FREE_OBJECTID) {
-		btrfs_pin_log_trans(root);
-		root_log_pinned = true;
-	}
-	if (new_ino != BTRFS_FIRST_FREE_OBJECTID) {
-		btrfs_pin_log_trans(dest);
-		dest_log_pinned = true;
-	}
-
 	/* src is a subvolume */
 	if (old_ino == BTRFS_FIRST_FREE_OBJECTID) {
 		ret = btrfs_unlink_subvol(trans, old_dir, old_dentry);
@@ -9223,46 +9207,31 @@ static int btrfs_rename_exchange(struct inode *old_dir,
 	if (new_inode->i_nlink == 1)
 		BTRFS_I(new_inode)->dir_index = new_idx;
 
-	if (root_log_pinned) {
+	/*
+	 * Now pin the logs of the roots. We do it to ensure that no other task
+	 * can sync the logs while we are in progress with the rename, because
+	 * that could result in an inconsistency in case any of the inodes that
+	 * are part of this rename operation were logged before.
+	 */
+	if (old_ino != BTRFS_FIRST_FREE_OBJECTID)
+		btrfs_pin_log_trans(root);
+	if (new_ino != BTRFS_FIRST_FREE_OBJECTID)
+		btrfs_pin_log_trans(dest);
+
+	/* Do the log updates for all inodes. */
+	if (old_ino != BTRFS_FIRST_FREE_OBJECTID)
 		btrfs_log_new_name(trans, old_dentry, BTRFS_I(old_dir),
 				   old_rename_ctx.index, new_dentry->d_parent);
-		btrfs_end_log_trans(root);
-		root_log_pinned = false;
-	}
-	if (dest_log_pinned) {
+	if (new_ino != BTRFS_FIRST_FREE_OBJECTID)
 		btrfs_log_new_name(trans, new_dentry, BTRFS_I(new_dir),
 				   new_rename_ctx.index, old_dentry->d_parent);
+
+	/* Now unpin the logs. */
+	if (old_ino != BTRFS_FIRST_FREE_OBJECTID)
+		btrfs_end_log_trans(root);
+	if (new_ino != BTRFS_FIRST_FREE_OBJECTID)
 		btrfs_end_log_trans(dest);
-		dest_log_pinned = false;
-	}
 out_fail:
-	/*
-	 * If we have pinned a log and an error happened, we unpin tasks
-	 * trying to sync the log and force them to fallback to a transaction
-	 * commit if the log currently contains any of the inodes involved in
-	 * this rename operation (to ensure we do not persist a log with an
-	 * inconsistent state for any of these inodes or leading to any
-	 * inconsistencies when replayed). If the transaction was aborted, the
-	 * abortion reason is propagated to userspace when attempting to commit
-	 * the transaction. If the log does not contain any of these inodes, we
-	 * allow the tasks to sync it.
-	 */
-	if (ret && (root_log_pinned || dest_log_pinned)) {
-		if (btrfs_inode_in_log(BTRFS_I(old_dir), fs_info->generation) ||
-		    btrfs_inode_in_log(BTRFS_I(new_dir), fs_info->generation) ||
-		    btrfs_inode_in_log(BTRFS_I(old_inode), fs_info->generation) ||
-		    btrfs_inode_in_log(BTRFS_I(new_inode), fs_info->generation))
-			btrfs_set_log_full_commit(trans);
-
-		if (root_log_pinned) {
-			btrfs_end_log_trans(root);
-			root_log_pinned = false;
-		}
-		if (dest_log_pinned) {
-			btrfs_end_log_trans(dest);
-			dest_log_pinned = false;
-		}
-	}
 	ret2 = btrfs_end_transaction(trans);
 	ret = ret ? ret : ret2;
 out_notrans:
@@ -9342,7 +9311,6 @@ static int btrfs_rename(struct user_namespace *mnt_userns,
 	int ret;
 	int ret2;
 	u64 old_ino = btrfs_ino(BTRFS_I(old_inode));
-	bool log_pinned = false;
 
 	if (btrfs_ino(BTRFS_I(new_dir)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
 		return -EPERM;
@@ -9447,25 +9415,6 @@ static int btrfs_rename(struct user_namespace *mnt_userns,
 	if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
 		ret = btrfs_unlink_subvol(trans, old_dir, old_dentry);
 	} else {
-		/*
-		 * Now pin the log. We do it to ensure that no other task can
-		 * sync the log while we are in progress with the rename, as
-		 * that could result in an inconsistency in case any of the
-		 * inodes that are part of this rename operation were logged
-		 * before.
-		 *
-		 * We pin the log even if at this precise moment none of the
-		 * inodes was logged before. This is because right after we
-		 * checked for that, some other task fsyncing some other inode
-		 * not involved with this rename operation could log that one of
-		 * our inodes exists.
-		 *
-		 * We don't need to pin the logs before the above call to
-		 * btrfs_insert_inode_ref(), since that does not need to change
-		 * a log.
-		 */
-		btrfs_pin_log_trans(root);
-		log_pinned = true;
 		ret = __btrfs_unlink_inode(trans, BTRFS_I(old_dir),
 					BTRFS_I(d_inode(old_dentry)),
 					old_dentry->d_name.name,
@@ -9512,12 +9461,9 @@ static int btrfs_rename(struct user_namespace *mnt_userns,
 	if (old_inode->i_nlink == 1)
 		BTRFS_I(old_inode)->dir_index = index;
 
-	if (log_pinned) {
+	if (old_ino != BTRFS_FIRST_FREE_OBJECTID)
 		btrfs_log_new_name(trans, old_dentry, BTRFS_I(old_dir),
 				   rename_ctx.index, new_dentry->d_parent);
-		btrfs_end_log_trans(root);
-		log_pinned = false;
-	}
 
 	if (flags & RENAME_WHITEOUT) {
 		ret = btrfs_whiteout_for_rename(trans, root, mnt_userns,
@@ -9529,28 +9475,6 @@ static int btrfs_rename(struct user_namespace *mnt_userns,
 		}
 	}
 out_fail:
-	/*
-	 * If we have pinned the log and an error happened, we unpin tasks
-	 * trying to sync the log and force them to fallback to a transaction
-	 * commit if the log currently contains any of the inodes involved in
-	 * this rename operation (to ensure we do not persist a log with an
-	 * inconsistent state for any of these inodes or leading to any
-	 * inconsistencies when replayed). If the transaction was aborted, the
-	 * abortion reason is propagated to userspace when attempting to commit
-	 * the transaction. If the log does not contain any of these inodes, we
-	 * allow the tasks to sync it.
-	 */
-	if (ret && log_pinned) {
-		if (btrfs_inode_in_log(BTRFS_I(old_dir), fs_info->generation) ||
-		    btrfs_inode_in_log(BTRFS_I(new_dir), fs_info->generation) ||
-		    btrfs_inode_in_log(BTRFS_I(old_inode), fs_info->generation) ||
-		    (new_inode &&
-		     btrfs_inode_in_log(BTRFS_I(new_inode), fs_info->generation)))
-			btrfs_set_log_full_commit(trans);
-
-		btrfs_end_log_trans(root);
-		log_pinned = false;
-	}
 	ret2 = btrfs_end_transaction(trans);
 	ret = ret ? ret : ret2;
 out_notrans:
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 44719e9c48f30..d697f8a33a5a7 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -6810,7 +6810,10 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
 			u64 old_dir_index, struct dentry *parent)
 {
 	struct btrfs_inode *inode = BTRFS_I(d_inode(old_dentry));
+	struct btrfs_root *root = inode->root;
 	struct btrfs_log_ctx ctx;
+	bool log_pinned = false;
+	int ret = 0;
 
 	/*
 	 * this will force the logging code to walk the dentry chain
@@ -6837,14 +6840,22 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
 	if (old_dir && old_dir->logged_trans == trans->transid) {
 		struct btrfs_root *log = old_dir->root->log_root;
 		struct btrfs_path *path;
-		int ret;
 
 		ASSERT(old_dir_index >= BTRFS_DIR_START_INDEX);
 
+		/*
+		 * We have two inodes to update in the log, the old directory and
+		 * the inode that got renamed, so we must pin the log to prevent
+		 * anyone from syncing the log until we have updated both inodes
+		 * in the log.
+		 */
+		log_pinned = true;
+		btrfs_pin_log_trans(root);
+
 		path = btrfs_alloc_path();
 		if (!path) {
-			btrfs_set_log_full_commit(trans);
-			return;
+			ret = -ENOMEM;
+			goto out;
 		}
 
 		/*
@@ -6874,10 +6885,8 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
 		mutex_unlock(&old_dir->log_mutex);
 
 		btrfs_free_path(path);
-		if (ret < 0) {
-			btrfs_set_log_full_commit(trans);
-			return;
-		}
+		if (ret < 0)
+			goto out;
 	}
 
 	btrfs_init_log_ctx(&ctx, &inode->vfs_inode);
@@ -6890,5 +6899,16 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
 	 * inconsistent state after a rename operation.
 	 */
 	btrfs_log_inode_parent(trans, inode, parent, LOG_INODE_EXISTS, &ctx);
+out:
+	if (log_pinned) {
+		/*
+		 * If an error happened mark the log for a full commit because
+		 * it's not consistent and up to date. Do it before unpinning the
+		 * log, to avoid any races with someone else trying to commit it.
+		 */
+		if (ret < 0)
+			btrfs_set_log_full_commit(trans);
+		btrfs_end_log_trans(root);
+	}
 }
 
-- 
GitLab


From 0f8ce49821de3e99251d1a6849bfe5f2be1ecbea Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Thu, 20 Jan 2022 11:00:10 +0000
Subject: [PATCH 1353/1586] btrfs: avoid inode logging during rename and link
 when possible

During a rename or link operation, we need to determine if an inode was
previously logged or not, and if it was, do some update to the logged
inode. We used to rely exclusively on the logged_trans field of struct
btrfs_inode to determine that, but that was not reliable because the
value of that field is not persisted in the inode item, so it's lost
when an inode is evicted and loaded back again. That led to several
issues in the past, such as not persisting deletions (such as the case
fixed by commit 803f0f64d17769 ("Btrfs: fix fsync not persisting dentry
deletions due to inode evictions")), or resulting in losing a file
after an inode eviction followed by a rename (commit ecc64fab7d49c6
("btrfs: fix lost inode on log replay after mix of fsync, rename and
inode eviction")), besides other issues.

So the inode_logged() helper was introduced and used to determine if an
inode was possibly logged before in the current transaction, with the
caveat that it could return false positives, in the sense that even if an
inode was not logged before in the current transaction, it could still
return true, but never to return false in case the inode was logged.
>From a functional point of view that is fine, but from a performance
perspective it can introduce significant latencies to rename and link
operations, as they will end up doing inode logging even when it is not
necessary.

Recently on a 5.15 kernel, an openSUSE Tumbleweed user reported package
installations and upgrades, with the zypper tool, were often taking a
long time to complete. With strace it could be observed that zypper was
spending about 99% of its time on rename operations, and then with
further analysis we checked that directory logging was happening too
frequently. Taking into account that installation/upgrade of some of the
packages needed a few thousand file renames, the slowdown was very
noticeable for the user.

The issue was caused indirectly due to an excessive number of inode
evictions on a 5.15 kernel, about 100x more compared to a 5.13, 5.14 or
a 5.16-rc8 kernel. While triggering the inode evictions if something
outside btrfs' control, btrfs could still behave better by eliminating
the false positives from the inode_logged() helper.

So change inode_logged() to actually eliminate such false positives caused
by inode eviction and when an inode was never logged since the filesystem
was mounted, as both cases relate to when the logged_trans field of struct
btrfs_inode has a value of zero. When it can not determine if the inode
was logged based only on the logged_trans value, lookup for the existence
of the inode item in the log tree - if it's there then we known the inode
was logged, if it's not there then it can not have been logged in the
current transaction. Once we determine if the inode was logged, update
the logged_trans value to avoid future calls to have to search in the log
tree again.

Alternatively, we could start storing logged_trans in the on disk inode
item structure (struct btrfs_inode_item) in the unused space it still has,
but that would be a bit odd because:

1) We only care about logged_trans since the filesystem was mounted, we
   don't care about its value from a previous mount. Having it persisted
   in the inode item structure would not make the best use of the precious
   unused space;

2) In order to get logged_trans persisted before inode eviction, we would
   have to update the delayed inode when we finish logging the inode and
   update its logged_trans in struct btrfs_inode, which makes it a bit
   cumbersome since we need to check if the delayed inode exists, if not
   create it and populate it and deal with any errors (-ENOMEM mostly).

This change is part of a patchset comprised of the following patches:

  1/5 btrfs: add helper to delete a dir entry from a log tree
  2/5 btrfs: pass the dentry to btrfs_log_new_name() instead of the inode
  3/5 btrfs: avoid logging all directory changes during renames
  4/5 btrfs: stop doing unnecessary log updates during a rename
  5/5 btrfs: avoid inode logging during rename and link when possible

The following test script mimics part of what the zypper tool does during
package installations/upgrades. It does not triggers inode evictions, but
it's similar because it triggers false positives from the inode_logged()
helper, because the inodes have a logged_trans of 0, there's a log tree
due to a fsync of an unrelated file and the directory inode has its
last_trans field set to the current transaction:

  $ cat test.sh

  #!/bin/bash

  DEV=/dev/nvme0n1
  MNT=/mnt/nvme0n1

  NUM_FILES=10000

  mkfs.btrfs -f $DEV
  mount $DEV $MNT

  mkdir $MNT/testdir

  for ((i = 1; i <= $NUM_FILES; i++)); do
      echo -n > $MNT/testdir/file_$i
  done

  sync

  # Now do some change to an unrelated file and fsync it.
  # This is just to create a log tree to make sure that inode_logged()
  # does not return false when called against "testdir".
  xfs_io -f -c "pwrite 0 4K" -c "fsync" $MNT/foo

  # Do some change to testdir. This is to make sure inode_logged()
  # will return true when called against "testdir", because its
  # logged_trans is 0, it was changed in the current transaction
  # and there's a log tree.
  echo -n > $MNT/testdir/file_$((NUM_FILES + 1))

  echo "Renaming $NUM_FILES files..."
  start=$(date +%s%N)
  for ((i = 1; i <= $NUM_FILES; i++)); do
      mv $MNT/testdir/file_$i $MNT/testdir/file_$i-RPMDELETE
  done
  end=$(date +%s%N)

  dur=$(( (end - start) / 1000000 ))
  echo "Renames took $dur milliseconds"

  umount $MNT

Testing this change on a box using a non-debug kernel (Debian's default
kernel config) gave the following results:

NUM_FILES=10000, before patchset:                   27837 ms
NUM_FILES=10000, after patches 1/5 to 4/5 applied:   9236 ms (-66.8%)
NUM_FILES=10000, after whole patchset applied:       8902 ms (-68.0%)

NUM_FILES=5000, before patchset:                     9127 ms
NUM_FILES=5000, after patches 1/5 to 4/5 applied:    4640 ms (-49.2%)
NUM_FILES=5000, after whole patchset applied:        4441 ms (-51.3%)

NUM_FILES=2000, before patchset:                     2528 ms
NUM_FILES=2000, after patches 1/5 to 4/5 applied:    1983 ms (-21.6%)
NUM_FILES=2000, after whole patchset applied:        1747 ms (-30.9%)

NUM_FILES=1000, before patchset:                     1085 ms
NUM_FILES=1000, after patches 1/5 to 4/5 applied:     893 ms (-17.7%)
NUM_FILES=1000, after whole patchset applied:         867 ms (-20.1%)

Running dbench on the same physical machine with the following script:

  $ cat run-dbench.sh
  #!/bin/bash

  NUM_JOBS=$(nproc --all)

  DEV=/dev/nvme0n1
  MNT=/mnt/nvme0n1
  MOUNT_OPTIONS="-o ssd"
  MKFS_OPTIONS="-O no-holes -R free-space-tree"

  echo "performance" | \
      tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor

  mkfs.btrfs -f $MKFS_OPTIONS $DEV
  mount $MOUNT_OPTIONS $DEV $MNT

  dbench -D $MNT -t 120 $NUM_JOBS

  umount $MNT

Before patchset:

 Operation      Count    AvgLat    MaxLat
 ----------------------------------------
 NTCreateX    3761352     0.032   143.843
 Close        2762770     0.002     2.273
 Rename        159304     0.291    67.037
 Unlink        759784     0.207   143.998
 Deltree           72     4.028    15.977
 Mkdir             36     0.003     0.006
 Qpathinfo    3409780     0.013     9.678
 Qfileinfo     596772     0.001     0.878
 Qfsinfo       625189     0.003     1.245
 Sfileinfo     306443     0.006     1.840
 Find         1318106     0.063    19.798
 WriteX       1871137     0.021     8.532
 ReadX        5897325     0.003     3.567
 LockX          12252     0.003     0.258
 UnlockX        12252     0.002     0.100
 Flush         263666     3.327   155.632

Throughput 980.047 MB/sec  12 clients  12 procs  max_latency=155.636 ms

After whole patchset applied:

 Operation      Count    AvgLat    MaxLat
 ----------------------------------------
 NTCreateX    4195584     0.033   107.742
 Close        3081932     0.002     1.935
 Rename        177641     0.218    14.905
 Unlink        847333     0.166   107.822
 Deltree          118     5.315    15.247
 Mkdir             59     0.004     0.048
 Qpathinfo    3802612     0.014    10.302
 Qfileinfo     666748     0.001     1.034
 Qfsinfo       697329     0.003     0.944
 Sfileinfo     341712     0.006     2.099
 Find         1470365     0.065     9.359
 WriteX       2093921     0.021     8.087
 ReadX        6576234     0.003     3.407
 LockX          13660     0.003     0.308
 UnlockX        13660     0.002     0.114
 Flush         294090     2.906   115.539

Throughput 1093.11 MB/sec  12 clients  12 procs  max_latency=115.544 ms

+11.5% throughput    -25.8% max latency   rename max latency -77.8%

Link: https://bugzilla.opensuse.org/show_bug.cgi?id=1193549
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/tree-log.c | 245 ++++++++++++++++++++++++++++++++------------
 fs/btrfs/tree-log.h |   3 +
 2 files changed, 183 insertions(+), 65 deletions(-)

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index d697f8a33a5a7..49661297c74ff 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3477,35 +3477,121 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
 }
 
 /*
- * Check if an inode was logged in the current transaction. This may often
- * return some false positives, because logged_trans is an in memory only field,
- * not persisted anywhere. This is meant to be used in contexts where a false
- * positive has no functional consequences.
+ * Check if an inode was logged in the current transaction. This correctly deals
+ * with the case where the inode was logged but has a logged_trans of 0, which
+ * happens if the inode is evicted and loaded again, as logged_trans is an in
+ * memory only field (not persisted).
+ *
+ * Returns 1 if the inode was logged before in the transaction, 0 if it was not,
+ * and < 0 on error.
  */
-static bool inode_logged(struct btrfs_trans_handle *trans,
-			 struct btrfs_inode *inode)
+static int inode_logged(struct btrfs_trans_handle *trans,
+			struct btrfs_inode *inode,
+			struct btrfs_path *path_in)
 {
+	struct btrfs_path *path = path_in;
+	struct btrfs_key key;
+	int ret;
+
 	if (inode->logged_trans == trans->transid)
-		return true;
+		return 1;
 
-	if (!test_bit(BTRFS_ROOT_HAS_LOG_TREE, &inode->root->state))
-		return false;
+	/*
+	 * If logged_trans is not 0, then we know the inode logged was not logged
+	 * in this transaction, so we can return false right away.
+	 */
+	if (inode->logged_trans > 0)
+		return 0;
+
+	/*
+	 * If no log tree was created for this root in this transaction, then
+	 * the inode can not have been logged in this transaction. In that case
+	 * set logged_trans to anything greater than 0 and less than the current
+	 * transaction's ID, to avoid the search below in a future call in case
+	 * a log tree gets created after this.
+	 */
+	if (!test_bit(BTRFS_ROOT_HAS_LOG_TREE, &inode->root->state)) {
+		inode->logged_trans = trans->transid - 1;
+		return 0;
+	}
+
+	/*
+	 * We have a log tree and the inode's logged_trans is 0. We can't tell
+	 * for sure if the inode was logged before in this transaction by looking
+	 * only at logged_trans. We could be pessimistic and assume it was, but
+	 * that can lead to unnecessarily logging an inode during rename and link
+	 * operations, and then further updating the log in followup rename and
+	 * link operations, specially if it's a directory, which adds latency
+	 * visible to applications doing a series of rename or link operations.
+	 *
+	 * A logged_trans of 0 here can mean several things:
+	 *
+	 * 1) The inode was never logged since the filesystem was mounted, and may
+	 *    or may have not been evicted and loaded again;
+	 *
+	 * 2) The inode was logged in a previous transaction, then evicted and
+	 *    then loaded again;
+	 *
+	 * 3) The inode was logged in the current transaction, then evicted and
+	 *    then loaded again.
+	 *
+	 * For cases 1) and 2) we don't want to return true, but we need to detect
+	 * case 3) and return true. So we do a search in the log root for the inode
+	 * item.
+	 */
+	key.objectid = btrfs_ino(inode);
+	key.type = BTRFS_INODE_ITEM_KEY;
+	key.offset = 0;
+
+	if (!path) {
+		path = btrfs_alloc_path();
+		if (!path)
+			return -ENOMEM;
+	}
+
+	ret = btrfs_search_slot(NULL, inode->root->log_root, &key, path, 0, 0);
+
+	if (path_in)
+		btrfs_release_path(path);
+	else
+		btrfs_free_path(path);
 
 	/*
-	 * The inode's logged_trans is always 0 when we load it (because it is
-	 * not persisted in the inode item or elsewhere). So if it is 0, the
-	 * inode was last modified in the current transaction then the inode may
-	 * have been logged before in the current transaction, then evicted and
-	 * loaded again in the current transaction - or may have never been logged
-	 * in the current transaction, but since we can not be sure, we have to
-	 * assume it was, otherwise our callers can leave an inconsistent log.
+	 * Logging an inode always results in logging its inode item. So if we
+	 * did not find the item we know the inode was not logged for sure.
 	 */
-	if (inode->logged_trans == 0 &&
-	    inode->last_trans == trans->transid &&
-	    !test_bit(BTRFS_FS_LOG_RECOVERING, &trans->fs_info->flags))
-		return true;
+	if (ret < 0) {
+		return ret;
+	} else if (ret > 0) {
+		/*
+		 * Set logged_trans to a value greater than 0 and less then the
+		 * current transaction to avoid doing the search in future calls.
+		 */
+		inode->logged_trans = trans->transid - 1;
+		return 0;
+	}
+
+	/*
+	 * The inode was previously logged and then evicted, set logged_trans to
+	 * the current transacion's ID, to avoid future tree searches as long as
+	 * the inode is not evicted again.
+	 */
+	inode->logged_trans = trans->transid;
+
+	/*
+	 * If it's a directory, then we must set last_dir_index_offset to the
+	 * maximum possible value, so that the next attempt to log the inode does
+	 * not skip checking if dir index keys found in modified subvolume tree
+	 * leaves have been logged before, otherwise it would result in attempts
+	 * to insert duplicate dir index keys in the log tree. This must be done
+	 * because last_dir_index_offset is an in-memory only field, not persisted
+	 * in the inode item or any other on-disk structure, so its value is lost
+	 * once the inode is evicted.
+	 */
+	if (S_ISDIR(inode->vfs_inode.i_mode))
+		inode->last_dir_index_offset = (u64)-1;
 
-	return false;
+	return 1;
 }
 
 /*
@@ -3572,8 +3658,13 @@ void btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
 	struct btrfs_path *path;
 	int ret;
 
-	if (!inode_logged(trans, dir))
+	ret = inode_logged(trans, dir, NULL);
+	if (ret == 0)
+		return;
+	else if (ret < 0) {
+		btrfs_set_log_full_commit(trans);
 		return;
+	}
 
 	ret = join_running_log_trans(root);
 	if (ret)
@@ -3607,8 +3698,13 @@ void btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
 	u64 index;
 	int ret;
 
-	if (!inode_logged(trans, inode))
+	ret = inode_logged(trans, inode, NULL);
+	if (ret == 0)
 		return;
+	else if (ret < 0) {
+		btrfs_set_log_full_commit(trans);
+		return;
+	}
 
 	ret = join_running_log_trans(root);
 	if (ret)
@@ -3740,7 +3836,6 @@ static int process_dir_items_leaf(struct btrfs_trans_handle *trans,
 	struct extent_buffer *src = path->nodes[0];
 	const int nritems = btrfs_header_nritems(src);
 	const u64 ino = btrfs_ino(inode);
-	const bool inode_logged_before = inode_logged(trans, inode);
 	bool last_found = false;
 	int batch_start = 0;
 	int batch_size = 0;
@@ -3816,14 +3911,16 @@ static int process_dir_items_leaf(struct btrfs_trans_handle *trans,
 				ctx->log_new_dentries = true;
 		}
 
-		if (!inode_logged_before)
+		if (!ctx->logged_before)
 			goto add_to_batch;
 
 		/*
 		 * If we were logged before and have logged dir items, we can skip
 		 * checking if any item with a key offset larger than the last one
 		 * we logged is in the log tree, saving time and avoiding adding
-		 * contention on the log tree.
+		 * contention on the log tree. We can only rely on the value of
+		 * last_dir_index_offset when we know for sure that the inode was
+		 * previously logged in the current transaction.
 		 */
 		if (key.offset > inode->last_dir_index_offset)
 			goto add_to_batch;
@@ -4066,21 +4163,6 @@ static noinline int log_directory_changes(struct btrfs_trans_handle *trans,
 	u64 max_key;
 	int ret;
 
-	/*
-	 * If this is the first time we are being logged in the current
-	 * transaction, or we were logged before but the inode was evicted and
-	 * reloaded later, in which case its logged_trans is 0, reset the value
-	 * of the last logged key offset. Note that we don't use the helper
-	 * function inode_logged() here - that is because the function returns
-	 * true after an inode eviction, assuming the worst case as it can not
-	 * know for sure if the inode was logged before. So we can not skip key
-	 * searches in the case the inode was evicted, because it may not have
-	 * been logged in this transaction and may have been logged in a past
-	 * transaction, so we need to reset the last dir index offset to (u64)-1.
-	 */
-	if (inode->logged_trans != trans->transid)
-		inode->last_dir_index_offset = (u64)-1;
-
 	min_key = BTRFS_DIR_START_INDEX;
 	max_key = 0;
 	ctx->last_dir_item_offset = inode->last_dir_index_offset;
@@ -4117,9 +4199,6 @@ static int drop_inode_items(struct btrfs_trans_handle *trans,
 	struct btrfs_key found_key;
 	int start_slot;
 
-	if (!inode_logged(trans, inode))
-		return 0;
-
 	key.objectid = btrfs_ino(inode);
 	key.type = max_key_type;
 	key.offset = (u64)-1;
@@ -4617,7 +4696,7 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
 	 * are small, with a root at level 2 or 3 at most, due to their short
 	 * life span.
 	 */
-	if (inode_logged(trans, inode)) {
+	if (ctx->logged_before) {
 		drop_args.path = path;
 		drop_args.start = em->start;
 		drop_args.end = em->start + em->len;
@@ -5633,6 +5712,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
 	bool xattrs_logged = false;
 	bool recursive_logging = false;
 	bool inode_item_dropped = true;
+	const bool orig_logged_before = ctx->logged_before;
 
 	path = btrfs_alloc_path();
 	if (!path)
@@ -5682,6 +5762,18 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
 		mutex_lock(&inode->log_mutex);
 	}
 
+	/*
+	 * Before logging the inode item, cache the value returned by
+	 * inode_logged(), because after that we have the need to figure out if
+	 * the inode was previously logged in this transaction.
+	 */
+	ret = inode_logged(trans, inode, path);
+	if (ret < 0) {
+		err = ret;
+		goto out_unlock;
+	}
+	ctx->logged_before = (ret == 1);
+
 	/*
 	 * This is for cases where logging a directory could result in losing a
 	 * a file after replaying the log. For example, if we move a file from a
@@ -5707,9 +5799,11 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
 		clear_bit(BTRFS_INODE_COPY_EVERYTHING, &inode->runtime_flags);
 		if (inode_only == LOG_INODE_EXISTS)
 			max_key_type = BTRFS_XATTR_ITEM_KEY;
-		ret = drop_inode_items(trans, log, path, inode, max_key_type);
+		if (ctx->logged_before)
+			ret = drop_inode_items(trans, log, path, inode,
+					       max_key_type);
 	} else {
-		if (inode_only == LOG_INODE_EXISTS && inode_logged(trans, inode)) {
+		if (inode_only == LOG_INODE_EXISTS && ctx->logged_before) {
 			/*
 			 * Make sure the new inode item we write to the log has
 			 * the same isize as the current one (if it exists).
@@ -5731,14 +5825,15 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
 			     &inode->runtime_flags)) {
 			if (inode_only == LOG_INODE_EXISTS) {
 				max_key.type = BTRFS_XATTR_ITEM_KEY;
-				ret = drop_inode_items(trans, log, path, inode,
-						       max_key.type);
+				if (ctx->logged_before)
+					ret = drop_inode_items(trans, log, path,
+							       inode, max_key.type);
 			} else {
 				clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
 					  &inode->runtime_flags);
 				clear_bit(BTRFS_INODE_COPY_EVERYTHING,
 					  &inode->runtime_flags);
-				if (inode_logged(trans, inode))
+				if (ctx->logged_before)
 					ret = truncate_inode_items(trans, log,
 								   inode, 0, 0);
 			}
@@ -5748,8 +5843,9 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
 			if (inode_only == LOG_INODE_ALL)
 				fast_search = true;
 			max_key.type = BTRFS_XATTR_ITEM_KEY;
-			ret = drop_inode_items(trans, log, path, inode,
-					       max_key.type);
+			if (ctx->logged_before)
+				ret = drop_inode_items(trans, log, path, inode,
+						       max_key.type);
 		} else {
 			if (inode_only == LOG_INODE_ALL)
 				fast_search = true;
@@ -5869,6 +5965,10 @@ out_unlock:
 out:
 	btrfs_free_path(path);
 	btrfs_free_path(dst_path);
+
+	if (recursive_logging)
+		ctx->logged_before = orig_logged_before;
+
 	return err;
 }
 
@@ -6813,7 +6913,7 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
 	struct btrfs_root *root = inode->root;
 	struct btrfs_log_ctx ctx;
 	bool log_pinned = false;
-	int ret = 0;
+	int ret;
 
 	/*
 	 * this will force the logging code to walk the dentry chain
@@ -6826,9 +6926,24 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
 	 * if this inode hasn't been logged and directory we're renaming it
 	 * from hasn't been logged, we don't need to log it
 	 */
-	if (!inode_logged(trans, inode) &&
-	    (!old_dir || !inode_logged(trans, old_dir)))
-		return;
+	ret = inode_logged(trans, inode, NULL);
+	if (ret < 0) {
+		goto out;
+	} else if (ret == 0) {
+		if (!old_dir)
+			return;
+		/*
+		 * If the inode was not logged and we are doing a rename (old_dir is not
+		 * NULL), check if old_dir was logged - if it was not we can return and
+		 * do nothing.
+		 */
+		ret = inode_logged(trans, old_dir, NULL);
+		if (ret < 0)
+			goto out;
+		else if (ret == 0)
+			return;
+	}
+	ret = 0;
 
 	/*
 	 * If we are doing a rename (old_dir is not NULL) from a directory that
@@ -6900,15 +7015,15 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
 	 */
 	btrfs_log_inode_parent(trans, inode, parent, LOG_INODE_EXISTS, &ctx);
 out:
-	if (log_pinned) {
-		/*
-		 * If an error happened mark the log for a full commit because
-		 * it's not consistent and up to date. Do it before unpinning the
-		 * log, to avoid any races with someone else trying to commit it.
-		 */
-		if (ret < 0)
-			btrfs_set_log_full_commit(trans);
+	/*
+	 * If an error happened mark the log for a full commit because it's not
+	 * consistent and up to date or we couldn't find out if one of the
+	 * inodes was logged before in this transaction. Do it before unpinning
+	 * the log, to avoid any races with someone else trying to commit it.
+	 */
+	if (ret < 0)
+		btrfs_set_log_full_commit(trans);
+	if (log_pinned)
 		btrfs_end_log_trans(root);
-	}
 }
 
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h
index f1acb7fa944cf..1620f8170629e 100644
--- a/fs/btrfs/tree-log.h
+++ b/fs/btrfs/tree-log.h
@@ -17,6 +17,8 @@ struct btrfs_log_ctx {
 	int log_transid;
 	bool log_new_dentries;
 	bool logging_new_name;
+	/* Indicate if the inode being logged was logged before. */
+	bool logged_before;
 	/* Tracks the last logged dir item/index key offset. */
 	u64 last_dir_item_offset;
 	struct inode *inode;
@@ -32,6 +34,7 @@ static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx,
 	ctx->log_transid = 0;
 	ctx->log_new_dentries = false;
 	ctx->logging_new_name = false;
+	ctx->logged_before = false;
 	ctx->inode = inode;
 	INIT_LIST_HEAD(&ctx->list);
 	INIT_LIST_HEAD(&ctx->ordered_extents);
-- 
GitLab


From 65faced5b9ef67baaab681ffc4df870d51990357 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Thu, 20 Jan 2022 11:00:11 +0000
Subject: [PATCH 1354/1586] btrfs: use single variable to track return value at
 btrfs_log_inode()

At btrfs_log_inode(), we have two variables to track errors and the
return value of the function, named 'ret' and 'err'. In some places we
use 'ret' and if gets a non-zero value we assign its value to 'err'
and then jump to the 'out' label, while in other places we use 'err'
directly without 'ret' as an intermediary. This is inconsistent, error
prone and not necessary. So change that to use only the 'ret' variable,
making this consistent with most functions in btrfs.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/tree-log.c | 52 +++++++++++++++++++--------------------------
 1 file changed, 22 insertions(+), 30 deletions(-)

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 49661297c74ff..24f95d7b51900 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -5702,8 +5702,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
 	struct btrfs_key min_key;
 	struct btrfs_key max_key;
 	struct btrfs_root *log = inode->root->log_root;
-	int err = 0;
-	int ret = 0;
+	int ret;
 	bool fast_search = false;
 	u64 ino = btrfs_ino(inode);
 	struct extent_map_tree *em_tree = &inode->extent_tree;
@@ -5746,8 +5745,8 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
 	 * and figure out which index ranges have to be logged.
 	 */
 	if (S_ISDIR(inode->vfs_inode.i_mode)) {
-		err = btrfs_commit_inode_delayed_items(trans, inode);
-		if (err)
+		ret = btrfs_commit_inode_delayed_items(trans, inode);
+		if (ret)
 			goto out;
 	}
 
@@ -5768,11 +5767,10 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
 	 * the inode was previously logged in this transaction.
 	 */
 	ret = inode_logged(trans, inode, path);
-	if (ret < 0) {
-		err = ret;
+	if (ret < 0)
 		goto out_unlock;
-	}
 	ctx->logged_before = (ret == 1);
+	ret = 0;
 
 	/*
 	 * This is for cases where logging a directory could result in losing a
@@ -5785,7 +5783,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
 	    inode_only == LOG_INODE_ALL &&
 	    inode->last_unlink_trans >= trans->transid) {
 		btrfs_set_log_full_commit(trans);
-		err = 1;
+		ret = 1;
 		goto out_unlock;
 	}
 
@@ -5817,8 +5815,8 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
 			 * (zeroes), as if an expanding truncate happened,
 			 * instead of getting a file of 4Kb only.
 			 */
-			err = logged_inode_size(log, inode, path, &logged_isize);
-			if (err)
+			ret = logged_inode_size(log, inode, path, &logged_isize);
+			if (ret)
 				goto out_unlock;
 		}
 		if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
@@ -5854,37 +5852,35 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
 		}
 
 	}
-	if (ret) {
-		err = ret;
+	if (ret)
 		goto out_unlock;
-	}
 
-	err = copy_inode_items_to_log(trans, inode, &min_key, &max_key,
+	ret = copy_inode_items_to_log(trans, inode, &min_key, &max_key,
 				      path, dst_path, logged_isize,
 				      recursive_logging, inode_only, ctx,
 				      &need_log_inode_item);
-	if (err)
+	if (ret)
 		goto out_unlock;
 
 	btrfs_release_path(path);
 	btrfs_release_path(dst_path);
-	err = btrfs_log_all_xattrs(trans, inode, path, dst_path);
-	if (err)
+	ret = btrfs_log_all_xattrs(trans, inode, path, dst_path);
+	if (ret)
 		goto out_unlock;
 	xattrs_logged = true;
 	if (max_key.type >= BTRFS_EXTENT_DATA_KEY && !fast_search) {
 		btrfs_release_path(path);
 		btrfs_release_path(dst_path);
-		err = btrfs_log_holes(trans, inode, path);
-		if (err)
+		ret = btrfs_log_holes(trans, inode, path);
+		if (ret)
 			goto out_unlock;
 	}
 log_extents:
 	btrfs_release_path(path);
 	btrfs_release_path(dst_path);
 	if (need_log_inode_item) {
-		err = log_inode_item(trans, log, dst_path, inode, inode_item_dropped);
-		if (err)
+		ret = log_inode_item(trans, log, dst_path, inode, inode_item_dropped);
+		if (ret)
 			goto out_unlock;
 		/*
 		 * If we are doing a fast fsync and the inode was logged before
@@ -5895,18 +5891,16 @@ log_extents:
 		 * BTRFS_INODE_COPY_EVERYTHING set.
 		 */
 		if (!xattrs_logged && inode->logged_trans < trans->transid) {
-			err = btrfs_log_all_xattrs(trans, inode, path, dst_path);
-			if (err)
+			ret = btrfs_log_all_xattrs(trans, inode, path, dst_path);
+			if (ret)
 				goto out_unlock;
 			btrfs_release_path(path);
 		}
 	}
 	if (fast_search) {
 		ret = btrfs_log_changed_extents(trans, inode, dst_path, ctx);
-		if (ret) {
-			err = ret;
+		if (ret)
 			goto out_unlock;
-		}
 	} else if (inode_only == LOG_INODE_ALL) {
 		struct extent_map *em, *n;
 
@@ -5918,10 +5912,8 @@ log_extents:
 
 	if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->vfs_inode.i_mode)) {
 		ret = log_directory_changes(trans, inode, path, dst_path, ctx);
-		if (ret) {
-			err = ret;
+		if (ret)
 			goto out_unlock;
-		}
 	}
 
 	spin_lock(&inode->lock);
@@ -5969,7 +5961,7 @@ out:
 	if (recursive_logging)
 		ctx->logged_before = orig_logged_before;
 
-	return err;
+	return ret;
 }
 
 /*
-- 
GitLab


From 2c7d2a230237e7c43fa067d695937b7e484bb92a Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Wed, 15 Dec 2021 15:39:58 -0500
Subject: [PATCH 1355/1586] btrfs: add definition for EXTENT_TREE_V2

This adds the initial definition of the EXTENT_TREE_V2 incompat feature
flag.  This also hides the support behind CONFIG_BTRFS_DEBUG.

THIS IS A IN DEVELOPMENT FORMAT CHANGE, DO NOT USE UNLESS YOU ARE A
DEVELOPER OR A TESTER.

The format is in flux and will be added in stages, any fs will need to
be re-made between updates to the format.

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.h           | 21 +++++++++++++++++++++
 fs/btrfs/sysfs.c           |  5 ++++-
 include/uapi/linux/btrfs.h |  1 +
 3 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index ebb2d109e8bb2..54424b3411323 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -300,6 +300,26 @@ static_assert(sizeof(struct btrfs_super_block) == BTRFS_SUPER_INFO_SIZE);
 #define BTRFS_FEATURE_COMPAT_RO_SAFE_SET	0ULL
 #define BTRFS_FEATURE_COMPAT_RO_SAFE_CLEAR	0ULL
 
+#ifdef CONFIG_BTRFS_DEBUG
+/*
+ * Extent tree v2 supported only with CONFIG_BTRFS_DEBUG
+ */
+#define BTRFS_FEATURE_INCOMPAT_SUPP			\
+	(BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF |		\
+	 BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL |	\
+	 BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS |		\
+	 BTRFS_FEATURE_INCOMPAT_BIG_METADATA |		\
+	 BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO |		\
+	 BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD |		\
+	 BTRFS_FEATURE_INCOMPAT_RAID56 |		\
+	 BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF |		\
+	 BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA |	\
+	 BTRFS_FEATURE_INCOMPAT_NO_HOLES	|	\
+	 BTRFS_FEATURE_INCOMPAT_METADATA_UUID	|	\
+	 BTRFS_FEATURE_INCOMPAT_RAID1C34	|	\
+	 BTRFS_FEATURE_INCOMPAT_ZONED		|	\
+	 BTRFS_FEATURE_INCOMPAT_EXTENT_TREE_V2)
+#else
 #define BTRFS_FEATURE_INCOMPAT_SUPP			\
 	(BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF |		\
 	 BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL |	\
@@ -314,6 +334,7 @@ static_assert(sizeof(struct btrfs_super_block) == BTRFS_SUPER_INFO_SIZE);
 	 BTRFS_FEATURE_INCOMPAT_METADATA_UUID	|	\
 	 BTRFS_FEATURE_INCOMPAT_RAID1C34	|	\
 	 BTRFS_FEATURE_INCOMPAT_ZONED)
+#endif
 
 #define BTRFS_FEATURE_INCOMPAT_SAFE_SET			\
 	(BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF)
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index beb7f72d50b86..5f4812fd8b509 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -283,9 +283,11 @@ BTRFS_FEAT_ATTR_INCOMPAT(no_holes, NO_HOLES);
 BTRFS_FEAT_ATTR_INCOMPAT(metadata_uuid, METADATA_UUID);
 BTRFS_FEAT_ATTR_COMPAT_RO(free_space_tree, FREE_SPACE_TREE);
 BTRFS_FEAT_ATTR_INCOMPAT(raid1c34, RAID1C34);
-/* Remove once support for zoned allocation is feature complete */
 #ifdef CONFIG_BTRFS_DEBUG
+/* Remove once support for zoned allocation is feature complete */
 BTRFS_FEAT_ATTR_INCOMPAT(zoned, ZONED);
+/* Remove once support for extent tree v2 is feature complete */
+BTRFS_FEAT_ATTR_INCOMPAT(extent_tree_v2, EXTENT_TREE_V2);
 #endif
 #ifdef CONFIG_FS_VERITY
 BTRFS_FEAT_ATTR_COMPAT_RO(verity, VERITY);
@@ -314,6 +316,7 @@ static struct attribute *btrfs_supported_feature_attrs[] = {
 	BTRFS_FEAT_ATTR_PTR(raid1c34),
 #ifdef CONFIG_BTRFS_DEBUG
 	BTRFS_FEAT_ATTR_PTR(zoned),
+	BTRFS_FEAT_ATTR_PTR(extent_tree_v2),
 #endif
 #ifdef CONFIG_FS_VERITY
 	BTRFS_FEAT_ATTR_PTR(verity),
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index 738619994e261..1cb1a3860f1d9 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -309,6 +309,7 @@ struct btrfs_ioctl_fs_info_args {
 #define BTRFS_FEATURE_INCOMPAT_METADATA_UUID	(1ULL << 10)
 #define BTRFS_FEATURE_INCOMPAT_RAID1C34		(1ULL << 11)
 #define BTRFS_FEATURE_INCOMPAT_ZONED		(1ULL << 12)
+#define BTRFS_FEATURE_INCOMPAT_EXTENT_TREE_V2	(1ULL << 13)
 
 struct btrfs_ioctl_feature_flags {
 	__u64 compat_flags;
-- 
GitLab


From 4b3492539907ef53acb06bf0db02dbaaed2bf097 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Wed, 15 Dec 2021 15:39:59 -0500
Subject: [PATCH 1356/1586] btrfs: disable balance for extent tree v2 for now

With global root id's it makes it problematic to do backref lookups for
balance.  This isn't hard to deal with, but future changes are going to
make it impossible to lookup backrefs on any COWonly roots, so go ahead
and disable balance for now on extent tree v2 until we can add balance
support back in future patches.

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/volumes.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index a91e51b0ca811..4c0f8a10e5cd3 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -3235,6 +3235,12 @@ int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset)
 	u64 length;
 	int ret;
 
+	if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
+		btrfs_err(fs_info,
+			  "relocate: not supported on extent tree v2 yet");
+		return -EINVAL;
+	}
+
 	/*
 	 * Prevent races with automatic removal of unused block groups.
 	 * After we relocate and before we remove the chunk with offset
-- 
GitLab


From 914a519b19e84dd5e4c3bf5a24cd892f64f0bdef Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Wed, 15 Dec 2021 15:40:00 -0500
Subject: [PATCH 1357/1586] btrfs: disable device manipulation ioctl's
 EXTENT_TREE_V2

Device add, remove, and replace all require balance, which doesn't work
right now on extent tree v2, so disable these for now.

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ioctl.c   | 10 ++++++++++
 fs/btrfs/volumes.c |  5 +++++
 2 files changed, 15 insertions(+)

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index dbb0a8dad770a..7a59887a2b544 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3449,6 +3449,11 @@ static long btrfs_ioctl_add_dev(struct btrfs_fs_info *fs_info, void __user *arg)
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
+	if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
+		btrfs_err(fs_info, "device add not supported on extent tree v2 yet");
+		return -EINVAL;
+	}
+
 	if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_DEV_ADD)) {
 		if (!btrfs_exclop_start_try_lock(fs_info, BTRFS_EXCLOP_DEV_ADD))
 			return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
@@ -4073,6 +4078,11 @@ static long btrfs_ioctl_dev_replace(struct btrfs_fs_info *fs_info,
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
+	if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
+		btrfs_err(fs_info, "device replace not supported on extent tree v2 yet");
+		return -EINVAL;
+	}
+
 	p = memdup_user(arg, sizeof(*p));
 	if (IS_ERR(p))
 		return PTR_ERR(p);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 4c0f8a10e5cd3..fa7fee09e39b8 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2085,6 +2085,11 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
 	u64 num_devices;
 	int ret = 0;
 
+	if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
+		btrfs_err(fs_info, "device remove not supported on extent tree v2 yet");
+		return -EINVAL;
+	}
+
 	/*
 	 * The device list in fs_devices is accessed without locks (neither
 	 * uuid_mutex nor device_list_mutex) as it won't change on a mounted
-- 
GitLab


From ef3eccc19aa896fc6aea26b4b065dcccec1b6460 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Wed, 15 Dec 2021 15:40:01 -0500
Subject: [PATCH 1358/1586] btrfs: disable qgroups in extent tree v2

Backref lookups are going to be drastically different with extent tree
v2, disable qgroups until we do the work to add this support for extent
tree v2.

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/qgroup.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 4a9bde1e2c50f..d437511ed3540 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -948,6 +948,12 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
 	 */
 	lockdep_assert_held_write(&fs_info->subvol_sem);
 
+	if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
+		btrfs_err(fs_info,
+			  "qgroups are currently unsupported in extent tree v2");
+		return -EINVAL;
+	}
+
 	mutex_lock(&fs_info->qgroup_ioctl_lock);
 	if (fs_info->quota_root)
 		goto out;
-- 
GitLab


From da32c6d5708b6858d8d2557b0034de1bfa51ee3c Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Wed, 15 Dec 2021 15:40:02 -0500
Subject: [PATCH 1359/1586] btrfs: disable scrub for extent-tree-v2

Scrub depends on extent references for every block, and with extent tree
v2 we won't have that, so disable scrub until we can add back the proper
code to handle extent-tree-v2.

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ioctl.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 7a59887a2b544..ecf3e05104aaa 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3979,6 +3979,11 @@ static long btrfs_ioctl_scrub(struct file *file, void __user *arg)
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
+	if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
+		btrfs_err(fs_info, "scrub is not supported on extent tree v2 yet");
+		return -EINVAL;
+	}
+
 	sa = memdup_user(arg, sizeof(*sa));
 	if (IS_ERR(sa))
 		return PTR_ERR(sa);
-- 
GitLab


From 813febdbe6c9f691d7a1a1c8f6a31bd6461a1ae0 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Wed, 15 Dec 2021 15:40:03 -0500
Subject: [PATCH 1360/1586] btrfs: disable snapshot creation/deletion for
 extent tree v2

When we stop tracking metadata blocks all of snapshotting will break, so
disable it until I add the snapshot root and drop tree support.

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c | 11 +++++++++--
 fs/btrfs/ioctl.c | 14 ++++++++++++++
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 9e21fbcbac6d2..fc3e2f06cb026 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4583,14 +4583,21 @@ out_up_write:
 static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
 {
 	struct inode *inode = d_inode(dentry);
+	struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
 	int err = 0;
 	struct btrfs_trans_handle *trans;
 	u64 last_unlink_trans;
 
 	if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
 		return -ENOTEMPTY;
-	if (btrfs_ino(BTRFS_I(inode)) == BTRFS_FIRST_FREE_OBJECTID)
+	if (btrfs_ino(BTRFS_I(inode)) == BTRFS_FIRST_FREE_OBJECTID) {
+		if (unlikely(btrfs_fs_incompat(fs_info, EXTENT_TREE_V2))) {
+			btrfs_err(fs_info,
+			"extent tree v2 doesn't support snapshot deletion yet");
+			return -EOPNOTSUPP;
+		}
 		return btrfs_delete_subvolume(dir, dentry);
+	}
 
 	trans = __unlink_start_trans(dir);
 	if (IS_ERR(trans))
@@ -4629,7 +4636,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
 	}
 out:
 	btrfs_end_transaction(trans);
-	btrfs_btree_balance_dirty(BTRFS_I(dir)->root->fs_info);
+	btrfs_btree_balance_dirty(fs_info);
 
 	return err;
 }
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index ecf3e05104aaa..8b2538b7027de 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -751,6 +751,13 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
 	struct btrfs_trans_handle *trans;
 	int ret;
 
+	/* We do not support snapshotting right now. */
+	if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
+		btrfs_warn(fs_info,
+			   "extent tree v2 doesn't support snapshotting yet");
+		return -EOPNOTSUPP;
+	}
+
 	if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
 		return -EINVAL;
 
@@ -3174,6 +3181,13 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
 	int err = 0;
 	bool destroy_parent = false;
 
+	/* We don't support snapshots with extent tree v2 yet. */
+	if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
+		btrfs_err(fs_info,
+			  "extent tree v2 doesn't support snapshot deletion yet");
+		return -EOPNOTSUPP;
+	}
+
 	if (destroy_v2) {
 		vol_args2 = memdup_user(arg, sizeof(*vol_args2));
 		if (IS_ERR(vol_args2))
-- 
GitLab


From 63cd070decb15f540b15f2bc35ecad42dd9cf598 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Wed, 15 Dec 2021 15:40:04 -0500
Subject: [PATCH 1361/1586] btrfs: disable space cache related mount options
 for extent tree v2

We cannot fall back on the slow caching for extent tree v2, which means
we can't just arbitrarily clear the free space trees at mount time.
Furthermore we can't do v1 space cache with extent tree v2.  Simply
ignore these mount options for extent tree v2 as they aren't relevant.

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/super.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 1f4cda3745dc2..33cfc9e27451e 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -861,6 +861,14 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
 			break;
 		case Opt_space_cache:
 		case Opt_space_cache_version:
+			/*
+			 * We already set FREE_SPACE_TREE above because we have
+			 * compat_ro(FREE_SPACE_TREE) set, and we aren't going
+			 * to allow v1 to be set for extent tree v2, simply
+			 * ignore this setting if we're extent tree v2.
+			 */
+			if (btrfs_fs_incompat(info, EXTENT_TREE_V2))
+				break;
 			if (token == Opt_space_cache ||
 			    strcmp(args[0].from, "v1") == 0) {
 				btrfs_clear_opt(info->mount_opt,
@@ -881,6 +889,12 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
 			btrfs_set_opt(info->mount_opt, RESCAN_UUID_TREE);
 			break;
 		case Opt_no_space_cache:
+			/*
+			 * We cannot operate without the free space tree with
+			 * extent tree v2, ignore this option.
+			 */
+			if (btrfs_fs_incompat(info, EXTENT_TREE_V2))
+				break;
 			if (btrfs_test_opt(info, SPACE_CACHE)) {
 				btrfs_clear_and_info(info, SPACE_CACHE,
 					     "disabling disk space caching");
@@ -896,6 +910,12 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
 	"the 'inode_cache' option is deprecated and has no effect since 5.11");
 			break;
 		case Opt_clear_cache:
+			/*
+			 * We cannot clear the free space tree with extent tree
+			 * v2, ignore this option.
+			 */
+			if (btrfs_fs_incompat(info, EXTENT_TREE_V2))
+				break;
 			btrfs_set_and_info(info, CLEAR_CACHE,
 					   "force clearing of disk cache");
 			break;
-- 
GitLab


From c2fa821cc9f9415f6c7fc2da920e833a78040ea7 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Wed, 15 Dec 2021 15:40:05 -0500
Subject: [PATCH 1362/1586] btrfs: tree-checker: don't fail on empty extent
 roots for extent tree v2

For extent tree v2 we can definitely have empty extent roots, so skip
this particular check if we have that set.

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/tree-checker.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index aae5697dde329..64c7d2a2bb3e4 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -1648,7 +1648,6 @@ static int check_leaf(struct extent_buffer *leaf, bool check_item_data)
 		/* These trees must never be empty */
 		if (unlikely(owner == BTRFS_ROOT_TREE_OBJECTID ||
 			     owner == BTRFS_CHUNK_TREE_OBJECTID ||
-			     owner == BTRFS_EXTENT_TREE_OBJECTID ||
 			     owner == BTRFS_DEV_TREE_OBJECTID ||
 			     owner == BTRFS_FS_TREE_OBJECTID ||
 			     owner == BTRFS_DATA_RELOC_TREE_OBJECTID)) {
@@ -1657,12 +1656,25 @@ static int check_leaf(struct extent_buffer *leaf, bool check_item_data)
 				    owner);
 			return -EUCLEAN;
 		}
+
 		/* Unknown tree */
 		if (unlikely(owner == 0)) {
 			generic_err(leaf, 0,
 				"invalid owner, root 0 is not defined");
 			return -EUCLEAN;
 		}
+
+		/* EXTENT_TREE_V2 can have empty extent trees. */
+		if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2))
+			return 0;
+
+		if (unlikely(owner == BTRFS_EXTENT_TREE_OBJECTID)) {
+			generic_err(leaf, 0,
+			"invalid root, root %llu must never be empty",
+				    owner);
+			return -EUCLEAN;
+		}
+
 		return 0;
 	}
 
-- 
GitLab


From bd676446c138882003e4d470a518c219f3184354 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Wed, 15 Dec 2021 15:40:06 -0500
Subject: [PATCH 1363/1586] btrfs: abstract out loading the tree root

We're going to be adding more roots that need to be loaded from the
super block, so abstract out the code to read the tree_root from the
super block, and use this helper for the chunk root as well.  This will
make it simpler to load the new trees in the future.

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/disk-io.c | 81 ++++++++++++++++++++++++++--------------------
 1 file changed, 46 insertions(+), 35 deletions(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 48590a3807621..e014cb8413c7b 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2930,6 +2930,45 @@ out:
 	return ret;
 }
 
+static int load_super_root(struct btrfs_root *root, u64 bytenr, u64 gen, int level)
+{
+	int ret = 0;
+
+	root->node = read_tree_block(root->fs_info, bytenr,
+				     root->root_key.objectid, gen, level, NULL);
+	if (IS_ERR(root->node)) {
+		ret = PTR_ERR(root->node);
+		root->node = NULL;
+	} else if (!extent_buffer_uptodate(root->node)) {
+		free_extent_buffer(root->node);
+		root->node = NULL;
+		ret = -EIO;
+	}
+
+	if (ret)
+		return ret;
+
+	btrfs_set_root_node(&root->root_item, root->node);
+	root->commit_root = btrfs_root_node(root);
+	btrfs_set_root_refs(&root->root_item, 1);
+	return ret;
+}
+
+static int load_important_roots(struct btrfs_fs_info *fs_info)
+{
+	struct btrfs_super_block *sb = fs_info->super_copy;
+	u64 gen, bytenr;
+	int level, ret;
+
+	bytenr = btrfs_super_root(sb);
+	gen = btrfs_super_generation(sb);
+	level = btrfs_super_root_level(sb);
+	ret = load_super_root(fs_info->tree_root, bytenr, gen, level);
+	if (ret)
+		btrfs_warn(fs_info, "couldn't read tree root");
+	return ret;
+}
+
 static int __cold init_tree_roots(struct btrfs_fs_info *fs_info)
 {
 	int backup_index = find_newest_super_backup(fs_info);
@@ -2940,9 +2979,6 @@ static int __cold init_tree_roots(struct btrfs_fs_info *fs_info)
 	int i;
 
 	for (i = 0; i < BTRFS_NUM_BACKUP_ROOTS; i++) {
-		u64 generation;
-		int level;
-
 		if (handle_error) {
 			if (!IS_ERR(tree_root->node))
 				free_extent_buffer(tree_root->node);
@@ -2967,29 +3003,13 @@ static int __cold init_tree_roots(struct btrfs_fs_info *fs_info)
 			if (ret < 0)
 				return ret;
 		}
-		generation = btrfs_super_generation(sb);
-		level = btrfs_super_root_level(sb);
-		tree_root->node = read_tree_block(fs_info, btrfs_super_root(sb),
-						  BTRFS_ROOT_TREE_OBJECTID,
-						  generation, level, NULL);
-		if (IS_ERR(tree_root->node)) {
-			handle_error = true;
-			ret = PTR_ERR(tree_root->node);
-			tree_root->node = NULL;
-			btrfs_warn(fs_info, "couldn't read tree root");
-			continue;
 
-		} else if (!extent_buffer_uptodate(tree_root->node)) {
+		ret = load_important_roots(fs_info);
+		if (ret) {
 			handle_error = true;
-			ret = -EIO;
-			btrfs_warn(fs_info, "error while reading tree root");
 			continue;
 		}
 
-		btrfs_set_root_node(&tree_root->root_item, tree_root->node);
-		tree_root->commit_root = btrfs_root_node(tree_root);
-		btrfs_set_root_refs(&tree_root->root_item, 1);
-
 		/*
 		 * No need to hold btrfs_root::objectid_mutex since the fs
 		 * hasn't been fully initialised and we are the only user
@@ -3009,8 +3029,8 @@ static int __cold init_tree_roots(struct btrfs_fs_info *fs_info)
 		}
 
 		/* All successful */
-		fs_info->generation = generation;
-		fs_info->last_trans_committed = generation;
+		fs_info->generation = btrfs_header_generation(tree_root->node);
+		fs_info->last_trans_committed = fs_info->generation;
 		fs_info->last_reloc_trans = 0;
 
 		/* Always begin writing backup roots after the one being used */
@@ -3594,21 +3614,12 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
 
 	generation = btrfs_super_chunk_root_generation(disk_super);
 	level = btrfs_super_chunk_root_level(disk_super);
-
-	chunk_root->node = read_tree_block(fs_info,
-					   btrfs_super_chunk_root(disk_super),
-					   BTRFS_CHUNK_TREE_OBJECTID,
-					   generation, level, NULL);
-	if (IS_ERR(chunk_root->node) ||
-	    !extent_buffer_uptodate(chunk_root->node)) {
+	ret = load_super_root(chunk_root, btrfs_super_chunk_root(disk_super),
+			      generation, level);
+	if (ret) {
 		btrfs_err(fs_info, "failed to read chunk root");
-		if (!IS_ERR(chunk_root->node))
-			free_extent_buffer(chunk_root->node);
-		chunk_root->node = NULL;
 		goto fail_tree_roots;
 	}
-	btrfs_set_root_node(&chunk_root->root_item, chunk_root->node);
-	chunk_root->commit_root = btrfs_root_node(chunk_root);
 
 	read_extent_buffer(chunk_root->node, fs_info->chunk_tree_uuid,
 			   offsetof(struct btrfs_header, chunk_tree_uuid),
-- 
GitLab


From 9c54e80ddc6bd89596a4046d451908700476fd14 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Wed, 15 Dec 2021 15:40:07 -0500
Subject: [PATCH 1364/1586] btrfs: add code to support the block group root

This code adds the on disk structures for the block group root, which
will hold the block group items for extent tree v2.

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.h                | 27 +++++++++++++++++-
 fs/btrfs/disk-io.c              | 49 ++++++++++++++++++++++++++++-----
 fs/btrfs/disk-io.h              |  2 ++
 fs/btrfs/print-tree.c           |  1 +
 include/trace/events/btrfs.h    |  1 +
 include/uapi/linux/btrfs_tree.h |  3 ++
 6 files changed, 75 insertions(+), 8 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 54424b3411323..f460a7bb9ae8f 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -274,8 +274,14 @@ struct btrfs_super_block {
 	/* the UUID written into btree blocks */
 	u8 metadata_uuid[BTRFS_FSID_SIZE];
 
+	/* Extent tree v2 */
+	__le64 block_group_root;
+	__le64 block_group_root_generation;
+	u8 block_group_root_level;
+
 	/* future expansion */
-	__le64 reserved[28];
+	u8 reserved8[7];
+	__le64 reserved[25];
 	u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE];
 	struct btrfs_root_backup super_roots[BTRFS_NUM_BACKUP_ROOTS];
 
@@ -657,6 +663,7 @@ struct btrfs_fs_info {
 	struct btrfs_root *quota_root;
 	struct btrfs_root *uuid_root;
 	struct btrfs_root *data_reloc_root;
+	struct btrfs_root *block_group_root;
 
 	/* the log root tree is a directory of all the other log roots */
 	struct btrfs_root *log_root_tree;
@@ -2349,6 +2356,17 @@ BTRFS_SETGET_STACK_FUNCS(backup_bytes_used, struct btrfs_root_backup,
 BTRFS_SETGET_STACK_FUNCS(backup_num_devices, struct btrfs_root_backup,
 		   num_devices, 64);
 
+/*
+ * For extent tree v2 we overload the extent root with the block group root, as
+ * we will have multiple extent roots.
+ */
+BTRFS_SETGET_STACK_FUNCS(backup_block_group_root, struct btrfs_root_backup,
+			 extent_root, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_block_group_root_gen, struct btrfs_root_backup,
+			 extent_root_gen, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_block_group_root_level,
+			 struct btrfs_root_backup, extent_root_level, 8);
+
 /* struct btrfs_balance_item */
 BTRFS_SETGET_FUNCS(balance_flags, struct btrfs_balance_item, flags, 64);
 
@@ -2483,6 +2501,13 @@ BTRFS_SETGET_STACK_FUNCS(super_cache_generation, struct btrfs_super_block,
 BTRFS_SETGET_STACK_FUNCS(super_magic, struct btrfs_super_block, magic, 64);
 BTRFS_SETGET_STACK_FUNCS(super_uuid_tree_generation, struct btrfs_super_block,
 			 uuid_tree_generation, 64);
+BTRFS_SETGET_STACK_FUNCS(super_block_group_root, struct btrfs_super_block,
+			 block_group_root, 64);
+BTRFS_SETGET_STACK_FUNCS(super_block_group_root_generation,
+			 struct btrfs_super_block,
+			 block_group_root_generation, 64);
+BTRFS_SETGET_STACK_FUNCS(super_block_group_root_level, struct btrfs_super_block,
+			 block_group_root_level, 8);
 
 int btrfs_super_csum_size(const struct btrfs_super_block *s);
 const char *btrfs_super_csum_name(u16 csum_type);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index e014cb8413c7b..fe1349737edbc 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1727,6 +1727,7 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
 	btrfs_put_root(fs_info->uuid_root);
 	btrfs_put_root(fs_info->fs_root);
 	btrfs_put_root(fs_info->data_reloc_root);
+	btrfs_put_root(fs_info->block_group_root);
 	btrfs_check_leaked_roots(fs_info);
 	btrfs_extent_buffer_leak_debug_check(fs_info);
 	kfree(fs_info->super_copy);
@@ -2095,7 +2096,6 @@ static void backup_super_roots(struct btrfs_fs_info *info)
 {
 	const int next_backup = info->backup_root_index;
 	struct btrfs_root_backup *root_backup;
-	struct btrfs_root *extent_root = btrfs_extent_root(info, 0);
 	struct btrfs_root *csum_root = btrfs_csum_root(info, 0);
 
 	root_backup = info->super_for_commit->super_roots + next_backup;
@@ -2121,11 +2121,23 @@ static void backup_super_roots(struct btrfs_fs_info *info)
 	btrfs_set_backup_chunk_root_level(root_backup,
 			       btrfs_header_level(info->chunk_root->node));
 
-	btrfs_set_backup_extent_root(root_backup, extent_root->node->start);
-	btrfs_set_backup_extent_root_gen(root_backup,
-			       btrfs_header_generation(extent_root->node));
-	btrfs_set_backup_extent_root_level(root_backup,
-			       btrfs_header_level(extent_root->node));
+	if (btrfs_fs_incompat(info, EXTENT_TREE_V2)) {
+		btrfs_set_backup_block_group_root(root_backup,
+					info->block_group_root->node->start);
+		btrfs_set_backup_block_group_root_gen(root_backup,
+			btrfs_header_generation(info->block_group_root->node));
+		btrfs_set_backup_block_group_root_level(root_backup,
+			btrfs_header_level(info->block_group_root->node));
+	} else {
+		struct btrfs_root *extent_root = btrfs_extent_root(info, 0);
+
+		btrfs_set_backup_extent_root(root_backup,
+					     extent_root->node->start);
+		btrfs_set_backup_extent_root_gen(root_backup,
+				btrfs_header_generation(extent_root->node));
+		btrfs_set_backup_extent_root_level(root_backup,
+					btrfs_header_level(extent_root->node));
+	}
 
 	/*
 	 * we might commit during log recovery, which happens before we set
@@ -2269,6 +2281,7 @@ static void free_root_pointers(struct btrfs_fs_info *info, bool free_chunk_root)
 	free_root_extent_buffers(info->uuid_root);
 	free_root_extent_buffers(info->fs_root);
 	free_root_extent_buffers(info->data_reloc_root);
+	free_root_extent_buffers(info->block_group_root);
 	if (free_chunk_root)
 		free_root_extent_buffers(info->chunk_root);
 }
@@ -2964,8 +2977,20 @@ static int load_important_roots(struct btrfs_fs_info *fs_info)
 	gen = btrfs_super_generation(sb);
 	level = btrfs_super_root_level(sb);
 	ret = load_super_root(fs_info->tree_root, bytenr, gen, level);
-	if (ret)
+	if (ret) {
 		btrfs_warn(fs_info, "couldn't read tree root");
+		return ret;
+	}
+
+	if (!btrfs_fs_incompat(fs_info, EXTENT_TREE_V2))
+		return 0;
+
+	bytenr = btrfs_super_block_group_root(sb);
+	gen = btrfs_super_block_group_root_generation(sb);
+	level = btrfs_super_block_group_root_level(sb);
+	ret = load_super_root(fs_info->block_group_root, bytenr, gen, level);
+	if (ret)
+		btrfs_warn(fs_info, "couldn't read block group root");
 	return ret;
 }
 
@@ -2978,6 +3003,16 @@ static int __cold init_tree_roots(struct btrfs_fs_info *fs_info)
 	int ret = 0;
 	int i;
 
+	if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
+		struct btrfs_root *root;
+
+		root = btrfs_alloc_root(fs_info, BTRFS_BLOCK_GROUP_TREE_OBJECTID,
+					GFP_KERNEL);
+		if (!root)
+			return -ENOMEM;
+		fs_info->block_group_root = root;
+	}
+
 	for (i = 0; i < BTRFS_NUM_BACKUP_ROOTS; i++) {
 		if (handle_error) {
 			if (!IS_ERR(tree_root->node))
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 5e8bef4b7563a..2e10514ecda85 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -111,6 +111,8 @@ static inline struct btrfs_root *btrfs_grab_root(struct btrfs_root *root)
 
 static inline struct btrfs_root *btrfs_block_group_root(struct btrfs_fs_info *fs_info)
 {
+	if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2))
+		return fs_info->block_group_root;
 	return btrfs_extent_root(fs_info, 0);
 }
 
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index 0775ae9f44192..524fdb0ddd74b 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -23,6 +23,7 @@ static const struct root_name_map root_map[] = {
 	{ BTRFS_QUOTA_TREE_OBJECTID,		"QUOTA_TREE"		},
 	{ BTRFS_UUID_TREE_OBJECTID,		"UUID_TREE"		},
 	{ BTRFS_FREE_SPACE_TREE_OBJECTID,	"FREE_SPACE_TREE"	},
+	{ BTRFS_BLOCK_GROUP_TREE_OBJECTID,	"BLOCK_GROUP_TREE"	},
 	{ BTRFS_DATA_RELOC_TREE_OBJECTID,	"DATA_RELOC_TREE"	},
 };
 
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index 0d729664b4b41..f068ff30d6541 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -53,6 +53,7 @@ struct btrfs_space_info;
 		{ BTRFS_TREE_RELOC_OBJECTID,	"TREE_RELOC"	},	\
 		{ BTRFS_UUID_TREE_OBJECTID,	"UUID_TREE"	},	\
 		{ BTRFS_FREE_SPACE_TREE_OBJECTID, "FREE_SPACE_TREE" },	\
+		{ BTRFS_BLOCK_GROUP_TREE_OBJECTID, "BLOCK_GROUP_TREE" },\
 		{ BTRFS_DATA_RELOC_TREE_OBJECTID, "DATA_RELOC_TREE" })
 
 #define show_root_type(obj)						\
diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h
index 5416f1f1a77a8..b069752a8ecfd 100644
--- a/include/uapi/linux/btrfs_tree.h
+++ b/include/uapi/linux/btrfs_tree.h
@@ -53,6 +53,9 @@
 /* tracks free space in block groups. */
 #define BTRFS_FREE_SPACE_TREE_OBJECTID 10ULL
 
+/* Holds the block group items for extent tree v2. */
+#define BTRFS_BLOCK_GROUP_TREE_OBJECTID 11ULL
+
 /* device stats in the device tree */
 #define BTRFS_DEV_STATS_OBJECTID 0ULL
 
-- 
GitLab


From f7238e5094048f20ab6f92d7c2f5ec50fc247f26 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Wed, 15 Dec 2021 15:40:08 -0500
Subject: [PATCH 1365/1586] btrfs: add support for multiple global roots

With extent tree v2 you will be able to create multiple csum, extent,
and free space trees.  They will be used based on the block group, which
will now use the block_group_item->chunk_objectid to point to the set of
global roots that it will use.  When allocating new block groups we'll
simply mod the gigabyte offset of the block group against the number of
global roots we have and that will be the block groups global id.

>From there we can take the bytenr that we're modifying in the respective
tree, look up the block group and get that block groups corresponding
global root id.  From there we can get to the appropriate global root
for that bytenr.

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/block-group.c     | 28 +++++++++++++++++++--
 fs/btrfs/block-group.h     |  1 +
 fs/btrfs/ctree.h           |  2 ++
 fs/btrfs/disk-io.c         | 50 +++++++++++++++++++++++++++++++-------
 fs/btrfs/free-space-tree.c |  2 ++
 fs/btrfs/transaction.c     | 15 ++++++++++++
 fs/btrfs/tree-checker.c    | 21 ++++++++++++++--
 7 files changed, 106 insertions(+), 13 deletions(-)

diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 8202ad6aa1317..3113f6d7f335f 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -2006,6 +2006,7 @@ static int read_one_block_group(struct btrfs_fs_info *info,
 	cache->length = key->offset;
 	cache->used = btrfs_stack_block_group_used(bgi);
 	cache->flags = btrfs_stack_block_group_flags(bgi);
+	cache->global_root_id = btrfs_stack_block_group_chunk_objectid(bgi);
 
 	set_free_space_tree_thresholds(cache);
 
@@ -2288,7 +2289,7 @@ static int insert_block_group_item(struct btrfs_trans_handle *trans,
 	spin_lock(&block_group->lock);
 	btrfs_set_stack_block_group_used(&bgi, block_group->used);
 	btrfs_set_stack_block_group_chunk_objectid(&bgi,
-				BTRFS_FIRST_CHUNK_TREE_OBJECTID);
+						   block_group->global_root_id);
 	btrfs_set_stack_block_group_flags(&bgi, block_group->flags);
 	key.objectid = block_group->start;
 	key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
@@ -2444,6 +2445,27 @@ next:
 	btrfs_trans_release_chunk_metadata(trans);
 }
 
+/*
+ * For extent tree v2 we use the block_group_item->chunk_offset to point at our
+ * global root id.  For v1 it's always set to BTRFS_FIRST_CHUNK_TREE_OBJECTID.
+ */
+static u64 calculate_global_root_id(struct btrfs_fs_info *fs_info, u64 offset)
+{
+	u64 div = SZ_1G;
+	u64 index;
+
+	if (!btrfs_fs_incompat(fs_info, EXTENT_TREE_V2))
+		return BTRFS_FIRST_CHUNK_TREE_OBJECTID;
+
+	/* If we have a smaller fs index based on 128MiB. */
+	if (btrfs_super_total_bytes(fs_info->super_copy) <= (SZ_1G * 10ULL))
+		div = SZ_128M;
+
+	offset = div64_u64(offset, div);
+	div64_u64_rem(offset, fs_info->nr_global_roots, &index);
+	return index;
+}
+
 struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *trans,
 						 u64 bytes_used, u64 type,
 						 u64 chunk_offset, u64 size)
@@ -2464,6 +2486,8 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran
 	cache->flags = type;
 	cache->last_byte_to_unpin = (u64)-1;
 	cache->cached = BTRFS_CACHE_FINISHED;
+	cache->global_root_id = calculate_global_root_id(fs_info, cache->start);
+
 	if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
 		cache->needs_free_space = 1;
 
@@ -2693,7 +2717,7 @@ static int update_block_group_item(struct btrfs_trans_handle *trans,
 	bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
 	btrfs_set_stack_block_group_used(&bgi, cache->used);
 	btrfs_set_stack_block_group_chunk_objectid(&bgi,
-			BTRFS_FIRST_CHUNK_TREE_OBJECTID);
+						   cache->global_root_id);
 	btrfs_set_stack_block_group_flags(&bgi, cache->flags);
 	write_extent_buffer(leaf, &bgi, bi, sizeof(bgi));
 	btrfs_mark_buffer_dirty(leaf);
diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h
index 5878b7ce3b78e..93aabc68bb6a8 100644
--- a/fs/btrfs/block-group.h
+++ b/fs/btrfs/block-group.h
@@ -68,6 +68,7 @@ struct btrfs_block_group {
 	u64 bytes_super;
 	u64 flags;
 	u64 cache_generation;
+	u64 global_root_id;
 
 	/*
 	 * If the free space extent count exceeds this number, convert the block
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index f460a7bb9ae8f..54513c4e891a8 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1058,6 +1058,8 @@ struct btrfs_fs_info {
 	spinlock_t relocation_bg_lock;
 	u64 data_reloc_bg;
 
+	u64 nr_global_roots;
+
 	spinlock_t zone_active_bgs_lock;
 	struct list_head zone_active_bgs;
 
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index fe1349737edbc..ed62e81c0b669 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1289,12 +1289,33 @@ struct btrfs_root *btrfs_global_root(struct btrfs_fs_info *fs_info,
 	return root;
 }
 
+static u64 btrfs_global_root_id(struct btrfs_fs_info *fs_info, u64 bytenr)
+{
+	struct btrfs_block_group *block_group;
+	u64 ret;
+
+	if (!btrfs_fs_incompat(fs_info, EXTENT_TREE_V2))
+		return 0;
+
+	if (bytenr)
+		block_group = btrfs_lookup_block_group(fs_info, bytenr);
+	else
+		block_group = btrfs_lookup_first_block_group(fs_info, bytenr);
+	ASSERT(block_group);
+	if (!block_group)
+		return 0;
+	ret = block_group->global_root_id;
+	btrfs_put_block_group(block_group);
+
+	return ret;
+}
+
 struct btrfs_root *btrfs_csum_root(struct btrfs_fs_info *fs_info, u64 bytenr)
 {
 	struct btrfs_key key = {
 		.objectid = BTRFS_CSUM_TREE_OBJECTID,
 		.type = BTRFS_ROOT_ITEM_KEY,
-		.offset = 0,
+		.offset = btrfs_global_root_id(fs_info, bytenr),
 	};
 
 	return btrfs_global_root(fs_info, &key);
@@ -1305,7 +1326,7 @@ struct btrfs_root *btrfs_extent_root(struct btrfs_fs_info *fs_info, u64 bytenr)
 	struct btrfs_key key = {
 		.objectid = BTRFS_EXTENT_TREE_OBJECTID,
 		.type = BTRFS_ROOT_ITEM_KEY,
-		.offset = 0,
+		.offset = btrfs_global_root_id(fs_info, bytenr),
 	};
 
 	return btrfs_global_root(fs_info, &key);
@@ -2096,7 +2117,6 @@ static void backup_super_roots(struct btrfs_fs_info *info)
 {
 	const int next_backup = info->backup_root_index;
 	struct btrfs_root_backup *root_backup;
-	struct btrfs_root *csum_root = btrfs_csum_root(info, 0);
 
 	root_backup = info->super_for_commit->super_roots + next_backup;
 
@@ -2130,6 +2150,7 @@ static void backup_super_roots(struct btrfs_fs_info *info)
 			btrfs_header_level(info->block_group_root->node));
 	} else {
 		struct btrfs_root *extent_root = btrfs_extent_root(info, 0);
+		struct btrfs_root *csum_root = btrfs_csum_root(info, 0);
 
 		btrfs_set_backup_extent_root(root_backup,
 					     extent_root->node->start);
@@ -2137,6 +2158,12 @@ static void backup_super_roots(struct btrfs_fs_info *info)
 				btrfs_header_generation(extent_root->node));
 		btrfs_set_backup_extent_root_level(root_backup,
 					btrfs_header_level(extent_root->node));
+
+		btrfs_set_backup_csum_root(root_backup, csum_root->node->start);
+		btrfs_set_backup_csum_root_gen(root_backup,
+					       btrfs_header_generation(csum_root->node));
+		btrfs_set_backup_csum_root_level(root_backup,
+						 btrfs_header_level(csum_root->node));
 	}
 
 	/*
@@ -2158,12 +2185,6 @@ static void backup_super_roots(struct btrfs_fs_info *info)
 	btrfs_set_backup_dev_root_level(root_backup,
 				       btrfs_header_level(info->dev_root->node));
 
-	btrfs_set_backup_csum_root(root_backup, csum_root->node->start);
-	btrfs_set_backup_csum_root_gen(root_backup,
-				       btrfs_header_generation(csum_root->node));
-	btrfs_set_backup_csum_root_level(root_backup,
-					 btrfs_header_level(csum_root->node));
-
 	btrfs_set_backup_total_bytes(root_backup,
 			     btrfs_super_total_bytes(info->super_copy));
 	btrfs_set_backup_bytes_used(root_backup,
@@ -2546,6 +2567,7 @@ static int load_global_roots_objectid(struct btrfs_root *tree_root,
 {
 	struct btrfs_fs_info *fs_info = tree_root->fs_info;
 	struct btrfs_root *root;
+	u64 max_global_id = 0;
 	int ret;
 	struct btrfs_key key = {
 		.objectid = objectid,
@@ -2581,6 +2603,13 @@ static int load_global_roots_objectid(struct btrfs_root *tree_root,
 			break;
 		btrfs_release_path(path);
 
+		/*
+		 * Just worry about this for extent tree, it'll be the same for
+		 * everybody.
+		 */
+		if (objectid == BTRFS_EXTENT_TREE_OBJECTID)
+			max_global_id = max(max_global_id, key.offset);
+
 		found = true;
 		root = read_tree_root_path(tree_root, path, &key);
 		if (IS_ERR(root)) {
@@ -2598,6 +2627,9 @@ static int load_global_roots_objectid(struct btrfs_root *tree_root,
 	}
 	btrfs_release_path(path);
 
+	if (objectid == BTRFS_EXTENT_TREE_OBJECTID)
+		fs_info->nr_global_roots = max_global_id + 1;
+
 	if (!found || ret) {
 		if (objectid == BTRFS_CSUM_TREE_OBJECTID)
 			set_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state);
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
index 655aad0f9e1c8..0ae54d8c10d64 100644
--- a/fs/btrfs/free-space-tree.c
+++ b/fs/btrfs/free-space-tree.c
@@ -25,6 +25,8 @@ static struct btrfs_root *btrfs_free_space_root(
 		.offset = 0,
 	};
 
+	if (btrfs_fs_incompat(block_group->fs_info, EXTENT_TREE_V2))
+		key.offset = block_group->global_root_id;
 	return btrfs_global_root(block_group->fs_info, &key);
 }
 
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 1f1c25db6f6b8..37f6ec2a3c569 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1911,6 +1911,14 @@ static void update_super_roots(struct btrfs_fs_info *fs_info)
 		super->cache_generation = 0;
 	if (test_bit(BTRFS_FS_UPDATE_UUID_TREE_GEN, &fs_info->flags))
 		super->uuid_tree_generation = root_item->generation;
+
+	if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
+		root_item = &fs_info->block_group_root->root_item;
+
+		super->block_group_root = root_item->bytenr;
+		super->block_group_root_generation = root_item->generation;
+		super->block_group_root_level = root_item->level;
+	}
 }
 
 int btrfs_transaction_in_commit(struct btrfs_fs_info *info)
@@ -2362,6 +2370,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
 	list_add_tail(&fs_info->chunk_root->dirty_list,
 		      &cur_trans->switch_commits);
 
+	if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
+		btrfs_set_root_node(&fs_info->block_group_root->root_item,
+				    fs_info->block_group_root->node);
+		list_add_tail(&fs_info->block_group_root->dirty_list,
+			      &cur_trans->switch_commits);
+	}
+
 	switch_commit_roots(trans);
 
 	ASSERT(list_empty(&cur_trans->dirty_bgs));
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index 64c7d2a2bb3e4..e56c0107eea3a 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -639,8 +639,10 @@ static void block_group_err(const struct extent_buffer *eb, int slot,
 static int check_block_group_item(struct extent_buffer *leaf,
 				  struct btrfs_key *key, int slot)
 {
+	struct btrfs_fs_info *fs_info = leaf->fs_info;
 	struct btrfs_block_group_item bgi;
 	u32 item_size = btrfs_item_size(leaf, slot);
+	u64 chunk_objectid;
 	u64 flags;
 	u64 type;
 
@@ -663,8 +665,23 @@ static int check_block_group_item(struct extent_buffer *leaf,
 
 	read_extent_buffer(leaf, &bgi, btrfs_item_ptr_offset(leaf, slot),
 			   sizeof(bgi));
-	if (unlikely(btrfs_stack_block_group_chunk_objectid(&bgi) !=
-		     BTRFS_FIRST_CHUNK_TREE_OBJECTID)) {
+	chunk_objectid = btrfs_stack_block_group_chunk_objectid(&bgi);
+	if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
+		/*
+		 * We don't init the nr_global_roots until we load the global
+		 * roots, so this could be 0 at mount time.  If it's 0 we'll
+		 * just assume we're fine, and later we'll check against our
+		 * actual value.
+		 */
+		if (unlikely(fs_info->nr_global_roots &&
+			     chunk_objectid >= fs_info->nr_global_roots)) {
+			block_group_err(leaf, slot,
+	"invalid block group global root id, have %llu, needs to be <= %llu",
+					chunk_objectid,
+					fs_info->nr_global_roots);
+			return -EUCLEAN;
+		}
+	} else if (unlikely(chunk_objectid != BTRFS_FIRST_CHUNK_TREE_OBJECTID)) {
 		block_group_err(leaf, slot,
 		"invalid block group chunk objectid, have %llu expect %llu",
 				btrfs_stack_block_group_chunk_objectid(&bgi),
-- 
GitLab


From f9a912a3c45f6c319c2bc11b5410ed1c0392eadd Mon Sep 17 00:00:00 2001
From: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Date: Wed, 26 Jan 2022 05:46:20 -0800
Subject: [PATCH 1366/1586] btrfs: zoned: make zone activation multi stripe
 capable

Currently activation of a zone only works if the block group isn't
spanning more than one zone.

This limitation is purely artificial and can be easily expanded to block
groups being places across multiple zones.

This is a preparation for allowing DUP and later more complex block-group
profiles on zoned btrfs.

Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/zoned.c | 57 ++++++++++++++++++++++++++----------------------
 1 file changed, 31 insertions(+), 26 deletions(-)

diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index f559d517c7c44..e3d198405d39a 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -1781,50 +1781,55 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
 	struct btrfs_device *device;
 	u64 physical;
 	bool ret;
+	int i;
 
 	if (!btrfs_is_zoned(block_group->fs_info))
 		return true;
 
 	map = block_group->physical_map;
-	/* Currently support SINGLE profile only */
-	ASSERT(map->num_stripes == 1);
-	device = map->stripes[0].dev;
-	physical = map->stripes[0].physical;
-
-	if (device->zone_info->max_active_zones == 0)
-		return true;
 
 	spin_lock(&block_group->lock);
-
 	if (block_group->zone_is_active) {
 		ret = true;
 		goto out_unlock;
 	}
 
-	/* No space left */
-	if (block_group->alloc_offset == block_group->zone_capacity) {
-		ret = false;
-		goto out_unlock;
-	}
+	for (i = 0; i < map->num_stripes; i++) {
+		device = map->stripes[i].dev;
+		physical = map->stripes[i].physical;
 
-	if (!btrfs_dev_set_active_zone(device, physical)) {
-		/* Cannot activate the zone */
-		ret = false;
-		goto out_unlock;
-	}
+		if (device->zone_info->max_active_zones == 0)
+			continue;
+
+		/* No space left */
+		if (block_group->alloc_offset == block_group->zone_capacity) {
+			ret = false;
+			goto out_unlock;
+		}
+
+		if (!btrfs_dev_set_active_zone(device, physical)) {
+			/* Cannot activate the zone */
+			ret = false;
+			goto out_unlock;
+		}
+
+		/* Successfully activated all the zones */
+		if (i == map->num_stripes - 1)
+			block_group->zone_is_active = 1;
 
-	/* Successfully activated all the zones */
-	block_group->zone_is_active = 1;
 
+	}
 	spin_unlock(&block_group->lock);
 
-	/* For the active block group list */
-	btrfs_get_block_group(block_group);
+	if (block_group->zone_is_active) {
+		/* For the active block group list */
+		btrfs_get_block_group(block_group);
 
-	spin_lock(&fs_info->zone_active_bgs_lock);
-	ASSERT(list_empty(&block_group->active_bg_list));
-	list_add_tail(&block_group->active_bg_list, &fs_info->zone_active_bgs);
-	spin_unlock(&fs_info->zone_active_bgs_lock);
+		spin_lock(&fs_info->zone_active_bgs_lock);
+		list_add_tail(&block_group->active_bg_list,
+			      &fs_info->zone_active_bgs);
+		spin_unlock(&fs_info->zone_active_bgs_lock);
+	}
 
 	return true;
 
-- 
GitLab


From 4dcbb8ab31c1292aea6a3f240e19523f633320c2 Mon Sep 17 00:00:00 2001
From: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Date: Wed, 26 Jan 2022 05:46:21 -0800
Subject: [PATCH 1367/1586] btrfs: zoned: make zone finishing multi stripe
 capable

Currently finishing of a zone only works if the block group isn't
spanning more than one zone.

This limitation is purely artificial and can be easily expanded to block
groups being places across multiple zones.

This is a preparation for allowing DUP and later more complex block-group
profiles on zoned btrfs.

Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/zoned.c | 46 ++++++++++++++++++++++++----------------------
 1 file changed, 24 insertions(+), 22 deletions(-)

diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index e3d198405d39a..987d2456d3985 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -1845,19 +1845,12 @@ int btrfs_zone_finish(struct btrfs_block_group *block_group)
 	struct btrfs_device *device;
 	u64 physical;
 	int ret = 0;
+	int i;
 
 	if (!btrfs_is_zoned(fs_info))
 		return 0;
 
 	map = block_group->physical_map;
-	/* Currently support SINGLE profile only */
-	ASSERT(map->num_stripes == 1);
-
-	device = map->stripes[0].dev;
-	physical = map->stripes[0].physical;
-
-	if (device->zone_info->max_active_zones == 0)
-		return 0;
 
 	spin_lock(&block_group->lock);
 	if (!block_group->zone_is_active) {
@@ -1909,25 +1902,34 @@ int btrfs_zone_finish(struct btrfs_block_group *block_group)
 	btrfs_clear_data_reloc_bg(block_group);
 	spin_unlock(&block_group->lock);
 
-	ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH,
-			       physical >> SECTOR_SHIFT,
-			       device->zone_info->zone_size >> SECTOR_SHIFT,
-			       GFP_NOFS);
-	btrfs_dec_block_group_ro(block_group);
+	for (i = 0; i < map->num_stripes; i++) {
+		device = map->stripes[i].dev;
+		physical = map->stripes[i].physical;
 
-	if (!ret) {
-		btrfs_dev_clear_active_zone(device, physical);
+		if (device->zone_info->max_active_zones == 0)
+			continue;
 
-		spin_lock(&fs_info->zone_active_bgs_lock);
-		ASSERT(!list_empty(&block_group->active_bg_list));
-		list_del_init(&block_group->active_bg_list);
-		spin_unlock(&fs_info->zone_active_bgs_lock);
+		ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH,
+				       physical >> SECTOR_SHIFT,
+				       device->zone_info->zone_size >> SECTOR_SHIFT,
+				       GFP_NOFS);
+
+		if (ret)
+			return ret;
 
-		/* For active_bg_list */
-		btrfs_put_block_group(block_group);
+		btrfs_dev_clear_active_zone(device, physical);
 	}
+	btrfs_dec_block_group_ro(block_group);
 
-	return ret;
+	spin_lock(&fs_info->zone_active_bgs_lock);
+	ASSERT(!list_empty(&block_group->active_bg_list));
+	list_del_init(&block_group->active_bg_list);
+	spin_unlock(&fs_info->zone_active_bgs_lock);
+
+	/* For active_bg_list */
+	btrfs_put_block_group(block_group);
+
+	return 0;
 }
 
 bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags)
-- 
GitLab


From dbfcc18f27218841ea326bd4072f18f3a165abc3 Mon Sep 17 00:00:00 2001
From: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Date: Wed, 26 Jan 2022 05:46:22 -0800
Subject: [PATCH 1368/1586] btrfs: zoned: prepare for allowing DUP on zoned

Allow for a block-group to be placed on more than one physical zone.

This is a preparation for allowing DUP profiles for meta-data on a zoned
file-system.

Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/zoned.c | 25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 987d2456d3985..4f6f7afaa5a22 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -1215,12 +1215,12 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
 	struct btrfs_device *device;
 	u64 logical = cache->start;
 	u64 length = cache->length;
-	u64 physical = 0;
 	int ret;
 	int i;
 	unsigned int nofs_flag;
 	u64 *alloc_offsets = NULL;
 	u64 *caps = NULL;
+	u64 *physical = NULL;
 	unsigned long *active = NULL;
 	u64 last_alloc = 0;
 	u32 num_sequential = 0, num_conventional = 0;
@@ -1264,6 +1264,12 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
 		goto out;
 	}
 
+	physical = kcalloc(map->num_stripes, sizeof(*physical), GFP_NOFS);
+	if (!physical) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
 	active = bitmap_zalloc(map->num_stripes, GFP_NOFS);
 	if (!active) {
 		ret = -ENOMEM;
@@ -1277,14 +1283,14 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
 		int dev_replace_is_ongoing = 0;
 
 		device = map->stripes[i].dev;
-		physical = map->stripes[i].physical;
+		physical[i] = map->stripes[i].physical;
 
 		if (device->bdev == NULL) {
 			alloc_offsets[i] = WP_MISSING_DEV;
 			continue;
 		}
 
-		is_sequential = btrfs_dev_is_sequential(device, physical);
+		is_sequential = btrfs_dev_is_sequential(device, physical[i]);
 		if (is_sequential)
 			num_sequential++;
 		else
@@ -1299,21 +1305,21 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
 		 * This zone will be used for allocation, so mark this zone
 		 * non-empty.
 		 */
-		btrfs_dev_clear_zone_empty(device, physical);
+		btrfs_dev_clear_zone_empty(device, physical[i]);
 
 		down_read(&dev_replace->rwsem);
 		dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace);
 		if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL)
-			btrfs_dev_clear_zone_empty(dev_replace->tgtdev, physical);
+			btrfs_dev_clear_zone_empty(dev_replace->tgtdev, physical[i]);
 		up_read(&dev_replace->rwsem);
 
 		/*
 		 * The group is mapped to a sequential zone. Get the zone write
 		 * pointer to determine the allocation offset within the zone.
 		 */
-		WARN_ON(!IS_ALIGNED(physical, fs_info->zone_size));
+		WARN_ON(!IS_ALIGNED(physical[i], fs_info->zone_size));
 		nofs_flag = memalloc_nofs_save();
-		ret = btrfs_get_dev_zone(device, physical, &zone);
+		ret = btrfs_get_dev_zone(device, physical[i], &zone);
 		memalloc_nofs_restore(nofs_flag);
 		if (ret == -EIO || ret == -EOPNOTSUPP) {
 			ret = 0;
@@ -1339,7 +1345,7 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
 		case BLK_ZONE_COND_READONLY:
 			btrfs_err(fs_info,
 		"zoned: offline/readonly zone %llu on device %s (devid %llu)",
-				  physical >> device->zone_info->zone_size_shift,
+				  physical[i] >> device->zone_info->zone_size_shift,
 				  rcu_str_deref(device->name), device->devid);
 			alloc_offsets[i] = WP_MISSING_DEV;
 			break;
@@ -1404,7 +1410,7 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
 		if (alloc_offsets[0] == WP_MISSING_DEV) {
 			btrfs_err(fs_info,
 			"zoned: cannot recover write pointer for zone %llu",
-				physical);
+				physical[0]);
 			ret = -EIO;
 			goto out;
 		}
@@ -1465,6 +1471,7 @@ out:
 		cache->physical_map = NULL;
 	}
 	bitmap_free(active);
+	kfree(physical);
 	kfree(caps);
 	kfree(alloc_offsets);
 	free_extent_map(em);
-- 
GitLab


From 265f7237dd258e1d4d0b3f84ffea92d7d1c08861 Mon Sep 17 00:00:00 2001
From: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Date: Wed, 26 Jan 2022 05:46:23 -0800
Subject: [PATCH 1369/1586] btrfs: zoned: allow DUP on meta-data block groups

Allow creating or reading block-groups on a zoned device with DUP as a
meta-data profile.

This works because we're using the zoned_meta_io_lock and REQ_OP_WRITE
operations for meta-data on zoned btrfs, so all writes to meta-data zones
are aligned to the zone's write-pointer.

Upon loading of the block-group, it is ensured both zones do have the same
zone capacity and write-pointer offsets, so no extra machinery is needed
to keep the write-pointers in sync for the meta-data zones. If this
prerequisite is not met, loading of the block-group is refused.

Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/zoned.c | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 4f6f7afaa5a22..3aad1970ee430 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -1419,6 +1419,42 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
 		cache->zone_is_active = test_bit(0, active);
 		break;
 	case BTRFS_BLOCK_GROUP_DUP:
+		if (map->type & BTRFS_BLOCK_GROUP_DATA) {
+			btrfs_err(fs_info, "zoned: profile DUP not yet supported on data bg");
+			ret = -EINVAL;
+			goto out;
+		}
+		if (alloc_offsets[0] == WP_MISSING_DEV) {
+			btrfs_err(fs_info,
+			"zoned: cannot recover write pointer for zone %llu",
+				physical[0]);
+			ret = -EIO;
+			goto out;
+		}
+		if (alloc_offsets[1] == WP_MISSING_DEV) {
+			btrfs_err(fs_info,
+			"zoned: cannot recover write pointer for zone %llu",
+				physical[1]);
+			ret = -EIO;
+			goto out;
+		}
+		if (alloc_offsets[0] != alloc_offsets[1]) {
+			btrfs_err(fs_info,
+			"zoned: write pointer offset mismatch of zones in DUP profile");
+			ret = -EIO;
+			goto out;
+		}
+		if (test_bit(0, active) != test_bit(1, active)) {
+			if (!btrfs_zone_activate(cache)) {
+				ret = -EIO;
+				goto out;
+			}
+		} else {
+			cache->zone_is_active = test_bit(0, active);
+		}
+		cache->alloc_offset = alloc_offsets[0];
+		cache->zone_capacity = min(caps[0], caps[1]);
+		break;
 	case BTRFS_BLOCK_GROUP_RAID1:
 	case BTRFS_BLOCK_GROUP_RAID0:
 	case BTRFS_BLOCK_GROUP_RAID10:
-- 
GitLab


From a55e65b80e318e8917faa781df9955549c6cdaff Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Tue, 1 Feb 2022 15:42:07 +0100
Subject: [PATCH 1370/1586] btrfs: replace BUILD_BUG_ON by static_assert

The static_assert introduced in 6bab69c65013 ("build_bug.h: add wrapper
for _Static_assert") has been supported by compilers for a long time
(gcc 4.6, clang 3.0) and can be used in header files. We don't need to
put BUILD_BUG_ON to random functions but rather keep it next to the
definition.

The exception here is the UAPI header btrfs_tree.h that could be
potentially included by userspace code and the static assert is not
defined (nor used in any other header).

Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/compression.h |  2 ++
 fs/btrfs/ctree.h       | 16 ++++++++--------
 fs/btrfs/inode.c       | 23 +++++++++--------------
 fs/btrfs/ioctl.c       |  5 +----
 fs/btrfs/sysfs.c       | 10 +++++-----
 5 files changed, 25 insertions(+), 31 deletions(-)

diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index 56eef0821e3e3..7dbd14caab011 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -22,6 +22,8 @@ struct btrfs_inode;
 
 /* Maximum length of compressed data stored on disk */
 #define BTRFS_MAX_COMPRESSED		(SZ_128K)
+static_assert((BTRFS_MAX_COMPRESSED % PAGE_SIZE) == 0);
+
 /* Maximum size of data before compression */
 #define BTRFS_MAX_UNCOMPRESSED		(SZ_128K)
 
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 54513c4e891a8..89d2ed6c2eb13 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1639,25 +1639,25 @@ DECLARE_BTRFS_SETGET_BITS(64)
 static inline u##bits btrfs_##name(const struct extent_buffer *eb,	\
 				   const type *s)			\
 {									\
-	BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member);	\
+	static_assert(sizeof(u##bits) == sizeof(((type *)0))->member);	\
 	return btrfs_get_##bits(eb, s, offsetof(type, member));		\
 }									\
 static inline void btrfs_set_##name(const struct extent_buffer *eb, type *s, \
 				    u##bits val)			\
 {									\
-	BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member);	\
+	static_assert(sizeof(u##bits) == sizeof(((type *)0))->member);	\
 	btrfs_set_##bits(eb, s, offsetof(type, member), val);		\
 }									\
 static inline u##bits btrfs_token_##name(struct btrfs_map_token *token,	\
 					 const type *s)			\
 {									\
-	BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member);	\
+	static_assert(sizeof(u##bits) == sizeof(((type *)0))->member);	\
 	return btrfs_get_token_##bits(token, s, offsetof(type, member));\
 }									\
 static inline void btrfs_set_token_##name(struct btrfs_map_token *token,\
 					  type *s, u##bits val)		\
 {									\
-	BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member);	\
+	static_assert(sizeof(u##bits) == sizeof(((type *)0))->member);	\
 	btrfs_set_token_##bits(token, s, offsetof(type, member), val);	\
 }
 
@@ -1688,8 +1688,8 @@ static inline void btrfs_set_##name(type *s, u##bits val)		\
 static inline u64 btrfs_device_total_bytes(const struct extent_buffer *eb,
 					   struct btrfs_dev_item *s)
 {
-	BUILD_BUG_ON(sizeof(u64) !=
-		     sizeof(((struct btrfs_dev_item *)0))->total_bytes);
+	static_assert(sizeof(u64) ==
+		      sizeof(((struct btrfs_dev_item *)0))->total_bytes);
 	return btrfs_get_64(eb, s, offsetof(struct btrfs_dev_item,
 					    total_bytes));
 }
@@ -1697,8 +1697,8 @@ static inline void btrfs_set_device_total_bytes(const struct extent_buffer *eb,
 						struct btrfs_dev_item *s,
 						u64 val)
 {
-	BUILD_BUG_ON(sizeof(u64) !=
-		     sizeof(((struct btrfs_dev_item *)0))->total_bytes);
+	static_assert(sizeof(u64) ==
+		      sizeof(((struct btrfs_dev_item *)0))->total_bytes);
 	WARN_ON(!IS_ALIGNED(val, eb->fs_info->sectorsize));
 	btrfs_set_64(eb, s, offsetof(struct btrfs_dev_item, total_bytes), val);
 }
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index fc3e2f06cb026..1ce1ccf6d22c2 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -629,7 +629,6 @@ static noinline int compress_file_range(struct async_chunk *async_chunk)
 again:
 	will_compress = 0;
 	nr_pages = (end >> PAGE_SHIFT) - (start >> PAGE_SHIFT) + 1;
-	BUILD_BUG_ON((BTRFS_MAX_COMPRESSED % PAGE_SIZE) != 0);
 	nr_pages = min_t(unsigned long, nr_pages,
 			BTRFS_MAX_COMPRESSED / PAGE_SIZE);
 
@@ -5609,21 +5608,17 @@ static struct inode *new_simple_dir(struct super_block *s,
 	return inode;
 }
 
+static_assert(BTRFS_FT_UNKNOWN == FT_UNKNOWN);
+static_assert(BTRFS_FT_REG_FILE == FT_REG_FILE);
+static_assert(BTRFS_FT_DIR == FT_DIR);
+static_assert(BTRFS_FT_CHRDEV == FT_CHRDEV);
+static_assert(BTRFS_FT_BLKDEV == FT_BLKDEV);
+static_assert(BTRFS_FT_FIFO == FT_FIFO);
+static_assert(BTRFS_FT_SOCK == FT_SOCK);
+static_assert(BTRFS_FT_SYMLINK == FT_SYMLINK);
+
 static inline u8 btrfs_inode_type(struct inode *inode)
 {
-	/*
-	 * Compile-time asserts that generic FT_* types still match
-	 * BTRFS_FT_* types
-	 */
-	BUILD_BUG_ON(BTRFS_FT_UNKNOWN != FT_UNKNOWN);
-	BUILD_BUG_ON(BTRFS_FT_REG_FILE != FT_REG_FILE);
-	BUILD_BUG_ON(BTRFS_FT_DIR != FT_DIR);
-	BUILD_BUG_ON(BTRFS_FT_CHRDEV != FT_CHRDEV);
-	BUILD_BUG_ON(BTRFS_FT_BLKDEV != FT_BLKDEV);
-	BUILD_BUG_ON(BTRFS_FT_FIFO != FT_FIFO);
-	BUILD_BUG_ON(BTRFS_FT_SOCK != FT_SOCK);
-	BUILD_BUG_ON(BTRFS_FT_SYMLINK != FT_SYMLINK);
-
 	return fs_umode_to_ftype(inode->i_mode);
 }
 
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 8b2538b7027de..413e2e72b4e24 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1527,6 +1527,7 @@ next:
 }
 
 #define CLUSTER_SIZE	(SZ_256K)
+static_assert(IS_ALIGNED(CLUSTER_SIZE, PAGE_SIZE));
 
 /*
  * Defrag one contiguous target range.
@@ -1672,7 +1673,6 @@ static int defrag_one_cluster(struct btrfs_inode *inode,
 	LIST_HEAD(target_list);
 	int ret;
 
-	BUILD_BUG_ON(!IS_ALIGNED(CLUSTER_SIZE, PAGE_SIZE));
 	ret = defrag_collect_targets(inode, start, len, extent_thresh,
 				     newer_than, do_compress, false,
 				     &target_list, NULL);
@@ -1815,9 +1815,6 @@ int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra,
 		u64 last_scanned = cur;
 		u64 cluster_end;
 
-		/* The cluster size 256K should always be page aligned */
-		BUILD_BUG_ON(!IS_ALIGNED(CLUSTER_SIZE, PAGE_SIZE));
-
 		if (btrfs_defrag_cancelled(fs_info)) {
 			ret = -EAGAIN;
 			break;
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 5f4812fd8b509..17389a42a3ab7 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -1107,6 +1107,11 @@ static inline struct btrfs_fs_info *to_fs_info(struct kobject *kobj)
 static char btrfs_unknown_feature_names[FEAT_MAX][NUM_FEATURE_BITS][BTRFS_FEATURE_NAME_MAX];
 static struct btrfs_feature_attr btrfs_feature_attrs[FEAT_MAX][NUM_FEATURE_BITS];
 
+static_assert(ARRAY_SIZE(btrfs_unknown_feature_names) ==
+	      ARRAY_SIZE(btrfs_feature_attrs));
+static_assert(ARRAY_SIZE(btrfs_unknown_feature_names[0]) ==
+	      ARRAY_SIZE(btrfs_feature_attrs[0]));
+
 static const u64 supported_feature_masks[FEAT_MAX] = {
 	[FEAT_COMPAT]    = BTRFS_FEATURE_COMPAT_SUPP,
 	[FEAT_COMPAT_RO] = BTRFS_FEATURE_COMPAT_RO_SUPP,
@@ -1275,11 +1280,6 @@ static void init_feature_attrs(void)
 	struct btrfs_feature_attr *fa;
 	int set, i;
 
-	BUILD_BUG_ON(ARRAY_SIZE(btrfs_unknown_feature_names) !=
-		     ARRAY_SIZE(btrfs_feature_attrs));
-	BUILD_BUG_ON(ARRAY_SIZE(btrfs_unknown_feature_names[0]) !=
-		     ARRAY_SIZE(btrfs_feature_attrs[0]));
-
 	memset(btrfs_feature_attrs, 0, sizeof(btrfs_feature_attrs));
 	memset(btrfs_unknown_feature_names, 0,
 	       sizeof(btrfs_unknown_feature_names));
-- 
GitLab


From f716fa4798df82f858afb9876f6d2e9f37a0d5c7 Mon Sep 17 00:00:00 2001
From: Pankaj Raghav <p.raghav@samsung.com>
Date: Fri, 4 Feb 2022 13:00:22 +0100
Subject: [PATCH 1371/1586] btrfs: zoned: remove redundant assignment in
 btrfs_check_zoned_mode

Remove the redundant assignment to zone_info variable in
btrfs_check_zoned_mode function.

Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Pankaj Raghav <p.raghav@samsung.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/zoned.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 3aad1970ee430..b7b5fac1c7790 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -652,8 +652,7 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
 		if (model == BLK_ZONED_HM ||
 		    (model == BLK_ZONED_HA && incompat_zoned) ||
 		    (model == BLK_ZONED_NONE && incompat_zoned)) {
-			struct btrfs_zoned_device_info *zone_info =
-				device->zone_info;
+			struct btrfs_zoned_device_info *zone_info;
 
 			zone_info = device->zone_info;
 			zoned_devices++;
-- 
GitLab


From 6b5b7a41d0704f2e3e864b0e4a2539beecefade6 Mon Sep 17 00:00:00 2001
From: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Date: Fri, 4 Feb 2022 04:06:27 -0800
Subject: [PATCH 1372/1586] btrfs: stop checking for NULL return from
 btrfs_get_extent_fiemap()

In get_extent_skip_holes() we're checking the return of
btrfs_get_extent_fiemap() for an error pointer or NULL, but
btrfs_get_extent_fiemap() will never return NULL, only error pointers or
a valid extent_map.

The other caller of btrfs_get_extent_fiemap(), find_desired_extent(),
correctly only checks for error-pointers.

Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_io.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 4c91060d103ae..6bafdec691eb8 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -5390,7 +5390,7 @@ static struct extent_map *get_extent_skip_holes(struct btrfs_inode *inode,
 			break;
 		len = ALIGN(len, sectorsize);
 		em = btrfs_get_extent_fiemap(inode, offset, len);
-		if (IS_ERR_OR_NULL(em))
+		if (IS_ERR(em))
 			return em;
 
 		/* if this isn't a hole return it */
-- 
GitLab


From b4e098a97f17de10e6773191669bc27870050941 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Thu, 3 Feb 2022 14:55:45 +0000
Subject: [PATCH 1373/1586] btrfs: remove unnecessary leaf free space checks
 when pushing items

When trying to push items from a leaf into its left and right neighbours,
we lock the left or right leaf, check if it has the required minimum free
space, COW the leaf and then check again if it has the minimum required
free space. This second check is pointless:

1) Most and foremost because it's not needed. We have a write lock on the
   leaf and on its parent node, so no one can come in and change either
   the pre-COW or post-COW version of the leaf for the whole duration of
   the push_leaf_left() and push_leaf_right() calls;

2) The call to btrfs_leaf_free_space() is not trivial, it has a fair
   amount of arithmetic operations and access to fields in the leaf's
   header and items, so it's not very cheap.

So remove the duplicated free space checks.

This change if part of a patchset that is comprised of the following
patches:

  1/6 btrfs: remove unnecessary leaf free space checks when pushing items
  2/6 btrfs: avoid unnecessary COW of leaves when deleting items from a leaf
  3/6 btrfs: avoid unnecessary computation when deleting items from a leaf
  4/6 btrfs: remove constraint on number of visited leaves when replacing extents
  5/6 btrfs: remove useless path release in the fast fsync path
  6/6 btrfs: prepare extents to be logged before locking a log tree path

The last patch in the series has some performance test result in its
changelog.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.c | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index a7db3f6f1b7b0..cf3d570825242 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -2990,16 +2990,11 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
 	if (free_space < data_size)
 		goto out_unlock;
 
-	/* cow and double check */
 	ret = btrfs_cow_block(trans, root, right, upper,
 			      slot + 1, &right, BTRFS_NESTING_RIGHT_COW);
 	if (ret)
 		goto out_unlock;
 
-	free_space = btrfs_leaf_free_space(right);
-	if (free_space < data_size)
-		goto out_unlock;
-
 	left_nritems = btrfs_header_nritems(left);
 	if (left_nritems == 0)
 		goto out_unlock;
@@ -3224,7 +3219,6 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
 		goto out;
 	}
 
-	/* cow and double check */
 	ret = btrfs_cow_block(trans, root, left,
 			      path->nodes[1], slot - 1, &left,
 			      BTRFS_NESTING_LEFT_COW);
@@ -3235,12 +3229,6 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
 		goto out;
 	}
 
-	free_space = btrfs_leaf_free_space(left);
-	if (free_space < data_size) {
-		ret = 1;
-		goto out;
-	}
-
 	if (check_sibling_keys(left, right)) {
 		ret = -EUCLEAN;
 		goto out;
-- 
GitLab


From 7c4063d19efcdf1973bcf06f15493da4a18d5524 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Thu, 3 Feb 2022 14:55:46 +0000
Subject: [PATCH 1374/1586] btrfs: avoid unnecessary COW of leaves when
 deleting items from a leaf

When we delete items from a leaf, if we end up with more than two thirds
of unused leaf space, we try to delete the leaf by moving all its items
into its left and right neighbour leaves. Sometimes that is not possible
because there is not enough free space in the left and right leaves, and
in that case we end up not deleting our leaf.

The way we are doing this is not ideal and can be improved in the
following ways:

1) When we call push_leaf_left(), we pass a value of 1 byte to the data
   size parameter of push_leaf_left(). This is not realistic value because
   no item can have a size less than 25 bytes, which is the size of struct
   btrfs_item. This means that means that if the left leaf has not enough
   free space to push any item, we end up COWing it even if we end up not
   changing its content at all.

   COWing that leaf means allocating a new metadata extent, marking it
   dirty and doing more IO when committing a transaction or when syncing a
   log tree. For a log tree case, it's particularly more important to
   avoid the useless COW operation, as more IO can imply a higher latency
   for an fsync operation.

   So instead of passing 1 as the minimum data size for push_leaf_left(),
   pass the size of the first item in our leaf, as we don't want to COW
   the left leaf if we can't at least push the first item of our leaf;

2) When we call push_leaf_right(), we also pass a value of 1 byte as the
   data size parameter of push_leaf_right(). Like the previous case, it
   will also result in COWing the right leaf even if we are not able to
   move any items into it, since there can't be any item with a size
   smaller than 25 bytes (the size of struct btrfs_item).

   So instead of passing 1 as the minimum data size to push_leaf_right(),
   pass a size that corresponds to the sum of the size of all the
   remaining items in our leaf. We are not interested in moving less than
   that, because if we do, we are not able to delete our leaf and we have
   COWed the right leaf for nothing. Plus, moving only some of the items
   of our leaf, it means an even less balanced tree.

   Just like the previous case, we want to avoid the useless COW of the
   right leaf, this way we don't have to spend time allocating one new
   metadata extent, and doing more IO when committing a transaction or
   syncing a log tree. For the log tree case it's specially more important
   because more IO can result in a higher latency for a fsync operation.

So adjust the minimum data size passed to push_leaf_left() and
push_leaf_right() as mentioned above.

This change if part of a patchset that is comprised of the following
patches:

  1/6 btrfs: remove unnecessary leaf free space checks when pushing items
  2/6 btrfs: avoid unnecessary COW of leaves when deleting items from a leaf
  3/6 btrfs: avoid unnecessary computation when deleting items from a leaf
  4/6 btrfs: remove constraint on number of visited leaves when replacing extents
  5/6 btrfs: remove useless path release in the fast fsync path
  6/6 btrfs: prepare extents to be logged before locking a log tree path

Not being able to delete a leaf that became less than 1/3 full after
deleting items from it is actually common. For example, for the fio test
mentioned in the changelog of patch 6/6, we are only able to delete a
leaf at btrfs_del_items() about 5.3% of the time, due to its left and
right neighbour leaves not having enough free space to push all the
remaining items into them.

The last patch in the series has some performance test result in its
changelog.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.c | 38 ++++++++++++++++++++++++++++++++------
 1 file changed, 32 insertions(+), 6 deletions(-)

diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index cf3d570825242..5322140f4d222 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -4215,24 +4215,50 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 			fixup_low_keys(path, &disk_key, 1);
 		}
 
-		/* delete the leaf if it is mostly empty */
+		/*
+		 * Try to delete the leaf if it is mostly empty. We do this by
+		 * trying to move all its items into its left and right neighbours.
+		 * If we can't move all the items, then we don't delete it - it's
+		 * not ideal, but future insertions might fill the leaf with more
+		 * items, or items from other leaves might be moved later into our
+		 * leaf due to deletions on those leaves.
+		 */
 		if (used < BTRFS_LEAF_DATA_SIZE(fs_info) / 3) {
+			u32 min_push_space;
+
 			/* push_leaf_left fixes the path.
 			 * make sure the path still points to our leaf
 			 * for possible call to del_ptr below
 			 */
 			slot = path->slots[1];
 			atomic_inc(&leaf->refs);
-
-			wret = push_leaf_left(trans, root, path, 1, 1,
-					      1, (u32)-1);
+			/*
+			 * We want to be able to at least push one item to the
+			 * left neighbour leaf, and that's the first item.
+			 */
+			min_push_space = sizeof(struct btrfs_item) +
+				btrfs_item_size(leaf, 0);
+			wret = push_leaf_left(trans, root, path, 0,
+					      min_push_space, 1, (u32)-1);
 			if (wret < 0 && wret != -ENOSPC)
 				ret = wret;
 
 			if (path->nodes[0] == leaf &&
 			    btrfs_header_nritems(leaf)) {
-				wret = push_leaf_right(trans, root, path, 1,
-						       1, 1, 0);
+				/*
+				 * If we were not able to push all items from our
+				 * leaf to its left neighbour, then attempt to
+				 * either push all the remaining items to the
+				 * right neighbour or none. There's no advantage
+				 * in pushing only some items, instead of all, as
+				 * it's pointless to end up with a leaf having
+				 * too few items while the neighbours can be full
+				 * or nearly full.
+				 */
+				nritems = btrfs_header_nritems(leaf);
+				min_push_space = leaf_space_used(leaf, 0, nritems);
+				wret = push_leaf_right(trans, root, path, 0,
+						       min_push_space, 1, 0);
 				if (wret < 0 && wret != -ENOSPC)
 					ret = wret;
 			}
-- 
GitLab


From 0cae23b66a5f42b8071f42757f13d87c90d3600b Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Thu, 3 Feb 2022 14:55:47 +0000
Subject: [PATCH 1375/1586] btrfs: avoid unnecessary computation when deleting
 items from a leaf

When deleting items from a leaf, we always compute the sum of the data
sizes of the items that are going to be deleted. However we only use
that sum when the last item to delete is behind the last item in the
leaf. This unnecessarily wastes CPU time when we are deleting either
the whole leaf or from some slot > 0 up to the last item in the leaf,
and both of these cases are common (e.g. truncation operation, either
as a result of truncate(2) or when logging inodes, deleting checksums
after removing a large enough extent, etc).

So compute only the sum of the data sizes if the last item to be
deleted does not match the last item in the leaf.

This change if part of a patchset that is comprised of the following
patches:

  1/6 btrfs: remove unnecessary leaf free space checks when pushing items
  2/6 btrfs: avoid unnecessary COW of leaves when deleting items from a leaf
  3/6 btrfs: avoid unnecessary computation when deleting items from a leaf
  4/6 btrfs: remove constraint on number of visited leaves when replacing extents
  5/6 btrfs: remove useless path release in the fast fsync path
  6/6 btrfs: prepare extents to be logged before locking a log tree path

The last patch in the series has some performance test result in its
changelog.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 5322140f4d222..1bb5335c6d757 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -4158,24 +4158,22 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 {
 	struct btrfs_fs_info *fs_info = root->fs_info;
 	struct extent_buffer *leaf;
-	u32 last_off;
-	u32 dsize = 0;
 	int ret = 0;
 	int wret;
-	int i;
 	u32 nritems;
 
 	leaf = path->nodes[0];
-	last_off = btrfs_item_offset(leaf, slot + nr - 1);
-
-	for (i = 0; i < nr; i++)
-		dsize += btrfs_item_size(leaf, slot + i);
-
 	nritems = btrfs_header_nritems(leaf);
 
 	if (slot + nr != nritems) {
-		int data_end = leaf_data_end(leaf);
+		const u32 last_off = btrfs_item_offset(leaf, slot + nr - 1);
+		const int data_end = leaf_data_end(leaf);
 		struct btrfs_map_token token;
+		u32 dsize = 0;
+		int i;
+
+		for (i = 0; i < nr; i++)
+			dsize += btrfs_item_size(leaf, slot + i);
 
 		memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
 			      data_end + dsize,
-- 
GitLab


From 7ecb4c31e773ed6950c6eafcb28146908abab630 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Thu, 3 Feb 2022 14:55:48 +0000
Subject: [PATCH 1376/1586] btrfs: remove constraint on number of visited
 leaves when replacing extents

At btrfs_drop_extents(), we try to replace a range of file extent items
with a new file extent in a single btree search, to avoid the need to do
a search for deletion, followed by a path release and followed by yet
another search for insertion.

When I originally added that optimization, in commit 1acae57b161ef1
("Btrfs: faster file extent item replace operations"), I left a constraint
to do the fast replace only if we visited a single leaf. That was because
in the most common case we find all file extent items that need to be
deleted (or trimmed) in a single leaf, however it can work for other
common cases like when we need to delete a few file extent items located
at the end of a leaf and a few more located at the beginning of the next
leaf. The key for the new file extent item is greater than the key of
any deleted or trimmed file extent item from previous leaves, so we are
fine to use the last leaf that we found as long as we are holding a
write lock on it - even if the new key ends up at slot 0, as if that's
the case, the btree search has obtained a write lock on any upper nodes
that need to have a key pointer updated.

So removed the constraint that limits the optimization to the case where
we visited only a single leaf.

This change if part of a patchset that is comprised of the following
patches:

  1/6 btrfs: remove unnecessary leaf free space checks when pushing items
  2/6 btrfs: avoid unnecessary COW of leaves when deleting items from a leaf
  3/6 btrfs: avoid unnecessary computation when deleting items from a leaf
  4/6 btrfs: remove constraint on number of visited leaves when replacing extents
  5/6 btrfs: remove useless path release in the fast fsync path
  6/6 btrfs: prepare extents to be logged before locking a log tree path

The last patch in the series has some performance test result in its
changelog.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/file.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index a0179cc62913b..391b05019dbdf 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -691,7 +691,6 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans,
 	int modify_tree = -1;
 	int update_refs;
 	int found = 0;
-	int leafs_visited = 0;
 	struct btrfs_path *path = args->path;
 
 	args->bytes_found = 0;
@@ -729,7 +728,6 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans,
 				path->slots[0]--;
 		}
 		ret = 0;
-		leafs_visited++;
 next_slot:
 		leaf = path->nodes[0];
 		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
@@ -741,7 +739,6 @@ next_slot:
 				ret = 0;
 				break;
 			}
-			leafs_visited++;
 			leaf = path->nodes[0];
 			recow = 1;
 		}
@@ -987,7 +984,7 @@ delete_extent_item:
 	 * which case it unlocked our path, so check path->locks[0] matches a
 	 * write lock.
 	 */
-	if (!ret && args->replace_extent && leafs_visited == 1 &&
+	if (!ret && args->replace_extent &&
 	    path->locks[0] == BTRFS_WRITE_LOCK &&
 	    btrfs_leaf_free_space(leaf) >=
 	    sizeof(struct btrfs_item) + args->extent_item_size) {
-- 
GitLab


From d84575317078239ad2835bc42fb4d2d96ab51646 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Thu, 3 Feb 2022 14:55:49 +0000
Subject: [PATCH 1377/1586] btrfs: remove useless path release in the fast
 fsync path

There's no point in calling btrfs_release_path() after finishing the loop
that logs the modified extents, since log_one_extent() returns with the
path released. In case the list of extents is empty, the path is already
released, so there's no need for that case as well.
So just remove that unnecessary btrfs_release_path() call.

This change if part of a patchset that is comprised of the following
patches:

  1/6 btrfs: remove unnecessary leaf free space checks when pushing items
  2/6 btrfs: avoid unnecessary COW of leaves when deleting items from a leaf
  3/6 btrfs: avoid unnecessary computation when deleting items from a leaf
  4/6 btrfs: remove constraint on number of visited leaves when replacing extents
  5/6 btrfs: remove useless path release in the fast fsync path
  6/6 btrfs: prepare extents to be logged before locking a log tree path

The last patch in the series has some performance test result in its
changelog.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/tree-log.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 24f95d7b51900..5b14339d269ff 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4964,7 +4964,6 @@ process:
 	WARN_ON(!list_empty(&extents));
 	write_unlock(&tree->lock);
 
-	btrfs_release_path(path);
 	if (!ret)
 		ret = btrfs_log_prealloc_extents(trans, inode, path);
 	if (ret)
-- 
GitLab


From e1f53ed874e2f242e7009c3123f9776c5aec61c3 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Thu, 3 Feb 2022 14:55:50 +0000
Subject: [PATCH 1378/1586] btrfs: prepare extents to be logged before locking
 a log tree path

When we want to log an extent, in the fast fsync path, we obtain a path
to the leaf that will hold the file extent item either through a deletion
search, via btrfs_drop_extents(), or through an insertion search using
btrfs_insert_empty_item(). After that we fill the file extent item's
fields one by one directly on the leaf.

Instead of doing that, we could prepare the file extent item before
obtaining a btree path, and then copy the prepared extent item with a
single operation once we get the path. This helps avoid some contention
on the log tree, since we are holding write locks for longer than
necessary, especially in the case where the path is obtained via
btrfs_drop_extents() through a deletion search, which always keeps a
write lock on the nodes at levels 1 and 2 (besides the leaf).

This change does that, we prepare the file extent item that is going to
be inserted before acquiring a path, and then copy it into a leaf using
a single copy operation once we get a path.

This change if part of a patchset that is comprised of the following
patches:

  1/6 btrfs: remove unnecessary leaf free space checks when pushing items
  2/6 btrfs: avoid unnecessary COW of leaves when deleting items from a leaf
  3/6 btrfs: avoid unnecessary computation when deleting items from a leaf
  4/6 btrfs: remove constraint on number of visited leaves when replacing extents
  5/6 btrfs: remove useless path release in the fast fsync path
  6/6 btrfs: prepare extents to be logged before locking a log tree path

The following test was run to measure the impact of the whole patchset:

  $ cat test.sh
  #!/bin/bash

  DEV=/dev/sdi
  MNT=/mnt/sdi
  MOUNT_OPTIONS="-o ssd"
  MKFS_OPTIONS="-R free-space-tree -O no-holes"

  NUM_JOBS=8
  FILE_SIZE=128M
  RUN_TIME=200

  cat <<EOF > /tmp/fio-job.ini
  [writers]
  rw=randwrite
  fsync=1
  fallocate=none
  group_reporting=1
  direct=0
  bssplit=4k/20:8k/20:16k/20:32k/10:64k/10:128k/5:256k/5:512k/5:1m/5
  ioengine=sync
  filesize=$FILE_SIZE
  runtime=$RUN_TIME
  time_based
  directory=$MNT
  numjobs=$NUM_JOBS
  thread
  EOF

  echo "performance" | \
      tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor

  echo
  echo "Using config:"
  echo
  cat /tmp/fio-job.ini
  echo

  umount $MNT &> /dev/null
  mkfs.btrfs -f $MKFS_OPTIONS $DEV
  mount $MOUNT_OPTIONS $DEV $MNT

  fio /tmp/fio-job.ini

  umount $MNT

The test ran inside a VM (8 cores, 32G of RAM) with the target disk
mapping to a raw NVMe device, and using a non-debug kernel config
(Debian's default config).

Before the patchset:

WRITE: bw=116MiB/s (122MB/s), 116MiB/s-116MiB/s (122MB/s-122MB/s), io=22.7GiB (24.4GB), run=200013-200013msec

After the patchset:

WRITE: bw=125MiB/s (131MB/s), 125MiB/s-125MiB/s (131MB/s-131MB/s), io=24.3GiB (26.1GB), run=200007-200007msec

A 7.8% gain on throughput and +7.0% more IO done in the same period of
time (200 seconds).

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/tree-log.c | 64 +++++++++++++++++++--------------------------
 1 file changed, 27 insertions(+), 37 deletions(-)

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 5b14339d269ff..d5b7b565036db 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4675,14 +4675,34 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
 {
 	struct btrfs_drop_extents_args drop_args = { 0 };
 	struct btrfs_root *log = inode->root->log_root;
-	struct btrfs_file_extent_item *fi;
+	struct btrfs_file_extent_item fi = { 0 };
 	struct extent_buffer *leaf;
-	struct btrfs_map_token token;
 	struct btrfs_key key;
 	u64 extent_offset = em->start - em->orig_start;
 	u64 block_len;
 	int ret;
 
+	btrfs_set_stack_file_extent_generation(&fi, trans->transid);
+	if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
+		btrfs_set_stack_file_extent_type(&fi, BTRFS_FILE_EXTENT_PREALLOC);
+	else
+		btrfs_set_stack_file_extent_type(&fi, BTRFS_FILE_EXTENT_REG);
+
+	block_len = max(em->block_len, em->orig_block_len);
+	if (em->compress_type != BTRFS_COMPRESS_NONE) {
+		btrfs_set_stack_file_extent_disk_bytenr(&fi, em->block_start);
+		btrfs_set_stack_file_extent_disk_num_bytes(&fi, block_len);
+	} else if (em->block_start < EXTENT_MAP_LAST_BYTE) {
+		btrfs_set_stack_file_extent_disk_bytenr(&fi, em->block_start -
+							extent_offset);
+		btrfs_set_stack_file_extent_disk_num_bytes(&fi, block_len);
+	}
+
+	btrfs_set_stack_file_extent_offset(&fi, extent_offset);
+	btrfs_set_stack_file_extent_num_bytes(&fi, em->len);
+	btrfs_set_stack_file_extent_ram_bytes(&fi, em->ram_bytes);
+	btrfs_set_stack_file_extent_compression(&fi, em->compress_type);
+
 	ret = log_extent_csums(trans, inode, log, em, ctx);
 	if (ret)
 		return ret;
@@ -4701,7 +4721,7 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
 		drop_args.start = em->start;
 		drop_args.end = em->start + em->len;
 		drop_args.replace_extent = true;
-		drop_args.extent_item_size = sizeof(*fi);
+		drop_args.extent_item_size = sizeof(fi);
 		ret = btrfs_drop_extents(trans, log, inode, &drop_args);
 		if (ret)
 			return ret;
@@ -4713,44 +4733,14 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
 		key.offset = em->start;
 
 		ret = btrfs_insert_empty_item(trans, log, path, &key,
-					      sizeof(*fi));
+					      sizeof(fi));
 		if (ret)
 			return ret;
 	}
 	leaf = path->nodes[0];
-	btrfs_init_map_token(&token, leaf);
-	fi = btrfs_item_ptr(leaf, path->slots[0],
-			    struct btrfs_file_extent_item);
-
-	btrfs_set_token_file_extent_generation(&token, fi, trans->transid);
-	if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
-		btrfs_set_token_file_extent_type(&token, fi,
-						 BTRFS_FILE_EXTENT_PREALLOC);
-	else
-		btrfs_set_token_file_extent_type(&token, fi,
-						 BTRFS_FILE_EXTENT_REG);
-
-	block_len = max(em->block_len, em->orig_block_len);
-	if (em->compress_type != BTRFS_COMPRESS_NONE) {
-		btrfs_set_token_file_extent_disk_bytenr(&token, fi,
-							em->block_start);
-		btrfs_set_token_file_extent_disk_num_bytes(&token, fi, block_len);
-	} else if (em->block_start < EXTENT_MAP_LAST_BYTE) {
-		btrfs_set_token_file_extent_disk_bytenr(&token, fi,
-							em->block_start -
-							extent_offset);
-		btrfs_set_token_file_extent_disk_num_bytes(&token, fi, block_len);
-	} else {
-		btrfs_set_token_file_extent_disk_bytenr(&token, fi, 0);
-		btrfs_set_token_file_extent_disk_num_bytes(&token, fi, 0);
-	}
-
-	btrfs_set_token_file_extent_offset(&token, fi, extent_offset);
-	btrfs_set_token_file_extent_num_bytes(&token, fi, em->len);
-	btrfs_set_token_file_extent_ram_bytes(&token, fi, em->ram_bytes);
-	btrfs_set_token_file_extent_compression(&token, fi, em->compress_type);
-	btrfs_set_token_file_extent_encryption(&token, fi, 0);
-	btrfs_set_token_file_extent_other_encoding(&token, fi, 0);
+	write_extent_buffer(leaf, &fi,
+			    btrfs_item_ptr_offset(leaf, path->slots[0]),
+			    sizeof(fi));
 	btrfs_mark_buffer_dirty(leaf);
 
 	btrfs_release_path(path);
-- 
GitLab


From c03475506e3e7fed2c428a953f1fcc01c04aabd5 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Thu, 3 Feb 2022 15:36:42 +0000
Subject: [PATCH 1379/1586] btrfs: stop checking for NULL return from
 btrfs_get_extent()

At extent_io.c, in the read page and write page code paths, we are testing
if the return value from btrfs_get_extent() can be NULL. However that is
not possible, as btrfs_get_extent() always returns either an error pointer
or a (non-NULL) pointer to an extent map structure.

Everywhere else outside extent_io.c we never check for NULL, we always
treat any returned value as a non-NULL pointer if it does not encode an
error.

So check only for the IS_ERR() case at extent_io.c.

Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_io.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 6bafdec691eb8..37589fd440b5a 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3534,7 +3534,7 @@ __get_extent_map(struct inode *inode, struct page *page, size_t pg_offset,
 	}
 
 	em = btrfs_get_extent(BTRFS_I(inode), page, pg_offset, start, len);
-	if (em_cached && !IS_ERR_OR_NULL(em)) {
+	if (em_cached && !IS_ERR(em)) {
 		BUG_ON(*em_cached);
 		refcount_inc(&em->refs);
 		*em_cached = em;
@@ -3608,7 +3608,7 @@ int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
 		}
 		em = __get_extent_map(inode, page, pg_offset, cur,
 				      end - cur + 1, em_cached);
-		if (IS_ERR_OR_NULL(em)) {
+		if (IS_ERR(em)) {
 			unlock_extent(tree, cur, end);
 			end_page_read(page, false, cur, end + 1 - cur);
 			break;
@@ -3951,7 +3951,7 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
 		}
 
 		em = btrfs_get_extent(inode, NULL, 0, cur, end - cur + 1);
-		if (IS_ERR_OR_NULL(em)) {
+		if (IS_ERR(em)) {
 			btrfs_page_set_error(fs_info, page, cur, end - cur + 1);
 			ret = PTR_ERR_OR_ZERO(em);
 			break;
-- 
GitLab


From bbf0ea7ea3855b35a2ab0b8d0000ce06cc58e4bf Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Thu, 3 Feb 2022 15:36:43 +0000
Subject: [PATCH 1380/1586] btrfs: fix lost error return value when reading a
 data page

At btrfs_do_readpage(), if we get an error when trying to lookup for an
extent map, we end up marking the page with the error bit, clearing
the uptodate bit on it, and doing everything else that should be done.
However we return success (0) to the caller, when we should return the
error encoded in the extent map pointer. So fix that by returning the
error encoded in the pointer.

Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_io.c | 1 +
 fs/btrfs/inode.c     | 9 +++++++--
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 37589fd440b5a..8b6e9ca3170a9 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3611,6 +3611,7 @@ int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
 		if (IS_ERR(em)) {
 			unlock_extent(tree, cur, end);
 			end_page_read(page, false, cur, end + 1 - cur);
+			ret = PTR_ERR(em);
 			break;
 		}
 		extent_offset = cur - em->start;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 1ce1ccf6d22c2..ffe327090277a 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -8118,8 +8118,13 @@ int btrfs_readpage(struct file *file, struct page *page)
 	btrfs_lock_and_flush_ordered_range(inode, start, end, NULL);
 
 	ret = btrfs_do_readpage(page, NULL, &bio_ctrl, 0, NULL);
-	if (bio_ctrl.bio)
-		ret = submit_one_bio(bio_ctrl.bio, 0, bio_ctrl.bio_flags);
+	if (bio_ctrl.bio) {
+		int ret2;
+
+		ret2 = submit_one_bio(bio_ctrl.bio, 0, bio_ctrl.bio_flags);
+		if (ret == 0)
+			ret = ret2;
+	}
 	return ret;
 }
 
-- 
GitLab


From ad3fc7946b1829213bbdbb2b9ad0d124b31ae4a7 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Thu, 3 Feb 2022 15:36:44 +0000
Subject: [PATCH 1381/1586] btrfs: remove no longer used counter when reading
 data page

After commit 92082d40976ed0 ("btrfs: integrate page status update for
data read path into begin/end_page_read"), the 'nr' counter at
btrfs_do_readpage() is no longer used, we increment it but we never
read from it. So just remove it.

Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_io.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 8b6e9ca3170a9..13155e57e8c23 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3563,7 +3563,6 @@ int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
 	u64 cur_end;
 	struct extent_map *em;
 	int ret = 0;
-	int nr = 0;
 	size_t pg_offset = 0;
 	size_t iosize;
 	size_t blocksize = inode->i_sb->s_blocksize;
@@ -3722,9 +3721,7 @@ int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
 					 end_bio_extent_readpage, 0,
 					 this_bio_flag,
 					 force_bio_submit);
-		if (!ret) {
-			nr++;
-		} else {
+		if (ret) {
 			unlock_extent(tree, cur, cur + iosize - 1);
 			end_page_read(page, false, cur, iosize);
 			goto out;
-- 
GitLab


From 6d3b050efa079f5bc4d5e2a8f37f0dc0345a2096 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Thu, 3 Feb 2022 15:36:45 +0000
Subject: [PATCH 1382/1586] btrfs: assert we have a write lock when removing
 and replacing extent maps

Removing or replacing an extent map requires holding a write lock on the
extent map's tree. We currently do that everywhere, except in one of the
self tests, where it's harmless since there's no concurrency.

In order to catch possible races in the future, assert that we are holding
a write lock on the extent map tree before removing or replacing an extent
map in the tree, and update the self test to obtain a write lock before
removing extent maps.

Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_map.c             | 4 ++++
 fs/btrfs/tests/extent-map-tests.c | 2 ++
 2 files changed, 6 insertions(+)

diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index c28ceddefae42..6fee14ce2e6b1 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -492,6 +492,8 @@ struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
  */
 void remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
 {
+	lockdep_assert_held_write(&tree->lock);
+
 	WARN_ON(test_bit(EXTENT_FLAG_PINNED, &em->flags));
 	rb_erase_cached(&em->rb_node, &tree->map);
 	if (!test_bit(EXTENT_FLAG_LOGGING, &em->flags))
@@ -506,6 +508,8 @@ void replace_extent_mapping(struct extent_map_tree *tree,
 			    struct extent_map *new,
 			    int modified)
 {
+	lockdep_assert_held_write(&tree->lock);
+
 	WARN_ON(test_bit(EXTENT_FLAG_PINNED, &cur->flags));
 	ASSERT(extent_map_in_tree(cur));
 	if (!test_bit(EXTENT_FLAG_LOGGING, &cur->flags))
diff --git a/fs/btrfs/tests/extent-map-tests.c b/fs/btrfs/tests/extent-map-tests.c
index 319fed82d741f..c5b3a631bf4fb 100644
--- a/fs/btrfs/tests/extent-map-tests.c
+++ b/fs/btrfs/tests/extent-map-tests.c
@@ -15,6 +15,7 @@ static void free_extent_map_tree(struct extent_map_tree *em_tree)
 	struct extent_map *em;
 	struct rb_node *node;
 
+	write_lock(&em_tree->lock);
 	while (!RB_EMPTY_ROOT(&em_tree->map.rb_root)) {
 		node = rb_first_cached(&em_tree->map);
 		em = rb_entry(node, struct extent_map, rb_node);
@@ -32,6 +33,7 @@ static void free_extent_map_tree(struct extent_map_tree *em_tree)
 #endif
 		free_extent_map(em);
 	}
+	write_unlock(&em_tree->lock);
 }
 
 /*
-- 
GitLab


From 40e7efe057ae7446915dc9d95bbfe4c6c7329d89 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Tue, 8 Feb 2022 13:31:19 +0800
Subject: [PATCH 1383/1586] btrfs: populate extent_map::generation when reading
 from disk

When btrfs_get_extent() tries to get some file extent from disk, it
never populates extent_map::generation, leaving the value to be 0.

On the other hand, for extent map generated by IO, it will get its
generation properly set at finish_ordered_io()

 finish_ordered_io()
 |- unpin_extent_cache(gen = trans->transid)
    |- em->generation = gen;

[CAUSE]
Since extent_map::generation is mostly used by fsync code, and for fsync
they only care about modified extents, which all have their
em::generation > 0.

Thus it's fine to not populate em read from disk for fsync.

[CORNER CASE]
However autodefrag also relies on em::generation to determine if one
extent needs to be defragged.

This unpopulated extent_map::generation can prevent the following
autodefrag case from working:

	mkfs.btrfs -f $dev
	mount $dev $mnt -o autodefrag

	# initial write to queue the inode for autodefrag
	xfs_io -f -c "pwrite 0 4k" $mnt/file
	sync

	# Real fragmented write
	xfs_io -f -s -c "pwrite -b 4096 0 32k" $mnt/file
	sync
	echo "=== before autodefrag ==="
	xfs_io -c "fiemap -v" $mnt/file

	# Drop cache to force em to be read from disk
	echo 3 > /proc/sys/vm/drop_caches
	mount -o remount,commit=1 $mnt
	sleep 3
	sync

	echo "=== After autodefrag ==="
	xfs_io -c "fiemap -v" $mnt/file
	umount $mnt

The result looks like this:

  === before autodefrag ===
  /mnt/btrfs/file:
   EXT: FILE-OFFSET      BLOCK-RANGE      TOTAL FLAGS
     0: [0..15]:         26672..26687        16   0x0
     1: [16..31]:        26656..26671        16   0x0
     2: [32..47]:        26640..26655        16   0x0
     3: [48..63]:        26624..26639        16   0x1
  === After autodefrag ===
  /mnt/btrfs/file:
   EXT: FILE-OFFSET      BLOCK-RANGE      TOTAL FLAGS
     0: [0..15]:         26672..26687        16   0x0
     1: [16..31]:        26656..26671        16   0x0
     2: [32..47]:        26640..26655        16   0x0
     3: [48..63]:        26624..26639        16   0x1

This fragmented 32K will not be defragged by autodefrag.

[FIX]
To make things less weird, just populate extent_map::generation when
reading file extents from disk.

This would make above fragmented extents to be properly defragged:

  == before autodefrag ===
  /mnt/btrfs/file:
   EXT: FILE-OFFSET      BLOCK-RANGE      TOTAL FLAGS
     0: [0..15]:         26672..26687        16   0x0
     1: [16..31]:        26656..26671        16   0x0
     2: [32..47]:        26640..26655        16   0x0
     3: [48..63]:        26624..26639        16   0x1
  === After autodefrag ===
  /mnt/btrfs/file:
   EXT: FILE-OFFSET      BLOCK-RANGE      TOTAL FLAGS
     0: [0..63]:         26688..26751        64   0x1

Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/file-item.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 90c5c38836ab3..9a3de652ada84 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -1211,6 +1211,7 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
 	extent_start = key.offset;
 	extent_end = btrfs_file_extent_end(path);
 	em->ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
+	em->generation = btrfs_file_extent_generation(leaf, fi);
 	if (type == BTRFS_FILE_EXTENT_REG ||
 	    type == BTRFS_FILE_EXTENT_PREALLOC) {
 		em->start = extent_start;
-- 
GitLab


From dc4a4bdb3f22f9a97a59cdf3c6a1257435ff3700 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?D=C4=81vis=20Mos=C4=81ns?= <davispuh@gmail.com>
Date: Wed, 2 Feb 2022 23:44:54 +0200
Subject: [PATCH 1384/1586] btrfs: add lzo workspace buffer length constants
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It makes it more readable for length checking and is be used repeatedly.

Signed-off-by: Dāvis Mosāns <davispuh@gmail.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/lzo.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
index e6e28a9c79877..430ad36b8b080 100644
--- a/fs/btrfs/lzo.c
+++ b/fs/btrfs/lzo.c
@@ -55,6 +55,9 @@
  * 0x1000   | SegHdr N+1| Data payload N+1 ...                |
  */
 
+#define WORKSPACE_BUF_LENGTH	(lzo1x_worst_compress(PAGE_SIZE))
+#define WORKSPACE_CBUF_LENGTH	(lzo1x_worst_compress(PAGE_SIZE))
+
 struct workspace {
 	void *mem;
 	void *buf;	/* where decompressed data goes */
@@ -83,8 +86,8 @@ struct list_head *lzo_alloc_workspace(unsigned int level)
 		return ERR_PTR(-ENOMEM);
 
 	workspace->mem = kvmalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL);
-	workspace->buf = kvmalloc(lzo1x_worst_compress(PAGE_SIZE), GFP_KERNEL);
-	workspace->cbuf = kvmalloc(lzo1x_worst_compress(PAGE_SIZE), GFP_KERNEL);
+	workspace->buf = kvmalloc(WORKSPACE_BUF_LENGTH, GFP_KERNEL);
+	workspace->cbuf = kvmalloc(WORKSPACE_CBUF_LENGTH, GFP_KERNEL);
 	if (!workspace->mem || !workspace->buf || !workspace->cbuf)
 		goto fail;
 
@@ -380,7 +383,7 @@ int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
 		kunmap(cur_page);
 		cur_in += LZO_LEN;
 
-		if (seg_len > lzo1x_worst_compress(PAGE_SIZE)) {
+		if (seg_len > WORKSPACE_CBUF_LENGTH) {
 			/*
 			 * seg_len shouldn't be larger than we have allocated
 			 * for workspace->cbuf
@@ -433,7 +436,7 @@ int lzo_decompress(struct list_head *ws, unsigned char *data_in,
 	struct workspace *workspace = list_entry(ws, struct workspace, list);
 	size_t in_len;
 	size_t out_len;
-	size_t max_segment_len = lzo1x_worst_compress(PAGE_SIZE);
+	size_t max_segment_len = WORKSPACE_BUF_LENGTH;
 	int ret = 0;
 	char *kaddr;
 	unsigned long bytes;
-- 
GitLab


From a8f6f619e44402f7005da79470835b98dc88450f Mon Sep 17 00:00:00 2001
From: Sidong Yang <realwakka@gmail.com>
Date: Sun, 6 Feb 2022 12:52:48 +0000
Subject: [PATCH 1385/1586] btrfs: qgroup: remove duplicated check in adding
 qgroup relations

Removes duplicated check when adding qgroup relations.
btrfs_add_qgroup_relations function adds relations by calling
add_relation_rb(). add_relation_rb() checks that member/parentid exists
in current qgroup_tree. But it already checked before calling the
function. It seems that we don't need to double check.

Add new function __add_relation_rb() that adds relations with
qgroup structures and makes old function use the new one. And it makes
btrfs_add_qgroup_relation() function work without double checks by
calling the new function.

Signed-off-by: Sidong Yang <realwakka@gmail.com>
Reviewed-by: David Sterba <dsterba@suse.com>
[ add comments ]
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/qgroup.c | 41 ++++++++++++++++++++++++++++++++---------
 1 file changed, 32 insertions(+), 9 deletions(-)

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index d437511ed3540..a22b901ac25a7 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -258,16 +258,19 @@ static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid)
 	return 0;
 }
 
-/* must be called with qgroup_lock held */
-static int add_relation_rb(struct btrfs_fs_info *fs_info,
-			   u64 memberid, u64 parentid)
+/*
+ * Add relation specified by two qgroups.
+ *
+ * Must be called with qgroup_lock held.
+ *
+ * Return: 0        on success
+ *         -ENOENT  if one of the qgroups is NULL
+ *         <0       other errors
+ */
+static int __add_relation_rb(struct btrfs_qgroup *member, struct btrfs_qgroup *parent)
 {
-	struct btrfs_qgroup *member;
-	struct btrfs_qgroup *parent;
 	struct btrfs_qgroup_list *list;
 
-	member = find_qgroup_rb(fs_info, memberid);
-	parent = find_qgroup_rb(fs_info, parentid);
 	if (!member || !parent)
 		return -ENOENT;
 
@@ -283,7 +286,27 @@ static int add_relation_rb(struct btrfs_fs_info *fs_info,
 	return 0;
 }
 
-/* must be called with qgroup_lock held */
+/*
+ * Add relation specified by two qgoup ids.
+ *
+ * Must be called with qgroup_lock held.
+ *
+ * Return: 0        on success
+ *         -ENOENT  if one of the ids does not exist
+ *         <0       other errors
+ */
+static int add_relation_rb(struct btrfs_fs_info *fs_info, u64 memberid, u64 parentid)
+{
+	struct btrfs_qgroup *member;
+	struct btrfs_qgroup *parent;
+
+	member = find_qgroup_rb(fs_info, memberid);
+	parent = find_qgroup_rb(fs_info, parentid);
+
+	return __add_relation_rb(member, parent);
+}
+
+/* Must be called with qgroup_lock held */
 static int del_relation_rb(struct btrfs_fs_info *fs_info,
 			   u64 memberid, u64 parentid)
 {
@@ -1457,7 +1480,7 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
 	}
 
 	spin_lock(&fs_info->qgroup_lock);
-	ret = add_relation_rb(fs_info, src, dst);
+	ret = __add_relation_rb(member, parent);
 	if (ret < 0) {
 		spin_unlock(&fs_info->qgroup_lock);
 		goto out;
-- 
GitLab


From 457b0a3d6e266f40a367205e224b971e4da7f5f8 Mon Sep 17 00:00:00 2001
From: Sidong Yang <realwakka@gmail.com>
Date: Fri, 11 Feb 2022 13:48:29 +0000
Subject: [PATCH 1386/1586] btrfs: qgroup: remove outdated TODO comments

These comments are old, outdated and not very specific. It seems that it
doesn't help to inspire anybody to work on that.  So we remove them.

Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: Sidong Yang <realwakka@gmail.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/qgroup.c | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index a22b901ac25a7..1866b1f0da01a 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -25,18 +25,6 @@
 #include "sysfs.h"
 #include "tree-mod-log.h"
 
-/* TODO XXX FIXME
- *  - subvol delete -> delete when ref goes to 0? delete limits also?
- *  - reorganize keys
- *  - compressed
- *  - sync
- *  - copy also limits on subvol creation
- *  - limit
- *  - caches for ulists
- *  - performance benchmarks
- *  - check all ioctl parameters
- */
-
 /*
  * Helpers to access qgroup reservation
  *
-- 
GitLab


From 871129332d74c9e94bd110932ac4445833995639 Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov@fb.com>
Date: Wed, 4 Sep 2019 12:13:25 -0700
Subject: [PATCH 1387/1586] fs: export rw_verify_area()

I'm adding btrfs ioctls to read and write compressed data, and rather
than duplicating the checks in rw_verify_area(), let's just export it.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Omar Sandoval <osandov@fb.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/internal.h      | 5 -----
 fs/read_write.c    | 1 +
 include/linux/fs.h | 1 +
 3 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/fs/internal.h b/fs/internal.h
index 8590c973c2f42..711bdc00ec7cc 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -157,11 +157,6 @@ extern char *simple_dname(struct dentry *, char *, int);
 extern void dput_to_list(struct dentry *, struct list_head *);
 extern void shrink_dentry_list(struct list_head *);
 
-/*
- * read_write.c
- */
-extern int rw_verify_area(int, struct file *, const loff_t *, size_t);
-
 /*
  * pipe.c
  */
diff --git a/fs/read_write.c b/fs/read_write.c
index 0074afa7ecb3e..4d60146243dfa 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -385,6 +385,7 @@ int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t
 	return security_file_permission(file,
 				read_write == READ ? MAY_READ : MAY_WRITE);
 }
+EXPORT_SYMBOL(rw_verify_area);
 
 static ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
 {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e2d892b201b07..0ebfc25192129 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3173,6 +3173,7 @@ extern loff_t fixed_size_llseek(struct file *file, loff_t offset,
 		int whence, loff_t size);
 extern loff_t no_seek_end_llseek_size(struct file *, loff_t, int, loff_t);
 extern loff_t no_seek_end_llseek(struct file *, loff_t, int);
+int rw_verify_area(int, struct file *, const loff_t *, size_t);
 extern int generic_file_open(struct inode * inode, struct file * filp);
 extern int nonseekable_open(struct inode * inode, struct file * filp);
 extern int stream_open(struct inode * inode, struct file * filp);
-- 
GitLab


From f6f7a25a650818c61defa97d60a79b216618315f Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov@fb.com>
Date: Thu, 12 Aug 2021 15:34:57 -0700
Subject: [PATCH 1388/1586] fs: export variant of generic_write_checks without
 iov_iter

Encoded I/O in Btrfs needs to check a write with a given logical size
without an iov_iter that matches that size (because the iov_iter we have
is for the compressed data). So, factor out the parts of
generic_write_check() that don't need an iov_iter into a new
generic_write_checks_count() function and export that.

Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Omar Sandoval <osandov@fb.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/read_write.c    | 33 ++++++++++++++++++++-------------
 include/linux/fs.h |  1 +
 2 files changed, 21 insertions(+), 13 deletions(-)

diff --git a/fs/read_write.c b/fs/read_write.c
index 4d60146243dfa..dc5000173b80a 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1618,24 +1618,16 @@ int generic_write_check_limits(struct file *file, loff_t pos, loff_t *count)
 	return 0;
 }
 
-/*
- * Performs necessary checks before doing a write
- *
- * Can adjust writing position or amount of bytes to write.
- * Returns appropriate error code that caller should return or
- * zero in case that write should be allowed.
- */
-ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
+/* Like generic_write_checks(), but takes size of write instead of iter. */
+int generic_write_checks_count(struct kiocb *iocb, loff_t *count)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
-	loff_t count;
-	int ret;
 
 	if (IS_SWAPFILE(inode))
 		return -ETXTBSY;
 
-	if (!iov_iter_count(from))
+	if (!*count)
 		return 0;
 
 	/* FIXME: this is for backwards compatibility with 2.4 */
@@ -1645,8 +1637,23 @@ ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
 	if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT))
 		return -EINVAL;
 
-	count = iov_iter_count(from);
-	ret = generic_write_check_limits(file, iocb->ki_pos, &count);
+	return generic_write_check_limits(iocb->ki_filp, iocb->ki_pos, count);
+}
+EXPORT_SYMBOL(generic_write_checks_count);
+
+/*
+ * Performs necessary checks before doing a write
+ *
+ * Can adjust writing position or amount of bytes to write.
+ * Returns appropriate error code that caller should return or
+ * zero in case that write should be allowed.
+ */
+ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
+{
+	loff_t count = iov_iter_count(from);
+	int ret;
+
+	ret = generic_write_checks_count(iocb, &count);
 	if (ret)
 		return ret;
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 0ebfc25192129..27746a3da8fd6 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3130,6 +3130,7 @@ extern int sb_min_blocksize(struct super_block *, int);
 extern int generic_file_mmap(struct file *, struct vm_area_struct *);
 extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *);
 extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *);
+int generic_write_checks_count(struct kiocb *iocb, loff_t *count);
 extern int generic_write_check_limits(struct file *file, loff_t pos,
 		loff_t *count);
 extern int generic_file_rw_checks(struct file *file_in, struct file *file_out);
-- 
GitLab


From e331f6b19f8adde2307588bb325ae5de78617c20 Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov@fb.com>
Date: Wed, 6 Nov 2019 15:38:43 -0800
Subject: [PATCH 1389/1586] btrfs: don't advance offset for compressed bios in
 btrfs_csum_one_bio()

btrfs_csum_one_bio() loops over each filesystem block in the bio while
keeping a cursor of its current logical position in the file in order to
look up the ordered extent to add the checksums to. However, this
doesn't make much sense for compressed extents, as a sector on disk does
not correspond to a sector of decompressed file data. It happens to work
because:

1) the compressed bio always covers one ordered extent
2) the size of the bio is always less than the size of the ordered
   extent

However, the second point will not always be true for encoded writes.

Let's add a boolean parameter to btrfs_csum_one_bio() to indicate that
it can assume that the bio only covers one ordered extent. Since we're
already changing the signature, let's get rid of the contig parameter
and make it implied by the offset parameter, similar to the change we
recently made to btrfs_lookup_bio_sums(). Additionally, let's rename
nr_sectors to blockcount to make it clear that it's the number of
filesystem blocks, not the number of 512-byte sectors.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/compression.c |  2 +-
 fs/btrfs/ctree.h       |  2 +-
 fs/btrfs/file-item.c   | 37 +++++++++++++++++--------------------
 fs/btrfs/inode.c       |  8 ++++----
 4 files changed, 23 insertions(+), 26 deletions(-)

diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 71e5b2e9a1ba8..8b3bca269de3d 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -591,7 +591,7 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
 
 		if (submit) {
 			if (!skip_sum) {
-				ret = btrfs_csum_one_bio(inode, bio, start, 1);
+				ret = btrfs_csum_one_bio(inode, bio, start, true);
 				if (ret)
 					goto finish_cb;
 			}
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 89d2ed6c2eb13..aa88dd70ac48b 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3203,7 +3203,7 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
 			   struct btrfs_root *root,
 			   struct btrfs_ordered_sum *sums);
 blk_status_t btrfs_csum_one_bio(struct btrfs_inode *inode, struct bio *bio,
-				u64 file_start, int contig);
+				u64 offset, bool one_ordered);
 int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
 			     struct list_head *list, int search_commit);
 void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 9a3de652ada84..2907314a41b42 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -612,32 +612,33 @@ fail:
 	return ret;
 }
 
-/*
- * btrfs_csum_one_bio - Calculates checksums of the data contained inside a bio
+/**
+ * Calculate checksums of the data contained inside a bio
+ *
  * @inode:	 Owner of the data inside the bio
  * @bio:	 Contains the data to be checksummed
- * @file_start:  offset in file this bio begins to describe
- * @contig:	 Boolean. If true/1 means all bio vecs in this bio are
- *		 contiguous and they begin at @file_start in the file. False/0
- *		 means this bio can contain potentially discontiguous bio vecs
- *		 so the logical offset of each should be calculated separately.
+ * @offset:      If (u64)-1, @bio may contain discontiguous bio vecs, so the
+ *               file offsets are determined from the page offsets in the bio.
+ *               Otherwise, this is the starting file offset of the bio vecs in
+ *               @bio, which must be contiguous.
+ * @one_ordered: If true, @bio only refers to one ordered extent.
  */
 blk_status_t btrfs_csum_one_bio(struct btrfs_inode *inode, struct bio *bio,
-		       u64 file_start, int contig)
+				u64 offset, bool one_ordered)
 {
 	struct btrfs_fs_info *fs_info = inode->root->fs_info;
 	SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
 	struct btrfs_ordered_sum *sums;
 	struct btrfs_ordered_extent *ordered = NULL;
+	const bool use_page_offsets = (offset == (u64)-1);
 	char *data;
 	struct bvec_iter iter;
 	struct bio_vec bvec;
 	int index;
-	int nr_sectors;
+	unsigned int blockcount;
 	unsigned long total_bytes = 0;
 	unsigned long this_sum_bytes = 0;
 	int i;
-	u64 offset;
 	unsigned nofs_flag;
 
 	nofs_flag = memalloc_nofs_save();
@@ -651,18 +652,13 @@ blk_status_t btrfs_csum_one_bio(struct btrfs_inode *inode, struct bio *bio,
 	sums->len = bio->bi_iter.bi_size;
 	INIT_LIST_HEAD(&sums->list);
 
-	if (contig)
-		offset = file_start;
-	else
-		offset = 0; /* shut up gcc */
-
 	sums->bytenr = bio->bi_iter.bi_sector << 9;
 	index = 0;
 
 	shash->tfm = fs_info->csum_shash;
 
 	bio_for_each_segment(bvec, bio, iter) {
-		if (!contig)
+		if (use_page_offsets)
 			offset = page_offset(bvec.bv_page) + bvec.bv_offset;
 
 		if (!ordered) {
@@ -681,13 +677,14 @@ blk_status_t btrfs_csum_one_bio(struct btrfs_inode *inode, struct bio *bio,
 			}
 		}
 
-		nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info,
+		blockcount = BTRFS_BYTES_TO_BLKS(fs_info,
 						 bvec.bv_len + fs_info->sectorsize
 						 - 1);
 
-		for (i = 0; i < nr_sectors; i++) {
-			if (offset >= ordered->file_offset + ordered->num_bytes ||
-			    offset < ordered->file_offset) {
+		for (i = 0; i < blockcount; i++) {
+			if (!one_ordered &&
+			    !in_range(offset, ordered->file_offset,
+				      ordered->num_bytes)) {
 				unsigned long bytes_left;
 
 				sums->len = this_sum_bytes;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index ffe327090277a..26fd111218c71 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2314,7 +2314,7 @@ void btrfs_clear_delalloc_extent(struct inode *vfs_inode,
 static blk_status_t btrfs_submit_bio_start(struct inode *inode, struct bio *bio,
 					   u64 dio_file_offset)
 {
-	return btrfs_csum_one_bio(BTRFS_I(inode), bio, 0, 0);
+	return btrfs_csum_one_bio(BTRFS_I(inode), bio, (u64)-1, false);
 }
 
 /*
@@ -2566,7 +2566,7 @@ blk_status_t btrfs_submit_data_bio(struct inode *inode, struct bio *bio,
 					  0, btrfs_submit_bio_start);
 		goto out;
 	} else if (!skip_sum) {
-		ret = btrfs_csum_one_bio(BTRFS_I(inode), bio, 0, 0);
+		ret = btrfs_csum_one_bio(BTRFS_I(inode), bio, (u64)-1, false);
 		if (ret)
 			goto out;
 	}
@@ -7845,7 +7845,7 @@ static blk_status_t btrfs_submit_bio_start_direct_io(struct inode *inode,
 						     struct bio *bio,
 						     u64 dio_file_offset)
 {
-	return btrfs_csum_one_bio(BTRFS_I(inode), bio, dio_file_offset, 1);
+	return btrfs_csum_one_bio(BTRFS_I(inode), bio, dio_file_offset, false);
 }
 
 static void btrfs_end_dio_bio(struct bio *bio)
@@ -7902,7 +7902,7 @@ static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio,
 		 * If we aren't doing async submit, calculate the csum of the
 		 * bio now.
 		 */
-		ret = btrfs_csum_one_bio(BTRFS_I(inode), bio, file_offset, 1);
+		ret = btrfs_csum_one_bio(BTRFS_I(inode), bio, file_offset, false);
 		if (ret)
 			goto err;
 	} else {
-- 
GitLab


From cb36a9bb17c4abf720123dacd449408588247005 Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov@fb.com>
Date: Wed, 6 Nov 2019 12:11:56 -0800
Subject: [PATCH 1390/1586] btrfs: add ram_bytes and offset to
 btrfs_ordered_extent

Currently, we only create ordered extents when ram_bytes == num_bytes
and offset == 0. However, BTRFS_IOC_ENCODED_WRITE writes may create
extents which only refer to a subset of the full unencoded extent, so we
need to plumb these fields through the ordered extent infrastructure and
pass them down to insert_reserved_file_extent().

Since we're changing the btrfs_add_ordered_extent* signature, let's get
rid of the trivial wrappers and add a kernel-doc.

Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Omar Sandoval <osandov@fb.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c        |  58 +++++++++++--------
 fs/btrfs/ordered-data.c | 120 ++++++++++++----------------------------
 fs/btrfs/ordered-data.h |  22 +++++---
 3 files changed, 83 insertions(+), 117 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 26fd111218c71..5f77554686db4 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -985,11 +985,14 @@ static int submit_one_async_extent(struct btrfs_inode *inode,
 	}
 	free_extent_map(em);
 
-	ret = btrfs_add_ordered_extent_compress(inode, start,	/* file_offset */
-					ins.objectid,		/* disk_bytenr */
-					async_extent->ram_size, /* num_bytes */
-					ins.offset,		/* disk_num_bytes */
-					async_extent->compress_type);
+	ret = btrfs_add_ordered_extent(inode, start,		/* file_offset */
+				       async_extent->ram_size,	/* num_bytes */
+				       async_extent->ram_size,	/* ram_bytes */
+				       ins.objectid,		/* disk_bytenr */
+				       ins.offset,		/* disk_num_bytes */
+				       0,			/* offset */
+				       1 << BTRFS_ORDERED_COMPRESSED,
+				       async_extent->compress_type);
 	if (ret) {
 		btrfs_drop_extent_cache(inode, start, end, 0);
 		goto out_free_reserve;
@@ -1238,9 +1241,10 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
 		}
 		free_extent_map(em);
 
-		ret = btrfs_add_ordered_extent(inode, start, ins.objectid,
-					       ram_size, cur_alloc_size,
-					       BTRFS_ORDERED_REGULAR);
+		ret = btrfs_add_ordered_extent(inode, start, ram_size, ram_size,
+					       ins.objectid, cur_alloc_size, 0,
+					       1 << BTRFS_ORDERED_REGULAR,
+					       BTRFS_COMPRESS_NONE);
 		if (ret)
 			goto out_drop_extent_cache;
 
@@ -1899,10 +1903,11 @@ out_check:
 				goto error;
 			}
 			free_extent_map(em);
-			ret = btrfs_add_ordered_extent(inode, cur_offset,
-						       disk_bytenr, num_bytes,
-						       num_bytes,
-						       BTRFS_ORDERED_PREALLOC);
+			ret = btrfs_add_ordered_extent(inode,
+					cur_offset, num_bytes, num_bytes,
+					disk_bytenr, num_bytes, 0,
+					1 << BTRFS_ORDERED_PREALLOC,
+					BTRFS_COMPRESS_NONE);
 			if (ret) {
 				btrfs_drop_extent_cache(inode, cur_offset,
 							cur_offset + num_bytes - 1,
@@ -1911,9 +1916,11 @@ out_check:
 			}
 		} else {
 			ret = btrfs_add_ordered_extent(inode, cur_offset,
+						       num_bytes, num_bytes,
 						       disk_bytenr, num_bytes,
-						       num_bytes,
-						       BTRFS_ORDERED_NOCOW);
+						       0,
+						       1 << BTRFS_ORDERED_NOCOW,
+						       BTRFS_COMPRESS_NONE);
 			if (ret)
 				goto error;
 		}
@@ -2874,6 +2881,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
 	struct btrfs_key ins;
 	u64 disk_num_bytes = btrfs_stack_file_extent_disk_num_bytes(stack_fi);
 	u64 disk_bytenr = btrfs_stack_file_extent_disk_bytenr(stack_fi);
+	u64 offset = btrfs_stack_file_extent_offset(stack_fi);
 	u64 num_bytes = btrfs_stack_file_extent_num_bytes(stack_fi);
 	u64 ram_bytes = btrfs_stack_file_extent_ram_bytes(stack_fi);
 	struct btrfs_drop_extents_args drop_args = { 0 };
@@ -2948,7 +2956,8 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
 		goto out;
 
 	ret = btrfs_alloc_reserved_file_extent(trans, root, btrfs_ino(inode),
-					       file_pos, qgroup_reserved, &ins);
+					       file_pos - offset,
+					       qgroup_reserved, &ins);
 out:
 	btrfs_free_path(path);
 
@@ -2974,20 +2983,20 @@ static int insert_ordered_extent_file_extent(struct btrfs_trans_handle *trans,
 					     struct btrfs_ordered_extent *oe)
 {
 	struct btrfs_file_extent_item stack_fi;
-	u64 logical_len;
 	bool update_inode_bytes;
+	u64 num_bytes = oe->num_bytes;
+	u64 ram_bytes = oe->ram_bytes;
 
 	memset(&stack_fi, 0, sizeof(stack_fi));
 	btrfs_set_stack_file_extent_type(&stack_fi, BTRFS_FILE_EXTENT_REG);
 	btrfs_set_stack_file_extent_disk_bytenr(&stack_fi, oe->disk_bytenr);
 	btrfs_set_stack_file_extent_disk_num_bytes(&stack_fi,
 						   oe->disk_num_bytes);
+	btrfs_set_stack_file_extent_offset(&stack_fi, oe->offset);
 	if (test_bit(BTRFS_ORDERED_TRUNCATED, &oe->flags))
-		logical_len = oe->truncated_len;
-	else
-		logical_len = oe->num_bytes;
-	btrfs_set_stack_file_extent_num_bytes(&stack_fi, logical_len);
-	btrfs_set_stack_file_extent_ram_bytes(&stack_fi, logical_len);
+		num_bytes = ram_bytes = oe->truncated_len;
+	btrfs_set_stack_file_extent_num_bytes(&stack_fi, num_bytes);
+	btrfs_set_stack_file_extent_ram_bytes(&stack_fi, ram_bytes);
 	btrfs_set_stack_file_extent_compression(&stack_fi, oe->compress_type);
 	/* Encryption and other encoding is reserved and all 0 */
 
@@ -7054,8 +7063,11 @@ static struct extent_map *btrfs_create_dio_extent(struct btrfs_inode *inode,
 		if (IS_ERR(em))
 			goto out;
 	}
-	ret = btrfs_add_ordered_extent_dio(inode, start, block_start, len,
-					   block_len, type);
+	ret = btrfs_add_ordered_extent(inode, start, len, len, block_start,
+				       block_len, 0,
+				       (1 << type) |
+				       (1 << BTRFS_ORDERED_DIRECT),
+				       BTRFS_COMPRESS_NONE);
 	if (ret) {
 		if (em) {
 			free_extent_map(em);
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 6b51fd2ec5ac5..2ce7a4b4f7d0d 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -143,16 +143,28 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
 	return ret;
 }
 
-/*
- * Allocate and add a new ordered_extent into the per-inode tree.
+/**
+ * Add an ordered extent to the per-inode tree.
+ *
+ * @inode:           Inode that this extent is for.
+ * @file_offset:     Logical offset in file where the extent starts.
+ * @num_bytes:       Logical length of extent in file.
+ * @ram_bytes:       Full length of unencoded data.
+ * @disk_bytenr:     Offset of extent on disk.
+ * @disk_num_bytes:  Size of extent on disk.
+ * @offset:          Offset into unencoded data where file data starts.
+ * @flags:           Flags specifying type of extent (1 << BTRFS_ORDERED_*).
+ * @compress_type:   Compression algorithm used for data.
+ *
+ * Most of these parameters correspond to &struct btrfs_file_extent_item. The
+ * tree is given a single reference on the ordered extent that was inserted.
  *
- * The tree is given a single reference on the ordered extent that was
- * inserted.
+ * Return: 0 or -ENOMEM.
  */
-static int __btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset,
-				      u64 disk_bytenr, u64 num_bytes,
-				      u64 disk_num_bytes, int type, int dio,
-				      int compress_type)
+int btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset,
+			     u64 num_bytes, u64 ram_bytes, u64 disk_bytenr,
+			     u64 disk_num_bytes, u64 offset, unsigned flags,
+			     int compress_type)
 {
 	struct btrfs_root *root = inode->root;
 	struct btrfs_fs_info *fs_info = root->fs_info;
@@ -161,7 +173,8 @@ static int __btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset
 	struct btrfs_ordered_extent *entry;
 	int ret;
 
-	if (type == BTRFS_ORDERED_NOCOW || type == BTRFS_ORDERED_PREALLOC) {
+	if (flags &
+	    ((1 << BTRFS_ORDERED_NOCOW) | (1 << BTRFS_ORDERED_PREALLOC))) {
 		/* For nocow write, we can release the qgroup rsv right now */
 		ret = btrfs_qgroup_free_data(inode, NULL, file_offset, num_bytes);
 		if (ret < 0)
@@ -181,9 +194,11 @@ static int __btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset
 		return -ENOMEM;
 
 	entry->file_offset = file_offset;
-	entry->disk_bytenr = disk_bytenr;
 	entry->num_bytes = num_bytes;
+	entry->ram_bytes = ram_bytes;
+	entry->disk_bytenr = disk_bytenr;
 	entry->disk_num_bytes = disk_num_bytes;
+	entry->offset = offset;
 	entry->bytes_left = num_bytes;
 	entry->inode = igrab(&inode->vfs_inode);
 	entry->compress_type = compress_type;
@@ -191,18 +206,12 @@ static int __btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset
 	entry->qgroup_rsv = ret;
 	entry->physical = (u64)-1;
 
-	ASSERT(type == BTRFS_ORDERED_REGULAR ||
-	       type == BTRFS_ORDERED_NOCOW ||
-	       type == BTRFS_ORDERED_PREALLOC ||
-	       type == BTRFS_ORDERED_COMPRESSED);
-	set_bit(type, &entry->flags);
+	ASSERT((flags & ~BTRFS_ORDERED_TYPE_FLAGS) == 0);
+	entry->flags = flags;
 
 	percpu_counter_add_batch(&fs_info->ordered_bytes, num_bytes,
 				 fs_info->delalloc_batch);
 
-	if (dio)
-		set_bit(BTRFS_ORDERED_DIRECT, &entry->flags);
-
 	/* one ref for the tree */
 	refcount_set(&entry->refs, 1);
 	init_waitqueue_head(&entry->wait);
@@ -247,41 +256,6 @@ static int __btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset
 	return 0;
 }
 
-int btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset,
-			     u64 disk_bytenr, u64 num_bytes, u64 disk_num_bytes,
-			     int type)
-{
-	ASSERT(type == BTRFS_ORDERED_REGULAR ||
-	       type == BTRFS_ORDERED_NOCOW ||
-	       type == BTRFS_ORDERED_PREALLOC);
-	return __btrfs_add_ordered_extent(inode, file_offset, disk_bytenr,
-					  num_bytes, disk_num_bytes, type, 0,
-					  BTRFS_COMPRESS_NONE);
-}
-
-int btrfs_add_ordered_extent_dio(struct btrfs_inode *inode, u64 file_offset,
-				 u64 disk_bytenr, u64 num_bytes,
-				 u64 disk_num_bytes, int type)
-{
-	ASSERT(type == BTRFS_ORDERED_REGULAR ||
-	       type == BTRFS_ORDERED_NOCOW ||
-	       type == BTRFS_ORDERED_PREALLOC);
-	return __btrfs_add_ordered_extent(inode, file_offset, disk_bytenr,
-					  num_bytes, disk_num_bytes, type, 1,
-					  BTRFS_COMPRESS_NONE);
-}
-
-int btrfs_add_ordered_extent_compress(struct btrfs_inode *inode, u64 file_offset,
-				      u64 disk_bytenr, u64 num_bytes,
-				      u64 disk_num_bytes, int compress_type)
-{
-	ASSERT(compress_type != BTRFS_COMPRESS_NONE);
-	return __btrfs_add_ordered_extent(inode, file_offset, disk_bytenr,
-					  num_bytes, disk_num_bytes,
-					  BTRFS_ORDERED_COMPRESSED, 0,
-					  compress_type);
-}
-
 /*
  * Add a struct btrfs_ordered_sum into the list of checksums to be inserted
  * when an ordered extent is finished.  If the list covers more than one
@@ -1052,42 +1026,18 @@ static int clone_ordered_extent(struct btrfs_ordered_extent *ordered, u64 pos,
 	struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
 	u64 file_offset = ordered->file_offset + pos;
 	u64 disk_bytenr = ordered->disk_bytenr + pos;
-	u64 num_bytes = len;
-	u64 disk_num_bytes = len;
-	int type;
-	unsigned long flags_masked = ordered->flags & ~(1 << BTRFS_ORDERED_DIRECT);
-	int compress_type = ordered->compress_type;
-	unsigned long weight;
-	int ret;
-
-	weight = hweight_long(flags_masked);
-	WARN_ON_ONCE(weight > 1);
-	if (!weight)
-		type = 0;
-	else
-		type = __ffs(flags_masked);
+	unsigned long flags = ordered->flags & BTRFS_ORDERED_TYPE_FLAGS;
 
 	/*
-	 * The splitting extent is already counted and will be added again
-	 * in btrfs_add_ordered_extent_*(). Subtract num_bytes to avoid
-	 * double counting.
+	 * The splitting extent is already counted and will be added again in
+	 * btrfs_add_ordered_extent_*(). Subtract len to avoid double counting.
 	 */
-	percpu_counter_add_batch(&fs_info->ordered_bytes, -num_bytes,
+	percpu_counter_add_batch(&fs_info->ordered_bytes, -len,
 				 fs_info->delalloc_batch);
-	if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered->flags)) {
-		WARN_ON_ONCE(1);
-		ret = btrfs_add_ordered_extent_compress(BTRFS_I(inode),
-				file_offset, disk_bytenr, num_bytes,
-				disk_num_bytes, compress_type);
-	} else if (test_bit(BTRFS_ORDERED_DIRECT, &ordered->flags)) {
-		ret = btrfs_add_ordered_extent_dio(BTRFS_I(inode), file_offset,
-				disk_bytenr, num_bytes, disk_num_bytes, type);
-	} else {
-		ret = btrfs_add_ordered_extent(BTRFS_I(inode), file_offset,
-				disk_bytenr, num_bytes, disk_num_bytes, type);
-	}
-
-	return ret;
+	WARN_ON_ONCE(flags & (1 << BTRFS_ORDERED_COMPRESSED));
+	return btrfs_add_ordered_extent(BTRFS_I(inode), file_offset, len, len,
+					disk_bytenr, len, 0, flags,
+					ordered->compress_type);
 }
 
 int btrfs_split_ordered_extent(struct btrfs_ordered_extent *ordered, u64 pre,
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 4194e960ff617..d5d41f2290c15 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -76,6 +76,13 @@ enum {
 	BTRFS_ORDERED_PENDING,
 };
 
+/* BTRFS_ORDERED_* flags that specify the type of the extent. */
+#define BTRFS_ORDERED_TYPE_FLAGS ((1UL << BTRFS_ORDERED_REGULAR) |	\
+				  (1UL << BTRFS_ORDERED_NOCOW) |	\
+				  (1UL << BTRFS_ORDERED_PREALLOC) |	\
+				  (1UL << BTRFS_ORDERED_COMPRESSED) |	\
+				  (1UL << BTRFS_ORDERED_DIRECT))
+
 struct btrfs_ordered_extent {
 	/* logical offset in the file */
 	u64 file_offset;
@@ -84,9 +91,11 @@ struct btrfs_ordered_extent {
 	 * These fields directly correspond to the same fields in
 	 * btrfs_file_extent_item.
 	 */
-	u64 disk_bytenr;
 	u64 num_bytes;
+	u64 ram_bytes;
+	u64 disk_bytenr;
 	u64 disk_num_bytes;
+	u64 offset;
 
 	/* number of bytes that still need writing */
 	u64 bytes_left;
@@ -179,14 +188,9 @@ bool btrfs_dec_test_ordered_pending(struct btrfs_inode *inode,
 				    struct btrfs_ordered_extent **cached,
 				    u64 file_offset, u64 io_size);
 int btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset,
-			     u64 disk_bytenr, u64 num_bytes, u64 disk_num_bytes,
-			     int type);
-int btrfs_add_ordered_extent_dio(struct btrfs_inode *inode, u64 file_offset,
-				 u64 disk_bytenr, u64 num_bytes,
-				 u64 disk_num_bytes, int type);
-int btrfs_add_ordered_extent_compress(struct btrfs_inode *inode, u64 file_offset,
-				      u64 disk_bytenr, u64 num_bytes,
-				      u64 disk_num_bytes, int compress_type);
+			     u64 num_bytes, u64 ram_bytes, u64 disk_bytenr,
+			     u64 disk_num_bytes, u64 offset, unsigned flags,
+			     int compress_type);
 void btrfs_add_ordered_sum(struct btrfs_ordered_extent *entry,
 			   struct btrfs_ordered_sum *sum);
 struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct btrfs_inode *inode,
-- 
GitLab


From 28c9b1e75aa1794897a9cb709f20f27f4aadebac Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov@fb.com>
Date: Mon, 18 Nov 2019 22:45:55 -0800
Subject: [PATCH 1391/1586] btrfs: support different disk extent size for
 delalloc

Currently, we always reserve the same extent size in the file and extent
size on disk for delalloc because the former is the worst case for the
latter. For BTRFS_IOC_ENCODED_WRITE writes, we know the exact size of
the extent on disk, which may be less than or greater than (for
bookends) the size in the file. Add a disk_num_bytes parameter to
btrfs_delalloc_reserve_metadata() so that we can reserve the correct
amount of csum bytes. No functional change.

Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Omar Sandoval <osandov@fb.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.h          |  3 ++-
 fs/btrfs/delalloc-space.c | 18 ++++++++++--------
 fs/btrfs/file.c           |  3 ++-
 fs/btrfs/inode.c          |  4 ++--
 fs/btrfs/relocation.c     |  2 +-
 5 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index aa88dd70ac48b..91b27d5eb8cfc 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2887,7 +2887,8 @@ void btrfs_subvolume_release_metadata(struct btrfs_root *root,
 				      struct btrfs_block_rsv *rsv);
 void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes);
 
-int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes);
+int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes,
+				    u64 disk_num_bytes);
 u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo);
 int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
 				   u64 start, u64 end);
diff --git a/fs/btrfs/delalloc-space.c b/fs/btrfs/delalloc-space.c
index fb46a28f50659..bd8267c4687d9 100644
--- a/fs/btrfs/delalloc-space.c
+++ b/fs/btrfs/delalloc-space.c
@@ -270,11 +270,11 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
 }
 
 static void calc_inode_reservations(struct btrfs_fs_info *fs_info,
-				    u64 num_bytes, u64 *meta_reserve,
-				    u64 *qgroup_reserve)
+				    u64 num_bytes, u64 disk_num_bytes,
+				    u64 *meta_reserve, u64 *qgroup_reserve)
 {
 	u64 nr_extents = count_max_extents(num_bytes);
-	u64 csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, num_bytes);
+	u64 csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, disk_num_bytes);
 	u64 inode_update = btrfs_calc_metadata_size(fs_info, 1);
 
 	*meta_reserve = btrfs_calc_insert_metadata_size(fs_info,
@@ -288,7 +288,8 @@ static void calc_inode_reservations(struct btrfs_fs_info *fs_info,
 	*qgroup_reserve = nr_extents * fs_info->nodesize;
 }
 
-int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
+int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes,
+				    u64 disk_num_bytes)
 {
 	struct btrfs_root *root = inode->root;
 	struct btrfs_fs_info *fs_info = root->fs_info;
@@ -318,6 +319,7 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
 	}
 
 	num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
+	disk_num_bytes = ALIGN(disk_num_bytes, fs_info->sectorsize);
 
 	/*
 	 * We always want to do it this way, every other way is wrong and ends
@@ -329,8 +331,8 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
 	 * everything out and try again, which is bad.  This way we just
 	 * over-reserve slightly, and clean up the mess when we are done.
 	 */
-	calc_inode_reservations(fs_info, num_bytes, &meta_reserve,
-				&qgroup_reserve);
+	calc_inode_reservations(fs_info, num_bytes, disk_num_bytes,
+				&meta_reserve, &qgroup_reserve);
 	ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_reserve, true);
 	if (ret)
 		return ret;
@@ -349,7 +351,7 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
 	spin_lock(&inode->lock);
 	nr_extents = count_max_extents(num_bytes);
 	btrfs_mod_outstanding_extents(inode, nr_extents);
-	inode->csum_bytes += num_bytes;
+	inode->csum_bytes += disk_num_bytes;
 	btrfs_calculate_inode_block_rsv_size(fs_info, inode);
 	spin_unlock(&inode->lock);
 
@@ -454,7 +456,7 @@ int btrfs_delalloc_reserve_space(struct btrfs_inode *inode,
 	ret = btrfs_check_data_free_space(inode, reserved, start, len);
 	if (ret < 0)
 		return ret;
-	ret = btrfs_delalloc_reserve_metadata(inode, len);
+	ret = btrfs_delalloc_reserve_metadata(inode, len, len);
 	if (ret < 0) {
 		btrfs_free_reserved_data_space(inode, *reserved, start, len);
 		extent_changeset_free(*reserved);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 391b05019dbdf..7b4a5458ca9b6 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1719,7 +1719,8 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
 					 fs_info->sectorsize);
 		WARN_ON(reserve_bytes == 0);
 		ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode),
-				reserve_bytes);
+						      reserve_bytes,
+						      reserve_bytes);
 		if (ret) {
 			if (!only_release_metadata)
 				btrfs_free_reserved_data_space(BTRFS_I(inode),
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 5f77554686db4..38ea51d645534 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4697,7 +4697,7 @@ int btrfs_truncate_block(struct btrfs_inode *inode, loff_t from, loff_t len,
 			goto out;
 		}
 	}
-	ret = btrfs_delalloc_reserve_metadata(inode, blocksize);
+	ret = btrfs_delalloc_reserve_metadata(inode, blocksize, blocksize);
 	if (ret < 0) {
 		if (!only_release_metadata)
 			btrfs_free_reserved_data_space(inode, data_reserved,
@@ -7467,7 +7467,7 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map,
 		struct extent_map *em2;
 
 		/* We can NOCOW, so only need to reserve metadata space. */
-		ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), len);
+		ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), len, len);
 		if (ret < 0) {
 			/* Our caller expects us to free the input extent map. */
 			free_extent_map(em);
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 9d8054839782e..50130f943d999 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -2997,7 +2997,7 @@ static int relocate_one_page(struct inode *inode, struct file_ra_state *ra,
 
 		/* Reserve metadata for this range */
 		ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode),
-						      clamped_len);
+						      clamped_len, clamped_len);
 		if (ret)
 			goto release_page;
 
-- 
GitLab


From 8dd9872d2e261196069ab03d5dc5e6ec9bc93f4b Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov@fb.com>
Date: Tue, 16 Nov 2021 14:03:45 -0800
Subject: [PATCH 1392/1586] btrfs: clean up cow_file_range_inline()

The start parameter to cow_file_range_inline() (and
insert_inline_extent()) is always 0, so get rid of it and simplify the
logic in those two functions. Pass btrfs_inode to insert_inline_extent()
and remove the redundant root parameter. Also document the requirements
for creating an inline extent. No functional change.

Signed-off-by: Omar Sandoval <osandov@fb.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c | 87 ++++++++++++++++++++----------------------------
 1 file changed, 37 insertions(+), 50 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 38ea51d645534..c0b2421c2925d 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -239,12 +239,13 @@ static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
  * no overlapping inline items exist in the btree
  */
 static int insert_inline_extent(struct btrfs_trans_handle *trans,
-				struct btrfs_path *path, bool extent_inserted,
-				struct btrfs_root *root, struct inode *inode,
-				u64 start, size_t size, size_t compressed_size,
+				struct btrfs_path *path,
+				struct btrfs_inode *inode, bool extent_inserted,
+				size_t size, size_t compressed_size,
 				int compress_type,
 				struct page **compressed_pages)
 {
+	struct btrfs_root *root = inode->root;
 	struct extent_buffer *leaf;
 	struct page *page = NULL;
 	char *kaddr;
@@ -252,7 +253,6 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans,
 	struct btrfs_file_extent_item *ei;
 	int ret;
 	size_t cur_size = size;
-	unsigned long offset;
 
 	ASSERT((compressed_size > 0 && compressed_pages) ||
 	       (compressed_size == 0 && !compressed_pages));
@@ -264,8 +264,8 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans,
 		struct btrfs_key key;
 		size_t datasize;
 
-		key.objectid = btrfs_ino(BTRFS_I(inode));
-		key.offset = start;
+		key.objectid = btrfs_ino(inode);
+		key.offset = 0;
 		key.type = BTRFS_EXTENT_DATA_KEY;
 
 		datasize = btrfs_file_extent_calc_inline_size(cur_size);
@@ -303,12 +303,10 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans,
 		btrfs_set_file_extent_compression(leaf, ei,
 						  compress_type);
 	} else {
-		page = find_get_page(inode->i_mapping,
-				     start >> PAGE_SHIFT);
+		page = find_get_page(inode->vfs_inode.i_mapping, 0);
 		btrfs_set_file_extent_compression(leaf, ei, 0);
 		kaddr = kmap_atomic(page);
-		offset = offset_in_page(start);
-		write_extent_buffer(leaf, kaddr + offset, ptr, size);
+		write_extent_buffer(leaf, kaddr, ptr, size);
 		kunmap_atomic(kaddr);
 		put_page(page);
 	}
@@ -319,8 +317,8 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans,
 	 * We align size to sectorsize for inline extents just for simplicity
 	 * sake.
 	 */
-	size = ALIGN(size, root->fs_info->sectorsize);
-	ret = btrfs_inode_set_file_extent_range(BTRFS_I(inode), start, size);
+	ret = btrfs_inode_set_file_extent_range(inode, 0,
+					ALIGN(size, root->fs_info->sectorsize));
 	if (ret)
 		goto fail;
 
@@ -333,7 +331,8 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans,
 	 * before we unlock the pages.  Otherwise we
 	 * could end up racing with unlink.
 	 */
-	BTRFS_I(inode)->disk_i_size = inode->i_size;
+	inode->disk_i_size = i_size_read(&inode->vfs_inode);
+
 fail:
 	return ret;
 }
@@ -344,8 +343,8 @@ fail:
  * does the checks required to make sure the data is small enough
  * to fit as an inline extent.
  */
-static noinline int cow_file_range_inline(struct btrfs_inode *inode, u64 start,
-					  u64 end, size_t compressed_size,
+static noinline int cow_file_range_inline(struct btrfs_inode *inode, u64 size,
+					  size_t compressed_size,
 					  int compress_type,
 					  struct page **compressed_pages)
 {
@@ -353,26 +352,21 @@ static noinline int cow_file_range_inline(struct btrfs_inode *inode, u64 start,
 	struct btrfs_root *root = inode->root;
 	struct btrfs_fs_info *fs_info = root->fs_info;
 	struct btrfs_trans_handle *trans;
-	u64 isize = i_size_read(&inode->vfs_inode);
-	u64 actual_end = min(end + 1, isize);
-	u64 inline_len = actual_end - start;
-	u64 aligned_end = ALIGN(end, fs_info->sectorsize);
-	u64 data_len = inline_len;
+	u64 data_len = (compressed_size ?: size);
 	int ret;
 	struct btrfs_path *path;
 
-	if (compressed_size)
-		data_len = compressed_size;
-
-	if (start > 0 ||
-	    actual_end > fs_info->sectorsize ||
+	/*
+	 * We can create an inline extent if it ends at or beyond the current
+	 * i_size, is no larger than a sector (decompressed), and the (possibly
+	 * compressed) data fits in a leaf and the configured maximum inline
+	 * size.
+	 */
+	if (size < i_size_read(&inode->vfs_inode) ||
+	    size > fs_info->sectorsize ||
 	    data_len > BTRFS_MAX_INLINE_DATA_SIZE(fs_info) ||
-	    (!compressed_size &&
-	    (actual_end & (fs_info->sectorsize - 1)) == 0) ||
-	    end + 1 < isize ||
-	    data_len > fs_info->max_inline) {
+	    data_len > fs_info->max_inline)
 		return 1;
-	}
 
 	path = btrfs_alloc_path();
 	if (!path)
@@ -386,30 +380,20 @@ static noinline int cow_file_range_inline(struct btrfs_inode *inode, u64 start,
 	trans->block_rsv = &inode->block_rsv;
 
 	drop_args.path = path;
-	drop_args.start = start;
-	drop_args.end = aligned_end;
+	drop_args.start = 0;
+	drop_args.end = fs_info->sectorsize;
 	drop_args.drop_cache = true;
 	drop_args.replace_extent = true;
-
-	if (compressed_size && compressed_pages)
-		drop_args.extent_item_size = btrfs_file_extent_calc_inline_size(
-		   compressed_size);
-	else
-		drop_args.extent_item_size = btrfs_file_extent_calc_inline_size(
-		    inline_len);
-
+	drop_args.extent_item_size = btrfs_file_extent_calc_inline_size(data_len);
 	ret = btrfs_drop_extents(trans, root, inode, &drop_args);
 	if (ret) {
 		btrfs_abort_transaction(trans, ret);
 		goto out;
 	}
 
-	if (isize > actual_end)
-		inline_len = min_t(u64, isize, actual_end);
-	ret = insert_inline_extent(trans, path, drop_args.extent_inserted,
-				   root, &inode->vfs_inode, start,
-				   inline_len, compressed_size,
-				   compress_type, compressed_pages);
+	ret = insert_inline_extent(trans, path, inode, drop_args.extent_inserted,
+				   size, compressed_size, compress_type,
+				   compressed_pages);
 	if (ret && ret != -ENOSPC) {
 		btrfs_abort_transaction(trans, ret);
 		goto out;
@@ -418,7 +402,7 @@ static noinline int cow_file_range_inline(struct btrfs_inode *inode, u64 start,
 		goto out;
 	}
 
-	btrfs_update_inode_bytes(inode, inline_len, drop_args.bytes_found);
+	btrfs_update_inode_bytes(inode, size, drop_args.bytes_found);
 	ret = btrfs_update_inode(trans, root, inode);
 	if (ret && ret != -ENOSPC) {
 		btrfs_abort_transaction(trans, ret);
@@ -739,12 +723,12 @@ cont:
 			/* we didn't compress the entire range, try
 			 * to make an uncompressed inline extent.
 			 */
-			ret = cow_file_range_inline(BTRFS_I(inode), start, end,
+			ret = cow_file_range_inline(BTRFS_I(inode), actual_end,
 						    0, BTRFS_COMPRESS_NONE,
 						    NULL);
 		} else {
 			/* try making a compressed inline extent */
-			ret = cow_file_range_inline(BTRFS_I(inode), start, end,
+			ret = cow_file_range_inline(BTRFS_I(inode), actual_end,
 						    total_compressed,
 						    compress_type, pages);
 		}
@@ -1159,8 +1143,11 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
 	 * So here we skip inline extent creation completely.
 	 */
 	if (start == 0 && fs_info->sectorsize == PAGE_SIZE) {
+		u64 actual_end = min_t(u64, i_size_read(&inode->vfs_inode),
+				       end + 1);
+
 		/* lets try to make an inline extent */
-		ret = cow_file_range_inline(inode, start, end, 0,
+		ret = cow_file_range_inline(inode, actual_end, 0,
 					    BTRFS_COMPRESS_NONE, NULL);
 		if (ret == 0) {
 			/*
-- 
GitLab


From d9496e8aba491e6c4c1c1c3549a93839ffd9b5a4 Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov@fb.com>
Date: Thu, 7 Nov 2019 15:19:16 -0800
Subject: [PATCH 1393/1586] btrfs: optionally extend i_size in
 cow_file_range_inline()

Currently, an inline extent is always created after i_size is extended
from btrfs_dirty_pages(). However, for encoded writes, we only want to
update i_size after we successfully created the inline extent. Add an
update_i_size parameter to cow_file_range_inline() and
insert_inline_extent() and pass in the size of the extent rather than
determining it from i_size.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Omar Sandoval <osandov@fb.com>
Reviewed-by: David Sterba <dsterba@suse.com>
[ reformat comment ]
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c | 33 ++++++++++++++++++++-------------
 1 file changed, 20 insertions(+), 13 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index c0b2421c2925d..004d2dda99aad 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -243,7 +243,8 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans,
 				struct btrfs_inode *inode, bool extent_inserted,
 				size_t size, size_t compressed_size,
 				int compress_type,
-				struct page **compressed_pages)
+				struct page **compressed_pages,
+				bool update_i_size)
 {
 	struct btrfs_root *root = inode->root;
 	struct extent_buffer *leaf;
@@ -253,6 +254,7 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans,
 	struct btrfs_file_extent_item *ei;
 	int ret;
 	size_t cur_size = size;
+	u64 i_size;
 
 	ASSERT((compressed_size > 0 && compressed_pages) ||
 	       (compressed_size == 0 && !compressed_pages));
@@ -323,15 +325,18 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans,
 		goto fail;
 
 	/*
-	 * we're an inline extent, so nobody can
-	 * extend the file past i_size without locking
-	 * a page we already have locked.
+	 * We're an inline extent, so nobody can extend the file past i_size
+	 * without locking a page we already have locked.
 	 *
-	 * We must do any isize and inode updates
-	 * before we unlock the pages.  Otherwise we
-	 * could end up racing with unlink.
+	 * We must do any i_size and inode updates before we unlock the pages.
+	 * Otherwise we could end up racing with unlink.
 	 */
-	inode->disk_i_size = i_size_read(&inode->vfs_inode);
+	i_size = i_size_read(&inode->vfs_inode);
+	if (update_i_size && size > i_size) {
+		i_size_write(&inode->vfs_inode, size);
+		i_size = size;
+	}
+	inode->disk_i_size = i_size;
 
 fail:
 	return ret;
@@ -346,7 +351,8 @@ fail:
 static noinline int cow_file_range_inline(struct btrfs_inode *inode, u64 size,
 					  size_t compressed_size,
 					  int compress_type,
-					  struct page **compressed_pages)
+					  struct page **compressed_pages,
+					  bool update_i_size)
 {
 	struct btrfs_drop_extents_args drop_args = { 0 };
 	struct btrfs_root *root = inode->root;
@@ -393,7 +399,7 @@ static noinline int cow_file_range_inline(struct btrfs_inode *inode, u64 size,
 
 	ret = insert_inline_extent(trans, path, inode, drop_args.extent_inserted,
 				   size, compressed_size, compress_type,
-				   compressed_pages);
+				   compressed_pages, update_i_size);
 	if (ret && ret != -ENOSPC) {
 		btrfs_abort_transaction(trans, ret);
 		goto out;
@@ -725,12 +731,13 @@ cont:
 			 */
 			ret = cow_file_range_inline(BTRFS_I(inode), actual_end,
 						    0, BTRFS_COMPRESS_NONE,
-						    NULL);
+						    NULL, false);
 		} else {
 			/* try making a compressed inline extent */
 			ret = cow_file_range_inline(BTRFS_I(inode), actual_end,
 						    total_compressed,
-						    compress_type, pages);
+						    compress_type, pages,
+						    false);
 		}
 		if (ret <= 0) {
 			unsigned long clear_flags = EXTENT_DELALLOC |
@@ -1148,7 +1155,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
 
 		/* lets try to make an inline extent */
 		ret = cow_file_range_inline(inode, actual_end, 0,
-					    BTRFS_COMPRESS_NONE, NULL);
+					    BTRFS_COMPRESS_NONE, NULL, false);
 		if (ret == 0) {
 			/*
 			 * We use DO_ACCOUNTING here because we need the
-- 
GitLab


From dcb77a9ae87dc1ae2c54ea2e629da357e694b664 Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov@fb.com>
Date: Mon, 16 Aug 2021 15:58:29 -0700
Subject: [PATCH 1394/1586] btrfs: add definitions and documentation for
 encoded I/O ioctls

In order to allow sending and receiving compressed data without
decompressing it, we need an interface to write pre-compressed data
directly to the filesystem and the matching interface to read compressed
data without decompressing it. This adds the definitions for ioctls to
do that and detailed explanations of how to use them.

Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 include/uapi/linux/btrfs.h | 132 +++++++++++++++++++++++++++++++++++++
 1 file changed, 132 insertions(+)

diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index 1cb1a3860f1d9..d956b2993970f 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -869,6 +869,134 @@ struct btrfs_ioctl_get_subvol_rootref_args {
 		__u8 align[7];
 };
 
+/*
+ * Data and metadata for an encoded read or write.
+ *
+ * Encoded I/O bypasses any encoding automatically done by the filesystem (e.g.,
+ * compression). This can be used to read the compressed contents of a file or
+ * write pre-compressed data directly to a file.
+ *
+ * BTRFS_IOC_ENCODED_READ and BTRFS_IOC_ENCODED_WRITE are essentially
+ * preadv/pwritev with additional metadata about how the data is encoded and the
+ * size of the unencoded data.
+ *
+ * BTRFS_IOC_ENCODED_READ fills the given iovecs with the encoded data, fills
+ * the metadata fields, and returns the size of the encoded data. It reads one
+ * extent per call. It can also read data which is not encoded.
+ *
+ * BTRFS_IOC_ENCODED_WRITE uses the metadata fields, writes the encoded data
+ * from the iovecs, and returns the size of the encoded data. Note that the
+ * encoded data is not validated when it is written; if it is not valid (e.g.,
+ * it cannot be decompressed), then a subsequent read may return an error.
+ *
+ * Since the filesystem page cache contains decoded data, encoded I/O bypasses
+ * the page cache. Encoded I/O requires CAP_SYS_ADMIN.
+ */
+struct btrfs_ioctl_encoded_io_args {
+	/* Input parameters for both reads and writes. */
+
+	/*
+	 * iovecs containing encoded data.
+	 *
+	 * For reads, if the size of the encoded data is larger than the sum of
+	 * iov[n].iov_len for 0 <= n < iovcnt, then the ioctl fails with
+	 * ENOBUFS.
+	 *
+	 * For writes, the size of the encoded data is the sum of iov[n].iov_len
+	 * for 0 <= n < iovcnt. This must be less than 128 KiB (this limit may
+	 * increase in the future). This must also be less than or equal to
+	 * unencoded_len.
+	 */
+	const struct iovec __user *iov;
+	/* Number of iovecs. */
+	unsigned long iovcnt;
+	/*
+	 * Offset in file.
+	 *
+	 * For writes, must be aligned to the sector size of the filesystem.
+	 */
+	__s64 offset;
+	/* Currently must be zero. */
+	__u64 flags;
+
+	/*
+	 * For reads, the following members are output parameters that will
+	 * contain the returned metadata for the encoded data.
+	 * For writes, the following members must be set to the metadata for the
+	 * encoded data.
+	 */
+
+	/*
+	 * Length of the data in the file.
+	 *
+	 * Must be less than or equal to unencoded_len - unencoded_offset. For
+	 * writes, must be aligned to the sector size of the filesystem unless
+	 * the data ends at or beyond the current end of the file.
+	 */
+	__u64 len;
+	/*
+	 * Length of the unencoded (i.e., decrypted and decompressed) data.
+	 *
+	 * For writes, must be no more than 128 KiB (this limit may increase in
+	 * the future). If the unencoded data is actually longer than
+	 * unencoded_len, then it is truncated; if it is shorter, then it is
+	 * extended with zeroes.
+	 */
+	__u64 unencoded_len;
+	/*
+	 * Offset from the first byte of the unencoded data to the first byte of
+	 * logical data in the file.
+	 *
+	 * Must be less than unencoded_len.
+	 */
+	__u64 unencoded_offset;
+	/*
+	 * BTRFS_ENCODED_IO_COMPRESSION_* type.
+	 *
+	 * For writes, must not be BTRFS_ENCODED_IO_COMPRESSION_NONE.
+	 */
+	__u32 compression;
+	/* Currently always BTRFS_ENCODED_IO_ENCRYPTION_NONE. */
+	__u32 encryption;
+	/*
+	 * Reserved for future expansion.
+	 *
+	 * For reads, always returned as zero. Users should check for non-zero
+	 * bytes. If there are any, then the kernel has a newer version of this
+	 * structure with additional information that the user definition is
+	 * missing.
+	 *
+	 * For writes, must be zeroed.
+	 */
+	__u8 reserved[64];
+};
+
+/* Data is not compressed. */
+#define BTRFS_ENCODED_IO_COMPRESSION_NONE 0
+/* Data is compressed as a single zlib stream. */
+#define BTRFS_ENCODED_IO_COMPRESSION_ZLIB 1
+/*
+ * Data is compressed as a single zstd frame with the windowLog compression
+ * parameter set to no more than 17.
+ */
+#define BTRFS_ENCODED_IO_COMPRESSION_ZSTD 2
+/*
+ * Data is compressed sector by sector (using the sector size indicated by the
+ * name of the constant) with LZO1X and wrapped in the format documented in
+ * fs/btrfs/lzo.c. For writes, the compression sector size must match the
+ * filesystem sector size.
+ */
+#define BTRFS_ENCODED_IO_COMPRESSION_LZO_4K 3
+#define BTRFS_ENCODED_IO_COMPRESSION_LZO_8K 4
+#define BTRFS_ENCODED_IO_COMPRESSION_LZO_16K 5
+#define BTRFS_ENCODED_IO_COMPRESSION_LZO_32K 6
+#define BTRFS_ENCODED_IO_COMPRESSION_LZO_64K 7
+#define BTRFS_ENCODED_IO_COMPRESSION_TYPES 8
+
+/* Data is not encrypted. */
+#define BTRFS_ENCODED_IO_ENCRYPTION_NONE 0
+#define BTRFS_ENCODED_IO_ENCRYPTION_TYPES 1
+
 /* Error codes as returned by the kernel */
 enum btrfs_err_code {
 	BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET = 1,
@@ -997,5 +1125,9 @@ enum btrfs_err_code {
 				struct btrfs_ioctl_ino_lookup_user_args)
 #define BTRFS_IOC_SNAP_DESTROY_V2 _IOW(BTRFS_IOCTL_MAGIC, 63, \
 				struct btrfs_ioctl_vol_args_v2)
+#define BTRFS_IOC_ENCODED_READ _IOR(BTRFS_IOCTL_MAGIC, 64, \
+				    struct btrfs_ioctl_encoded_io_args)
+#define BTRFS_IOC_ENCODED_WRITE _IOW(BTRFS_IOCTL_MAGIC, 64, \
+				     struct btrfs_ioctl_encoded_io_args)
 
 #endif /* _UAPI_LINUX_BTRFS_H */
-- 
GitLab


From 1881fba89bd5dcd364d2e1bf561912a90a11c21a Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov@fb.com>
Date: Wed, 9 Oct 2019 17:59:07 -0700
Subject: [PATCH 1395/1586] btrfs: add BTRFS_IOC_ENCODED_READ ioctl

There are 4 main cases:

1. Inline extents: we copy the data straight out of the extent buffer.
2. Hole/preallocated extents: we fill in zeroes.
3. Regular, uncompressed extents: we read the sectors we need directly
   from disk.
4. Regular, compressed extents: we read the entire compressed extent
   from disk and indicate what subset of the decompressed extent is in
   the file.

This initial implementation simplifies a few things that can be improved
in the future:

- Cases 1, 3, and 4 allocate temporary memory to read into before
  copying out to userspace.
- We don't do read repair, because it turns out that read repair is
  currently broken for compressed data.
- We hold the inode lock during the operation.

Note that we don't need to hold the mmap lock. We may race with
btrfs_page_mkwrite() and read the old data from before the page was
dirtied:

btrfs_page_mkwrite         btrfs_encoded_read
---------------------------------------------------
(enter)                    (enter)
                           btrfs_wait_ordered_range
lock_extent_bits
btrfs_page_set_dirty
unlock_extent_cached
(exit)
                           lock_extent_bits
                           read extent (dirty page hasn't been flushed,
                                        so this is the old data)
                           unlock_extent_cached
                           (exit)

we read the old data from before the page was dirtied. But, that's true
even if we were to hold the mmap lock:

btrfs_page_mkwrite               btrfs_encoded_read
-------------------------------------------------------------------
(enter)                          (enter)
                                 btrfs_inode_lock(BTRFS_ILOCK_MMAP)
down_read(i_mmap_lock) (blocked)
                                 btrfs_wait_ordered_range
                                 lock_extent_bits
				 read extent (page hasn't been dirtied,
                                              so this is the old data)
                                 unlock_extent_cached
                                 btrfs_inode_unlock(BTRFS_ILOCK_MMAP)
down_read(i_mmap_lock) returns
lock_extent_bits
btrfs_page_set_dirty
unlock_extent_cached

In other words, this is inherently racy, so it's fine that we return the
old data in this tiny window.

Signed-off-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.h |   4 +
 fs/btrfs/inode.c | 498 +++++++++++++++++++++++++++++++++++++++++++++++
 fs/btrfs/ioctl.c | 106 ++++++++++
 3 files changed, 608 insertions(+)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 91b27d5eb8cfc..22a3fef0a9819 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -49,6 +49,7 @@ extern struct kmem_cache *btrfs_free_space_bitmap_cachep;
 struct btrfs_ordered_sum;
 struct btrfs_ref;
 struct btrfs_bio;
+struct btrfs_ioctl_encoded_io_args;
 
 #define BTRFS_MAGIC 0x4D5F53665248425FULL /* ascii _BHRfS_M, no null */
 
@@ -3305,6 +3306,9 @@ int btrfs_writepage_cow_fixup(struct page *page);
 void btrfs_writepage_endio_finish_ordered(struct btrfs_inode *inode,
 					  struct page *page, u64 start,
 					  u64 end, bool uptodate);
+ssize_t btrfs_encoded_read(struct kiocb *iocb, struct iov_iter *iter,
+			   struct btrfs_ioctl_encoded_io_args *encoded);
+
 extern const struct dentry_operations btrfs_dentry_operations;
 extern const struct iomap_ops btrfs_dio_iomap_ops;
 extern const struct iomap_dio_ops btrfs_dio_ops;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 004d2dda99aad..505cd20511810 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -10156,6 +10156,504 @@ void btrfs_set_range_writeback(struct btrfs_inode *inode, u64 start, u64 end)
 	}
 }
 
+static int btrfs_encoded_io_compression_from_extent(
+				struct btrfs_fs_info *fs_info,
+				int compress_type)
+{
+	switch (compress_type) {
+	case BTRFS_COMPRESS_NONE:
+		return BTRFS_ENCODED_IO_COMPRESSION_NONE;
+	case BTRFS_COMPRESS_ZLIB:
+		return BTRFS_ENCODED_IO_COMPRESSION_ZLIB;
+	case BTRFS_COMPRESS_LZO:
+		/*
+		 * The LZO format depends on the sector size. 64K is the maximum
+		 * sector size that we support.
+		 */
+		if (fs_info->sectorsize < SZ_4K || fs_info->sectorsize > SZ_64K)
+			return -EINVAL;
+		return BTRFS_ENCODED_IO_COMPRESSION_LZO_4K +
+		       (fs_info->sectorsize_bits - 12);
+	case BTRFS_COMPRESS_ZSTD:
+		return BTRFS_ENCODED_IO_COMPRESSION_ZSTD;
+	default:
+		return -EUCLEAN;
+	}
+}
+
+static ssize_t btrfs_encoded_read_inline(
+				struct kiocb *iocb,
+				struct iov_iter *iter, u64 start,
+				u64 lockend,
+				struct extent_state **cached_state,
+				u64 extent_start, size_t count,
+				struct btrfs_ioctl_encoded_io_args *encoded,
+				bool *unlocked)
+{
+	struct btrfs_inode *inode = BTRFS_I(file_inode(iocb->ki_filp));
+	struct btrfs_root *root = inode->root;
+	struct btrfs_fs_info *fs_info = root->fs_info;
+	struct extent_io_tree *io_tree = &inode->io_tree;
+	struct btrfs_path *path;
+	struct extent_buffer *leaf;
+	struct btrfs_file_extent_item *item;
+	u64 ram_bytes;
+	unsigned long ptr;
+	void *tmp;
+	ssize_t ret;
+
+	path = btrfs_alloc_path();
+	if (!path) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	ret = btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(inode),
+				       extent_start, 0);
+	if (ret) {
+		if (ret > 0) {
+			/* The extent item disappeared? */
+			ret = -EIO;
+		}
+		goto out;
+	}
+	leaf = path->nodes[0];
+	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item);
+
+	ram_bytes = btrfs_file_extent_ram_bytes(leaf, item);
+	ptr = btrfs_file_extent_inline_start(item);
+
+	encoded->len = min_t(u64, extent_start + ram_bytes,
+			     inode->vfs_inode.i_size) - iocb->ki_pos;
+	ret = btrfs_encoded_io_compression_from_extent(fs_info,
+				 btrfs_file_extent_compression(leaf, item));
+	if (ret < 0)
+		goto out;
+	encoded->compression = ret;
+	if (encoded->compression) {
+		size_t inline_size;
+
+		inline_size = btrfs_file_extent_inline_item_len(leaf,
+								path->slots[0]);
+		if (inline_size > count) {
+			ret = -ENOBUFS;
+			goto out;
+		}
+		count = inline_size;
+		encoded->unencoded_len = ram_bytes;
+		encoded->unencoded_offset = iocb->ki_pos - extent_start;
+	} else {
+		count = min_t(u64, count, encoded->len);
+		encoded->len = count;
+		encoded->unencoded_len = count;
+		ptr += iocb->ki_pos - extent_start;
+	}
+
+	tmp = kmalloc(count, GFP_NOFS);
+	if (!tmp) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	read_extent_buffer(leaf, tmp, ptr, count);
+	btrfs_release_path(path);
+	unlock_extent_cached(io_tree, start, lockend, cached_state);
+	btrfs_inode_unlock(&inode->vfs_inode, BTRFS_ILOCK_SHARED);
+	*unlocked = true;
+
+	ret = copy_to_iter(tmp, count, iter);
+	if (ret != count)
+		ret = -EFAULT;
+	kfree(tmp);
+out:
+	btrfs_free_path(path);
+	return ret;
+}
+
+struct btrfs_encoded_read_private {
+	struct btrfs_inode *inode;
+	u64 file_offset;
+	wait_queue_head_t wait;
+	atomic_t pending;
+	blk_status_t status;
+	bool skip_csum;
+};
+
+static blk_status_t submit_encoded_read_bio(struct btrfs_inode *inode,
+					    struct bio *bio, int mirror_num)
+{
+	struct btrfs_encoded_read_private *priv = bio->bi_private;
+	struct btrfs_bio *bbio = btrfs_bio(bio);
+	struct btrfs_fs_info *fs_info = inode->root->fs_info;
+	blk_status_t ret;
+
+	if (!priv->skip_csum) {
+		ret = btrfs_lookup_bio_sums(&inode->vfs_inode, bio, NULL);
+		if (ret)
+			return ret;
+	}
+
+	ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA);
+	if (ret) {
+		btrfs_bio_free_csum(bbio);
+		return ret;
+	}
+
+	atomic_inc(&priv->pending);
+	ret = btrfs_map_bio(fs_info, bio, mirror_num);
+	if (ret) {
+		atomic_dec(&priv->pending);
+		btrfs_bio_free_csum(bbio);
+	}
+	return ret;
+}
+
+static blk_status_t btrfs_encoded_read_verify_csum(struct btrfs_bio *bbio)
+{
+	const bool uptodate = (bbio->bio.bi_status == BLK_STS_OK);
+	struct btrfs_encoded_read_private *priv = bbio->bio.bi_private;
+	struct btrfs_inode *inode = priv->inode;
+	struct btrfs_fs_info *fs_info = inode->root->fs_info;
+	u32 sectorsize = fs_info->sectorsize;
+	struct bio_vec *bvec;
+	struct bvec_iter_all iter_all;
+	u64 start = priv->file_offset;
+	u32 bio_offset = 0;
+
+	if (priv->skip_csum || !uptodate)
+		return bbio->bio.bi_status;
+
+	bio_for_each_segment_all(bvec, &bbio->bio, iter_all) {
+		unsigned int i, nr_sectors, pgoff;
+
+		nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec->bv_len);
+		pgoff = bvec->bv_offset;
+		for (i = 0; i < nr_sectors; i++) {
+			ASSERT(pgoff < PAGE_SIZE);
+			if (check_data_csum(&inode->vfs_inode, bbio, bio_offset,
+					    bvec->bv_page, pgoff, start))
+				return BLK_STS_IOERR;
+			start += sectorsize;
+			bio_offset += sectorsize;
+			pgoff += sectorsize;
+		}
+	}
+	return BLK_STS_OK;
+}
+
+static void btrfs_encoded_read_endio(struct bio *bio)
+{
+	struct btrfs_encoded_read_private *priv = bio->bi_private;
+	struct btrfs_bio *bbio = btrfs_bio(bio);
+	blk_status_t status;
+
+	status = btrfs_encoded_read_verify_csum(bbio);
+	if (status) {
+		/*
+		 * The memory barrier implied by the atomic_dec_return() here
+		 * pairs with the memory barrier implied by the
+		 * atomic_dec_return() or io_wait_event() in
+		 * btrfs_encoded_read_regular_fill_pages() to ensure that this
+		 * write is observed before the load of status in
+		 * btrfs_encoded_read_regular_fill_pages().
+		 */
+		WRITE_ONCE(priv->status, status);
+	}
+	if (!atomic_dec_return(&priv->pending))
+		wake_up(&priv->wait);
+	btrfs_bio_free_csum(bbio);
+	bio_put(bio);
+}
+
+static int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
+						 u64 file_offset,
+						 u64 disk_bytenr,
+						 u64 disk_io_size,
+						 struct page **pages)
+{
+	struct btrfs_fs_info *fs_info = inode->root->fs_info;
+	struct btrfs_encoded_read_private priv = {
+		.inode = inode,
+		.file_offset = file_offset,
+		.pending = ATOMIC_INIT(1),
+		.skip_csum = (inode->flags & BTRFS_INODE_NODATASUM),
+	};
+	unsigned long i = 0;
+	u64 cur = 0;
+	int ret;
+
+	init_waitqueue_head(&priv.wait);
+	/*
+	 * Submit bios for the extent, splitting due to bio or stripe limits as
+	 * necessary.
+	 */
+	while (cur < disk_io_size) {
+		struct extent_map *em;
+		struct btrfs_io_geometry geom;
+		struct bio *bio = NULL;
+		u64 remaining;
+
+		em = btrfs_get_chunk_map(fs_info, disk_bytenr + cur,
+					 disk_io_size - cur);
+		if (IS_ERR(em)) {
+			ret = PTR_ERR(em);
+		} else {
+			ret = btrfs_get_io_geometry(fs_info, em, BTRFS_MAP_READ,
+						    disk_bytenr + cur, &geom);
+			free_extent_map(em);
+		}
+		if (ret) {
+			WRITE_ONCE(priv.status, errno_to_blk_status(ret));
+			break;
+		}
+		remaining = min(geom.len, disk_io_size - cur);
+		while (bio || remaining) {
+			size_t bytes = min_t(u64, remaining, PAGE_SIZE);
+
+			if (!bio) {
+				bio = btrfs_bio_alloc(BIO_MAX_VECS);
+				bio->bi_iter.bi_sector =
+					(disk_bytenr + cur) >> SECTOR_SHIFT;
+				bio->bi_end_io = btrfs_encoded_read_endio;
+				bio->bi_private = &priv;
+				bio->bi_opf = REQ_OP_READ;
+			}
+
+			if (!bytes ||
+			    bio_add_page(bio, pages[i], bytes, 0) < bytes) {
+				blk_status_t status;
+
+				status = submit_encoded_read_bio(inode, bio, 0);
+				if (status) {
+					WRITE_ONCE(priv.status, status);
+					bio_put(bio);
+					goto out;
+				}
+				bio = NULL;
+				continue;
+			}
+
+			i++;
+			cur += bytes;
+			remaining -= bytes;
+		}
+	}
+
+out:
+	if (atomic_dec_return(&priv.pending))
+		io_wait_event(priv.wait, !atomic_read(&priv.pending));
+	/* See btrfs_encoded_read_endio() for ordering. */
+	return blk_status_to_errno(READ_ONCE(priv.status));
+}
+
+static ssize_t btrfs_encoded_read_regular(struct kiocb *iocb,
+					  struct iov_iter *iter,
+					  u64 start, u64 lockend,
+					  struct extent_state **cached_state,
+					  u64 disk_bytenr, u64 disk_io_size,
+					  size_t count, bool compressed,
+					  bool *unlocked)
+{
+	struct btrfs_inode *inode = BTRFS_I(file_inode(iocb->ki_filp));
+	struct extent_io_tree *io_tree = &inode->io_tree;
+	struct page **pages;
+	unsigned long nr_pages, i;
+	u64 cur;
+	size_t page_offset;
+	ssize_t ret;
+
+	nr_pages = DIV_ROUND_UP(disk_io_size, PAGE_SIZE);
+	pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
+	if (!pages)
+		return -ENOMEM;
+	for (i = 0; i < nr_pages; i++) {
+		pages[i] = alloc_page(GFP_NOFS);
+		if (!pages[i]) {
+			ret = -ENOMEM;
+			goto out;
+		}
+	}
+
+	ret = btrfs_encoded_read_regular_fill_pages(inode, start, disk_bytenr,
+						    disk_io_size, pages);
+	if (ret)
+		goto out;
+
+	unlock_extent_cached(io_tree, start, lockend, cached_state);
+	btrfs_inode_unlock(&inode->vfs_inode, BTRFS_ILOCK_SHARED);
+	*unlocked = true;
+
+	if (compressed) {
+		i = 0;
+		page_offset = 0;
+	} else {
+		i = (iocb->ki_pos - start) >> PAGE_SHIFT;
+		page_offset = (iocb->ki_pos - start) & (PAGE_SIZE - 1);
+	}
+	cur = 0;
+	while (cur < count) {
+		size_t bytes = min_t(size_t, count - cur,
+				     PAGE_SIZE - page_offset);
+
+		if (copy_page_to_iter(pages[i], page_offset, bytes,
+				      iter) != bytes) {
+			ret = -EFAULT;
+			goto out;
+		}
+		i++;
+		cur += bytes;
+		page_offset = 0;
+	}
+	ret = count;
+out:
+	for (i = 0; i < nr_pages; i++) {
+		if (pages[i])
+			__free_page(pages[i]);
+	}
+	kfree(pages);
+	return ret;
+}
+
+ssize_t btrfs_encoded_read(struct kiocb *iocb, struct iov_iter *iter,
+			   struct btrfs_ioctl_encoded_io_args *encoded)
+{
+	struct btrfs_inode *inode = BTRFS_I(file_inode(iocb->ki_filp));
+	struct btrfs_fs_info *fs_info = inode->root->fs_info;
+	struct extent_io_tree *io_tree = &inode->io_tree;
+	ssize_t ret;
+	size_t count = iov_iter_count(iter);
+	u64 start, lockend, disk_bytenr, disk_io_size;
+	struct extent_state *cached_state = NULL;
+	struct extent_map *em;
+	bool unlocked = false;
+
+	file_accessed(iocb->ki_filp);
+
+	btrfs_inode_lock(&inode->vfs_inode, BTRFS_ILOCK_SHARED);
+
+	if (iocb->ki_pos >= inode->vfs_inode.i_size) {
+		btrfs_inode_unlock(&inode->vfs_inode, BTRFS_ILOCK_SHARED);
+		return 0;
+	}
+	start = ALIGN_DOWN(iocb->ki_pos, fs_info->sectorsize);
+	/*
+	 * We don't know how long the extent containing iocb->ki_pos is, but if
+	 * it's compressed we know that it won't be longer than this.
+	 */
+	lockend = start + BTRFS_MAX_UNCOMPRESSED - 1;
+
+	for (;;) {
+		struct btrfs_ordered_extent *ordered;
+
+		ret = btrfs_wait_ordered_range(&inode->vfs_inode, start,
+					       lockend - start + 1);
+		if (ret)
+			goto out_unlock_inode;
+		lock_extent_bits(io_tree, start, lockend, &cached_state);
+		ordered = btrfs_lookup_ordered_range(inode, start,
+						     lockend - start + 1);
+		if (!ordered)
+			break;
+		btrfs_put_ordered_extent(ordered);
+		unlock_extent_cached(io_tree, start, lockend, &cached_state);
+		cond_resched();
+	}
+
+	em = btrfs_get_extent(inode, NULL, 0, start, lockend - start + 1);
+	if (IS_ERR(em)) {
+		ret = PTR_ERR(em);
+		goto out_unlock_extent;
+	}
+
+	if (em->block_start == EXTENT_MAP_INLINE) {
+		u64 extent_start = em->start;
+
+		/*
+		 * For inline extents we get everything we need out of the
+		 * extent item.
+		 */
+		free_extent_map(em);
+		em = NULL;
+		ret = btrfs_encoded_read_inline(iocb, iter, start, lockend,
+						&cached_state, extent_start,
+						count, encoded, &unlocked);
+		goto out;
+	}
+
+	/*
+	 * We only want to return up to EOF even if the extent extends beyond
+	 * that.
+	 */
+	encoded->len = min_t(u64, extent_map_end(em),
+			     inode->vfs_inode.i_size) - iocb->ki_pos;
+	if (em->block_start == EXTENT_MAP_HOLE ||
+	    test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
+		disk_bytenr = EXTENT_MAP_HOLE;
+		count = min_t(u64, count, encoded->len);
+		encoded->len = count;
+		encoded->unencoded_len = count;
+	} else if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
+		disk_bytenr = em->block_start;
+		/*
+		 * Bail if the buffer isn't large enough to return the whole
+		 * compressed extent.
+		 */
+		if (em->block_len > count) {
+			ret = -ENOBUFS;
+			goto out_em;
+		}
+		disk_io_size = count = em->block_len;
+		encoded->unencoded_len = em->ram_bytes;
+		encoded->unencoded_offset = iocb->ki_pos - em->orig_start;
+		ret = btrfs_encoded_io_compression_from_extent(fs_info,
+							     em->compress_type);
+		if (ret < 0)
+			goto out_em;
+		encoded->compression = ret;
+	} else {
+		disk_bytenr = em->block_start + (start - em->start);
+		if (encoded->len > count)
+			encoded->len = count;
+		/*
+		 * Don't read beyond what we locked. This also limits the page
+		 * allocations that we'll do.
+		 */
+		disk_io_size = min(lockend + 1, iocb->ki_pos + encoded->len) - start;
+		count = start + disk_io_size - iocb->ki_pos;
+		encoded->len = count;
+		encoded->unencoded_len = count;
+		disk_io_size = ALIGN(disk_io_size, fs_info->sectorsize);
+	}
+	free_extent_map(em);
+	em = NULL;
+
+	if (disk_bytenr == EXTENT_MAP_HOLE) {
+		unlock_extent_cached(io_tree, start, lockend, &cached_state);
+		btrfs_inode_unlock(&inode->vfs_inode, BTRFS_ILOCK_SHARED);
+		unlocked = true;
+		ret = iov_iter_zero(count, iter);
+		if (ret != count)
+			ret = -EFAULT;
+	} else {
+		ret = btrfs_encoded_read_regular(iocb, iter, start, lockend,
+						 &cached_state, disk_bytenr,
+						 disk_io_size, count,
+						 encoded->compression,
+						 &unlocked);
+	}
+
+out:
+	if (ret >= 0)
+		iocb->ki_pos += encoded->len;
+out_em:
+	free_extent_map(em);
+out_unlock_extent:
+	if (!unlocked)
+		unlock_extent_cached(io_tree, start, lockend, &cached_state);
+out_unlock_inode:
+	if (!unlocked)
+		btrfs_inode_unlock(&inode->vfs_inode, BTRFS_ILOCK_SHARED);
+	return ret;
+}
+
 #ifdef CONFIG_SWAP
 /*
  * Add an entry indicating a block group or device which is pinned by a
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 413e2e72b4e24..a6994d2ad8346 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -28,6 +28,7 @@
 #include <linux/iversion.h>
 #include <linux/fileattr.h>
 #include <linux/fsverity.h>
+#include <linux/sched/xacct.h>
 #include "ctree.h"
 #include "disk-io.h"
 #include "export.h"
@@ -88,6 +89,22 @@ struct btrfs_ioctl_send_args_32 {
 
 #define BTRFS_IOC_SEND_32 _IOW(BTRFS_IOCTL_MAGIC, 38, \
 			       struct btrfs_ioctl_send_args_32)
+
+struct btrfs_ioctl_encoded_io_args_32 {
+	compat_uptr_t iov;
+	compat_ulong_t iovcnt;
+	__s64 offset;
+	__u64 flags;
+	__u64 len;
+	__u64 unencoded_len;
+	__u64 unencoded_offset;
+	__u32 compression;
+	__u32 encryption;
+	__u8 reserved[64];
+};
+
+#define BTRFS_IOC_ENCODED_READ_32 _IOR(BTRFS_IOCTL_MAGIC, 64, \
+				       struct btrfs_ioctl_encoded_io_args_32)
 #endif
 
 /* Mask out flags that are inappropriate for the given type of inode. */
@@ -5195,6 +5212,89 @@ static int _btrfs_ioctl_send(struct inode *inode, void __user *argp, bool compat
 	return ret;
 }
 
+static int btrfs_ioctl_encoded_read(struct file *file, void __user *argp,
+				    bool compat)
+{
+	struct btrfs_ioctl_encoded_io_args args = { 0 };
+	size_t copy_end_kernel = offsetofend(struct btrfs_ioctl_encoded_io_args,
+					     flags);
+	size_t copy_end;
+	struct iovec iovstack[UIO_FASTIOV];
+	struct iovec *iov = iovstack;
+	struct iov_iter iter;
+	loff_t pos;
+	struct kiocb kiocb;
+	ssize_t ret;
+
+	if (!capable(CAP_SYS_ADMIN)) {
+		ret = -EPERM;
+		goto out_acct;
+	}
+
+	if (compat) {
+#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
+		struct btrfs_ioctl_encoded_io_args_32 args32;
+
+		copy_end = offsetofend(struct btrfs_ioctl_encoded_io_args_32,
+				       flags);
+		if (copy_from_user(&args32, argp, copy_end)) {
+			ret = -EFAULT;
+			goto out_acct;
+		}
+		args.iov = compat_ptr(args32.iov);
+		args.iovcnt = args32.iovcnt;
+		args.offset = args32.offset;
+		args.flags = args32.flags;
+#else
+		return -ENOTTY;
+#endif
+	} else {
+		copy_end = copy_end_kernel;
+		if (copy_from_user(&args, argp, copy_end)) {
+			ret = -EFAULT;
+			goto out_acct;
+		}
+	}
+	if (args.flags != 0) {
+		ret = -EINVAL;
+		goto out_acct;
+	}
+
+	ret = import_iovec(READ, args.iov, args.iovcnt, ARRAY_SIZE(iovstack),
+			   &iov, &iter);
+	if (ret < 0)
+		goto out_acct;
+
+	if (iov_iter_count(&iter) == 0) {
+		ret = 0;
+		goto out_iov;
+	}
+	pos = args.offset;
+	ret = rw_verify_area(READ, file, &pos, args.len);
+	if (ret < 0)
+		goto out_iov;
+
+	init_sync_kiocb(&kiocb, file);
+	kiocb.ki_pos = pos;
+
+	ret = btrfs_encoded_read(&kiocb, &iter, &args);
+	if (ret >= 0) {
+		fsnotify_access(file);
+		if (copy_to_user(argp + copy_end,
+				 (char *)&args + copy_end_kernel,
+				 sizeof(args) - copy_end_kernel))
+			ret = -EFAULT;
+	}
+
+out_iov:
+	kfree(iov);
+out_acct:
+	if (ret > 0)
+		add_rchar(current, ret);
+	inc_syscr(current);
+	return ret;
+}
+
 long btrfs_ioctl(struct file *file, unsigned int
 		cmd, unsigned long arg)
 {
@@ -5339,6 +5439,12 @@ long btrfs_ioctl(struct file *file, unsigned int
 		return fsverity_ioctl_enable(file, (const void __user *)argp);
 	case FS_IOC_MEASURE_VERITY:
 		return fsverity_ioctl_measure(file, argp);
+	case BTRFS_IOC_ENCODED_READ:
+		return btrfs_ioctl_encoded_read(file, argp, false);
+#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
+	case BTRFS_IOC_ENCODED_READ_32:
+		return btrfs_ioctl_encoded_read(file, argp, true);
+#endif
 	}
 
 	return -ENOTTY;
-- 
GitLab


From 7c0c7269f7b508ba6e4b063a9314d6bd1fb6db22 Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov@fb.com>
Date: Tue, 13 Aug 2019 16:00:02 -0700
Subject: [PATCH 1396/1586] btrfs: add BTRFS_IOC_ENCODED_WRITE

The implementation resembles direct I/O: we have to flush any ordered
extents, invalidate the page cache, and do the io tree/delalloc/extent
map/ordered extent dance. From there, we can reuse the compression code
with a minor modification to distinguish the write from writeback. This
also creates inline extents when possible.

Signed-off-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/compression.c  |   7 +-
 fs/btrfs/compression.h  |   6 +-
 fs/btrfs/ctree.h        |   4 +
 fs/btrfs/file.c         |  64 +++++++++--
 fs/btrfs/inode.c        | 249 +++++++++++++++++++++++++++++++++++++++-
 fs/btrfs/ioctl.c        | 106 +++++++++++++++++
 fs/btrfs/ordered-data.c |  12 +-
 fs/btrfs/ordered-data.h |   5 +-
 8 files changed, 433 insertions(+), 20 deletions(-)

diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 8b3bca269de3d..71d26b8a93965 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -383,7 +383,8 @@ static void finish_compressed_bio_write(struct compressed_bio *cb)
 			cb->start, cb->start + cb->len - 1,
 			!cb->errors);
 
-	end_compressed_writeback(inode, cb);
+	if (cb->writeback)
+		end_compressed_writeback(inode, cb);
 	/* Note, our inode could be gone now */
 
 	/*
@@ -506,7 +507,8 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
 				 struct page **compressed_pages,
 				 unsigned int nr_pages,
 				 unsigned int write_flags,
-				 struct cgroup_subsys_state *blkcg_css)
+				 struct cgroup_subsys_state *blkcg_css,
+				 bool writeback)
 {
 	struct btrfs_fs_info *fs_info = inode->root->fs_info;
 	struct bio *bio = NULL;
@@ -531,6 +533,7 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
 	cb->mirror_num = 0;
 	cb->compressed_pages = compressed_pages;
 	cb->compressed_len = compressed_len;
+	cb->writeback = writeback;
 	cb->orig_bio = NULL;
 	cb->nr_pages = nr_pages;
 
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index 7dbd14caab011..7773953470e39 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -54,6 +54,9 @@ struct compressed_bio {
 	/* The compression algorithm for this bio */
 	u8 compress_type;
 
+	/* Whether this is a write for writeback. */
+	bool writeback;
+
 	/* IO errors */
 	u8 errors;
 	int mirror_num;
@@ -97,7 +100,8 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
 				  struct page **compressed_pages,
 				  unsigned int nr_pages,
 				  unsigned int write_flags,
-				  struct cgroup_subsys_state *blkcg_css);
+				  struct cgroup_subsys_state *blkcg_css,
+				  bool writeback);
 blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 				 int mirror_num, unsigned long bio_flags);
 
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 22a3fef0a9819..5745b37a7e552 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3308,6 +3308,8 @@ void btrfs_writepage_endio_finish_ordered(struct btrfs_inode *inode,
 					  u64 end, bool uptodate);
 ssize_t btrfs_encoded_read(struct kiocb *iocb, struct iov_iter *iter,
 			   struct btrfs_ioctl_encoded_io_args *encoded);
+ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
+			     const struct btrfs_ioctl_encoded_io_args *encoded);
 
 extern const struct dentry_operations btrfs_dentry_operations;
 extern const struct iomap_ops btrfs_dio_iomap_ops;
@@ -3371,6 +3373,8 @@ int btrfs_replace_file_extents(struct btrfs_inode *inode,
 			   struct btrfs_trans_handle **trans_out);
 int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
 			      struct btrfs_inode *inode, u64 start, u64 end);
+ssize_t btrfs_do_write_iter(struct kiocb *iocb, struct iov_iter *from,
+			    const struct btrfs_ioctl_encoded_io_args *encoded);
 int btrfs_release_file(struct inode *inode, struct file *file);
 int btrfs_dirty_pages(struct btrfs_inode *inode, struct page **pages,
 		      size_t num_pages, loff_t pos, size_t write_bytes,
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 7b4a5458ca9b6..c7a56909be8c7 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2037,12 +2037,43 @@ out:
 	return err < 0 ? err : written;
 }
 
-static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
-				    struct iov_iter *from)
+static ssize_t btrfs_encoded_write(struct kiocb *iocb, struct iov_iter *from,
+			const struct btrfs_ioctl_encoded_io_args *encoded)
+{
+	struct file *file = iocb->ki_filp;
+	struct inode *inode = file_inode(file);
+	loff_t count;
+	ssize_t ret;
+
+	btrfs_inode_lock(inode, 0);
+	count = encoded->len;
+	ret = generic_write_checks_count(iocb, &count);
+	if (ret == 0 && count != encoded->len) {
+		/*
+		 * The write got truncated by generic_write_checks_count(). We
+		 * can't do a partial encoded write.
+		 */
+		ret = -EFBIG;
+	}
+	if (ret || encoded->len == 0)
+		goto out;
+
+	ret = btrfs_write_check(iocb, from, encoded->len);
+	if (ret < 0)
+		goto out;
+
+	ret = btrfs_do_encoded_write(iocb, from, encoded);
+out:
+	btrfs_inode_unlock(inode, 0);
+	return ret;
+}
+
+ssize_t btrfs_do_write_iter(struct kiocb *iocb, struct iov_iter *from,
+			    const struct btrfs_ioctl_encoded_io_args *encoded)
 {
 	struct file *file = iocb->ki_filp;
 	struct btrfs_inode *inode = BTRFS_I(file_inode(file));
-	ssize_t num_written = 0;
+	ssize_t num_written, num_sync;
 	const bool sync = iocb->ki_flags & IOCB_DSYNC;
 
 	/*
@@ -2053,22 +2084,28 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
 	if (BTRFS_FS_ERROR(inode->root->fs_info))
 		return -EROFS;
 
-	if (!(iocb->ki_flags & IOCB_DIRECT) &&
-	    (iocb->ki_flags & IOCB_NOWAIT))
+	if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT))
 		return -EOPNOTSUPP;
 
 	if (sync)
 		atomic_inc(&inode->sync_writers);
 
-	if (iocb->ki_flags & IOCB_DIRECT)
-		num_written = btrfs_direct_write(iocb, from);
-	else
-		num_written = btrfs_buffered_write(iocb, from);
+	if (encoded) {
+		num_written = btrfs_encoded_write(iocb, from, encoded);
+		num_sync = encoded->len;
+	} else if (iocb->ki_flags & IOCB_DIRECT) {
+		num_written = num_sync = btrfs_direct_write(iocb, from);
+	} else {
+		num_written = num_sync = btrfs_buffered_write(iocb, from);
+	}
 
 	btrfs_set_inode_last_sub_trans(inode);
 
-	if (num_written > 0)
-		num_written = generic_write_sync(iocb, num_written);
+	if (num_sync > 0) {
+		num_sync = generic_write_sync(iocb, num_sync);
+		if (num_sync < 0)
+			num_written = num_sync;
+	}
 
 	if (sync)
 		atomic_dec(&inode->sync_writers);
@@ -2077,6 +2114,11 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
 	return num_written;
 }
 
+static ssize_t btrfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+	return btrfs_do_write_iter(iocb, from, NULL);
+}
+
 int btrfs_release_file(struct inode *inode, struct file *filp)
 {
 	struct btrfs_file_private *private = filp->private_data;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 505cd20511810..964eab60742fc 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1001,7 +1001,7 @@ static int submit_one_async_extent(struct btrfs_inode *inode,
 			    async_extent->pages,	/* compressed_pages */
 			    async_extent->nr_pages,
 			    async_chunk->write_flags,
-			    async_chunk->blkcg_css)) {
+			    async_chunk->blkcg_css, true)) {
 		const u64 start = async_extent->start;
 		const u64 end = start + async_extent->ram_size - 1;
 
@@ -3001,6 +3001,7 @@ static int insert_ordered_extent_file_extent(struct btrfs_trans_handle *trans,
 	 * except if the ordered extent was truncated.
 	 */
 	update_inode_bytes = test_bit(BTRFS_ORDERED_DIRECT, &oe->flags) ||
+			     test_bit(BTRFS_ORDERED_ENCODED, &oe->flags) ||
 			     test_bit(BTRFS_ORDERED_TRUNCATED, &oe->flags);
 
 	return insert_reserved_file_extent(trans, BTRFS_I(oe->inode),
@@ -3035,7 +3036,8 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
 
 	if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) &&
 	    !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags) &&
-	    !test_bit(BTRFS_ORDERED_DIRECT, &ordered_extent->flags))
+	    !test_bit(BTRFS_ORDERED_DIRECT, &ordered_extent->flags) &&
+	    !test_bit(BTRFS_ORDERED_ENCODED, &ordered_extent->flags))
 		clear_bits |= EXTENT_DELALLOC_NEW;
 
 	freespace_inode = btrfs_is_free_space_inode(inode);
@@ -10654,6 +10656,249 @@ out_unlock_inode:
 	return ret;
 }
 
+ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
+			       const struct btrfs_ioctl_encoded_io_args *encoded)
+{
+	struct btrfs_inode *inode = BTRFS_I(file_inode(iocb->ki_filp));
+	struct btrfs_root *root = inode->root;
+	struct btrfs_fs_info *fs_info = root->fs_info;
+	struct extent_io_tree *io_tree = &inode->io_tree;
+	struct extent_changeset *data_reserved = NULL;
+	struct extent_state *cached_state = NULL;
+	int compression;
+	size_t orig_count;
+	u64 start, end;
+	u64 num_bytes, ram_bytes, disk_num_bytes;
+	unsigned long nr_pages, i;
+	struct page **pages;
+	struct btrfs_key ins;
+	bool extent_reserved = false;
+	struct extent_map *em;
+	ssize_t ret;
+
+	switch (encoded->compression) {
+	case BTRFS_ENCODED_IO_COMPRESSION_ZLIB:
+		compression = BTRFS_COMPRESS_ZLIB;
+		break;
+	case BTRFS_ENCODED_IO_COMPRESSION_ZSTD:
+		compression = BTRFS_COMPRESS_ZSTD;
+		break;
+	case BTRFS_ENCODED_IO_COMPRESSION_LZO_4K:
+	case BTRFS_ENCODED_IO_COMPRESSION_LZO_8K:
+	case BTRFS_ENCODED_IO_COMPRESSION_LZO_16K:
+	case BTRFS_ENCODED_IO_COMPRESSION_LZO_32K:
+	case BTRFS_ENCODED_IO_COMPRESSION_LZO_64K:
+		/* The sector size must match for LZO. */
+		if (encoded->compression -
+		    BTRFS_ENCODED_IO_COMPRESSION_LZO_4K + 12 !=
+		    fs_info->sectorsize_bits)
+			return -EINVAL;
+		compression = BTRFS_COMPRESS_LZO;
+		break;
+	default:
+		return -EINVAL;
+	}
+	if (encoded->encryption != BTRFS_ENCODED_IO_ENCRYPTION_NONE)
+		return -EINVAL;
+
+	orig_count = iov_iter_count(from);
+
+	/* The extent size must be sane. */
+	if (encoded->unencoded_len > BTRFS_MAX_UNCOMPRESSED ||
+	    orig_count > BTRFS_MAX_COMPRESSED || orig_count == 0)
+		return -EINVAL;
+
+	/*
+	 * The compressed data must be smaller than the decompressed data.
+	 *
+	 * It's of course possible for data to compress to larger or the same
+	 * size, but the buffered I/O path falls back to no compression for such
+	 * data, and we don't want to break any assumptions by creating these
+	 * extents.
+	 *
+	 * Note that this is less strict than the current check we have that the
+	 * compressed data must be at least one sector smaller than the
+	 * decompressed data. We only want to enforce the weaker requirement
+	 * from old kernels that it is at least one byte smaller.
+	 */
+	if (orig_count >= encoded->unencoded_len)
+		return -EINVAL;
+
+	/* The extent must start on a sector boundary. */
+	start = iocb->ki_pos;
+	if (!IS_ALIGNED(start, fs_info->sectorsize))
+		return -EINVAL;
+
+	/*
+	 * The extent must end on a sector boundary. However, we allow a write
+	 * which ends at or extends i_size to have an unaligned length; we round
+	 * up the extent size and set i_size to the unaligned end.
+	 */
+	if (start + encoded->len < inode->vfs_inode.i_size &&
+	    !IS_ALIGNED(start + encoded->len, fs_info->sectorsize))
+		return -EINVAL;
+
+	/* Finally, the offset in the unencoded data must be sector-aligned. */
+	if (!IS_ALIGNED(encoded->unencoded_offset, fs_info->sectorsize))
+		return -EINVAL;
+
+	num_bytes = ALIGN(encoded->len, fs_info->sectorsize);
+	ram_bytes = ALIGN(encoded->unencoded_len, fs_info->sectorsize);
+	end = start + num_bytes - 1;
+
+	/*
+	 * If the extent cannot be inline, the compressed data on disk must be
+	 * sector-aligned. For convenience, we extend it with zeroes if it
+	 * isn't.
+	 */
+	disk_num_bytes = ALIGN(orig_count, fs_info->sectorsize);
+	nr_pages = DIV_ROUND_UP(disk_num_bytes, PAGE_SIZE);
+	pages = kvcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL_ACCOUNT);
+	if (!pages)
+		return -ENOMEM;
+	for (i = 0; i < nr_pages; i++) {
+		size_t bytes = min_t(size_t, PAGE_SIZE, iov_iter_count(from));
+		char *kaddr;
+
+		pages[i] = alloc_page(GFP_KERNEL_ACCOUNT);
+		if (!pages[i]) {
+			ret = -ENOMEM;
+			goto out_pages;
+		}
+		kaddr = kmap(pages[i]);
+		if (copy_from_iter(kaddr, bytes, from) != bytes) {
+			kunmap(pages[i]);
+			ret = -EFAULT;
+			goto out_pages;
+		}
+		if (bytes < PAGE_SIZE)
+			memset(kaddr + bytes, 0, PAGE_SIZE - bytes);
+		kunmap(pages[i]);
+	}
+
+	for (;;) {
+		struct btrfs_ordered_extent *ordered;
+
+		ret = btrfs_wait_ordered_range(&inode->vfs_inode, start, num_bytes);
+		if (ret)
+			goto out_pages;
+		ret = invalidate_inode_pages2_range(inode->vfs_inode.i_mapping,
+						    start >> PAGE_SHIFT,
+						    end >> PAGE_SHIFT);
+		if (ret)
+			goto out_pages;
+		lock_extent_bits(io_tree, start, end, &cached_state);
+		ordered = btrfs_lookup_ordered_range(inode, start, num_bytes);
+		if (!ordered &&
+		    !filemap_range_has_page(inode->vfs_inode.i_mapping, start, end))
+			break;
+		if (ordered)
+			btrfs_put_ordered_extent(ordered);
+		unlock_extent_cached(io_tree, start, end, &cached_state);
+		cond_resched();
+	}
+
+	/*
+	 * We don't use the higher-level delalloc space functions because our
+	 * num_bytes and disk_num_bytes are different.
+	 */
+	ret = btrfs_alloc_data_chunk_ondemand(inode, disk_num_bytes);
+	if (ret)
+		goto out_unlock;
+	ret = btrfs_qgroup_reserve_data(inode, &data_reserved, start, num_bytes);
+	if (ret)
+		goto out_free_data_space;
+	ret = btrfs_delalloc_reserve_metadata(inode, num_bytes, disk_num_bytes);
+	if (ret)
+		goto out_qgroup_free_data;
+
+	/* Try an inline extent first. */
+	if (start == 0 && encoded->unencoded_len == encoded->len &&
+	    encoded->unencoded_offset == 0) {
+		ret = cow_file_range_inline(inode, encoded->len, orig_count,
+					    compression, pages, true);
+		if (ret <= 0) {
+			if (ret == 0)
+				ret = orig_count;
+			goto out_delalloc_release;
+		}
+	}
+
+	ret = btrfs_reserve_extent(root, disk_num_bytes, disk_num_bytes,
+				   disk_num_bytes, 0, 0, &ins, 1, 1);
+	if (ret)
+		goto out_delalloc_release;
+	extent_reserved = true;
+
+	em = create_io_em(inode, start, num_bytes,
+			  start - encoded->unencoded_offset, ins.objectid,
+			  ins.offset, ins.offset, ram_bytes, compression,
+			  BTRFS_ORDERED_COMPRESSED);
+	if (IS_ERR(em)) {
+		ret = PTR_ERR(em);
+		goto out_free_reserved;
+	}
+	free_extent_map(em);
+
+	ret = btrfs_add_ordered_extent(inode, start, num_bytes, ram_bytes,
+				       ins.objectid, ins.offset,
+				       encoded->unencoded_offset,
+				       (1 << BTRFS_ORDERED_ENCODED) |
+				       (1 << BTRFS_ORDERED_COMPRESSED),
+				       compression);
+	if (ret) {
+		btrfs_drop_extent_cache(inode, start, end, 0);
+		goto out_free_reserved;
+	}
+	btrfs_dec_block_group_reservations(fs_info, ins.objectid);
+
+	if (start + encoded->len > inode->vfs_inode.i_size)
+		i_size_write(&inode->vfs_inode, start + encoded->len);
+
+	unlock_extent_cached(io_tree, start, end, &cached_state);
+
+	btrfs_delalloc_release_extents(inode, num_bytes);
+
+	if (btrfs_submit_compressed_write(inode, start, num_bytes, ins.objectid,
+					  ins.offset, pages, nr_pages, 0, NULL,
+					  false)) {
+		btrfs_writepage_endio_finish_ordered(inode, pages[0], start, end, 0);
+		ret = -EIO;
+		goto out_pages;
+	}
+	ret = orig_count;
+	goto out;
+
+out_free_reserved:
+	btrfs_dec_block_group_reservations(fs_info, ins.objectid);
+	btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1);
+out_delalloc_release:
+	btrfs_delalloc_release_extents(inode, num_bytes);
+	btrfs_delalloc_release_metadata(inode, disk_num_bytes, ret < 0);
+out_qgroup_free_data:
+	if (ret < 0)
+		btrfs_qgroup_free_data(inode, data_reserved, start, num_bytes);
+out_free_data_space:
+	/*
+	 * If btrfs_reserve_extent() succeeded, then we already decremented
+	 * bytes_may_use.
+	 */
+	if (!extent_reserved)
+		btrfs_free_reserved_data_space_noquota(fs_info, disk_num_bytes);
+out_unlock:
+	unlock_extent_cached(io_tree, start, end, &cached_state);
+out_pages:
+	for (i = 0; i < nr_pages; i++) {
+		if (pages[i])
+			__free_page(pages[i]);
+	}
+	kvfree(pages);
+out:
+	if (ret >= 0)
+		iocb->ki_pos += encoded->len;
+	return ret;
+}
+
 #ifdef CONFIG_SWAP
 /*
  * Add an entry indicating a block group or device which is pinned by a
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index a6994d2ad8346..238cee5b5254d 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -105,6 +105,8 @@ struct btrfs_ioctl_encoded_io_args_32 {
 
 #define BTRFS_IOC_ENCODED_READ_32 _IOR(BTRFS_IOCTL_MAGIC, 64, \
 				       struct btrfs_ioctl_encoded_io_args_32)
+#define BTRFS_IOC_ENCODED_WRITE_32 _IOW(BTRFS_IOCTL_MAGIC, 64, \
+					struct btrfs_ioctl_encoded_io_args_32)
 #endif
 
 /* Mask out flags that are inappropriate for the given type of inode. */
@@ -5295,6 +5297,106 @@ out_acct:
 	return ret;
 }
 
+static int btrfs_ioctl_encoded_write(struct file *file, void __user *argp, bool compat)
+{
+	struct btrfs_ioctl_encoded_io_args args;
+	struct iovec iovstack[UIO_FASTIOV];
+	struct iovec *iov = iovstack;
+	struct iov_iter iter;
+	loff_t pos;
+	struct kiocb kiocb;
+	ssize_t ret;
+
+	if (!capable(CAP_SYS_ADMIN)) {
+		ret = -EPERM;
+		goto out_acct;
+	}
+
+	if (!(file->f_mode & FMODE_WRITE)) {
+		ret = -EBADF;
+		goto out_acct;
+	}
+
+	if (compat) {
+#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
+		struct btrfs_ioctl_encoded_io_args_32 args32;
+
+		if (copy_from_user(&args32, argp, sizeof(args32))) {
+			ret = -EFAULT;
+			goto out_acct;
+		}
+		args.iov = compat_ptr(args32.iov);
+		args.iovcnt = args32.iovcnt;
+		args.offset = args32.offset;
+		args.flags = args32.flags;
+		args.len = args32.len;
+		args.unencoded_len = args32.unencoded_len;
+		args.unencoded_offset = args32.unencoded_offset;
+		args.compression = args32.compression;
+		args.encryption = args32.encryption;
+		memcpy(args.reserved, args32.reserved, sizeof(args.reserved));
+#else
+		return -ENOTTY;
+#endif
+	} else {
+		if (copy_from_user(&args, argp, sizeof(args))) {
+			ret = -EFAULT;
+			goto out_acct;
+		}
+	}
+
+	ret = -EINVAL;
+	if (args.flags != 0)
+		goto out_acct;
+	if (memchr_inv(args.reserved, 0, sizeof(args.reserved)))
+		goto out_acct;
+	if (args.compression == BTRFS_ENCODED_IO_COMPRESSION_NONE &&
+	    args.encryption == BTRFS_ENCODED_IO_ENCRYPTION_NONE)
+		goto out_acct;
+	if (args.compression >= BTRFS_ENCODED_IO_COMPRESSION_TYPES ||
+	    args.encryption >= BTRFS_ENCODED_IO_ENCRYPTION_TYPES)
+		goto out_acct;
+	if (args.unencoded_offset > args.unencoded_len)
+		goto out_acct;
+	if (args.len > args.unencoded_len - args.unencoded_offset)
+		goto out_acct;
+
+	ret = import_iovec(WRITE, args.iov, args.iovcnt, ARRAY_SIZE(iovstack),
+			   &iov, &iter);
+	if (ret < 0)
+		goto out_acct;
+
+	file_start_write(file);
+
+	if (iov_iter_count(&iter) == 0) {
+		ret = 0;
+		goto out_end_write;
+	}
+	pos = args.offset;
+	ret = rw_verify_area(WRITE, file, &pos, args.len);
+	if (ret < 0)
+		goto out_end_write;
+
+	init_sync_kiocb(&kiocb, file);
+	ret = kiocb_set_rw_flags(&kiocb, 0);
+	if (ret)
+		goto out_end_write;
+	kiocb.ki_pos = pos;
+
+	ret = btrfs_do_write_iter(&kiocb, &iter, &args);
+	if (ret > 0)
+		fsnotify_modify(file);
+
+out_end_write:
+	file_end_write(file);
+	kfree(iov);
+out_acct:
+	if (ret > 0)
+		add_wchar(current, ret);
+	inc_syscw(current);
+	return ret;
+}
+
 long btrfs_ioctl(struct file *file, unsigned int
 		cmd, unsigned long arg)
 {
@@ -5441,9 +5543,13 @@ long btrfs_ioctl(struct file *file, unsigned int
 		return fsverity_ioctl_measure(file, argp);
 	case BTRFS_IOC_ENCODED_READ:
 		return btrfs_ioctl_encoded_read(file, argp, false);
+	case BTRFS_IOC_ENCODED_WRITE:
+		return btrfs_ioctl_encoded_write(file, argp, false);
 #if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
 	case BTRFS_IOC_ENCODED_READ_32:
 		return btrfs_ioctl_encoded_read(file, argp, true);
+	case BTRFS_IOC_ENCODED_WRITE_32:
+		return btrfs_ioctl_encoded_write(file, argp, true);
 #endif
 	}
 
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 2ce7a4b4f7d0d..1957b14b329a7 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -522,9 +522,15 @@ void btrfs_remove_ordered_extent(struct btrfs_inode *btrfs_inode,
 	spin_lock(&btrfs_inode->lock);
 	btrfs_mod_outstanding_extents(btrfs_inode, -1);
 	spin_unlock(&btrfs_inode->lock);
-	if (root != fs_info->tree_root)
-		btrfs_delalloc_release_metadata(btrfs_inode, entry->num_bytes,
-						false);
+	if (root != fs_info->tree_root) {
+		u64 release;
+
+		if (test_bit(BTRFS_ORDERED_ENCODED, &entry->flags))
+			release = entry->disk_num_bytes;
+		else
+			release = entry->num_bytes;
+		btrfs_delalloc_release_metadata(btrfs_inode, release, false);
+	}
 
 	percpu_counter_add_batch(&fs_info->ordered_bytes, -entry->num_bytes,
 				 fs_info->delalloc_batch);
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index d5d41f2290c15..ecad67a2c7457 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -74,6 +74,8 @@ enum {
 	BTRFS_ORDERED_LOGGED_CSUM,
 	/* We wait for this extent to complete in the current transaction */
 	BTRFS_ORDERED_PENDING,
+	/* BTRFS_IOC_ENCODED_WRITE */
+	BTRFS_ORDERED_ENCODED,
 };
 
 /* BTRFS_ORDERED_* flags that specify the type of the extent. */
@@ -81,7 +83,8 @@ enum {
 				  (1UL << BTRFS_ORDERED_NOCOW) |	\
 				  (1UL << BTRFS_ORDERED_PREALLOC) |	\
 				  (1UL << BTRFS_ORDERED_COMPRESSED) |	\
-				  (1UL << BTRFS_ORDERED_DIRECT))
+				  (1UL << BTRFS_ORDERED_DIRECT) |	\
+				  (1UL << BTRFS_ORDERED_ENCODED))
 
 struct btrfs_ordered_extent {
 	/* logical offset in the file */
-- 
GitLab


From 03ddb19d2ea745228879b9334f3b550c88acb10a Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Fri, 18 Feb 2022 10:03:22 -0500
Subject: [PATCH 1397/1586] btrfs: make search_csum_tree return 0 if we get
 -EFBIG

We can either fail to find a csum entry at all and return -ENOENT, or we
can find a range that is close, but return -EFBIG.  In essence these
both mean the same thing when we are doing a lookup for a csum in an
existing range, we didn't find a csum.  We want to treat both of these
errors the same way, complain loudly that there wasn't a csum.  This
currently happens anyway because we do

	count = search_csum_tree();
	if (count <= 0) {
		// reloc and error handling
	}

However it forces us to incorrectly treat EIO or ENOMEM errors as on
disk corruption.  Fix this by returning 0 if we get either -ENOENT or
-EFBIG from btrfs_lookup_csum() so we can do proper error handling.

Reviewed-by: Boris Burkov <boris@bur.io>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/file-item.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 2907314a41b42..1c6c621d1b0f7 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -305,7 +305,7 @@ found:
 	read_extent_buffer(path->nodes[0], dst, (unsigned long)item,
 			ret * csum_size);
 out:
-	if (ret == -ENOENT)
+	if (ret == -ENOENT || ret == -EFBIG)
 		ret = 0;
 	return ret;
 }
-- 
GitLab


From 1784b7d502a94b561eae58249adde5f72c26eb3c Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Fri, 18 Feb 2022 10:03:23 -0500
Subject: [PATCH 1398/1586] btrfs: handle csum lookup errors properly on reads

Currently any error we get while trying to lookup csums during reads
shows up as a missing csum, and then on the read completion side we
print an error saying there was a csum mismatch and we increase the
device corruption count.

However we could have gotten an EIO from the lookup.  We could also be
inside of a memory constrained container and gotten a ENOMEM while
trying to do the read.  In either case we don't want to make this look
like a file system corruption problem, we want to make it look like the
actual error it is.  Capture any negative value, convert it to the
appropriate blk_status_t, free the csum array if we have one and bail.

Note: a possible improvement would be to make the relocation code look
up the owning inode and see if it's marked as NODATASUM and set
EXTENT_NODATASUM there, that way if there's corruption and there isn't a
checksum when we want it we can fail here rather than later.

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/file-item.c | 36 ++++++++++++++++++++++--------------
 1 file changed, 22 insertions(+), 14 deletions(-)

diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 1c6c621d1b0f7..c828f971a3462 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -368,6 +368,7 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+	struct btrfs_bio *bbio = NULL;
 	struct btrfs_path *path;
 	const u32 sectorsize = fs_info->sectorsize;
 	const u32 csum_size = fs_info->csum_size;
@@ -377,6 +378,7 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst
 	u8 *csum;
 	const unsigned int nblocks = orig_len >> fs_info->sectorsize_bits;
 	int count = 0;
+	blk_status_t ret = BLK_STS_OK;
 
 	if ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) ||
 	    test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state))
@@ -400,7 +402,7 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst
 		return BLK_STS_RESOURCE;
 
 	if (!dst) {
-		struct btrfs_bio *bbio = btrfs_bio(bio);
+		bbio = btrfs_bio(bio);
 
 		if (nblocks * csum_size > BTRFS_BIO_INLINE_CSUM_SIZE) {
 			bbio->csum = kmalloc_array(nblocks, csum_size, GFP_NOFS);
@@ -456,21 +458,27 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst
 
 		count = search_csum_tree(fs_info, path, cur_disk_bytenr,
 					 search_len, csum_dst);
-		if (count <= 0) {
-			/*
-			 * Either we hit a critical error or we didn't find
-			 * the csum.
-			 * Either way, we put zero into the csums dst, and skip
-			 * to the next sector.
-			 */
+		if (count < 0) {
+			ret = errno_to_blk_status(count);
+			if (bbio)
+				btrfs_bio_free_csum(bbio);
+			break;
+		}
+
+		/*
+		 * We didn't find a csum for this range.  We need to make sure
+		 * we complain loudly about this, because we are not NODATASUM.
+		 *
+		 * However for the DATA_RELOC inode we could potentially be
+		 * relocating data extents for a NODATASUM inode, so the inode
+		 * itself won't be marked with NODATASUM, but the extent we're
+		 * copying is in fact NODATASUM.  If we don't find a csum we
+		 * assume this is the case.
+		 */
+		if (count == 0) {
 			memset(csum_dst, 0, csum_size);
 			count = 1;
 
-			/*
-			 * For data reloc inode, we need to mark the range
-			 * NODATASUM so that balance won't report false csum
-			 * error.
-			 */
 			if (BTRFS_I(inode)->root->root_key.objectid ==
 			    BTRFS_DATA_RELOC_TREE_OBJECTID) {
 				u64 file_offset;
@@ -491,7 +499,7 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst
 	}
 
 	btrfs_free_path(path);
-	return BLK_STS_OK;
+	return ret;
 }
 
 int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
-- 
GitLab


From b0bbc8a3d49371a13ef9a5ff54fe2edfb4fb02d7 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Fri, 18 Feb 2022 10:03:24 -0500
Subject: [PATCH 1399/1586] btrfs: check correct bio in
 finish_compressed_bio_read

Commit c09abff87f90 ("btrfs: cloned bios must not be iterated by
bio_for_each_segment_all") added ASSERT()'s to make sure we weren't
calling bio_for_each_segment_all() on a RAID5/6 bio.  However it was
checking the bio that the compression code passed in, not the
cb->orig_bio that we actually iterate over, so adjust this ASSERT() to
check the correct bio.

Reviewed-by: Boris Burkov <boris@bur.io>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/compression.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 71d26b8a93965..cba3a844f21d5 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -259,7 +259,7 @@ static void finish_compressed_bio_read(struct compressed_bio *cb, struct bio *bi
 		 * We have verified the checksum already, set page checked so
 		 * the end_io handlers know about it
 		 */
-		ASSERT(!bio_flagged(bio, BIO_CLONED));
+		ASSERT(!bio_flagged(cb->orig_bio, BIO_CLONED));
 		bio_for_each_segment_all(bvec, cb->orig_bio, iter_all) {
 			u64 bvec_start = page_offset(bvec->bv_page) +
 					 bvec->bv_offset;
-- 
GitLab


From e14bfdb5a1f5fbb0a45f64a763c8da00637072d1 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Fri, 18 Feb 2022 10:03:25 -0500
Subject: [PATCH 1400/1586] btrfs: remove the bio argument from
 finish_compressed_bio_read

This bio is usually one of the compressed bio's, and we don't actually
need it in this function, so remove the argument and stop passing it
around.

Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/compression.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index cba3a844f21d5..14409b3c838a1 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -234,7 +234,7 @@ static bool dec_and_test_compressed_bio(struct compressed_bio *cb, struct bio *b
 	return last_io;
 }
 
-static void finish_compressed_bio_read(struct compressed_bio *cb, struct bio *bio)
+static void finish_compressed_bio_read(struct compressed_bio *cb)
 {
 	unsigned int index;
 	struct page *page;
@@ -253,8 +253,6 @@ static void finish_compressed_bio_read(struct compressed_bio *cb, struct bio *bi
 		struct bio_vec *bvec;
 		struct bvec_iter_all iter_all;
 
-		ASSERT(bio);
-		ASSERT(!bio->bi_status);
 		/*
 		 * We have verified the checksum already, set page checked so
 		 * the end_io handlers know about it
@@ -325,7 +323,7 @@ static void end_compressed_bio_read(struct bio *bio)
 csum_failed:
 	if (ret)
 		cb->errors = 1;
-	finish_compressed_bio_read(cb, bio);
+	finish_compressed_bio_read(cb);
 out:
 	bio_put(bio);
 }
@@ -973,7 +971,7 @@ finish_cb:
 	 */
 	ASSERT(refcount_read(&cb->pending_sectors));
 	/* Now we are the only one referring @cb, can finish it safely. */
-	finish_compressed_bio_read(cb, NULL);
+	finish_compressed_bio_read(cb);
 	return ret;
 }
 
-- 
GitLab


From 606f82e797e26013c80d25790639587a5d4dfbb7 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Fri, 18 Feb 2022 10:03:26 -0500
Subject: [PATCH 1401/1586] btrfs: track compressed bio errors as blk_status_t

Right now we just have a binary "errors" flag, so any error we get on
the compressed bio's gets translated to EIO.  This isn't necessarily a
bad thing, but if we get an ENOMEM it may be nice to know that's what
happened instead of an EIO.  Track our errors as a blk_status_t, and do
the appropriate setting of the errors accordingly.

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/compression.c | 24 +++++++++++++-----------
 fs/btrfs/compression.h |  2 +-
 2 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 14409b3c838a1..f459e1832cad9 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -219,7 +219,7 @@ static bool dec_and_test_compressed_bio(struct compressed_bio *cb, struct bio *b
 		bi_size += bvec->bv_len;
 
 	if (bio->bi_status)
-		cb->errors = 1;
+		cb->status = bio->bi_status;
 
 	ASSERT(bi_size && bi_size <= cb->compressed_len);
 	last_io = refcount_sub_and_test(bi_size >> fs_info->sectorsize_bits,
@@ -247,8 +247,9 @@ static void finish_compressed_bio_read(struct compressed_bio *cb)
 	}
 
 	/* Do io completion on the original bio */
-	if (cb->errors) {
-		bio_io_error(cb->orig_bio);
+	if (cb->status != BLK_STS_OK) {
+		cb->orig_bio->bi_status = cb->status;
+		bio_endio(cb->orig_bio);
 	} else {
 		struct bio_vec *bvec;
 		struct bvec_iter_all iter_all;
@@ -306,7 +307,7 @@ static void end_compressed_bio_read(struct bio *bio)
 	 * Some IO in this cb have failed, just skip checksum as there
 	 * is no way it could be correct.
 	 */
-	if (cb->errors == 1)
+	if (cb->status != BLK_STS_OK)
 		goto csum_failed;
 
 	inode = cb->inode;
@@ -322,7 +323,7 @@ static void end_compressed_bio_read(struct bio *bio)
 
 csum_failed:
 	if (ret)
-		cb->errors = 1;
+		cb->status = errno_to_blk_status(ret);
 	finish_compressed_bio_read(cb);
 out:
 	bio_put(bio);
@@ -340,11 +341,12 @@ static noinline void end_compressed_writeback(struct inode *inode,
 	unsigned long end_index = (cb->start + cb->len - 1) >> PAGE_SHIFT;
 	struct page *pages[16];
 	unsigned long nr_pages = end_index - index + 1;
+	const int errno = blk_status_to_errno(cb->status);
 	int i;
 	int ret;
 
-	if (cb->errors)
-		mapping_set_error(inode->i_mapping, -EIO);
+	if (errno)
+		mapping_set_error(inode->i_mapping, errno);
 
 	while (nr_pages > 0) {
 		ret = find_get_pages_contig(inode->i_mapping, index,
@@ -356,7 +358,7 @@ static noinline void end_compressed_writeback(struct inode *inode,
 			continue;
 		}
 		for (i = 0; i < ret; i++) {
-			if (cb->errors)
+			if (errno)
 				SetPageError(pages[i]);
 			btrfs_page_clamp_clear_writeback(fs_info, pages[i],
 							 cb->start, cb->len);
@@ -379,7 +381,7 @@ static void finish_compressed_bio_write(struct compressed_bio *cb)
 	 */
 	btrfs_writepage_endio_finish_ordered(BTRFS_I(inode), NULL,
 			cb->start, cb->start + cb->len - 1,
-			!cb->errors);
+			cb->status == BLK_STS_OK);
 
 	if (cb->writeback)
 		end_compressed_writeback(inode, cb);
@@ -524,7 +526,7 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
 	if (!cb)
 		return BLK_STS_RESOURCE;
 	refcount_set(&cb->pending_sectors, compressed_len >> fs_info->sectorsize_bits);
-	cb->errors = 0;
+	cb->status = BLK_STS_OK;
 	cb->inode = &inode->vfs_inode;
 	cb->start = start;
 	cb->len = len;
@@ -832,7 +834,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 		goto out;
 
 	refcount_set(&cb->pending_sectors, compressed_len >> fs_info->sectorsize_bits);
-	cb->errors = 0;
+	cb->status = BLK_STS_OK;
 	cb->inode = inode;
 	cb->mirror_num = mirror_num;
 	sums = cb->sums;
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index 7773953470e39..ac5b20731d2ad 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -58,7 +58,7 @@ struct compressed_bio {
 	bool writeback;
 
 	/* IO errors */
-	u8 errors;
+	blk_status_t status;
 	int mirror_num;
 
 	/* for reads, this is the bio we are copying the data into */
-- 
GitLab


From f9f15de85d74e7eef021af059ca53a15f041cdd8 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Fri, 18 Feb 2022 10:03:27 -0500
Subject: [PATCH 1402/1586] btrfs: do not double complete bio on errors during
 compressed reads

I hit some weird panics while fixing up the error handling from
btrfs_lookup_bio_sums().  Turns out the compression path will complete
the bio we use if we set up any of the compression bios and then return
an error, and then btrfs_submit_data_bio() will also call bio_endio() on
the bio.

Fix this by making btrfs_submit_compressed_read() responsible for
calling bio_endio() on the bio if there are any errors.  Currently it
was only doing it if we created the compression bios, otherwise it was
depending on btrfs_submit_data_bio() to do the right thing.  This
creates the above problem, so fix up btrfs_submit_compressed_read() to
always call bio_endio() in case of an error, and then simply return from
btrfs_submit_data_bio() if we had to call
btrfs_submit_compressed_read().

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/compression.c | 20 ++++++++++++++------
 fs/btrfs/inode.c       |  8 +++++++-
 2 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index f459e1832cad9..be476f094300a 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -811,7 +811,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 	u64 em_len;
 	u64 em_start;
 	struct extent_map *em;
-	blk_status_t ret = BLK_STS_RESOURCE;
+	blk_status_t ret;
 	int faili = 0;
 	u8 *sums;
 
@@ -824,14 +824,18 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 	read_lock(&em_tree->lock);
 	em = lookup_extent_mapping(em_tree, file_offset, fs_info->sectorsize);
 	read_unlock(&em_tree->lock);
-	if (!em)
-		return BLK_STS_IOERR;
+	if (!em) {
+		ret = BLK_STS_IOERR;
+		goto out;
+	}
 
 	ASSERT(em->compress_type != BTRFS_COMPRESS_NONE);
 	compressed_len = em->block_len;
 	cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
-	if (!cb)
+	if (!cb) {
+		ret = BLK_STS_RESOURCE;
 		goto out;
+	}
 
 	refcount_set(&cb->pending_sectors, compressed_len >> fs_info->sectorsize_bits);
 	cb->status = BLK_STS_OK;
@@ -854,8 +858,10 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 	nr_pages = DIV_ROUND_UP(compressed_len, PAGE_SIZE);
 	cb->compressed_pages = kcalloc(nr_pages, sizeof(struct page *),
 				       GFP_NOFS);
-	if (!cb->compressed_pages)
+	if (!cb->compressed_pages) {
+		ret = BLK_STS_RESOURCE;
 		goto fail1;
+	}
 
 	for (pg_index = 0; pg_index < nr_pages; pg_index++) {
 		cb->compressed_pages[pg_index] = alloc_page(GFP_NOFS);
@@ -941,7 +947,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 			comp_bio = NULL;
 		}
 	}
-	return 0;
+	return BLK_STS_OK;
 
 fail2:
 	while (faili >= 0) {
@@ -954,6 +960,8 @@ fail1:
 	kfree(cb);
 out:
 	free_extent_map(em);
+	bio->bi_status = ret;
+	bio_endio(bio);
 	return ret;
 finish_cb:
 	if (comp_bio) {
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 964eab60742fc..53f46d5c78c77 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2543,10 +2543,15 @@ blk_status_t btrfs_submit_data_bio(struct inode *inode, struct bio *bio,
 			goto out;
 
 		if (bio_flags & EXTENT_BIO_COMPRESSED) {
+			/*
+			 * btrfs_submit_compressed_read will handle completing
+			 * the bio if there were any errors, so just return
+			 * here.
+			 */
 			ret = btrfs_submit_compressed_read(inode, bio,
 							   mirror_num,
 							   bio_flags);
-			goto out;
+			goto out_no_endio;
 		} else {
 			/*
 			 * Lookup bio sums does extra checks around whether we
@@ -2580,6 +2585,7 @@ out:
 		bio->bi_status = ret;
 		bio_endio(bio);
 	}
+out_no_endio:
 	return ret;
 }
 
-- 
GitLab


From 510671d2d83668b76c08baec5faa2319ceaffd62 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Fri, 18 Feb 2022 10:03:28 -0500
Subject: [PATCH 1403/1586] btrfs: do not try to repair bio that has no mirror
 set

If we fail to submit a bio for whatever reason, we may not have setup a
mirror_num for that bio.  This means we shouldn't try to do the repair
workflow, if we do we'll hit an BUG_ON(!failrec->this_mirror) in
clean_io_failure.  Instead simply skip the repair workflow if we have no
mirror set, and add an assert to btrfs_check_repairable() to make it
easier to catch what is happening in the future.

Reviewed-by: Boris Burkov <boris@bur.io>
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_io.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 13155e57e8c23..6a095e9962219 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2610,6 +2610,7 @@ static bool btrfs_check_repairable(struct inode *inode,
 	 * a good copy of the failed sector and if we succeed, we have setup
 	 * everything for repair_io_failure to do the rest for us.
 	 */
+	ASSERT(failed_mirror);
 	failrec->failed_mirror = failed_mirror;
 	failrec->this_mirror++;
 	if (failrec->this_mirror == failed_mirror)
@@ -3067,6 +3068,14 @@ static void end_bio_extent_readpage(struct bio *bio)
 			goto readpage_ok;
 
 		if (is_data_inode(inode)) {
+			/*
+			 * If we failed to submit the IO at all we'll have a
+			 * mirror_num == 0, in which case we need to just mark
+			 * the page with an error and unlock it and carry on.
+			 */
+			if (mirror == 0)
+				goto readpage_ok;
+
 			/*
 			 * btrfs_submit_read_repair() will handle all the good
 			 * and bad sectors, we just continue to the next bvec.
-- 
GitLab


From 8cbc3001a3264d998d6b6db3e23f935c158abd4d Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Fri, 18 Feb 2022 10:03:29 -0500
Subject: [PATCH 1404/1586] btrfs: do not clean up repair bio if submit fails

The submit helper will always run bio_endio() on the bio if it fails to
submit, so cleaning up the bio just leads to a variety of use-after-free
and NULL pointer dereference bugs because we race with the endio
function that is cleaning up the bio.  Instead just return BLK_STS_OK as
the repair function has to continue to process the rest of the pages,
and the endio for the repair bio will do the appropriate cleanup for the
page that it was given.

Reviewed-by: Boris Burkov <boris@bur.io>
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_io.c | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 6a095e9962219..3f13ff0affdb3 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2640,7 +2640,6 @@ int btrfs_repair_one_sector(struct inode *inode,
 	const int icsum = bio_offset >> fs_info->sectorsize_bits;
 	struct bio *repair_bio;
 	struct btrfs_bio *repair_bbio;
-	blk_status_t status;
 
 	btrfs_debug(fs_info,
 		   "repair read error: read error at %llu", start);
@@ -2679,13 +2678,13 @@ int btrfs_repair_one_sector(struct inode *inode,
 		    "repair read error: submitting new read to mirror %d",
 		    failrec->this_mirror);
 
-	status = submit_bio_hook(inode, repair_bio, failrec->this_mirror,
-				 failrec->bio_flags);
-	if (status) {
-		free_io_failure(failure_tree, tree, failrec);
-		bio_put(repair_bio);
-	}
-	return blk_status_to_errno(status);
+	/*
+	 * At this point we have a bio, so any errors from submit_bio_hook()
+	 * will be handled by the endio on the repair_bio, so we can't return an
+	 * error here.
+	 */
+	submit_bio_hook(inode, repair_bio, failrec->this_mirror, failrec->bio_flags);
+	return BLK_STS_OK;
 }
 
 static void end_page_read(struct page *page, bool uptodate, u64 start, u32 len)
-- 
GitLab


From 7f30c07288bb9e20463182d0db56416025f85e08 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Thu, 17 Feb 2022 12:12:03 +0000
Subject: [PATCH 1405/1586] btrfs: stop copying old file extents when doing a
 full fsync

When logging an inode in full sync mode, we go over every leaf that was
modified in the current transaction and has items associated to our inode,
and then copy all those items into the log tree. This includes copying
file extent items that were created and added to the inode in past
transactions, which is useless and only makes use more leaf space in the
log tree.

It's common to have a file with many file extent items spanning many
leaves where only a few file extent items are new and need to be logged,
and in such case we log all the file extent items we find in the modified
leaves.

So change the full sync behaviour to skip over file extent items that are
not needed. Those are the ones that match the following criteria:

1) Have a generation older than the current transaction and the inode
   was not a target of a reflink operation, as that can copy file extent
   items from a past generation from some other inode into our inode, so
   we have to log them;

2) Start at an offset within i_size - we must log anything at or beyond
   i_size, otherwise we would lose prealloc extents after log replay.

The following script exercises a scenario where this happens, and it's
somehow close enough to what happened often on a SQL Server workload which
I had to debug sometime ago to fix an issue where a pattern of writes to
prealloc extents and fsync resulted in fsync failing with -EIO (that was
commit ea7036de0d36c4 ("btrfs: fix fsync failure and transaction abort
after writes to prealloc extents")). In that particular case, we had large
files that had random writes and were often truncated, which made the
next fsync be a full sync.

  $ cat test.sh
  #!/bin/bash

  DEV=/dev/sdi
  MNT=/mnt/sdi

  MKFS_OPTIONS="-O no-holes -R free-space-tree"
  MOUNT_OPTIONS="-o ssd"

  FILE_SIZE=$((1 * 1024 * 1024 * 1024)) # 1G
  # FILE_SIZE=$((2 * 1024 * 1024 * 1024)) # 2G
  # FILE_SIZE=$((512 * 1024 * 1024)) # 512M

  mkfs.btrfs -f $MKFS_OPTIONS $DEV
  mount $MOUNT_OPTIONS $DEV $MNT

  # Create a file with many extents. Use direct IO to make it faster
  # to create the file - using buffered IO we would have to fsync
  # after each write (terribly slow).
  echo "Creating file with $((FILE_SIZE / 4096)) extents of 4K each..."
  xfs_io -f -d -c "pwrite -b 4K 0 $FILE_SIZE" $MNT/foobar

  # Commit the transaction, so every extent after this is from an
  # old generation.
  sync

  # Now rewrite only a few extents, which are all far spread apart from
  # each other (e.g. 1G / 32M = 32 extents).
  # After this only a few extents have a new generation, while all other
  # ones have an old generation.
  echo "Rewriting $((FILE_SIZE / (32 * 1024 * 1024))) extents..."
  for ((i = 0; i < $FILE_SIZE; i += $((32 * 1024 * 1024)))); do
      xfs_io -c "pwrite $i 4K" $MNT/foobar >/dev/null
  done

  # Fsync, the inode logged in full sync mode since it was never fsynced
  # before.
  echo "Fsyncing file..."
  xfs_io -c "fsync" $MNT/foobar

  umount $MNT

And the following bpftrace program was running when executing the test
script:

  $ cat bpf-script.sh
  #!/usr/bin/bpftrace

  k:btrfs_log_inode
  {
      @start_log_inode[tid] = nsecs;
  }

  kr:btrfs_log_inode
  /@start_log_inode[tid]/
  {
      @log_inode_dur[tid] = (nsecs - @start_log_inode[tid]) / 1000;
      delete(@start_log_inode[tid]);
  }

  k:btrfs_sync_log
  {
      @start_sync_log[tid] = nsecs;
  }

  kr:btrfs_sync_log
  /@start_sync_log[tid]/
  {
      $sync_log_dur = (nsecs - @start_sync_log[tid]) / 1000;
      printf("btrfs_log_inode() took %llu us\n", @log_inode_dur[tid]);
      printf("btrfs_sync_log()  took %llu us\n", $sync_log_dur);
      delete(@start_sync_log[tid]);
      delete(@log_inode_dur[tid]);
      exit();
  }

With 512M test file, before this patch:

  btrfs_log_inode() took 15218 us
  btrfs_sync_log()  took 1328 us

  Log tree has 17 leaves and 1 node, its total size is 294912 bytes.

With 512M test file, after this patch:

  btrfs_log_inode() took 14760 us
  btrfs_sync_log()  took 588 us

  Log tree has a single leaf, its total size is 16K.

With 1G test file, before this patch:

  btrfs_log_inode() took 27301 us
  btrfs_sync_log()  took 1767 us

  Log tree has 33 leaves and 1 node, its total size is 557056 bytes.

With 1G test file, after this patch:

  btrfs_log_inode() took 26166 us
  btrfs_sync_log()  took 593 us

  Log tree has a single leaf, its total size is 16K

With 2G test file, before this patch:

  btrfs_log_inode() took 50892 us
  btrfs_sync_log()  took 3127 us

  Log tree has 65 leaves and 1 node, its total size is 1081344 bytes.

With 2G test file, after this patch:

  btrfs_log_inode() took 50126 us
  btrfs_sync_log()  took 586 us

  Log tree has a single leaf, its total size is 16K.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/reflink.c  |  23 +++--
 fs/btrfs/tree-log.c | 198 ++++++++++++++++++++++++++++++--------------
 2 files changed, 148 insertions(+), 73 deletions(-)

diff --git a/fs/btrfs/reflink.c b/fs/btrfs/reflink.c
index a3930da4eb3fb..c083ded71ef74 100644
--- a/fs/btrfs/reflink.c
+++ b/fs/btrfs/reflink.c
@@ -518,17 +518,22 @@ process_slot:
 		btrfs_release_path(path);
 
 		/*
-		 * If this is a new extent update the last_reflink_trans of both
-		 * inodes. This is used by fsync to make sure it does not log
-		 * multiple checksum items with overlapping ranges. For older
-		 * extents we don't need to do it since inode logging skips the
-		 * checksums for older extents. Also ignore holes and inline
-		 * extents because they don't have checksums in the csum tree.
+		 * Whenever we share an extent we update the last_reflink_trans
+		 * of each inode to the current transaction. This is needed to
+		 * make sure fsync does not log multiple checksum items with
+		 * overlapping ranges (because some extent items might refer
+		 * only to sections of the original extent). For the destination
+		 * inode we do this regardless of the generation of the extents
+		 * or even if they are inline extents or explicit holes, to make
+		 * sure a full fsync does not skip them. For the source inode,
+		 * we only need to update last_reflink_trans in case it's a new
+		 * extent that is not a hole or an inline extent, to deal with
+		 * the checksums problem on fsync.
 		 */
-		if (extent_gen == trans->transid && disko > 0) {
+		if (extent_gen == trans->transid && disko > 0)
 			BTRFS_I(src)->last_reflink_trans = trans->transid;
-			BTRFS_I(inode)->last_reflink_trans = trans->transid;
-		}
+
+		BTRFS_I(inode)->last_reflink_trans = trans->transid;
 
 		last_dest_end = ALIGN(new_key.offset + datal,
 				      fs_info->sectorsize);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index d5b7b565036db..bb3781fed8daf 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4418,21 +4418,19 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
 			       int start_slot, int nr, int inode_only,
 			       u64 logged_isize)
 {
-	struct btrfs_fs_info *fs_info = trans->fs_info;
-	unsigned long src_offset;
-	unsigned long dst_offset;
 	struct btrfs_root *log = inode->root->log_root;
 	struct btrfs_file_extent_item *extent;
-	struct btrfs_inode_item *inode_item;
 	struct extent_buffer *src = src_path->nodes[0];
-	int ret;
+	int ret = 0;
 	struct btrfs_key *ins_keys;
 	u32 *ins_sizes;
 	struct btrfs_item_batch batch;
 	char *ins_data;
 	int i;
+	int dst_index;
 	struct list_head ordered_sums;
-	int skip_csum = inode->flags & BTRFS_INODE_NODATASUM;
+	const bool skip_csum = (inode->flags & BTRFS_INODE_NODATASUM);
+	const u64 i_size = i_size_read(&inode->vfs_inode);
 
 	INIT_LIST_HEAD(&ordered_sums);
 
@@ -4446,28 +4444,140 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
 	batch.keys = ins_keys;
 	batch.data_sizes = ins_sizes;
 	batch.total_data_size = 0;
-	batch.nr = nr;
+	batch.nr = 0;
 
+	dst_index = 0;
 	for (i = 0; i < nr; i++) {
-		ins_sizes[i] = btrfs_item_size(src, i + start_slot);
-		batch.total_data_size += ins_sizes[i];
-		btrfs_item_key_to_cpu(src, ins_keys + i, i + start_slot);
+		const int src_slot = start_slot + i;
+		struct btrfs_root *csum_root;
+		u64 disk_bytenr;
+		u64 disk_num_bytes;
+		u64 extent_offset;
+		u64 extent_num_bytes;
+		bool is_old_extent;
+
+		btrfs_item_key_to_cpu(src, &ins_keys[dst_index], src_slot);
+
+		if (ins_keys[dst_index].type != BTRFS_EXTENT_DATA_KEY)
+			goto add_to_batch;
+
+		extent = btrfs_item_ptr(src, src_slot,
+					struct btrfs_file_extent_item);
+
+		is_old_extent = (btrfs_file_extent_generation(src, extent) <
+				 trans->transid);
+
+		/*
+		 * Don't copy extents from past generations. That would make us
+		 * log a lot more metadata for common cases like doing only a
+		 * few random writes into a file and then fsync it for the first
+		 * time or after the full sync flag is set on the inode. We can
+		 * get leaves full of extent items, most of which are from past
+		 * generations, so we can skip them - as long as the inode has
+		 * not been the target of a reflink operation in this transaction,
+		 * as in that case it might have had file extent items with old
+		 * generations copied into it. We also must always log prealloc
+		 * extents that start at or beyond eof, otherwise we would lose
+		 * them on log replay.
+		 */
+		if (is_old_extent &&
+		    ins_keys[dst_index].offset < i_size &&
+		    inode->last_reflink_trans < trans->transid)
+			continue;
+
+		if (skip_csum)
+			goto add_to_batch;
+
+		/* Only regular extents have checksums. */
+		if (btrfs_file_extent_type(src, extent) != BTRFS_FILE_EXTENT_REG)
+			goto add_to_batch;
+
+		/*
+		 * If it's an extent created in a past transaction, then its
+		 * checksums are already accessible from the committed csum tree,
+		 * no need to log them.
+		 */
+		if (is_old_extent)
+			goto add_to_batch;
+
+		disk_bytenr = btrfs_file_extent_disk_bytenr(src, extent);
+		/* If it's an explicit hole, there are no checksums. */
+		if (disk_bytenr == 0)
+			goto add_to_batch;
+
+		disk_num_bytes = btrfs_file_extent_disk_num_bytes(src, extent);
+
+		if (btrfs_file_extent_compression(src, extent)) {
+			extent_offset = 0;
+			extent_num_bytes = disk_num_bytes;
+		} else {
+			extent_offset = btrfs_file_extent_offset(src, extent);
+			extent_num_bytes = btrfs_file_extent_num_bytes(src, extent);
+		}
+
+		csum_root = btrfs_csum_root(trans->fs_info, disk_bytenr);
+		disk_bytenr += extent_offset;
+		ret = btrfs_lookup_csums_range(csum_root, disk_bytenr,
+					       disk_bytenr + extent_num_bytes - 1,
+					       &ordered_sums, 0);
+		if (ret)
+			goto out;
+
+add_to_batch:
+		ins_sizes[dst_index] = btrfs_item_size(src, src_slot);
+		batch.total_data_size += ins_sizes[dst_index];
+		batch.nr++;
+		dst_index++;
 	}
+
+	/*
+	 * We have a leaf full of old extent items that don't need to be logged,
+	 * so we don't need to do anything.
+	 */
+	if (batch.nr == 0)
+		goto out;
+
 	ret = btrfs_insert_empty_items(trans, log, dst_path, &batch);
-	if (ret) {
-		kfree(ins_data);
-		return ret;
-	}
+	if (ret)
+		goto out;
 
-	for (i = 0; i < nr; i++, dst_path->slots[0]++) {
-		dst_offset = btrfs_item_ptr_offset(dst_path->nodes[0],
-						   dst_path->slots[0]);
+	dst_index = 0;
+	for (i = 0; i < nr; i++) {
+		const int src_slot = start_slot + i;
+		const int dst_slot = dst_path->slots[0] + dst_index;
+		struct btrfs_key key;
+		unsigned long src_offset;
+		unsigned long dst_offset;
 
-		src_offset = btrfs_item_ptr_offset(src, start_slot + i);
+		/*
+		 * We're done, all the remaining items in the source leaf
+		 * correspond to old file extent items.
+		 */
+		if (dst_index >= batch.nr)
+			break;
 
-		if (ins_keys[i].type == BTRFS_INODE_ITEM_KEY) {
-			inode_item = btrfs_item_ptr(dst_path->nodes[0],
-						    dst_path->slots[0],
+		btrfs_item_key_to_cpu(src, &key, src_slot);
+
+		if (key.type != BTRFS_EXTENT_DATA_KEY)
+			goto copy_item;
+
+		extent = btrfs_item_ptr(src, src_slot,
+					struct btrfs_file_extent_item);
+
+		/* See the comment in the previous loop, same logic. */
+		if (btrfs_file_extent_generation(src, extent) < trans->transid &&
+		    key.offset < i_size &&
+		    inode->last_reflink_trans < trans->transid)
+			continue;
+
+copy_item:
+		dst_offset = btrfs_item_ptr_offset(dst_path->nodes[0], dst_slot);
+		src_offset = btrfs_item_ptr_offset(src, src_slot);
+
+		if (key.type == BTRFS_INODE_ITEM_KEY) {
+			struct btrfs_inode_item *inode_item;
+
+			inode_item = btrfs_item_ptr(dst_path->nodes[0], dst_slot,
 						    struct btrfs_inode_item);
 			fill_inode_item(trans, dst_path->nodes[0], inode_item,
 					&inode->vfs_inode,
@@ -4475,55 +4585,15 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
 					logged_isize);
 		} else {
 			copy_extent_buffer(dst_path->nodes[0], src, dst_offset,
-					   src_offset, ins_sizes[i]);
+					   src_offset, ins_sizes[dst_index]);
 		}
 
-		/* take a reference on file data extents so that truncates
-		 * or deletes of this inode don't have to relog the inode
-		 * again
-		 */
-		if (ins_keys[i].type == BTRFS_EXTENT_DATA_KEY &&
-		    !skip_csum) {
-			int found_type;
-			extent = btrfs_item_ptr(src, start_slot + i,
-						struct btrfs_file_extent_item);
-
-			if (btrfs_file_extent_generation(src, extent) < trans->transid)
-				continue;
-
-			found_type = btrfs_file_extent_type(src, extent);
-			if (found_type == BTRFS_FILE_EXTENT_REG) {
-				struct btrfs_root *csum_root;
-				u64 ds, dl, cs, cl;
-				ds = btrfs_file_extent_disk_bytenr(src,
-								extent);
-				/* ds == 0 is a hole */
-				if (ds == 0)
-					continue;
-
-				dl = btrfs_file_extent_disk_num_bytes(src,
-								extent);
-				cs = btrfs_file_extent_offset(src, extent);
-				cl = btrfs_file_extent_num_bytes(src,
-								extent);
-				if (btrfs_file_extent_compression(src,
-								  extent)) {
-					cs = 0;
-					cl = dl;
-				}
-
-				csum_root = btrfs_csum_root(fs_info, ds);
-				ret = btrfs_lookup_csums_range(csum_root,
-						ds + cs, ds + cs + cl - 1,
-						&ordered_sums, 0);
-				if (ret)
-					break;
-			}
-		}
+		dst_index++;
 	}
 
 	btrfs_mark_buffer_dirty(dst_path->nodes[0]);
 	btrfs_release_path(dst_path);
+out:
 	kfree(ins_data);
 
 	/*
-- 
GitLab


From 5b7ce5e287f030d1d3c799abea769b1a308067ba Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Thu, 17 Feb 2022 12:12:04 +0000
Subject: [PATCH 1406/1586] btrfs: hold on to less memory when logging
 checksums during full fsync

When doing a full fsync, at copy_items(), we iterate over all extents and
then collect their checksums into a list. After copying all the extents to
the log tree, we then log all the previously collected checksums.

Before the previous patch in the series (subject "btrfs: stop copying old
file extents when doing a full fsync"), we had to do it this way, because
while we were iterating over the items in the leaf of the subvolume tree,
we were holding a write lock on a leaf of the log tree, so logging the
checksums for an extent right after we collected them could result in a
deadlock, in case the checksum items ended up in the same leaf.

However after the previous patch in the series we now do a first iteration
over all the items in the leaf of the subvolume tree before locking a path
in the log tree, so we can now log the checksums right after we have
obtained them. This avoids holding in memory all checksums for all extents
in the leaf while copying items from the source leaf to the log tree. The
amount of memory used to hold all checksums of the extents in a leaf can
be significant. For example if a leaf has 200 file extent items referring
to 1M extents, using the default crc32c checksums, would result in using
over 200K of memory (not accounting for the extra overhead of struct
btrfs_ordered_sum), with smaller or less extents it would be less, but
it could be much more with more extents per leaf and/or much larger
extents.

So change copy_items() to log the checksums for an extent after looking
them up, and then free their memory, as they are no longer necessary.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/tree-log.c | 29 ++++++++++++-----------------
 1 file changed, 12 insertions(+), 17 deletions(-)

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index bb3781fed8daf..0b841eab61693 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4428,12 +4428,9 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
 	char *ins_data;
 	int i;
 	int dst_index;
-	struct list_head ordered_sums;
 	const bool skip_csum = (inode->flags & BTRFS_INODE_NODATASUM);
 	const u64 i_size = i_size_read(&inode->vfs_inode);
 
-	INIT_LIST_HEAD(&ordered_sums);
-
 	ins_data = kmalloc(nr * sizeof(struct btrfs_key) +
 			   nr * sizeof(u32), GFP_NOFS);
 	if (!ins_data)
@@ -4450,6 +4447,9 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
 	for (i = 0; i < nr; i++) {
 		const int src_slot = start_slot + i;
 		struct btrfs_root *csum_root;
+		struct btrfs_ordered_sum *sums;
+		struct btrfs_ordered_sum *sums_next;
+		LIST_HEAD(ordered_sums);
 		u64 disk_bytenr;
 		u64 disk_num_bytes;
 		u64 extent_offset;
@@ -4523,6 +4523,15 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
 		if (ret)
 			goto out;
 
+		list_for_each_entry_safe(sums, sums_next, &ordered_sums, list) {
+			if (!ret)
+				ret = log_csums(trans, inode, log, sums);
+			list_del(&sums->list);
+			kfree(sums);
+		}
+		if (ret)
+			goto out;
+
 add_to_batch:
 		ins_sizes[dst_index] = btrfs_item_size(src, src_slot);
 		batch.total_data_size += ins_sizes[dst_index];
@@ -4596,20 +4605,6 @@ copy_item:
 out:
 	kfree(ins_data);
 
-	/*
-	 * we have to do this after the loop above to avoid changing the
-	 * log tree while trying to change the log tree.
-	 */
-	while (!list_empty(&ordered_sums)) {
-		struct btrfs_ordered_sum *sums = list_entry(ordered_sums.next,
-						   struct btrfs_ordered_sum,
-						   list);
-		if (!ret)
-			ret = log_csums(trans, inode, log, sums);
-		list_del(&sums->list);
-		kfree(sums);
-	}
-
 	return ret;
 }
 
-- 
GitLab


From 96acb3753e0740e0a3148640927dcf3249f09f5d Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Thu, 17 Feb 2022 12:12:05 +0000
Subject: [PATCH 1407/1586] btrfs: voluntarily relinquish cpu when doing a full
 fsync

Doing a full fsync may require processing many leaves of metadata, which
can take some time and result in a task monopolizing a cpu for too long.
So add a cond_resched() after processing a leaf when doing a full fsync,
while not holding any locks on any tree (a subvolume or a log tree).

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/tree-log.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 0b841eab61693..b6abc716aff28 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -5712,6 +5712,13 @@ next_key:
 		} else {
 			break;
 		}
+
+		/*
+		 * We may process many leaves full of items for our inode, so
+		 * avoid monopolizing a cpu for too long by rescheduling while
+		 * not holding locks on any tree.
+		 */
+		cond_resched();
 	}
 	if (ins_nr) {
 		ret = copy_items(trans, inode, dst_path, path, ins_start_slot,
-- 
GitLab


From 23e3337faf73e5bb2610697977e175313d48acb0 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Thu, 17 Feb 2022 12:12:06 +0000
Subject: [PATCH 1408/1586] btrfs: reset last_reflink_trans after fsyncing
 inode

When an inode has a last_reflink_trans matching the current transaction,
we have to take special care when logging its checksums in order to
avoid getting checksum items with overlapping ranges in a log tree,
which could result in missing checksums after log replay (more on that
in the changelogs of commit 40e046acbd2f36 ("Btrfs: fix missing data
checksums after replaying a log tree") and commit e289f03ea79bbc ("btrfs:
fix corrupt log due to concurrent fsync of inodes with shared extents")).
We also need to make sure a full fsync will copy all old file extent
items it finds in modified leaves, because they might have been copied
from some other inode.

However once we fsync an inode, we don't need to keep paying the price of
that extra special care in future fsyncs done in the same transaction,
unless the inode is used for another reflink operation or the full sync
flag is set on it (truncate, failure to allocate extent maps for holes,
and other exceptional and infrequent cases).

So after we fsync an inode reset its last_unlink_trans to zero. In case
another reflink happens, we continue to update the last_reflink_trans of
the inode, just as before. Also set last_reflink_trans to the generation
of the last transaction that modified the inode whenever we need to set
the full sync flag on the inode, just like when we need to load an inode
from disk after eviction.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/btrfs_inode.h | 30 ++++++++++++++++++++++++++++++
 fs/btrfs/file.c        |  7 +++----
 fs/btrfs/inode.c       | 12 +++++-------
 fs/btrfs/reflink.c     |  5 ++---
 fs/btrfs/tree-log.c    |  8 ++++++++
 5 files changed, 48 insertions(+), 14 deletions(-)

diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 6d4f42b0fdcef..47e72d72f7d0a 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -341,6 +341,36 @@ static inline void btrfs_set_inode_last_sub_trans(struct btrfs_inode *inode)
 	spin_unlock(&inode->lock);
 }
 
+/*
+ * Should be called while holding the inode's VFS lock in exclusive mode or in a
+ * context where no one else can access the inode concurrently (during inode
+ * creation or when loading an inode from disk).
+ */
+static inline void btrfs_set_inode_full_sync(struct btrfs_inode *inode)
+{
+	set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags);
+	/*
+	 * The inode may have been part of a reflink operation in the last
+	 * transaction that modified it, and then a fsync has reset the
+	 * last_reflink_trans to avoid subsequent fsyncs in the same
+	 * transaction to do unnecessary work. So update last_reflink_trans
+	 * to the last_trans value (we have to be pessimistic and assume a
+	 * reflink happened).
+	 *
+	 * The ->last_trans is protected by the inode's spinlock and we can
+	 * have a concurrent ordered extent completion update it. Also set
+	 * last_reflink_trans to ->last_trans only if the former is less than
+	 * the later, because we can be called in a context where
+	 * last_reflink_trans was set to the current transaction generation
+	 * while ->last_trans was not yet updated in the current transaction,
+	 * and therefore has a lower value.
+	 */
+	spin_lock(&inode->lock);
+	if (inode->last_reflink_trans < inode->last_trans)
+		inode->last_reflink_trans = inode->last_trans;
+	spin_unlock(&inode->lock);
+}
+
 static inline bool btrfs_inode_in_log(struct btrfs_inode *inode, u64 generation)
 {
 	bool ret = false;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index c7a56909be8c7..9f455c96c9744 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2514,7 +2514,7 @@ out:
 	hole_em = alloc_extent_map();
 	if (!hole_em) {
 		btrfs_drop_extent_cache(inode, offset, end - 1, 0);
-		set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags);
+		btrfs_set_inode_full_sync(inode);
 	} else {
 		hole_em->start = offset;
 		hole_em->len = end - offset;
@@ -2535,8 +2535,7 @@ out:
 		} while (ret == -EEXIST);
 		free_extent_map(hole_em);
 		if (ret)
-			set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
-					&inode->runtime_flags);
+			btrfs_set_inode_full_sync(inode);
 	}
 
 	return 0;
@@ -2890,7 +2889,7 @@ int btrfs_replace_file_extents(struct btrfs_inode *inode,
 	 * maps for the replacement extents (or holes).
 	 */
 	if (extent_info && !extent_info->is_new_extent)
-		set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags);
+		btrfs_set_inode_full_sync(inode);
 
 	if (ret)
 		goto out_trans;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 53f46d5c78c77..2e7143ff55230 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -418,7 +418,7 @@ static noinline int cow_file_range_inline(struct btrfs_inode *inode, u64 size,
 		goto out;
 	}
 
-	set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags);
+	btrfs_set_inode_full_sync(inode);
 out:
 	/*
 	 * Don't forget to free the reserved space, as for inlined extent
@@ -4911,8 +4911,7 @@ int btrfs_cont_expand(struct btrfs_inode *inode, loff_t oldsize, loff_t size)
 						cur_offset + hole_size - 1, 0);
 			hole_em = alloc_extent_map();
 			if (!hole_em) {
-				set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
-					&inode->runtime_flags);
+				btrfs_set_inode_full_sync(inode);
 				goto next;
 			}
 			hole_em->start = cur_offset;
@@ -6165,7 +6164,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
 	 * sync since it will be a full sync anyway and this will blow away the
 	 * old info in the log.
 	 */
-	set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
+	btrfs_set_inode_full_sync(BTRFS_I(inode));
 
 	key[0].objectid = objectid;
 	key[0].type = BTRFS_INODE_ITEM_KEY;
@@ -8767,7 +8766,7 @@ out:
 	 * extents beyond i_size to drop.
 	 */
 	if (control.extents_found > 0)
-		set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
+		btrfs_set_inode_full_sync(BTRFS_I(inode));
 
 	return ret;
 }
@@ -9975,8 +9974,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
 
 		em = alloc_extent_map();
 		if (!em) {
-			set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
-				&BTRFS_I(inode)->runtime_flags);
+			btrfs_set_inode_full_sync(BTRFS_I(inode));
 			goto next;
 		}
 
diff --git a/fs/btrfs/reflink.c b/fs/btrfs/reflink.c
index c083ded71ef74..cdcfeab82e123 100644
--- a/fs/btrfs/reflink.c
+++ b/fs/btrfs/reflink.c
@@ -277,7 +277,7 @@ copy_inline_extent:
 						  path->slots[0]),
 			    size);
 	btrfs_update_inode_bytes(BTRFS_I(dst), datal, drop_args.bytes_found);
-	set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(dst)->runtime_flags);
+	btrfs_set_inode_full_sync(BTRFS_I(dst));
 	ret = btrfs_inode_set_file_extent_range(BTRFS_I(dst), 0, aligned_end);
 out:
 	if (!ret && !trans) {
@@ -580,8 +580,7 @@ process_slot:
 		 * replaced file extent items.
 		 */
 		if (last_dest_end >= i_size_read(inode))
-			set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
-				&BTRFS_I(inode)->runtime_flags);
+			btrfs_set_inode_full_sync(BTRFS_I(inode));
 
 		ret = btrfs_replace_file_extents(BTRFS_I(inode), path,
 				last_dest_end, destoff + len - 1, NULL, &trans);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index b6abc716aff28..d661222a3aa49 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -6013,6 +6013,14 @@ log_extents:
 	if (inode_only != LOG_INODE_EXISTS)
 		inode->last_log_commit = inode->last_sub_trans;
 	spin_unlock(&inode->lock);
+
+	/*
+	 * Reset the last_reflink_trans so that the next fsync does not need to
+	 * go through the slower path when logging extents and their checksums.
+	 */
+	if (inode_only == LOG_INODE_ALL)
+		inode->last_reflink_trans = 0;
+
 out_unlock:
 	mutex_unlock(&inode->log_mutex);
 out:
-- 
GitLab


From 1f4613cdbe7739ce291554b316bff8e551383389 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Thu, 17 Feb 2022 12:12:07 +0000
Subject: [PATCH 1409/1586] btrfs: fix unexpected error path when reflinking an
 inline extent

When reflinking an inline extent, we assert that its file offset is 0 and
that its uncompressed length is not greater than the sector size. We then
return an error if one of those conditions is not satisfied. However we
use a return statement, which results in returning from btrfs_clone()
without freeing the path and buffer that were allocated before, as well as
not clearing the flag BTRFS_INODE_NO_DELALLOC_FLUSH for the destination
inode.

Fix that by jumping to the 'out' label instead, and also add a WARN_ON()
for each condition so that in case assertions are disabled, we get to
known which of the unexpected conditions triggered the error.

Fixes: a61e1e0df9f321 ("Btrfs: simplify inline extent handling when doing reflinks")
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/reflink.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/reflink.c b/fs/btrfs/reflink.c
index cdcfeab82e123..07e19e069c84c 100644
--- a/fs/btrfs/reflink.c
+++ b/fs/btrfs/reflink.c
@@ -505,8 +505,11 @@ process_slot:
 			 */
 			ASSERT(key.offset == 0);
 			ASSERT(datal <= fs_info->sectorsize);
-			if (key.offset != 0 || datal > fs_info->sectorsize)
-				return -EUCLEAN;
+			if (WARN_ON(key.offset != 0) ||
+			    WARN_ON(datal > fs_info->sectorsize)) {
+				ret = -EUCLEAN;
+				goto out;
+			}
 
 			ret = clone_copy_inline_extent(inode, path, &new_key,
 						       drop_start, datal, size,
-- 
GitLab


From b2d9f2dc019fb739b8c0765ddcc184c31f7ae54b Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Thu, 17 Feb 2022 12:12:08 +0000
Subject: [PATCH 1410/1586] btrfs: deal with unexpected extent type during
 reflinking

Smatch complains about a possible dereference of a pointer that was not
initialized:

    CC [M]  fs/btrfs/reflink.o
    CHECK   fs/btrfs/reflink.c
  fs/btrfs/reflink.c:533 btrfs_clone() error: potentially dereferencing uninitialized 'trans'.

This is because we are not dealing with the case where the type of a file
extent has an unexpected value (not regular, not prealloc and not inline),
in which case the transaction handle pointer is not initialized.

Such unexpected type should be impossible, except in case of some memory
corruption caused either by bad hardware or some software bug causing
something like a buffer overrun.

So ASSERT that if the extent type is neither regular nor prealloc, then
it must be inline. Bail out with -EUCLEAN and a warning in case it is
not. This silences smatch.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/reflink.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/reflink.c b/fs/btrfs/reflink.c
index 07e19e069c84c..03d60db0c5a34 100644
--- a/fs/btrfs/reflink.c
+++ b/fs/btrfs/reflink.c
@@ -494,7 +494,8 @@ process_slot:
 					&clone_info, &trans);
 			if (ret)
 				goto out;
-		} else if (type == BTRFS_FILE_EXTENT_INLINE) {
+		} else {
+			ASSERT(type == BTRFS_FILE_EXTENT_INLINE);
 			/*
 			 * Inline extents always have to start at file offset 0
 			 * and can never be bigger then the sector size. We can
@@ -505,7 +506,8 @@ process_slot:
 			 */
 			ASSERT(key.offset == 0);
 			ASSERT(datal <= fs_info->sectorsize);
-			if (WARN_ON(key.offset != 0) ||
+			if (WARN_ON(type != BTRFS_FILE_EXTENT_INLINE) ||
+			    WARN_ON(key.offset != 0) ||
 			    WARN_ON(datal > fs_info->sectorsize)) {
 				ret = -EUCLEAN;
 				goto out;
-- 
GitLab


From c067da87815657779115b8eae01c514458e4dfd2 Mon Sep 17 00:00:00 2001
From: Sweet Tea Dorminy <sweettea-kernel@dorminy.me>
Date: Wed, 23 Feb 2022 14:38:06 -0500
Subject: [PATCH 1411/1586] btrfs: add filesystems state details to error
 messages

When a filesystem goes read-only due to an error, multiple errors tend
to be reported, some of which are knock-on failures. Logging fs_states,
in btrfs_handle_fs_error() and btrfs_printk() helps distinguish the
first error from subsequent messages which may only exist due to an
error state.

Under the new format, most initial errors will look like:
`BTRFS: error (device loop0) in ...`
while subsequent errors will begin with:
`error (device loop0: state E) in ...`

An initial transaction abort error will look like
`error (device loop0: state A) in ...`
and subsequent messages will contain
`(device loop0: state EA) in ...`

In addition to the error states we can also print other states that are
temporary, like remounting, device replace, or indicate a global state
that may affect functionality.

Now implemented:

E - filesystem error detected
A - transaction aborted
L - log tree errors

M - remounting in progress
R - device replace in progress
C - data checksums not verified (mounted with ignoredatacsums)

Signed-off-by: Sweet Tea Dorminy <sweettea-kernel@dorminy.me>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.h |  2 ++
 fs/btrfs/super.c | 68 ++++++++++++++++++++++++++++++++++++++++++------
 2 files changed, 62 insertions(+), 8 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 5745b37a7e552..8e79e3ded8825 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -149,6 +149,8 @@ enum {
 
 	/* Indicates there was an error cleaning up a log tree. */
 	BTRFS_FS_STATE_LOG_CLEANUP_ERROR,
+
+	BTRFS_FS_STATE_COUNT
 };
 
 #define BTRFS_BACKREF_REV_MAX		256
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 33cfc9e27451e..b228efe8ab6e2 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -66,6 +66,52 @@ static struct file_system_type btrfs_root_fs_type;
 
 static int btrfs_remount(struct super_block *sb, int *flags, char *data);
 
+#ifdef CONFIG_PRINTK
+
+#define STATE_STRING_PREFACE	": state "
+#define STATE_STRING_BUF_LEN	(sizeof(STATE_STRING_PREFACE) + BTRFS_FS_STATE_COUNT)
+
+/*
+ * Characters to print to indicate error conditions or uncommon filesystem sate.
+ * RO is not an error.
+ */
+static const char fs_state_chars[] = {
+	[BTRFS_FS_STATE_ERROR]			= 'E',
+	[BTRFS_FS_STATE_REMOUNTING]		= 'M',
+	[BTRFS_FS_STATE_RO]			= 0,
+	[BTRFS_FS_STATE_TRANS_ABORTED]		= 'A',
+	[BTRFS_FS_STATE_DEV_REPLACING]		= 'R',
+	[BTRFS_FS_STATE_DUMMY_FS_INFO]		= 0,
+	[BTRFS_FS_STATE_NO_CSUMS]		= 'C',
+	[BTRFS_FS_STATE_LOG_CLEANUP_ERROR]	= 'L',
+};
+
+static void btrfs_state_to_string(const struct btrfs_fs_info *info, char *buf)
+{
+	unsigned int bit;
+	bool states_printed = false;
+	unsigned long fs_state = READ_ONCE(info->fs_state);
+	char *curr = buf;
+
+	memcpy(curr, STATE_STRING_PREFACE, sizeof(STATE_STRING_PREFACE));
+	curr += sizeof(STATE_STRING_PREFACE) - 1;
+
+	for_each_set_bit(bit, &fs_state, sizeof(fs_state)) {
+		WARN_ON_ONCE(bit >= BTRFS_FS_STATE_COUNT);
+		if ((bit < BTRFS_FS_STATE_COUNT) && fs_state_chars[bit]) {
+			*curr++ = fs_state_chars[bit];
+			states_printed = true;
+		}
+	}
+
+	/* If no states were printed, reset the buffer */
+	if (!states_printed)
+		curr = buf;
+
+	*curr++ = 0;
+}
+#endif
+
 /*
  * Generally the error codes correspond to their respective errors, but there
  * are a few special cases.
@@ -128,6 +174,7 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function
 {
 	struct super_block *sb = fs_info->sb;
 #ifdef CONFIG_PRINTK
+	char statestr[STATE_STRING_BUF_LEN];
 	const char *errstr;
 #endif
 
@@ -140,6 +187,7 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function
 
 #ifdef CONFIG_PRINTK
 	errstr = btrfs_decode_error(errno);
+	btrfs_state_to_string(fs_info, statestr);
 	if (fmt) {
 		struct va_format vaf;
 		va_list args;
@@ -148,12 +196,12 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function
 		vaf.fmt = fmt;
 		vaf.va = &args;
 
-		pr_crit("BTRFS: error (device %s) in %s:%d: errno=%d %s (%pV)\n",
-			sb->s_id, function, line, errno, errstr, &vaf);
+		pr_crit("BTRFS: error (device %s%s) in %s:%d: errno=%d %s (%pV)\n",
+			sb->s_id, statestr, function, line, errno, errstr, &vaf);
 		va_end(args);
 	} else {
-		pr_crit("BTRFS: error (device %s) in %s:%d: errno=%d %s\n",
-			sb->s_id, function, line, errno, errstr);
+		pr_crit("BTRFS: error (device %s%s) in %s:%d: errno=%d %s\n",
+			sb->s_id, statestr, function, line, errno, errstr);
 	}
 #endif
 
@@ -240,11 +288,15 @@ void __cold btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, .
 	vaf.va = &args;
 
 	if (__ratelimit(ratelimit)) {
-		if (fs_info)
-			printk("%sBTRFS %s (device %s): %pV\n", lvl, type,
-				fs_info->sb->s_id, &vaf);
-		else
+		if (fs_info) {
+			char statestr[STATE_STRING_BUF_LEN];
+
+			btrfs_state_to_string(fs_info, statestr);
+			printk("%sBTRFS %s (device %s%s): %pV\n", lvl, type,
+				fs_info->sb->s_id, statestr, &vaf);
+		} else {
 			printk("%sBTRFS %s: %pV\n", lvl, type, &vaf);
+		}
 	}
 
 	va_end(args);
-- 
GitLab


From 33c44184991ec38e637cc4021e3b5e7d2288cea5 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Fri, 18 Feb 2022 14:56:11 -0500
Subject: [PATCH 1412/1586] btrfs: pass btrfs_fs_info for deleting snapshots
 and cleaner

We're passing a root around here, but we only really need the fs_info,
so fix up btrfs_clean_one_deleted_snapshot() to take an fs_info instead,
and then fix up all the callers appropriately.

Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/disk-io.c     | 7 +++----
 fs/btrfs/transaction.c | 4 ++--
 fs/btrfs/transaction.h | 2 +-
 3 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index ed62e81c0b669..183baeffd9c9e 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1947,8 +1947,7 @@ static void end_workqueue_fn(struct btrfs_work *work)
 
 static int cleaner_kthread(void *arg)
 {
-	struct btrfs_root *root = arg;
-	struct btrfs_fs_info *fs_info = root->fs_info;
+	struct btrfs_fs_info *fs_info = (struct btrfs_fs_info *)arg;
 	int again;
 
 	while (1) {
@@ -1981,7 +1980,7 @@ static int cleaner_kthread(void *arg)
 
 		btrfs_run_delayed_iputs(fs_info);
 
-		again = btrfs_clean_one_deleted_snapshot(root);
+		again = btrfs_clean_one_deleted_snapshot(fs_info);
 		mutex_unlock(&fs_info->cleaner_mutex);
 
 		/*
@@ -3806,7 +3805,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
 		goto fail_sysfs;
 	}
 
-	fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root,
+	fs_info->cleaner_kthread = kthread_run(cleaner_kthread, fs_info,
 					       "btrfs-cleaner");
 	if (IS_ERR(fs_info->cleaner_kthread))
 		goto fail_sysfs;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 37f6ec2a3c569..b008c51109589 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -2505,10 +2505,10 @@ cleanup_transaction:
  * because btrfs_commit_super will poke cleaner thread and it will process it a
  * few seconds later.
  */
-int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root)
+int btrfs_clean_one_deleted_snapshot(struct btrfs_fs_info *fs_info)
 {
+	struct btrfs_root *root;
 	int ret;
-	struct btrfs_fs_info *fs_info = root->fs_info;
 
 	spin_lock(&fs_info->trans_lock);
 	if (list_empty(&fs_info->dead_roots)) {
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index ba8a9826eb375..970ff316069dd 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -217,7 +217,7 @@ int btrfs_wait_for_commit(struct btrfs_fs_info *fs_info, u64 transid);
 void btrfs_add_dead_root(struct btrfs_root *root);
 int btrfs_defrag_root(struct btrfs_root *root);
 void btrfs_maybe_wake_unfinished_drop(struct btrfs_fs_info *fs_info);
-int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root);
+int btrfs_clean_one_deleted_snapshot(struct btrfs_fs_info *fs_info);
 int btrfs_commit_transaction(struct btrfs_trans_handle *trans);
 void btrfs_commit_transaction_async(struct btrfs_trans_handle *trans);
 int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans);
-- 
GitLab


From 7eefae6bb1ddeba1df02acd5c6bb6b7c8e0f34d1 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Fri, 18 Feb 2022 14:56:12 -0500
Subject: [PATCH 1413/1586] btrfs: pass btrfs_fs_info to
 btrfs_recover_relocation

We don't need a root here, we just need the btrfs_fs_info, we can just
get the specific roots we need from fs_info.

Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.h      | 2 +-
 fs/btrfs/disk-io.c    | 2 +-
 fs/btrfs/relocation.c | 5 ++---
 3 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 8e79e3ded8825..4db17bd05a215 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3833,7 +3833,7 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
 			  struct btrfs_root *root);
 int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
 			    struct btrfs_root *root);
-int btrfs_recover_relocation(struct btrfs_root *root);
+int btrfs_recover_relocation(struct btrfs_fs_info *fs_info);
 int btrfs_reloc_clone_csums(struct btrfs_inode *inode, u64 file_pos, u64 len);
 int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
 			  struct btrfs_root *root, struct extent_buffer *buf,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 183baeffd9c9e..6a0b4dbd70e94 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3379,7 +3379,7 @@ int btrfs_start_pre_rw_mount(struct btrfs_fs_info *fs_info)
 	up_read(&fs_info->cleanup_work_sem);
 
 	mutex_lock(&fs_info->cleaner_mutex);
-	ret = btrfs_recover_relocation(fs_info->tree_root);
+	ret = btrfs_recover_relocation(fs_info);
 	mutex_unlock(&fs_info->cleaner_mutex);
 	if (ret < 0) {
 		btrfs_warn(fs_info, "failed to recover relocation: %d", ret);
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 50130f943d999..df93c672f332b 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -4123,9 +4123,8 @@ static noinline_for_stack int mark_garbage_root(struct btrfs_root *root)
  * this function resumes merging reloc trees with corresponding fs trees.
  * this is important for keeping the sharing of tree blocks
  */
-int btrfs_recover_relocation(struct btrfs_root *root)
+int btrfs_recover_relocation(struct btrfs_fs_info *fs_info)
 {
-	struct btrfs_fs_info *fs_info = root->fs_info;
 	LIST_HEAD(reloc_roots);
 	struct btrfs_key key;
 	struct btrfs_root *fs_root;
@@ -4166,7 +4165,7 @@ int btrfs_recover_relocation(struct btrfs_root *root)
 		    key.type != BTRFS_ROOT_ITEM_KEY)
 			break;
 
-		reloc_root = btrfs_read_tree_root(root, &key);
+		reloc_root = btrfs_read_tree_root(fs_info->tree_root, &key);
 		if (IS_ERR(reloc_root)) {
 			err = PTR_ERR(reloc_root);
 			goto out;
-- 
GitLab


From ae460f058e9ff31946cc393bf897fc44483f0876 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Fri, 18 Feb 2022 09:38:13 -0500
Subject: [PATCH 1414/1586] btrfs: remove the cross file system checks from
 remap

The sb check is already done in do_clone_file_range, and the mnt check
(which will hopefully go away in a subsequent patch) is done in
ioctl_file_clone().  Remove the check in our code and put an ASSERT() to
make sure it doesn't change underneath us.

Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/reflink.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/fs/btrfs/reflink.c b/fs/btrfs/reflink.c
index 03d60db0c5a34..04a88bfe4fcf9 100644
--- a/fs/btrfs/reflink.c
+++ b/fs/btrfs/reflink.c
@@ -781,9 +781,7 @@ static int btrfs_remap_file_range_prep(struct file *file_in, loff_t pos_in,
 		if (btrfs_root_readonly(root_out))
 			return -EROFS;
 
-		if (file_in->f_path.mnt != file_out->f_path.mnt ||
-		    inode_in->i_sb != inode_out->i_sb)
-			return -EXDEV;
+		ASSERT(inode_in->i_sb == inode_out->i_sb);
 	}
 
 	/* Don't make the dst file partly checksummed */
-- 
GitLab


From 9f5710bbfd3031dd7ce244fa26fba896d35f5342 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Fri, 18 Feb 2022 09:38:14 -0500
Subject: [PATCH 1415/1586] fs: allow cross-vfsmount reflink/dedupe

Currently we disallow reflink and dedupe if the two files aren't on the
same vfsmount.  However we really only need to disallow it if they're
not on the same super block.  It is very common for btrfs to have a main
subvolume that is mounted and then different subvolumes mounted at
different locations.  It's allowed to reflink between these volumes, but
the vfsmount check disallows this.  Instead fix dedupe to check for the
same superblock, and simply remove the vfsmount check for reflink as it
already does the superblock check.

Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/ioctl.c       | 4 ----
 fs/remap_range.c | 7 +------
 2 files changed, 1 insertion(+), 10 deletions(-)

diff --git a/fs/ioctl.c b/fs/ioctl.c
index 1ed097e94af2d..090bf47606aba 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -236,9 +236,6 @@ static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd,
 
 	if (!src_file.file)
 		return -EBADF;
-	ret = -EXDEV;
-	if (src_file.file->f_path.mnt != dst_file->f_path.mnt)
-		goto fdput;
 	cloned = vfs_clone_file_range(src_file.file, off, dst_file, destoff,
 				      olen, 0);
 	if (cloned < 0)
@@ -247,7 +244,6 @@ static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd,
 		ret = -EINVAL;
 	else
 		ret = 0;
-fdput:
 	fdput(src_file);
 	return ret;
 }
diff --git a/fs/remap_range.c b/fs/remap_range.c
index 231159682907d..bc5fb006dc798 100644
--- a/fs/remap_range.c
+++ b/fs/remap_range.c
@@ -362,11 +362,6 @@ loff_t do_clone_file_range(struct file *file_in, loff_t pos_in,
 
 	WARN_ON_ONCE(remap_flags & REMAP_FILE_DEDUP);
 
-	/*
-	 * FICLONE/FICLONERANGE ioctls enforce that src and dest files are on
-	 * the same mount. Practically, they only need to be on the same file
-	 * system.
-	 */
 	if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb)
 		return -EXDEV;
 
@@ -458,7 +453,7 @@ loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
 		goto out_drop_write;
 
 	ret = -EXDEV;
-	if (src_file->f_path.mnt != dst_file->f_path.mnt)
+	if (file_inode(src_file)->i_sb != file_inode(dst_file)->i_sb)
 		goto out_drop_write;
 
 	ret = -EISDIR;
-- 
GitLab


From ca5e4ea0beaec8bc674121838bf8614c089effb9 Mon Sep 17 00:00:00 2001
From: Naohiro Aota <naohiro.aota@wdc.com>
Date: Fri, 18 Feb 2022 13:14:19 +0900
Subject: [PATCH 1416/1586] btrfs: zoned: mark relocation as writing

There is a hung_task issue with running generic/068 on an SMR
device. The hang occurs while a process is trying to thaw the
filesystem. The process is trying to take sb->s_umount to thaw the
FS. The lock is held by fsstress, which calls btrfs_sync_fs() and is
waiting for an ordered extent to finish. However, as the FS is frozen,
the ordered extents never finish.

Having an ordered extent while the FS is frozen is the root cause of
the hang. The ordered extent is initiated from btrfs_relocate_chunk()
which is called from btrfs_reclaim_bgs_work().

This commit adds sb_*_write() around btrfs_relocate_chunk() call
site. For the usual "btrfs balance" command, we already call it with
mnt_want_file() in btrfs_ioctl_balance().

Fixes: 18bb8bbf13c1 ("btrfs: zoned: automatically reclaim zones")
CC: stable@vger.kernel.org # 5.13+
Link: https://github.com/naota/linux/issues/56
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/block-group.c | 8 +++++++-
 fs/btrfs/volumes.c     | 3 +++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 3113f6d7f335f..c22d287e020b3 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -1522,8 +1522,12 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
 	if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags))
 		return;
 
-	if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE))
+	sb_start_write(fs_info->sb);
+
+	if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE)) {
+		sb_end_write(fs_info->sb);
 		return;
+	}
 
 	/*
 	 * Long running balances can keep us blocked here for eternity, so
@@ -1531,6 +1535,7 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
 	 */
 	if (!mutex_trylock(&fs_info->reclaim_bgs_lock)) {
 		btrfs_exclop_finish(fs_info);
+		sb_end_write(fs_info->sb);
 		return;
 	}
 
@@ -1605,6 +1610,7 @@ next:
 	spin_unlock(&fs_info->unused_bgs_lock);
 	mutex_unlock(&fs_info->reclaim_bgs_lock);
 	btrfs_exclop_finish(fs_info);
+	sb_end_write(fs_info->sb);
 }
 
 void btrfs_reclaim_bgs(struct btrfs_fs_info *fs_info)
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index fa7fee09e39b8..5e3e13d4940b1 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -8304,10 +8304,12 @@ static int relocating_repair_kthread(void *data)
 	target = cache->start;
 	btrfs_put_block_group(cache);
 
+	sb_start_write(fs_info->sb);
 	if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE)) {
 		btrfs_info(fs_info,
 			   "zoned: skip relocating block group %llu to repair: EBUSY",
 			   target);
+		sb_end_write(fs_info->sb);
 		return -EBUSY;
 	}
 
@@ -8335,6 +8337,7 @@ out:
 		btrfs_put_block_group(cache);
 	mutex_unlock(&fs_info->reclaim_bgs_lock);
 	btrfs_exclop_finish(fs_info);
+	sb_end_write(fs_info->sb);
 
 	return ret;
 }
-- 
GitLab


From 06bae876634ebf837ba70ea3de532b288326103d Mon Sep 17 00:00:00 2001
From: Niels Dossche <dossche.niels@gmail.com>
Date: Fri, 25 Feb 2022 22:20:28 +0100
Subject: [PATCH 1417/1586] btrfs: extend locking to all space_info members
 accesses

bytes_pinned is always accessed under space_info->lock, except in
btrfs_preempt_reclaim_metadata_space, however the other members are
accessed under that lock. The reserved member of the rsv's are also
partially accessed under a lock and partially not. Move all these
accesses into the same lock to ensure consistency.

This could potentially race and lead to a flush instead of a commit but
it's not a big problem as it's only for preemptive flush.

CC: stable@vger.kernel.org # 5.15+
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Niels Dossche <niels.dossche@ugent.be>
Signed-off-by: Niels Dossche <dossche.niels@gmail.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/space-info.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index 294242c194d80..62382ae1eb02a 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -1061,7 +1061,6 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work)
 			trans_rsv->reserved;
 		if (block_rsv_size < space_info->bytes_may_use)
 			delalloc_size = space_info->bytes_may_use - block_rsv_size;
-		spin_unlock(&space_info->lock);
 
 		/*
 		 * We don't want to include the global_rsv in our calculation,
@@ -1092,6 +1091,8 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work)
 			flush = FLUSH_DELAYED_REFS_NR;
 		}
 
+		spin_unlock(&space_info->lock);
+
 		/*
 		 * We don't want to reclaim everything, just a portion, so scale
 		 * down the to_reclaim by 1/4.  If it takes us down to 0,
-- 
GitLab


From 313ab75399d0c7d0ebc718c545572c1b4d8d22ef Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Mon, 28 Feb 2022 16:29:29 +0000
Subject: [PATCH 1418/1586] btrfs: add and use helper for unlinking inode
 during log replay

During log replay there is this pattern of running delayed items after
every inode unlink. To avoid repeating this several times, move the
logic into an helper function and use it instead of calling
btrfs_unlink_inode() followed by btrfs_run_delayed_items().

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/tree-log.c | 77 +++++++++++++++++----------------------------
 1 file changed, 29 insertions(+), 48 deletions(-)

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index d661222a3aa49..571dae8ad65e8 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -899,6 +899,26 @@ out:
 	return ret;
 }
 
+static int unlink_inode_for_log_replay(struct btrfs_trans_handle *trans,
+				       struct btrfs_inode *dir,
+				       struct btrfs_inode *inode,
+				       const char *name,
+				       int name_len)
+{
+	int ret;
+
+	ret = btrfs_unlink_inode(trans, dir, inode, name, name_len);
+	if (ret)
+		return ret;
+	/*
+	 * Whenever we need to check if a name exists or not, we check the
+	 * fs/subvolume tree. So after an unlink we must run delayed items, so
+	 * that future checks for a name during log replay see that the name
+	 * does not exists anymore.
+	 */
+	return btrfs_run_delayed_items(trans);
+}
+
 /*
  * when cleaning up conflicts between the directory names in the
  * subvolume, directory names in the log and directory names in the
@@ -941,12 +961,8 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
 	if (ret)
 		goto out;
 
-	ret = btrfs_unlink_inode(trans, dir, BTRFS_I(inode), name,
+	ret = unlink_inode_for_log_replay(trans, dir, BTRFS_I(inode), name,
 			name_len);
-	if (ret)
-		goto out;
-	else
-		ret = btrfs_run_delayed_items(trans);
 out:
 	kfree(name);
 	iput(inode);
@@ -1106,12 +1122,9 @@ again:
 				inc_nlink(&inode->vfs_inode);
 				btrfs_release_path(path);
 
-				ret = btrfs_unlink_inode(trans, dir, inode,
+				ret = unlink_inode_for_log_replay(trans, dir, inode,
 						victim_name, victim_name_len);
 				kfree(victim_name);
-				if (ret)
-					return ret;
-				ret = btrfs_run_delayed_items(trans);
 				if (ret)
 					return ret;
 				*search_done = 1;
@@ -1178,14 +1191,11 @@ again:
 					inc_nlink(&inode->vfs_inode);
 					btrfs_release_path(path);
 
-					ret = btrfs_unlink_inode(trans,
+					ret = unlink_inode_for_log_replay(trans,
 							BTRFS_I(victim_parent),
 							inode,
 							victim_name,
 							victim_name_len);
-					if (!ret)
-						ret = btrfs_run_delayed_items(
-								  trans);
 				}
 				iput(victim_parent);
 				kfree(victim_name);
@@ -1340,19 +1350,10 @@ again:
 				kfree(name);
 				goto out;
 			}
-			ret = btrfs_unlink_inode(trans, BTRFS_I(dir),
+			ret = unlink_inode_for_log_replay(trans, BTRFS_I(dir),
 						 inode, name, namelen);
 			kfree(name);
 			iput(dir);
-			/*
-			 * Whenever we need to check if a name exists or not, we
-			 * check the subvolume tree. So after an unlink we must
-			 * run delayed items, so that future checks for a name
-			 * during log replay see that the name does not exists
-			 * anymore.
-			 */
-			if (!ret)
-				ret = btrfs_run_delayed_items(trans);
 			if (ret)
 				goto out;
 			goto again;
@@ -1448,8 +1449,8 @@ static int add_link(struct btrfs_trans_handle *trans,
 		ret = -ENOENT;
 		goto out;
 	}
-	ret = btrfs_unlink_inode(trans, BTRFS_I(dir), BTRFS_I(other_inode),
-				 name, namelen);
+	ret = unlink_inode_for_log_replay(trans, BTRFS_I(dir), BTRFS_I(other_inode),
+					  name, namelen);
 	if (ret)
 		goto out;
 	/*
@@ -1458,10 +1459,6 @@ static int add_link(struct btrfs_trans_handle *trans,
 	 */
 	if (other_inode->i_nlink == 0)
 		inc_nlink(other_inode);
-
-	ret = btrfs_run_delayed_items(trans);
-	if (ret)
-		goto out;
 add_link:
 	ret = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode),
 			     name, namelen, 0, ref_index);
@@ -1594,7 +1591,7 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
 			ret = btrfs_inode_ref_exists(inode, dir, key->type,
 						     name, namelen);
 			if (ret > 0) {
-				ret = btrfs_unlink_inode(trans,
+				ret = unlink_inode_for_log_replay(trans,
 							 BTRFS_I(dir),
 							 BTRFS_I(inode),
 							 name, namelen);
@@ -1605,15 +1602,6 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
 				 */
 				if (!ret && inode->i_nlink == 0)
 					inc_nlink(inode);
-				/*
-				 * Whenever we need to check if a name exists or
-				 * not, we check the subvolume tree. So after an
-				 * unlink we must run delayed items, so that future
-				 * checks for a name during log replay see that the
-				 * name does not exists anymore.
-				 */
-				if (!ret)
-					ret = btrfs_run_delayed_items(trans);
 			}
 			if (ret < 0)
 				goto out;
@@ -2350,15 +2338,8 @@ static noinline int check_item_in_log(struct btrfs_trans_handle *trans,
 		goto out;
 
 	inc_nlink(inode);
-	ret = btrfs_unlink_inode(trans, BTRFS_I(dir), BTRFS_I(inode), name,
-				 name_len);
-	if (ret)
-		goto out;
-
-	ret = btrfs_run_delayed_items(trans);
-	if (ret)
-		goto out;
-
+	ret = unlink_inode_for_log_replay(trans, BTRFS_I(dir), BTRFS_I(inode),
+					  name, name_len);
 	/*
 	 * Unlike dir item keys, dir index keys can only have one name (entry) in
 	 * them, as there are no key collisions since each key has a unique offset
-- 
GitLab


From b3c958a3694eb8280de9ee3b241ced03bce5ad5f Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Wed, 23 Feb 2022 14:06:43 -0500
Subject: [PATCH 1419/1586] btrfs: remove BUG_ON(ret) in
 alloc_reserved_tree_block

Switch this to an ASSERT() and return the error in the normal case.

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent-tree.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index b7b49b4eb68db..23edecb5b4050 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4761,9 +4761,10 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
 	ret = btrfs_update_block_group(trans, extent_key.objectid,
 				       fs_info->nodesize, true);
 	if (ret) { /* -ENOENT, logic error */
+		ASSERT(!ret);
 		btrfs_err(fs_info, "update block group failed for %llu %llu",
 			extent_key.objectid, extent_key.offset);
-		BUG();
+		return ret;
 	}
 
 	trace_btrfs_reserved_extent_alloc(fs_info, extent_key.objectid,
-- 
GitLab


From 3466670558d70b30238bbef5fa0267bc6301b8db Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Wed, 23 Feb 2022 14:06:44 -0500
Subject: [PATCH 1420/1586] btrfs: add a alloc_reserved_extent helper

We duplicate this logic for both data and metadata, at this point we've
already done our type specific extent root operations, this is just
doing the accounting and removing the space from the free space tree.
Extract this common logic out into a helper.

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent-tree.c | 56 ++++++++++++++++++------------------------
 1 file changed, 24 insertions(+), 32 deletions(-)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 23edecb5b4050..431a5a4f73955 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4604,6 +4604,28 @@ int btrfs_pin_reserved_extent(struct btrfs_trans_handle *trans, u64 start,
 	return ret;
 }
 
+static int alloc_reserved_extent(struct btrfs_trans_handle *trans, u64 bytenr,
+				 u64 num_bytes)
+{
+	struct btrfs_fs_info *fs_info = trans->fs_info;
+	int ret;
+
+	ret = remove_from_free_space_tree(trans, bytenr, num_bytes);
+	if (ret)
+		return ret;
+
+	ret = btrfs_update_block_group(trans, bytenr, num_bytes, true);
+	if (ret) {
+		ASSERT(!ret);
+		btrfs_err(fs_info, "update block group failed for %llu %llu",
+			  bytenr, num_bytes);
+		return ret;
+	}
+
+	trace_btrfs_reserved_extent_alloc(fs_info, bytenr, num_bytes);
+	return 0;
+}
+
 static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
 				      u64 parent, u64 root_objectid,
 				      u64 flags, u64 owner, u64 offset,
@@ -4664,18 +4686,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
 	btrfs_mark_buffer_dirty(path->nodes[0]);
 	btrfs_free_path(path);
 
-	ret = remove_from_free_space_tree(trans, ins->objectid, ins->offset);
-	if (ret)
-		return ret;
-
-	ret = btrfs_update_block_group(trans, ins->objectid, ins->offset, true);
-	if (ret) { /* -ENOENT, logic error */
-		btrfs_err(fs_info, "update block group failed for %llu %llu",
-			ins->objectid, ins->offset);
-		BUG();
-	}
-	trace_btrfs_reserved_extent_alloc(fs_info, ins->objectid, ins->offset);
-	return ret;
+	return alloc_reserved_extent(trans, ins->objectid, ins->offset);
 }
 
 static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
@@ -4693,7 +4704,6 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
 	struct extent_buffer *leaf;
 	struct btrfs_delayed_tree_ref *ref;
 	u32 size = sizeof(*extent_item) + sizeof(*iref);
-	u64 num_bytes;
 	u64 flags = extent_op->flags_to_set;
 	bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
 
@@ -4703,12 +4713,10 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
 	if (skinny_metadata) {
 		extent_key.offset = ref->level;
 		extent_key.type = BTRFS_METADATA_ITEM_KEY;
-		num_bytes = fs_info->nodesize;
 	} else {
 		extent_key.offset = node->num_bytes;
 		extent_key.type = BTRFS_EXTENT_ITEM_KEY;
 		size += sizeof(*block_info);
-		num_bytes = node->num_bytes;
 	}
 
 	path = btrfs_alloc_path();
@@ -4753,23 +4761,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
 	btrfs_mark_buffer_dirty(leaf);
 	btrfs_free_path(path);
 
-	ret = remove_from_free_space_tree(trans, extent_key.objectid,
-					  num_bytes);
-	if (ret)
-		return ret;
-
-	ret = btrfs_update_block_group(trans, extent_key.objectid,
-				       fs_info->nodesize, true);
-	if (ret) { /* -ENOENT, logic error */
-		ASSERT(!ret);
-		btrfs_err(fs_info, "update block group failed for %llu %llu",
-			extent_key.objectid, extent_key.offset);
-		return ret;
-	}
-
-	trace_btrfs_reserved_extent_alloc(fs_info, extent_key.objectid,
-					  fs_info->nodesize);
-	return ret;
+	return alloc_reserved_extent(trans, node->bytenr, fs_info->nodesize);
 }
 
 int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
-- 
GitLab


From 5b2a54bb7c47c10ab6d96311d46a76b4a84f0b6f Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Wed, 23 Feb 2022 14:06:45 -0500
Subject: [PATCH 1421/1586] btrfs: remove last_ref from the extent freeing code

This is a remnant of the work I did for qgroups a long time ago to only
run for a block when we had dropped the last ref.  We haven't done that
for years, but the code remains.  Drop this remnant.

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent-tree.c | 34 +++++++++++-----------------------
 1 file changed, 11 insertions(+), 23 deletions(-)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 431a5a4f73955..faeaf53c3cfb2 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -598,7 +598,7 @@ fail:
 static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
 					   struct btrfs_root *root,
 					   struct btrfs_path *path,
-					   int refs_to_drop, int *last_ref)
+					   int refs_to_drop)
 {
 	struct btrfs_key key;
 	struct btrfs_extent_data_ref *ref1 = NULL;
@@ -631,7 +631,6 @@ static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
 
 	if (num_refs == 0) {
 		ret = btrfs_del_item(trans, root, path);
-		*last_ref = 1;
 	} else {
 		if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
 			btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
@@ -1072,8 +1071,7 @@ static noinline_for_stack
 void update_inline_extent_backref(struct btrfs_path *path,
 				  struct btrfs_extent_inline_ref *iref,
 				  int refs_to_mod,
-				  struct btrfs_delayed_extent_op *extent_op,
-				  int *last_ref)
+				  struct btrfs_delayed_extent_op *extent_op)
 {
 	struct extent_buffer *leaf = path->nodes[0];
 	struct btrfs_extent_item *ei;
@@ -1121,7 +1119,6 @@ void update_inline_extent_backref(struct btrfs_path *path,
 		else
 			btrfs_set_shared_data_ref_count(leaf, sref, refs);
 	} else {
-		*last_ref = 1;
 		size =  btrfs_extent_inline_ref_size(type);
 		item_size = btrfs_item_size(leaf, path->slots[0]);
 		ptr = (unsigned long)iref;
@@ -1166,8 +1163,7 @@ int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
 			}
 			return -EUCLEAN;
 		}
-		update_inline_extent_backref(path, iref, refs_to_add,
-					     extent_op, NULL);
+		update_inline_extent_backref(path, iref, refs_to_add, extent_op);
 	} else if (ret == -ENOENT) {
 		setup_inline_extent_backref(trans->fs_info, path, iref, parent,
 					    root_objectid, owner, offset,
@@ -1181,21 +1177,17 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
 				 struct btrfs_root *root,
 				 struct btrfs_path *path,
 				 struct btrfs_extent_inline_ref *iref,
-				 int refs_to_drop, int is_data, int *last_ref)
+				 int refs_to_drop, int is_data)
 {
 	int ret = 0;
 
 	BUG_ON(!is_data && refs_to_drop != 1);
-	if (iref) {
-		update_inline_extent_backref(path, iref, -refs_to_drop, NULL,
-					     last_ref);
-	} else if (is_data) {
-		ret = remove_extent_data_ref(trans, root, path, refs_to_drop,
-					     last_ref);
-	} else {
-		*last_ref = 1;
+	if (iref)
+		update_inline_extent_backref(path, iref, -refs_to_drop, NULL);
+	else if (is_data)
+		ret = remove_extent_data_ref(trans, root, path, refs_to_drop);
+	else
 		ret = btrfs_del_item(trans, root, path);
-	}
 	return ret;
 }
 
@@ -2942,7 +2934,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
 	u64 refs;
 	u64 bytenr = node->bytenr;
 	u64 num_bytes = node->num_bytes;
-	int last_ref = 0;
 	bool skinny_metadata = btrfs_fs_incompat(info, SKINNY_METADATA);
 
 	extent_root = btrfs_extent_root(info, bytenr);
@@ -3009,8 +3000,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
 			}
 			/* Must be SHARED_* item, remove the backref first */
 			ret = remove_extent_backref(trans, extent_root, path,
-						    NULL, refs_to_drop, is_data,
-						    &last_ref);
+						    NULL, refs_to_drop, is_data);
 			if (ret) {
 				btrfs_abort_transaction(trans, ret);
 				goto out;
@@ -3135,8 +3125,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
 		}
 		if (found_extent) {
 			ret = remove_extent_backref(trans, extent_root, path,
-						    iref, refs_to_drop, is_data,
-						    &last_ref);
+						    iref, refs_to_drop, is_data);
 			if (ret) {
 				btrfs_abort_transaction(trans, ret);
 				goto out;
@@ -3181,7 +3170,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
 			}
 		}
 
-		last_ref = 1;
 		ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
 				      num_to_del);
 		if (ret) {
-- 
GitLab


From 8f8aa4c7a9bde010211484a9c0475846acfc399f Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Wed, 23 Feb 2022 14:06:46 -0500
Subject: [PATCH 1422/1586] btrfs: factor out do_free_extent_accounting helper

__btrfs_free_extent() does all of the hard work of updating the extent
ref items, and then at the end if we dropped the extent completely it
does the cleanup accounting work.  We're going to only want to do that
work for metadata with extent tree v2, so extract this bit into its own
helper.

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent-tree.c | 52 ++++++++++++++++++++++++------------------
 1 file changed, 30 insertions(+), 22 deletions(-)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index faeaf53c3cfb2..f477035a2ac23 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2853,6 +2853,35 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
 	return 0;
 }
 
+static int do_free_extent_accounting(struct btrfs_trans_handle *trans,
+				     u64 bytenr, u64 num_bytes, bool is_data)
+{
+	int ret;
+
+	if (is_data) {
+		struct btrfs_root *csum_root;
+
+		csum_root = btrfs_csum_root(trans->fs_info, bytenr);
+		ret = btrfs_del_csums(trans, csum_root, bytenr, num_bytes);
+		if (ret) {
+			btrfs_abort_transaction(trans, ret);
+			return ret;
+		}
+	}
+
+	ret = add_to_free_space_tree(trans, bytenr, num_bytes);
+	if (ret) {
+		btrfs_abort_transaction(trans, ret);
+		return ret;
+	}
+
+	ret = btrfs_update_block_group(trans, bytenr, num_bytes, false);
+	if (ret)
+		btrfs_abort_transaction(trans, ret);
+
+	return ret;
+}
+
 /*
  * Drop one or more refs of @node.
  *
@@ -3178,28 +3207,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
 		}
 		btrfs_release_path(path);
 
-		if (is_data) {
-			struct btrfs_root *csum_root;
-			csum_root = btrfs_csum_root(info, bytenr);
-			ret = btrfs_del_csums(trans, csum_root, bytenr,
-					      num_bytes);
-			if (ret) {
-				btrfs_abort_transaction(trans, ret);
-				goto out;
-			}
-		}
-
-		ret = add_to_free_space_tree(trans, bytenr, num_bytes);
-		if (ret) {
-			btrfs_abort_transaction(trans, ret);
-			goto out;
-		}
-
-		ret = btrfs_update_block_group(trans, bytenr, num_bytes, false);
-		if (ret) {
-			btrfs_abort_transaction(trans, ret);
-			goto out;
-		}
+		ret = do_free_extent_accounting(trans, bytenr, num_bytes, is_data);
 	}
 	btrfs_release_path(path);
 
-- 
GitLab


From 4eb150d6122bf51744d07c569ea811e8ce4bdd6d Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Tue, 22 Feb 2022 15:41:19 +0800
Subject: [PATCH 1423/1586] btrfs: unify the error handling pattern for
 read_tree_block()

We had an error handling pattern for read_tree_block() like this:

	eb = read_tree_block();
	if (IS_ERR(eb)) {
		/*
		 * Handling error here
		 * Normally ended up with return or goto out.
		 */
	} else if (!extent_buffer_uptodate(eb)) {
		/*
		 * Different error handling here
		 * Normally also ended up with return or goto out;
		 */
	}

This is fine, but if we want to add extra check for each
read_tree_block(), the existing if-else-if is not that expandable and
will take reader some seconds to figure out there is no extra branch.

Here we change it to a more common way, without the extra else:

	eb = read_tree_block();
	if (IS_ERR(eb)) {
		/*
		 * Handling error here
		 */
		return eb or goto out;
	}
	if (!extent_buffer_uptodate(eb)) {
		/*
		 * Different error handling here
		 */
		return eb or goto out;
	}

This also removes some oddball call sites which uses some creative way
to check error.

Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/backref.c    |  7 +++++--
 fs/btrfs/ctree.c      | 30 ++++++++++++++++--------------
 fs/btrfs/disk-io.c    | 16 +++++++++-------
 fs/btrfs/print-tree.c |  4 ++--
 fs/btrfs/relocation.c |  4 ++--
 5 files changed, 34 insertions(+), 27 deletions(-)

diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index c9ee579bc5a61..ebc392ea1d74c 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -789,11 +789,13 @@ static int add_missing_keys(struct btrfs_fs_info *fs_info,
 		if (IS_ERR(eb)) {
 			free_pref(ref);
 			return PTR_ERR(eb);
-		} else if (!extent_buffer_uptodate(eb)) {
+		}
+		if (!extent_buffer_uptodate(eb)) {
 			free_pref(ref);
 			free_extent_buffer(eb);
 			return -EIO;
 		}
+
 		if (lock)
 			btrfs_tree_read_lock(eb);
 		if (btrfs_header_level(eb) == 0)
@@ -1335,7 +1337,8 @@ again:
 				if (IS_ERR(eb)) {
 					ret = PTR_ERR(eb);
 					goto out;
-				} else if (!extent_buffer_uptodate(eb)) {
+				}
+				if (!extent_buffer_uptodate(eb)) {
 					free_extent_buffer(eb);
 					ret = -EIO;
 					goto out;
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 1bb5335c6d757..9b2d9cd416763 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -846,9 +846,11 @@ struct extent_buffer *btrfs_read_node_slot(struct extent_buffer *parent,
 			     btrfs_header_owner(parent),
 			     btrfs_node_ptr_generation(parent, slot),
 			     level - 1, &first_key);
-	if (!IS_ERR(eb) && !extent_buffer_uptodate(eb)) {
+	if (IS_ERR(eb))
+		return eb;
+	if (!extent_buffer_uptodate(eb)) {
 		free_extent_buffer(eb);
-		eb = ERR_PTR(-EIO);
+		return ERR_PTR(-EIO);
 	}
 
 	return eb;
@@ -1460,19 +1462,19 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
 	ret = -EAGAIN;
 	tmp = read_tree_block(fs_info, blocknr, root->root_key.objectid,
 			      gen, parent_level - 1, &first_key);
-	if (!IS_ERR(tmp)) {
-		/*
-		 * If the read above didn't mark this buffer up to date,
-		 * it will never end up being up to date.  Set ret to EIO now
-		 * and give up so that our caller doesn't loop forever
-		 * on our EAGAINs.
-		 */
-		if (!extent_buffer_uptodate(tmp))
-			ret = -EIO;
-		free_extent_buffer(tmp);
-	} else {
-		ret = PTR_ERR(tmp);
+	if (IS_ERR(tmp)) {
+		btrfs_release_path(p);
+		return PTR_ERR(tmp);
 	}
+	/*
+	 * If the read above didn't mark this buffer up to date,
+	 * it will never end up being up to date.  Set ret to EIO now
+	 * and give up so that our caller doesn't loop forever
+	 * on our EAGAINs.
+	 */
+	if (!extent_buffer_uptodate(tmp))
+		ret = -EIO;
+	free_extent_buffer(tmp);
 
 	btrfs_release_path(p);
 	return ret;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 6a0b4dbd70e94..dd70b4ad838e5 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1543,7 +1543,8 @@ static struct btrfs_root *read_tree_root_path(struct btrfs_root *tree_root,
 		ret = PTR_ERR(root->node);
 		root->node = NULL;
 		goto fail;
-	} else if (!btrfs_buffer_uptodate(root->node, generation, 0)) {
+	}
+	if (!btrfs_buffer_uptodate(root->node, generation, 0)) {
 		ret = -EIO;
 		goto fail;
 	}
@@ -2537,11 +2538,13 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
 		log_tree_root->node = NULL;
 		btrfs_put_root(log_tree_root);
 		return ret;
-	} else if (!extent_buffer_uptodate(log_tree_root->node)) {
+	}
+	if (!extent_buffer_uptodate(log_tree_root->node)) {
 		btrfs_err(fs_info, "failed to read log tree");
 		btrfs_put_root(log_tree_root);
 		return -EIO;
 	}
+
 	/* returns with log_tree_root freed on success */
 	ret = btrfs_recover_log_trees(log_tree_root);
 	if (ret) {
@@ -2983,15 +2986,14 @@ static int load_super_root(struct btrfs_root *root, u64 bytenr, u64 gen, int lev
 	if (IS_ERR(root->node)) {
 		ret = PTR_ERR(root->node);
 		root->node = NULL;
-	} else if (!extent_buffer_uptodate(root->node)) {
+		return ret;
+	}
+	if (!extent_buffer_uptodate(root->node)) {
 		free_extent_buffer(root->node);
 		root->node = NULL;
-		ret = -EIO;
+		return -EIO;
 	}
 
-	if (ret)
-		return ret;
-
 	btrfs_set_root_node(&root->root_item, root->node);
 	root->commit_root = btrfs_root_node(root);
 	btrfs_set_root_refs(&root->root_item, 1);
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index 524fdb0ddd74b..dd8777872143a 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -392,9 +392,9 @@ void btrfs_print_tree(struct extent_buffer *c, bool follow)
 				       btrfs_header_owner(c),
 				       btrfs_node_ptr_generation(c, i),
 				       level - 1, &first_key);
-		if (IS_ERR(next)) {
+		if (IS_ERR(next))
 			continue;
-		} else if (!extent_buffer_uptodate(next)) {
+		if (!extent_buffer_uptodate(next)) {
 			free_extent_buffer(next);
 			continue;
 		}
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index df93c672f332b..fdc2c4b411f00 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -2599,9 +2599,9 @@ static int get_tree_block_key(struct btrfs_fs_info *fs_info,
 
 	eb = read_tree_block(fs_info, block->bytenr, block->owner,
 			     block->key.offset, block->level, NULL);
-	if (IS_ERR(eb)) {
+	if (IS_ERR(eb))
 		return PTR_ERR(eb);
-	} else if (!extent_buffer_uptodate(eb)) {
+	if (!extent_buffer_uptodate(eb)) {
 		free_extent_buffer(eb);
 		return -EIO;
 	}
-- 
GitLab


From 9a4ffa1bd629c7c55a1238c65e58fe0ce9c12d08 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Tue, 22 Feb 2022 15:41:20 +0800
Subject: [PATCH 1424/1586] btrfs: unify the error handling of
 btrfs_read_buffer()

There is one oddball error handling of btrfs_read_buffer():

	ret = btrfs_read_buffer(tmp, gen, parent_level - 1, &first_key);
	if (!ret) {
		*eb_ret = tmp;
		return 0;
	}
	free_extent_buffer(tmp);
	btrfs_release_path(p);
	return -EIO;

While all other call sites check the error first.  Unify the behavior.

Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 9b2d9cd416763..0eecf98d0abb8 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1438,13 +1438,13 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
 
 		/* now we're allowed to do a blocking uptodate check */
 		ret = btrfs_read_buffer(tmp, gen, parent_level - 1, &first_key);
-		if (!ret) {
-			*eb_ret = tmp;
-			return 0;
+		if (ret) {
+			free_extent_buffer(tmp);
+			btrfs_release_path(p);
+			return -EIO;
 		}
-		free_extent_buffer(tmp);
-		btrfs_release_path(p);
-		return -EIO;
+		*eb_ret = tmp;
+		return 0;
 	}
 
 	/*
-- 
GitLab


From 3777369ff1518b579560611a0d0c33f930154f64 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Wed, 2 Mar 2022 09:10:21 +0800
Subject: [PATCH 1425/1586] btrfs: verify the tranisd of the to-be-written
 dirty extent buffer

[BUG]
There is a bug report that a bitflip in the transid part of an extent
buffer makes btrfs to reject certain tree blocks:

  BTRFS error (device dm-0): parent transid verify failed on 1382301696 wanted 262166 found 22

[CAUSE]
Note the failed transid check, hex(262166) = 0x40016, while
hex(22) = 0x16.

It's an obvious bitflip.

Furthermore, the reporter also confirmed the bitflip is from the
hardware, so it's a real hardware caused bitflip, and such problem can
not be detected by the existing tree-checker framework.

As tree-checker can only verify the content inside one tree block, while
generation of a tree block can only be verified against its parent.

So such problem remain undetected.

[FIX]
Although tree-checker can not verify it at write-time, we still have a
quick (but not the most accurate) way to catch such obvious corruption.

Function csum_one_extent_buffer() is called before we submit metadata
write.

Thus it means, all the extent buffer passed in should be dirty tree
blocks, and should be newer than last committed transaction.

Using that we can catch the above bitflip.

Although it's not a perfect solution, as if the corrupted generation is
higher than the correct value, we have no way to catch it at all.

Reported-by: Christoph Anton Mitterer <calestyo@scientia.org>
Link: https://lore.kernel.org/linux-btrfs/2dfcbc130c55cc6fd067b93752e90bd2b079baca.camel@scientia.org/
CC: stable@vger.kernel.org # 5.15+
Signed-off-by: Qu Wenruo <wqu@sus,ree.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/disk-io.c | 26 ++++++++++++++++++++------
 1 file changed, 20 insertions(+), 6 deletions(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index dd70b4ad838e5..09693ab4fde0f 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -441,17 +441,31 @@ static int csum_one_extent_buffer(struct extent_buffer *eb)
 	else
 		ret = btrfs_check_leaf_full(eb);
 
-	if (ret < 0) {
-		btrfs_print_tree(eb, 0);
+	if (ret < 0)
+		goto error;
+
+	/*
+	 * Also check the generation, the eb reached here must be newer than
+	 * last committed. Or something seriously wrong happened.
+	 */
+	if (unlikely(btrfs_header_generation(eb) <= fs_info->last_trans_committed)) {
+		ret = -EUCLEAN;
 		btrfs_err(fs_info,
-			"block=%llu write time tree block corruption detected",
-			eb->start);
-		WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
-		return ret;
+			"block=%llu bad generation, have %llu expect > %llu",
+			  eb->start, btrfs_header_generation(eb),
+			  fs_info->last_trans_committed);
+		goto error;
 	}
 	write_extent_buffer(eb, result, 0, fs_info->csum_size);
 
 	return 0;
+
+error:
+	btrfs_print_tree(eb, 0);
+	btrfs_err(fs_info, "block=%llu write time tree block corruption detected",
+		  eb->start);
+	WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
+	return ret;
 }
 
 /* Checksum all dirty extent buffers in one bio_vec */
-- 
GitLab


From bf7bd725b022cca0195aa81a378a82b1e731a4a3 Mon Sep 17 00:00:00 2001
From: Niels Dossche <dossche.niels@gmail.com>
Date: Thu, 3 Mar 2022 01:38:39 +0100
Subject: [PATCH 1426/1586] btrfs: add lockdep_assert_held to
 need_preemptive_reclaim

In a previous patch ("btrfs: extend locking to all space_info members
accesses") the locking for the space_info members was extended in
btrfs_preempt_reclaim_metadata_space because not all the member
accesses that needed locks were actually locked (bytes_pinned et al).

It was then suggested to also add a call to lockdep_assert_held to
need_preemptive_reclaim. This function also works with space_info
members. As of now, it has only two call sites which both hold the lock.

Suggested-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Niels Dossche <dossche.niels@gmail.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/space-info.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index 62382ae1eb02a..b87931a458ebb 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -737,6 +737,8 @@ static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info,
 	u64 thresh = div_factor_fine(space_info->total_bytes, 90);
 	u64 used;
 
+	lockdep_assert_held(&space_info->lock);
+
 	/* If we're just plain full then async reclaim just slows us down. */
 	if ((space_info->bytes_used + space_info->bytes_reserved +
 	     global_rsv_size) >= thresh)
-- 
GitLab


From 79c9234ba596e903907de20573fd4bcc85315b06 Mon Sep 17 00:00:00 2001
From: Dongliang Mu <mudongliangabcd@gmail.com>
Date: Thu, 3 Mar 2022 22:40:27 +0800
Subject: [PATCH 1427/1586] btrfs: don't access possibly stale fs_info data in
 device_list_add

Syzbot reported a possible use-after-free in printing information
in device_list_add.

Very similar with the bug fixed by commit 0697d9a61099 ("btrfs: don't
access possibly stale fs_info data for printing duplicate device"),
but this time the use occurs in btrfs_info_in_rcu.

  Call Trace:
   kasan_report.cold+0x83/0xdf mm/kasan/report.c:459
   btrfs_printk+0x395/0x425 fs/btrfs/super.c:244
   device_list_add.cold+0xd7/0x2ed fs/btrfs/volumes.c:957
   btrfs_scan_one_device+0x4c7/0x5c0 fs/btrfs/volumes.c:1387
   btrfs_control_ioctl+0x12a/0x2d0 fs/btrfs/super.c:2409
   vfs_ioctl fs/ioctl.c:51 [inline]
   __do_sys_ioctl fs/ioctl.c:874 [inline]
   __se_sys_ioctl fs/ioctl.c:860 [inline]
   __x64_sys_ioctl+0x193/0x200 fs/ioctl.c:860
   do_syscall_x64 arch/x86/entry/common.c:50 [inline]
   do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
   entry_SYSCALL_64_after_hwframe+0x44/0xae

Fix this by modifying device->fs_info to NULL too.

Reported-and-tested-by: syzbot+82650a4e0ed38f218363@syzkaller.appspotmail.com
CC: stable@vger.kernel.org # 4.19+
Signed-off-by: Dongliang Mu <mudongliangabcd@gmail.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/volumes.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 5e3e13d4940b1..1be7cb2f955fc 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -921,16 +921,15 @@ static noinline struct btrfs_device *device_list_add(const char *path,
 		/*
 		 * We are going to replace the device path for a given devid,
 		 * make sure it's the same device if the device is mounted
+		 *
+		 * NOTE: the device->fs_info may not be reliable here so pass
+		 * in a NULL to message helpers instead. This avoids a possible
+		 * use-after-free when the fs_info and fs_info->sb are already
+		 * torn down.
 		 */
 		if (device->bdev) {
 			if (device->devt != path_devt) {
 				mutex_unlock(&fs_devices->device_list_mutex);
-				/*
-				 * device->fs_info may not be reliable here, so
-				 * pass in a NULL instead. This avoids a
-				 * possible use-after-free when the fs_info and
-				 * fs_info->sb are already torn down.
-				 */
 				btrfs_warn_in_rcu(NULL,
 	"duplicate device %s devid %llu generation %llu scanned by %s (%d)",
 						  path, devid, found_transid,
@@ -938,7 +937,7 @@ static noinline struct btrfs_device *device_list_add(const char *path,
 						  task_pid_nr(current));
 				return ERR_PTR(-EEXIST);
 			}
-			btrfs_info_in_rcu(device->fs_info,
+			btrfs_info_in_rcu(NULL,
 	"devid %llu device path %s changed to %s scanned by %s (%d)",
 					  devid, rcu_str_deref(device->name),
 					  path, current->comm,
-- 
GitLab


From d3e29967079c522ce1c5cab0e9fab2c280b977eb Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Mon, 7 Mar 2022 15:30:02 +0200
Subject: [PATCH 1428/1586] btrfs: zoned: put block group after final usage

It's counter-intuitive (and wrong) to put the block group _before_ the
final usage in submit_eb_page. Fix it by re-ordering the call to
btrfs_put_block_group after its final reference. Also fix a minor typo
in 'implies'

Fixes: be1a1d7a5d24 ("btrfs: zoned: finish fully written block group")
CC: stable@vger.kernel.org # 5.16+
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_io.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 3f13ff0affdb3..78486bbd1ac90 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4786,11 +4786,12 @@ static int submit_eb_page(struct page *page, struct writeback_control *wbc,
 		return ret;
 	}
 	if (cache) {
-		/* Impiles write in zoned mode */
-		btrfs_put_block_group(cache);
-		/* Mark the last eb in a block group */
+		/*
+		 * Implies write in zoned mode. Mark the last eb in a block group.
+		 */
 		if (cache->seq_zone && eb->start + eb->len == cache->zone_capacity)
 			set_bit(EXTENT_BUFFER_ZONE_FINISH, &eb->bflags);
+		btrfs_put_block_group(cache);
 	}
 	ret = write_one_eb(eb, wbc, epd);
 	free_extent_buffer(eb);
-- 
GitLab


From daaca3522a8e67c46e39ef09c1d542e866f85f3b Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Mon, 14 Mar 2022 12:30:18 +0800
Subject: [PATCH 1429/1586] block: release rq qos structures for queue without
 disk

blkcg_init_queue() may add rq qos structures to request queue, previously
blk_cleanup_queue() calls rq_qos_exit() to release them, but commit
8e141f9eb803 ("block: drain file system I/O on del_gendisk")
moves rq_qos_exit() into del_gendisk(), so memory leak is caused
because queues may not have disk, such as un-present scsi luns, nvme
admin queue, ...

Fixes the issue by adding rq_qos_exit() to blk_cleanup_queue() back.

BTW, v5.18 won't need this patch any more since we move
blkcg_init_queue()/blkcg_exit_queue() into disk allocation/release
handler, and patches have been in for-5.18/block.

Cc: Christoph Hellwig <hch@lst.de>
Cc: stable@vger.kernel.org
Fixes: 8e141f9eb803 ("block: drain file system I/O on del_gendisk")
Reported-by: syzbot+b42749a851a47a0f581b@syzkaller.appspotmail.com
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220314043018.177141-1-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/block/blk-core.c b/block/blk-core.c
index 1039515c99d6a..779b4a1f66ac2 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -50,6 +50,7 @@
 #include "blk-mq-sched.h"
 #include "blk-pm.h"
 #include "blk-throttle.h"
+#include "blk-rq-qos.h"
 
 struct dentry *blk_debugfs_root;
 
@@ -314,6 +315,9 @@ void blk_cleanup_queue(struct request_queue *q)
 	 */
 	blk_freeze_queue(q);
 
+	/* cleanup rq qos structures for queue without disk */
+	rq_qos_exit(q);
+
 	blk_queue_flag_set(QUEUE_FLAG_DEAD, q);
 
 	blk_sync_queue(q);
-- 
GitLab


From aa1b46dcdc7baaf5fec0be25782ef24b26aa209e Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sun, 13 Mar 2022 21:15:02 -1000
Subject: [PATCH 1430/1586] block: fix rq-qos breakage from skipping
 rq_qos_done_bio()

a647a524a467 ("block: don't call rq_qos_ops->done_bio if the bio isn't
tracked") made bio_endio() skip rq_qos_done_bio() if BIO_TRACKED is not set.
While this fixed a potential oops, it also broke blk-iocost by skipping the
done_bio callback for merged bios.

Before, whether a bio goes through rq_qos_throttle() or rq_qos_merge(),
rq_qos_done_bio() would be called on the bio on completion with BIO_TRACKED
distinguishing the former from the latter. rq_qos_done_bio() is not called
for bios which wenth through rq_qos_merge(). This royally confuses
blk-iocost as the merged bios never finish and are considered perpetually
in-flight.

One reliably reproducible failure mode is an intermediate cgroup geting
stuck active preventing its children from being activated due to the
leaf-only rule, leading to loss of control. The following is from
resctl-bench protection scenario which emulates isolating a web server like
workload from a memory bomb run on an iocost configuration which should
yield a reasonable level of protection.

  # cat /sys/block/nvme2n1/device/model
  Samsung SSD 970 PRO 512GB
  # cat /sys/fs/cgroup/io.cost.model
  259:0 ctrl=user model=linear rbps=834913556 rseqiops=93622 rrandiops=102913 wbps=618985353 wseqiops=72325 wrandiops=71025
  # cat /sys/fs/cgroup/io.cost.qos
  259:0 enable=1 ctrl=user rpct=95.00 rlat=18776 wpct=95.00 wlat=8897 min=60.00 max=100.00
  # resctl-bench -m 29.6G -r out.json run protection::scenario=mem-hog,loops=1
  ...
  Memory Hog Summary
  ==================

  IO Latency: R p50=242u:336u/2.5m p90=794u:1.4m/7.5m p99=2.7m:8.0m/62.5m max=8.0m:36.4m/350m
              W p50=221u:323u/1.5m p90=709u:1.2m/5.5m p99=1.5m:2.5m/9.5m max=6.9m:35.9m/350m

  Isolation and Request Latency Impact Distributions:

                min   p01   p05   p10   p25   p50   p75   p90   p95   p99   max  mean stdev
  isol%       15.90 15.90 15.90 40.05 57.24 59.07 60.01 74.63 74.63 90.35 90.35 58.12 15.82
  lat-imp%        0     0     0     0     0  4.55 14.68 15.54 233.5 548.1 548.1 53.88 143.6

  Result: isol=58.12:15.82% lat_imp=53.88%:143.6 work_csv=100.0% missing=3.96%

The isolation result of 58.12% is close to what this device would show
without any IO control.

Fix it by introducing a new flag BIO_QOS_MERGED to mark merged bios and
calling rq_qos_done_bio() on them too. For consistency and clarity, rename
BIO_TRACKED to BIO_QOS_THROTTLED. The flag checks are moved into
rq_qos_done_bio() so that it's next to the code paths that set the flags.

With the patch applied, the above same benchmark shows:

  # resctl-bench -m 29.6G -r out.json run protection::scenario=mem-hog,loops=1
  ...
  Memory Hog Summary
  ==================

  IO Latency: R p50=123u:84.4u/985u p90=322u:256u/2.5m p99=1.6m:1.4m/9.5m max=11.1m:36.0m/350m
              W p50=429u:274u/995u p90=1.7m:1.3m/4.5m p99=3.4m:2.7m/11.5m max=7.9m:5.9m/26.5m

  Isolation and Request Latency Impact Distributions:

                min   p01   p05   p10   p25   p50   p75   p90   p95   p99   max  mean stdev
  isol%       84.91 84.91 89.51 90.73 92.31 94.49 96.36 98.04 98.71 100.0 100.0 94.42  2.81
  lat-imp%        0     0     0     0     0  2.81  5.73 11.11 13.92 17.53 22.61  4.10  4.68

  Result: isol=94.42:2.81% lat_imp=4.10%:4.68 work_csv=58.34% missing=0%

Signed-off-by: Tejun Heo <tj@kernel.org>
Fixes: a647a524a467 ("block: don't call rq_qos_ops->done_bio if the bio isn't tracked")
Cc: stable@vger.kernel.org # v5.15+
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Yu Kuai <yukuai3@huawei.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/Yi7rdrzQEHjJLGKB@slm.duckdns.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c               |  3 +--
 block/blk-iolatency.c     |  2 +-
 block/blk-rq-qos.h        | 20 +++++++++++---------
 include/linux/blk_types.h |  3 ++-
 4 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/block/bio.c b/block/bio.c
index 151cace2dbe16..33979f306e9e7 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1516,8 +1516,7 @@ again:
 	if (!bio_integrity_endio(bio))
 		return;
 
-	if (bio->bi_bdev && bio_flagged(bio, BIO_TRACKED))
-		rq_qos_done_bio(bdev_get_queue(bio->bi_bdev), bio);
+	rq_qos_done_bio(bio);
 
 	if (bio->bi_bdev && bio_flagged(bio, BIO_TRACE_COMPLETION)) {
 		trace_block_bio_complete(bdev_get_queue(bio->bi_bdev), bio);
diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
index 010e658d44a82..2f33932e72e36 100644
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -598,7 +598,7 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio)
 	int inflight = 0;
 
 	blkg = bio->bi_blkg;
-	if (!blkg || !bio_flagged(bio, BIO_TRACKED))
+	if (!blkg || !bio_flagged(bio, BIO_QOS_THROTTLED))
 		return;
 
 	iolat = blkg_to_lat(bio->bi_blkg);
diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h
index 3cfbc8668cba9..68267007da1c6 100644
--- a/block/blk-rq-qos.h
+++ b/block/blk-rq-qos.h
@@ -177,20 +177,20 @@ static inline void rq_qos_requeue(struct request_queue *q, struct request *rq)
 		__rq_qos_requeue(q->rq_qos, rq);
 }
 
-static inline void rq_qos_done_bio(struct request_queue *q, struct bio *bio)
+static inline void rq_qos_done_bio(struct bio *bio)
 {
-	if (q->rq_qos)
-		__rq_qos_done_bio(q->rq_qos, bio);
+	if (bio->bi_bdev && (bio_flagged(bio, BIO_QOS_THROTTLED) ||
+			     bio_flagged(bio, BIO_QOS_MERGED))) {
+		struct request_queue *q = bdev_get_queue(bio->bi_bdev);
+		if (q->rq_qos)
+			__rq_qos_done_bio(q->rq_qos, bio);
+	}
 }
 
 static inline void rq_qos_throttle(struct request_queue *q, struct bio *bio)
 {
-	/*
-	 * BIO_TRACKED lets controllers know that a bio went through the
-	 * normal rq_qos path.
-	 */
 	if (q->rq_qos) {
-		bio_set_flag(bio, BIO_TRACKED);
+		bio_set_flag(bio, BIO_QOS_THROTTLED);
 		__rq_qos_throttle(q->rq_qos, bio);
 	}
 }
@@ -205,8 +205,10 @@ static inline void rq_qos_track(struct request_queue *q, struct request *rq,
 static inline void rq_qos_merge(struct request_queue *q, struct request *rq,
 				struct bio *bio)
 {
-	if (q->rq_qos)
+	if (q->rq_qos) {
+		bio_set_flag(bio, BIO_QOS_MERGED);
 		__rq_qos_merge(q->rq_qos, rq, bio);
+	}
 }
 
 static inline void rq_qos_queue_depth_changed(struct request_queue *q)
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 5561e58d158ac..0c3563b45fe90 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -324,7 +324,8 @@ enum {
 	BIO_TRACE_COMPLETION,	/* bio_endio() should trace the final completion
 				 * of this bio. */
 	BIO_CGROUP_ACCT,	/* has been accounted to a cgroup */
-	BIO_TRACKED,		/* set if bio goes through the rq_qos path */
+	BIO_QOS_THROTTLED,	/* bio went through rq_qos throttle path */
+	BIO_QOS_MERGED,		/* but went through rq_qos merge path */
 	BIO_REMAPPED,
 	BIO_ZONE_WRITE_LOCKED,	/* Owns a zoned device zone write lock */
 	BIO_PERCPU_CACHE,	/* can participate in per-cpu alloc cache */
-- 
GitLab


From 837d9e49402eaf030db55a49f96fc51d73b4b441 Mon Sep 17 00:00:00 2001
From: Kurt Cancemi <kurt@x64architecture.com>
Date: Sat, 12 Mar 2022 15:15:13 -0500
Subject: [PATCH 1431/1586] net: phy: marvell: Fix invalid comparison in the
 resume and suspend functions

This bug resulted in only the current mode being resumed and suspended when
the PHY supported both fiber and copper modes and when the PHY only supported
copper mode the fiber mode would incorrectly be attempted to be resumed and
suspended.

Fixes: 3758be3dc162 ("Marvell phy: add functions to suspend and resume both interfaces: fiber and copper links.")
Signed-off-by: Kurt Cancemi <kurt@x64architecture.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Link: https://lore.kernel.org/r/20220312201512.326047-1-kurt@x64architecture.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/phy/marvell.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index 2429db614b59a..2702faf7b0f60 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -1687,8 +1687,8 @@ static int marvell_suspend(struct phy_device *phydev)
 	int err;
 
 	/* Suspend the fiber mode first */
-	if (!linkmode_test_bit(ETHTOOL_LINK_MODE_FIBRE_BIT,
-			       phydev->supported)) {
+	if (linkmode_test_bit(ETHTOOL_LINK_MODE_FIBRE_BIT,
+			      phydev->supported)) {
 		err = marvell_set_page(phydev, MII_MARVELL_FIBER_PAGE);
 		if (err < 0)
 			goto error;
@@ -1722,8 +1722,8 @@ static int marvell_resume(struct phy_device *phydev)
 	int err;
 
 	/* Resume the fiber mode first */
-	if (!linkmode_test_bit(ETHTOOL_LINK_MODE_FIBRE_BIT,
-			       phydev->supported)) {
+	if (linkmode_test_bit(ETHTOOL_LINK_MODE_FIBRE_BIT,
+			      phydev->supported)) {
 		err = marvell_set_page(phydev, MII_MARVELL_FIBER_PAGE);
 		if (err < 0)
 			goto error;
-- 
GitLab


From 6b2b04590b51aa4cf395fcd185ce439cab5961dc Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 14 Mar 2022 14:30:11 -1000
Subject: [PATCH 1432/1586] block: don't merge across cgroup boundaries if
 blkcg is enabled

blk-iocost and iolatency are cgroup aware rq-qos policies but they didn't
disable merges across different cgroups. This obviously can lead to
accounting and control errors but more importantly to priority inversions -
e.g. an IO which belongs to a higher priority cgroup or IO class may end up
getting throttled incorrectly because it gets merged to an IO issued from a
low priority cgroup.

Fix it by adding blk_cgroup_mergeable() which is called from merge paths and
rejects cross-cgroup and cross-issue_as_root merges.

Signed-off-by: Tejun Heo <tj@kernel.org>
Fixes: d70675121546 ("block: introduce blk-iolatency io controller")
Cc: stable@vger.kernel.org # v4.19+
Cc: Josef Bacik <jbacik@fb.com>
Link: https://lore.kernel.org/r/Yi/eE/6zFNyWJ+qd@slm.duckdns.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-cgroup.h | 17 +++++++++++++++++
 block/blk-merge.c  | 11 +++++++++++
 2 files changed, 28 insertions(+)

diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 3e91803c4a555..47e1e38390c96 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -15,6 +15,7 @@
  */
 
 #include <linux/blk-cgroup.h>
+#include <linux/blk-mq.h>
 
 /* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */
 #define BLKG_STAT_CPU_BATCH	(INT_MAX / 2)
@@ -428,6 +429,21 @@ static inline void blkcg_clear_delay(struct blkcg_gq *blkg)
 		atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
 }
 
+/**
+ * blk_cgroup_mergeable - Determine whether to allow or disallow merges
+ * @rq: request to merge into
+ * @bio: bio to merge
+ *
+ * @bio and @rq should belong to the same cgroup and their issue_as_root should
+ * match. The latter is necessary as we don't want to throttle e.g. a metadata
+ * update because it happens to be next to a regular IO.
+ */
+static inline bool blk_cgroup_mergeable(struct request *rq, struct bio *bio)
+{
+	return rq->bio->bi_blkg == bio->bi_blkg &&
+		bio_issue_as_root_blkg(rq->bio) == bio_issue_as_root_blkg(bio);
+}
+
 void blk_cgroup_bio_start(struct bio *bio);
 void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta);
 #else	/* CONFIG_BLK_CGROUP */
@@ -467,6 +483,7 @@ static inline void blkg_put(struct blkcg_gq *blkg) { }
 static inline bool blkcg_punt_bio_submit(struct bio *bio) { return false; }
 static inline void blkcg_bio_issue_init(struct bio *bio) { }
 static inline void blk_cgroup_bio_start(struct bio *bio) { }
+static inline bool blk_cgroup_mergeable(struct request *rq, struct bio *bio) { return true; }
 
 #define blk_queue_for_each_rl(rl, q)	\
 	for ((rl) = &(q)->root_rl; (rl); (rl) = NULL)
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 8d8177f71ebdb..ea6968313b4a8 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -9,6 +9,7 @@
 #include <linux/blk-integrity.h>
 #include <linux/scatterlist.h>
 #include <linux/part_stat.h>
+#include <linux/blk-cgroup.h>
 
 #include <trace/events/block.h>
 
@@ -598,6 +599,9 @@ static inline unsigned int blk_rq_get_max_sectors(struct request *rq,
 static inline int ll_new_hw_segment(struct request *req, struct bio *bio,
 		unsigned int nr_phys_segs)
 {
+	if (!blk_cgroup_mergeable(req, bio))
+		goto no_merge;
+
 	if (blk_integrity_merge_bio(req->q, req, bio) == false)
 		goto no_merge;
 
@@ -694,6 +698,9 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
 	if (total_phys_segments > blk_rq_get_max_segments(req))
 		return 0;
 
+	if (!blk_cgroup_mergeable(req, next->bio))
+		return 0;
+
 	if (blk_integrity_merge_rq(q, req, next) == false)
 		return 0;
 
@@ -902,6 +909,10 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
 	if (bio_data_dir(bio) != rq_data_dir(rq))
 		return false;
 
+	/* don't merge across cgroup boundaries */
+	if (!blk_cgroup_mergeable(rq, bio))
+		return false;
+
 	/* only merge integrity protected bio into ditto rq */
 	if (blk_integrity_merge_bio(rq->q, rq, bio) == false)
 		return false;
-- 
GitLab


From 5600f6986628dde8881734090588474f54a540a8 Mon Sep 17 00:00:00 2001
From: Pavel Skripkin <paskripkin@gmail.com>
Date: Sun, 13 Mar 2022 22:56:32 -0700
Subject: [PATCH 1433/1586] Input: aiptek - properly check endpoint type

Syzbot reported warning in usb_submit_urb() which is caused by wrong
endpoint type. There was a check for the number of endpoints, but not
for the type of endpoint.

Fix it by replacing old desc.bNumEndpoints check with
usb_find_common_endpoints() helper for finding endpoints

Fail log:

usb 5-1: BOGUS urb xfer, pipe 1 != type 3
WARNING: CPU: 2 PID: 48 at drivers/usb/core/urb.c:502 usb_submit_urb+0xed2/0x18a0 drivers/usb/core/urb.c:502
Modules linked in:
CPU: 2 PID: 48 Comm: kworker/2:2 Not tainted 5.17.0-rc6-syzkaller-00226-g07ebd38a0da2 #0
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.14.0-2 04/01/2014
Workqueue: usb_hub_wq hub_event
...
Call Trace:
 <TASK>
 aiptek_open+0xd5/0x130 drivers/input/tablet/aiptek.c:830
 input_open_device+0x1bb/0x320 drivers/input/input.c:629
 kbd_connect+0xfe/0x160 drivers/tty/vt/keyboard.c:1593

Fixes: 8e20cf2bce12 ("Input: aiptek - fix crash on detecting device without endpoints")
Reported-and-tested-by: syzbot+75cccf2b7da87fb6f84b@syzkaller.appspotmail.com
Signed-off-by: Pavel Skripkin <paskripkin@gmail.com>
Link: https://lore.kernel.org/r/20220308194328.26220-1-paskripkin@gmail.com
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/tablet/aiptek.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/input/tablet/aiptek.c b/drivers/input/tablet/aiptek.c
index fcb1b646436a5..1581f6ef09279 100644
--- a/drivers/input/tablet/aiptek.c
+++ b/drivers/input/tablet/aiptek.c
@@ -1787,15 +1787,13 @@ aiptek_probe(struct usb_interface *intf, const struct usb_device_id *id)
 	input_set_abs_params(inputdev, ABS_TILT_Y, AIPTEK_TILT_MIN, AIPTEK_TILT_MAX, 0, 0);
 	input_set_abs_params(inputdev, ABS_WHEEL, AIPTEK_WHEEL_MIN, AIPTEK_WHEEL_MAX - 1, 0, 0);
 
-	/* Verify that a device really has an endpoint */
-	if (intf->cur_altsetting->desc.bNumEndpoints < 1) {
+	err = usb_find_common_endpoints(intf->cur_altsetting,
+					NULL, NULL, &endpoint, NULL);
+	if (err) {
 		dev_err(&intf->dev,
-			"interface has %d endpoints, but must have minimum 1\n",
-			intf->cur_altsetting->desc.bNumEndpoints);
-		err = -EINVAL;
+			"interface has no int in endpoints, but must have minimum 1\n");
 		goto fail3;
 	}
-	endpoint = &intf->cur_altsetting->endpoint[0].desc;
 
 	/* Go set up our URB, which is called when the tablet receives
 	 * input.
-- 
GitLab


From 69ad4ef868c1fc7609daa235dfa46d28ba7a3ba3 Mon Sep 17 00:00:00 2001
From: Matt Lupfer <mlupfer@ddn.com>
Date: Tue, 8 Mar 2022 15:27:02 +0000
Subject: [PATCH 1434/1586] scsi: mpt3sas: Page fault in reply q processing

A page fault was encountered in mpt3sas on a LUN reset error path:

[  145.763216] mpt3sas_cm1: Task abort tm failed: handle(0x0002),timeout(30) tr_method(0x0) smid(3) msix_index(0)
[  145.778932] scsi 1:0:0:0: task abort: FAILED scmd(0x0000000024ba29a2)
[  145.817307] scsi 1:0:0:0: attempting device reset! scmd(0x0000000024ba29a2)
[  145.827253] scsi 1:0:0:0: [sg1] tag#2 CDB: Receive Diagnostic 1c 01 01 ff fc 00
[  145.837617] scsi target1:0:0: handle(0x0002), sas_address(0x500605b0000272b9), phy(0)
[  145.848598] scsi target1:0:0: enclosure logical id(0x500605b0000272b8), slot(0)
[  149.858378] mpt3sas_cm1: Poll ReplyDescriptor queues for completion of smid(0), task_type(0x05), handle(0x0002)
[  149.875202] BUG: unable to handle page fault for address: 00000007fffc445d
[  149.885617] #PF: supervisor read access in kernel mode
[  149.894346] #PF: error_code(0x0000) - not-present page
[  149.903123] PGD 0 P4D 0
[  149.909387] Oops: 0000 [#1] PREEMPT SMP NOPTI
[  149.917417] CPU: 24 PID: 3512 Comm: scsi_eh_1 Kdump: loaded Tainted: G S         O      5.10.89-altav-1 #1
[  149.934327] Hardware name: DDN           200NVX2             /200NVX2-MB          , BIOS ATHG2.2.02.01 09/10/2021
[  149.951871] RIP: 0010:_base_process_reply_queue+0x4b/0x900 [mpt3sas]
[  149.961889] Code: 0f 84 22 02 00 00 8d 48 01 49 89 fd 48 8d 57 38 f0 0f b1 4f 38 0f 85 d8 01 00 00 49 8b 45 10 45 31 e4 41 8b 55 0c 48 8d 1c d0 <0f> b6 03 83 e0 0f 3c 0f 0f 85 a2 00 00 00 e9 e6 01 00 00 0f b7 ee
[  149.991952] RSP: 0018:ffffc9000f1ebcb8 EFLAGS: 00010246
[  150.000937] RAX: 0000000000000055 RBX: 00000007fffc445d RCX: 000000002548f071
[  150.011841] RDX: 00000000ffff8881 RSI: 0000000000000001 RDI: ffff888125ed50d8
[  150.022670] RBP: 0000000000000000 R08: 0000000000000000 R09: c0000000ffff7fff
[  150.033445] R10: ffffc9000f1ebb68 R11: ffffc9000f1ebb60 R12: 0000000000000000
[  150.044204] R13: ffff888125ed50d8 R14: 0000000000000080 R15: 34cdc00034cdea80
[  150.054963] FS:  0000000000000000(0000) GS:ffff88dfaf200000(0000) knlGS:0000000000000000
[  150.066715] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  150.076078] CR2: 00000007fffc445d CR3: 000000012448a006 CR4: 0000000000770ee0
[  150.086887] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[  150.097670] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[  150.108323] PKRU: 55555554
[  150.114690] Call Trace:
[  150.120497]  ? printk+0x48/0x4a
[  150.127049]  mpt3sas_scsih_issue_tm.cold.114+0x2e/0x2b3 [mpt3sas]
[  150.136453]  mpt3sas_scsih_issue_locked_tm+0x86/0xb0 [mpt3sas]
[  150.145759]  scsih_dev_reset+0xea/0x300 [mpt3sas]
[  150.153891]  scsi_eh_ready_devs+0x541/0x9e0 [scsi_mod]
[  150.162206]  ? __scsi_host_match+0x20/0x20 [scsi_mod]
[  150.170406]  ? scsi_try_target_reset+0x90/0x90 [scsi_mod]
[  150.178925]  ? blk_mq_tagset_busy_iter+0x45/0x60
[  150.186638]  ? scsi_try_target_reset+0x90/0x90 [scsi_mod]
[  150.195087]  scsi_error_handler+0x3a5/0x4a0 [scsi_mod]
[  150.203206]  ? __schedule+0x1e9/0x610
[  150.209783]  ? scsi_eh_get_sense+0x210/0x210 [scsi_mod]
[  150.217924]  kthread+0x12e/0x150
[  150.224041]  ? kthread_worker_fn+0x130/0x130
[  150.231206]  ret_from_fork+0x1f/0x30

This is caused by mpt3sas_base_sync_reply_irqs() using an invalid reply_q
pointer outside of the list_for_each_entry() loop. At the end of the full
list traversal the pointer is invalid.

Move the _base_process_reply_queue() call inside of the loop.

Link: https://lore.kernel.org/r/d625deae-a958-0ace-2ba3-0888dd0a415b@ddn.com
Fixes: 711a923c14d9 ("scsi: mpt3sas: Postprocessing of target and LUN reset")
Cc: stable@vger.kernel.org
Acked-by: Sreekanth Reddy <sreekanth.reddy@broadcom.com>
Signed-off-by: Matt Lupfer <mlupfer@ddn.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/mpt3sas/mpt3sas_base.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c
index 511726f92d9a5..76229b839560a 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_base.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.c
@@ -2011,9 +2011,10 @@ mpt3sas_base_sync_reply_irqs(struct MPT3SAS_ADAPTER *ioc, u8 poll)
 				enable_irq(reply_q->os_irq);
 			}
 		}
+
+		if (poll)
+			_base_process_reply_queue(reply_q);
 	}
-	if (poll)
-		_base_process_reply_queue(reply_q);
 }
 
 /**
-- 
GitLab


From 0f8946ae704ac6880c590beb91bc3a732595a28a Mon Sep 17 00:00:00 2001
From: Michael Walle <michael@walle.cc>
Date: Sat, 12 Mar 2022 23:41:40 +0100
Subject: [PATCH 1435/1586] net: mdio: mscc-miim: fix duplicate debugfs entry

This driver can have up to two regmaps. If the second one is registered
its debugfs entry will have the same name as the first one and the
following error will be printed:

[    3.833521] debugfs: Directory 'e200413c.mdio' with parent 'regmap' already present!

Give the second regmap a name to avoid this.

Fixes: a27a76282837 ("net: mdio: mscc-miim: convert to a regmap implementation")
Signed-off-by: Michael Walle <michael@walle.cc>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Link: https://lore.kernel.org/r/20220312224140.4173930-1-michael@walle.cc
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/mdio/mdio-mscc-miim.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/net/mdio/mdio-mscc-miim.c b/drivers/net/mdio/mdio-mscc-miim.c
index 7d2abaf2b2c97..64fb76c1e3959 100644
--- a/drivers/net/mdio/mdio-mscc-miim.c
+++ b/drivers/net/mdio/mdio-mscc-miim.c
@@ -187,6 +187,13 @@ static const struct regmap_config mscc_miim_regmap_config = {
 	.reg_stride	= 4,
 };
 
+static const struct regmap_config mscc_miim_phy_regmap_config = {
+	.reg_bits	= 32,
+	.val_bits	= 32,
+	.reg_stride	= 4,
+	.name		= "phy",
+};
+
 int mscc_miim_setup(struct device *dev, struct mii_bus **pbus, const char *name,
 		    struct regmap *mii_regmap, int status_offset)
 {
@@ -250,7 +257,7 @@ static int mscc_miim_probe(struct platform_device *pdev)
 		}
 
 		phy_regmap = devm_regmap_init_mmio(&pdev->dev, phy_regs,
-						   &mscc_miim_regmap_config);
+						   &mscc_miim_phy_regmap_config);
 		if (IS_ERR(phy_regmap)) {
 			dev_err(&pdev->dev, "Unable to create phy register regmap\n");
 			return PTR_ERR(phy_regmap);
-- 
GitLab


From c700525fcc06b05adfea78039de02628af79e07a Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 12 Mar 2022 15:29:58 -0800
Subject: [PATCH 1436/1586] net/packet: fix slab-out-of-bounds access in
 packet_recvmsg()

syzbot found that when an AF_PACKET socket is using PACKET_COPY_THRESH
and mmap operations, tpacket_rcv() is queueing skbs with
garbage in skb->cb[], triggering a too big copy [1]

Presumably, users of af_packet using mmap() already gets correct
metadata from the mapped buffer, we can simply make sure
to clear 12 bytes that might be copied to user space later.

BUG: KASAN: stack-out-of-bounds in memcpy include/linux/fortify-string.h:225 [inline]
BUG: KASAN: stack-out-of-bounds in packet_recvmsg+0x56c/0x1150 net/packet/af_packet.c:3489
Write of size 165 at addr ffffc9000385fb78 by task syz-executor233/3631

CPU: 0 PID: 3631 Comm: syz-executor233 Not tainted 5.17.0-rc7-syzkaller-02396-g0b3660695e80 #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
 <TASK>
 __dump_stack lib/dump_stack.c:88 [inline]
 dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106
 print_address_description.constprop.0.cold+0xf/0x336 mm/kasan/report.c:255
 __kasan_report mm/kasan/report.c:442 [inline]
 kasan_report.cold+0x83/0xdf mm/kasan/report.c:459
 check_region_inline mm/kasan/generic.c:183 [inline]
 kasan_check_range+0x13d/0x180 mm/kasan/generic.c:189
 memcpy+0x39/0x60 mm/kasan/shadow.c:66
 memcpy include/linux/fortify-string.h:225 [inline]
 packet_recvmsg+0x56c/0x1150 net/packet/af_packet.c:3489
 sock_recvmsg_nosec net/socket.c:948 [inline]
 sock_recvmsg net/socket.c:966 [inline]
 sock_recvmsg net/socket.c:962 [inline]
 ____sys_recvmsg+0x2c4/0x600 net/socket.c:2632
 ___sys_recvmsg+0x127/0x200 net/socket.c:2674
 __sys_recvmsg+0xe2/0x1a0 net/socket.c:2704
 do_syscall_x64 arch/x86/entry/common.c:50 [inline]
 do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
 entry_SYSCALL_64_after_hwframe+0x44/0xae
RIP: 0033:0x7fdfd5954c29
Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 41 15 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 c0 ff ff ff f7 d8 64 89 01 48
RSP: 002b:00007ffcf8e71e48 EFLAGS: 00000246 ORIG_RAX: 000000000000002f
RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00007fdfd5954c29
RDX: 0000000000000000 RSI: 0000000020000500 RDI: 0000000000000005
RBP: 0000000000000000 R08: 000000000000000d R09: 000000000000000d
R10: 0000000000000000 R11: 0000000000000246 R12: 00007ffcf8e71e60
R13: 00000000000f4240 R14: 000000000000c1ff R15: 00007ffcf8e71e54
 </TASK>

addr ffffc9000385fb78 is located in stack of task syz-executor233/3631 at offset 32 in frame:
 ____sys_recvmsg+0x0/0x600 include/linux/uio.h:246

this frame has 1 object:
 [32, 160) 'addr'

Memory state around the buggy address:
 ffffc9000385fa80: 00 04 f3 f3 f3 f3 f3 00 00 00 00 00 00 00 00 00
 ffffc9000385fb00: 00 00 00 00 00 00 00 00 00 00 00 f1 f1 f1 f1 00
>ffffc9000385fb80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 f3
                                                                ^
 ffffc9000385fc00: f3 f3 f3 00 00 00 00 00 00 00 00 00 00 00 00 f1
 ffffc9000385fc80: f1 f1 f1 00 f2 f2 f2 00 f2 f2 f2 00 00 00 00 00
==================================================================

Fixes: 0fb375fb9b93 ("[AF_PACKET]: Allow for > 8 byte hardware addresses.")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Link: https://lore.kernel.org/r/20220312232958.3535620-1-eric.dumazet@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/packet/af_packet.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index ab87f22cc7ecd..a7273af2d9009 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2317,8 +2317,11 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 					copy_skb = skb_get(skb);
 					skb_head = skb->data;
 				}
-				if (copy_skb)
+				if (copy_skb) {
+					memset(&PACKET_SKB_CB(copy_skb)->sa.ll, 0,
+					       sizeof(PACKET_SKB_CB(copy_skb)->sa.ll));
 					skb_set_owner_r(copy_skb, sk);
+				}
 			}
 			snaplen = po->rx_ring.frame_size - macoff;
 			if ((int)snaplen < 0) {
@@ -3462,6 +3465,8 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
 	sock_recv_ts_and_drops(msg, sk, skb);
 
 	if (msg->msg_name) {
+		const size_t max_len = min(sizeof(skb->cb),
+					   sizeof(struct sockaddr_storage));
 		int copy_len;
 
 		/* If the address length field is there to be filled
@@ -3484,6 +3489,10 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
 				msg->msg_namelen = sizeof(struct sockaddr_ll);
 			}
 		}
+		if (WARN_ON_ONCE(copy_len > max_len)) {
+			copy_len = max_len;
+			msg->msg_namelen = copy_len;
+		}
 		memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, copy_len);
 	}
 
-- 
GitLab


From da7837339641601f202f27515771dc0646083938 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 15 Mar 2022 08:13:04 +0100
Subject: [PATCH 1437/1586] nvmet: move the call to nvmet_ns_changed out of
 nvmet_ns_revalidate

nvmet_ns_changed states via lockdep that the ns->subsys->lock must be
held. The only caller of nvmet_ns_changed which does not acquire that
lock is nvmet_ns_revalidate. nvmet_ns_revalidate has 3 callers,
of which 2 do not acquire that lock: nvmet_execute_identify_cns_cs_ns
and nvmet_execute_identify_ns. The other caller
nvmet_ns_revalidate_size_store does acquire the lock.

Move the call to nvmet_ns_changed from nvmet_ns_revalidate to the callers
so that they can perform the correct locking as needed.

This issue was found using a static type-based analyser and manually
verified.

Reported-by: Niels Dossche <dossche.niels@gmail.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
---
 drivers/nvme/target/admin-cmd.c | 6 +++++-
 drivers/nvme/target/configfs.c  | 3 ++-
 drivers/nvme/target/core.c      | 5 ++---
 drivers/nvme/target/nvmet.h     | 2 +-
 drivers/nvme/target/zns.c       | 6 +++++-
 5 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index 6fb24746de069..46d0dab686dd6 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -511,7 +511,11 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req)
 		goto done;
 	}
 
-	nvmet_ns_revalidate(req->ns);
+	if (nvmet_ns_revalidate(req->ns)) {
+		mutex_lock(&req->ns->subsys->lock);
+		nvmet_ns_changed(req->ns->subsys, req->ns->nsid);
+		mutex_unlock(&req->ns->subsys->lock);
+	}
 
 	/*
 	 * nuse = ncap = nsze isn't always true, but we have no way to find
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index ac801fa1641bc..b650312dd4aed 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -586,7 +586,8 @@ static ssize_t nvmet_ns_revalidate_size_store(struct config_item *item,
 		mutex_unlock(&ns->subsys->lock);
 		return -EINVAL;
 	}
-	nvmet_ns_revalidate(ns);
+	if (nvmet_ns_revalidate(ns))
+		nvmet_ns_changed(ns->subsys, ns->nsid);
 	mutex_unlock(&ns->subsys->lock);
 	return count;
 }
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 724a6d373340a..92af37d33ec34 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -531,7 +531,7 @@ static void nvmet_p2pmem_ns_add_p2p(struct nvmet_ctrl *ctrl,
 		ns->nsid);
 }
 
-void nvmet_ns_revalidate(struct nvmet_ns *ns)
+bool nvmet_ns_revalidate(struct nvmet_ns *ns)
 {
 	loff_t oldsize = ns->size;
 
@@ -540,8 +540,7 @@ void nvmet_ns_revalidate(struct nvmet_ns *ns)
 	else
 		nvmet_file_ns_revalidate(ns);
 
-	if (oldsize != ns->size)
-		nvmet_ns_changed(ns->subsys, ns->nsid);
+	return oldsize != ns->size;
 }
 
 int nvmet_ns_enable(struct nvmet_ns *ns)
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 69637bf8f8e19..d910c6aad4b66 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -542,7 +542,7 @@ u16 nvmet_file_flush(struct nvmet_req *req);
 void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid);
 void nvmet_bdev_ns_revalidate(struct nvmet_ns *ns);
 void nvmet_file_ns_revalidate(struct nvmet_ns *ns);
-void nvmet_ns_revalidate(struct nvmet_ns *ns);
+bool nvmet_ns_revalidate(struct nvmet_ns *ns);
 u16 blk_to_nvme_status(struct nvmet_req *req, blk_status_t blk_sts);
 
 bool nvmet_bdev_zns_enable(struct nvmet_ns *ns);
diff --git a/drivers/nvme/target/zns.c b/drivers/nvme/target/zns.c
index 3e421217a7ade..e34718b095504 100644
--- a/drivers/nvme/target/zns.c
+++ b/drivers/nvme/target/zns.c
@@ -123,7 +123,11 @@ void nvmet_execute_identify_cns_cs_ns(struct nvmet_req *req)
 		goto done;
 	}
 
-	nvmet_ns_revalidate(req->ns);
+	if (nvmet_ns_revalidate(req->ns)) {
+		mutex_lock(&req->ns->subsys->lock);
+		nvmet_ns_changed(req->ns->subsys, req->ns->nsid);
+		mutex_unlock(&req->ns->subsys->lock);
+	}
 	zsze = (bdev_zone_sectors(req->ns->bdev) << 9) >>
 					req->ns->blksize_shift;
 	id_zns->lbafe[0].zsze = cpu_to_le64(zsze);
-- 
GitLab


From 0c48645a7f3988a624767d025fa3275ae24b6ca1 Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.de>
Date: Tue, 15 Mar 2022 10:14:36 +0100
Subject: [PATCH 1438/1586] nvmet: revert "nvmet: make discovery NQN
 configurable"

Revert commit 626851e9225d ("nvmet: make discovery NQN configurable");
the interface was deemed incorrect and will be replaced with a different
one.

Fixes: 626851e9225d ("nvmet: make discovery NQN configurable")
Signed-off-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/configfs.c | 39 ----------------------------------
 drivers/nvme/target/core.c     |  3 +--
 2 files changed, 1 insertion(+), 41 deletions(-)

diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index 091a0ca16361c..496d775c67707 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -1233,44 +1233,6 @@ static ssize_t nvmet_subsys_attr_model_store(struct config_item *item,
 }
 CONFIGFS_ATTR(nvmet_subsys_, attr_model);
 
-static ssize_t nvmet_subsys_attr_discovery_nqn_show(struct config_item *item,
-			char *page)
-{
-	return snprintf(page, PAGE_SIZE, "%s\n",
-			nvmet_disc_subsys->subsysnqn);
-}
-
-static ssize_t nvmet_subsys_attr_discovery_nqn_store(struct config_item *item,
-			const char *page, size_t count)
-{
-	struct nvmet_subsys *subsys = to_subsys(item);
-	char *subsysnqn;
-	int len;
-
-	len = strcspn(page, "\n");
-	if (!len)
-		return -EINVAL;
-
-	subsysnqn = kmemdup_nul(page, len, GFP_KERNEL);
-	if (!subsysnqn)
-		return -ENOMEM;
-
-	/*
-	 * The discovery NQN must be different from subsystem NQN.
-	 */
-	if (!strcmp(subsysnqn, subsys->subsysnqn)) {
-		kfree(subsysnqn);
-		return -EBUSY;
-	}
-	down_write(&nvmet_config_sem);
-	kfree(nvmet_disc_subsys->subsysnqn);
-	nvmet_disc_subsys->subsysnqn = subsysnqn;
-	up_write(&nvmet_config_sem);
-
-	return count;
-}
-CONFIGFS_ATTR(nvmet_subsys_, attr_discovery_nqn);
-
 #ifdef CONFIG_BLK_DEV_INTEGRITY
 static ssize_t nvmet_subsys_attr_pi_enable_show(struct config_item *item,
 						char *page)
@@ -1300,7 +1262,6 @@ static struct configfs_attribute *nvmet_subsys_attrs[] = {
 	&nvmet_subsys_attr_attr_cntlid_min,
 	&nvmet_subsys_attr_attr_cntlid_max,
 	&nvmet_subsys_attr_attr_model,
-	&nvmet_subsys_attr_attr_discovery_nqn,
 #ifdef CONFIG_BLK_DEV_INTEGRITY
 	&nvmet_subsys_attr_attr_pi_enable,
 #endif
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 5119c687de683..626caf6f1e4b4 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -1493,8 +1493,7 @@ static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
 	if (!port)
 		return NULL;
 
-	if (!strcmp(NVME_DISC_SUBSYS_NAME, subsysnqn) ||
-	    !strcmp(nvmet_disc_subsys->subsysnqn, subsysnqn)) {
+	if (!strcmp(NVME_DISC_SUBSYS_NAME, subsysnqn)) {
 		if (!kref_get_unless_zero(&nvmet_disc_subsys->ref))
 			return NULL;
 		return nvmet_disc_subsys;
-- 
GitLab


From 0f74b29a4f53627376cf5a5fb7b0b3fa748a0b2b Mon Sep 17 00:00:00 2001
From: Jiasheng Jiang <jiasheng@iscas.ac.cn>
Date: Mon, 14 Mar 2022 09:34:48 +0800
Subject: [PATCH 1439/1586] atm: eni: Add check for dma_map_single

As the potential failure of the dma_map_single(),
it should be better to check it and return error
if fails.

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Jiasheng Jiang <jiasheng@iscas.ac.cn>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/atm/eni.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c
index 422753d52244b..a31ffe16e626f 100644
--- a/drivers/atm/eni.c
+++ b/drivers/atm/eni.c
@@ -1112,6 +1112,8 @@ DPRINTK("iovcnt = %d\n",skb_shinfo(skb)->nr_frags);
 	skb_data3 = skb->data[3];
 	paddr = dma_map_single(&eni_dev->pci_dev->dev,skb->data,skb->len,
 			       DMA_TO_DEVICE);
+	if (dma_mapping_error(&eni_dev->pci_dev->dev, paddr))
+		return enq_next;
 	ENI_PRV_PADDR(skb) = paddr;
 	/* prepare DMA queue entries */
 	j = 0;
-- 
GitLab


From 8a317e00798ac0893042e63807429ffddce52a34 Mon Sep 17 00:00:00 2001
From: Haowen Bai <baihaowen@meizu.com>
Date: Tue, 15 Mar 2022 11:33:10 +0800
Subject: [PATCH 1440/1586] regulator: vctrl: Use min() instead of doing it
 manually

Fix following coccicheck warning:
drivers/regulator/vctrl-regulator.c:188:15-17: WARNING opportunity for max()

Signed-off-by: Haowen Bai <baihaowen@meizu.com>
Link: https://lore.kernel.org/r/1647315190-16139-1-git-send-email-baihaowen@meizu.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/vctrl-regulator.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/regulator/vctrl-regulator.c b/drivers/regulator/vctrl-regulator.c
index d2a37978fc3a8..aac7be3b33f7c 100644
--- a/drivers/regulator/vctrl-regulator.c
+++ b/drivers/regulator/vctrl-regulator.c
@@ -185,10 +185,7 @@ static int vctrl_set_voltage_sel(struct regulator_dev *rdev,
 		unsigned int next_sel;
 		int delay;
 
-		if (selector >= vctrl->vtable[vctrl->sel].ovp_min_sel)
-			next_sel = selector;
-		else
-			next_sel = vctrl->vtable[vctrl->sel].ovp_min_sel;
+		next_sel = max_t(unsigned int, selector, vctrl->vtable[vctrl->sel].ovp_min_sel);
 
 		ret = regulator_set_voltage_rdev(rdev->supply->rdev,
 					    vctrl->vtable[next_sel].ctrl,
-- 
GitLab


From 03b1be379dcee2e9c866c2a455a1a4a9581b3efd Mon Sep 17 00:00:00 2001
From: Leilk Liu <leilk.liu@mediatek.com>
Date: Tue, 15 Mar 2022 11:24:06 +0800
Subject: [PATCH 1441/1586] spi: mediatek: support tick_delay without
 enhance_timing

this patch support tick_delay bit[31:30] without enhance_timing feature.

Fixes: f84d866ab43f("spi: mediatek: add tick_delay support")
Signed-off-by: Leilk Liu <leilk.liu@mediatek.com>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Link: https://lore.kernel.org/r/20220315032411.2826-2-leilk.liu@mediatek.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-mt65xx.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/drivers/spi/spi-mt65xx.c b/drivers/spi/spi-mt65xx.c
index bbfeb8046c17c..3fd89548ec3cb 100644
--- a/drivers/spi/spi-mt65xx.c
+++ b/drivers/spi/spi-mt65xx.c
@@ -43,8 +43,11 @@
 #define SPI_CFG1_PACKET_LOOP_OFFSET       8
 #define SPI_CFG1_PACKET_LENGTH_OFFSET     16
 #define SPI_CFG1_GET_TICK_DLY_OFFSET      29
+#define SPI_CFG1_GET_TICK_DLY_OFFSET_V1   30
 
 #define SPI_CFG1_GET_TICK_DLY_MASK        0xe0000000
+#define SPI_CFG1_GET_TICK_DLY_MASK_V1     0xc0000000
+
 #define SPI_CFG1_CS_IDLE_MASK             0xff
 #define SPI_CFG1_PACKET_LOOP_MASK         0xff00
 #define SPI_CFG1_PACKET_LENGTH_MASK       0x3ff0000
@@ -346,9 +349,15 @@ static int mtk_spi_prepare_message(struct spi_master *master,
 
 	/* tick delay */
 	reg_val = readl(mdata->base + SPI_CFG1_REG);
-	reg_val &= ~SPI_CFG1_GET_TICK_DLY_MASK;
-	reg_val |= ((chip_config->tick_delay & 0x7)
-		<< SPI_CFG1_GET_TICK_DLY_OFFSET);
+	if (mdata->dev_comp->enhance_timing) {
+		reg_val &= ~SPI_CFG1_GET_TICK_DLY_MASK;
+		reg_val |= ((chip_config->tick_delay & 0x7)
+			    << SPI_CFG1_GET_TICK_DLY_OFFSET);
+	} else {
+		reg_val &= ~SPI_CFG1_GET_TICK_DLY_MASK_V1;
+		reg_val |= ((chip_config->tick_delay & 0x3)
+			    << SPI_CFG1_GET_TICK_DLY_OFFSET_V1);
+	}
 	writel(reg_val, mdata->base + SPI_CFG1_REG);
 
 	/* set hw cs timing */
-- 
GitLab


From 2002c13243d595e211c0dad6b8e2e87f906f474b Mon Sep 17 00:00:00 2001
From: Julia Lawall <Julia.Lawall@inria.fr>
Date: Mon, 14 Mar 2022 12:53:45 +0100
Subject: [PATCH 1442/1586] spi: sun4i: fix typos in comments

Various spelling mistakes in comments.
Detected with the help of Coccinelle.

Signed-off-by: Julia Lawall <Julia.Lawall@inria.fr>
Acked-by: Chen-Yu Tsai <wens@csie.org>
Link: https://lore.kernel.org/r/20220314115354.144023-22-Julia.Lawall@inria.fr
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-sun4i.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/spi/spi-sun4i.c b/drivers/spi/spi-sun4i.c
index 1fdfc6e6691d2..6000d0761206c 100644
--- a/drivers/spi/spi-sun4i.c
+++ b/drivers/spi/spi-sun4i.c
@@ -280,7 +280,7 @@ static int sun4i_spi_transfer_one(struct spi_master *master,
 	 * SPI_CLK = MOD_CLK / (2 ^ (cdr + 1))
 	 * Or we can use CDR2, which is calculated with the formula:
 	 * SPI_CLK = MOD_CLK / (2 * (cdr + 1))
-	 * Wether we use the former or the latter is set through the
+	 * Whether we use the former or the latter is set through the
 	 * DRS bit.
 	 *
 	 * First try CDR2, and if we can't reach the expected
-- 
GitLab


From da40a352db2bae470a3eea038cc53454c24e67f3 Mon Sep 17 00:00:00 2001
From: Leilk Liu <leilk.liu@mediatek.com>
Date: Tue, 15 Mar 2022 11:24:07 +0800
Subject: [PATCH 1443/1586] spi: Add compatible for MT7986

This patch adds dt-binding documentation for MT7986.

Signed-off-by: Leilk Liu <leilk.liu@mediatek.com>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Link: https://lore.kernel.org/r/20220315032411.2826-3-leilk.liu@mediatek.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../devicetree/bindings/spi/mediatek,spi-mt65xx.yaml          | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/Documentation/devicetree/bindings/spi/mediatek,spi-mt65xx.yaml b/Documentation/devicetree/bindings/spi/mediatek,spi-mt65xx.yaml
index ac9ff12ae27a2..818130b11bb91 100644
--- a/Documentation/devicetree/bindings/spi/mediatek,spi-mt65xx.yaml
+++ b/Documentation/devicetree/bindings/spi/mediatek,spi-mt65xx.yaml
@@ -30,6 +30,10 @@ properties:
               - mediatek,mt8192-spi
               - mediatek,mt8195-spi
           - const: mediatek,mt6765-spi
+      - items:
+          - enum:
+              - mediatek,mt7986-spi-ipm
+          - const: mediatek,spi-ipm
       - items:
           - enum:
               - mediatek,mt2701-spi
-- 
GitLab


From 7e963fb2a33ce488e65258ab5be38a4855923033 Mon Sep 17 00:00:00 2001
From: Leilk Liu <leilk.liu@mediatek.com>
Date: Tue, 15 Mar 2022 11:24:08 +0800
Subject: [PATCH 1444/1586] spi: mediatek: add ipm design support for MT7986

this patch add the support of ipm design.

Signed-off-by: Leilk Liu <leilk.liu@mediatek.com>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Link: https://lore.kernel.org/r/20220315032411.2826-4-leilk.liu@mediatek.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-mt65xx.c | 102 +++++++++++++++++++++++++++++++++------
 1 file changed, 87 insertions(+), 15 deletions(-)

diff --git a/drivers/spi/spi-mt65xx.c b/drivers/spi/spi-mt65xx.c
index 3fd89548ec3cb..1a0b3208dfcaf 100644
--- a/drivers/spi/spi-mt65xx.c
+++ b/drivers/spi/spi-mt65xx.c
@@ -31,6 +31,7 @@
 #define SPI_CFG2_REG                      0x0028
 #define SPI_TX_SRC_REG_64                 0x002c
 #define SPI_RX_DST_REG_64                 0x0030
+#define SPI_CFG3_IPM_REG                  0x0040
 
 #define SPI_CFG0_SCK_HIGH_OFFSET          0
 #define SPI_CFG0_SCK_LOW_OFFSET           8
@@ -51,6 +52,7 @@
 #define SPI_CFG1_CS_IDLE_MASK             0xff
 #define SPI_CFG1_PACKET_LOOP_MASK         0xff00
 #define SPI_CFG1_PACKET_LENGTH_MASK       0x3ff0000
+#define SPI_CFG1_IPM_PACKET_LENGTH_MASK   GENMASK(31, 16)
 #define SPI_CFG2_SCK_HIGH_OFFSET          0
 #define SPI_CFG2_SCK_LOW_OFFSET           16
 
@@ -71,7 +73,13 @@
 #define SPI_CMD_TX_ENDIAN            BIT(15)
 #define SPI_CMD_FINISH_IE            BIT(16)
 #define SPI_CMD_PAUSE_IE             BIT(17)
+#define SPI_CMD_IPM_NONIDLE_MODE     BIT(19)
+#define SPI_CMD_IPM_SPIM_LOOP        BIT(21)
+#define SPI_CMD_IPM_GET_TICKDLY_OFFSET    22
 
+#define SPI_CMD_IPM_GET_TICKDLY_MASK	GENMASK(24, 22)
+#define SPI_CFG3_IPM_HALF_DUPLEX_DIR		BIT(2)
+#define SPI_CFG3_IPM_HALF_DUPLEX_EN		BIT(3)
 #define MT8173_SPI_MAX_PAD_SEL 3
 
 #define MTK_SPI_PAUSE_INT_STATUS 0x2
@@ -81,6 +89,7 @@
 
 #define MTK_SPI_MAX_FIFO_SIZE 32U
 #define MTK_SPI_PACKET_SIZE 1024
+#define MTK_SPI_IPM_PACKET_SIZE SZ_64K
 #define MTK_SPI_32BITS_MASK  (0xffffffff)
 
 #define DMA_ADDR_EXT_BITS (36)
@@ -96,6 +105,9 @@ struct mtk_spi_compatible {
 	bool dma_ext;
 	/* some IC no need unprepare SPI clk */
 	bool no_need_unprepare;
+	/* IPM design adjust and extend register to support more features */
+	bool ipm_design;
+
 };
 
 struct mtk_spi {
@@ -119,6 +131,12 @@ static const struct mtk_spi_compatible mt2712_compat = {
 	.must_tx = true,
 };
 
+static const struct mtk_spi_compatible mtk_ipm_compat = {
+	.enhance_timing = true,
+	.dma_ext = true,
+	.ipm_design = true,
+};
+
 static const struct mtk_spi_compatible mt6765_compat = {
 	.need_pad_sel = true,
 	.must_tx = true,
@@ -160,6 +178,9 @@ static const struct mtk_chip_config mtk_default_chip_info = {
 };
 
 static const struct of_device_id mtk_spi_of_match[] = {
+	{ .compatible = "mediatek,spi-ipm",
+		.data = (void *)&mtk_ipm_compat,
+	},
 	{ .compatible = "mediatek,mt2701-spi",
 		.data = (void *)&mtk_common_compat,
 	},
@@ -278,12 +299,11 @@ static int mtk_spi_set_hw_cs_timing(struct spi_device *spi)
 	return 0;
 }
 
-static int mtk_spi_prepare_message(struct spi_master *master,
-				   struct spi_message *msg)
+static int mtk_spi_hw_init(struct spi_master *master,
+			   struct spi_device *spi)
 {
 	u16 cpha, cpol;
 	u32 reg_val;
-	struct spi_device *spi = msg->spi;
 	struct mtk_chip_config *chip_config = spi->controller_data;
 	struct mtk_spi *mdata = spi_master_get_devdata(master);
 
@@ -291,6 +311,15 @@ static int mtk_spi_prepare_message(struct spi_master *master,
 	cpol = spi->mode & SPI_CPOL ? 1 : 0;
 
 	reg_val = readl(mdata->base + SPI_CMD_REG);
+	if (mdata->dev_comp->ipm_design) {
+		/* SPI transfer without idle time until packet length done */
+		reg_val |= SPI_CMD_IPM_NONIDLE_MODE;
+		if (spi->mode & SPI_LOOP)
+			reg_val |= SPI_CMD_IPM_SPIM_LOOP;
+		else
+			reg_val &= ~SPI_CMD_IPM_SPIM_LOOP;
+	}
+
 	if (cpha)
 		reg_val |= SPI_CMD_CPHA;
 	else
@@ -348,23 +377,39 @@ static int mtk_spi_prepare_message(struct spi_master *master,
 		       mdata->base + SPI_PAD_SEL_REG);
 
 	/* tick delay */
-	reg_val = readl(mdata->base + SPI_CFG1_REG);
 	if (mdata->dev_comp->enhance_timing) {
-		reg_val &= ~SPI_CFG1_GET_TICK_DLY_MASK;
-		reg_val |= ((chip_config->tick_delay & 0x7)
-			    << SPI_CFG1_GET_TICK_DLY_OFFSET);
+		if (mdata->dev_comp->ipm_design) {
+			reg_val = readl(mdata->base + SPI_CMD_REG);
+			reg_val &= ~SPI_CMD_IPM_GET_TICKDLY_MASK;
+			reg_val |= ((chip_config->tick_delay & 0x7)
+				    << SPI_CMD_IPM_GET_TICKDLY_OFFSET);
+			writel(reg_val, mdata->base + SPI_CMD_REG);
+		} else {
+			reg_val = readl(mdata->base + SPI_CFG1_REG);
+			reg_val &= ~SPI_CFG1_GET_TICK_DLY_MASK;
+			reg_val |= ((chip_config->tick_delay & 0x7)
+				    << SPI_CFG1_GET_TICK_DLY_OFFSET);
+			writel(reg_val, mdata->base + SPI_CFG1_REG);
+		}
 	} else {
+		reg_val = readl(mdata->base + SPI_CFG1_REG);
 		reg_val &= ~SPI_CFG1_GET_TICK_DLY_MASK_V1;
 		reg_val |= ((chip_config->tick_delay & 0x3)
 			    << SPI_CFG1_GET_TICK_DLY_OFFSET_V1);
+		writel(reg_val, mdata->base + SPI_CFG1_REG);
 	}
-	writel(reg_val, mdata->base + SPI_CFG1_REG);
 
 	/* set hw cs timing */
 	mtk_spi_set_hw_cs_timing(spi);
 	return 0;
 }
 
+static int mtk_spi_prepare_message(struct spi_master *master,
+				   struct spi_message *msg)
+{
+	return mtk_spi_hw_init(master, msg->spi);
+}
+
 static void mtk_spi_set_cs(struct spi_device *spi, bool enable)
 {
 	u32 reg_val;
@@ -386,13 +431,13 @@ static void mtk_spi_set_cs(struct spi_device *spi, bool enable)
 }
 
 static void mtk_spi_prepare_transfer(struct spi_master *master,
-				     struct spi_transfer *xfer)
+				     u32 speed_hz)
 {
 	u32 div, sck_time, reg_val;
 	struct mtk_spi *mdata = spi_master_get_devdata(master);
 
-	if (xfer->speed_hz < mdata->spi_clk_hz / 2)
-		div = DIV_ROUND_UP(mdata->spi_clk_hz, xfer->speed_hz);
+	if (speed_hz < mdata->spi_clk_hz / 2)
+		div = DIV_ROUND_UP(mdata->spi_clk_hz, speed_hz);
 	else
 		div = 1;
 
@@ -423,12 +468,24 @@ static void mtk_spi_setup_packet(struct spi_master *master)
 	u32 packet_size, packet_loop, reg_val;
 	struct mtk_spi *mdata = spi_master_get_devdata(master);
 
-	packet_size = min_t(u32, mdata->xfer_len, MTK_SPI_PACKET_SIZE);
+	if (mdata->dev_comp->ipm_design)
+		packet_size = min_t(u32,
+				    mdata->xfer_len,
+				    MTK_SPI_IPM_PACKET_SIZE);
+	else
+		packet_size = min_t(u32,
+				    mdata->xfer_len,
+				    MTK_SPI_PACKET_SIZE);
+
 	packet_loop = mdata->xfer_len / packet_size;
 
 	reg_val = readl(mdata->base + SPI_CFG1_REG);
-	reg_val &= ~(SPI_CFG1_PACKET_LENGTH_MASK | SPI_CFG1_PACKET_LOOP_MASK);
+	if (mdata->dev_comp->ipm_design)
+		reg_val &= ~SPI_CFG1_IPM_PACKET_LENGTH_MASK;
+	else
+		reg_val &= ~SPI_CFG1_PACKET_LENGTH_MASK;
 	reg_val |= (packet_size - 1) << SPI_CFG1_PACKET_LENGTH_OFFSET;
+	reg_val &= ~SPI_CFG1_PACKET_LOOP_MASK;
 	reg_val |= (packet_loop - 1) << SPI_CFG1_PACKET_LOOP_OFFSET;
 	writel(reg_val, mdata->base + SPI_CFG1_REG);
 }
@@ -523,7 +580,7 @@ static int mtk_spi_fifo_transfer(struct spi_master *master,
 	mdata->cur_transfer = xfer;
 	mdata->xfer_len = min(MTK_SPI_MAX_FIFO_SIZE, xfer->len);
 	mdata->num_xfered = 0;
-	mtk_spi_prepare_transfer(master, xfer);
+	mtk_spi_prepare_transfer(master, xfer->speed_hz);
 	mtk_spi_setup_packet(master);
 
 	if (xfer->tx_buf) {
@@ -556,7 +613,7 @@ static int mtk_spi_dma_transfer(struct spi_master *master,
 	mdata->cur_transfer = xfer;
 	mdata->num_xfered = 0;
 
-	mtk_spi_prepare_transfer(master, xfer);
+	mtk_spi_prepare_transfer(master, xfer->speed_hz);
 
 	cmd = readl(mdata->base + SPI_CMD_REG);
 	if (xfer->tx_buf)
@@ -591,6 +648,19 @@ static int mtk_spi_transfer_one(struct spi_master *master,
 				struct spi_device *spi,
 				struct spi_transfer *xfer)
 {
+	struct mtk_spi *mdata = spi_master_get_devdata(spi->master);
+	u32 reg_val = 0;
+
+	/* prepare xfer direction and duplex mode */
+	if (mdata->dev_comp->ipm_design) {
+		if (!xfer->tx_buf || !xfer->rx_buf) {
+			reg_val |= SPI_CFG3_IPM_HALF_DUPLEX_EN;
+			if (xfer->rx_buf)
+				reg_val |= SPI_CFG3_IPM_HALF_DUPLEX_DIR;
+		}
+		writel(reg_val, mdata->base + SPI_CFG3_IPM_REG);
+	}
+
 	if (master->can_dma(master, spi, xfer))
 		return mtk_spi_dma_transfer(master, spi, xfer);
 	else
@@ -757,6 +827,8 @@ static int mtk_spi_probe(struct platform_device *pdev)
 
 	if (mdata->dev_comp->must_tx)
 		master->flags = SPI_MASTER_MUST_TX;
+	if (mdata->dev_comp->ipm_design)
+		master->mode_bits |= SPI_LOOP;
 
 	if (mdata->dev_comp->need_pad_sel) {
 		mdata->pad_num = of_property_count_u32_elems(
-- 
GitLab


From 184416d4b98509fb4c3d8fc3d6dc1437896cc159 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Tue, 15 Mar 2022 13:30:09 +0300
Subject: [PATCH 1445/1586] NFSD: prevent underflow in
 nfssvc_decode_writeargs()

Smatch complains:

	fs/nfsd/nfsxdr.c:341 nfssvc_decode_writeargs()
	warn: no lower bound on 'args->len'

Change the type to unsigned to prevent this issue.

Cc: stable@vger.kernel.org
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfsproc.c | 2 +-
 fs/nfsd/xdr.h     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 18b8eb43a19bc..fcdab8a8a41f4 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -230,7 +230,7 @@ nfsd_proc_write(struct svc_rqst *rqstp)
 	unsigned long cnt = argp->len;
 	unsigned int nvecs;
 
-	dprintk("nfsd: WRITE    %s %d bytes at %d\n",
+	dprintk("nfsd: WRITE    %s %u bytes at %d\n",
 		SVCFH_fmt(&argp->fh),
 		argp->len, argp->offset);
 
diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h
index 528fb299430e6..852f71580bd06 100644
--- a/fs/nfsd/xdr.h
+++ b/fs/nfsd/xdr.h
@@ -32,7 +32,7 @@ struct nfsd_readargs {
 struct nfsd_writeargs {
 	svc_fh			fh;
 	__u32			offset;
-	int			len;
+	__u32			len;
 	struct xdr_buf		payload;
 };
 
-- 
GitLab


From 65f3324f4b6fed78b8761c3b74615ecf0ffa81fa Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Tue, 1 Mar 2022 11:04:24 +0300
Subject: [PATCH 1446/1586] usb: gadget: rndis: prevent integer overflow in
 rndis_set_response()

If "BufOffset" is very large the "BufOffset + 8" operation can have an
integer overflow.

Cc: stable@kernel.org
Fixes: 38ea1eac7d88 ("usb: gadget: rndis: check size of RNDIS_MSG_SET command")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Link: https://lore.kernel.org/r/20220301080424.GA17208@kili
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/function/rndis.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/gadget/function/rndis.c b/drivers/usb/gadget/function/rndis.c
index 00b3f6b3bb31d..713efd9aefde8 100644
--- a/drivers/usb/gadget/function/rndis.c
+++ b/drivers/usb/gadget/function/rndis.c
@@ -640,6 +640,7 @@ static int rndis_set_response(struct rndis_params *params,
 	BufLength = le32_to_cpu(buf->InformationBufferLength);
 	BufOffset = le32_to_cpu(buf->InformationBufferOffset);
 	if ((BufLength > RNDIS_MAX_TOTAL_SIZE) ||
+	    (BufOffset > RNDIS_MAX_TOTAL_SIZE) ||
 	    (BufOffset + 8 >= RNDIS_MAX_TOTAL_SIZE))
 		    return -EINVAL;
 
-- 
GitLab


From 239071064732bc4a30308cbba11014aa1aab550a Mon Sep 17 00:00:00 2001
From: "H. Nikolaus Schaller" <hns@goldelico.com>
Date: Tue, 8 Mar 2022 13:56:06 +0100
Subject: [PATCH 1447/1586] partially Revert "usb: musb: Set the DT node on the
 child device"

This reverts the omap2430 changes of

commit cf081d009c44 ("usb: musb: Set the DT node on the child device")

Since v5.17-rc1, musb is broken on the gta04 and openpandora devices
(omap3530/dm3730). BeagleBone Black (am335x) seems to work.

Symptoms of this bug are

a) main symptom

[   21.336517] using random host ethernet address
[   21.341430] using host ethernet address: 32:70:05:18:ff:78
[   21.341461] using self ethernet address: 46:10:3a:b3:af:d9
[   21.358184] usb0: HOST MAC 32:70:05:18:ff:78
[   21.376678] usb0: MAC 46:10:3a:b3:af:d9
[   21.388305] using random self ethernet address
[   21.393371] using random host ethernet address
[   21.398162] g_ether gadget: Ethernet Gadget, version: Memorial Day 2008
[   21.421081] g_ether gadget: g_ether ready
[   21.492156] musb-hdrc musb-hdrc.1.auto: Could not enable: -22
[   21.691345] musb-hdrc musb-hdrc.1.auto: Could not enable: -22
[   21.803192] musb-hdrc musb-hdrc.1.auto: Could not enable: -22
[   21.819427] musb-hdrc musb-hdrc.1.auto: Could not enable: -22
[   22.124450] musb-hdrc musb-hdrc.1.auto: Could not enable: -22
[   22.168518] musb-hdrc musb-hdrc.1.auto: Could not enable: -22
[   22.179382] musb-hdrc musb-hdrc.1.auto: Could not enable: -22
[   23.213592] musb-hdrc musb-hdrc.1.auto: pm runtime get failed in musb_gadget_queue
[   23.221832] musb-hdrc musb-hdrc.1.auto: Could not enable: -22
[   23.227905] musb-hdrc musb-hdrc.1.auto: Could not enable: -22
[   23.239440] musb-hdrc musb-hdrc.1.auto: Could not enable: -22
[   23.401000] musb-hdrc musb-hdrc.1.auto: Could not enable: -22
[   23.407073] musb-hdrc musb-hdrc.1.auto: Could not enable: -22
[   23.426361] musb-hdrc musb-hdrc.1.auto: Could not enable: -22
[   23.734466] musb-hdrc musb-hdrc.1.auto: pm runtime get failed in musb_gadget_queue
[   23.742462] musb-hdrc musb-hdrc.1.auto: pm runtime get failed in musb_gadget_queue
[   23.750396] musb-hdrc musb-hdrc.1.auto: pm runtime get failed in musb_gadget_queue
... (repeats with high frequency)

This stops if the USB cable is unplugged and restarts if it is plugged in again.

b) also found in the log

[    6.498107] ------------[ cut here ]------------
[    6.502960] WARNING: CPU: 0 PID: 868 at arch/arm/mach-omap2/omap_hwmod.c:1885 _enable+0x50/0x234
[    6.512207] omap_hwmod: usb_otg_hs: enabled state can only be entered from initialized, idle, or disabled state
[    6.522766] Modules linked in: omap2430(+) bmp280_i2c bmp280 itg3200 at24 tsc2007 leds_tca6507 bma180 hmc5843_i2c hmc5843_core industrialio_triggered_buffer lis3lv02d_i2c kfifo_buf lis3lv02d phy_twl4030_usb snd_soc_omap_mcbsp snd_soc_ti_sdma musb_hdrc snd_soc_twl4030 gnss_sirf twl4030_vibra twl4030_madc twl4030_charger twl4030_pwrbutton gnss industrialio ehci_omap omapdrm drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm drm_panel_orientation_quirks cec
[    6.566436] CPU: 0 PID: 868 Comm: udevd Not tainted 5.16.0-rc5-letux+ #8251
[    6.573730] Hardware name: Generic OMAP36xx (Flattened Device Tree)
[    6.580322] [<c010ed30>] (unwind_backtrace) from [<c010a1d0>] (show_stack+0x10/0x14)
[    6.588470] [<c010a1d0>] (show_stack) from [<c0897c14>] (dump_stack_lvl+0x40/0x4c)
[    6.596405] [<c0897c14>] (dump_stack_lvl) from [<c0130cc4>] (__warn+0xb4/0xdc)
[    6.604003] [<c0130cc4>] (__warn) from [<c0130d5c>] (warn_slowpath_fmt+0x70/0x9c)
[    6.611846] [<c0130d5c>] (warn_slowpath_fmt) from [<c011f4d4>] (_enable+0x50/0x234)
[    6.619903] [<c011f4d4>] (_enable) from [<c012081c>] (omap_hwmod_enable+0x28/0x40)
[    6.627838] [<c012081c>] (omap_hwmod_enable) from [<c0120ff4>] (omap_device_enable+0x4c/0x78)
[    6.636779] [<c0120ff4>] (omap_device_enable) from [<c0121030>] (_od_runtime_resume+0x10/0x3c)
[    6.645812] [<c0121030>] (_od_runtime_resume) from [<c05c688c>] (__rpm_callback+0x3c/0xf4)
[    6.654510] [<c05c688c>] (__rpm_callback) from [<c05c6994>] (rpm_callback+0x50/0x54)
[    6.662628] [<c05c6994>] (rpm_callback) from [<c05c66b0>] (rpm_resume+0x448/0x4e4)
[    6.670593] [<c05c66b0>] (rpm_resume) from [<c05c6784>] (__pm_runtime_resume+0x38/0x50)
[    6.678985] [<c05c6784>] (__pm_runtime_resume) from [<bf14ab20>] (musb_init_controller+0x350/0xa5c [musb_hdrc])
[    6.689727] [<bf14ab20>] (musb_init_controller [musb_hdrc]) from [<c05bccb8>] (platform_probe+0x58/0xa8)
[    6.699737] [<c05bccb8>] (platform_probe) from [<c05badf0>] (really_probe+0x170/0x2fc)
[    6.708068] [<c05badf0>] (really_probe) from [<c05bb040>] (__driver_probe_device+0xc4/0xd8)
[    6.716827] [<c05bb040>] (__driver_probe_device) from [<c05bb084>] (driver_probe_device+0x30/0xac)
[    6.726226] [<c05bb084>] (driver_probe_device) from [<c05bb3d0>] (__device_attach_driver+0x94/0xb4)
[    6.735717] [<c05bb3d0>] (__device_attach_driver) from [<c05b93f8>] (bus_for_each_drv+0xa0/0xb4)
[    6.744934] [<c05b93f8>] (bus_for_each_drv) from [<c05bb248>] (__device_attach+0xc0/0x134)
[    6.753631] [<c05bb248>] (__device_attach) from [<c05b9fcc>] (bus_probe_device+0x28/0x80)
[    6.762207] [<c05b9fcc>] (bus_probe_device) from [<c05b7e40>] (device_add+0x5fc/0x788)
[    6.770507] [<c05b7e40>] (device_add) from [<c05bd240>] (platform_device_add+0x70/0x1bc)
[    6.779022] [<c05bd240>] (platform_device_add) from [<bf177830>] (omap2430_probe+0x260/0x2d4 [omap2430])
[    6.789001] [<bf177830>] (omap2430_probe [omap2430]) from [<c05bccb8>] (platform_probe+0x58/0xa8)
[    6.798309] [<c05bccb8>] (platform_probe) from [<c05badf0>] (really_probe+0x170/0x2fc)
[    6.806610] [<c05badf0>] (really_probe) from [<c05bb040>] (__driver_probe_device+0xc4/0xd8)
[    6.815399] [<c05bb040>] (__driver_probe_device) from [<c05bb084>] (driver_probe_device+0x30/0xac)
[    6.824798] [<c05bb084>] (driver_probe_device) from [<c05bb4b4>] (__driver_attach+0xc4/0xd8)
[    6.833648] [<c05bb4b4>] (__driver_attach) from [<c05b9308>] (bus_for_each_dev+0x64/0xa0)
[    6.842224] [<c05b9308>] (bus_for_each_dev) from [<c05ba248>] (bus_add_driver+0x148/0x1a4)
[    6.850891] [<c05ba248>] (bus_add_driver) from [<c05bbd1c>] (driver_register+0xb4/0xf8)
[    6.859313] [<c05bbd1c>] (driver_register) from [<c0101f54>] (do_one_initcall+0x90/0x1c8)
[    6.867889] [<c0101f54>] (do_one_initcall) from [<c0893968>] (do_init_module+0x4c/0x204)
[    6.876373] [<c0893968>] (do_init_module) from [<c01b4c30>] (load_module+0x13f0/0x1928)
[    6.884796] [<c01b4c30>] (load_module) from [<c01b53a0>] (sys_finit_module+0xa0/0xc0)
[    6.893005] [<c01b53a0>] (sys_finit_module) from [<c0100080>] (ret_fast_syscall+0x0/0x54)
[    6.901580] Exception stack(0xc2807fa8 to 0xc2807ff0)
[    6.906890] 7fa0:                   b6e517d4 00052068 00000006 b6e509f8 00000000 b6e5131c
[    6.915466] 7fc0: b6e517d4 00052068 cd718000 0000017b 00020000 00037f78 00050048 00063368
[    6.924011] 7fe0: bed8fef0 bed8fee0 b6e4ac4b b6f55a42
[    6.929321] ---[ end trace d715ff121b58763c ]---

c) git bisect result on testing for "musb-hdrc" in the console log:

cf081d009c447647c6b36aced535ca427dbebe72 is the first bad commit
commit cf081d009c447647c6b36aced535ca427dbebe72
Author: Rob Herring <robh@kernel.org>
Date:   Wed Dec 15 17:07:57 2021 -0600

  usb: musb: Set the DT node on the child device

  The musb glue drivers just copy the glue resources to the musb child device.
  Instead, set the musb child device's DT node pointer to the parent device's
  node so that platform_get_irq_byname() can find the resources in the DT.
  This removes the need for statically populating the IRQ resources from the
  DT which has been deprecated for some time.

  Signed-off-by: Rob Herring <robh@kernel.org>
  Link: https://lore.kernel.org/r/20211215230756.2009115-3-robh@kernel.org
  Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

drivers/usb/musb/am35x.c    | 2 ++
drivers/usb/musb/da8xx.c    | 2 ++
drivers/usb/musb/jz4740.c   | 1 +
drivers/usb/musb/mediatek.c | 2 ++
drivers/usb/musb/omap2430.c | 1 +
drivers/usb/musb/ux500.c    | 1 +
6 files changed, 9 insertions(+)

Reverting this patch makes musb work again as before.

Fixes: cf081d009c44 ("usb: musb: Set the DT node on the child device")
Cc: Rob Herring <robh@kernel.org>
Signed-off-by: H. Nikolaus Schaller <hns@goldelico.com>
Link: https://lore.kernel.org/r/f62f5fc11f9ecae7e57f3fd66939e051bd3b11fc.1646744166.git.hns@goldelico.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/musb/omap2430.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/usb/musb/omap2430.c b/drivers/usb/musb/omap2430.c
index 7d4d0713f4f0d..d2b7e613eb34f 100644
--- a/drivers/usb/musb/omap2430.c
+++ b/drivers/usb/musb/omap2430.c
@@ -327,7 +327,6 @@ static int omap2430_probe(struct platform_device *pdev)
 	musb->dev.parent		= &pdev->dev;
 	musb->dev.dma_mask		= &omap2430_dmamask;
 	musb->dev.coherent_dma_mask	= omap2430_dmamask;
-	device_set_of_node_from_dev(&musb->dev, &pdev->dev);
 
 	glue->dev			= &pdev->dev;
 	glue->musb			= musb;
-- 
GitLab


From 23a9dbbe0faf124fc4c139615633b9d12a3a89ef Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Tue, 15 Mar 2022 18:34:06 +0300
Subject: [PATCH 1448/1586] NFSD: prevent integer overflow on 32 bit systems

On a 32 bit system, the "len * sizeof(*p)" operation can have an
integer overflow.

Cc: stable@vger.kernel.org
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/xdr.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index b519609af1d02..4417f667c757e 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -731,6 +731,8 @@ xdr_stream_decode_uint32_array(struct xdr_stream *xdr,
 
 	if (unlikely(xdr_stream_decode_u32(xdr, &len) < 0))
 		return -EBADMSG;
+	if (len > SIZE_MAX / sizeof(*p))
+		return -EBADMSG;
 	p = xdr_inline_decode(xdr, len * sizeof(*p));
 	if (unlikely(!p))
 		return -EBADMSG;
-- 
GitLab


From 01683580c65ffe0ce72d52cfb5225b80b477c598 Mon Sep 17 00:00:00 2001
From: "Srivatsa S. Bhat (VMware)" <srivatsa@csail.mit.edu>
Date: Thu, 24 Feb 2022 14:24:22 -0800
Subject: [PATCH 1449/1586] MAINTAINERS: Update maintainers for paravirt ops
 and VMware hypervisor interface

Deep has decided to transfer the joint-maintainership of paravirt ops
to Srivatsa, and the maintainership of the VMware hypervisor interface
to Srivatsa and Alexey. Update the MAINTAINERS file to reflect this
change, and also add Alexey as a reviewer for paravirt ops.

Signed-off-by: Srivatsa S. Bhat (VMware) <srivatsa@csail.mit.edu>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Alexey Makhalov <amakhalov@vmware.com>
Acked-by: Deep Shah <sdeep@vmware.com>
Acked-by: Juergen Gross <jgross@suse.com>
Link: https://lore.kernel.org/r/164574143710.654750.17342470717937593195.stgit@csail.mit.edu
---
 MAINTAINERS | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index ea3e6c9143848..4f81c3dd782f9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -14576,7 +14576,8 @@ F:	include/uapi/linux/ppdev.h
 
 PARAVIRT_OPS INTERFACE
 M:	Juergen Gross <jgross@suse.com>
-M:	Deep Shah <sdeep@vmware.com>
+M:	Srivatsa S. Bhat (VMware) <srivatsa@csail.mit.edu>
+R:	Alexey Makhalov <amakhalov@vmware.com>
 M:	"VMware, Inc." <pv-drivers@vmware.com>
 L:	virtualization@lists.linux-foundation.org
 L:	x86@kernel.org
@@ -20585,10 +20586,13 @@ S:	Maintained
 F:	drivers/misc/vmw_balloon.c
 
 VMWARE HYPERVISOR INTERFACE
-M:	Deep Shah <sdeep@vmware.com>
+M:	Srivatsa S. Bhat (VMware) <srivatsa@csail.mit.edu>
+M:	Alexey Makhalov <amakhalov@vmware.com>
 M:	"VMware, Inc." <pv-drivers@vmware.com>
 L:	virtualization@lists.linux-foundation.org
+L:	x86@kernel.org
 S:	Supported
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/vmware
 F:	arch/x86/include/asm/vmware.h
 F:	arch/x86/kernel/cpu/vmware.c
 
-- 
GitLab


From d69079d04f9319bfd4e90b788f19927142c55901 Mon Sep 17 00:00:00 2001
From: "Srivatsa S. Bhat (VMware)" <srivatsa@csail.mit.edu>
Date: Thu, 24 Feb 2022 14:24:36 -0800
Subject: [PATCH 1450/1586] MAINTAINERS: Add Zack as maintainer of vmmouse
 driver

Zack Rusin will be taking over the maintainership of the VMware
vmmouse driver. Update the MAINTAINERS file to reflect this change.

Signed-off-by: Srivatsa S. Bhat (VMware) <srivatsa@csail.mit.edu>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Zack Rusin <zackr@vmware.com>
Link: https://lore.kernel.org/r/164574146977.654750.10918397477833459687.stgit@csail.mit.edu
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 4f81c3dd782f9..630ac98da03e2 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -20628,6 +20628,7 @@ S:	Maintained
 F:	drivers/misc/vmw_vmci/
 
 VMWARE VMMOUSE SUBDRIVER
+M:	Zack Rusin <zackr@vmware.com>
 M:	"VMware Graphics" <linux-graphics-maintainer@vmware.com>
 M:	"VMware, Inc." <pv-drivers@vmware.com>
 L:	linux-input@vger.kernel.org
-- 
GitLab


From 686016f732420f88dd2e1d67cf4bb5dffdb93c82 Mon Sep 17 00:00:00 2001
From: "Srivatsa S. Bhat (VMware)" <srivatsa@csail.mit.edu>
Date: Thu, 24 Feb 2022 14:24:49 -0800
Subject: [PATCH 1451/1586] MAINTAINERS: Mark VMware mailing list entries as
 email aliases

VMware mailing lists in the MAINTAINERS file are private lists meant
for VMware-internal review/notification for patches to the respective
subsystems. Anyone can post to these addresses, but there is no public
read access like open mailing lists, which makes them more like email
aliases instead (to reach out to reviewers).

So update all the VMware mailing list references in the MAINTAINERS
file to mark them as such, using "R: email-alias@vmware.com".

Signed-off-by: Srivatsa S. Bhat (VMware) <srivatsa@csail.mit.edu>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Juergen Gross <jgross@suse.com>
Acked-by: Joe Perches <joe@perches.com>
Acked-by: Zack Rusin <zackr@vmware.com>
Link: https://lore.kernel.org/r/164574148378.654750.15832494349474144706.stgit@csail.mit.edu
---
 MAINTAINERS | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 630ac98da03e2..e6e5466ec1889 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6301,8 +6301,8 @@ T:	git git://anongit.freedesktop.org/drm/drm-misc
 F:	drivers/gpu/drm/vboxvideo/
 
 DRM DRIVER FOR VMWARE VIRTUAL GPU
-M:	"VMware Graphics" <linux-graphics-maintainer@vmware.com>
 M:	Zack Rusin <zackr@vmware.com>
+R:	VMware Graphics Reviewers <linux-graphics-maintainer@vmware.com>
 L:	dri-devel@lists.freedesktop.org
 S:	Supported
 T:	git git://anongit.freedesktop.org/drm/drm-misc
@@ -14578,7 +14578,7 @@ PARAVIRT_OPS INTERFACE
 M:	Juergen Gross <jgross@suse.com>
 M:	Srivatsa S. Bhat (VMware) <srivatsa@csail.mit.edu>
 R:	Alexey Makhalov <amakhalov@vmware.com>
-M:	"VMware, Inc." <pv-drivers@vmware.com>
+R:	VMware PV-Drivers Reviewers <pv-drivers@vmware.com>
 L:	virtualization@lists.linux-foundation.org
 L:	x86@kernel.org
 S:	Supported
@@ -20580,7 +20580,7 @@ F:	tools/testing/vsock/
 
 VMWARE BALLOON DRIVER
 M:	Nadav Amit <namit@vmware.com>
-M:	"VMware, Inc." <pv-drivers@vmware.com>
+R:	VMware PV-Drivers Reviewers <pv-drivers@vmware.com>
 L:	linux-kernel@vger.kernel.org
 S:	Maintained
 F:	drivers/misc/vmw_balloon.c
@@ -20588,7 +20588,7 @@ F:	drivers/misc/vmw_balloon.c
 VMWARE HYPERVISOR INTERFACE
 M:	Srivatsa S. Bhat (VMware) <srivatsa@csail.mit.edu>
 M:	Alexey Makhalov <amakhalov@vmware.com>
-M:	"VMware, Inc." <pv-drivers@vmware.com>
+R:	VMware PV-Drivers Reviewers <pv-drivers@vmware.com>
 L:	virtualization@lists.linux-foundation.org
 L:	x86@kernel.org
 S:	Supported
@@ -20599,14 +20599,14 @@ F:	arch/x86/kernel/cpu/vmware.c
 VMWARE PVRDMA DRIVER
 M:	Bryan Tan <bryantan@vmware.com>
 M:	Vishnu Dasa <vdasa@vmware.com>
-M:	VMware PV-Drivers <pv-drivers@vmware.com>
+R:	VMware PV-Drivers Reviewers <pv-drivers@vmware.com>
 L:	linux-rdma@vger.kernel.org
 S:	Maintained
 F:	drivers/infiniband/hw/vmw_pvrdma/
 
 VMware PVSCSI driver
 M:	Vishal Bhakta <vbhakta@vmware.com>
-M:	VMware PV-Drivers <pv-drivers@vmware.com>
+R:	VMware PV-Drivers Reviewers <pv-drivers@vmware.com>
 L:	linux-scsi@vger.kernel.org
 S:	Maintained
 F:	drivers/scsi/vmw_pvscsi.c
@@ -20614,7 +20614,7 @@ F:	drivers/scsi/vmw_pvscsi.h
 
 VMWARE VIRTUAL PTP CLOCK DRIVER
 M:	Vivek Thampi <vithampi@vmware.com>
-M:	"VMware, Inc." <pv-drivers@vmware.com>
+R:	VMware PV-Drivers Reviewers <pv-drivers@vmware.com>
 L:	netdev@vger.kernel.org
 S:	Supported
 F:	drivers/ptp/ptp_vmw.c
@@ -20622,15 +20622,15 @@ F:	drivers/ptp/ptp_vmw.c
 VMWARE VMCI DRIVER
 M:	Jorgen Hansen <jhansen@vmware.com>
 M:	Vishnu Dasa <vdasa@vmware.com>
+R:	VMware PV-Drivers Reviewers <pv-drivers@vmware.com>
 L:	linux-kernel@vger.kernel.org
-L:	pv-drivers@vmware.com (private)
 S:	Maintained
 F:	drivers/misc/vmw_vmci/
 
 VMWARE VMMOUSE SUBDRIVER
 M:	Zack Rusin <zackr@vmware.com>
-M:	"VMware Graphics" <linux-graphics-maintainer@vmware.com>
-M:	"VMware, Inc." <pv-drivers@vmware.com>
+R:	VMware Graphics Reviewers <linux-graphics-maintainer@vmware.com>
+R:	VMware PV-Drivers Reviewers <pv-drivers@vmware.com>
 L:	linux-input@vger.kernel.org
 S:	Maintained
 F:	drivers/input/mouse/vmmouse.c
@@ -20638,7 +20638,7 @@ F:	drivers/input/mouse/vmmouse.h
 
 VMWARE VMXNET3 ETHERNET DRIVER
 M:	Ronak Doshi <doshir@vmware.com>
-M:	pv-drivers@vmware.com
+R:	VMware PV-Drivers Reviewers <pv-drivers@vmware.com>
 L:	netdev@vger.kernel.org
 S:	Maintained
 F:	drivers/net/vmxnet3/
-- 
GitLab


From 4d9237e32c5db4f07f749a7ff1dd9b366bf3600e Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Tue, 15 Mar 2022 10:54:08 -0600
Subject: [PATCH 1452/1586] io_uring: recycle apoll_poll entries

Particularly for networked workloads, io_uring intensively uses its
poll based backend to get a notification when data/space is available.
Profiling workloads, we see 3-4% of alloc+free that is directly attributed
to just the apoll allocation and free (and the rest being skb alloc+free).

For the fast path, we have ctx->uring_lock held already for both issue
and the inline completions, and we can utilize that to avoid any extra
locking needed to have a basic recycling cache for the apoll entries on
both the alloc and free side.

Double poll still requires an allocation. But those are rare and not
a fast path item.

With the simple cache in place, we see a 3-4% reduction in overhead for
the workload.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 43 +++++++++++++++++++++++++++++++++++++------
 1 file changed, 37 insertions(+), 6 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 299154efcd8a0..b17cf54653df6 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -387,6 +387,7 @@ struct io_ring_ctx {
 		struct list_head	cq_overflow_list;
 		struct xarray		io_buffers;
 		struct list_head	io_buffers_cache;
+		struct list_head	apoll_cache;
 		struct xarray		personalities;
 		u32			pers_next;
 		unsigned		sq_thread_idle;
@@ -1528,6 +1529,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	INIT_LIST_HEAD(&ctx->sqd_list);
 	INIT_LIST_HEAD(&ctx->cq_overflow_list);
 	INIT_LIST_HEAD(&ctx->io_buffers_cache);
+	INIT_LIST_HEAD(&ctx->apoll_cache);
 	init_completion(&ctx->ref_comp);
 	xa_init_flags(&ctx->io_buffers, XA_FLAGS_ALLOC1);
 	xa_init_flags(&ctx->personalities, XA_FLAGS_ALLOC1);
@@ -2650,6 +2652,15 @@ static void __io_submit_flush_completions(struct io_ring_ctx *ctx)
 
 			if (!(req->flags & REQ_F_CQE_SKIP))
 				__io_fill_cqe(req, req->result, req->cflags);
+			if ((req->flags & REQ_F_POLLED) && req->apoll) {
+				struct async_poll *apoll = req->apoll;
+
+				if (apoll->double_poll)
+					kfree(apoll->double_poll);
+				list_add(&apoll->poll.wait.entry,
+						&ctx->apoll_cache);
+				req->flags &= ~REQ_F_POLLED;
+			}
 		}
 
 		io_commit_cqring(ctx);
@@ -6143,7 +6154,7 @@ enum {
 	IO_APOLL_READY
 };
 
-static int io_arm_poll_handler(struct io_kiocb *req)
+static int io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags)
 {
 	const struct io_op_def *def = &io_op_defs[req->opcode];
 	struct io_ring_ctx *ctx = req->ctx;
@@ -6168,9 +6179,16 @@ static int io_arm_poll_handler(struct io_kiocb *req)
 		mask |= POLLOUT | POLLWRNORM;
 	}
 
-	apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC);
-	if (unlikely(!apoll))
-		return IO_APOLL_ABORTED;
+	if (!(issue_flags & IO_URING_F_UNLOCKED) &&
+	    !list_empty(&ctx->apoll_cache)) {
+		apoll = list_first_entry(&ctx->apoll_cache, struct async_poll,
+						poll.wait.entry);
+		list_del_init(&apoll->poll.wait.entry);
+	} else {
+		apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC);
+		if (unlikely(!apoll))
+			return IO_APOLL_ABORTED;
+	}
 	apoll->double_poll = NULL;
 	req->apoll = apoll;
 	req->flags |= REQ_F_POLLED;
@@ -7271,7 +7289,7 @@ static void io_wq_submit_work(struct io_wq_work *work)
 			continue;
 		}
 
-		if (io_arm_poll_handler(req) == IO_APOLL_OK)
+		if (io_arm_poll_handler(req, issue_flags) == IO_APOLL_OK)
 			return;
 		/* aborted or ready, in either case retry blocking */
 		needs_poll = false;
@@ -7417,7 +7435,7 @@ static void io_queue_sqe_arm_apoll(struct io_kiocb *req)
 {
 	struct io_kiocb *linked_timeout = io_prep_linked_timeout(req);
 
-	switch (io_arm_poll_handler(req)) {
+	switch (io_arm_poll_handler(req, 0)) {
 	case IO_APOLL_READY:
 		io_req_task_queue(req);
 		break;
@@ -9938,6 +9956,18 @@ static void io_wait_rsrc_data(struct io_rsrc_data *data)
 		wait_for_completion(&data->done);
 }
 
+static void io_flush_apoll_cache(struct io_ring_ctx *ctx)
+{
+	struct async_poll *apoll;
+
+	while (!list_empty(&ctx->apoll_cache)) {
+		apoll = list_first_entry(&ctx->apoll_cache, struct async_poll,
+						poll.wait.entry);
+		list_del(&apoll->poll.wait.entry);
+		kfree(apoll);
+	}
+}
+
 static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
 {
 	io_sq_thread_finish(ctx);
@@ -9960,6 +9990,7 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
 	if (ctx->rings)
 		__io_cqring_overflow_flush(ctx, true);
 	io_eventfd_unregister(ctx);
+	io_flush_apoll_cache(ctx);
 	mutex_unlock(&ctx->uring_lock);
 	io_destroy_buffers(ctx);
 	if (ctx->sq_creds)
-- 
GitLab


From 56e337f2cf1326323844927a04e9dbce9a244835 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <brgl@bgdev.pl>
Date: Tue, 15 Mar 2022 17:52:05 +0100
Subject: [PATCH 1453/1586] Revert "gpio: Revert regression in sysfs-gpio
 (gpiolib.c)"

This reverts commit fc328a7d1fcce263db0b046917a66f3aa6e68719.

This commit - while attempting to fix a regression - has caused a number
of other problems. As the fallout from it is more significant than the
initial problem itself, revert it for now before we find a correct
solution.

Link: https://lore.kernel.org/all/20220314192522.GA3031157@roeck-us.net/
Link: https://lore.kernel.org/stable/20220314155509.552218-1-michael@walle.cc/
Link: https://lore.kernel.org/all/20211217153555.9413-1-marcelo.jimenez@gmail.com/
Signed-off-by: Bartosz Golaszewski <brgl@bgdev.pl>
Reported-and-bisected-by: Guenter Roeck <linux@roeck-us.net>
Reported-by: Michael Walle <michael@walle.cc>
Cc: Thorsten Leemhuis <linux@leemhuis.info>
Cc: Marcelo Roberto Jimenez <marcelo.jimenez@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpio/gpiolib.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index defb7c464b875..6630d92e30ada 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -1701,6 +1701,11 @@ static inline void gpiochip_irqchip_free_valid_mask(struct gpio_chip *gc)
  */
 int gpiochip_generic_request(struct gpio_chip *gc, unsigned int offset)
 {
+#ifdef CONFIG_PINCTRL
+	if (list_empty(&gc->gpiodev->pin_ranges))
+		return 0;
+#endif
+
 	return pinctrl_gpio_request(gc->gpiodev->base + offset);
 }
 EXPORT_SYMBOL_GPL(gpiochip_generic_request);
@@ -1712,6 +1717,11 @@ EXPORT_SYMBOL_GPL(gpiochip_generic_request);
  */
 void gpiochip_generic_free(struct gpio_chip *gc, unsigned int offset)
 {
+#ifdef CONFIG_PINCTRL
+	if (list_empty(&gc->gpiodev->pin_ranges))
+		return;
+#endif
+
 	pinctrl_gpio_free(gc->gpiodev->base + offset);
 }
 EXPORT_SYMBOL_GPL(gpiochip_generic_free);
-- 
GitLab


From e9b667a82cdcfe21d590344447d65daed52b353b Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Thu, 3 Mar 2022 16:00:17 -0500
Subject: [PATCH 1454/1586] usb: usbtmc: Fix bug in pipe direction for control
 transfers

The syzbot fuzzer reported a minor bug in the usbtmc driver:

usb 5-1: BOGUS control dir, pipe 80001e80 doesn't match bRequestType 0
WARNING: CPU: 0 PID: 3813 at drivers/usb/core/urb.c:412
usb_submit_urb+0x13a5/0x1970 drivers/usb/core/urb.c:410
Modules linked in:
CPU: 0 PID: 3813 Comm: syz-executor122 Not tainted
5.17.0-rc5-syzkaller-00306-g2293be58d6a1 #0
...
Call Trace:
 <TASK>
 usb_start_wait_urb+0x113/0x530 drivers/usb/core/message.c:58
 usb_internal_control_msg drivers/usb/core/message.c:102 [inline]
 usb_control_msg+0x2a5/0x4b0 drivers/usb/core/message.c:153
 usbtmc_ioctl_request drivers/usb/class/usbtmc.c:1947 [inline]

The problem is that usbtmc_ioctl_request() uses usb_rcvctrlpipe() for
all of its transfers, whether they are in or out.  It's easy to fix.

CC: <stable@vger.kernel.org>
Reported-and-tested-by: syzbot+a48e3d1a875240cab5de@syzkaller.appspotmail.com
Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Link: https://lore.kernel.org/r/YiEsYTPEE6lOCOA5@rowland.harvard.edu
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/class/usbtmc.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c
index 73f419adce610..4bb6d304eb4b2 100644
--- a/drivers/usb/class/usbtmc.c
+++ b/drivers/usb/class/usbtmc.c
@@ -1919,6 +1919,7 @@ static int usbtmc_ioctl_request(struct usbtmc_device_data *data,
 	struct usbtmc_ctrlrequest request;
 	u8 *buffer = NULL;
 	int rv;
+	unsigned int is_in, pipe;
 	unsigned long res;
 
 	res = copy_from_user(&request, arg, sizeof(struct usbtmc_ctrlrequest));
@@ -1928,12 +1929,14 @@ static int usbtmc_ioctl_request(struct usbtmc_device_data *data,
 	if (request.req.wLength > USBTMC_BUFSIZE)
 		return -EMSGSIZE;
 
+	is_in = request.req.bRequestType & USB_DIR_IN;
+
 	if (request.req.wLength) {
 		buffer = kmalloc(request.req.wLength, GFP_KERNEL);
 		if (!buffer)
 			return -ENOMEM;
 
-		if ((request.req.bRequestType & USB_DIR_IN) == 0) {
+		if (!is_in) {
 			/* Send control data to device */
 			res = copy_from_user(buffer, request.data,
 					     request.req.wLength);
@@ -1944,8 +1947,12 @@ static int usbtmc_ioctl_request(struct usbtmc_device_data *data,
 		}
 	}
 
+	if (is_in)
+		pipe = usb_rcvctrlpipe(data->usb_dev, 0);
+	else
+		pipe = usb_sndctrlpipe(data->usb_dev, 0);
 	rv = usb_control_msg(data->usb_dev,
-			usb_rcvctrlpipe(data->usb_dev, 0),
+			pipe,
 			request.req.bRequest,
 			request.req.bRequestType,
 			request.req.wValue,
@@ -1957,7 +1964,7 @@ static int usbtmc_ioctl_request(struct usbtmc_device_data *data,
 		goto exit;
 	}
 
-	if (rv && (request.req.bRequestType & USB_DIR_IN)) {
+	if (rv && is_in) {
 		/* Read control data from device */
 		res = copy_to_user(request.data, buffer, rv);
 		if (res)
-- 
GitLab


From 16b1941eac2bd499f065a6739a40ce0011a3d740 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Sat, 5 Mar 2022 21:47:22 -0500
Subject: [PATCH 1455/1586] usb: gadget: Fix use-after-free bug by not setting
 udc->dev.driver

The syzbot fuzzer found a use-after-free bug:

BUG: KASAN: use-after-free in dev_uevent+0x712/0x780 drivers/base/core.c:2320
Read of size 8 at addr ffff88802b934098 by task udevd/3689

CPU: 2 PID: 3689 Comm: udevd Not tainted 5.17.0-rc4-syzkaller-00229-g4f12b742eb2b #0
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.14.0-2 04/01/2014
Call Trace:
 <TASK>
 __dump_stack lib/dump_stack.c:88 [inline]
 dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106
 print_address_description.constprop.0.cold+0x8d/0x303 mm/kasan/report.c:255
 __kasan_report mm/kasan/report.c:442 [inline]
 kasan_report.cold+0x83/0xdf mm/kasan/report.c:459
 dev_uevent+0x712/0x780 drivers/base/core.c:2320
 uevent_show+0x1b8/0x380 drivers/base/core.c:2391
 dev_attr_show+0x4b/0x90 drivers/base/core.c:2094

Although the bug manifested in the driver core, the real cause was a
race with the gadget core.  dev_uevent() does:

	if (dev->driver)
		add_uevent_var(env, "DRIVER=%s", dev->driver->name);

and between the test and the dereference of dev->driver, the gadget
core sets dev->driver to NULL.

The race wouldn't occur if the gadget core registered its devices on
a real bus, using the standard synchronization techniques of the
driver core.  However, it's not necessary to make such a large change
in order to fix this bug; all we need to do is make sure that
udc->dev.driver is always NULL.

In fact, there is no reason for udc->dev.driver ever to be set to
anything, let alone to the value it currently gets: the address of the
gadget's driver.  After all, a gadget driver only knows how to manage
a gadget, not how to manage a UDC.

This patch simply removes the statements in the gadget core that touch
udc->dev.driver.

Fixes: 2ccea03a8f7e ("usb: gadget: introduce UDC Class")
CC: <stable@vger.kernel.org>
Reported-and-tested-by: syzbot+348b571beb5eeb70a582@syzkaller.appspotmail.com
Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Link: https://lore.kernel.org/r/YiQgukfFFbBnwJ/9@rowland.harvard.edu
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/udc/core.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c
index 568534a0d17c8..c109b069f511f 100644
--- a/drivers/usb/gadget/udc/core.c
+++ b/drivers/usb/gadget/udc/core.c
@@ -1436,7 +1436,6 @@ static void usb_gadget_remove_driver(struct usb_udc *udc)
 	usb_gadget_udc_stop(udc);
 
 	udc->driver = NULL;
-	udc->dev.driver = NULL;
 	udc->gadget->dev.driver = NULL;
 }
 
@@ -1498,7 +1497,6 @@ static int udc_bind_to_driver(struct usb_udc *udc, struct usb_gadget_driver *dri
 			driver->function);
 
 	udc->driver = driver;
-	udc->dev.driver = &driver->driver;
 	udc->gadget->dev.driver = &driver->driver;
 
 	usb_gadget_udc_set_speed(udc, driver->max_speed);
@@ -1521,7 +1519,6 @@ err1:
 		dev_err(&udc->dev, "failed to start %s: %d\n",
 			udc->driver->function, ret);
 	udc->driver = NULL;
-	udc->dev.driver = NULL;
 	udc->gadget->dev.driver = NULL;
 	return ret;
 }
-- 
GitLab


From 733ab7e1b5d1041204c4ca7373f6e6f9d08e3283 Mon Sep 17 00:00:00 2001
From: David Jeffery <djeffery@redhat.com>
Date: Fri, 11 Mar 2022 13:43:59 -0500
Subject: [PATCH 1456/1586] scsi: fnic: Finish scsi_cmnd before dropping the
 spinlock

When aborting a SCSI command through fnic, there is a race with the fnic
interrupt handler which can result in the SCSI command and its request
being completed twice. If the interrupt handler claims the command by
setting CMD_SP to NULL first, the abort handler assumes the interrupt
handler has completed the command and returns SUCCESS, causing the request
for the scsi_cmnd to be re-queued.

But the interrupt handler may not have finished the command yet. After it
drops the spinlock protecting CMD_SP, it does memory cleanup before finally
calling scsi_done() to complete the scsi_cmnd. If the call to scsi_done
occurs after the abort handler finishes and re-queues the request, the
completion of the scsi_cmnd will advance and try to double complete a
request already queued for retry.

This patch fixes the issue by moving scsi_done() and any other use of
scsi_cmnd to before the spinlock is released by the interrupt handler.

Link: https://lore.kernel.org/r/20220311184359.2345319-1-djeffery@redhat.com
Reviewed-by: Laurence Oberman <loberman@redhat.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: David Jeffery <djeffery@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/fnic/fnic_scsi.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c
index 88c549f257dbf..40a52feb315da 100644
--- a/drivers/scsi/fnic/fnic_scsi.c
+++ b/drivers/scsi/fnic/fnic_scsi.c
@@ -986,8 +986,6 @@ static void fnic_fcpio_icmnd_cmpl_handler(struct fnic *fnic,
 	CMD_SP(sc) = NULL;
 	CMD_FLAGS(sc) |= FNIC_IO_DONE;
 
-	spin_unlock_irqrestore(io_lock, flags);
-
 	if (hdr_status != FCPIO_SUCCESS) {
 		atomic64_inc(&fnic_stats->io_stats.io_failures);
 		shost_printk(KERN_ERR, fnic->lport->host, "hdr status = %s\n",
@@ -996,8 +994,6 @@ static void fnic_fcpio_icmnd_cmpl_handler(struct fnic *fnic,
 
 	fnic_release_ioreq_buf(fnic, io_req, sc);
 
-	mempool_free(io_req, fnic->io_req_pool);
-
 	cmd_trace = ((u64)hdr_status << 56) |
 		  (u64)icmnd_cmpl->scsi_status << 48 |
 		  (u64)icmnd_cmpl->flags << 40 | (u64)sc->cmnd[0] << 32 |
@@ -1021,6 +1017,12 @@ static void fnic_fcpio_icmnd_cmpl_handler(struct fnic *fnic,
 	} else
 		fnic->lport->host_stats.fcp_control_requests++;
 
+	/* Call SCSI completion function to complete the IO */
+	scsi_done(sc);
+	spin_unlock_irqrestore(io_lock, flags);
+
+	mempool_free(io_req, fnic->io_req_pool);
+
 	atomic64_dec(&fnic_stats->io_stats.active_ios);
 	if (atomic64_read(&fnic->io_cmpl_skip))
 		atomic64_dec(&fnic->io_cmpl_skip);
@@ -1049,9 +1051,6 @@ static void fnic_fcpio_icmnd_cmpl_handler(struct fnic *fnic,
 		if(io_duration_time > atomic64_read(&fnic_stats->io_stats.current_max_io_time))
 			atomic64_set(&fnic_stats->io_stats.current_max_io_time, io_duration_time);
 	}
-
-	/* Call SCSI completion function to complete the IO */
-	scsi_done(sc);
 }
 
 /* fnic_fcpio_itmf_cmpl_handler
-- 
GitLab


From 8126b1c73108bc691f5643df19071a59a69d0bc6 Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh@google.com>
Date: Mon, 14 Mar 2022 19:59:53 +0100
Subject: [PATCH 1457/1586] pstore: Don't use semaphores in
 always-atomic-context code

pstore_dump() is *always* invoked in atomic context (nowadays in an RCU
read-side critical section, before that under a spinlock).
It doesn't make sense to try to use semaphores here.

This is mostly a revert of commit ea84b580b955 ("pstore: Convert buf_lock
to semaphore"), except that two parts aren't restored back exactly as they
were:

 - keep the lock initialization in pstore_register
 - in efi_pstore_write(), always set the "block" flag to false
 - omit "is_locked", that was unnecessary since
   commit 959217c84c27 ("pstore: Actually give up during locking failure")
 - fix the bailout message

The actual problem that the buggy commit was trying to address may have
been that the use of preemptible() in efi_pstore_write() was wrong - it
only looks at preempt_count() and the state of IRQs, but __rcu_read_lock()
doesn't touch either of those under CONFIG_PREEMPT_RCU.
(Sidenote: CONFIG_PREEMPT_RCU means that the scheduler can preempt tasks in
RCU read-side critical sections, but you're not allowed to actively
block/reschedule.)

Lockdep probably never caught the problem because it's very rare that you
actually hit the contended case, so lockdep always just sees the
down_trylock(), not the down_interruptible(), and so it can't tell that
there's a problem.

Fixes: ea84b580b955 ("pstore: Convert buf_lock to semaphore")
Cc: stable@vger.kernel.org
Acked-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Jann Horn <jannh@google.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20220314185953.2068993-1-jannh@google.com
---
 drivers/firmware/efi/efi-pstore.c |  2 +-
 fs/pstore/platform.c              | 38 +++++++++++++++----------------
 include/linux/pstore.h            |  6 ++---
 3 files changed, 22 insertions(+), 24 deletions(-)

diff --git a/drivers/firmware/efi/efi-pstore.c b/drivers/firmware/efi/efi-pstore.c
index 0ef086e43090b..7e771c56c13c6 100644
--- a/drivers/firmware/efi/efi-pstore.c
+++ b/drivers/firmware/efi/efi-pstore.c
@@ -266,7 +266,7 @@ static int efi_pstore_write(struct pstore_record *record)
 		efi_name[i] = name[i];
 
 	ret = efivar_entry_set_safe(efi_name, vendor, PSTORE_EFI_ATTRIBUTES,
-			      preemptible(), record->size, record->psi->buf);
+			      false, record->size, record->psi->buf);
 
 	if (record->reason == KMSG_DUMP_OOPS && try_module_get(THIS_MODULE))
 		if (!schedule_work(&efivar_work))
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index f243cb5e6a4fb..e26162f102ffe 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -143,21 +143,22 @@ static void pstore_timer_kick(void)
 	mod_timer(&pstore_timer, jiffies + msecs_to_jiffies(pstore_update_ms));
 }
 
-/*
- * Should pstore_dump() wait for a concurrent pstore_dump()? If
- * not, the current pstore_dump() will report a failure to dump
- * and return.
- */
-static bool pstore_cannot_wait(enum kmsg_dump_reason reason)
+static bool pstore_cannot_block_path(enum kmsg_dump_reason reason)
 {
-	/* In NMI path, pstore shouldn't block regardless of reason. */
+	/*
+	 * In case of NMI path, pstore shouldn't be blocked
+	 * regardless of reason.
+	 */
 	if (in_nmi())
 		return true;
 
 	switch (reason) {
 	/* In panic case, other cpus are stopped by smp_send_stop(). */
 	case KMSG_DUMP_PANIC:
-	/* Emergency restart shouldn't be blocked. */
+	/*
+	 * Emergency restart shouldn't be blocked by spinning on
+	 * pstore_info::buf_lock.
+	 */
 	case KMSG_DUMP_EMERG:
 		return true;
 	default:
@@ -389,21 +390,19 @@ static void pstore_dump(struct kmsg_dumper *dumper,
 	unsigned long	total = 0;
 	const char	*why;
 	unsigned int	part = 1;
+	unsigned long	flags = 0;
 	int		ret;
 
 	why = kmsg_dump_reason_str(reason);
 
-	if (down_trylock(&psinfo->buf_lock)) {
-		/* Failed to acquire lock: give up if we cannot wait. */
-		if (pstore_cannot_wait(reason)) {
-			pr_err("dump skipped in %s path: may corrupt error record\n",
-				in_nmi() ? "NMI" : why);
-			return;
-		}
-		if (down_interruptible(&psinfo->buf_lock)) {
-			pr_err("could not grab semaphore?!\n");
+	if (pstore_cannot_block_path(reason)) {
+		if (!spin_trylock_irqsave(&psinfo->buf_lock, flags)) {
+			pr_err("dump skipped in %s path because of concurrent dump\n",
+					in_nmi() ? "NMI" : why);
 			return;
 		}
+	} else {
+		spin_lock_irqsave(&psinfo->buf_lock, flags);
 	}
 
 	kmsg_dump_rewind(&iter);
@@ -467,8 +466,7 @@ static void pstore_dump(struct kmsg_dumper *dumper,
 		total += record.size;
 		part++;
 	}
-
-	up(&psinfo->buf_lock);
+	spin_unlock_irqrestore(&psinfo->buf_lock, flags);
 }
 
 static struct kmsg_dumper pstore_dumper = {
@@ -594,7 +592,7 @@ int pstore_register(struct pstore_info *psi)
 		psi->write_user = pstore_write_user_compat;
 	psinfo = psi;
 	mutex_init(&psinfo->read_mutex);
-	sema_init(&psinfo->buf_lock, 1);
+	spin_lock_init(&psinfo->buf_lock);
 
 	if (psi->flags & PSTORE_FLAGS_DMESG)
 		allocate_buf_for_compression();
diff --git a/include/linux/pstore.h b/include/linux/pstore.h
index eb93a54cff31f..e97a8188f0fd8 100644
--- a/include/linux/pstore.h
+++ b/include/linux/pstore.h
@@ -14,7 +14,7 @@
 #include <linux/errno.h>
 #include <linux/kmsg_dump.h>
 #include <linux/mutex.h>
-#include <linux/semaphore.h>
+#include <linux/spinlock.h>
 #include <linux/time.h>
 #include <linux/types.h>
 
@@ -87,7 +87,7 @@ struct pstore_record {
  * @owner:	module which is responsible for this backend driver
  * @name:	name of the backend driver
  *
- * @buf_lock:	semaphore to serialize access to @buf
+ * @buf_lock:	spinlock to serialize access to @buf
  * @buf:	preallocated crash dump buffer
  * @bufsize:	size of @buf available for crash dump bytes (must match
  *		smallest number of bytes available for writing to a
@@ -178,7 +178,7 @@ struct pstore_info {
 	struct module	*owner;
 	const char	*name;
 
-	struct semaphore buf_lock;
+	spinlock_t	buf_lock;
 	char		*buf;
 	size_t		bufsize;
 
-- 
GitLab


From 01b44ef2bf6bc83df8a4703029fd611fbfc31c60 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Thu, 10 Mar 2022 19:18:09 -0500
Subject: [PATCH 1458/1586] counter: Stop using dev_get_drvdata() to get the
 counter device
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

dev_get_drvdata() returns NULL since commit b56346ddbd82 ("counter: Use
container_of instead of drvdata to track counter_device") which wrongly
claimed there were no users of drvdata. Convert to container_of() to
fix a null pointer dereference.

Reported-by: Oleksij Rempel <o.rempel@pengutronix.de>
Fixes: b56346ddbd82 ("counter: Use container_of instead of drvdata to track counter_device")
Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Tested-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Link: https://lore.kernel.org/all/20220204082556.370348-1-u.kleine-koenig@pengutronix.de/
Signed-off-by: William Breathitt Gray <vilhelm.gray@gmail.com>
Link: https://lore.kernel.org/r/4a14311a3b935b62b33e665a97ecaaf2f078228a.1646957732.git.vilhelm.gray@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/counter/counter-sysfs.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/drivers/counter/counter-sysfs.c b/drivers/counter/counter-sysfs.c
index 7cc4d1d523ea4..04eac41dad33e 100644
--- a/drivers/counter/counter-sysfs.c
+++ b/drivers/counter/counter-sysfs.c
@@ -19,6 +19,11 @@
 
 #include "counter-sysfs.h"
 
+static inline struct counter_device *counter_from_dev(struct device *dev)
+{
+	return container_of(dev, struct counter_device, dev);
+}
+
 /**
  * struct counter_attribute - Counter sysfs attribute
  * @dev_attr:	device attribute for sysfs
@@ -90,7 +95,7 @@ static ssize_t counter_comp_u8_show(struct device *dev,
 				    struct device_attribute *attr, char *buf)
 {
 	const struct counter_attribute *const a = to_counter_attribute(attr);
-	struct counter_device *const counter = dev_get_drvdata(dev);
+	struct counter_device *const counter = counter_from_dev(dev);
 	int err;
 	u8 data = 0;
 
@@ -122,7 +127,7 @@ static ssize_t counter_comp_u8_store(struct device *dev,
 				     const char *buf, size_t len)
 {
 	const struct counter_attribute *const a = to_counter_attribute(attr);
-	struct counter_device *const counter = dev_get_drvdata(dev);
+	struct counter_device *const counter = counter_from_dev(dev);
 	int err;
 	bool bool_data = 0;
 	u8 data = 0;
@@ -158,7 +163,7 @@ static ssize_t counter_comp_u32_show(struct device *dev,
 				     struct device_attribute *attr, char *buf)
 {
 	const struct counter_attribute *const a = to_counter_attribute(attr);
-	struct counter_device *const counter = dev_get_drvdata(dev);
+	struct counter_device *const counter = counter_from_dev(dev);
 	const struct counter_available *const avail = a->comp.priv;
 	int err;
 	u32 data = 0;
@@ -221,7 +226,7 @@ static ssize_t counter_comp_u32_store(struct device *dev,
 				      const char *buf, size_t len)
 {
 	const struct counter_attribute *const a = to_counter_attribute(attr);
-	struct counter_device *const counter = dev_get_drvdata(dev);
+	struct counter_device *const counter = counter_from_dev(dev);
 	struct counter_count *const count = a->parent;
 	struct counter_synapse *const synapse = a->comp.priv;
 	const struct counter_available *const avail = a->comp.priv;
@@ -281,7 +286,7 @@ static ssize_t counter_comp_u64_show(struct device *dev,
 				     struct device_attribute *attr, char *buf)
 {
 	const struct counter_attribute *const a = to_counter_attribute(attr);
-	struct counter_device *const counter = dev_get_drvdata(dev);
+	struct counter_device *const counter = counter_from_dev(dev);
 	int err;
 	u64 data = 0;
 
@@ -309,7 +314,7 @@ static ssize_t counter_comp_u64_store(struct device *dev,
 				      const char *buf, size_t len)
 {
 	const struct counter_attribute *const a = to_counter_attribute(attr);
-	struct counter_device *const counter = dev_get_drvdata(dev);
+	struct counter_device *const counter = counter_from_dev(dev);
 	int err;
 	u64 data = 0;
 
-- 
GitLab


From e9c14b59ea2ec19afe22d60b07583b7e08c74290 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Mon, 14 Mar 2022 15:28:19 -0700
Subject: [PATCH 1459/1586] Add Paolo Abeni to networking maintainers

Growing the network maintainers team from 2 to 3.

Signed-off-by: David S. Miller <davem@davemloft.net>
Link: https://lore.kernel.org/r/20220314222819.958428-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 MAINTAINERS | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index e127c2fb08a71..cd0f68d4a34a6 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -13376,6 +13376,7 @@ F:	net/core/drop_monitor.c
 NETWORKING DRIVERS
 M:	"David S. Miller" <davem@davemloft.net>
 M:	Jakub Kicinski <kuba@kernel.org>
+M:	Paolo Abeni <pabeni@redhat.com>
 L:	netdev@vger.kernel.org
 S:	Maintained
 Q:	https://patchwork.kernel.org/project/netdevbpf/list/
@@ -13422,6 +13423,7 @@ F:	tools/testing/selftests/drivers/net/dsa/
 NETWORKING [GENERAL]
 M:	"David S. Miller" <davem@davemloft.net>
 M:	Jakub Kicinski <kuba@kernel.org>
+M:	Paolo Abeni <pabeni@redhat.com>
 L:	netdev@vger.kernel.org
 S:	Maintained
 Q:	https://patchwork.kernel.org/project/netdevbpf/list/
-- 
GitLab


From f153546913bada41a811722f2c6d17c3243a0333 Mon Sep 17 00:00:00 2001
From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Date: Mon, 7 Mar 2022 18:47:39 +0100
Subject: [PATCH 1460/1586] ice: fix NULL pointer dereference in
 ice_update_vsi_tx_ring_stats()

It is possible to do NULL pointer dereference in routine that updates
Tx ring stats. Currently only stats and bytes are updated when ring
pointer is valid, but later on ring is accessed to propagate gathered Tx
stats onto VSI stats.

Change the existing logic to move to next ring when ring is NULL.

Fixes: e72bba21355d ("ice: split ice_ring onto Tx/Rx separate structs")
Reported-by: kernel test robot <lkp@intel.com>
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Acked-by: Alexander Lobakin <alexandr.lobakin@intel.com>
Tested-by: Gurucharan G <gurucharanx.g@intel.com> (A Contingent worker at Intel)
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_main.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 493942e910be0..d4a7c39fd0786 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -5962,8 +5962,9 @@ ice_update_vsi_tx_ring_stats(struct ice_vsi *vsi,
 		u64 pkts = 0, bytes = 0;
 
 		ring = READ_ONCE(rings[i]);
-		if (ring)
-			ice_fetch_u64_stats_per_ring(&ring->syncp, ring->stats, &pkts, &bytes);
+		if (!ring)
+			continue;
+		ice_fetch_u64_stats_per_ring(&ring->syncp, ring->stats, &pkts, &bytes);
 		vsi_stats->tx_packets += pkts;
 		vsi_stats->tx_bytes += bytes;
 		vsi->tx_restart += ring->tx_stats.restart_q;
-- 
GitLab


From 1b4ae7d925c6569fff27313b4d84171b11510893 Mon Sep 17 00:00:00 2001
From: Sudheer Mogilappagari <sudheer.mogilappagari@intel.com>
Date: Thu, 10 Mar 2022 10:46:52 -0800
Subject: [PATCH 1461/1586] ice: destroy flow director filter mutex after
 releasing VSIs

Currently fdir_fltr_lock is accessed in ice_vsi_release_all() function
after it is destroyed. Instead destroy mutex after ice_vsi_release_all.

Fixes: 40319796b732 ("ice: Add flow director support for channel mode")
Signed-off-by: Sudheer Mogilappagari <sudheer.mogilappagari@intel.com>
Tested-by: Bharathi Sreenivas <bharathi.sreenivas@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index d4a7c39fd0786..b7e8744b0c0a6 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -4880,7 +4880,6 @@ static void ice_remove(struct pci_dev *pdev)
 	ice_devlink_unregister_params(pf);
 	set_bit(ICE_DOWN, pf->state);
 
-	mutex_destroy(&(&pf->hw)->fdir_fltr_lock);
 	ice_deinit_lag(pf);
 	if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags))
 		ice_ptp_release(pf);
@@ -4888,6 +4887,7 @@ static void ice_remove(struct pci_dev *pdev)
 		ice_remove_arfs(pf);
 	ice_setup_mc_magic_wake(pf);
 	ice_vsi_release_all(pf);
+	mutex_destroy(&(&pf->hw)->fdir_fltr_lock);
 	ice_set_wake(pf);
 	ice_free_irq_msix_misc(pf);
 	ice_for_each_vsi(pf, i) {
-- 
GitLab


From 16b2dd8cdf6f4e0597c34899de74b4d012b78188 Mon Sep 17 00:00:00 2001
From: Przemyslaw Patynowski <przemyslawx.patynowski@intel.com>
Date: Wed, 9 Mar 2022 16:37:39 +0100
Subject: [PATCH 1462/1586] iavf: Fix double free in iavf_reset_task

Fix double free possibility in iavf_disable_vf, as crit_lock is
freed in caller, iavf_reset_task. Add kernel-doc for iavf_disable_vf.
Remove mutex_unlock in iavf_disable_vf.
Without this patch there is double free scenario, when calling
iavf_reset_task.

Fixes: e85ff9c631e1 ("iavf: Fix deadlock in iavf_reset_task")
Signed-off-by: Przemyslaw Patynowski <przemyslawx.patynowski@intel.com>
Suggested-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Mateusz Palczewski <mateusz.palczewski@intel.com>
Tested-by: Konrad Jankowski <konrad0.jankowski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/iavf/iavf_main.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
index 8e644e9ed8da0..45570e3f782e8 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -2541,6 +2541,13 @@ restart_watchdog:
 		queue_delayed_work(iavf_wq, &adapter->watchdog_task, HZ * 2);
 }
 
+/**
+ * iavf_disable_vf - disable VF
+ * @adapter: board private structure
+ *
+ * Set communication failed flag and free all resources.
+ * NOTE: This function is expected to be called with crit_lock being held.
+ **/
 static void iavf_disable_vf(struct iavf_adapter *adapter)
 {
 	struct iavf_mac_filter *f, *ftmp;
@@ -2595,7 +2602,6 @@ static void iavf_disable_vf(struct iavf_adapter *adapter)
 	memset(adapter->vf_res, 0, IAVF_VIRTCHNL_VF_RESOURCE_SIZE);
 	iavf_shutdown_adminq(&adapter->hw);
 	adapter->netdev->flags &= ~IFF_UP;
-	mutex_unlock(&adapter->crit_lock);
 	adapter->flags &= ~IAVF_FLAG_RESET_PENDING;
 	iavf_change_state(adapter, __IAVF_DOWN);
 	wake_up(&adapter->down_waitqueue);
-- 
GitLab


From 79d45f57a19537a1ec6ebf836944e968b154f86e Mon Sep 17 00:00:00 2001
From: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Date: Mon, 14 Mar 2022 16:03:21 +0100
Subject: [PATCH 1463/1586] sr: simplify the local variable initialization in
 sr_block_open()

Commit 01d0c698536f ("sr: implement ->free_disk to simplify refcounting")
refactored sr_block_open(), initialized one variable with a duplicate
assignment (probably an unintended copy & paste duplication) and turned one
error case into an early return, which makes the initialization of the
return variable needless.

So, simplify the local variable initialization in sr_block_open() to make
the code a bit more clear.

No functional change. No change in resulting object code.

Signed-off-by: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220314150321.17720-1-lukas.bulwahn@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/scsi/sr.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index 11fbdc75bb711..641552d6330b6 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -483,9 +483,9 @@ static void sr_revalidate_disk(struct scsi_cd *cd)
 
 static int sr_block_open(struct block_device *bdev, fmode_t mode)
 {
-	struct scsi_cd *cd = cd = scsi_cd(bdev->bd_disk);
+	struct scsi_cd *cd = scsi_cd(bdev->bd_disk);
 	struct scsi_device *sdev = cd->device;
-	int ret = -ENXIO;
+	int ret;
 
 	if (scsi_device_get(cd->device))
 		return -ENXIO;
-- 
GitLab


From 78be0471da4e9fff874307d73d68f0173fa6a154 Mon Sep 17 00:00:00 2001
From: Ritesh Harjani <riteshh@linux.ibm.com>
Date: Sat, 12 Mar 2022 11:09:50 +0530
Subject: [PATCH 1464/1586] ext4: return early for non-eligible fast_commit
 track events

Currently ext4_fc_track_template() checks, whether the trace event
path belongs to replay or does sb has ineligible set, if yes it simply
returns. This patch pulls those checks before calling
ext4_fc_track_template() in the callers of ext4_fc_track_template().

[ Add checks to ext4_rename() which calls the __ext4_fc_track_*()
  functions directly. -- TYT ]

Signed-off-by: Ritesh Harjani <riteshh@linux.ibm.com>
Link: https://lore.kernel.org/r/3cd025d9c490218a92e6d8fb30b6123e693373e3.1647057583.git.riteshh@linux.ibm.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/fast_commit.c | 59 +++++++++++++++++++++++++++++++++++--------
 fs/ext4/namei.c       | 15 ++++++++---
 2 files changed, 60 insertions(+), 14 deletions(-)

diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
index 55d33f296caed..6990429daa0ed 100644
--- a/fs/ext4/fast_commit.c
+++ b/fs/ext4/fast_commit.c
@@ -379,13 +379,6 @@ static int ext4_fc_track_template(
 	tid_t tid = 0;
 	int ret;
 
-	if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
-	    (sbi->s_mount_state & EXT4_FC_REPLAY))
-		return -EOPNOTSUPP;
-
-	if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE))
-		return -EINVAL;
-
 	tid = handle->h_transaction->t_tid;
 	mutex_lock(&ei->i_fc_lock);
 	if (tid == ei->i_sync_tid) {
@@ -499,7 +492,17 @@ void __ext4_fc_track_unlink(handle_t *handle,
 
 void ext4_fc_track_unlink(handle_t *handle, struct dentry *dentry)
 {
-	__ext4_fc_track_unlink(handle, d_inode(dentry), dentry);
+	struct inode *inode = d_inode(dentry);
+	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+
+	if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
+	    (sbi->s_mount_state & EXT4_FC_REPLAY))
+		return;
+
+	if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE))
+		return;
+
+	__ext4_fc_track_unlink(handle, inode, dentry);
 }
 
 void __ext4_fc_track_link(handle_t *handle,
@@ -518,7 +521,17 @@ void __ext4_fc_track_link(handle_t *handle,
 
 void ext4_fc_track_link(handle_t *handle, struct dentry *dentry)
 {
-	__ext4_fc_track_link(handle, d_inode(dentry), dentry);
+	struct inode *inode = d_inode(dentry);
+	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+
+	if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
+	    (sbi->s_mount_state & EXT4_FC_REPLAY))
+		return;
+
+	if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE))
+		return;
+
+	__ext4_fc_track_link(handle, inode, dentry);
 }
 
 void __ext4_fc_track_create(handle_t *handle, struct inode *inode,
@@ -537,7 +550,17 @@ void __ext4_fc_track_create(handle_t *handle, struct inode *inode,
 
 void ext4_fc_track_create(handle_t *handle, struct dentry *dentry)
 {
-	__ext4_fc_track_create(handle, d_inode(dentry), dentry);
+	struct inode *inode = d_inode(dentry);
+	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+
+	if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
+	    (sbi->s_mount_state & EXT4_FC_REPLAY))
+		return;
+
+	if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE))
+		return;
+
+	__ext4_fc_track_create(handle, inode, dentry);
 }
 
 /* __track_fn for inode tracking */
@@ -553,6 +576,7 @@ static int __track_inode(struct inode *inode, void *arg, bool update)
 
 void ext4_fc_track_inode(handle_t *handle, struct inode *inode)
 {
+	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	int ret;
 
 	if (S_ISDIR(inode->i_mode))
@@ -564,6 +588,13 @@ void ext4_fc_track_inode(handle_t *handle, struct inode *inode)
 		return;
 	}
 
+	if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
+	    (sbi->s_mount_state & EXT4_FC_REPLAY))
+		return;
+
+	if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE))
+		return;
+
 	ret = ext4_fc_track_template(handle, inode, __track_inode, NULL, 1);
 	trace_ext4_fc_track_inode(inode, ret);
 }
@@ -603,12 +634,20 @@ static int __track_range(struct inode *inode, void *arg, bool update)
 void ext4_fc_track_range(handle_t *handle, struct inode *inode, ext4_lblk_t start,
 			 ext4_lblk_t end)
 {
+	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	struct __track_range_args args;
 	int ret;
 
 	if (S_ISDIR(inode->i_mode))
 		return;
 
+	if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
+	    (sbi->s_mount_state & EXT4_FC_REPLAY))
+		return;
+
+	if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE))
+		return;
+
 	args.start = start;
 	args.end = end;
 
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 39e223f7bf64d..e37da8d5cd0c1 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -3891,12 +3891,19 @@ static int ext4_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
 		ext4_fc_mark_ineligible(old.inode->i_sb,
 			EXT4_FC_REASON_RENAME_DIR, handle);
 	} else {
+		struct super_block *sb = old.inode->i_sb;
+
 		if (new.inode)
 			ext4_fc_track_unlink(handle, new.dentry);
-		__ext4_fc_track_link(handle, old.inode, new.dentry);
-		__ext4_fc_track_unlink(handle, old.inode, old.dentry);
-		if (whiteout)
-			__ext4_fc_track_create(handle, whiteout, old.dentry);
+		if (test_opt2(sb, JOURNAL_FAST_COMMIT) &&
+		    !(EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY) &&
+		    !(ext4_test_mount_flag(sb, EXT4_MF_FC_INELIGIBLE))) {
+			__ext4_fc_track_link(handle, old.inode, new.dentry);
+			__ext4_fc_track_unlink(handle, old.inode, old.dentry);
+			if (whiteout)
+				__ext4_fc_track_create(handle, whiteout,
+						       old.dentry);
+		}
 	}
 
 	if (new.inode) {
-- 
GitLab


From 08f4c42abad1b93914a93f9042e2443593bd3137 Mon Sep 17 00:00:00 2001
From: Ritesh Harjani <riteshh@linux.ibm.com>
Date: Sat, 12 Mar 2022 11:09:51 +0530
Subject: [PATCH 1465/1586] ext4: add new trace event in ext4_fc_cleanup

This adds a new trace event in ext4_fc_cleanup() which is helpful in debugging
some fast_commit issues.

Signed-off-by: Ritesh Harjani <riteshh@linux.ibm.com>
Link: https://lore.kernel.org/r/794cdb1d5d3622f3f80d30c222ff6652ea68c375.1647057583.git.riteshh@linux.ibm.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/fast_commit.c       |  1 +
 include/trace/events/ext4.h | 26 ++++++++++++++++++++++++++
 2 files changed, 27 insertions(+)

diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
index 6990429daa0ed..f4a56298fd883 100644
--- a/fs/ext4/fast_commit.c
+++ b/fs/ext4/fast_commit.c
@@ -1280,6 +1280,7 @@ static void ext4_fc_cleanup(journal_t *journal, int full, tid_t tid)
 	if (full && sbi->s_fc_bh)
 		sbi->s_fc_bh = NULL;
 
+	trace_ext4_fc_cleanup(journal, full, tid);
 	jbd2_fc_release_bufs(journal);
 
 	spin_lock(&sbi->s_fc_lock);
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index 3d48fcb629876..20654aaf94950 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -2863,6 +2863,32 @@ TRACE_EVENT(ext4_fc_track_range,
 		      __entry->end)
 	);
 
+TRACE_EVENT(ext4_fc_cleanup,
+	TP_PROTO(journal_t *journal, int full, tid_t tid),
+
+	TP_ARGS(journal, full, tid),
+
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(int, j_fc_off)
+		__field(int, full)
+		__field(tid_t, tid)
+	),
+
+	TP_fast_assign(
+		struct super_block *sb = journal->j_private;
+
+		__entry->dev = sb->s_dev;
+		__entry->j_fc_off = journal->j_fc_off;
+		__entry->full = full;
+		__entry->tid = tid;
+	),
+
+	TP_printk("dev %d,%d, j_fc_off %d, full %d, tid %u",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->j_fc_off, __entry->full, __entry->tid)
+	);
+
 TRACE_EVENT(ext4_update_sb,
 	TP_PROTO(struct super_block *sb, ext4_fsblk_t fsblk,
 		 unsigned int flags),
-- 
GitLab


From 1d2e2440c5191da82b1191298909283a58f0ece8 Mon Sep 17 00:00:00 2001
From: Ritesh Harjani <riteshh@linux.ibm.com>
Date: Sat, 12 Mar 2022 11:09:52 +0530
Subject: [PATCH 1466/1586] ext4: add transaction tid info in fc_track events

This patch adds the transaction & inode tid info in trace events for
callers of ext4_fc_track_template(). This is helpful in debugging race
conditions where an inode could belong to two different transaction tids.
It also fixes the checkpatch warnings which says use tabs instead of
spaces.

Signed-off-by: Ritesh Harjani <riteshh@linux.ibm.com>
Link: https://lore.kernel.org/r/c203c09dc11bb372803c430f621f25a4b8c2c8b4.1647057583.git.riteshh@linux.ibm.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/fast_commit.c       |  10 ++--
 include/trace/events/ext4.h | 113 ++++++++++++++++++++++--------------
 2 files changed, 73 insertions(+), 50 deletions(-)

diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
index f4a56298fd883..849fd4dcb8251 100644
--- a/fs/ext4/fast_commit.c
+++ b/fs/ext4/fast_commit.c
@@ -487,7 +487,7 @@ void __ext4_fc_track_unlink(handle_t *handle,
 
 	ret = ext4_fc_track_template(handle, inode, __track_dentry_update,
 					(void *)&args, 0);
-	trace_ext4_fc_track_unlink(inode, dentry, ret);
+	trace_ext4_fc_track_unlink(handle, inode, dentry, ret);
 }
 
 void ext4_fc_track_unlink(handle_t *handle, struct dentry *dentry)
@@ -516,7 +516,7 @@ void __ext4_fc_track_link(handle_t *handle,
 
 	ret = ext4_fc_track_template(handle, inode, __track_dentry_update,
 					(void *)&args, 0);
-	trace_ext4_fc_track_link(inode, dentry, ret);
+	trace_ext4_fc_track_link(handle, inode, dentry, ret);
 }
 
 void ext4_fc_track_link(handle_t *handle, struct dentry *dentry)
@@ -545,7 +545,7 @@ void __ext4_fc_track_create(handle_t *handle, struct inode *inode,
 
 	ret = ext4_fc_track_template(handle, inode, __track_dentry_update,
 					(void *)&args, 0);
-	trace_ext4_fc_track_create(inode, dentry, ret);
+	trace_ext4_fc_track_create(handle, inode, dentry, ret);
 }
 
 void ext4_fc_track_create(handle_t *handle, struct dentry *dentry)
@@ -596,7 +596,7 @@ void ext4_fc_track_inode(handle_t *handle, struct inode *inode)
 		return;
 
 	ret = ext4_fc_track_template(handle, inode, __track_inode, NULL, 1);
-	trace_ext4_fc_track_inode(inode, ret);
+	trace_ext4_fc_track_inode(handle, inode, ret);
 }
 
 struct __track_range_args {
@@ -653,7 +653,7 @@ void ext4_fc_track_range(handle_t *handle, struct inode *inode, ext4_lblk_t star
 
 	ret = ext4_fc_track_template(handle, inode,  __track_range, &args, 1);
 
-	trace_ext4_fc_track_range(inode, start, end, ret);
+	trace_ext4_fc_track_range(handle, inode, start, end, ret);
 }
 
 static void ext4_fc_submit_bh(struct super_block *sb, bool is_tail)
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index 20654aaf94950..6bd90df07b5c4 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -2782,32 +2782,41 @@ TRACE_EVENT(ext4_fc_stats,
 
 DECLARE_EVENT_CLASS(ext4_fc_track_dentry,
 
-	TP_PROTO(struct inode *inode, struct dentry *dentry, int ret),
+	TP_PROTO(handle_t *handle, struct inode *inode,
+		 struct dentry *dentry, int ret),
 
-	TP_ARGS(inode, dentry, ret),
+	TP_ARGS(handle, inode, dentry, ret),
 
 	TP_STRUCT__entry(
 		__field(dev_t, dev)
+		__field(tid_t, t_tid)
 		__field(ino_t, i_ino)
+		__field(tid_t, i_sync_tid)
 		__field(int, error)
 	),
 
 	TP_fast_assign(
+		struct ext4_inode_info *ei = EXT4_I(inode);
+
 		__entry->dev = inode->i_sb->s_dev;
+		__entry->t_tid = handle->h_transaction->t_tid;
 		__entry->i_ino = inode->i_ino;
+		__entry->i_sync_tid = ei->i_sync_tid;
 		__entry->error = ret;
 	),
 
-	TP_printk("dev %d,%d, ino %lu, error %d",
+	TP_printk("dev %d,%d, t_tid %u, ino %lu, i_sync_tid %u, error %d",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->i_ino, __entry->error
+		  __entry->t_tid, __entry->i_ino, __entry->i_sync_tid,
+		  __entry->error
 	)
 );
 
 #define DEFINE_EVENT_CLASS_DENTRY(__type)				\
 DEFINE_EVENT(ext4_fc_track_dentry, ext4_fc_track_##__type,		\
-	TP_PROTO(struct inode *inode, struct dentry *dentry, int ret),	\
-	TP_ARGS(inode, dentry, ret)					\
+	TP_PROTO(handle_t *handle, struct inode *inode,			\
+		 struct dentry *dentry, int ret),			\
+	TP_ARGS(handle, inode, dentry, ret)				\
 )
 
 DEFINE_EVENT_CLASS_DENTRY(create);
@@ -2815,52 +2824,66 @@ DEFINE_EVENT_CLASS_DENTRY(link);
 DEFINE_EVENT_CLASS_DENTRY(unlink);
 
 TRACE_EVENT(ext4_fc_track_inode,
-	    TP_PROTO(struct inode *inode, int ret),
+	TP_PROTO(handle_t *handle, struct inode *inode, int ret),
 
-	    TP_ARGS(inode, ret),
+	TP_ARGS(handle, inode, ret),
 
-	    TP_STRUCT__entry(
-		    __field(dev_t, dev)
-		    __field(int, ino)
-		    __field(int, error)
-		    ),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(tid_t, t_tid)
+		__field(ino_t, i_ino)
+		__field(tid_t, i_sync_tid)
+		__field(int, error)
+	),
 
-	    TP_fast_assign(
-		    __entry->dev = inode->i_sb->s_dev;
-		    __entry->ino = inode->i_ino;
-		    __entry->error = ret;
-		    ),
+	TP_fast_assign(
+		struct ext4_inode_info *ei = EXT4_I(inode);
 
-	    TP_printk("dev %d:%d, inode %d, error %d",
-		      MAJOR(__entry->dev), MINOR(__entry->dev),
-		      __entry->ino, __entry->error)
+		__entry->dev = inode->i_sb->s_dev;
+		__entry->t_tid = handle->h_transaction->t_tid;
+		__entry->i_ino = inode->i_ino;
+		__entry->i_sync_tid = ei->i_sync_tid;
+		__entry->error = ret;
+	),
+
+	TP_printk("dev %d:%d, t_tid %u, inode %lu, i_sync_tid %u, error %d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->t_tid, __entry->i_ino, __entry->i_sync_tid,
+		  __entry->error)
 	);
 
 TRACE_EVENT(ext4_fc_track_range,
-	    TP_PROTO(struct inode *inode, long start, long end, int ret),
-
-	    TP_ARGS(inode, start, end, ret),
-
-	    TP_STRUCT__entry(
-		    __field(dev_t, dev)
-		    __field(int, ino)
-		    __field(long, start)
-		    __field(long, end)
-		    __field(int, error)
-		    ),
-
-	    TP_fast_assign(
-		    __entry->dev = inode->i_sb->s_dev;
-		    __entry->ino = inode->i_ino;
-		    __entry->start = start;
-		    __entry->end = end;
-		    __entry->error = ret;
-		    ),
-
-	    TP_printk("dev %d:%d, inode %d, error %d, start %ld, end %ld",
-		      MAJOR(__entry->dev), MINOR(__entry->dev),
-		      __entry->ino, __entry->error, __entry->start,
-		      __entry->end)
+	TP_PROTO(handle_t *handle, struct inode *inode,
+		 long start, long end, int ret),
+
+	TP_ARGS(handle, inode, start, end, ret),
+
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(tid_t, t_tid)
+		__field(ino_t, i_ino)
+		__field(tid_t, i_sync_tid)
+		__field(long, start)
+		__field(long, end)
+		__field(int, error)
+	),
+
+	TP_fast_assign(
+		struct ext4_inode_info *ei = EXT4_I(inode);
+
+		__entry->dev = inode->i_sb->s_dev;
+		__entry->t_tid = handle->h_transaction->t_tid;
+		__entry->i_ino = inode->i_ino;
+		__entry->i_sync_tid = ei->i_sync_tid;
+		__entry->start = start;
+		__entry->end = end;
+		__entry->error = ret;
+	),
+
+	TP_printk("dev %d:%d, t_tid %u, inode %lu, i_sync_tid %u, error %d, start %ld, end %ld",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->t_tid, __entry->i_ino, __entry->i_sync_tid,
+		  __entry->error, __entry->start, __entry->end)
 	);
 
 TRACE_EVENT(ext4_fc_cleanup,
-- 
GitLab


From d9bf099cb980d63cd9a45a135259a6cabcb814a5 Mon Sep 17 00:00:00 2001
From: Ritesh Harjani <riteshh@linux.ibm.com>
Date: Sat, 12 Mar 2022 11:09:53 +0530
Subject: [PATCH 1467/1586] ext4: add commit_tid info in jbd debug log

This adds commit_tid argument in ext4_fc_update_stats()
so that we can add this information too in jbd_debug logs.
This is also required in a later patch to pass the commit_tid info in
ext4_fc_commit_start/stop() trace events.

Signed-off-by: Ritesh Harjani <riteshh@linux.ibm.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
Link: https://lore.kernel.org/r/dabda3f2919a60e01887e798bf5915216b451733.1647057583.git.riteshh@linux.ibm.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/fast_commit.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
index 849fd4dcb8251..88ed99e670c58 100644
--- a/fs/ext4/fast_commit.c
+++ b/fs/ext4/fast_commit.c
@@ -1164,11 +1164,12 @@ out:
 }
 
 static void ext4_fc_update_stats(struct super_block *sb, int status,
-				 u64 commit_time, int nblks)
+				 u64 commit_time, int nblks, tid_t commit_tid)
 {
 	struct ext4_fc_stats *stats = &EXT4_SB(sb)->s_fc_stats;
 
-	jbd_debug(1, "Fast commit ended with status = %d", status);
+	jbd_debug(1, "Fast commit ended with status = %d for tid %u",
+			status, commit_tid);
 	if (status == EXT4_FC_STATUS_OK) {
 		stats->fc_num_commits++;
 		stats->fc_numblks += nblks;
@@ -1218,14 +1219,16 @@ restart_fc:
 		if (atomic_read(&sbi->s_fc_subtid) <= subtid &&
 			commit_tid > journal->j_commit_sequence)
 			goto restart_fc;
-		ext4_fc_update_stats(sb, EXT4_FC_STATUS_SKIPPED, 0, 0);
+		ext4_fc_update_stats(sb, EXT4_FC_STATUS_SKIPPED, 0, 0,
+				commit_tid);
 		return 0;
 	} else if (ret) {
 		/*
 		 * Commit couldn't start. Just update stats and perform a
 		 * full commit.
 		 */
-		ext4_fc_update_stats(sb, EXT4_FC_STATUS_FAILED, 0, 0);
+		ext4_fc_update_stats(sb, EXT4_FC_STATUS_FAILED, 0, 0,
+				commit_tid);
 		return jbd2_complete_transaction(journal, commit_tid);
 	}
 
@@ -1257,12 +1260,12 @@ restart_fc:
 	 * don't react too strongly to vast changes in the commit time
 	 */
 	commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
-	ext4_fc_update_stats(sb, status, commit_time, nblks);
+	ext4_fc_update_stats(sb, status, commit_time, nblks, commit_tid);
 	return ret;
 
 fallback:
 	ret = jbd2_fc_end_commit_fallback(journal);
-	ext4_fc_update_stats(sb, status, 0, 0);
+	ext4_fc_update_stats(sb, status, 0, 0, commit_tid);
 	return ret;
 }
 
-- 
GitLab


From 5641ace54471cb5c393e71f33232088602455c6b Mon Sep 17 00:00:00 2001
From: Ritesh Harjani <riteshh@linux.ibm.com>
Date: Sat, 12 Mar 2022 11:09:54 +0530
Subject: [PATCH 1468/1586] ext4: add commit tid info in
 ext4_fc_commit_start/stop trace events

This adds commit_tid info in ext4_fc_commit_start/stop which is helpful
in debugging fast_commit issues.

For e.g. issues where due to jbd2 journal full commit, FC miss to commit
updates to a file.

Also improves TP_prink format string i.e. all ext4 and jbd2 trace events
starts with "dev MAjOR,MINOR". Let's follow the same convention while we
are still at it.

Signed-off-by: Ritesh Harjani <riteshh@linux.ibm.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
Link: https://lore.kernel.org/r/ebcd6b9ab5b718db30f90854497886801ce38c63.1647057583.git.riteshh@linux.ibm.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/fast_commit.c       |  4 ++--
 include/trace/events/ext4.h | 21 +++++++++++++--------
 2 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
index 88ed99e670c58..3d72565ec6e85 100644
--- a/fs/ext4/fast_commit.c
+++ b/fs/ext4/fast_commit.c
@@ -1187,7 +1187,7 @@ static void ext4_fc_update_stats(struct super_block *sb, int status,
 	} else {
 		stats->fc_skipped_commits++;
 	}
-	trace_ext4_fc_commit_stop(sb, nblks, status);
+	trace_ext4_fc_commit_stop(sb, nblks, status, commit_tid);
 }
 
 /*
@@ -1208,7 +1208,7 @@ int ext4_fc_commit(journal_t *journal, tid_t commit_tid)
 	if (!test_opt2(sb, JOURNAL_FAST_COMMIT))
 		return jbd2_complete_transaction(journal, commit_tid);
 
-	trace_ext4_fc_commit_start(sb);
+	trace_ext4_fc_commit_start(sb, commit_tid);
 
 	start_time = ktime_get();
 
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index 6bd90df07b5c4..aad97376e0325 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -2686,26 +2686,29 @@ TRACE_EVENT(ext4_fc_replay,
 );
 
 TRACE_EVENT(ext4_fc_commit_start,
-	TP_PROTO(struct super_block *sb),
+	TP_PROTO(struct super_block *sb, tid_t commit_tid),
 
-	TP_ARGS(sb),
+	TP_ARGS(sb, commit_tid),
 
 	TP_STRUCT__entry(
 		__field(dev_t, dev)
+		__field(tid_t, tid)
 	),
 
 	TP_fast_assign(
 		__entry->dev = sb->s_dev;
+		__entry->tid = commit_tid;
 	),
 
-	TP_printk("fast_commit started on dev %d,%d",
-		  MAJOR(__entry->dev), MINOR(__entry->dev))
+	TP_printk("dev %d,%d tid %u", MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->tid)
 );
 
 TRACE_EVENT(ext4_fc_commit_stop,
-	    TP_PROTO(struct super_block *sb, int nblks, int reason),
+	    TP_PROTO(struct super_block *sb, int nblks, int reason,
+		     tid_t commit_tid),
 
-	TP_ARGS(sb, nblks, reason),
+	TP_ARGS(sb, nblks, reason, commit_tid),
 
 	TP_STRUCT__entry(
 		__field(dev_t, dev)
@@ -2714,6 +2717,7 @@ TRACE_EVENT(ext4_fc_commit_stop,
 		__field(int, num_fc)
 		__field(int, num_fc_ineligible)
 		__field(int, nblks_agg)
+		__field(tid_t, tid)
 	),
 
 	TP_fast_assign(
@@ -2724,12 +2728,13 @@ TRACE_EVENT(ext4_fc_commit_stop,
 		__entry->num_fc_ineligible =
 			EXT4_SB(sb)->s_fc_stats.fc_ineligible_commits;
 		__entry->nblks_agg = EXT4_SB(sb)->s_fc_stats.fc_numblks;
+		__entry->tid = commit_tid;
 	),
 
-	TP_printk("fc on [%d,%d] nblks %d, reason %d, fc = %d, ineligible = %d, agg_nblks %d",
+	TP_printk("dev %d,%d nblks %d, reason %d, fc = %d, ineligible = %d, agg_nblks %d, tid %u",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->nblks, __entry->reason, __entry->num_fc,
-		  __entry->num_fc_ineligible, __entry->nblks_agg)
+		  __entry->num_fc_ineligible, __entry->nblks_agg, __entry->tid)
 );
 
 #define FC_REASON_NAME_STAT(reason)					\
-- 
GitLab


From 163f11b8b3489fbf04be8d8d0e029f1dcd6546d1 Mon Sep 17 00:00:00 2001
From: Ritesh Harjani <riteshh@linux.ibm.com>
Date: Sat, 12 Mar 2022 11:09:55 +0530
Subject: [PATCH 1469/1586] ext4: fix remaining two trace events to use same
 printk convention

All ext4 & jbd2 trace events starts with "dev Major:Minor".
While we are still improving/adding the ftrace events for FC,
let's fix last two remaining trace events to follow the same
convention.

Signed-off-by: Ritesh Harjani <riteshh@linux.ibm.com>
Reviewed-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://lore.kernel.org/r/8f33b163f0f29df2491c03b79f8ac96890ea5184.1647057583.git.riteshh@linux.ibm.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 include/trace/events/ext4.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index aad97376e0325..ce2b3ad0ee86d 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -2654,7 +2654,7 @@ TRACE_EVENT(ext4_fc_replay_scan,
 		__entry->off = off;
 	),
 
-	TP_printk("FC scan pass on dev %d,%d: error %d, off %d",
+	TP_printk("dev %d,%d error %d, off %d",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->error, __entry->off)
 );
@@ -2680,7 +2680,7 @@ TRACE_EVENT(ext4_fc_replay,
 		__entry->priv2 = priv2;
 	),
 
-	TP_printk("FC Replay %d,%d: tag %d, ino %d, data1 %d, data2 %d",
+	TP_printk("dev %d,%d: tag %d, ino %d, data1 %d, data2 %d",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->tag, __entry->ino, __entry->priv1, __entry->priv2)
 );
-- 
GitLab


From 919adbfec29d5b89b3e45620653cbeeb0d42e6fd Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Sat, 12 Mar 2022 21:39:35 -0500
Subject: [PATCH 1470/1586] ext4: fix kernel doc warnings

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/balloc.c | 1 +
 fs/ext4/ioctl.c  | 6 +++---
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index a0fb0c4bdc7cd..78ee3ef795aec 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -411,6 +411,7 @@ verified:
  * ext4_read_block_bitmap_nowait()
  * @sb:			super block
  * @block_group:	given block group
+ * @ignore_locked:	ignore locked buffers
  *
  * Read the bitmap for a given block_group,and validate the
  * bits for block/inode/inode tables are set in the bitmaps
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index a8022c2c6a582..992229ca2d830 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -269,7 +269,7 @@ out:
 	return err ? err : 0;
 }
 
-/**
+/*
  * Swap memory between @a and @b for @len bytes.
  *
  * @a:          pointer to first memory area
@@ -290,7 +290,7 @@ static void memswap(void *a, void *b, size_t len)
 	}
 }
 
-/**
+/*
  * Swap i_data and associated attributes between @inode1 and @inode2.
  * This function is used for the primary swap between inode1 and inode2
  * and also to revert this primary swap in case of errors.
@@ -344,7 +344,7 @@ void ext4_reset_inode_seed(struct inode *inode)
 	ei->i_csum_seed = ext4_chksum(sbi, csum, (__u8 *)&gen, sizeof(gen));
 }
 
-/**
+/*
  * Swap the information from the given @inode and the inode
  * EXT4_BOOT_LOADER_INO. It will basically swap i_data and all other
  * important fields of the inodes.
-- 
GitLab


From fd6ca3f5b80f6dda381fe84211be3b491f28bf0f Mon Sep 17 00:00:00 2001
From: Minghao Chi <chi.minghao@zte.com.cn>
Date: Tue, 15 Mar 2022 02:34:12 +0000
Subject: [PATCH 1471/1586] hwmon: (scpi-hwmon): Use of_device_get_match_data()

Use of_device_get_match_data() to simplify the code.

Reported-by: Zeal Robot <zealci@zte.com.cn>
Signed-off-by: Minghao Chi <chi.minghao@zte.com.cn>
Reviewed-by: Jean Delvare <jdelvare@suse.de>
Link: https://lore.kernel.org/r/20220315023412.2118415-1-chi.minghao@zte.com.cn
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/scpi-hwmon.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/hwmon/scpi-hwmon.c b/drivers/hwmon/scpi-hwmon.c
index 919877970ae3b..5187c6dd5a4f0 100644
--- a/drivers/hwmon/scpi-hwmon.c
+++ b/drivers/hwmon/scpi-hwmon.c
@@ -141,7 +141,6 @@ static int scpi_hwmon_probe(struct platform_device *pdev)
 	struct scpi_ops *scpi_ops;
 	struct device *hwdev, *dev = &pdev->dev;
 	struct scpi_sensors *scpi_sensors;
-	const struct of_device_id *of_id;
 	int idx, ret;
 
 	scpi_ops = get_scpi_ops();
@@ -171,12 +170,11 @@ static int scpi_hwmon_probe(struct platform_device *pdev)
 
 	scpi_sensors->scpi_ops = scpi_ops;
 
-	of_id = of_match_device(scpi_of_match, &pdev->dev);
-	if (!of_id) {
+	scale = of_device_get_match_data(&pdev->dev);
+	if (!scale) {
 		dev_err(&pdev->dev, "Unable to initialize scpi-hwmon data\n");
 		return -ENODEV;
 	}
-	scale = of_id->data;
 
 	for (i = 0, idx = 0; i < nr_sensors; i++) {
 		struct sensor_data *sensor = &scpi_sensors->data[idx];
-- 
GitLab


From ed6e0401e68bdfe08de9b44968fb235ff10ccee1 Mon Sep 17 00:00:00 2001
From: Jeffle Xu <jefflexu@linux.alibaba.com>
Date: Wed, 9 Feb 2022 14:00:52 +0800
Subject: [PATCH 1472/1586] erofs: use meta buffers for erofs_read_superblock()

The only change is that, meta buffers read cache page without __GFP_FS
flag, which shall not matter.

Link: https://lore.kernel.org/r/20220209060108.43051-7-jefflexu@linux.alibaba.com
Signed-off-by: Jeffle Xu <jefflexu@linux.alibaba.com>
Reviewed-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
 fs/erofs/super.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index 915eefe0d7e2c..12755217631f4 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -281,21 +281,19 @@ static int erofs_init_devices(struct super_block *sb,
 static int erofs_read_superblock(struct super_block *sb)
 {
 	struct erofs_sb_info *sbi;
-	struct page *page;
+	struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
 	struct erofs_super_block *dsb;
 	unsigned int blkszbits;
 	void *data;
 	int ret;
 
-	page = read_mapping_page(sb->s_bdev->bd_inode->i_mapping, 0, NULL);
-	if (IS_ERR(page)) {
+	data = erofs_read_metabuf(&buf, sb, 0, EROFS_KMAP);
+	if (IS_ERR(data)) {
 		erofs_err(sb, "cannot read erofs superblock");
-		return PTR_ERR(page);
+		return PTR_ERR(data);
 	}
 
 	sbi = EROFS_SB(sb);
-
-	data = kmap(page);
 	dsb = (struct erofs_super_block *)(data + EROFS_SUPER_OFFSET);
 
 	ret = -EINVAL;
@@ -365,8 +363,7 @@ static int erofs_read_superblock(struct super_block *sb)
 	if (erofs_sb_has_ztailpacking(sbi))
 		erofs_info(sb, "EXPERIMENTAL compressed inline data feature in use. Use at your own risk!");
 out:
-	kunmap(page);
-	put_page(page);
+	erofs_put_metabuf(&buf);
 	return ret;
 }
 
-- 
GitLab


From 5c6dcc57e2e50553405f2cf8b949f99b8820a685 Mon Sep 17 00:00:00 2001
From: Gao Xiang <hsiangkao@linux.alibaba.com>
Date: Wed, 2 Mar 2022 03:49:50 +0800
Subject: [PATCH 1473/1586] erofs: get rid of `struct z_erofs_collector'

Avoid `struct z_erofs_collector' since there is another context
structure called "struct z_erofs_decompress_frontend".

No logic changes.

Link: https://lore.kernel.org/r/20220301194951.106227-1-hsiangkao@linux.alibaba.com
Reviewed-by: Yue Hu <huyue2@coolpad.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
 fs/erofs/zdata.c | 163 ++++++++++++++++++++++-------------------------
 1 file changed, 77 insertions(+), 86 deletions(-)

diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 423bc1a61da5c..2673fc105861f 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -192,7 +192,10 @@ enum z_erofs_collectmode {
 	COLLECT_PRIMARY_FOLLOWED,
 };
 
-struct z_erofs_collector {
+struct z_erofs_decompress_frontend {
+	struct inode *const inode;
+	struct erofs_map_blocks map;
+
 	struct z_erofs_pagevec_ctor vector;
 
 	struct z_erofs_pcluster *pcl, *tailpcl;
@@ -202,13 +205,6 @@ struct z_erofs_collector {
 	z_erofs_next_pcluster_t owned_head;
 
 	enum z_erofs_collectmode mode;
-};
-
-struct z_erofs_decompress_frontend {
-	struct inode *const inode;
-
-	struct z_erofs_collector clt;
-	struct erofs_map_blocks map;
 
 	bool readahead;
 	/* used for applying cache strategy on the fly */
@@ -216,30 +212,26 @@ struct z_erofs_decompress_frontend {
 	erofs_off_t headoffset;
 };
 
-#define COLLECTOR_INIT() { \
-	.owned_head = Z_EROFS_PCLUSTER_TAIL, \
-	.mode = COLLECT_PRIMARY_FOLLOWED }
-
 #define DECOMPRESS_FRONTEND_INIT(__i) { \
-	.inode = __i, .clt = COLLECTOR_INIT(), \
-	.backmost = true, }
+	.inode = __i, .owned_head = Z_EROFS_PCLUSTER_TAIL, \
+	.mode = COLLECT_PRIMARY_FOLLOWED }
 
 static struct page *z_pagemap_global[Z_EROFS_VMAP_GLOBAL_PAGES];
 static DEFINE_MUTEX(z_pagemap_global_lock);
 
-static void preload_compressed_pages(struct z_erofs_collector *clt,
+static void preload_compressed_pages(struct z_erofs_decompress_frontend *fe,
 				     struct address_space *mc,
 				     enum z_erofs_cache_alloctype type,
 				     struct page **pagepool)
 {
-	struct z_erofs_pcluster *pcl = clt->pcl;
+	struct z_erofs_pcluster *pcl = fe->pcl;
 	bool standalone = true;
 	gfp_t gfp = (mapping_gfp_mask(mc) & ~__GFP_DIRECT_RECLAIM) |
 			__GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;
 	struct page **pages;
 	pgoff_t index;
 
-	if (clt->mode < COLLECT_PRIMARY_FOLLOWED)
+	if (fe->mode < COLLECT_PRIMARY_FOLLOWED)
 		return;
 
 	pages = pcl->compressed_pages;
@@ -288,7 +280,7 @@ static void preload_compressed_pages(struct z_erofs_collector *clt,
 	 * managed cache since it can be moved to the bypass queue instead.
 	 */
 	if (standalone)
-		clt->mode = COLLECT_PRIMARY_FOLLOWED_NOINPLACE;
+		fe->mode = COLLECT_PRIMARY_FOLLOWED_NOINPLACE;
 }
 
 /* called by erofs_shrinker to get rid of all compressed_pages */
@@ -350,47 +342,47 @@ int erofs_try_to_free_cached_page(struct page *page)
 }
 
 /* page_type must be Z_EROFS_PAGE_TYPE_EXCLUSIVE */
-static bool z_erofs_try_inplace_io(struct z_erofs_collector *clt,
+static bool z_erofs_try_inplace_io(struct z_erofs_decompress_frontend *fe,
 				   struct page *page)
 {
-	struct z_erofs_pcluster *const pcl = clt->pcl;
+	struct z_erofs_pcluster *const pcl = fe->pcl;
 
-	while (clt->icpage_ptr > pcl->compressed_pages)
-		if (!cmpxchg(--clt->icpage_ptr, NULL, page))
+	while (fe->icpage_ptr > pcl->compressed_pages)
+		if (!cmpxchg(--fe->icpage_ptr, NULL, page))
 			return true;
 	return false;
 }
 
 /* callers must be with collection lock held */
-static int z_erofs_attach_page(struct z_erofs_collector *clt,
+static int z_erofs_attach_page(struct z_erofs_decompress_frontend *fe,
 			       struct page *page, enum z_erofs_page_type type,
 			       bool pvec_safereuse)
 {
 	int ret;
 
 	/* give priority for inplaceio */
-	if (clt->mode >= COLLECT_PRIMARY &&
+	if (fe->mode >= COLLECT_PRIMARY &&
 	    type == Z_EROFS_PAGE_TYPE_EXCLUSIVE &&
-	    z_erofs_try_inplace_io(clt, page))
+	    z_erofs_try_inplace_io(fe, page))
 		return 0;
 
-	ret = z_erofs_pagevec_enqueue(&clt->vector, page, type,
+	ret = z_erofs_pagevec_enqueue(&fe->vector, page, type,
 				      pvec_safereuse);
-	clt->cl->vcnt += (unsigned int)ret;
+	fe->cl->vcnt += (unsigned int)ret;
 	return ret ? 0 : -EAGAIN;
 }
 
-static void z_erofs_try_to_claim_pcluster(struct z_erofs_collector *clt)
+static void z_erofs_try_to_claim_pcluster(struct z_erofs_decompress_frontend *f)
 {
-	struct z_erofs_pcluster *pcl = clt->pcl;
-	z_erofs_next_pcluster_t *owned_head = &clt->owned_head;
+	struct z_erofs_pcluster *pcl = f->pcl;
+	z_erofs_next_pcluster_t *owned_head = &f->owned_head;
 
 	/* type 1, nil pcluster (this pcluster doesn't belong to any chain.) */
 	if (cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_NIL,
 		    *owned_head) == Z_EROFS_PCLUSTER_NIL) {
 		*owned_head = &pcl->next;
 		/* so we can attach this pcluster to our submission chain. */
-		clt->mode = COLLECT_PRIMARY_FOLLOWED;
+		f->mode = COLLECT_PRIMARY_FOLLOWED;
 		return;
 	}
 
@@ -401,24 +393,24 @@ static void z_erofs_try_to_claim_pcluster(struct z_erofs_collector *clt)
 	if (cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_TAIL,
 		    *owned_head) == Z_EROFS_PCLUSTER_TAIL) {
 		*owned_head = Z_EROFS_PCLUSTER_TAIL;
-		clt->mode = COLLECT_PRIMARY_HOOKED;
-		clt->tailpcl = NULL;
+		f->mode = COLLECT_PRIMARY_HOOKED;
+		f->tailpcl = NULL;
 		return;
 	}
 	/* type 3, it belongs to a chain, but it isn't the end of the chain */
-	clt->mode = COLLECT_PRIMARY;
+	f->mode = COLLECT_PRIMARY;
 }
 
-static int z_erofs_lookup_collection(struct z_erofs_collector *clt,
+static int z_erofs_lookup_collection(struct z_erofs_decompress_frontend *fe,
 				     struct inode *inode,
 				     struct erofs_map_blocks *map)
 {
-	struct z_erofs_pcluster *pcl = clt->pcl;
+	struct z_erofs_pcluster *pcl = fe->pcl;
 	struct z_erofs_collection *cl;
 	unsigned int length;
 
 	/* to avoid unexpected loop formed by corrupted images */
-	if (clt->owned_head == &pcl->next || pcl == clt->tailpcl) {
+	if (fe->owned_head == &pcl->next || pcl == fe->tailpcl) {
 		DBG_BUGON(1);
 		return -EFSCORRUPTED;
 	}
@@ -449,15 +441,15 @@ static int z_erofs_lookup_collection(struct z_erofs_collector *clt,
 	}
 	mutex_lock(&cl->lock);
 	/* used to check tail merging loop due to corrupted images */
-	if (clt->owned_head == Z_EROFS_PCLUSTER_TAIL)
-		clt->tailpcl = pcl;
+	if (fe->owned_head == Z_EROFS_PCLUSTER_TAIL)
+		fe->tailpcl = pcl;
 
-	z_erofs_try_to_claim_pcluster(clt);
-	clt->cl = cl;
+	z_erofs_try_to_claim_pcluster(fe);
+	fe->cl = cl;
 	return 0;
 }
 
-static int z_erofs_register_collection(struct z_erofs_collector *clt,
+static int z_erofs_register_collection(struct z_erofs_decompress_frontend *fe,
 				       struct inode *inode,
 				       struct erofs_map_blocks *map)
 {
@@ -485,8 +477,8 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt,
 			Z_EROFS_PCLUSTER_FULL_LENGTH : 0);
 
 	/* new pclusters should be claimed as type 1, primary and followed */
-	pcl->next = clt->owned_head;
-	clt->mode = COLLECT_PRIMARY_FOLLOWED;
+	pcl->next = fe->owned_head;
+	fe->mode = COLLECT_PRIMARY_FOLLOWED;
 
 	cl = z_erofs_primarycollection(pcl);
 	cl->pageofs = map->m_la & ~PAGE_MASK;
@@ -512,18 +504,18 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt,
 		}
 
 		if (grp != &pcl->obj) {
-			clt->pcl = container_of(grp,
+			fe->pcl = container_of(grp,
 					struct z_erofs_pcluster, obj);
 			err = -EEXIST;
 			goto err_out;
 		}
 	}
 	/* used to check tail merging loop due to corrupted images */
-	if (clt->owned_head == Z_EROFS_PCLUSTER_TAIL)
-		clt->tailpcl = pcl;
-	clt->owned_head = &pcl->next;
-	clt->pcl = pcl;
-	clt->cl = cl;
+	if (fe->owned_head == Z_EROFS_PCLUSTER_TAIL)
+		fe->tailpcl = pcl;
+	fe->owned_head = &pcl->next;
+	fe->pcl = pcl;
+	fe->cl = cl;
 	return 0;
 
 err_out:
@@ -532,18 +524,18 @@ err_out:
 	return err;
 }
 
-static int z_erofs_collector_begin(struct z_erofs_collector *clt,
+static int z_erofs_collector_begin(struct z_erofs_decompress_frontend *fe,
 				   struct inode *inode,
 				   struct erofs_map_blocks *map)
 {
 	struct erofs_workgroup *grp;
 	int ret;
 
-	DBG_BUGON(clt->cl);
+	DBG_BUGON(fe->cl);
 
 	/* must be Z_EROFS_PCLUSTER_TAIL or pointed to previous collection */
-	DBG_BUGON(clt->owned_head == Z_EROFS_PCLUSTER_NIL);
-	DBG_BUGON(clt->owned_head == Z_EROFS_PCLUSTER_TAIL_CLOSED);
+	DBG_BUGON(fe->owned_head == Z_EROFS_PCLUSTER_NIL);
+	DBG_BUGON(fe->owned_head == Z_EROFS_PCLUSTER_TAIL_CLOSED);
 
 	if (map->m_flags & EROFS_MAP_META) {
 		if ((map->m_pa & ~PAGE_MASK) + map->m_plen > PAGE_SIZE) {
@@ -555,28 +547,28 @@ static int z_erofs_collector_begin(struct z_erofs_collector *clt,
 
 	grp = erofs_find_workgroup(inode->i_sb, map->m_pa >> PAGE_SHIFT);
 	if (grp) {
-		clt->pcl = container_of(grp, struct z_erofs_pcluster, obj);
+		fe->pcl = container_of(grp, struct z_erofs_pcluster, obj);
 	} else {
 tailpacking:
-		ret = z_erofs_register_collection(clt, inode, map);
+		ret = z_erofs_register_collection(fe, inode, map);
 		if (!ret)
 			goto out;
 		if (ret != -EEXIST)
 			return ret;
 	}
 
-	ret = z_erofs_lookup_collection(clt, inode, map);
+	ret = z_erofs_lookup_collection(fe, inode, map);
 	if (ret) {
-		erofs_workgroup_put(&clt->pcl->obj);
+		erofs_workgroup_put(&fe->pcl->obj);
 		return ret;
 	}
 
 out:
-	z_erofs_pagevec_ctor_init(&clt->vector, Z_EROFS_NR_INLINE_PAGEVECS,
-				  clt->cl->pagevec, clt->cl->vcnt);
+	z_erofs_pagevec_ctor_init(&fe->vector, Z_EROFS_NR_INLINE_PAGEVECS,
+				  fe->cl->pagevec, fe->cl->vcnt);
 	/* since file-backed online pages are traversed in reverse order */
-	clt->icpage_ptr = clt->pcl->compressed_pages +
-			z_erofs_pclusterpages(clt->pcl);
+	fe->icpage_ptr = fe->pcl->compressed_pages +
+			z_erofs_pclusterpages(fe->pcl);
 	return 0;
 }
 
@@ -610,24 +602,24 @@ static void z_erofs_collection_put(struct z_erofs_collection *cl)
 	erofs_workgroup_put(&pcl->obj);
 }
 
-static bool z_erofs_collector_end(struct z_erofs_collector *clt)
+static bool z_erofs_collector_end(struct z_erofs_decompress_frontend *fe)
 {
-	struct z_erofs_collection *cl = clt->cl;
+	struct z_erofs_collection *cl = fe->cl;
 
 	if (!cl)
 		return false;
 
-	z_erofs_pagevec_ctor_exit(&clt->vector, false);
+	z_erofs_pagevec_ctor_exit(&fe->vector, false);
 	mutex_unlock(&cl->lock);
 
 	/*
 	 * if all pending pages are added, don't hold its reference
 	 * any longer if the pcluster isn't hosted by ourselves.
 	 */
-	if (clt->mode < COLLECT_PRIMARY_FOLLOWED_NOINPLACE)
+	if (fe->mode < COLLECT_PRIMARY_FOLLOWED_NOINPLACE)
 		z_erofs_collection_put(cl);
 
-	clt->cl = NULL;
+	fe->cl = NULL;
 	return true;
 }
 
@@ -651,7 +643,6 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
 	struct inode *const inode = fe->inode;
 	struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
 	struct erofs_map_blocks *const map = &fe->map;
-	struct z_erofs_collector *const clt = &fe->clt;
 	const loff_t offset = page_offset(page);
 	bool tight = true;
 
@@ -672,7 +663,7 @@ repeat:
 	if (offset + cur >= map->m_la &&
 	    offset + cur < map->m_la + map->m_llen) {
 		/* didn't get a valid collection previously (very rare) */
-		if (!clt->cl)
+		if (!fe->cl)
 			goto restart_now;
 		goto hitted;
 	}
@@ -680,7 +671,7 @@ repeat:
 	/* go ahead the next map_blocks */
 	erofs_dbg("%s: [out-of-range] pos %llu", __func__, offset + cur);
 
-	if (z_erofs_collector_end(clt))
+	if (z_erofs_collector_end(fe))
 		fe->backmost = false;
 
 	map->m_la = offset + cur;
@@ -693,11 +684,11 @@ restart_now:
 	if (!(map->m_flags & EROFS_MAP_MAPPED))
 		goto hitted;
 
-	err = z_erofs_collector_begin(clt, inode, map);
+	err = z_erofs_collector_begin(fe, inode, map);
 	if (err)
 		goto err_out;
 
-	if (z_erofs_is_inline_pcluster(clt->pcl)) {
+	if (z_erofs_is_inline_pcluster(fe->pcl)) {
 		void *mp;
 
 		mp = erofs_read_metabuf(&fe->map.buf, inode->i_sb,
@@ -709,8 +700,8 @@ restart_now:
 			goto err_out;
 		}
 		get_page(fe->map.buf.page);
-		WRITE_ONCE(clt->pcl->compressed_pages[0], fe->map.buf.page);
-		clt->mode = COLLECT_PRIMARY_FOLLOWED_NOINPLACE;
+		WRITE_ONCE(fe->pcl->compressed_pages[0], fe->map.buf.page);
+		fe->mode = COLLECT_PRIMARY_FOLLOWED_NOINPLACE;
 	} else {
 		/* preload all compressed pages (can change mode if needed) */
 		if (should_alloc_managed_pages(fe, sbi->opt.cache_strategy,
@@ -719,7 +710,7 @@ restart_now:
 		else
 			cache_strategy = DONTALLOC;
 
-		preload_compressed_pages(clt, MNGD_MAPPING(sbi),
+		preload_compressed_pages(fe, MNGD_MAPPING(sbi),
 					 cache_strategy, pagepool);
 	}
 
@@ -730,8 +721,8 @@ hitted:
 	 * those chains are handled asynchronously thus the page cannot be used
 	 * for inplace I/O or pagevec (should be processed in strict order.)
 	 */
-	tight &= (clt->mode >= COLLECT_PRIMARY_HOOKED &&
-		  clt->mode != COLLECT_PRIMARY_FOLLOWED_NOINPLACE);
+	tight &= (fe->mode >= COLLECT_PRIMARY_HOOKED &&
+		  fe->mode != COLLECT_PRIMARY_FOLLOWED_NOINPLACE);
 
 	cur = end - min_t(unsigned int, offset + end - map->m_la, end);
 	if (!(map->m_flags & EROFS_MAP_MAPPED)) {
@@ -746,18 +737,18 @@ hitted:
 				Z_EROFS_VLE_PAGE_TYPE_TAIL_SHARED));
 
 	if (cur)
-		tight &= (clt->mode >= COLLECT_PRIMARY_FOLLOWED);
+		tight &= (fe->mode >= COLLECT_PRIMARY_FOLLOWED);
 
 retry:
-	err = z_erofs_attach_page(clt, page, page_type,
-				  clt->mode >= COLLECT_PRIMARY_FOLLOWED);
+	err = z_erofs_attach_page(fe, page, page_type,
+				  fe->mode >= COLLECT_PRIMARY_FOLLOWED);
 	/* should allocate an additional short-lived page for pagevec */
 	if (err == -EAGAIN) {
 		struct page *const newpage =
 				alloc_page(GFP_NOFS | __GFP_NOFAIL);
 
 		set_page_private(newpage, Z_EROFS_SHORTLIVED_PAGE);
-		err = z_erofs_attach_page(clt, newpage,
+		err = z_erofs_attach_page(fe, newpage,
 					  Z_EROFS_PAGE_TYPE_EXCLUSIVE, true);
 		if (!err)
 			goto retry;
@@ -773,7 +764,7 @@ retry:
 	/* bump up the number of spiltted parts of a page */
 	++spiltted;
 	/* also update nr_pages */
-	clt->cl->nr_pages = max_t(pgoff_t, clt->cl->nr_pages, index + 1);
+	fe->cl->nr_pages = max_t(pgoff_t, fe->cl->nr_pages, index + 1);
 next_part:
 	/* can be used for verification */
 	map->m_llen = offset + cur - map->m_la;
@@ -1309,7 +1300,7 @@ static void z_erofs_submit_queue(struct super_block *sb,
 	z_erofs_next_pcluster_t qtail[NR_JOBQUEUES];
 	struct z_erofs_decompressqueue *q[NR_JOBQUEUES];
 	void *bi_private;
-	z_erofs_next_pcluster_t owned_head = f->clt.owned_head;
+	z_erofs_next_pcluster_t owned_head = f->owned_head;
 	/* bio is NULL initially, so no need to initialize last_{index,bdev} */
 	pgoff_t last_index;
 	struct block_device *last_bdev;
@@ -1417,7 +1408,7 @@ static void z_erofs_runqueue(struct super_block *sb,
 {
 	struct z_erofs_decompressqueue io[NR_JOBQUEUES];
 
-	if (f->clt.owned_head == Z_EROFS_PCLUSTER_TAIL)
+	if (f->owned_head == Z_EROFS_PCLUSTER_TAIL)
 		return;
 	z_erofs_submit_queue(sb, f, pagepool, io, &force_fg);
 
@@ -1517,7 +1508,7 @@ static int z_erofs_readpage(struct file *file, struct page *page)
 	err = z_erofs_do_read_page(&f, page, &pagepool);
 	z_erofs_pcluster_readmore(&f, NULL, 0, &pagepool, false);
 
-	(void)z_erofs_collector_end(&f.clt);
+	(void)z_erofs_collector_end(&f);
 
 	/* if some compressed cluster ready, need submit them anyway */
 	z_erofs_runqueue(inode->i_sb, &f, &pagepool,
@@ -1567,7 +1558,7 @@ static void z_erofs_readahead(struct readahead_control *rac)
 		put_page(page);
 	}
 	z_erofs_pcluster_readmore(&f, rac, 0, &pagepool, false);
-	(void)z_erofs_collector_end(&f.clt);
+	(void)z_erofs_collector_end(&f);
 
 	z_erofs_runqueue(inode->i_sb, &f, &pagepool,
 			 z_erofs_get_sync_decompress_policy(sbi, nr_pages));
-- 
GitLab


From 6f39d1e1ca46782bf11b8de016e904793d46aed0 Mon Sep 17 00:00:00 2001
From: Gao Xiang <hsiangkao@linux.alibaba.com>
Date: Wed, 2 Mar 2022 03:49:51 +0800
Subject: [PATCH 1474/1586] erofs: clean up preload_compressed_pages()

Rename preload_compressed_pages() as z_erofs_bind_cache()
since we're trying to prepare for adapting folios.

Also, add a comment for the gfp setting. No logic changes.

Link: https://lore.kernel.org/r/20220301194951.106227-2-hsiangkao@linux.alibaba.com
Reviewed-by: Yue Hu <huyue2@coolpad.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
 fs/erofs/zdata.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 2673fc105861f..59aecf42e45c9 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -219,13 +219,17 @@ struct z_erofs_decompress_frontend {
 static struct page *z_pagemap_global[Z_EROFS_VMAP_GLOBAL_PAGES];
 static DEFINE_MUTEX(z_pagemap_global_lock);
 
-static void preload_compressed_pages(struct z_erofs_decompress_frontend *fe,
-				     struct address_space *mc,
-				     enum z_erofs_cache_alloctype type,
-				     struct page **pagepool)
+static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe,
+			       enum z_erofs_cache_alloctype type,
+			       struct page **pagepool)
 {
+	struct address_space *mc = MNGD_MAPPING(EROFS_I_SB(fe->inode));
 	struct z_erofs_pcluster *pcl = fe->pcl;
 	bool standalone = true;
+	/*
+	 * optimistic allocation without direct reclaim since inplace I/O
+	 * can be used if low memory otherwise.
+	 */
 	gfp_t gfp = (mapping_gfp_mask(mc) & ~__GFP_DIRECT_RECLAIM) |
 			__GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;
 	struct page **pages;
@@ -703,17 +707,15 @@ restart_now:
 		WRITE_ONCE(fe->pcl->compressed_pages[0], fe->map.buf.page);
 		fe->mode = COLLECT_PRIMARY_FOLLOWED_NOINPLACE;
 	} else {
-		/* preload all compressed pages (can change mode if needed) */
+		/* bind cache first when cached decompression is preferred */
 		if (should_alloc_managed_pages(fe, sbi->opt.cache_strategy,
 					       map->m_la))
 			cache_strategy = TRYALLOC;
 		else
 			cache_strategy = DONTALLOC;
 
-		preload_compressed_pages(fe, MNGD_MAPPING(sbi),
-					 cache_strategy, pagepool);
+		z_erofs_bind_cache(fe, cache_strategy, pagepool);
 	}
-
 hitted:
 	/*
 	 * Ensure the current partial page belongs to this submit chain rather
-- 
GitLab


From faac509507e2341093c32b6607a82430bccfb78f Mon Sep 17 00:00:00 2001
From: lihongnan <hongnan.lhn@alibaba-inc.com>
Date: Tue, 8 Mar 2022 11:41:39 +0800
Subject: [PATCH 1475/1586] Documentation/filesystem/dax: update DAX
 description on erofs

Add missing erofs fsdax description since fsdax has been supported
on erofs from Linux 5.15.

Link: https://lore.kernel.org/r/20220308034139.93748-1-hongnan.li@linux.alibaba.com
Reviewed-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Acked-by: Chao Yu <chao@kernel.org>
Signed-off-by: lihongnan <hongnan.lhn@alibaba-inc.com>
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
 Documentation/filesystems/dax.rst | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Documentation/filesystems/dax.rst b/Documentation/filesystems/dax.rst
index e3b30429d703d..c04609d8ee243 100644
--- a/Documentation/filesystems/dax.rst
+++ b/Documentation/filesystems/dax.rst
@@ -23,11 +23,11 @@ on it as usual.  The `DAX` code currently only supports files with a block
 size equal to your kernel's `PAGE_SIZE`, so you may need to specify a block
 size when creating the filesystem.
 
-Currently 4 filesystems support `DAX`: ext2, ext4, xfs and virtiofs.
+Currently 5 filesystems support `DAX`: ext2, ext4, xfs, virtiofs and erofs.
 Enabling `DAX` on them is different.
 
-Enabling DAX on ext2
---------------------
+Enabling DAX on ext2 and erofs
+------------------------------
 
 When mounting the filesystem, use the ``-o dax`` option on the command line or
 add 'dax' to the options in ``/etc/fstab``.  This works to enable `DAX` on all files
-- 
GitLab


From d467e980d0239fd95cc93b0995199973624a4825 Mon Sep 17 00:00:00 2001
From: Gao Xiang <hsiangkao@linux.alibaba.com>
Date: Fri, 11 Mar 2022 01:34:48 +0800
Subject: [PATCH 1476/1586] erofs: silence warnings related to impossible
 m_plen

Dan reported two smatch warnings [1],
.. warn: should '1 << lclusterbits' be a 64 bit type?
.. warn: should 'm->compressedlcs << lclusterbits' be a 64 bit type?

In practice, m_plen cannot be more than 1MiB due to on-disk constraint
for the compression mode, so we're always safe here.

In order to make static analyzers happy and not report again, let's
silence them instead.

[1] https://lore.kernel.org/r/202203091002.lJVzsX6e-lkp@intel.com

Reported-by: kernel test robot <lkp@intel.com>
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Link: https://lore.kernel.org/r/20220310173448.19962-1-hsiangkao@linux.alibaba.com
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
 fs/erofs/zmap.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c
index 361b1d6e4bf9e..b4059b9c3bacf 100644
--- a/fs/erofs/zmap.c
+++ b/fs/erofs/zmap.c
@@ -494,7 +494,7 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m,
 	     !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1)) ||
 	    ((m->headtype == Z_EROFS_VLE_CLUSTER_TYPE_HEAD2) &&
 	     !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_2))) {
-		map->m_plen = 1 << lclusterbits;
+		map->m_plen = 1ULL << lclusterbits;
 		return 0;
 	}
 	lcn = m->lcn + 1;
@@ -540,7 +540,7 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m,
 		return -EFSCORRUPTED;
 	}
 out:
-	map->m_plen = m->compressedlcs << lclusterbits;
+	map->m_plen = (u64)m->compressedlcs << lclusterbits;
 	return 0;
 err_bonus_cblkcnt:
 	erofs_err(m->inode->i_sb,
-- 
GitLab


From 886e44c9298a6b428ae046e2fa092ca52e822e6a Mon Sep 17 00:00:00 2001
From: Jiasheng Jiang <jiasheng@iscas.ac.cn>
Date: Mon, 14 Mar 2022 10:01:25 +0800
Subject: [PATCH 1477/1586] hv_netvsc: Add check for kvmalloc_array

As the potential failure of the kvmalloc_array(),
it should be better to check and restore the 'data'
if fails in order to avoid the dereference of the
NULL pointer.

Fixes: 6ae746711263 ("hv_netvsc: Add per-cpu ethtool stats for netvsc")
Signed-off-by: Jiasheng Jiang <jiasheng@iscas.ac.cn>
Link: https://lore.kernel.org/r/20220314020125.2365084-1-jiasheng@iscas.ac.cn
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/hyperv/netvsc_drv.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 3646469433b1c..fde1c492ca02a 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -1587,6 +1587,9 @@ static void netvsc_get_ethtool_stats(struct net_device *dev,
 	pcpu_sum = kvmalloc_array(num_possible_cpus(),
 				  sizeof(struct netvsc_ethtool_pcpu_stats),
 				  GFP_KERNEL);
+	if (!pcpu_sum)
+		return;
+
 	netvsc_get_pcpu_stats(dev, pcpu_sum);
 	for_each_present_cpu(cpu) {
 		struct netvsc_ethtool_pcpu_stats *this_sum = &pcpu_sum[cpu];
-- 
GitLab


From b739e137052069c996ab8b3bc9c25be501ecc63b Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 15 Mar 2022 12:58:06 +0100
Subject: [PATCH 1478/1586] nvme: cleanup how disk->disk_name is assigned

They way how assigning the disk name and commenting on why it is done
is split over core.c and multipath.c seems to be rather confusing.

Now that ns_head->disk always exists we can do all the work in core.c
and have a single big comment explaining the issues.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
---
 drivers/nvme/host/core.c      | 22 ++++++++++++++++++----
 drivers/nvme/host/multipath.c | 24 +-----------------------
 drivers/nvme/host/nvme.h      |  8 ++------
 3 files changed, 21 insertions(+), 33 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index f8084ded69e50..fd9878671d734 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3967,13 +3967,27 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
 		goto out_cleanup_disk;
 
 	/*
-	 * Without the multipath code enabled, multiple controller per
-	 * subsystems are visible as devices and thus we cannot use the
-	 * subsystem instance.
+	 * If multipathing is enabled, the device name for all disks and not
+	 * just those that represent shared namespaces needs to be based on the
+	 * subsystem instance.  Using the controller instance for private
+	 * namespaces could lead to naming collisions between shared and private
+	 * namespaces if they don't use a common numbering scheme.
+	 *
+	 * If multipathing is not enabled, disk names must use the controller
+	 * instance as shared namespaces will show up as multiple block
+	 * devices.
 	 */
-	if (!nvme_mpath_set_disk_name(ns, disk->disk_name, &disk->flags))
+	if (ns->head->disk) {
+		sprintf(disk->disk_name, "nvme%dc%dn%d", ctrl->subsys->instance,
+			ctrl->instance, ns->head->instance);
+		disk->flags |= GENHD_FL_HIDDEN;
+	} else if (multipath) {
+		sprintf(disk->disk_name, "nvme%dn%d", ctrl->subsys->instance,
+			ns->head->instance);
+	} else {
 		sprintf(disk->disk_name, "nvme%dn%d", ctrl->instance,
 			ns->head->instance);
+	}
 
 	if (nvme_update_ns_info(ns, id))
 		goto out_unlink_ns;
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index d13b81cd6225c..c97d7f843977c 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -9,7 +9,7 @@
 #include <trace/events/block.h>
 #include "nvme.h"
 
-static bool multipath = true;
+bool multipath = true;
 module_param(multipath, bool, 0444);
 MODULE_PARM_DESC(multipath,
 	"turn on native support for multiple controllers per subsystem");
@@ -80,28 +80,6 @@ void nvme_mpath_start_freeze(struct nvme_subsystem *subsys)
 			blk_freeze_queue_start(h->disk->queue);
 }
 
-/*
- * If multipathing is enabled we need to always use the subsystem instance
- * number for numbering our devices to avoid conflicts between subsystems that
- * have multiple controllers and thus use the multipath-aware subsystem node
- * and those that have a single controller and use the controller node
- * directly.
- */
-bool nvme_mpath_set_disk_name(struct nvme_ns *ns, char *disk_name, int *flags)
-{
-	if (!multipath)
-		return false;
-	if (!ns->head->disk) {
-		sprintf(disk_name, "nvme%dn%d", ns->ctrl->subsys->instance,
-			ns->head->instance);
-		return true;
-	}
-	sprintf(disk_name, "nvme%dc%dn%d", ns->ctrl->subsys->instance,
-		ns->ctrl->instance, ns->head->instance);
-	*flags = GENHD_FL_HIDDEN;
-	return true;
-}
-
 void nvme_failover_req(struct request *req)
 {
 	struct nvme_ns *ns = req->q->queuedata;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 587d92df118b7..47badb76c654c 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -770,7 +770,6 @@ void nvme_mpath_unfreeze(struct nvme_subsystem *subsys);
 void nvme_mpath_wait_freeze(struct nvme_subsystem *subsys);
 void nvme_mpath_start_freeze(struct nvme_subsystem *subsys);
 void nvme_mpath_default_iopolicy(struct nvme_subsystem *subsys);
-bool nvme_mpath_set_disk_name(struct nvme_ns *ns, char *disk_name, int *flags);
 void nvme_failover_req(struct request *req);
 void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl);
 int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,struct nvme_ns_head *head);
@@ -793,20 +792,17 @@ static inline void nvme_trace_bio_complete(struct request *req)
 		trace_block_bio_complete(ns->head->disk->queue, req->bio);
 }
 
+extern bool multipath;
 extern struct device_attribute dev_attr_ana_grpid;
 extern struct device_attribute dev_attr_ana_state;
 extern struct device_attribute subsys_attr_iopolicy;
 
 #else
+#define multipath false
 static inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl)
 {
 	return false;
 }
-static inline bool nvme_mpath_set_disk_name(struct nvme_ns *ns, char *disk_name,
-		int *flags)
-{
-	return false;
-}
 static inline void nvme_failover_req(struct request *req)
 {
 }
-- 
GitLab


From e559398f47e090394bbbd9006349c858e1ba80da Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 15 Mar 2022 15:53:59 +0100
Subject: [PATCH 1479/1586] nvme: remove nvme_alloc_request and
 nvme_alloc_request_qid

Just open code the allocation + initialization in the callers.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
---
 drivers/nvme/host/core.c       | 46 ++++++++--------------------------
 drivers/nvme/host/ioctl.c      |  3 ++-
 drivers/nvme/host/nvme.h       |  8 ++++--
 drivers/nvme/host/pci.c        | 17 +++++++------
 drivers/nvme/target/passthru.c |  3 ++-
 5 files changed, 31 insertions(+), 46 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index fd9878671d734..c868015c1c96b 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -639,13 +639,8 @@ static inline void nvme_clear_nvme_request(struct request *req)
 	req->rq_flags |= RQF_DONTPREP;
 }
 
-static inline unsigned int nvme_req_op(struct nvme_command *cmd)
-{
-	return nvme_is_write(cmd) ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN;
-}
-
-static inline void nvme_init_request(struct request *req,
-		struct nvme_command *cmd)
+/* initialize a passthrough request */
+void nvme_init_request(struct request *req, struct nvme_command *cmd)
 {
 	if (req->q->queuedata)
 		req->timeout = NVME_IO_TIMEOUT;
@@ -661,30 +656,7 @@ static inline void nvme_init_request(struct request *req,
 	nvme_clear_nvme_request(req);
 	memcpy(nvme_req(req)->cmd, cmd, sizeof(*cmd));
 }
-
-struct request *nvme_alloc_request(struct request_queue *q,
-		struct nvme_command *cmd, blk_mq_req_flags_t flags)
-{
-	struct request *req;
-
-	req = blk_mq_alloc_request(q, nvme_req_op(cmd), flags);
-	if (!IS_ERR(req))
-		nvme_init_request(req, cmd);
-	return req;
-}
-EXPORT_SYMBOL_GPL(nvme_alloc_request);
-
-static struct request *nvme_alloc_request_qid(struct request_queue *q,
-		struct nvme_command *cmd, blk_mq_req_flags_t flags, int qid)
-{
-	struct request *req;
-
-	req = blk_mq_alloc_request_hctx(q, nvme_req_op(cmd), flags,
-			qid ? qid - 1 : 0);
-	if (!IS_ERR(req))
-		nvme_init_request(req, cmd);
-	return req;
-}
+EXPORT_SYMBOL_GPL(nvme_init_request);
 
 /*
  * For something we're not in a state to send to the device the default action
@@ -1110,11 +1082,14 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
 	int ret;
 
 	if (qid == NVME_QID_ANY)
-		req = nvme_alloc_request(q, cmd, flags);
+		req = blk_mq_alloc_request(q, nvme_req_op(cmd), flags);
 	else
-		req = nvme_alloc_request_qid(q, cmd, flags, qid);
+		req = blk_mq_alloc_request_hctx(q, nvme_req_op(cmd), flags,
+						qid ? qid - 1 : 0);
+
 	if (IS_ERR(req))
 		return PTR_ERR(req);
+	nvme_init_request(req, cmd);
 
 	if (timeout)
 		req->timeout = timeout;
@@ -1304,14 +1279,15 @@ static void nvme_keep_alive_work(struct work_struct *work)
 		return;
 	}
 
-	rq = nvme_alloc_request(ctrl->admin_q, &ctrl->ka_cmd,
-				BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT);
+	rq = blk_mq_alloc_request(ctrl->admin_q, nvme_req_op(&ctrl->ka_cmd),
+				  BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT);
 	if (IS_ERR(rq)) {
 		/* allocation failure, reset the controller */
 		dev_err(ctrl->device, "keep-alive failed: %ld\n", PTR_ERR(rq));
 		nvme_reset_ctrl(ctrl);
 		return;
 	}
+	nvme_init_request(rq, &ctrl->ka_cmd);
 
 	rq->timeout = ctrl->kato * HZ;
 	rq->end_io_data = ctrl;
diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index aaf3dfad26574..554566371ffa4 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -66,9 +66,10 @@ static int nvme_submit_user_cmd(struct request_queue *q,
 	void *meta = NULL;
 	int ret;
 
-	req = nvme_alloc_request(q, cmd, 0);
+	req = blk_mq_alloc_request(q, nvme_req_op(cmd), 0);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
+	nvme_init_request(req, cmd);
 
 	if (timeout)
 		req->timeout = timeout;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 47badb76c654c..1ea908d43e174 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -698,9 +698,13 @@ void nvme_wait_freeze(struct nvme_ctrl *ctrl);
 int nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout);
 void nvme_start_freeze(struct nvme_ctrl *ctrl);
 
+static inline unsigned int nvme_req_op(struct nvme_command *cmd)
+{
+	return nvme_is_write(cmd) ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN;
+}
+
 #define NVME_QID_ANY -1
-struct request *nvme_alloc_request(struct request_queue *q,
-		struct nvme_command *cmd, blk_mq_req_flags_t flags);
+void nvme_init_request(struct request *req, struct nvme_command *cmd);
 void nvme_cleanup_cmd(struct request *req);
 blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req);
 blk_status_t nvme_fail_nonready_command(struct nvme_ctrl *ctrl,
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 6a99ed6809158..9f4f3884fefe9 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -424,8 +424,9 @@ static int nvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
 	return 0;
 }
 
-static int nvme_init_request(struct blk_mq_tag_set *set, struct request *req,
-		unsigned int hctx_idx, unsigned int numa_node)
+static int nvme_pci_init_request(struct blk_mq_tag_set *set,
+		struct request *req, unsigned int hctx_idx,
+		unsigned int numa_node)
 {
 	struct nvme_dev *dev = set->driver_data;
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
@@ -1428,12 +1429,13 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
 		"I/O %d QID %d timeout, aborting\n",
 		 req->tag, nvmeq->qid);
 
-	abort_req = nvme_alloc_request(dev->ctrl.admin_q, &cmd,
-			BLK_MQ_REQ_NOWAIT);
+	abort_req = blk_mq_alloc_request(dev->ctrl.admin_q, nvme_req_op(&cmd),
+					 BLK_MQ_REQ_NOWAIT);
 	if (IS_ERR(abort_req)) {
 		atomic_inc(&dev->ctrl.abort_limit);
 		return BLK_EH_RESET_TIMER;
 	}
+	nvme_init_request(abort_req, &cmd);
 
 	abort_req->end_io_data = NULL;
 	blk_execute_rq_nowait(abort_req, false, abort_endio);
@@ -1722,7 +1724,7 @@ static const struct blk_mq_ops nvme_mq_admin_ops = {
 	.queue_rq	= nvme_queue_rq,
 	.complete	= nvme_pci_complete_rq,
 	.init_hctx	= nvme_admin_init_hctx,
-	.init_request	= nvme_init_request,
+	.init_request	= nvme_pci_init_request,
 	.timeout	= nvme_timeout,
 };
 
@@ -1732,7 +1734,7 @@ static const struct blk_mq_ops nvme_mq_ops = {
 	.complete	= nvme_pci_complete_rq,
 	.commit_rqs	= nvme_commit_rqs,
 	.init_hctx	= nvme_init_hctx,
-	.init_request	= nvme_init_request,
+	.init_request	= nvme_pci_init_request,
 	.map_queues	= nvme_pci_map_queues,
 	.timeout	= nvme_timeout,
 	.poll		= nvme_poll,
@@ -2475,9 +2477,10 @@ static int nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode)
 	cmd.delete_queue.opcode = opcode;
 	cmd.delete_queue.qid = cpu_to_le16(nvmeq->qid);
 
-	req = nvme_alloc_request(q, &cmd, BLK_MQ_REQ_NOWAIT);
+	req = blk_mq_alloc_request(q, nvme_req_op(&cmd), BLK_MQ_REQ_NOWAIT);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
+	nvme_init_request(req, &cmd);
 
 	req->end_io_data = nvmeq;
 
diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c
index a810bf569fff8..a4de1e0d518b5 100644
--- a/drivers/nvme/target/passthru.c
+++ b/drivers/nvme/target/passthru.c
@@ -254,11 +254,12 @@ static void nvmet_passthru_execute_cmd(struct nvmet_req *req)
 		timeout = nvmet_req_subsys(req)->admin_timeout;
 	}
 
-	rq = nvme_alloc_request(q, req->cmd, 0);
+	rq = blk_mq_alloc_request(q, nvme_req_op(req->cmd), 0);
 	if (IS_ERR(rq)) {
 		status = NVME_SC_INTERNAL;
 		goto out_put_ns;
 	}
+	nvme_init_request(rq, req->cmd);
 
 	if (timeout)
 		rq->timeout = timeout;
-- 
GitLab


From e1bca853dddcb57cbf084acbf35e3654cef6fc75 Mon Sep 17 00:00:00 2001
From: Rabara Niravkumar L <niravkumar.l.rabara@intel.com>
Date: Sat, 5 Mar 2022 09:41:18 +0800
Subject: [PATCH 1480/1586] EDAC/altera: Add SDRAM ECC check for U-Boot

A bug in legacy U-Boot causes a crash during SDRAM boot if ECC is not
enabled in the bitstream but enabled in the Linux config.

Memory mapped read of the ECC Enabled bit was only enabled if U-Boot
determined ECC was enabled in the bitstream.

The Linux driver checks the ECC enable bit using a memory map read.
In the ECC disabled bitstream case, U-Boot didn't enable ECC register
memory map reads and since they are not allowed this results in a crash.

Always read the ECC Enable register through a SMC call which is always
allowed and it works with legacy and current U-Boot.

  [ bp: Massage commit message. ]

Signed-off-by: Rabara Niravkumar L <niravkumar.l.rabara@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Dinh Nguyen <dinguyen@kernel.org>
Link: https://lore.kernel.org/r/20220305014118.4794-1-niravkumar.l.rabara@intel.com
---
 drivers/edac/altera_edac.c | 40 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 39 insertions(+), 1 deletion(-)

diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c
index 3a6d2416cb0f6..1c62e035e0054 100644
--- a/drivers/edac/altera_edac.c
+++ b/drivers/edac/altera_edac.c
@@ -1083,8 +1083,46 @@ static int __init __maybe_unused altr_init_a10_ecc_device_type(char *compat)
 
 #ifdef CONFIG_EDAC_ALTERA_SDRAM
 
+/*
+ * A legacy U-Boot bug only enabled memory mapped access to the ECC Enable
+ * register if ECC is enabled. Linux checks the ECC Enable register to
+ * determine ECC status.
+ * Use an SMC call (which always works) to determine ECC enablement.
+ */
+static int altr_s10_sdram_check_ecc_deps(struct altr_edac_device_dev *device)
+{
+	const struct edac_device_prv_data *prv = device->data;
+	unsigned long sdram_ecc_addr;
+	struct arm_smccc_res result;
+	struct device_node *np;
+	phys_addr_t sdram_addr;
+	u32 read_reg;
+	int ret;
+
+	np = of_find_compatible_node(NULL, NULL, "altr,sdr-ctl");
+	if (!np)
+		goto sdram_err;
+
+	sdram_addr = of_translate_address(np, of_get_address(np, 0,
+							     NULL, NULL));
+	of_node_put(np);
+	sdram_ecc_addr = (unsigned long)sdram_addr + prv->ecc_en_ofst;
+	arm_smccc_smc(INTEL_SIP_SMC_REG_READ, sdram_ecc_addr,
+		      0, 0, 0, 0, 0, 0, &result);
+	read_reg = (unsigned int)result.a1;
+	ret = (int)result.a0;
+	if (!ret && (read_reg & prv->ecc_enable_mask))
+		return 0;
+
+sdram_err:
+	edac_printk(KERN_ERR, EDAC_DEVICE,
+		    "%s: No ECC present or ECC disabled.\n",
+		    device->edac_dev_name);
+	return -ENODEV;
+}
+
 static const struct edac_device_prv_data s10_sdramecc_data = {
-	.setup = altr_check_ecc_deps,
+	.setup = altr_s10_sdram_check_ecc_deps,
 	.ce_clear_mask = ALTR_S10_ECC_SERRPENA,
 	.ue_clear_mask = ALTR_S10_ECC_DERRPENA,
 	.ecc_enable_mask = ALTR_S10_ECC_EN,
-- 
GitLab


From d4c9df20a37d128f6acb3c6286db7e694554a51b Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Mon, 14 Mar 2022 09:25:18 +0100
Subject: [PATCH 1481/1586] x86/nmi: Remove the 'strange power saving mode'
 hint from unknown NMI handler

The

  Do you have a strange power saving mode enabled?

hint when unknown NMI happens dates back to i386 stone age, and isn't
currently really helpful.

Unknown NMIs are coming for many different reasons (broken firmware,
faulty hardware, ...) and rarely have anything to do with 'strange power
saving mode' (whatever that even is).

Just remove it as it's largerly misleading.

Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/nycvar.YFH.7.76.2203140924120.24795@cbobk.fhfr.pm
---
 arch/x86/kernel/nmi.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 4bce802d25fb1..e73f7df362f5d 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -292,7 +292,6 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
 	pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
 		 reason, smp_processor_id());
 
-	pr_emerg("Do you have a strange power saving mode enabled?\n");
 	if (unknown_nmi_panic || panic_on_unrecovered_nmi)
 		nmi_panic(regs, "NMI: Not continuing");
 
-- 
GitLab


From 462ccc35a750f335c8456cde9120b8b593fff60f Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 16 Mar 2022 11:23:05 +0100
Subject: [PATCH 1482/1586] Revert "ACPI: scan: Do not add device IDs from _CID
 if _HID is not valid"

Revert commit e38f9ff63e6d ("ACPI: scan: Do not add device IDs from _CID
if _HID is not valid"), because it has introduced regressions on
multiple systems, even though it only has effect on clearly invalid
firmware.

Reported-by: Pierre-Louis Bossart <notifications@github.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/scan.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 1331756d4cfce..8b2e5ef155595 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -1377,11 +1377,11 @@ static void acpi_set_pnp_ids(acpi_handle handle, struct acpi_device_pnp *pnp,
 		if (info->valid & ACPI_VALID_HID) {
 			acpi_add_id(pnp, info->hardware_id.string);
 			pnp->type.platform_id = 1;
-			if (info->valid & ACPI_VALID_CID) {
-				cid_list = &info->compatible_id_list;
-				for (i = 0; i < cid_list->count; i++)
-					acpi_add_id(pnp, cid_list->ids[i].string);
-			}
+		}
+		if (info->valid & ACPI_VALID_CID) {
+			cid_list = &info->compatible_id_list;
+			for (i = 0; i < cid_list->count; i++)
+				acpi_add_id(pnp, cid_list->ids[i].string);
 		}
 		if (info->valid & ACPI_VALID_ADR) {
 			pnp->bus_address = info->address;
-- 
GitLab


From 45b4eb7ee6aa1a55a50831b328aa5f46ac3a7187 Mon Sep 17 00:00:00 2001
From: Kalle Valo <quic_kvalo@quicinc.com>
Date: Tue, 15 Mar 2022 17:54:55 +0200
Subject: [PATCH 1483/1586] Revert "ath10k: drop beacon and probe response
 which leak from other channel"

This reverts commit 3bf2537ec2e33310b431b53fd84be8833736c256.

I was reported privately that this commit breaks AP and mesh mode on QCA9984
(firmware 10.4-3.9.0.2-00156). So revert the commit to fix the regression.

There was a conflict due to cfg80211 API changes but that was easy to fix.

Fixes: 3bf2537ec2e3 ("ath10k: drop beacon and probe response which leak from other channel")
Signed-off-by: Kalle Valo <quic_kvalo@quicinc.com>
Link: https://lore.kernel.org/r/20220315155455.20446-1-kvalo@kernel.org
---
 drivers/net/wireless/ath/ath10k/wmi.c | 33 +--------------------------
 1 file changed, 1 insertion(+), 32 deletions(-)

diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c
index 62c453a21e49e..7c1c2658cb5f8 100644
--- a/drivers/net/wireless/ath/ath10k/wmi.c
+++ b/drivers/net/wireless/ath/ath10k/wmi.c
@@ -2611,36 +2611,9 @@ int ath10k_wmi_event_mgmt_rx(struct ath10k *ar, struct sk_buff *skb)
 		ath10k_mac_handle_beacon(ar, skb);
 
 	if (ieee80211_is_beacon(hdr->frame_control) ||
-	    ieee80211_is_probe_resp(hdr->frame_control)) {
-		struct ieee80211_mgmt *mgmt = (void *)skb->data;
-		enum cfg80211_bss_frame_type ftype;
-		u8 *ies;
-		int ies_ch;
-
+	    ieee80211_is_probe_resp(hdr->frame_control))
 		status->boottime_ns = ktime_get_boottime_ns();
 
-		if (!ar->scan_channel)
-			goto drop;
-
-		ies = mgmt->u.beacon.variable;
-
-		if (ieee80211_is_beacon(mgmt->frame_control))
-			ftype = CFG80211_BSS_FTYPE_BEACON;
-		else
-			ftype = CFG80211_BSS_FTYPE_PRESP;
-
-		ies_ch = cfg80211_get_ies_channel_number(mgmt->u.beacon.variable,
-							 skb_tail_pointer(skb) - ies,
-							 sband->band, ftype);
-
-		if (ies_ch > 0 && ies_ch != channel) {
-			ath10k_dbg(ar, ATH10K_DBG_MGMT,
-				   "channel mismatched ds channel %d scan channel %d\n",
-				   ies_ch, channel);
-			goto drop;
-		}
-	}
-
 	ath10k_dbg(ar, ATH10K_DBG_MGMT,
 		   "event mgmt rx skb %pK len %d ftype %02x stype %02x\n",
 		   skb, skb->len,
@@ -2654,10 +2627,6 @@ int ath10k_wmi_event_mgmt_rx(struct ath10k *ar, struct sk_buff *skb)
 	ieee80211_rx_ni(ar->hw, skb);
 
 	return 0;
-
-drop:
-	dev_kfree_skb(skb);
-	return 0;
 }
 
 static int freq_to_idx(struct ath10k *ar, int freq)
-- 
GitLab


From 052ebf1fbb1cab86b145a68d80219c8c57321cbd Mon Sep 17 00:00:00 2001
From: Dylan Yudaken <dylany@fb.com>
Date: Wed, 16 Mar 2022 02:52:04 -0700
Subject: [PATCH 1484/1586] io_uring: make tracing format consistent

Make the tracing formatting for user_data and flags consistent.

Having consistent formatting allows one for example to grep for a specific
user_data/flags and be able to trace a single sqe through easily.

Change user_data to 0x%llx and flags to 0x%x everywhere. The '0x' is
useful to disambiguate for example "user_data 100".

Additionally remove the '=' for flags in io_uring_req_failed, again for consistency.

Signed-off-by: Dylan Yudaken <dylany@fb.com>
Link: https://lore.kernel.org/r/20220316095204.2191498-1-dylany@fb.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/trace/events/io_uring.h | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/include/trace/events/io_uring.h b/include/trace/events/io_uring.h
index 18d4341c581c3..cddf5b6fbeb45 100644
--- a/include/trace/events/io_uring.h
+++ b/include/trace/events/io_uring.h
@@ -44,7 +44,7 @@ TRACE_EVENT(io_uring_create,
 		__entry->flags		= flags;
 	),
 
-	TP_printk("ring %p, fd %d sq size %d, cq size %d, flags %d",
+	TP_printk("ring %p, fd %d sq size %d, cq size %d, flags 0x%x",
 			  __entry->ctx, __entry->fd, __entry->sq_entries,
 			  __entry->cq_entries, __entry->flags)
 );
@@ -125,7 +125,7 @@ TRACE_EVENT(io_uring_file_get,
 		__entry->fd		= fd;
 	),
 
-	TP_printk("ring %p, req %p, user_data %llu, fd %d",
+	TP_printk("ring %p, req %p, user_data 0x%llx, fd %d",
 		__entry->ctx, __entry->req, __entry->user_data, __entry->fd)
 );
 
@@ -169,7 +169,7 @@ TRACE_EVENT(io_uring_queue_async_work,
 		__entry->rw		= rw;
 	),
 
-	TP_printk("ring %p, request %p, user_data %llu, opcode %d, flags %d, %s queue, work %p",
+	TP_printk("ring %p, request %p, user_data 0x%llx, opcode %d, flags 0x%x, %s queue, work %p",
 		__entry->ctx, __entry->req, __entry->user_data, __entry->opcode,
 		__entry->flags, __entry->rw ? "hashed" : "normal", __entry->work)
 );
@@ -205,7 +205,7 @@ TRACE_EVENT(io_uring_defer,
 		__entry->opcode	= opcode;
 	),
 
-	TP_printk("ring %p, request %p, user_data %llu, opcode %d",
+	TP_printk("ring %p, request %p, user_data 0x%llx, opcode %d",
 		__entry->ctx, __entry->req, __entry->data, __entry->opcode)
 );
 
@@ -305,7 +305,7 @@ TRACE_EVENT(io_uring_fail_link,
 		__entry->link		= link;
 	),
 
-	TP_printk("ring %p, request %p, user_data %llu, opcode %d, link %p",
+	TP_printk("ring %p, request %p, user_data 0x%llx, opcode %d, link %p",
 		__entry->ctx, __entry->req, __entry->user_data, __entry->opcode,
 		__entry->link)
 );
@@ -342,9 +342,9 @@ TRACE_EVENT(io_uring_complete,
 		__entry->cflags		= cflags;
 	),
 
-	TP_printk("ring %p, req %p, user_data 0x%llx, result %d, cflags %x",
+	TP_printk("ring %p, req %p, user_data 0x%llx, result %d, cflags 0x%x",
 		__entry->ctx, __entry->req,
-		(unsigned long long)__entry->user_data,
+		__entry->user_data,
 		__entry->res, __entry->cflags)
 );
 
@@ -389,7 +389,7 @@ TRACE_EVENT(io_uring_submit_sqe,
 		__entry->sq_thread	= sq_thread;
 	),
 
-	TP_printk("ring %p, req %p, user_data %llu, opcode %d, flags %u, "
+	TP_printk("ring %p, req %p, user_data 0x%llx, opcode %d, flags 0x%x, "
 		  "non block %d, sq_thread %d", __entry->ctx, __entry->req,
 		  __entry->user_data, __entry->opcode,
 		  __entry->flags, __entry->force_nonblock, __entry->sq_thread)
@@ -433,7 +433,7 @@ TRACE_EVENT(io_uring_poll_arm,
 		__entry->events		= events;
 	),
 
-	TP_printk("ring %p, req %p, user_data %llu, opcode %d, mask 0x%x, events 0x%x",
+	TP_printk("ring %p, req %p, user_data 0x%llx, opcode %d, mask 0x%x, events 0x%x",
 		  __entry->ctx, __entry->req, __entry->user_data, __entry->opcode,
 		  __entry->mask, __entry->events)
 );
@@ -470,7 +470,7 @@ TRACE_EVENT(io_uring_task_add,
 		__entry->mask		= mask;
 	),
 
-	TP_printk("ring %p, req %p, user_data %llu, opcode %d, mask %x",
+	TP_printk("ring %p, req %p, user_data 0x%llx, opcode %d, mask %x",
 		__entry->ctx, __entry->req, __entry->user_data, __entry->opcode,
 		__entry->mask)
 );
@@ -529,8 +529,8 @@ TRACE_EVENT(io_uring_req_failed,
 		__entry->error		= error;
 	),
 
-	TP_printk("ring %p, req %p, user_data %llu, "
-		"op %d, flags=0x%x, prio=%d, off=%llu, addr=%llu, "
+	TP_printk("ring %p, req %p, user_data 0x%llx, "
+		"op %d, flags 0x%x, prio=%d, off=%llu, addr=%llu, "
 		  "len=%u, rw_flags=0x%x, buf_index=%d, "
 		  "personality=%d, file_index=%d, pad=0x%llx/%llx, error=%d",
 		  __entry->ctx, __entry->req, __entry->user_data,
-- 
GitLab


From 8ef22dc4a7cbef5870e612edfbf9cd058ddaaa50 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Tue, 15 Mar 2022 22:15:39 +0000
Subject: [PATCH 1485/1586] block/bfq-iosched: Fix spelling mistake "tenative"
 -> "tentative"

There is a spelling mistake in a bfq_log_bfqq message. Fix it.

Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Link: https://lore.kernel.org/r/20220315221539.2959167-1-colin.i.king@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bfq-iosched.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 975fd30a07c93..3376ad03823e3 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -2153,7 +2153,7 @@ static void bfq_check_waker(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 		bfqq->waker_detection_started = now_ns;
 		bfq_bfqq_name(bfqq->tentative_waker_bfqq, waker_name,
 			      MAX_BFQQ_NAME_LENGTH);
-		bfq_log_bfqq(bfqd, bfqq, "set tenative waker %s", waker_name);
+		bfq_log_bfqq(bfqd, bfqq, "set tentative waker %s", waker_name);
 	} else /* Same tentative waker queue detected again */
 		bfqq->num_waker_detections++;
 
-- 
GitLab


From 6061806a863e8b65b109eb06a280041cc7525442 Mon Sep 17 00:00:00 2001
From: Christoph Niedermaier <cniedermaier@dh-electronics.com>
Date: Tue, 1 Feb 2022 12:36:43 +0100
Subject: [PATCH 1486/1586] drm/imx: parallel-display: Remove bus flags check
 in imx_pd_bridge_atomic_check()

If display timings were read from the devicetree using
of_get_display_timing() and pixelclk-active is defined
there, the flag DISPLAY_FLAGS_SYNC_POSEDGE/NEGEDGE is
automatically generated. Through the function
drm_bus_flags_from_videomode() e.g. called in the
panel-simple driver this flag got into the bus flags,
but then in imx_pd_bridge_atomic_check() the bus flag
check failed and will not initialize the display. The
original commit fe141cedc433 does not explain why this
check was introduced. So remove the bus flags check,
because it stops the initialization of the display with
valid bus flags.

Fixes: fe141cedc433 ("drm/imx: pd: Use bus format/flags provided by the bridge when available")
Signed-off-by: Christoph Niedermaier <cniedermaier@dh-electronics.com>
Cc: Marek Vasut <marex@denx.de>
Cc: Boris Brezillon <boris.brezillon@collabora.com>
Cc: Philipp Zabel <p.zabel@pengutronix.de>
Cc: David Airlie <airlied@linux.ie>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Shawn Guo <shawnguo@kernel.org>
Cc: Sascha Hauer <s.hauer@pengutronix.de>
Cc: Pengutronix Kernel Team <kernel@pengutronix.de>
Cc: Fabio Estevam <festevam@gmail.com>
Cc: NXP Linux Team <linux-imx@nxp.com>
Cc: linux-arm-kernel@lists.infradead.org
To: dri-devel@lists.freedesktop.org
Tested-by: Max Krummenacher <max.krummenacher@toradex.com>
Acked-by: Boris Brezillon <boris.brezillon@collabora.com>
Signed-off-by: Marek Vasut <marex@denx.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20220201113643.4638-1-cniedermaier@dh-electronics.com
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
---
 drivers/gpu/drm/imx/parallel-display.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/drivers/gpu/drm/imx/parallel-display.c b/drivers/gpu/drm/imx/parallel-display.c
index a8aba0141ce71..06cb1a59b9bcd 100644
--- a/drivers/gpu/drm/imx/parallel-display.c
+++ b/drivers/gpu/drm/imx/parallel-display.c
@@ -217,14 +217,6 @@ static int imx_pd_bridge_atomic_check(struct drm_bridge *bridge,
 	if (!imx_pd_format_supported(bus_fmt))
 		return -EINVAL;
 
-	if (bus_flags &
-	    ~(DRM_BUS_FLAG_DE_LOW | DRM_BUS_FLAG_DE_HIGH |
-	      DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE |
-	      DRM_BUS_FLAG_PIXDATA_DRIVE_NEGEDGE)) {
-		dev_warn(imxpd->dev, "invalid bus_flags (%x)\n", bus_flags);
-		return -EINVAL;
-	}
-
 	bridge_state->output_bus_cfg.flags = bus_flags;
 	bridge_state->input_bus_cfg.flags = bus_flags;
 	imx_crtc_state->bus_flags = bus_flags;
-- 
GitLab


From fc1b6ef7bfb3d1d4df868b1c3e0480cacda6cd81 Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Sun, 20 Feb 2022 05:07:18 +0100
Subject: [PATCH 1487/1586] drm/panel: simple: Fix Innolux G070Y2-L01 BPP
 settings

The Innolux G070Y2-L01 supports two modes of operation:
1) FRC=Low/NC ... MEDIA_BUS_FMT_RGB666_1X7X3_SPWG ... BPP=6
2) FRC=High ..... MEDIA_BUS_FMT_RGB888_1X7X4_SPWG ... BPP=8

Currently the panel description mixes both, BPP from 1) and bus
format from 2), which triggers a warning at panel-simple.c:615.

Pick the later, set bpp=8, fix the warning.

Fixes: a5d2ade627dca ("drm/panel: simple: Add support for Innolux G070Y2-L01")
Signed-off-by: Marek Vasut <marex@denx.de>
Cc: Christoph Fritz <chf.fritz@googlemail.com>
Cc: Laurent Pinchart <Laurent.pinchart@ideasonboard.com>
Cc: Maxime Ripard <maxime@cerno.tech>
Cc: Sam Ravnborg <sam@ravnborg.org>
Cc: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220220040718.532866-1-marex@denx.de
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
---
 drivers/gpu/drm/panel/panel-simple.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c
index 3c08f9827acf1..b42c1d816e792 100644
--- a/drivers/gpu/drm/panel/panel-simple.c
+++ b/drivers/gpu/drm/panel/panel-simple.c
@@ -2017,7 +2017,7 @@ static const struct display_timing innolux_g070y2_l01_timing = {
 static const struct panel_desc innolux_g070y2_l01 = {
 	.timings = &innolux_g070y2_l01_timing,
 	.num_timings = 1,
-	.bpc = 6,
+	.bpc = 8,
 	.size = {
 		.width = 152,
 		.height = 91,
-- 
GitLab


From ce8d78616a6b637d1b763eb18e32045687a84305 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 15 Mar 2022 13:27:07 +0100
Subject: [PATCH 1488/1586] nvme: warn about shared namespaces without
 CONFIG_NVME_MULTIPATH

Start warning about exposing a namespace as multiple block devices,
and set a fixed deprecation release.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Keith Busch <kbusch@kernel.org>
---
 drivers/block/loop.c     | 1 +
 drivers/nvme/host/core.c | 8 ++++++++
 2 files changed, 9 insertions(+)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 8fb89d0624fc5..e733c48de2e90 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -2092,6 +2092,7 @@ static void loop_remove(struct loop_device *lo)
 	del_gendisk(lo->lo_disk);
 	blk_cleanup_disk(lo->lo_disk);
 	blk_mq_free_tag_set(&lo->tag_set);
+
 	mutex_lock(&loop_ctl_mutex);
 	idr_remove(&loop_index_idr, lo->lo_number);
 	mutex_unlock(&loop_ctl_mutex);
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index c868015c1c96b..3c0461129bcae 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3855,6 +3855,14 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid,
 					nsid);
 			goto out_put_ns_head;
 		}
+
+		if (!multipath && !list_empty(&head->list)) {
+			dev_warn(ctrl->device,
+				"Found shared namespace %d, but multipathing not supported.\n",
+				nsid);
+			dev_warn_once(ctrl->device,
+				"Support for shared namespaces without CONFIG_NVME_MULTIPATH is deprecated and will be removed in Linux 6.0\n.");
+		}
 	}
 
 	list_add_tail_rcu(&ns->siblings, &head->list);
-- 
GitLab


From ab474fccd04509db89fde8d3b28c39aa9a47db64 Mon Sep 17 00:00:00 2001
From: Gao Xiang <hsiangkao@linux.alibaba.com>
Date: Fri, 11 Mar 2022 02:27:42 +0800
Subject: [PATCH 1489/1586] erofs: clean up z_erofs_extent_lookback

Avoid the unnecessary tail recursion since it can be converted into
a loop directly in order to prevent potential stack overflow.

It's a pretty straightforward conversion.

Link: https://lore.kernel.org/r/20220310182743.102365-1-hsiangkao@linux.alibaba.com
Reviewed-by: Yue Hu <huyue2@coolpad.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
 fs/erofs/zmap.c | 67 ++++++++++++++++++++++++-------------------------
 1 file changed, 33 insertions(+), 34 deletions(-)

diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c
index b4059b9c3bacf..572f0b8151ba8 100644
--- a/fs/erofs/zmap.c
+++ b/fs/erofs/zmap.c
@@ -431,48 +431,47 @@ static int z_erofs_extent_lookback(struct z_erofs_maprecorder *m,
 				   unsigned int lookback_distance)
 {
 	struct erofs_inode *const vi = EROFS_I(m->inode);
-	struct erofs_map_blocks *const map = m->map;
 	const unsigned int lclusterbits = vi->z_logical_clusterbits;
-	unsigned long lcn = m->lcn;
-	int err;
 
-	if (lcn < lookback_distance) {
-		erofs_err(m->inode->i_sb,
-			  "bogus lookback distance @ nid %llu", vi->nid);
-		DBG_BUGON(1);
-		return -EFSCORRUPTED;
-	}
+	while (m->lcn >= lookback_distance) {
+		unsigned long lcn = m->lcn - lookback_distance;
+		int err;
 
-	/* load extent head logical cluster if needed */
-	lcn -= lookback_distance;
-	err = z_erofs_load_cluster_from_disk(m, lcn, false);
-	if (err)
-		return err;
+		/* load extent head logical cluster if needed */
+		err = z_erofs_load_cluster_from_disk(m, lcn, false);
+		if (err)
+			return err;
 
-	switch (m->type) {
-	case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
-		if (!m->delta[0]) {
+		switch (m->type) {
+		case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
+			if (!m->delta[0]) {
+				erofs_err(m->inode->i_sb,
+					  "invalid lookback distance 0 @ nid %llu",
+					  vi->nid);
+				DBG_BUGON(1);
+				return -EFSCORRUPTED;
+			}
+			lookback_distance = m->delta[0];
+			continue;
+		case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
+		case Z_EROFS_VLE_CLUSTER_TYPE_HEAD1:
+		case Z_EROFS_VLE_CLUSTER_TYPE_HEAD2:
+			m->headtype = m->type;
+			m->map->m_la = (lcn << lclusterbits) | m->clusterofs;
+			return 0;
+		default:
 			erofs_err(m->inode->i_sb,
-				  "invalid lookback distance 0 @ nid %llu",
-				  vi->nid);
+				  "unknown type %u @ lcn %lu of nid %llu",
+				  m->type, lcn, vi->nid);
 			DBG_BUGON(1);
-			return -EFSCORRUPTED;
+			return -EOPNOTSUPP;
 		}
-		return z_erofs_extent_lookback(m, m->delta[0]);
-	case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
-	case Z_EROFS_VLE_CLUSTER_TYPE_HEAD1:
-	case Z_EROFS_VLE_CLUSTER_TYPE_HEAD2:
-		m->headtype = m->type;
-		map->m_la = (lcn << lclusterbits) | m->clusterofs;
-		break;
-	default:
-		erofs_err(m->inode->i_sb,
-			  "unknown type %u @ lcn %lu of nid %llu",
-			  m->type, lcn, vi->nid);
-		DBG_BUGON(1);
-		return -EOPNOTSUPP;
 	}
-	return 0;
+
+	erofs_err(m->inode->i_sb, "bogus lookback distance @ nid %llu",
+		  vi->nid);
+	DBG_BUGON(1);
+	return -EFSCORRUPTED;
 }
 
 static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m,
-- 
GitLab


From 9f2731d6338a072b60ab1d9340847bde3306614b Mon Sep 17 00:00:00 2001
From: Gao Xiang <hsiangkao@linux.alibaba.com>
Date: Fri, 11 Mar 2022 02:27:43 +0800
Subject: [PATCH 1490/1586] erofs: refine managed inode stuffs

Set up the correct gfp mask and use it instead of hard coding.
Also add comments about .invalidatepage() to show more details.

Link: https://lore.kernel.org/r/20220310182743.102365-2-hsiangkao@linux.alibaba.com
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
 fs/erofs/super.c | 8 ++++++--
 fs/erofs/zdata.c | 7 +++----
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index 12755217631f4..e178100c162a7 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -532,6 +532,11 @@ static int erofs_managed_cache_releasepage(struct page *page, gfp_t gfp_mask)
 	return ret;
 }
 
+/*
+ * It will be called only on inode eviction. In case that there are still some
+ * decompression requests in progress, wait with rescheduling for a bit here.
+ * We could introduce an extra locking instead but it seems unnecessary.
+ */
 static void erofs_managed_cache_invalidatepage(struct page *page,
 					       unsigned int offset,
 					       unsigned int length)
@@ -565,8 +570,7 @@ static int erofs_init_managed_cache(struct super_block *sb)
 	inode->i_size = OFFSET_MAX;
 
 	inode->i_mapping->a_ops = &managed_cache_aops;
-	mapping_set_gfp_mask(inode->i_mapping,
-			     GFP_NOFS | __GFP_HIGHMEM | __GFP_MOVABLE);
+	mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
 	sbi->managed_cache = inode;
 	return 0;
 }
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 59aecf42e45c9..f5e17c03b2fb2 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -1091,10 +1091,10 @@ static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io,
 static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
 					       unsigned int nr,
 					       struct page **pagepool,
-					       struct address_space *mc,
-					       gfp_t gfp)
+					       struct address_space *mc)
 {
 	const pgoff_t index = pcl->obj.index;
+	gfp_t gfp = mapping_gfp_mask(mc);
 	bool tocache = false;
 
 	struct address_space *mapping;
@@ -1350,8 +1350,7 @@ static void z_erofs_submit_queue(struct super_block *sb,
 			struct page *page;
 
 			page = pickup_page_for_submission(pcl, i++, pagepool,
-							  MNGD_MAPPING(sbi),
-							  GFP_NOFS);
+							  MNGD_MAPPING(sbi));
 			if (!page)
 				continue;
 
-- 
GitLab


From a942da24abc5839c11a8fc2a4b7cb268ea94ba54 Mon Sep 17 00:00:00 2001
From: Dongliang Mu <mudongliangabcd@gmail.com>
Date: Tue, 15 Mar 2022 21:28:14 +0800
Subject: [PATCH 1491/1586] fs: erofs: add sanity check for kobject in
 erofs_unregister_sysfs

Syzkaller hit 'WARNING: kobject bug in erofs_unregister_sysfs'. This bug
is triggered by injecting fault in kobject_init_and_add of
erofs_unregister_sysfs.

Fix this by adding sanity check for kobject in erofs_unregister_sysfs

Note that I've tested the patch and the crash does not occur any more.

Link: https://lore.kernel.org/r/20220315132814.12332-1-dzm91@hust.edu.cn
Signed-off-by: Dongliang Mu <mudongliangabcd@gmail.com>
Fixes: 168e9a76200c ("erofs: add sysfs interface")
Reviewed-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
 fs/erofs/sysfs.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/fs/erofs/sysfs.c b/fs/erofs/sysfs.c
index dac252bc92281..f3babf1e66083 100644
--- a/fs/erofs/sysfs.c
+++ b/fs/erofs/sysfs.c
@@ -221,9 +221,11 @@ void erofs_unregister_sysfs(struct super_block *sb)
 {
 	struct erofs_sb_info *sbi = EROFS_SB(sb);
 
-	kobject_del(&sbi->s_kobj);
-	kobject_put(&sbi->s_kobj);
-	wait_for_completion(&sbi->s_kobj_unregister);
+	if (sbi->s_kobj.state_in_sysfs) {
+		kobject_del(&sbi->s_kobj);
+		kobject_put(&sbi->s_kobj);
+		wait_for_completion(&sbi->s_kobj_unregister);
+	}
 }
 
 int __init erofs_init_sysfs(void)
-- 
GitLab


From fe5de5859d44eafd5828ec4910cd52cdad8ada1f Mon Sep 17 00:00:00 2001
From: Gao Xiang <hsiangkao@linux.alibaba.com>
Date: Wed, 16 Mar 2022 09:22:45 +0800
Subject: [PATCH 1492/1586] erofs: use meta buffers for reading directories

Previously, directory inodes are directly handled with page cache
interfaces.

In order to support sub-page directory blocks and folios, let's
convert them into the latest metabuf infrastructure as well and
this patch addresses the readdir case first.

Link: https://lore.kernel.org/r/20220316012246.95131-1-hsiangkao@linux.alibaba.com
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
 fs/erofs/data.c     | 12 +++++++++---
 fs/erofs/dir.c      | 21 ++++++---------------
 fs/erofs/internal.h |  2 ++
 3 files changed, 17 insertions(+), 18 deletions(-)

diff --git a/fs/erofs/data.c b/fs/erofs/data.c
index 226a57c57ee6f..780db1e5f4b72 100644
--- a/fs/erofs/data.c
+++ b/fs/erofs/data.c
@@ -28,10 +28,10 @@ void erofs_put_metabuf(struct erofs_buf *buf)
 	buf->page = NULL;
 }
 
-void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb,
-			erofs_blk_t blkaddr, enum erofs_kmap_type type)
+void *erofs_bread(struct erofs_buf *buf, struct inode *inode,
+		  erofs_blk_t blkaddr, enum erofs_kmap_type type)
 {
-	struct address_space *const mapping = sb->s_bdev->bd_inode->i_mapping;
+	struct address_space *const mapping = inode->i_mapping;
 	erofs_off_t offset = blknr_to_addr(blkaddr);
 	pgoff_t index = offset >> PAGE_SHIFT;
 	struct page *page = buf->page;
@@ -60,6 +60,12 @@ void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb,
 	return buf->base + (offset & ~PAGE_MASK);
 }
 
+void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb,
+			 erofs_blk_t blkaddr, enum erofs_kmap_type type)
+{
+	return erofs_bread(buf, sb->s_bdev->bd_inode, blkaddr, type);
+}
+
 static int erofs_map_blocks_flatmode(struct inode *inode,
 				     struct erofs_map_blocks *map,
 				     int flags)
diff --git a/fs/erofs/dir.c b/fs/erofs/dir.c
index eee9b0b31b639..18e59821c5974 100644
--- a/fs/erofs/dir.c
+++ b/fs/erofs/dir.c
@@ -2,6 +2,7 @@
 /*
  * Copyright (C) 2017-2018 HUAWEI, Inc.
  *             https://www.huawei.com/
+ * Copyright (C) 2022, Alibaba Cloud
  */
 #include "internal.h"
 
@@ -67,7 +68,7 @@ static int erofs_fill_dentries(struct inode *dir, struct dir_context *ctx,
 static int erofs_readdir(struct file *f, struct dir_context *ctx)
 {
 	struct inode *dir = file_inode(f);
-	struct address_space *mapping = dir->i_mapping;
+	struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
 	const size_t dirsize = i_size_read(dir);
 	unsigned int i = ctx->pos / EROFS_BLKSIZ;
 	unsigned int ofs = ctx->pos % EROFS_BLKSIZ;
@@ -75,26 +76,19 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx)
 	bool initial = true;
 
 	while (ctx->pos < dirsize) {
-		struct page *dentry_page;
 		struct erofs_dirent *de;
 		unsigned int nameoff, maxsize;
 
-		dentry_page = read_mapping_page(mapping, i, NULL);
-		if (dentry_page == ERR_PTR(-ENOMEM)) {
-			err = -ENOMEM;
-			break;
-		} else if (IS_ERR(dentry_page)) {
+		de = erofs_bread(&buf, dir, i, EROFS_KMAP);
+		if (IS_ERR(de)) {
 			erofs_err(dir->i_sb,
 				  "fail to readdir of logical block %u of nid %llu",
 				  i, EROFS_I(dir)->nid);
-			err = -EFSCORRUPTED;
+			err = PTR_ERR(de);
 			break;
 		}
 
-		de = (struct erofs_dirent *)kmap(dentry_page);
-
 		nameoff = le16_to_cpu(de->nameoff);
-
 		if (nameoff < sizeof(struct erofs_dirent) ||
 		    nameoff >= PAGE_SIZE) {
 			erofs_err(dir->i_sb,
@@ -119,10 +113,6 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx)
 		err = erofs_fill_dentries(dir, ctx, de, &ofs,
 					  nameoff, maxsize);
 skip_this:
-		kunmap(dentry_page);
-
-		put_page(dentry_page);
-
 		ctx->pos = blknr_to_addr(i) + ofs;
 
 		if (err)
@@ -130,6 +120,7 @@ skip_this:
 		++i;
 		ofs = 0;
 	}
+	erofs_put_metabuf(&buf);
 	return err < 0 ? err : 0;
 }
 
diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index 5aa2cf2c2f804..5298c4ee277d7 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -479,6 +479,8 @@ struct erofs_map_dev {
 extern const struct file_operations erofs_file_fops;
 void erofs_unmap_metabuf(struct erofs_buf *buf);
 void erofs_put_metabuf(struct erofs_buf *buf);
+void *erofs_bread(struct erofs_buf *buf, struct inode *inode,
+		  erofs_blk_t blkaddr, enum erofs_kmap_type type);
 void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb,
 			 erofs_blk_t blkaddr, enum erofs_kmap_type type);
 int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *dev);
-- 
GitLab


From 500edd0956487281a6fa0e1e34e931817e85d8b6 Mon Sep 17 00:00:00 2001
From: Gao Xiang <hsiangkao@linux.alibaba.com>
Date: Wed, 16 Mar 2022 09:22:46 +0800
Subject: [PATCH 1493/1586] erofs: use meta buffers for inode lookup

This converts the remaining inode lookup part by using metabuf in a
straight-forward way. Except that it doesn't use kmap_atomic()
anymore since we now have to maintain two metabufs together.

After this patch, all uncompressed paths are handled with metabuf
instead of page structure.

Link: https://lore.kernel.org/r/20220316012246.95131-2-hsiangkao@linux.alibaba.com
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
 fs/erofs/namei.c | 54 +++++++++++++++++++++---------------------------
 1 file changed, 24 insertions(+), 30 deletions(-)

diff --git a/fs/erofs/namei.c b/fs/erofs/namei.c
index 8629e616028cb..554efa363317f 100644
--- a/fs/erofs/namei.c
+++ b/fs/erofs/namei.c
@@ -2,6 +2,7 @@
 /*
  * Copyright (C) 2017-2018 HUAWEI, Inc.
  *             https://www.huawei.com/
+ * Copyright (C) 2022, Alibaba Cloud
  */
 #include "xattr.h"
 
@@ -86,14 +87,14 @@ static struct erofs_dirent *find_target_dirent(struct erofs_qstr *name,
 	return ERR_PTR(-ENOENT);
 }
 
-static struct page *find_target_block_classic(struct inode *dir,
-					      struct erofs_qstr *name,
-					      int *_ndirents)
+static void *find_target_block_classic(struct erofs_buf *target,
+				       struct inode *dir,
+				       struct erofs_qstr *name,
+				       int *_ndirents)
 {
 	unsigned int startprfx, endprfx;
 	int head, back;
-	struct address_space *const mapping = dir->i_mapping;
-	struct page *candidate = ERR_PTR(-ENOENT);
+	void *candidate = ERR_PTR(-ENOENT);
 
 	startprfx = endprfx = 0;
 	head = 0;
@@ -101,10 +102,11 @@ static struct page *find_target_block_classic(struct inode *dir,
 
 	while (head <= back) {
 		const int mid = head + (back - head) / 2;
-		struct page *page = read_mapping_page(mapping, mid, NULL);
+		struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
+		struct erofs_dirent *de;
 
-		if (!IS_ERR(page)) {
-			struct erofs_dirent *de = kmap_atomic(page);
+		de = erofs_bread(&buf, dir, mid, EROFS_KMAP);
+		if (!IS_ERR(de)) {
 			const int nameoff = nameoff_from_disk(de->nameoff,
 							      EROFS_BLKSIZ);
 			const int ndirents = nameoff / sizeof(*de);
@@ -113,13 +115,12 @@ static struct page *find_target_block_classic(struct inode *dir,
 			struct erofs_qstr dname;
 
 			if (!ndirents) {
-				kunmap_atomic(de);
-				put_page(page);
+				erofs_put_metabuf(&buf);
 				erofs_err(dir->i_sb,
 					  "corrupted dir block %d @ nid %llu",
 					  mid, EROFS_I(dir)->nid);
 				DBG_BUGON(1);
-				page = ERR_PTR(-EFSCORRUPTED);
+				de = ERR_PTR(-EFSCORRUPTED);
 				goto out;
 			}
 
@@ -135,7 +136,6 @@ static struct page *find_target_block_classic(struct inode *dir,
 
 			/* string comparison without already matched prefix */
 			diff = erofs_dirnamecmp(name, &dname, &matched);
-			kunmap_atomic(de);
 
 			if (!diff) {
 				*_ndirents = 0;
@@ -145,11 +145,12 @@ static struct page *find_target_block_classic(struct inode *dir,
 				startprfx = matched;
 
 				if (!IS_ERR(candidate))
-					put_page(candidate);
-				candidate = page;
+					erofs_put_metabuf(target);
+				*target = buf;
+				candidate = de;
 				*_ndirents = ndirents;
 			} else {
-				put_page(page);
+				erofs_put_metabuf(&buf);
 
 				back = mid - 1;
 				endprfx = matched;
@@ -158,8 +159,8 @@ static struct page *find_target_block_classic(struct inode *dir,
 		}
 out:		/* free if the candidate is valid */
 		if (!IS_ERR(candidate))
-			put_page(candidate);
-		return page;
+			erofs_put_metabuf(target);
+		return de;
 	}
 	return candidate;
 }
@@ -169,8 +170,7 @@ int erofs_namei(struct inode *dir,
 		erofs_nid_t *nid, unsigned int *d_type)
 {
 	int ndirents;
-	struct page *page;
-	void *data;
+	struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
 	struct erofs_dirent *de;
 	struct erofs_qstr qn;
 
@@ -181,26 +181,20 @@ int erofs_namei(struct inode *dir,
 	qn.end = name->name + name->len;
 
 	ndirents = 0;
-	page = find_target_block_classic(dir, &qn, &ndirents);
 
-	if (IS_ERR(page))
-		return PTR_ERR(page);
+	de = find_target_block_classic(&buf, dir, &qn, &ndirents);
+	if (IS_ERR(de))
+		return PTR_ERR(de);
 
-	data = kmap_atomic(page);
 	/* the target page has been mapped */
 	if (ndirents)
-		de = find_target_dirent(&qn, data, EROFS_BLKSIZ, ndirents);
-	else
-		de = (struct erofs_dirent *)data;
+		de = find_target_dirent(&qn, (u8 *)de, EROFS_BLKSIZ, ndirents);
 
 	if (!IS_ERR(de)) {
 		*nid = le64_to_cpu(de->nid);
 		*d_type = de->file_type;
 	}
-
-	kunmap_atomic(data);
-	put_page(page);
-
+	erofs_put_metabuf(&buf);
 	return PTR_ERR_OR_ZERO(de);
 }
 
-- 
GitLab


From c9839acfcbe20ce43d363c2a9d0772472d9921c0 Mon Sep 17 00:00:00 2001
From: Minghao Chi <chi.minghao@zte.com.cn>
Date: Tue, 15 Mar 2022 02:31:38 +0000
Subject: [PATCH 1494/1586] spi: tegra20: Use of_device_get_match_data()

Use of_device_get_match_data() to simplify the code.

Reported-by: Zeal Robot <zealci@zte.com.cn>
Signed-off-by: Minghao Chi <chi.minghao@zte.com.cn>
Link: https://lore.kernel.org/r/20220315023138.2118293-1-chi.minghao@zte.com.cn
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-tegra20-slink.c | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/drivers/spi/spi-tegra20-slink.c b/drivers/spi/spi-tegra20-slink.c
index 2a03739a0c609..80c3787deea9d 100644
--- a/drivers/spi/spi-tegra20-slink.c
+++ b/drivers/spi/spi-tegra20-slink.c
@@ -1006,14 +1006,8 @@ static int tegra_slink_probe(struct platform_device *pdev)
 	struct resource		*r;
 	int ret, spi_irq;
 	const struct tegra_slink_chip_data *cdata = NULL;
-	const struct of_device_id *match;
 
-	match = of_match_device(tegra_slink_of_match, &pdev->dev);
-	if (!match) {
-		dev_err(&pdev->dev, "Error: No device match found\n");
-		return -ENODEV;
-	}
-	cdata = match->data;
+	cdata = of_device_get_match_data(&pdev->dev);
 
 	master = spi_alloc_master(&pdev->dev, sizeof(*tspi));
 	if (!master) {
-- 
GitLab


From 3d13058ed2a6d0ddb55a45a979acc49a845be874 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Thu, 10 Mar 2022 14:42:23 -0800
Subject: [PATCH 1495/1586] cpufreq: intel_pstate: Use firmware default EPP

For some specific platforms (E.g. AlderLake) the balance performance
EPP is updated from the hard coded value in the driver. This acts as
the default and balance_performance EPP. The purpose of this EPP
update is to reach maximum 1 core turbo frequency (when possible) out
of the box.

Although we can achieve the objective by using hard coded value in the
driver, there can be other EPP which can be better in terms of power.
But that will be very subjective based on platform and use cases.
This is not practical to have a per platform specific default hard coded
in the driver.

If a platform wants to specify default EPP, it can be set in the firmware.
If this EPP is not the chipset default of 0x80 (balance_perf_epp unless
driver changed it) and more performance oriented but not 0, the driver
can use this as the default and balanced_perf EPP. In this case no driver
update is required every time there is some new platform and default EPP.

If the firmware didn't update the EPP from the chipset default then
the hard coded value is used as per existing implementation.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 38 ++++++++++++++++++++++++++++------
 1 file changed, 32 insertions(+), 6 deletions(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index bc7f7e6759bd6..846bb3a787880 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -1692,6 +1692,37 @@ static void intel_pstate_enable_hwp_interrupt(struct cpudata *cpudata)
 	}
 }
 
+static void intel_pstate_update_epp_defaults(struct cpudata *cpudata)
+{
+	cpudata->epp_default = intel_pstate_get_epp(cpudata, 0);
+
+	/*
+	 * If this CPU gen doesn't call for change in balance_perf
+	 * EPP return.
+	 */
+	if (epp_values[EPP_INDEX_BALANCE_PERFORMANCE] == HWP_EPP_BALANCE_PERFORMANCE)
+		return;
+
+	/*
+	 * If powerup EPP is something other than chipset default 0x80 and
+	 * - is more performance oriented than 0x80 (default balance_perf EPP)
+	 * - But less performance oriented than performance EPP
+	 *   then use this as new balance_perf EPP.
+	 */
+	if (cpudata->epp_default < HWP_EPP_BALANCE_PERFORMANCE &&
+	    cpudata->epp_default > HWP_EPP_PERFORMANCE) {
+		epp_values[EPP_INDEX_BALANCE_PERFORMANCE] = cpudata->epp_default;
+		return;
+	}
+
+	/*
+	 * Use hard coded value per gen to update the balance_perf
+	 * and default EPP.
+	 */
+	cpudata->epp_default = epp_values[EPP_INDEX_BALANCE_PERFORMANCE];
+	intel_pstate_set_epp(cpudata, cpudata->epp_default);
+}
+
 static void intel_pstate_hwp_enable(struct cpudata *cpudata)
 {
 	/* First disable HWP notification interrupt till we activate again */
@@ -1705,12 +1736,7 @@ static void intel_pstate_hwp_enable(struct cpudata *cpudata)
 	if (cpudata->epp_default >= 0)
 		return;
 
-	if (epp_values[EPP_INDEX_BALANCE_PERFORMANCE] == HWP_EPP_BALANCE_PERFORMANCE) {
-		cpudata->epp_default = intel_pstate_get_epp(cpudata, 0);
-	} else {
-		cpudata->epp_default = epp_values[EPP_INDEX_BALANCE_PERFORMANCE];
-		intel_pstate_set_epp(cpudata, cpudata->epp_default);
-	}
+	intel_pstate_update_epp_defaults(cpudata);
 }
 
 static int atom_get_min_pstate(void)
-- 
GitLab


From f6c46b1d62f8ffbf2cf6eb43ab0d277bd3f7e948 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw@amazon.co.uk>
Date: Fri, 11 Mar 2022 19:20:17 +0000
Subject: [PATCH 1496/1586] PM: hibernate: Honour ACPI hardware signature by
 default for virtual guests

The ACPI specification says that OSPM should refuse to restore from
hibernate if the hardware signature changes, and should boot from
scratch. However, real BIOSes often vary the hardware signature in cases
where we *do* want to resume from hibernate, so Linux doesn't follow the
spec by default.

However, in a virtual environment there's no reason for the VMM to vary
the hardware signature *unless* it wants to trigger a clean reboot as
defined by the ACPI spec. So enable the check by default if a hypervisor
is detected.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 arch/x86/kernel/acpi/sleep.c | 23 +++++++++++++++++++++--
 drivers/acpi/sleep.c         | 11 +++--------
 include/linux/acpi.h         |  2 +-
 3 files changed, 25 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 1e97f944b47d7..3b7f4cdbf2e0a 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -15,6 +15,7 @@
 #include <asm/desc.h>
 #include <asm/cacheflush.h>
 #include <asm/realmode.h>
+#include <asm/hypervisor.h>
 
 #include <linux/ftrace.h>
 #include "../../realmode/rm/wakeup.h"
@@ -140,9 +141,9 @@ static int __init acpi_sleep_setup(char *str)
 			acpi_realmode_flags |= 4;
 #ifdef CONFIG_HIBERNATION
 		if (strncmp(str, "s4_hwsig", 8) == 0)
-			acpi_check_s4_hw_signature(1);
+			acpi_check_s4_hw_signature = 1;
 		if (strncmp(str, "s4_nohwsig", 10) == 0)
-			acpi_check_s4_hw_signature(0);
+			acpi_check_s4_hw_signature = 0;
 #endif
 		if (strncmp(str, "nonvs", 5) == 0)
 			acpi_nvs_nosave();
@@ -160,3 +161,21 @@ static int __init acpi_sleep_setup(char *str)
 }
 
 __setup("acpi_sleep=", acpi_sleep_setup);
+
+#if defined(CONFIG_HIBERNATION) && defined(CONFIG_HYPERVISOR_GUEST)
+static int __init init_s4_sigcheck(void)
+{
+	/*
+	 * If running on a hypervisor, honour the ACPI specification
+	 * by default and trigger a clean reboot when the hardware
+	 * signature in FACS is changed after hibernation.
+	 */
+	if (acpi_check_s4_hw_signature == -1 &&
+	    !hypervisor_is_type(X86_HYPER_NATIVE))
+		acpi_check_s4_hw_signature = 1;
+
+	return 0;
+}
+/* This must happen before acpi_init() which is a subsys initcall */
+arch_initcall(init_s4_sigcheck);
+#endif
diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index d4fbea91ab6b8..74e54fd44b8e2 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -869,12 +869,7 @@ static inline void acpi_sleep_syscore_init(void) {}
 #ifdef CONFIG_HIBERNATION
 static unsigned long s4_hardware_signature;
 static struct acpi_table_facs *facs;
-static int sigcheck = -1; /* Default behaviour is just to warn */
-
-void __init acpi_check_s4_hw_signature(int check)
-{
-	sigcheck = check;
-}
+int acpi_check_s4_hw_signature = -1; /* Default behaviour is just to warn */
 
 static int acpi_hibernation_begin(pm_message_t stage)
 {
@@ -999,7 +994,7 @@ static void acpi_sleep_hibernate_setup(void)
 	hibernation_set_ops(old_suspend_ordering ?
 			&acpi_hibernation_ops_old : &acpi_hibernation_ops);
 	sleep_states[ACPI_STATE_S4] = 1;
-	if (!sigcheck)
+	if (!acpi_check_s4_hw_signature)
 		return;
 
 	acpi_get_table(ACPI_SIG_FACS, 1, (struct acpi_table_header **)&facs);
@@ -1011,7 +1006,7 @@ static void acpi_sleep_hibernate_setup(void)
 		 */
 		s4_hardware_signature = facs->hardware_signature;
 
-		if (sigcheck > 0) {
+		if (acpi_check_s4_hw_signature > 0) {
 			/*
 			 * If we're actually obeying the ACPI specification
 			 * then the signature is written out as part of the
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 6274758648e37..766dbcb82df12 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -526,7 +526,7 @@ acpi_status acpi_release_memory(acpi_handle handle, struct resource *res,
 int acpi_resources_are_enforced(void);
 
 #ifdef CONFIG_HIBERNATION
-void __init acpi_check_s4_hw_signature(int check);
+extern int acpi_check_s4_hw_signature;
 #endif
 
 #ifdef CONFIG_PM_SLEEP
-- 
GitLab


From 16c02447f3e1d7f86829ef66ce294aac38bd6b4c Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Fri, 11 Mar 2022 14:14:13 -0800
Subject: [PATCH 1497/1586] Documentation: thermal: DPTF Documentation

Document Intel Dynamic Platform and Thermal Framework (DPTF)
ABI.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/driver-api/thermal/index.rst    |   1 +
 .../driver-api/thermal/intel_dptf.rst         | 272 ++++++++++++++++++
 2 files changed, 273 insertions(+)
 create mode 100644 Documentation/driver-api/thermal/intel_dptf.rst

diff --git a/Documentation/driver-api/thermal/index.rst b/Documentation/driver-api/thermal/index.rst
index 4cb0b9b6bfb8b..030306ffa408b 100644
--- a/Documentation/driver-api/thermal/index.rst
+++ b/Documentation/driver-api/thermal/index.rst
@@ -17,3 +17,4 @@ Thermal
    intel_powerclamp
    nouveau_thermal
    x86_pkg_temperature_thermal
+   intel_dptf
diff --git a/Documentation/driver-api/thermal/intel_dptf.rst b/Documentation/driver-api/thermal/intel_dptf.rst
new file mode 100644
index 0000000000000..96668dca753a8
--- /dev/null
+++ b/Documentation/driver-api/thermal/intel_dptf.rst
@@ -0,0 +1,272 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===============================================================
+Intel(R) Dynamic Platform and Thermal Framework Sysfs Interface
+===============================================================
+
+:Copyright: |copy| 2022 Intel Corporation
+
+:Author: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+
+Introduction
+------------
+
+Intel(R) Dynamic Platform and Thermal Framework (DPTF) is a platform
+level hardware/software solution for power and thermal management.
+
+As a container for multiple power/thermal technologies, DPTF provides
+a coordinated approach for different policies to effect the hardware
+state of a system.
+
+Since it is a platform level framework, this has several components.
+Some parts of the technology is implemented in the firmware and uses
+ACPI and PCI devices to expose various features for monitoring and
+control. Linux has a set of kernel drivers exposing hardware interface
+to user space. This allows user space thermal solutions like
+"Linux Thermal Daemon" to read platform specific thermal and power
+tables to deliver adequate performance while keeping the system under
+thermal limits.
+
+DPTF ACPI Drivers interface
+----------------------------
+
+:file:`/sys/bus/platform/devices/<N>/uuids`, where <N>
+=INT3400|INTC1040|INTC1041|INTC10A0
+
+``available_uuids`` (RO)
+	A set of UUIDs strings presenting available policies
+	which should be notified to the firmware when the
+	user space can support those policies.
+
+	UUID strings:
+
+	"42A441D6-AE6A-462b-A84B-4A8CE79027D3" : Passive 1
+
+	"3A95C389-E4B8-4629-A526-C52C88626BAE" : Active
+
+	"97C68AE7-15FA-499c-B8C9-5DA81D606E0A" : Critical
+
+	"63BE270F-1C11-48FD-A6F7-3AF253FF3E2D" : Adaptive performance
+
+	"5349962F-71E6-431D-9AE8-0A635B710AEE" : Emergency call
+
+	"9E04115A-AE87-4D1C-9500-0F3E340BFE75" : Passive 2
+
+	"F5A35014-C209-46A4-993A-EB56DE7530A1" : Power Boss
+
+	"6ED722A7-9240-48A5-B479-31EEF723D7CF" : Virtual Sensor
+
+	"16CAF1B7-DD38-40ED-B1C1-1B8A1913D531" : Cooling mode
+
+	"BE84BABF-C4D4-403D-B495-3128FD44dAC1" : HDC
+
+``current_uuid`` (RW)
+	User space can write strings from available UUIDs, one at a
+	time.
+
+:file:`/sys/bus/platform/devices/<N>/`, where <N>
+=INT3400|INTC1040|INTC1041|INTC10A0
+
+``imok`` (WO)
+	User space daemon write 1 to respond to firmware event
+	for sending keep alive notification. User space receives
+	THERMAL_EVENT_KEEP_ALIVE kobject uevent notification when
+	firmware calls for user space to respond with imok ACPI
+	method.
+
+``odvp*`` (RO)
+	Firmware thermal status variable values. Thermal tables
+	calls for different processing based on these variable
+	values.
+
+``data_vault`` (RO)
+	Binary thermal table. Refer to
+	https:/github.com/intel/thermal_daemon for decoding
+	thermal table.
+
+
+ACPI Thermal Relationship table interface
+------------------------------------------
+
+:file:`/dev/acpi_thermal_rel`
+
+	This device provides IOCTL interface to read standard ACPI
+	thermal relationship tables via ACPI methods _TRT and _ART.
+	These IOCTLs are defined in
+	drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.h
+
+	IOCTLs:
+
+	ACPI_THERMAL_GET_TRT_LEN: Get length of TRT table
+
+	ACPI_THERMAL_GET_ART_LEN: Get length of ART table
+
+	ACPI_THERMAL_GET_TRT_COUNT: Number of records in TRT table
+
+	ACPI_THERMAL_GET_ART_COUNT: Number of records in ART table
+
+	ACPI_THERMAL_GET_TRT: Read binary TRT table, length to read is
+	provided via argument to ioctl().
+
+	ACPI_THERMAL_GET_ART: Read binary ART table, length to read is
+	provided via argument to ioctl().
+
+DPTF ACPI Sensor drivers
+-------------------------
+
+DPTF Sensor drivers are presented as standard thermal sysfs thermal_zone.
+
+
+DPTF ACPI Cooling drivers
+--------------------------
+
+DPTF cooling drivers are presented as standard thermal sysfs cooling_device.
+
+
+DPTF Processor thermal PCI Driver interface
+--------------------------------------------
+
+:file:`/sys/bus/pci/devices/0000\:00\:04.0/power_limits/`
+
+Refer to Documentation/power/powercap/powercap.rst for powercap
+ABI.
+
+``power_limit_0_max_uw`` (RO)
+	Maximum powercap sysfs constraint_0_power_limit_uw for Intel RAPL
+
+``power_limit_0_step_uw`` (RO)
+	Power limit increment/decrements for Intel RAPL constraint 0 power limit
+
+``power_limit_0_min_uw`` (RO)
+	Minimum powercap sysfs constraint_0_power_limit_uw for Intel RAPL
+
+``power_limit_0_tmin_us`` (RO)
+	Minimum powercap sysfs constraint_0_time_window_us for Intel RAPL
+
+``power_limit_0_tmax_us`` (RO)
+	Maximum powercap sysfs constraint_0_time_window_us for Intel RAPL
+
+``power_limit_1_max_uw`` (RO)
+	Maximum powercap sysfs constraint_1_power_limit_uw for Intel RAPL
+
+``power_limit_1_step_uw`` (RO)
+	Power limit increment/decrements for Intel RAPL constraint 1 power limit
+
+``power_limit_1_min_uw`` (RO)
+	Minimum powercap sysfs constraint_1_power_limit_uw for Intel RAPL
+
+``power_limit_1_tmin_us`` (RO)
+	Minimum powercap sysfs constraint_1_time_window_us for Intel RAPL
+
+``power_limit_1_tmax_us`` (RO)
+	Maximum powercap sysfs constraint_1_time_window_us for Intel RAPL
+
+:file:`/sys/bus/pci/devices/0000\:00\:04.0/`
+
+``tcc_offset_degree_celsius`` (RW)
+	TCC offset from the critical temperature where hardware will throttle
+	CPU.
+
+:file:`/sys/bus/pci/devices/0000\:00\:04.0/workload_request`
+
+``workload_available_types`` (RO)
+	Available workload types. User space can specify one of the workload type
+	it is currently executing via workload_type. For example: idle, bursty,
+	sustained etc.
+
+``workload_type`` (RW)
+	User space can specify any one of the available workload type using
+	this interface.
+
+DPTF Processor thermal RFIM interface
+--------------------------------------------
+
+RFIM interface allows adjustment of FIVR (Fully Integrated Voltage Regulator)
+and DDR (Double Data Rate)frequencies to avoid RF interference with WiFi and 5G.
+
+Switching voltage regulators (VR) generate radiated EMI or RFI at the
+fundamental frequency and its harmonics. Some harmonics may interfere
+with very sensitive wireless receivers such as Wi-Fi and cellular that
+are integrated into host systems like notebook PCs.  One of mitigation
+methods is requesting SOC integrated VR (IVR) switching frequency to a
+small % and shift away the switching noise harmonic interference from
+radio channels.  OEM or ODMs can use the driver to control SOC IVR
+operation within the range where it does not impact IVR performance.
+
+DRAM devices of DDR IO interface and their power plane can generate EMI
+at the data rates. Similar to IVR control mechanism, Intel offers a
+mechanism by which DDR data rates can be changed if several conditions
+are met: there is strong RFI interference because of DDR; CPU power
+management has no other restriction in changing DDR data rates;
+PC ODMs enable this feature (real time DDR RFI Mitigation referred to as
+DDR-RFIM) for Wi-Fi from BIOS.
+
+
+FIVR attributes
+
+:file:`/sys/bus/pci/devices/0000\:00\:04.0/fivr/`
+
+``vco_ref_code_lo`` (RW)
+	The VCO reference code is an 11-bit field and controls the FIVR
+	switching frequency. This is the 3-bit LSB field.
+
+``vco_ref_code_hi`` (RW)
+	The VCO reference code is an 11-bit field and controls the FIVR
+	switching frequency. This is the 8-bit MSB field.
+
+``spread_spectrum_pct`` (RW)
+	Set the FIVR spread spectrum clocking percentage
+
+``spread_spectrum_clk_enable`` (RW)
+	Enable/disable of the FIVR spread spectrum clocking feature
+
+``rfi_vco_ref_code`` (RW)
+	This field is a read only status register which reflects the
+	current FIVR switching frequency
+
+``fivr_fffc_rev`` (RW)
+	This field indicated the revision of the FIVR HW.
+
+
+DVFS attributes
+
+:file:`/sys/bus/pci/devices/0000\:00\:04.0/dvfs/`
+
+``rfi_restriction_run_busy`` (RW)
+	Request the restriction of specific DDR data rate and set this
+	value 1. Self reset to 0 after operation.
+
+``rfi_restriction_err_code`` (RW)
+	0 :Request is accepted, 1:Feature disabled,
+	2: the request restricts more points than it is allowed
+
+``rfi_restriction_data_rate_Delta`` (RW)
+	Restricted DDR data rate for RFI protection: Lower Limit
+
+``rfi_restriction_data_rate_Base`` (RW)
+	Restricted DDR data rate for RFI protection: Upper Limit
+
+``ddr_data_rate_point_0`` (RO)
+	DDR data rate selection 1st point
+
+``ddr_data_rate_point_1`` (RO)
+	DDR data rate selection 2nd point
+
+``ddr_data_rate_point_2`` (RO)
+	DDR data rate selection 3rd point
+
+``ddr_data_rate_point_3`` (RO)
+	DDR data rate selection 4th point
+
+``rfi_disable (RW)``
+	Disable DDR rate change feature
+
+DPTF Power supply and Battery Interface
+----------------------------------------
+
+Refer to Documentation/ABI/testing/sysfs-platform-dptf
+
+DPTF Fan Control
+----------------------------------------
+
+Refer to Documentation/admin-guide/acpi/fan_performance_states.rst
-- 
GitLab


From 668f69a5f863b877bc3ae129efe9a80b6f055141 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Mon, 14 Mar 2022 15:08:55 -0700
Subject: [PATCH 1498/1586] thermal: int340x: Increase bitmap size

The number of policies are 10, so can't be supported by the bitmap size
of u8.

Even though there are no platfoms with these many policies, but
for correctness increase to u32.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Fixes: 16fc8eca1975 ("thermal/int340x_thermal: Add additional UUIDs")
Cc: 5.1+ <stable@vger.kernel.org> # 5.1+
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/thermal/intel/int340x_thermal/int3400_thermal.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c
index 6067bb0e75b4c..a0b599100106b 100644
--- a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c
+++ b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c
@@ -53,7 +53,7 @@ struct int3400_thermal_priv {
 	struct art *arts;
 	int trt_count;
 	struct trt *trts;
-	u8 uuid_bitmap;
+	u32 uuid_bitmap;
 	int rel_misc_dev_res;
 	int current_uuid_index;
 	char *data_vault;
-- 
GitLab


From c7ff29763989bd09c433f73fae3c1e1c15d9cda4 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Mon, 14 Mar 2022 15:09:37 -0700
Subject: [PATCH 1499/1586] thermal: int340x: Update OS policy capability
 handshake

Update the firmware with OS supported policies mask, so	that firmware can
relinquish its internal controls. Without this update several Tiger Lake
laptops gets performance limited with in few seconds of executing in
turbo region.

The existing way of enumerating firmware policies via IDSP method and
selecting policy by directly writing those policy UUIDS via _OSC method
is not supported in newer generation of hardware.

There is a new UUID "B23BA85D-C8B7-3542-88DE-8DE2FFCFD698" is defined for
updating policy capabilities. As part of ACPI _OSC method:

Arg0 - UUID: B23BA85D-C8B7-3542-88DE-8DE2FFCFD698
Arg1 - Rev ID: 1
Arg2 - Count: 2
Arg3 - Capability buffers: Array of Arg2 DWORDS

DWORD1: As defined in the ACPI 5.0 Specification
- Bit 0: Query Flag
- Bits 1-3: Always 0
- Bits 4-31: Reserved

DWORD2 and beyond:
- Bit0: set to 1 to indicate Intel(R) Dynamic Tuning is active, 0 to
indicate it is disabled and legacy thermal mechanism should
be enabled.
- Bit1: set to 1 to indicate Intel(R) Dynamic Tuning is controlling
active cooling, 0 to indicate bios shall enable legacy thermal
zone with active trip point.
- Bit2: set to 1 to indicate Intel(R) Dynamic Tuning is controlling
passive cooling, 0 to indicate bios shall enable legacy thermal
zone with passive trip point.
- Bit3: set to 1 to indicate Intel(R) Dynamic Tuning is handling
critical trip point, 0 to indicate bios shall enable legacy
thermal zone with critical trip point.
- Bits 4:31: Reserved

From sysfs interface, there is an existing interface to update policy
UUID using attribute "current_uuid". User space can write the same UUID
for ACTIVE, PASSIVE and CRITICAL policy. Driver converts these UUIDs to
DWORD2 Bit 1 to Bit 3. When any of the policy is activated by user
space it is assumed that dynamic tuning is active.

For example
$cd /sys/bus/platform/devices/INTC1040:00/uuids
To support active policy
$echo "3A95C389-E4B8-4629-A526-C52C88626BAE" > current_uuid
To support passive policy
$echo "42A441D6-AE6A-462b-A84B-4A8CE79027D3" > current_uuid
To support critical policy
$echo "97C68AE7-15FA-499c-B8C9-5DA81D606E0A" > current_uuid

To check all the supported policies
$cat current_uuid
3A95C389-E4B8-4629-A526-C52C88626BAE
42A441D6-AE6A-462b-A84B-4A8CE79027D3
97C68AE7-15FA-499c-B8C9-5DA81D606E0A

To match the bit format for DWORD2, rearranged enum int3400_thermal_uuid
and int3400_thermal_uuids[] by swapping current INT3400_THERMAL_ACTIVE
and INT3400_THERMAL_PASSIVE_1.

If the policies are enumerated via IDSP method then legacy method is
used, if not the new method is used to update policy support.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 .../intel/int340x_thermal/int3400_thermal.c   | 146 ++++++++++++------
 1 file changed, 97 insertions(+), 49 deletions(-)

diff --git a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c
index a0b599100106b..4954800b98502 100644
--- a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c
+++ b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c
@@ -17,8 +17,8 @@
 #define INT3400_KEEP_ALIVE 0xA0
 
 enum int3400_thermal_uuid {
+	INT3400_THERMAL_ACTIVE = 0,
 	INT3400_THERMAL_PASSIVE_1,
-	INT3400_THERMAL_ACTIVE,
 	INT3400_THERMAL_CRITICAL,
 	INT3400_THERMAL_ADAPTIVE_PERFORMANCE,
 	INT3400_THERMAL_EMERGENCY_CALL_MODE,
@@ -31,8 +31,8 @@ enum int3400_thermal_uuid {
 };
 
 static char *int3400_thermal_uuids[INT3400_THERMAL_MAXIMUM_UUID] = {
-	"42A441D6-AE6A-462b-A84B-4A8CE79027D3",
 	"3A95C389-E4B8-4629-A526-C52C88626BAE",
+	"42A441D6-AE6A-462b-A84B-4A8CE79027D3",
 	"97C68AE7-15FA-499c-B8C9-5DA81D606E0A",
 	"63BE270F-1C11-48FD-A6F7-3AF253FF3E2D",
 	"5349962F-71E6-431D-9AE8-0A635B710AEE",
@@ -59,6 +59,7 @@ struct int3400_thermal_priv {
 	char *data_vault;
 	int odvp_count;
 	int *odvp;
+	u32 os_uuid_mask;
 	struct odvp_attr *odvp_attrs;
 };
 
@@ -142,12 +143,55 @@ static ssize_t current_uuid_show(struct device *dev,
 				 struct device_attribute *devattr, char *buf)
 {
 	struct int3400_thermal_priv *priv = dev_get_drvdata(dev);
+	int i, length = 0;
+
+	if (priv->current_uuid_index > 0)
+		return sprintf(buf, "%s\n",
+			       int3400_thermal_uuids[priv->current_uuid_index]);
+
+	for (i = 0; i <= INT3400_THERMAL_CRITICAL; i++) {
+		if (priv->os_uuid_mask & BIT(i))
+			length += scnprintf(&buf[length],
+					    PAGE_SIZE - length,
+					    "%s\n",
+					    int3400_thermal_uuids[i]);
+	}
 
-	if (priv->current_uuid_index == -1)
-		return sprintf(buf, "INVALID\n");
+	if (length)
+		return length;
 
-	return sprintf(buf, "%s\n",
-		       int3400_thermal_uuids[priv->current_uuid_index]);
+	return sprintf(buf, "INVALID\n");
+}
+
+static int int3400_thermal_run_osc(acpi_handle handle, char *uuid_str, int *enable)
+{
+	u32 ret, buf[2];
+	acpi_status status;
+	int result = 0;
+	struct acpi_osc_context context = {
+		.uuid_str = NULL,
+		.rev = 1,
+		.cap.length = 8,
+	};
+
+	context.uuid_str = uuid_str;
+
+	buf[OSC_QUERY_DWORD] = 0;
+	buf[OSC_SUPPORT_DWORD] = *enable;
+
+	context.cap.pointer = buf;
+
+	status = acpi_run_osc(handle, &context);
+	if (ACPI_SUCCESS(status)) {
+		ret = *((u32 *)(context.ret.pointer + 4));
+		if (ret != *enable)
+			result = -EPERM;
+	} else
+		result = -EPERM;
+
+	kfree(context.ret.pointer);
+
+	return result;
 }
 
 static ssize_t current_uuid_store(struct device *dev,
@@ -164,16 +208,47 @@ static ssize_t current_uuid_store(struct device *dev,
 			 * If we have a list of supported UUIDs, make sure
 			 * this one is supported.
 			 */
-			if (priv->uuid_bitmap &&
-			    !(priv->uuid_bitmap & (1 << i)))
+			if (priv->uuid_bitmap & BIT(i)) {
+				priv->current_uuid_index = i;
+				return count;
+			}
+
+			/*
+			 * There is support of only 3 policies via the new
+			 * _OSC to inform OS capability:
+			 * INT3400_THERMAL_ACTIVE
+			 * INT3400_THERMAL_PASSIVE_1
+			 * INT3400_THERMAL_CRITICAL
+			 */
+
+			if (i > INT3400_THERMAL_CRITICAL)
 				return -EINVAL;
 
-			priv->current_uuid_index = i;
-			return count;
+			priv->os_uuid_mask |= BIT(i);
+
+			break;
 		}
 	}
 
-	return -EINVAL;
+	if (priv->os_uuid_mask) {
+		int cap, ret;
+
+		/*
+		 * Capability bits:
+		 * Bit 0: set to 1 to indicate DPTF is active
+		 * Bi1 1: set to 1 to active cooling is supported by user space daemon
+		 * Bit 2: set to 1 to passive cooling is supported by user space daemon
+		 * Bit 3: set to 1 to critical trip is handled by user space daemon
+		 */
+		cap = ((priv->os_uuid_mask << 1) | 0x01);
+		ret = int3400_thermal_run_osc(priv->adev->handle,
+					      "b23ba85d-c8b7-3542-88de-8de2ffcfd698",
+					      &cap);
+		if (ret)
+			return ret;
+	}
+
+	return count;
 }
 
 static DEVICE_ATTR_RW(current_uuid);
@@ -236,41 +311,6 @@ end:
 	return result;
 }
 
-static int int3400_thermal_run_osc(acpi_handle handle,
-				enum int3400_thermal_uuid uuid, bool enable)
-{
-	u32 ret, buf[2];
-	acpi_status status;
-	int result = 0;
-	struct acpi_osc_context context = {
-		.uuid_str = NULL,
-		.rev = 1,
-		.cap.length = 8,
-	};
-
-	if (uuid < 0 || uuid >= INT3400_THERMAL_MAXIMUM_UUID)
-		return -EINVAL;
-
-	context.uuid_str = int3400_thermal_uuids[uuid];
-
-	buf[OSC_QUERY_DWORD] = 0;
-	buf[OSC_SUPPORT_DWORD] = enable;
-
-	context.cap.pointer = buf;
-
-	status = acpi_run_osc(handle, &context);
-	if (ACPI_SUCCESS(status)) {
-		ret = *((u32 *)(context.ret.pointer + 4));
-		if (ret != enable)
-			result = -EPERM;
-	} else
-		result = -EPERM;
-
-	kfree(context.ret.pointer);
-
-	return result;
-}
-
 static ssize_t odvp_show(struct kobject *kobj, struct kobj_attribute *attr,
 			 char *buf)
 {
@@ -426,10 +466,18 @@ static int int3400_thermal_change_mode(struct thermal_zone_device *thermal,
 	if (!priv)
 		return -EINVAL;
 
-	if (mode != thermal->mode)
+	if (mode != thermal->mode) {
+		int enabled;
+
+		if (priv->current_uuid_index < 0 ||
+		    priv->current_uuid_index >= INT3400_THERMAL_MAXIMUM_UUID)
+			return -EINVAL;
+
+		enabled = (mode == THERMAL_DEVICE_ENABLED);
 		result = int3400_thermal_run_osc(priv->adev->handle,
-						priv->current_uuid_index,
-						mode == THERMAL_DEVICE_ENABLED);
+						 int3400_thermal_uuids[priv->current_uuid_index],
+						 &enabled);
+	}
 
 
 	evaluate_odvp(priv);
-- 
GitLab


From 1447c635802fd0f5e213ad5277753108d56a4db3 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 14 Dec 2021 15:58:52 +0200
Subject: [PATCH 1500/1586] Revert "arm64: dts: freescale: Fix 'interrupt-map'
 parent address cells"

This reverts commit 869f0ec048dc8fd88c0b2003373bd985795179fb. That
updated the expected device tree binding format for the ls-extirq
driver, without also updating the parsing code (ls_extirq_parse_map)
to the new format.

The context is that the ls-extirq driver uses the standard
"interrupt-map" OF property in a non-standard way, as suggested by
Rob Herring during review:
https://lore.kernel.org/lkml/20190927161118.GA19333@bogus/

This has turned out to be problematic, as Marc Zyngier discovered
through commit 041284181226 ("of/irq: Allow matching of an interrupt-map
local to an interrupt controller"), later fixed through commit
de4adddcbcc2 ("of/irq: Add a quirk for controllers with their own
definition of interrupt-map"). Marc's position, expressed on multiple
opportunities, is that:

(a) [ making private use of the reserved "interrupt-map" name in a
    driver ] "is wrong, by the very letter of what an interrupt-map
    means. If the interrupt map points to an interrupt controller,
    that's the target for the interrupt."
https://lore.kernel.org/lkml/87k0g8jlmg.wl-maz@kernel.org/

(b) [ updating the driver's bindings to accept a non-reserved name for
    this property, as an alternative, is ] "is totally pointless. These
    machines have been in the wild for years, and existing DTs will be
    there *forever*."
https://lore.kernel.org/lkml/87ilvrk1r0.wl-maz@kernel.org/

Considering the above, the Linux kernel has quirks in place to deal with
the ls-extirq's non-standard use of the "interrupt-map". These quirks
may be needed in other operating systems that consume this device tree,
yet this is seen as the only viable solution.

Therefore, the premise of the patch being reverted here is invalid.
It doesn't matter whether the driver, in its non-standard use of the
property, complies to the standard format or not, since this property
isn't expected to be used for interrupt translation by the core.

This change restores LS1088A, LS2088A/LS2085A and LX2160A to their
previous bindings, which allows these systems to continue to use
external interrupt lines with the correct polarity.

Fixes: 869f0ec048dc ("arm64: dts: freescale: Fix 'interrupt-map' parent address cells")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Acked-by: Marc Zyngier <maz@kernel.org>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 .../arm64/boot/dts/freescale/fsl-ls1088a.dtsi | 24 +++++++++----------
 .../arm64/boot/dts/freescale/fsl-ls208xa.dtsi | 24 +++++++++----------
 .../arm64/boot/dts/freescale/fsl-lx2160a.dtsi | 24 +++++++++----------
 3 files changed, 36 insertions(+), 36 deletions(-)

diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi
index 3ed1f2c51cadf..18e529118476a 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi
@@ -253,18 +253,18 @@
 				interrupt-controller;
 				reg = <0x14 4>;
 				interrupt-map =
-					<0 0 &gic 0 0 GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>,
-					<1 0 &gic 0 0 GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>,
-					<2 0 &gic 0 0 GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>,
-					<3 0 &gic 0 0 GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>,
-					<4 0 &gic 0 0 GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>,
-					<5 0 &gic 0 0 GIC_SPI 5 IRQ_TYPE_LEVEL_HIGH>,
-					<6 0 &gic 0 0 GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>,
-					<7 0 &gic 0 0 GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>,
-					<8 0 &gic 0 0 GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>,
-					<9 0 &gic 0 0 GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>,
-					<10 0 &gic 0 0 GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>,
-					<11 0 &gic 0 0 GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>;
+					<0 0 &gic GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>,
+					<1 0 &gic GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>,
+					<2 0 &gic GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>,
+					<3 0 &gic GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>,
+					<4 0 &gic GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>,
+					<5 0 &gic GIC_SPI 5 IRQ_TYPE_LEVEL_HIGH>,
+					<6 0 &gic GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>,
+					<7 0 &gic GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>,
+					<8 0 &gic GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>,
+					<9 0 &gic GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>,
+					<10 0 &gic GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>,
+					<11 0 &gic GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>;
 				interrupt-map-mask = <0xffffffff 0x0>;
 			};
 		};
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi
index 3cb9c21d2775a..1282b61da8a55 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi
@@ -293,18 +293,18 @@
 				interrupt-controller;
 				reg = <0x14 4>;
 				interrupt-map =
-					<0 0 &gic 0 0 GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>,
-					<1 0 &gic 0 0 GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>,
-					<2 0 &gic 0 0 GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>,
-					<3 0 &gic 0 0 GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>,
-					<4 0 &gic 0 0 GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>,
-					<5 0 &gic 0 0 GIC_SPI 5 IRQ_TYPE_LEVEL_HIGH>,
-					<6 0 &gic 0 0 GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>,
-					<7 0 &gic 0 0 GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>,
-					<8 0 &gic 0 0 GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>,
-					<9 0 &gic 0 0 GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>,
-					<10 0 &gic 0 0 GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>,
-					<11 0 &gic 0 0 GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>;
+					<0 0 &gic GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>,
+					<1 0 &gic GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>,
+					<2 0 &gic GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>,
+					<3 0 &gic GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>,
+					<4 0 &gic GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>,
+					<5 0 &gic GIC_SPI 5 IRQ_TYPE_LEVEL_HIGH>,
+					<6 0 &gic GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>,
+					<7 0 &gic GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>,
+					<8 0 &gic GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>,
+					<9 0 &gic GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>,
+					<10 0 &gic GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>,
+					<11 0 &gic GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>;
 				interrupt-map-mask = <0xffffffff 0x0>;
 			};
 		};
diff --git a/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi b/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi
index 7032505f5ef3a..3c611cb4f5fef 100644
--- a/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi
@@ -680,18 +680,18 @@
 				interrupt-controller;
 				reg = <0x14 4>;
 				interrupt-map =
-					<0 0 &gic 0 0 GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>,
-					<1 0 &gic 0 0 GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>,
-					<2 0 &gic 0 0 GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>,
-					<3 0 &gic 0 0 GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>,
-					<4 0 &gic 0 0 GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>,
-					<5 0 &gic 0 0 GIC_SPI 5 IRQ_TYPE_LEVEL_HIGH>,
-					<6 0 &gic 0 0 GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>,
-					<7 0 &gic 0 0 GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>,
-					<8 0 &gic 0 0 GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>,
-					<9 0 &gic 0 0 GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>,
-					<10 0 &gic 0 0 GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>,
-					<11 0 &gic 0 0 GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>;
+					<0 0 &gic GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>,
+					<1 0 &gic GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>,
+					<2 0 &gic GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>,
+					<3 0 &gic GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>,
+					<4 0 &gic GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>,
+					<5 0 &gic GIC_SPI 5 IRQ_TYPE_LEVEL_HIGH>,
+					<6 0 &gic GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>,
+					<7 0 &gic GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>,
+					<8 0 &gic GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>,
+					<9 0 &gic GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>,
+					<10 0 &gic GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>,
+					<11 0 &gic GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>;
 				interrupt-map-mask = <0xffffffff 0x0>;
 			};
 		};
-- 
GitLab


From 521d61fc760aebdbf8938347a40c9538c0a70034 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Wed, 16 Mar 2022 12:53:23 -0600
Subject: [PATCH 1501/1586] io_uring: move req->poll_refs into previous struct
 hole

This serves two purposes:

- We now have the last cacheline mostly unused for generic workloads,
  instead of having to pull in the poll refs explicitly for workloads
  that rely on poll arming.

- It shrinks the io_kiocb from 232 to 224 bytes.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index b17cf54653df6..fa4e2cb47e566 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -908,6 +908,7 @@ struct io_kiocb {
 	/* used by request caches, completion batching and iopoll */
 	struct io_wq_work_node		comp_list;
 	atomic_t			refs;
+	atomic_t			poll_refs;
 	struct io_kiocb			*link;
 	struct io_task_work		io_task_work;
 	/* for polled requests, i.e. IORING_OP_POLL_ADD and async armed poll */
@@ -916,12 +917,11 @@ struct io_kiocb {
 	struct async_poll		*apoll;
 	/* opcode allocated if it needs to store data for async defer */
 	void				*async_data;
-	struct io_wq_work		work;
 	/* custom credentials, valid IFF REQ_F_CREDS is set */
-	const struct cred		*creds;
 	/* stores selected buf, valid IFF REQ_F_BUFFER_SELECTED is set */
 	struct io_buffer		*kbuf;
-	atomic_t			poll_refs;
+	const struct cred		*creds;
+	struct io_wq_work		work;
 };
 
 struct io_tctx_node {
-- 
GitLab


From b57b84968865780262f191cc95d688d7f82e681c Mon Sep 17 00:00:00 2001
From: Tang Yizhou <tangyizhou@huawei.com>
Date: Sat, 12 Mar 2022 15:07:51 +0800
Subject: [PATCH 1502/1586] docs: scheduler: Convert schedutil.txt to ReST

All other scheduler documents have been converted to *.rst. Let's do
the same for schedutil.txt.

Also fixed some typos.

Signed-off-by: Tang Yizhou <tangyizhou@huawei.com>
Link: https://lore.kernel.org/r/20220312070751.16844-1-tangyizhou@huawei.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/scheduler/index.rst             |  1 +
 .../{schedutil.txt => schedutil.rst}          | 30 +++++++++++--------
 2 files changed, 18 insertions(+), 13 deletions(-)
 rename Documentation/scheduler/{schedutil.txt => schedutil.rst} (92%)

diff --git a/Documentation/scheduler/index.rst b/Documentation/scheduler/index.rst
index 88900aabdbf7b..693eaac769a70 100644
--- a/Documentation/scheduler/index.rst
+++ b/Documentation/scheduler/index.rst
@@ -14,6 +14,7 @@ Linux Scheduler
     sched-domains
     sched-capacity
     sched-energy
+    schedutil
     sched-nice-design
     sched-rt-group
     sched-stats
diff --git a/Documentation/scheduler/schedutil.txt b/Documentation/scheduler/schedutil.rst
similarity index 92%
rename from Documentation/scheduler/schedutil.txt
rename to Documentation/scheduler/schedutil.rst
index 78f6b91e22914..32c7d69fc86c1 100644
--- a/Documentation/scheduler/schedutil.txt
+++ b/Documentation/scheduler/schedutil.rst
@@ -1,11 +1,15 @@
+=========
+Schedutil
+=========
 
+.. note::
 
-NOTE; all this assumes a linear relation between frequency and work capacity,
-we know this is flawed, but it is the best workable approximation.
+   All this assumes a linear relation between frequency and work capacity,
+   we know this is flawed, but it is the best workable approximation.
 
 
 PELT (Per Entity Load Tracking)
--------------------------------
+===============================
 
 With PELT we track some metrics across the various scheduler entities, from
 individual tasks to task-group slices to CPU runqueues. As the basis for this
@@ -38,8 +42,8 @@ while 'runnable' will increase to reflect the amount of contention.
 For more detail see: kernel/sched/pelt.c
 
 
-Frequency- / CPU Invariance
----------------------------
+Frequency / CPU Invariance
+==========================
 
 Because consuming the CPU for 50% at 1GHz is not the same as consuming the CPU
 for 50% at 2GHz, nor is running 50% on a LITTLE CPU the same as running 50% on
@@ -47,7 +51,7 @@ a big CPU, we allow architectures to scale the time delta with two ratios, one
 Dynamic Voltage and Frequency Scaling (DVFS) ratio and one microarch ratio.
 
 For simple DVFS architectures (where software is in full control) we trivially
-compute the ratio as:
+compute the ratio as::
 
 	    f_cur
   r_dvfs := -----
@@ -55,7 +59,7 @@ compute the ratio as:
 
 For more dynamic systems where the hardware is in control of DVFS we use
 hardware counters (Intel APERF/MPERF, ARMv8.4-AMU) to provide us this ratio.
-For Intel specifically, we use:
+For Intel specifically, we use::
 
 	   APERF
   f_cur := ----- * P0
@@ -87,7 +91,7 @@ For more detail see:
 
 
 UTIL_EST / UTIL_EST_FASTUP
---------------------------
+==========================
 
 Because periodic tasks have their averages decayed while they sleep, even
 though when running their expected utilization will be the same, they suffer a
@@ -106,7 +110,7 @@ For more detail see: kernel/sched/fair.c:util_est_dequeue()
 
 
 UCLAMP
-------
+======
 
 It is possible to set effective u_min and u_max clamps on each CFS or RT task;
 the runqueue keeps an max aggregate of these clamps for all running tasks.
@@ -115,7 +119,7 @@ For more detail see: include/uapi/linux/sched/types.h
 
 
 Schedutil / DVFS
-----------------
+================
 
 Every time the scheduler load tracking is updated (task wakeup, task
 migration, time progression) we call out to schedutil to update the hardware
@@ -123,7 +127,7 @@ DVFS state.
 
 The basis is the CPU runqueue's 'running' metric, which per the above it is
 the frequency invariant utilization estimate of the CPU. From this we compute
-a desired frequency like:
+a desired frequency like::
 
              max( running, util_est );	if UTIL_EST
   u_cfs := { running;			otherwise
@@ -135,7 +139,7 @@ a desired frequency like:
 
   f_des := min( f_max, 1.25 u * f_max )
 
-XXX IO-wait; when the update is due to a task wakeup from IO-completion we
+XXX IO-wait: when the update is due to a task wakeup from IO-completion we
 boost 'u' above.
 
 This frequency is then used to select a P-state/OPP or directly munged into a
@@ -153,7 +157,7 @@ For more information see: kernel/sched/cpufreq_schedutil.c
 
 
 NOTES
------
+=====
 
  - On low-load scenarios, where DVFS is most relevant, the 'running' numbers
    will closely reflect utilization.
-- 
GitLab


From c01c2e435db8b62bd67a50e9622d59ca7c7953b7 Mon Sep 17 00:00:00 2001
From: Tang Yizhou <tangyizhou@huawei.com>
Date: Sat, 12 Mar 2022 15:26:42 +0800
Subject: [PATCH 1503/1586] docs/zh_CN: Add sched-nice-design Chinese
 translation

Translate scheduler/sched-nice-design.rst into Chinese.

Signed-off-by: Tang Yizhou <tangyizhou@huawei.com>
Reviewed-by: Yanteng Si <siyanteng@loongson.cn>
Reviewed-by: Alex Shi <alexs@kernel.org>
Link: https://lore.kernel.org/r/20220312072642.23118-1-tangyizhou@huawei.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../translations/zh_CN/scheduler/index.rst    |  2 +-
 .../zh_CN/scheduler/sched-nice-design.rst     | 99 +++++++++++++++++++
 2 files changed, 100 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/translations/zh_CN/scheduler/sched-nice-design.rst

diff --git a/Documentation/translations/zh_CN/scheduler/index.rst b/Documentation/translations/zh_CN/scheduler/index.rst
index c020f83827854..12bf3bd02ccfa 100644
--- a/Documentation/translations/zh_CN/scheduler/index.rst
+++ b/Documentation/translations/zh_CN/scheduler/index.rst
@@ -25,12 +25,12 @@ Linux调度器
     sched-domains
     sched-capacity
     sched-energy
+    sched-nice-design
     sched-stats
 
 TODOList:
 
     sched-deadline
-    sched-nice-design
     sched-rt-group
 
     text_files
diff --git a/Documentation/translations/zh_CN/scheduler/sched-nice-design.rst b/Documentation/translations/zh_CN/scheduler/sched-nice-design.rst
new file mode 100644
index 0000000000000..9107f0c0b979f
--- /dev/null
+++ b/Documentation/translations/zh_CN/scheduler/sched-nice-design.rst
@@ -0,0 +1,99 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/scheduler/sched-nice-design.rst
+
+:翻译:
+
+  唐艺舟 Tang Yizhou <tangyeechou@gmail.com>
+
+=====================
+调度器nice值设计
+=====================
+
+本文档解释了新的Linux调度器中修改和精简后的nice级别的实现思路。
+
+Linux的nice级别总是非常脆弱，人们持续不断地缠着我们，让nice +19的任务占用
+更少的CPU时间。
+
+不幸的是，在旧的调度器中，这不是那么容易实现的（否则我们早就做到了），因为对
+nice级别的支持在历史上是与时间片长度耦合的，而时间片单位是由HZ滴答驱动的，
+所以最小的时间片是1/HZ。
+
+在O(1)调度器中（2003年），我们改变了负的nice级别，使它们比2.4内核更强
+（人们对这一变化很满意），而且我们还故意校正了线性时间片准则，使得nice +19
+的级别 _正好_ 是1 jiffy。为了让大家更好地理解它，时间片的图会是这样的（质量
+不佳的ASCII艺术提醒！）::
+
+
+                   A
+             \     | [timeslice length]
+              \    |
+               \   |
+                \  |
+                 \ |
+                  \|___100msecs
+                   |^ . _
+                   |      ^ . _
+                   |            ^ . _
+ -*----------------------------------*-----> [nice level]
+ -20               |                +19
+                   |
+                   |
+
+因此，如果有人真的想renice任务，相较线性规则，+19会给出更大的效果（改变
+ABI来扩展优先级的解决方案在早期就被放弃了）。
+
+这种方法在一定程度上奏效了一段时间，但后来HZ=1000时，它导致1 jiffy为
+1ms，这意味着0.1%的CPU使用率，我们认为这有点过度。过度 _不是_ 因为它表示
+的CPU使用率过小，而是因为它引发了过于频繁（每毫秒1次）的重新调度（因此会
+破坏缓存，等等。请记住，硬件更弱、cache更小是很久以前的事了，当时人们在
+nice +19级别运行数量颇多的应用程序）。
+
+因此，对于HZ=1000，我们将nice +19改为5毫秒，因为这感觉像是正确的最小
+粒度——这相当于5%的CPU利用率。但nice +19的根本的HZ敏感属性依然保持不变，
+我们没有收到过关于nice +19在CPU利用率方面太 _弱_ 的任何抱怨，我们只收到
+过它（依然）太 _强_ 的抱怨 :-)。
+
+总结一下：我们一直想让nice各级别一致性更强，但在HZ和jiffies的限制下，以及
+nice级别与时间片、调度粒度耦合是令人讨厌的设计，这一目标并不真正可行。
+
+第二个关于Linux nice级别支持的抱怨是（不那么频繁，但仍然定期发生），它
+在原点周围的不对称性（你可以在上面的图片中看到），或者更准确地说：事实上
+nice级别的行为取决于 _绝对的_ nice级别，而nice应用程序接口本身从根本上
+说是“相对”的：
+
+   int nice(int inc);
+
+   asmlinkage long sys_nice(int increment)
+
+（第一个是glibc的应用程序接口，第二个是syscall的应用程序接口）
+注意，“inc”是相对当前nice级别而言的，类似bash的“nice”命令等工具是这个
+相对性应用程序接口的镜像。
+
+在旧的调度器中，举例来说，如果你以nice +1启动一个任务，并以nice +2启动
+另一个任务，这两个任务的CPU分配将取决于父外壳程序的nice级别——如果它是
+nice -10，那么CPU的分配不同于+5或+10。
+
+第三个关于Linux nice级别支持的抱怨是，负数nice级别“不够有力”，以很多人
+不得不诉诸于实时调度优先级来运行音频（和其它多媒体）应用程序，比如
+SCHED_FIFO。但这也造成了其它问题：SCHED_FIFO未被证明是免于饥饿的，一个
+有问题的SCHED_FIFO应用程序也会锁住运行良好的系统。
+
+v2.6.23版内核的新调度器解决了这三种类型的抱怨：
+
+为了解决第一个抱怨（nice级别不够“有力”），调度器与“时间片”、HZ的概念
+解耦（调度粒度被处理成一个和nice级别独立的概念），因此有可能实现更好、
+更一致的nice +19支持：在新的调度器中，nice +19的任务得到一个HZ无关的
+1.5%CPU使用率，而不是旧版调度器中3%-5%-9%的可变范围。
+
+为了解决第二个抱怨（nice各级别不一致），新调度器令调用nice(1)对各任务的
+CPU利用率有相同的影响，无论其绝对nice级别如何。所以在新调度器上，运行一个
+nice +10和一个nice +11的任务会与运行一个nice -5和一个nice -4的任务的
+CPU利用率分割是相同的（一个会得到55%的CPU，另一个会得到45%）。这是为什么
+nice级别被改为“乘法”（或指数）——这样的话，不管你从哪个级别开始，“相对”
+结果将总是一样的。
+
+第三个抱怨（负数nice级别不够“有力”，并迫使音频应用程序在更危险的
+SCHED_FIFO调度策略下运行）几乎被新的调度器自动解决了：更强的负数级别
+具有重新校正nice级别动态范围的自动化副作用。
-- 
GitLab


From 0a575497691486a75bdc3ae551094432b0761a84 Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Sat, 12 Mar 2022 21:03:33 +0100
Subject: [PATCH 1504/1586] parisc: Avoid calling SMP cache flush functions on
 cache-less machines

At least the qemu virtual machine does not provide D- and I-caches,
so skip triggering SMP irqs to flush caches on such machines.

Further optimize the caching code by using static branches and making
some functions static.

Signed-off-by: Helge Deller <deller@gmx.de>
---
 arch/parisc/include/asm/cacheflush.h | 15 +++------
 arch/parisc/kernel/alternative.c     | 10 ++++++
 arch/parisc/kernel/cache.c           | 49 ++++++++++++++--------------
 3 files changed, 40 insertions(+), 34 deletions(-)

diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h
index 859b8a34adcfb..e8b4a03343d39 100644
--- a/arch/parisc/include/asm/cacheflush.h
+++ b/arch/parisc/include/asm/cacheflush.h
@@ -9,16 +9,11 @@
 /* The usual comment is "Caches aren't brain-dead on the <architecture>".
  * Unfortunately, that doesn't apply to PA-RISC. */
 
-/* Internal implementation */
-void flush_data_cache_local(void *);  /* flushes local data-cache only */
-void flush_instruction_cache_local(void *); /* flushes local code-cache only */
-#ifdef CONFIG_SMP
-void flush_data_cache(void); /* flushes data-cache only (all processors) */
-void flush_instruction_cache(void); /* flushes i-cache only (all processors) */
-#else
-#define flush_data_cache() flush_data_cache_local(NULL)
-#define flush_instruction_cache() flush_instruction_cache_local(NULL)
-#endif
+#include <linux/jump_label.h>
+
+DECLARE_STATIC_KEY_TRUE(parisc_has_cache);
+DECLARE_STATIC_KEY_TRUE(parisc_has_dcache);
+DECLARE_STATIC_KEY_TRUE(parisc_has_icache);
 
 #define flush_cache_dup_mm(mm) flush_cache_mm(mm)
 
diff --git a/arch/parisc/kernel/alternative.c b/arch/parisc/kernel/alternative.c
index fa28c4c9f972a..daa1e9047275b 100644
--- a/arch/parisc/kernel/alternative.c
+++ b/arch/parisc/kernel/alternative.c
@@ -8,6 +8,7 @@
 #include <asm/processor.h>
 #include <asm/sections.h>
 #include <asm/alternative.h>
+#include <asm/cacheflush.h>
 
 #include <linux/module.h>
 
@@ -107,5 +108,14 @@ void __init apply_alternatives_all(void)
 	apply_alternatives((struct alt_instr *) &__alt_instructions,
 		(struct alt_instr *) &__alt_instructions_end, NULL);
 
+	if (cache_info.dc_size == 0 && cache_info.ic_size == 0) {
+		pr_info("alternatives: optimizing cache-flushes.\n");
+		static_branch_disable(&parisc_has_cache);
+	}
+	if (cache_info.dc_size == 0)
+		static_branch_disable(&parisc_has_dcache);
+	if (cache_info.ic_size == 0)
+		static_branch_disable(&parisc_has_icache);
+
 	set_kernel_text_rw(0);
 }
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index 81f36d6407f0c..231cebb3fc0de 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -38,6 +38,9 @@ EXPORT_SYMBOL(flush_dcache_page_asm);
 void purge_dcache_page_asm(unsigned long phys_addr, unsigned long vaddr);
 void flush_icache_page_asm(unsigned long phys_addr, unsigned long vaddr);
 
+/* Internal implementation in arch/parisc/kernel/pacache.S */
+void flush_data_cache_local(void *);  /* flushes local data-cache only */
+void flush_instruction_cache_local(void); /* flushes local code-cache only */
 
 /* On some machines (i.e., ones with the Merced bus), there can be
  * only a single PxTLB broadcast at a time; this must be guaranteed
@@ -58,26 +61,35 @@ struct pdc_cache_info cache_info __ro_after_init;
 static struct pdc_btlb_info btlb_info __ro_after_init;
 #endif
 
-#ifdef CONFIG_SMP
-void
-flush_data_cache(void)
+DEFINE_STATIC_KEY_TRUE(parisc_has_cache);
+DEFINE_STATIC_KEY_TRUE(parisc_has_dcache);
+DEFINE_STATIC_KEY_TRUE(parisc_has_icache);
+
+static void cache_flush_local_cpu(void *dummy)
 {
-	on_each_cpu(flush_data_cache_local, NULL, 1);
+	if (static_branch_likely(&parisc_has_icache))
+		flush_instruction_cache_local();
+	if (static_branch_likely(&parisc_has_dcache))
+		flush_data_cache_local(NULL);
 }
-void 
-flush_instruction_cache(void)
+
+void flush_cache_all_local(void)
 {
-	on_each_cpu(flush_instruction_cache_local, NULL, 1);
+	cache_flush_local_cpu(NULL);
 }
-#endif
 
-void
-flush_cache_all_local(void)
+void flush_cache_all(void)
+{
+	if (static_branch_likely(&parisc_has_cache))
+		on_each_cpu(cache_flush_local_cpu, NULL, 1);
+}
+
+static inline void flush_data_cache(void)
 {
-	flush_instruction_cache_local(NULL);
-	flush_data_cache_local(NULL);
+	if (static_branch_likely(&parisc_has_dcache))
+		on_each_cpu(flush_data_cache_local, NULL, 1);
 }
-EXPORT_SYMBOL(flush_cache_all_local);
+
 
 /* Virtual address of pfn.  */
 #define pfn_va(pfn)	__va(PFN_PHYS(pfn))
@@ -375,7 +387,6 @@ EXPORT_SYMBOL(flush_dcache_page);
 
 /* Defined in arch/parisc/kernel/pacache.S */
 EXPORT_SYMBOL(flush_kernel_dcache_range_asm);
-EXPORT_SYMBOL(flush_data_cache_local);
 EXPORT_SYMBOL(flush_kernel_icache_range_asm);
 
 #define FLUSH_THRESHOLD 0x80000 /* 0.5MB */
@@ -517,16 +528,6 @@ int __flush_tlb_range(unsigned long sid, unsigned long start,
 	return 0;
 }
 
-static void cacheflush_h_tmp_function(void *dummy)
-{
-	flush_cache_all_local();
-}
-
-void flush_cache_all(void)
-{
-	on_each_cpu(cacheflush_h_tmp_function, NULL, 1);
-}
-
 static inline unsigned long mm_total_size(struct mm_struct *mm)
 {
 	struct vm_area_struct *vma;
-- 
GitLab


From 75c05fabb873367d9e64f063dda8a310c4c58826 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.ibm.com>
Date: Thu, 10 Mar 2022 10:27:36 +0200
Subject: [PATCH 1505/1586] docs/kernel-parameters: update description of mem=

The existing description of mem= does not cover all the cases and
differences between how architectures treat it.

Extend the description to match the code.

Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Link: https://lore.kernel.org/r/20220310082736.1346366-1-rppt@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../admin-guide/kernel-parameters.txt          | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index f5a27f067db9e..5b84cca62990c 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2827,6 +2827,9 @@
 
 			For details see: Documentation/admin-guide/hw-vuln/mds.rst
 
+	mem=nn[KMG]	[HEXAGON] Set the memory size.
+			Must be specified, otherwise memory size will be 0.
+
 	mem=nn[KMG]	[KNL,BOOT] Force usage of a specific amount of memory
 			Amount of memory to be used in cases as follows:
 
@@ -2834,6 +2837,13 @@
 			2 when the kernel is not able to see the whole system memory;
 			3 memory that lies after 'mem=' boundary is excluded from
 			 the hypervisor, then assigned to KVM guests.
+			4 to limit the memory available for kdump kernel.
+
+			[ARC,MICROBLAZE] - the limit applies only to low memory,
+			high memory is not affected.
+
+			[ARM64] - only limits memory covered by the linear
+			mapping. The NOMAP regions are not affected.
 
 			[X86] Work as limiting max address. Use together
 			with memmap= to avoid physical address space collisions.
@@ -2844,6 +2854,14 @@
 			in above case 3, memory may need be hot added after boot
 			if system memory of hypervisor is not sufficient.
 
+	mem=nn[KMG]@ss[KMG]
+			[ARM,MIPS] - override the memory layout reported by
+			firmware.
+			Define a memory region of size nn[KMG] starting at
+			ss[KMG].
+			Multiple different regions can be specified with
+			multiple mem= parameters on the command line.
+
 	mem=nopentium	[BUGS=X86-32] Disable usage of 4MB pages for kernel
 			memory.
 
-- 
GitLab


From 81459350d581e958ee9c6e76031f77333881c23c Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Wed, 16 Mar 2022 16:55:05 -0600
Subject: [PATCH 1506/1586] io_uring: cache req->apoll->events in req->cflags

When we arm poll on behalf of a different type of request, like a network
receive, then we allocate req->apoll as our poll entry. Running network
workloads shows io_poll_check_events() as the most expensive part of
io_uring, and it's all due to having to pull in req->apoll instead of
just the request which we have hot already.

Cache poll->events in req->cflags, which isn't used until the request
completes anyway. This isn't strictly needed for regular poll, where
req->poll.events is used and thus already hot, but for the sake of
unification we do it all around.

This saves 3-4% of overhead in certain request workloads.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 31 +++++++++++++++++++------------
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index fa4e2cb47e566..bfddad7a14ef2 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -5876,13 +5876,13 @@ static int io_poll_check_events(struct io_kiocb *req)
 			return -ECANCELED;
 
 		if (!req->result) {
-			struct poll_table_struct pt = { ._key = poll->events };
+			struct poll_table_struct pt = { ._key = req->cflags };
 
-			req->result = vfs_poll(req->file, &pt) & poll->events;
+			req->result = vfs_poll(req->file, &pt) & req->cflags;
 		}
 
 		/* multishot, just fill an CQE and proceed */
-		if (req->result && !(poll->events & EPOLLONESHOT)) {
+		if (req->result && !(req->cflags & EPOLLONESHOT)) {
 			__poll_t mask = mangle_poll(req->result & poll->events);
 			bool filled;
 
@@ -5953,9 +5953,16 @@ static void io_apoll_task_func(struct io_kiocb *req, bool *locked)
 		io_req_complete_failed(req, ret);
 }
 
-static void __io_poll_execute(struct io_kiocb *req, int mask)
+static void __io_poll_execute(struct io_kiocb *req, int mask, int events)
 {
 	req->result = mask;
+	/*
+	 * This is useful for poll that is armed on behalf of another
+	 * request, and where the wakeup path could be on a different
+	 * CPU. We want to avoid pulling in req->apoll->events for that
+	 * case.
+	 */
+	req->cflags = events;
 	if (req->opcode == IORING_OP_POLL_ADD)
 		req->io_task_work.func = io_poll_task_func;
 	else
@@ -5965,17 +5972,17 @@ static void __io_poll_execute(struct io_kiocb *req, int mask)
 	io_req_task_work_add(req, false);
 }
 
-static inline void io_poll_execute(struct io_kiocb *req, int res)
+static inline void io_poll_execute(struct io_kiocb *req, int res, int events)
 {
 	if (io_poll_get_ownership(req))
-		__io_poll_execute(req, res);
+		__io_poll_execute(req, res, events);
 }
 
 static void io_poll_cancel_req(struct io_kiocb *req)
 {
 	io_poll_mark_cancelled(req);
 	/* kick tw, which should complete the request */
-	io_poll_execute(req, 0);
+	io_poll_execute(req, 0, 0);
 }
 
 static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
@@ -5989,7 +5996,7 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
 	if (unlikely(mask & POLLFREE)) {
 		io_poll_mark_cancelled(req);
 		/* we have to kick tw in case it's not already */
-		io_poll_execute(req, 0);
+		io_poll_execute(req, 0, poll->events);
 
 		/*
 		 * If the waitqueue is being freed early but someone is already
@@ -6020,7 +6027,7 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
 			list_del_init(&poll->wait.entry);
 			poll->head = NULL;
 		}
-		__io_poll_execute(req, mask);
+		__io_poll_execute(req, mask, poll->events);
 	}
 	return 1;
 }
@@ -6124,7 +6131,7 @@ static int __io_arm_poll_handler(struct io_kiocb *req,
 		/* can't multishot if failed, just queue the event we've got */
 		if (unlikely(ipt->error || !ipt->nr_entries))
 			poll->events |= EPOLLONESHOT;
-		__io_poll_execute(req, mask);
+		__io_poll_execute(req, mask, poll->events);
 		return 0;
 	}
 	io_add_napi(req->file, req->ctx);
@@ -6135,7 +6142,7 @@ static int __io_arm_poll_handler(struct io_kiocb *req,
 	 */
 	v = atomic_dec_return(&req->poll_refs);
 	if (unlikely(v & IO_POLL_REF_MASK))
-		__io_poll_execute(req, 0);
+		__io_poll_execute(req, 0, poll->events);
 	return 0;
 }
 
@@ -6333,7 +6340,7 @@ static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
 		return -EINVAL;
 
 	io_req_set_refcount(req);
-	poll->events = io_poll_parse_events(sqe, flags);
+	req->cflags = poll->events = io_poll_parse_events(sqe, flags);
 	return 0;
 }
 
-- 
GitLab


From 91eac1c69c202d9dad8bf717ae5b92db70bfe5cf Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Wed, 16 Mar 2022 16:59:10 -0600
Subject: [PATCH 1507/1586] io_uring: cache poll/double-poll state with a
 request flag

With commit "io_uring: cache req->apoll->events in req->cflags" applied,
we now have just io_poll_remove_entries() dipping into req->apoll when
it isn't strictly necessary.

Mark poll and double-poll with a flag, so we know if we need to look
at apoll->double_poll. This avoids pulling in those cachelines if we
don't need them. The common case is that the poll wake handler already
removed these entries while hot off the completion path.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index bfddad7a14ef2..5b5f48f0f81e7 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -771,6 +771,8 @@ enum {
 	REQ_F_ARM_LTIMEOUT_BIT,
 	REQ_F_ASYNC_DATA_BIT,
 	REQ_F_SKIP_LINK_CQES_BIT,
+	REQ_F_SINGLE_POLL_BIT,
+	REQ_F_DOUBLE_POLL_BIT,
 	/* keep async read/write and isreg together and in order */
 	REQ_F_SUPPORT_NOWAIT_BIT,
 	REQ_F_ISREG_BIT,
@@ -829,6 +831,10 @@ enum {
 	REQ_F_ASYNC_DATA	= BIT(REQ_F_ASYNC_DATA_BIT),
 	/* don't post CQEs while failing linked requests */
 	REQ_F_SKIP_LINK_CQES	= BIT(REQ_F_SKIP_LINK_CQES_BIT),
+	/* single poll may be active */
+	REQ_F_SINGLE_POLL	= BIT(REQ_F_SINGLE_POLL_BIT),
+	/* double poll may active */
+	REQ_F_DOUBLE_POLL	= BIT(REQ_F_DOUBLE_POLL_BIT),
 };
 
 struct async_poll {
@@ -5823,8 +5829,12 @@ static inline void io_poll_remove_entry(struct io_poll_iocb *poll)
 
 static void io_poll_remove_entries(struct io_kiocb *req)
 {
-	struct io_poll_iocb *poll = io_poll_get_single(req);
-	struct io_poll_iocb *poll_double = io_poll_get_double(req);
+	/*
+	 * Nothing to do if neither of those flags are set. Avoid dipping
+	 * into the poll/apoll/double cachelines if we can.
+	 */
+	if (!(req->flags & (REQ_F_SINGLE_POLL | REQ_F_DOUBLE_POLL)))
+		return;
 
 	/*
 	 * While we hold the waitqueue lock and the waitqueue is nonempty,
@@ -5842,9 +5852,10 @@ static void io_poll_remove_entries(struct io_kiocb *req)
 	 * In that case, only RCU prevents the queue memory from being freed.
 	 */
 	rcu_read_lock();
-	io_poll_remove_entry(poll);
-	if (poll_double)
-		io_poll_remove_entry(poll_double);
+	if (req->flags & REQ_F_SINGLE_POLL)
+		io_poll_remove_entry(io_poll_get_single(req));
+	if (req->flags & REQ_F_DOUBLE_POLL)
+		io_poll_remove_entry(io_poll_get_double(req));
 	rcu_read_unlock();
 }
 
@@ -6026,6 +6037,7 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
 		if (mask && poll->events & EPOLLONESHOT) {
 			list_del_init(&poll->wait.entry);
 			poll->head = NULL;
+			req->flags &= ~REQ_F_SINGLE_POLL;
 		}
 		__io_poll_execute(req, mask, poll->events);
 	}
@@ -6062,12 +6074,14 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
 			pt->error = -ENOMEM;
 			return;
 		}
+		req->flags |= REQ_F_DOUBLE_POLL;
 		io_init_poll_iocb(poll, first->events, first->wait.func);
 		*poll_ptr = poll;
 		if (req->opcode == IORING_OP_POLL_ADD)
 			req->flags |= REQ_F_ASYNC_DATA;
 	}
 
+	req->flags |= REQ_F_SINGLE_POLL;
 	pt->nr_entries++;
 	poll->head = head;
 	poll->wait.private = req;
-- 
GitLab


From ae4da18941c1c13a9bd6f1d39888ca9a4ff3db91 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Thu, 17 Mar 2022 02:03:36 +0000
Subject: [PATCH 1508/1586] io_uring: normilise naming for fill_cqe*

Restore consistency in __io_fill_cqe* like helpers, always honouring
"io_" prefix and adding "req" when we're passing in a request.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/bd016ff5c1a4f74687828069d2619d8a65e0c6d7.1647481208.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 5b5f48f0f81e7..b9013fc14f74e 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2019,7 +2019,7 @@ static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data,
 	return true;
 }
 
-static inline bool __fill_cqe(struct io_ring_ctx *ctx, u64 user_data,
+static inline bool __io_fill_cqe(struct io_ring_ctx *ctx, u64 user_data,
 				 s32 res, u32 cflags)
 {
 	struct io_uring_cqe *cqe;
@@ -2039,16 +2039,16 @@ static inline bool __fill_cqe(struct io_ring_ctx *ctx, u64 user_data,
 	return io_cqring_event_overflow(ctx, user_data, res, cflags);
 }
 
-static inline bool __io_fill_cqe(struct io_kiocb *req, s32 res, u32 cflags)
+static inline bool __io_fill_cqe_req(struct io_kiocb *req, s32 res, u32 cflags)
 {
 	trace_io_uring_complete(req->ctx, req, req->user_data, res, cflags);
-	return __fill_cqe(req->ctx, req->user_data, res, cflags);
+	return __io_fill_cqe(req->ctx, req->user_data, res, cflags);
 }
 
 static noinline void io_fill_cqe_req(struct io_kiocb *req, s32 res, u32 cflags)
 {
 	if (!(req->flags & REQ_F_CQE_SKIP))
-		__io_fill_cqe(req, res, cflags);
+		__io_fill_cqe_req(req, res, cflags);
 }
 
 static noinline bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data,
@@ -2056,7 +2056,7 @@ static noinline bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data,
 {
 	ctx->cq_extra++;
 	trace_io_uring_complete(ctx, NULL, user_data, res, cflags);
-	return __fill_cqe(ctx, user_data, res, cflags);
+	return __io_fill_cqe(ctx, user_data, res, cflags);
 }
 
 static void __io_req_complete_post(struct io_kiocb *req, s32 res,
@@ -2065,7 +2065,7 @@ static void __io_req_complete_post(struct io_kiocb *req, s32 res,
 	struct io_ring_ctx *ctx = req->ctx;
 
 	if (!(req->flags & REQ_F_CQE_SKIP))
-		__io_fill_cqe(req, res, cflags);
+		__io_fill_cqe_req(req, res, cflags);
 	/*
 	 * If we're the last reference to this request, add to our locked
 	 * free_list cache.
@@ -2657,7 +2657,7 @@ static void __io_submit_flush_completions(struct io_ring_ctx *ctx)
 						    comp_list);
 
 			if (!(req->flags & REQ_F_CQE_SKIP))
-				__io_fill_cqe(req, req->result, req->cflags);
+				__io_fill_cqe_req(req, req->result, req->cflags);
 			if ((req->flags & REQ_F_POLLED) && req->apoll) {
 				struct async_poll *apoll = req->apoll;
 
@@ -2788,7 +2788,7 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
 		if (unlikely(req->flags & REQ_F_CQE_SKIP))
 			continue;
 
-		__io_fill_cqe(req, req->result, io_put_kbuf(req, 0));
+		__io_fill_cqe_req(req, req->result, io_put_kbuf(req, 0));
 		nr_events++;
 	}
 
-- 
GitLab


From 6695490dc85781fe98b782f36f27c13710dbc921 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Thu, 17 Mar 2022 02:03:37 +0000
Subject: [PATCH 1509/1586] io_uring: refactor timeout cancellation cqe posting

io_fill_cqe*() is not always the best way to post CQEs just because
there is enough of infrastructure on top. Replace a raw call to a
variant of it inside of io_timeout_cancel(), which also saves us some
bloating and might help with batching later.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/46113ec4345764b4aef3b384ce38cceabaeedcbb.1647481208.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index b9013fc14f74e..15de14d4331b0 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -6471,10 +6471,7 @@ static int io_timeout_cancel(struct io_ring_ctx *ctx, __u64 user_data)
 
 	if (IS_ERR(req))
 		return PTR_ERR(req);
-
-	req_set_fail(req);
-	io_fill_cqe_req(req, -ECANCELED, 0);
-	io_put_req_deferred(req);
+	io_req_task_queue_fail(req, -ECANCELED);
 	return 0;
 }
 
-- 
GitLab


From 3b2b78a8eb7cc38d207c5ee516769bc3f44d19ea Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Thu, 17 Mar 2022 02:03:38 +0000
Subject: [PATCH 1510/1586] io_uring: extend provided buf return to fails

It's never a good idea to put provided buffers without notifying the
userspace, it'll lead to userspace leaks, so add io_put_kbuf() in
io_req_complete_failed(). The fail helper is called by all sorts of
requests, but it's still safe to do as io_put_kbuf() will return 0 in
for all requests that don't support and so don't expect provided buffers.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/a4880106fcf199d5810707fe2d17126fcdf18bc4.1647481208.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 15de14d4331b0..a90844bf9b9c8 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2124,7 +2124,7 @@ static inline void io_req_complete(struct io_kiocb *req, s32 res)
 static void io_req_complete_failed(struct io_kiocb *req, s32 res)
 {
 	req_set_fail(req);
-	io_req_complete_post(req, res, 0);
+	io_req_complete_post(req, res, io_put_kbuf(req, 0));
 }
 
 static void io_req_complete_fail_submit(struct io_kiocb *req)
-- 
GitLab


From b91ef1872869d99cd42e908eb9754b81115c2c05 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Wed, 16 Mar 2022 20:24:47 -0600
Subject: [PATCH 1511/1586] io_uring: fix provided buffer return on failure for
 kiocb_done()

Use io_req_complete_failed() in kiocb_done(). This cleans up the code,
but also ensures that a provided buffers is correctly freed on failure.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/a4880106fcf199d5810707fe2d17126fcdf18bc4.1647481208.git.asml.silence@gmail.com
[axboe: split from previous patch]
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index a90844bf9b9c8..f039706bce0e9 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -3242,14 +3242,10 @@ static void kiocb_done(struct io_kiocb *req, ssize_t ret,
 
 	if (req->flags & REQ_F_REISSUE) {
 		req->flags &= ~REQ_F_REISSUE;
-		if (io_resubmit_prep(req)) {
+		if (io_resubmit_prep(req))
 			io_req_task_queue_reissue(req);
-		} else {
-			req_set_fail(req);
-			req->result = ret;
-			req->io_task_work.func = io_req_task_complete;
-			io_req_task_work_add(req, false);
-		}
+		else
+			io_req_task_queue_fail(req, ret);
 	}
 }
 
-- 
GitLab


From 0f84747177b962c32243a57cb454193bdba4fe8d Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Thu, 17 Mar 2022 02:03:39 +0000
Subject: [PATCH 1512/1586] io_uring: remove extra barrier for non-sqpoll
 iopoll

smp_mb() in io_cqring_ev_posted_iopoll() is only there because of
waitqueue_active(). However, non-SQPOLL IOPOLL ring doesn't wake the CQ
and so the barrier there is useless. Kill it, it's usually pretty
expensive.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/d72e8ef6f7a3f6a72e18fad8409f7d47afc8da7d.1647481208.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index f039706bce0e9..692dbe7b98e92 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1877,11 +1877,8 @@ static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
 
 static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx)
 {
-	/* see waitqueue_active() comment */
-	smp_mb();
-
 	if (ctx->flags & IORING_SETUP_SQPOLL) {
-		if (waitqueue_active(&ctx->cq_wait))
+		if (wq_has_sleeper(&ctx->cq_wait))
 			wake_up_all(&ctx->cq_wait);
 	}
 	io_eventfd_signal(ctx);
-- 
GitLab


From 66fc25ca6b7ec4124606e0d59c71c6bcf14e05bb Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Thu, 17 Mar 2022 02:03:40 +0000
Subject: [PATCH 1513/1586] io_uring: shuffle io_eventfd_signal() bits around

A preparation patch, which moves a fast ->io_ev_fd check out of
io_eventfd_signal() into ev_posted*(). Compilers are smart enough for it
to not change anything, but will need it later.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/ec4091ac76d43912b73917e8db651c2dac4b7b01.1647481208.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 692dbe7b98e92..31c625f61fd82 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1828,10 +1828,6 @@ static void io_eventfd_signal(struct io_ring_ctx *ctx)
 {
 	struct io_ev_fd *ev_fd;
 
-	/* Return quickly if ctx->io_ev_fd doesn't exist */
-	if (likely(!rcu_dereference_raw(ctx->io_ev_fd)))
-		return;
-
 	rcu_read_lock();
 	/*
 	 * rcu_dereference ctx->io_ev_fd once and use it for both for checking
@@ -1851,7 +1847,6 @@ static void io_eventfd_signal(struct io_ring_ctx *ctx)
 
 	if (!ev_fd->eventfd_async || io_wq_current_is_worker())
 		eventfd_signal(ev_fd->cq_ev_fd, 1);
-
 out:
 	rcu_read_unlock();
 }
@@ -1863,7 +1858,7 @@ out:
  * 1:1 relationship between how many times this function is called (and
  * hence the eventfd count) and number of CQEs posted to the CQ ring.
  */
-static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
+static inline void io_cqring_ev_posted(struct io_ring_ctx *ctx)
 {
 	/*
 	 * wake_up_all() may seem excessive, but io_wake_function() and
@@ -1872,7 +1867,8 @@ static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
 	 */
 	if (wq_has_sleeper(&ctx->cq_wait))
 		wake_up_all(&ctx->cq_wait);
-	io_eventfd_signal(ctx);
+	if (unlikely(rcu_dereference_raw(ctx->io_ev_fd)))
+		io_eventfd_signal(ctx);
 }
 
 static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx)
@@ -1881,7 +1877,8 @@ static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx)
 		if (wq_has_sleeper(&ctx->cq_wait))
 			wake_up_all(&ctx->cq_wait);
 	}
-	io_eventfd_signal(ctx);
+	if (unlikely(rcu_dereference_raw(ctx->io_ev_fd)))
+		io_eventfd_signal(ctx);
 }
 
 /* Returns true if there are no backlogged entries after the flush */
-- 
GitLab


From 9333f6b4628c8037a89ed23e1188d4b7dc5d74e4 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Thu, 17 Mar 2022 02:03:41 +0000
Subject: [PATCH 1514/1586] io_uring: thin down io_commit_cqring()

io_commit_cqring() is currently always under spinlock section, so it's
always better to keep it as slim as possible. Move
__io_commit_cqring_flush() out of it into ev_posted*(). If fast checks
do fail and this post-processing is required, we'll reacquire
->completion_lock, which is fine as we don't care about performance of
draining and offset timeouts.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/ec4e81fd720d3bc7bca8cb9152e080dad1a052f1.1647481208.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 31c625f61fd82..e69531ccd2090 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1779,20 +1779,21 @@ static __cold void io_flush_timeouts(struct io_ring_ctx *ctx)
 	spin_unlock_irq(&ctx->timeout_lock);
 }
 
+static inline void io_commit_cqring(struct io_ring_ctx *ctx)
+{
+	/* order cqe stores with ring update */
+	smp_store_release(&ctx->rings->cq.tail, ctx->cached_cq_tail);
+}
+
 static __cold void __io_commit_cqring_flush(struct io_ring_ctx *ctx)
 {
+	spin_lock(&ctx->completion_lock);
 	if (ctx->off_timeout_used)
 		io_flush_timeouts(ctx);
 	if (ctx->drain_active)
 		io_queue_deferred(ctx);
-}
-
-static inline void io_commit_cqring(struct io_ring_ctx *ctx)
-{
-	if (unlikely(ctx->off_timeout_used || ctx->drain_active))
-		__io_commit_cqring_flush(ctx);
-	/* order cqe stores with ring update */
-	smp_store_release(&ctx->rings->cq.tail, ctx->cached_cq_tail);
+	io_commit_cqring(ctx);
+	spin_unlock(&ctx->completion_lock);
 }
 
 static inline bool io_sqring_full(struct io_ring_ctx *ctx)
@@ -1860,6 +1861,9 @@ out:
  */
 static inline void io_cqring_ev_posted(struct io_ring_ctx *ctx)
 {
+	if (unlikely(ctx->off_timeout_used || ctx->drain_active))
+		__io_commit_cqring_flush(ctx);
+
 	/*
 	 * wake_up_all() may seem excessive, but io_wake_function() and
 	 * io_should_wake() handle the termination of the loop and only
@@ -1873,6 +1877,9 @@ static inline void io_cqring_ev_posted(struct io_ring_ctx *ctx)
 
 static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx)
 {
+	if (unlikely(ctx->off_timeout_used || ctx->drain_active))
+		__io_commit_cqring_flush(ctx);
+
 	if (ctx->flags & IORING_SETUP_SQPOLL) {
 		if (wq_has_sleeper(&ctx->cq_wait))
 			wake_up_all(&ctx->cq_wait);
-- 
GitLab


From 9aa8dfde4869ccdec0a7290b686dbc10e079e163 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Thu, 17 Mar 2022 02:03:42 +0000
Subject: [PATCH 1515/1586] io_uring: fold evfd signalling under a slower path

Add ->has_evfd flag, which is true IFF there is an eventfd attached, and
use it to hide io_eventfd_signal() into __io_commit_cqring_flush() and
combine fast checks in a single if. Also, gcc 11.2 wasn't inlining
io_cqring_ev_posted() without this change, so helps with that as well.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/f6168471997decded475a063f92915787975a30b.1647481208.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 60 +++++++++++++++++++++++++++++----------------------
 1 file changed, 34 insertions(+), 26 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index e69531ccd2090..68498cebc0ef7 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -347,6 +347,7 @@ struct io_ring_ctx {
 		unsigned int		off_timeout_used: 1;
 		unsigned int		drain_active: 1;
 		unsigned int		drain_disabled: 1;
+		unsigned int		has_evfd: 1;
 	} ____cacheline_aligned_in_smp;
 
 	/* submission data */
@@ -1181,6 +1182,7 @@ static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
 static int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags);
 
 static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer);
+static void io_eventfd_signal(struct io_ring_ctx *ctx);
 
 static struct kmem_cache *req_cachep;
 
@@ -1785,15 +1787,19 @@ static inline void io_commit_cqring(struct io_ring_ctx *ctx)
 	smp_store_release(&ctx->rings->cq.tail, ctx->cached_cq_tail);
 }
 
-static __cold void __io_commit_cqring_flush(struct io_ring_ctx *ctx)
+static void __io_commit_cqring_flush(struct io_ring_ctx *ctx)
 {
-	spin_lock(&ctx->completion_lock);
-	if (ctx->off_timeout_used)
-		io_flush_timeouts(ctx);
-	if (ctx->drain_active)
-		io_queue_deferred(ctx);
-	io_commit_cqring(ctx);
-	spin_unlock(&ctx->completion_lock);
+	if (ctx->off_timeout_used || ctx->drain_active) {
+		spin_lock(&ctx->completion_lock);
+		if (ctx->off_timeout_used)
+			io_flush_timeouts(ctx);
+		if (ctx->drain_active)
+			io_queue_deferred(ctx);
+		io_commit_cqring(ctx);
+		spin_unlock(&ctx->completion_lock);
+	}
+	if (ctx->has_evfd)
+		io_eventfd_signal(ctx);
 }
 
 static inline bool io_sqring_full(struct io_ring_ctx *ctx)
@@ -1852,6 +1858,17 @@ out:
 	rcu_read_unlock();
 }
 
+static inline void io_cqring_wake(struct io_ring_ctx *ctx)
+{
+	/*
+	 * wake_up_all() may seem excessive, but io_wake_function() and
+	 * io_should_wake() handle the termination of the loop and only
+	 * wake as many waiters as we need to.
+	 */
+	if (wq_has_sleeper(&ctx->cq_wait))
+		wake_up_all(&ctx->cq_wait);
+}
+
 /*
  * This should only get called when at least one event has been posted.
  * Some applications rely on the eventfd notification count only changing
@@ -1861,31 +1878,21 @@ out:
  */
 static inline void io_cqring_ev_posted(struct io_ring_ctx *ctx)
 {
-	if (unlikely(ctx->off_timeout_used || ctx->drain_active))
+	if (unlikely(ctx->off_timeout_used || ctx->drain_active ||
+		     ctx->has_evfd))
 		__io_commit_cqring_flush(ctx);
 
-	/*
-	 * wake_up_all() may seem excessive, but io_wake_function() and
-	 * io_should_wake() handle the termination of the loop and only
-	 * wake as many waiters as we need to.
-	 */
-	if (wq_has_sleeper(&ctx->cq_wait))
-		wake_up_all(&ctx->cq_wait);
-	if (unlikely(rcu_dereference_raw(ctx->io_ev_fd)))
-		io_eventfd_signal(ctx);
+	io_cqring_wake(ctx);
 }
 
 static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx)
 {
-	if (unlikely(ctx->off_timeout_used || ctx->drain_active))
+	if (unlikely(ctx->off_timeout_used || ctx->drain_active ||
+		     ctx->has_evfd))
 		__io_commit_cqring_flush(ctx);
 
-	if (ctx->flags & IORING_SETUP_SQPOLL) {
-		if (wq_has_sleeper(&ctx->cq_wait))
-			wake_up_all(&ctx->cq_wait);
-	}
-	if (unlikely(rcu_dereference_raw(ctx->io_ev_fd)))
-		io_eventfd_signal(ctx);
+	if (ctx->flags & IORING_SETUP_SQPOLL)
+		io_cqring_wake(ctx);
 }
 
 /* Returns true if there are no backlogged entries after the flush */
@@ -9898,7 +9905,7 @@ static int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg,
 		return ret;
 	}
 	ev_fd->eventfd_async = eventfd_async;
-
+	ctx->has_evfd = true;
 	rcu_assign_pointer(ctx->io_ev_fd, ev_fd);
 	return 0;
 }
@@ -9918,6 +9925,7 @@ static int io_eventfd_unregister(struct io_ring_ctx *ctx)
 	ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
 					lockdep_is_held(&ctx->uring_lock));
 	if (ev_fd) {
+		ctx->has_evfd = false;
 		rcu_assign_pointer(ctx->io_ev_fd, NULL);
 		call_rcu(&ev_fd->rcu, io_eventfd_put);
 		return 0;
-- 
GitLab


From 4ee06de7729d795773145692e246a06448b1eb7a Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Tue, 15 Mar 2022 10:20:08 +0100
Subject: [PATCH 1516/1586] net: handle ARPHRD_PIMREG in
 dev_is_mac_header_xmit()

This kind of interface doesn't have a mac header. This patch fixes
bpf_redirect() to a PIM interface.

Fixes: 27b29f63058d ("bpf: add bpf_redirect() helper")
Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Link: https://lore.kernel.org/r/20220315092008.31423-1-nicolas.dichtel@6wind.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/if_arp.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h
index b712217f70304..1ed52441972f9 100644
--- a/include/linux/if_arp.h
+++ b/include/linux/if_arp.h
@@ -52,6 +52,7 @@ static inline bool dev_is_mac_header_xmit(const struct net_device *dev)
 	case ARPHRD_VOID:
 	case ARPHRD_NONE:
 	case ARPHRD_RAWIP:
+	case ARPHRD_PIMREG:
 		return false;
 	default:
 		return true;
-- 
GitLab


From e3ee9fb22652f228225c352bd4fabec330cac5f0 Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Wed, 16 Mar 2022 22:08:43 -0500
Subject: [PATCH 1517/1586] smb3: fix incorrect session setup check for
 multiuser mounts

A recent change to how the SMB3 server (socket) and session status
is managed regressed multiuser mounts by changing the check
for whether session setup is needed to the socket (TCP_Server_info)
structure instead of the session struct (cifs_ses). Add additional
check in cifs_setup_sesion to fix this.

Fixes: 73f9bfbe3d81 ("cifs: maintain a state machine for tcp/smb/tcon sessions")
Reported-by: Ronnie Sahlberg <lsahlber@redhat.com>
Acked-by: Ronnie Sahlberg <lsahlber@redhat.com>
Reviewed-by: Shyam Prasad N <sprasad@microsoft.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/connect.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 053cb449eb167..d3020abfe404a 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -3924,7 +3924,8 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses,
 
 	/* only send once per connect */
 	spin_lock(&cifs_tcp_ses_lock);
-	if (server->tcpStatus != CifsNeedSessSetup) {
+	if ((server->tcpStatus != CifsNeedSessSetup) &&
+	    (ses->status == CifsGood)) {
 		spin_unlock(&cifs_tcp_ses_lock);
 		return 0;
 	}
-- 
GitLab


From 3c3384050d68570f9de0fec9e58824decfefba7a Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Tue, 15 Mar 2022 09:45:59 +0100
Subject: [PATCH 1518/1586] drm: Don't make DRM_PANEL_BRIDGE dependent on
 DRM_KMS_HELPERS

Fix a number of undefined references to drm_kms_helper.ko in
drm_dp_helper.ko:

  arm-suse-linux-gnueabi-ld: drivers/gpu/drm/dp/drm_dp_mst_topology.o: in function `drm_dp_mst_duplicate_state':
  drm_dp_mst_topology.c:(.text+0x2df0): undefined reference to `__drm_atomic_helper_private_obj_duplicate_state'
  arm-suse-linux-gnueabi-ld: drivers/gpu/drm/dp/drm_dp_mst_topology.o: in function `drm_dp_delayed_destroy_work':
  drm_dp_mst_topology.c:(.text+0x370c): undefined reference to `drm_kms_helper_hotplug_event'
  arm-suse-linux-gnueabi-ld: drivers/gpu/drm/dp/drm_dp_mst_topology.o: in function `drm_dp_mst_up_req_work':
  drm_dp_mst_topology.c:(.text+0x7938): undefined reference to `drm_kms_helper_hotplug_event'
  arm-suse-linux-gnueabi-ld: drivers/gpu/drm/dp/drm_dp_mst_topology.o: in function `drm_dp_mst_link_probe_work':
  drm_dp_mst_topology.c:(.text+0x82e0): undefined reference to `drm_kms_helper_hotplug_event'

This happens if panel-edp.ko has been configured with

  DRM_PANEL_EDP=y
  DRM_DP_HELPER=y
  DRM_KMS_HELPER=m

which builds DP helpers into the kernel and KMS helpers sa a module.
Making DRM_PANEL_EDP select DRM_KMS_HELPER resolves this problem.

To avoid a resulting cyclic dependency with DRM_PANEL_BRIDGE, don't
make the latter depend on DRM_KMS_HELPER and fix the one DRM bridge
drivers that doesn't already select DRM_KMS_HELPER. As KMS helpers
cannot be selected directly by the user, config symbols should avoid
depending on it anyway.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Fixes: 3755d35ee1d2 ("drm/panel: Select DRM_DP_HELPER for DRM_PANEL_EDP")
Acked-by: Sam Ravnborg <sam@ravnborg.org>
Tested-by: Brian Masney <bmasney@redhat.com>
Reported-by: kernel test robot <lkp@intel.com>
Cc: Thomas Zimmermann <tzimmermann@suse.de>
Cc: Naresh Kamboju <naresh.kamboju@linaro.org>
Cc: Linux Kernel Functional Testing <lkft@linaro.org>
Cc: Lyude Paul <lyude@redhat.com>
Cc: Sam Ravnborg <sam@ravnborg.org>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Maxime Ripard <mripard@kernel.org>
Cc: dri-devel@lists.freedesktop.org
Cc: Dave Airlie <airlied@redhat.com>
Cc: Thierry Reding <thierry.reding@gmail.com>
Link: https://patchwork.freedesktop.org/patch/478296/
---
 drivers/gpu/drm/bridge/Kconfig | 2 +-
 drivers/gpu/drm/panel/Kconfig  | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/bridge/Kconfig b/drivers/gpu/drm/bridge/Kconfig
index 61db5a66b4934..44ad709396630 100644
--- a/drivers/gpu/drm/bridge/Kconfig
+++ b/drivers/gpu/drm/bridge/Kconfig
@@ -8,7 +8,6 @@ config DRM_BRIDGE
 config DRM_PANEL_BRIDGE
 	def_bool y
 	depends on DRM_BRIDGE
-	depends on DRM_KMS_HELPER
 	select DRM_PANEL
 	help
 	  DRM bridge wrapper of DRM panels
@@ -30,6 +29,7 @@ config DRM_CDNS_DSI
 config DRM_CHIPONE_ICN6211
 	tristate "Chipone ICN6211 MIPI-DSI/RGB Converter bridge"
 	depends on OF
+	select DRM_KMS_HELPER
 	select DRM_MIPI_DSI
 	select DRM_PANEL_BRIDGE
 	help
diff --git a/drivers/gpu/drm/panel/Kconfig b/drivers/gpu/drm/panel/Kconfig
index 0aec5a10b064b..9989a316fe886 100644
--- a/drivers/gpu/drm/panel/Kconfig
+++ b/drivers/gpu/drm/panel/Kconfig
@@ -107,6 +107,7 @@ config DRM_PANEL_EDP
 	select VIDEOMODE_HELPERS
 	select DRM_DP_AUX_BUS
 	select DRM_DP_HELPER
+	select DRM_KMS_HELPER
 	help
 	  DRM panel driver for dumb eDP panels that need at most a regulator and
 	  a GPIO to be powered up. Optionally a backlight can be attached so
-- 
GitLab


From cb0b430b4e3acc88c85e0ad2e25f2a25a5765262 Mon Sep 17 00:00:00 2001
From: Miaoqian Lin <linmq006@gmail.com>
Date: Wed, 16 Mar 2022 08:26:02 +0000
Subject: [PATCH 1519/1586] net: dsa: Add missing of_node_put() in
 dsa_port_parse_of

The device_node pointer is returned by of_parse_phandle()  with refcount
incremented. We should use of_node_put() on it when done.

Fixes: 6d4e5c570c2d ("net: dsa: get port type at parse time")
Signed-off-by: Miaoqian Lin <linmq006@gmail.com>
Link: https://lore.kernel.org/r/20220316082602.10785-1-linmq006@gmail.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/dsa/dsa2.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 074e4a69a7284..88e2808019b47 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -1436,6 +1436,7 @@ static int dsa_port_parse_of(struct dsa_port *dp, struct device_node *dn)
 		const char *user_protocol;
 
 		master = of_find_net_device_by_node(ethernet);
+		of_node_put(ethernet);
 		if (!master)
 			return -EPROBE_DEFER;
 
-- 
GitLab


From c844d22fe0c0b37dc809adbdde6ceb6462c43acf Mon Sep 17 00:00:00 2001
From: Werner Sembach <wse@tuxedocomputers.com>
Date: Tue, 15 Mar 2022 20:02:28 +0100
Subject: [PATCH 1520/1586] ACPI: video: Force backlight native for Clevo
 NL5xRU and NL5xNU

Clevo NL5xRU and NL5xNU/TUXEDO Aura 15 Gen1 and Gen2 have both a working
native and video interface. However the default detection mechanism first
registers the video interface before unregistering it again and switching
to the native interface during boot. This results in a dangling SBIOS
request for backlight change for some reason, causing the backlight to
switch to ~2% once per boot on the first power cord connect or disconnect
event. Setting the native interface explicitly circumvents this buggy
behaviour by avoiding the unregistering process.

Signed-off-by: Werner Sembach <wse@tuxedocomputers.com>
Cc: All applicable <stable@vger.kernel.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/video_detect.c | 75 +++++++++++++++++++++++++++++++++++++
 1 file changed, 75 insertions(+)

diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c
index 4f64713e9917b..becc198e4c224 100644
--- a/drivers/acpi/video_detect.c
+++ b/drivers/acpi/video_detect.c
@@ -415,6 +415,81 @@ static const struct dmi_system_id video_detect_dmi_table[] = {
 		DMI_MATCH(DMI_PRODUCT_NAME, "GA503"),
 		},
 	},
+	/*
+	 * Clevo NL5xRU and NL5xNU/TUXEDO Aura 15 Gen1 and Gen2 have both a
+	 * working native and video interface. However the default detection
+	 * mechanism first registers the video interface before unregistering
+	 * it again and switching to the native interface during boot. This
+	 * results in a dangling SBIOS request for backlight change for some
+	 * reason, causing the backlight to switch to ~2% once per boot on the
+	 * first power cord connect or disconnect event. Setting the native
+	 * interface explicitly circumvents this buggy behaviour, by avoiding
+	 * the unregistering process.
+	 */
+	{
+	.callback = video_detect_force_native,
+	.ident = "Clevo NL5xRU",
+	.matches = {
+		DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"),
+		DMI_MATCH(DMI_BOARD_NAME, "NL5xRU"),
+		},
+	},
+	{
+	.callback = video_detect_force_native,
+	.ident = "Clevo NL5xRU",
+	.matches = {
+		DMI_MATCH(DMI_SYS_VENDOR, "SchenkerTechnologiesGmbH"),
+		DMI_MATCH(DMI_BOARD_NAME, "NL5xRU"),
+		},
+	},
+	{
+	.callback = video_detect_force_native,
+	.ident = "Clevo NL5xRU",
+	.matches = {
+		DMI_MATCH(DMI_SYS_VENDOR, "Notebook"),
+		DMI_MATCH(DMI_BOARD_NAME, "NL5xRU"),
+		},
+	},
+	{
+	.callback = video_detect_force_native,
+	.ident = "Clevo NL5xRU",
+	.matches = {
+		DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"),
+		DMI_MATCH(DMI_BOARD_NAME, "AURA1501"),
+		},
+	},
+	{
+	.callback = video_detect_force_native,
+	.ident = "Clevo NL5xRU",
+	.matches = {
+		DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"),
+		DMI_MATCH(DMI_BOARD_NAME, "EDUBOOK1502"),
+		},
+	},
+	{
+	.callback = video_detect_force_native,
+	.ident = "Clevo NL5xNU",
+	.matches = {
+		DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"),
+		DMI_MATCH(DMI_BOARD_NAME, "NL5xNU"),
+		},
+	},
+	{
+	.callback = video_detect_force_native,
+	.ident = "Clevo NL5xNU",
+	.matches = {
+		DMI_MATCH(DMI_SYS_VENDOR, "SchenkerTechnologiesGmbH"),
+		DMI_MATCH(DMI_BOARD_NAME, "NL5xNU"),
+		},
+	},
+	{
+	.callback = video_detect_force_native,
+	.ident = "Clevo NL5xNU",
+	.matches = {
+		DMI_MATCH(DMI_SYS_VENDOR, "Notebook"),
+		DMI_MATCH(DMI_BOARD_NAME, "NL5xNU"),
+		},
+	},
 
 	/*
 	 * Desktops which falsely report a backlight and which our heuristics
-- 
GitLab


From a335b1e6bb29300d3bc6749763a4298627e594ba Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 15 Mar 2022 20:35:47 +0100
Subject: [PATCH 1521/1586] cpuidle: intel_idle: Update intel_idle() kerneldoc
 comment

Commit bf9282dc26e7 ("cpuidle: Make CPUIDLE_FLAG_TLB_FLUSHED generic")
moved the leave_mm() call away from intel_idle(), but it didn't update
its kerneldoc comment accordingly, so do that now.

Fixes: bf9282dc26e7 ("cpuidle: Make CPUIDLE_FLAG_TLB_FLUSHED generic")
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/idle/intel_idle.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index e385ddf15b323..4ba4ab974dbeb 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -122,9 +122,6 @@ static unsigned int mwait_substates __initdata;
  * If the local APIC timer is not known to be reliable in the target idle state,
  * enable one-shot tick broadcasting for the target CPU before executing MWAIT.
  *
- * Optionally call leave_mm() for the target CPU upfront to avoid wakeups due to
- * flushing user TLBs.
- *
  * Must be called under local_irq_disable().
  */
 static __cpuidle int intel_idle(struct cpuidle_device *dev,
-- 
GitLab


From 03eb65224e5711e7a2f34b500d44866b322a249a Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 15 Mar 2022 20:36:42 +0100
Subject: [PATCH 1522/1586] cpuidle: intel_idle: Drop redundant backslash at
 line end

Drop a redundant backslash character at the end of a line in the
spr_cstates[] definition.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
---
 drivers/idle/intel_idle.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index 4ba4ab974dbeb..b7640cfe00201 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -781,7 +781,7 @@ static struct cpuidle_state spr_cstates[] __initdata = {
 	{
 		.name = "C1E",
 		.desc = "MWAIT 0x01",
-		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE | \
+		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE |
 					   CPUIDLE_FLAG_UNUSABLE,
 		.exit_latency = 2,
 		.target_residency = 4,
-- 
GitLab


From 2ca8e6285250c07a2e5a22ecbfd59b5a4ef73484 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 16 Mar 2022 13:37:44 +0100
Subject: [PATCH 1523/1586] Revert "ACPI: Pass the same capabilities to the
 _OSC regardless of the query flag"

Revert commit 159d8c274fd9 ("ACPI: Pass the same capabilities to the
_OSC regardless of the query flag") which caused legitimate usage
scenarios (when the platform firmware does not want the OS to control
certain platform features controlled by the system bus scope _OSC) to
break and was misguided by some misleading language in the _OSC
definition in the ACPI specification (in particular, Section 6.2.11.1.3
"Sequence of _OSC Calls" that contradicts other perts of the _OSC
definition).

Link: https://lore.kernel.org/linux-acpi/CAJZ5v0iStA0JmO0H3z+VgQsVuQONVjKPpw0F5HKfiq=Gb6B5yw@mail.gmail.com
Reported-by: Mario Limonciello <Mario.Limonciello@amd.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Tested-by: Mario Limonciello <mario.limonciello@amd.com>
Acked-by: Huang Rui <ray.huang@amd.com>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
---
 drivers/acpi/bus.c | 27 +++++++++++++++++++--------
 1 file changed, 19 insertions(+), 8 deletions(-)

diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index 647791d34c19f..c7670d441ad9e 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -332,21 +332,32 @@ static void acpi_bus_osc_negotiate_platform_control(void)
 	if (ACPI_FAILURE(acpi_run_osc(handle, &context)))
 		return;
 
-	kfree(context.ret.pointer);
+	capbuf_ret = context.ret.pointer;
+	if (context.ret.length <= OSC_SUPPORT_DWORD) {
+		kfree(context.ret.pointer);
+		return;
+	}
 
-	/* Now run _OSC again with query flag clear */
+	/*
+	 * Now run _OSC again with query flag clear and with the caps
+	 * supported by both the OS and the platform.
+	 */
 	capbuf[OSC_QUERY_DWORD] = 0;
+	capbuf[OSC_SUPPORT_DWORD] = capbuf_ret[OSC_SUPPORT_DWORD];
+	kfree(context.ret.pointer);
 
 	if (ACPI_FAILURE(acpi_run_osc(handle, &context)))
 		return;
 
 	capbuf_ret = context.ret.pointer;
-	osc_sb_apei_support_acked =
-		capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_APEI_SUPPORT;
-	osc_pc_lpi_support_confirmed =
-		capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_PCLPI_SUPPORT;
-	osc_sb_native_usb4_support_confirmed =
-		capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_NATIVE_USB4_SUPPORT;
+	if (context.ret.length > OSC_SUPPORT_DWORD) {
+		osc_sb_apei_support_acked =
+			capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_APEI_SUPPORT;
+		osc_pc_lpi_support_confirmed =
+			capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_PCLPI_SUPPORT;
+		osc_sb_native_usb4_support_confirmed =
+			capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_NATIVE_USB4_SUPPORT;
+	}
 
 	kfree(context.ret.pointer);
 }
-- 
GitLab


From c42fa24b44751c62c86e98430ef915c0609a2ab8 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 16 Mar 2022 13:39:03 +0100
Subject: [PATCH 1524/1586] ACPI: bus: Avoid using CPPC if not supported by
 firmware

If the platform firmware indicates that it does not support CPPC by
clearing the OSC_SB_CPC_SUPPORT and OSC_SB_CPCV2_SUPPORT bits in the
platform _OSC capabilities mask, avoid attempting to evaluate _CPC
which may fail in that case.

Because the OSC_SB_CPC_SUPPORT and OSC_SB_CPCV2_SUPPORT bits are only
added to the supported platform capabilities mask on x86, when
X86_FEATURE_HWP is supported, allow _CPC to be evaluated regardless
in the other cases.

Link: https://lore.kernel.org/linux-acpi/CAJZ5v0i=ecAksq0TV+iLVObm-=fUfdqPABzzkgm9K6KxO1ZCcg@mail.gmail.com
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Tested-by: Mario Limonciello <mario.limonciello@amd.com>
Acked-by: Huang Rui <ray.huang@amd.com>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
---
 drivers/acpi/bus.c       | 8 ++++++++
 drivers/acpi/cppc_acpi.c | 3 +++
 include/linux/acpi.h     | 1 +
 3 files changed, 12 insertions(+)

diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index c7670d441ad9e..8aa7860602e7a 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -283,6 +283,8 @@ EXPORT_SYMBOL_GPL(osc_pc_lpi_support_confirmed);
 bool osc_sb_native_usb4_support_confirmed;
 EXPORT_SYMBOL_GPL(osc_sb_native_usb4_support_confirmed);
 
+bool osc_sb_cppc_not_supported;
+
 static u8 sb_uuid_str[] = "0811B06E-4A27-44F9-8D60-3CBBC22E7B48";
 static void acpi_bus_osc_negotiate_platform_control(void)
 {
@@ -338,6 +340,12 @@ static void acpi_bus_osc_negotiate_platform_control(void)
 		return;
 	}
 
+#ifdef CONFIG_X86
+	if (boot_cpu_has(X86_FEATURE_HWP))
+		osc_sb_cppc_not_supported = !(capbuf_ret[OSC_SUPPORT_DWORD] &
+				(OSC_SB_CPC_SUPPORT | OSC_SB_CPCV2_SUPPORT));
+#endif
+
 	/*
 	 * Now run _OSC again with query flag clear and with the caps
 	 * supported by both the OS and the platform.
diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c
index 866560cbb082c..3e89ad246b60f 100644
--- a/drivers/acpi/cppc_acpi.c
+++ b/drivers/acpi/cppc_acpi.c
@@ -656,6 +656,9 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr)
 	acpi_status status;
 	int ret = -EFAULT;
 
+	if (osc_sb_cppc_not_supported)
+		return -ENODEV;
+
 	/* Parse the ACPI _CPC table for this CPU. */
 	status = acpi_evaluate_object_typed(handle, "_CPC", NULL, &output,
 			ACPI_TYPE_PACKAGE);
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 6274758648e37..690b0533104ae 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -580,6 +580,7 @@ acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context);
 extern bool osc_sb_apei_support_acked;
 extern bool osc_pc_lpi_support_confirmed;
 extern bool osc_sb_native_usb4_support_confirmed;
+extern bool osc_sb_cppc_not_supported;
 
 /* USB4 Capabilities */
 #define OSC_USB_USB3_TUNNELING			0x00000001
-- 
GitLab


From 3870a44d50feeb5118a2698617d251123d9cde4c Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 16 Mar 2022 16:55:48 -0500
Subject: [PATCH 1525/1586] cpufreq: powernow-k8: Re-order the init checks

The powernow-k8 driver will do checks at startup that the current
active driver is acpi-cpufreq and show a warning when they're not
expected.

Because of this the following warning comes up on systems that
support amd-pstate and compiled in both drivers:
`WTF driver: amd-pstate`

The systems that support powernow-k8 will not support amd-pstate,
so re-order the checks to validate the CPU model number first to
avoid this warning being displayed on modern SOCs.

Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Huang Rui <ray.huang@amd.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/powernow-k8.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/cpufreq/powernow-k8.c b/drivers/cpufreq/powernow-k8.c
index 12ab4014af712..d289036beff23 100644
--- a/drivers/cpufreq/powernow-k8.c
+++ b/drivers/cpufreq/powernow-k8.c
@@ -1172,14 +1172,14 @@ static int powernowk8_init(void)
 	unsigned int i, supported_cpus = 0;
 	int ret;
 
+	if (!x86_match_cpu(powernow_k8_ids))
+		return -ENODEV;
+
 	if (boot_cpu_has(X86_FEATURE_HW_PSTATE)) {
 		__request_acpi_cpufreq();
 		return -ENODEV;
 	}
 
-	if (!x86_match_cpu(powernow_k8_ids))
-		return -ENODEV;
-
 	cpus_read_lock();
 	for_each_online_cpu(i) {
 		smp_call_function_single(i, check_supported_cpu, &ret, 1);
-- 
GitLab


From a1108dcd9373a98f7018aa4310076260b8ecfc0b Mon Sep 17 00:00:00 2001
From: David Anderson <dvander@google.com>
Date: Thu, 17 Mar 2022 19:49:59 +0800
Subject: [PATCH 1526/1586] erofs: rename ctime to mtime

EROFS images should inherit modification time rather than change time,
since users and host tooling have no easy way to control change time.

To reflect the new timestamp meaning, i_ctime and i_ctime_nsec are
renamed to i_mtime and i_mtime_nsec.

Link: https://lore.kernel.org/r/20220311041829.3109511-1-dvander@google.com # v1
Signed-off-by: David Anderson <dvander@google.com>
[ Gao Xiang: update document as well. ]
Reviewed-by: Chao Yu <chao@kernel.org>
Link: https://lore.kernel.org/r/20220317114959.106787-1-hsiangkao@linux.alibaba.com # v2
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
 Documentation/filesystems/erofs.rst | 2 +-
 fs/erofs/erofs_fs.h                 | 5 +++--
 fs/erofs/inode.c                    | 4 ++--
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/Documentation/filesystems/erofs.rst b/Documentation/filesystems/erofs.rst
index 7119aa213be71..bef6d3040ce4a 100644
--- a/Documentation/filesystems/erofs.rst
+++ b/Documentation/filesystems/erofs.rst
@@ -40,7 +40,7 @@ Here is the main features of EROFS:
    Inode metadata size    32 bytes      64 bytes
    Max file size          4 GB          16 EB (also limited by max. vol size)
    Max uids/gids          65536         4294967296
-   File change time       no            yes (64 + 32-bit timestamp)
+   Per-inode timestamp    no            yes (64 + 32-bit timestamp)
    Max hardlinks          65536         4294967296
    Metadata reserved      4 bytes       14 bytes
    =====================  ============  =====================================
diff --git a/fs/erofs/erofs_fs.h b/fs/erofs/erofs_fs.h
index 3ea62c6fb00a7..1238ca104f099 100644
--- a/fs/erofs/erofs_fs.h
+++ b/fs/erofs/erofs_fs.h
@@ -12,6 +12,7 @@
 #define EROFS_SUPER_OFFSET      1024
 
 #define EROFS_FEATURE_COMPAT_SB_CHKSUM          0x00000001
+#define EROFS_FEATURE_COMPAT_MTIME              0x00000002
 
 /*
  * Any bits that aren't in EROFS_ALL_FEATURE_INCOMPAT should
@@ -186,8 +187,8 @@ struct erofs_inode_extended {
 
 	__le32 i_uid;
 	__le32 i_gid;
-	__le64 i_ctime;
-	__le32 i_ctime_nsec;
+	__le64 i_mtime;
+	__le32 i_mtime_nsec;
 	__le32 i_nlink;
 	__u8   i_reserved2[16];
 };
diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c
index ff62f84f47d30..e8b37ba5e9ad0 100644
--- a/fs/erofs/inode.c
+++ b/fs/erofs/inode.c
@@ -113,8 +113,8 @@ static void *erofs_read_inode(struct erofs_buf *buf,
 		set_nlink(inode, le32_to_cpu(die->i_nlink));
 
 		/* extended inode has its own timestamp */
-		inode->i_ctime.tv_sec = le64_to_cpu(die->i_ctime);
-		inode->i_ctime.tv_nsec = le32_to_cpu(die->i_ctime_nsec);
+		inode->i_ctime.tv_sec = le64_to_cpu(die->i_mtime);
+		inode->i_ctime.tv_nsec = le32_to_cpu(die->i_mtime_nsec);
 
 		inode->i_size = le64_to_cpu(die->i_size);
 
-- 
GitLab


From f1858c277ba40172005b76a31e6bb931bfc19d9c Mon Sep 17 00:00:00 2001
From: Juerg Haefliger <juerg.haefliger@canonical.com>
Date: Wed, 16 Mar 2022 16:18:35 +0100
Subject: [PATCH 1527/1586] net: phy: mscc: Add MODULE_FIRMWARE macros

The driver requires firmware so define MODULE_FIRMWARE so that modinfo
provides the details.

Fixes: fa164e40c53b ("net: phy: mscc: split the driver into separate files")
Signed-off-by: Juerg Haefliger <juergh@canonical.com>
Link: https://lore.kernel.org/r/20220316151835.88765-1-juergh@canonical.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/phy/mscc/mscc_main.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/phy/mscc/mscc_main.c b/drivers/net/phy/mscc/mscc_main.c
index ebfeeb3c67c1d..7e3017e7a1c03 100644
--- a/drivers/net/phy/mscc/mscc_main.c
+++ b/drivers/net/phy/mscc/mscc_main.c
@@ -2685,3 +2685,6 @@ MODULE_DEVICE_TABLE(mdio, vsc85xx_tbl);
 MODULE_DESCRIPTION("Microsemi VSC85xx PHY driver");
 MODULE_AUTHOR("Nagaraju Lakkaraju");
 MODULE_LICENSE("Dual MIT/GPL");
+
+MODULE_FIRMWARE(MSCC_VSC8584_REVB_INT8051_FW);
+MODULE_FIRMWARE(MSCC_VSC8574_REVB_INT8051_FW);
-- 
GitLab


From bcfe9b6cbb4438b8c1cc4bd475221652c8f9301b Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Wed, 16 Mar 2022 12:20:02 -0700
Subject: [PATCH 1528/1586] virtio_blk: eliminate anonymous module_init &
 module_exit

Eliminate anonymous module_init() and module_exit(), which can lead to
confusion or ambiguity when reading System.map, crashes/oops/bugs,
or an initcall_debug log.

Give each of these init and exit functions unique driver-specific
names to eliminate the anonymous names.

Example 1: (System.map)
 ffffffff832fc78c t init
 ffffffff832fc79e t init
 ffffffff832fc8f8 t init

Example 2: (initcall_debug log)
 calling  init+0x0/0x12 @ 1
 initcall init+0x0/0x12 returned 0 after 15 usecs
 calling  init+0x0/0x60 @ 1
 initcall init+0x0/0x60 returned 0 after 2 usecs
 calling  init+0x0/0x9a @ 1
 initcall init+0x0/0x9a returned 0 after 74 usecs

Fixes: e467cde23818 ("Block driver using virtio.")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Jason Wang <jasowang@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Stefan Hajnoczi <stefanha@redhat.com>
Cc: virtualization@lists.linux-foundation.org
Cc: Jens Axboe <axboe@kernel.dk>
Cc: linux-block@vger.kernel.org
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Link: https://lore.kernel.org/r/20220316192010.19001-2-rdunlap@infradead.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/virtio_blk.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 5c636ca7f1a7f..fa8abf982c640 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -1022,7 +1022,7 @@ static struct virtio_driver virtio_blk = {
 #endif
 };
 
-static int __init init(void)
+static int __init virtio_blk_init(void)
 {
 	int error;
 
@@ -1048,14 +1048,14 @@ out_destroy_workqueue:
 	return error;
 }
 
-static void __exit fini(void)
+static void __exit virtio_blk_fini(void)
 {
 	unregister_virtio_driver(&virtio_blk);
 	unregister_blkdev(major, "virtblk");
 	destroy_workqueue(virtblk_wq);
 }
-module_init(init);
-module_exit(fini);
+module_init(virtio_blk_init);
+module_exit(virtio_blk_fini);
 
 MODULE_DEVICE_TABLE(virtio, id_table);
 MODULE_DESCRIPTION("Virtio block driver");
-- 
GitLab


From 98237fcda4a24e67b0a4498c17d5aa4ad4537bc7 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Mon, 7 Mar 2022 17:27:46 -0800
Subject: [PATCH 1529/1586] f2fs: use spin_lock to avoid hang

[14696.634553] task:cat             state:D stack:    0 pid:1613738 ppid:1613735 flags:0x00000004
[14696.638285] Call Trace:
[14696.639038]  <TASK>
[14696.640032]  __schedule+0x302/0x930
[14696.640969]  schedule+0x58/0xd0
[14696.641799]  schedule_preempt_disabled+0x18/0x30
[14696.642890]  __mutex_lock.constprop.0+0x2fb/0x4f0
[14696.644035]  ? mod_objcg_state+0x10c/0x310
[14696.645040]  ? obj_cgroup_charge+0xe1/0x170
[14696.646067]  __mutex_lock_slowpath+0x13/0x20
[14696.647126]  mutex_lock+0x34/0x40
[14696.648070]  stat_show+0x25/0x17c0 [f2fs]
[14696.649218]  seq_read_iter+0x120/0x4b0
[14696.650289]  ? aa_file_perm+0x12a/0x500
[14696.651357]  ? lru_cache_add+0x1c/0x20
[14696.652470]  seq_read+0xfd/0x140
[14696.653445]  full_proxy_read+0x5c/0x80
[14696.654535]  vfs_read+0xa0/0x1a0
[14696.655497]  ksys_read+0x67/0xe0
[14696.656502]  __x64_sys_read+0x1a/0x20
[14696.657580]  do_syscall_64+0x3b/0xc0
[14696.658671]  entry_SYSCALL_64_after_hwframe+0x44/0xae
[14696.660068] RIP: 0033:0x7efe39df1cb2
[14696.661133] RSP: 002b:00007ffc8badd948 EFLAGS: 00000246 ORIG_RAX: 0000000000000000
[14696.662958] RAX: ffffffffffffffda RBX: 0000000000020000 RCX: 00007efe39df1cb2
[14696.664757] RDX: 0000000000020000 RSI: 00007efe399df000 RDI: 0000000000000003
[14696.666542] RBP: 00007efe399df000 R08: 00007efe399de010 R09: 00007efe399de010
[14696.668363] R10: 0000000000000022 R11: 0000000000000246 R12: 0000000000000000
[14696.670155] R13: 0000000000000003 R14: 0000000000020000 R15: 0000000000020000
[14696.671965]  </TASK>
[14696.672826] task:umount          state:D stack:    0 pid:1614985 ppid:1614984 flags:0x00004000
[14696.674930] Call Trace:
[14696.675903]  <TASK>
[14696.676780]  __schedule+0x302/0x930
[14696.677927]  schedule+0x58/0xd0
[14696.679019]  schedule_preempt_disabled+0x18/0x30
[14696.680412]  __mutex_lock.constprop.0+0x2fb/0x4f0
[14696.681783]  ? destroy_inode+0x65/0x80
[14696.683006]  __mutex_lock_slowpath+0x13/0x20
[14696.684305]  mutex_lock+0x34/0x40
[14696.685442]  f2fs_destroy_stats+0x1e/0x60 [f2fs]
[14696.686803]  f2fs_put_super+0x158/0x390 [f2fs]
[14696.688238]  generic_shutdown_super+0x7a/0x120
[14696.689621]  kill_block_super+0x27/0x50
[14696.690894]  kill_f2fs_super+0x7f/0x100 [f2fs]
[14696.692311]  deactivate_locked_super+0x35/0xa0
[14696.693698]  deactivate_super+0x40/0x50
[14696.694985]  cleanup_mnt+0x139/0x190
[14696.696209]  __cleanup_mnt+0x12/0x20
[14696.697390]  task_work_run+0x64/0xa0
[14696.698587]  exit_to_user_mode_prepare+0x1b7/0x1c0
[14696.700053]  syscall_exit_to_user_mode+0x27/0x50
[14696.701418]  do_syscall_64+0x48/0xc0
[14696.702630]  entry_SYSCALL_64_after_hwframe+0x44/0xae

Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/debug.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index cba5eab24595f..6d26872c7364d 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -21,7 +21,7 @@
 #include "gc.h"
 
 static LIST_HEAD(f2fs_stat_list);
-static DEFINE_MUTEX(f2fs_stat_mutex);
+static DEFINE_RAW_SPINLOCK(f2fs_stat_lock);
 #ifdef CONFIG_DEBUG_FS
 static struct dentry *f2fs_debugfs_root;
 #endif
@@ -345,8 +345,9 @@ static int stat_show(struct seq_file *s, void *v)
 {
 	struct f2fs_stat_info *si;
 	int i = 0, j = 0;
+	unsigned long flags;
 
-	mutex_lock(&f2fs_stat_mutex);
+	raw_spin_lock_irqsave(&f2fs_stat_lock, flags);
 	list_for_each_entry(si, &f2fs_stat_list, stat_list) {
 		update_general_status(si->sbi);
 
@@ -577,7 +578,7 @@ static int stat_show(struct seq_file *s, void *v)
 		seq_printf(s, "  - paged : %llu KB\n",
 				si->page_mem >> 10);
 	}
-	mutex_unlock(&f2fs_stat_mutex);
+	raw_spin_unlock_irqrestore(&f2fs_stat_lock, flags);
 	return 0;
 }
 
@@ -588,6 +589,7 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi)
 {
 	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
 	struct f2fs_stat_info *si;
+	unsigned long flags;
 	int i;
 
 	si = f2fs_kzalloc(sbi, sizeof(struct f2fs_stat_info), GFP_KERNEL);
@@ -623,9 +625,9 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi)
 	atomic_set(&sbi->max_aw_cnt, 0);
 	atomic_set(&sbi->max_vw_cnt, 0);
 
-	mutex_lock(&f2fs_stat_mutex);
+	raw_spin_lock_irqsave(&f2fs_stat_lock, flags);
 	list_add_tail(&si->stat_list, &f2fs_stat_list);
-	mutex_unlock(&f2fs_stat_mutex);
+	raw_spin_unlock_irqrestore(&f2fs_stat_lock, flags);
 
 	return 0;
 }
@@ -633,10 +635,11 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi)
 void f2fs_destroy_stats(struct f2fs_sb_info *sbi)
 {
 	struct f2fs_stat_info *si = F2FS_STAT(sbi);
+	unsigned long flags;
 
-	mutex_lock(&f2fs_stat_mutex);
+	raw_spin_lock_irqsave(&f2fs_stat_lock, flags);
 	list_del(&si->stat_list);
-	mutex_unlock(&f2fs_stat_mutex);
+	raw_spin_unlock_irqrestore(&f2fs_stat_lock, flags);
 
 	kfree(si);
 }
-- 
GitLab


From 646f64b576f7a80af0dd555e25c79ee8af058681 Mon Sep 17 00:00:00 2001
From: Wang Xiaojun <wangxiaojun11@huawei.com>
Date: Mon, 14 Mar 2022 15:15:15 +0800
Subject: [PATCH 1530/1586] f2fs: remove redundant parameter judgment

iput() has already judged the incoming parameter, so there is
no need to repeat the judgment here.

Signed-off-by: Wang Xiaojun <wangxiaojun11@huawei.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/namei.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 2b23a76bdae92..906e9e301ac80 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -1111,8 +1111,7 @@ out_dir:
 out_old:
 	f2fs_put_page(old_page, 0);
 out:
-	if (whiteout)
-		iput(whiteout);
+	iput(whiteout);
 	return err;
 }
 
-- 
GitLab


From d284af43f703760e261b1601378a0c13a19d5f1f Mon Sep 17 00:00:00 2001
From: Chao Yu <chao@kernel.org>
Date: Wed, 16 Mar 2022 18:20:00 +0800
Subject: [PATCH 1531/1586] f2fs: compress: fix to print raw data size in error
 path of lz4 decompression

In lz4_decompress_pages(), if size of decompressed data is not equal to
expected one, we should print the size rather than size of target buffer
for decompressed data, fix it.

Signed-off-by: Chao Yu <chao.yu@oppo.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/compress.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index 67bac2792e571..11e99bf6286c7 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -314,10 +314,9 @@ static int lz4_decompress_pages(struct decompress_io_ctx *dic)
 	}
 
 	if (ret != PAGE_SIZE << dic->log_cluster_size) {
-		printk_ratelimited("%sF2FS-fs (%s): lz4 invalid rlen:%zu, "
+		printk_ratelimited("%sF2FS-fs (%s): lz4 invalid ret:%d, "
 					"expected:%lu\n", KERN_ERR,
-					F2FS_I_SB(dic->inode)->sb->s_id,
-					dic->rlen,
+					F2FS_I_SB(dic->inode)->sb->s_id, ret,
 					PAGE_SIZE << dic->log_cluster_size);
 		return -EIO;
 	}
-- 
GitLab


From d98af5f4552058a5c22030641ef79cee92c61f54 Mon Sep 17 00:00:00 2001
From: Daeho Jeong <daehojeong@google.com>
Date: Tue, 15 Mar 2022 21:14:14 -0700
Subject: [PATCH 1532/1586] f2fs: introduce gc_urgent_mid mode

We need a mid level of gc urgent mode to do GC forcibly in a period
of given gc_urgent_sleep_time, but not like using greedy GC approach
and switching to SSR mode such as gc urgent high mode. This can be
used for more aggressive periodic storage clean up.

Signed-off-by: Daeho Jeong <daehojeong@google.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 Documentation/ABI/testing/sysfs-fs-f2fs | 17 +++++++++++------
 fs/f2fs/debug.c                         |  4 +++-
 fs/f2fs/f2fs.h                          |  4 ++++
 fs/f2fs/gc.c                            |  3 +++
 fs/f2fs/sysfs.c                         |  7 +++++++
 5 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index 5a5f3c5445f66..9b583dd0298b7 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -297,11 +297,16 @@ Description:	Shows current reserved blocks in system, it may be temporarily
 What:		/sys/fs/f2fs/<disk>/gc_urgent
 Date:		August 2017
 Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
-Description:	Do background GC aggressively when set. When gc_urgent = 1,
-		background thread starts to do GC by given gc_urgent_sleep_time
-		interval. When gc_urgent = 2, F2FS will lower the bar of
-		checking idle in order to process outstanding discard commands
-		and GC a little bit aggressively. It is set to 0 by default.
+Description:	Do background GC aggressively when set. Set to 0 by default.
+		gc urgent high(1): does GC forcibly in a period of given
+		gc_urgent_sleep_time and ignores I/O idling check. uses greedy
+		GC approach and turns SSR mode on.
+		gc urgent low(2): lowers the bar of checking I/O idling in
+		order to process outstanding discard commands and GC a
+		little bit aggressively. uses cost benefit GC approach.
+		gc urgent mid(3): does GC forcibly in a period of given
+		gc_urgent_sleep_time and executes a mid level of I/O idling check.
+		uses cost benefit GC approach.
 
 What:		/sys/fs/f2fs/<disk>/gc_urgent_sleep_time
 Date:		August 2017
@@ -532,7 +537,7 @@ Date:		July 2021
 Contact:	"Daeho Jeong" <daehojeong@google.com>
 Description:	Show how many segments have been reclaimed by GC during a specific
 		GC mode (0: GC normal, 1: GC idle CB, 2: GC idle greedy,
-		3: GC idle AT, 4: GC urgent high, 5: GC urgent low)
+		3: GC idle AT, 4: GC urgent high, 5: GC urgent low 6: GC urgent mid)
 		You can re-initialize this value to "0".
 
 What:		/sys/fs/f2fs/<disk>/gc_segment_mode
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 6d26872c7364d..fcdf253cd211e 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -476,12 +476,14 @@ static int stat_show(struct seq_file *s, void *v)
 				si->node_segs, si->bg_node_segs);
 		seq_printf(s, "  - Reclaimed segs : Normal (%d), Idle CB (%d), "
 				"Idle Greedy (%d), Idle AT (%d), "
-				"Urgent High (%d), Urgent Low (%d)\n",
+				"Urgent High (%d), Urgent Mid (%d), "
+				"Urgent Low (%d)\n",
 				si->sbi->gc_reclaimed_segs[GC_NORMAL],
 				si->sbi->gc_reclaimed_segs[GC_IDLE_CB],
 				si->sbi->gc_reclaimed_segs[GC_IDLE_GREEDY],
 				si->sbi->gc_reclaimed_segs[GC_IDLE_AT],
 				si->sbi->gc_reclaimed_segs[GC_URGENT_HIGH],
+				si->sbi->gc_reclaimed_segs[GC_URGENT_MID],
 				si->sbi->gc_reclaimed_segs[GC_URGENT_LOW]);
 		seq_printf(s, "Try to move %d blocks (BG: %d)\n", si->tot_blks,
 				si->bg_data_blks + si->bg_node_blks);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index da729f53daa8a..3016b6354a68a 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1313,6 +1313,7 @@ enum {
 	GC_IDLE_AT,
 	GC_URGENT_HIGH,
 	GC_URGENT_LOW,
+	GC_URGENT_MID,
 	MAX_GC_MODE,
 };
 
@@ -2784,6 +2785,9 @@ static inline bool is_idle(struct f2fs_sb_info *sbi, int type)
 	if (is_inflight_io(sbi, type))
 		return false;
 
+	if (sbi->gc_mode == GC_URGENT_MID)
+		return true;
+
 	if (sbi->gc_mode == GC_URGENT_LOW &&
 			(type == DISCARD_TIME || type == GC_TIME))
 		return true;
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 2d53ef121e76e..ea5b93b689cd5 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -103,7 +103,10 @@ static int gc_thread_func(void *data)
 				sbi->gc_urgent_high_remaining--;
 			}
 			spin_unlock(&sbi->gc_urgent_high_lock);
+		}
 
+		if (sbi->gc_mode == GC_URGENT_HIGH ||
+				sbi->gc_mode == GC_URGENT_MID) {
 			wait_ms = gc_th->urgent_sleep_time;
 			f2fs_down_write(&sbi->gc_lock);
 			goto do_gc;
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index 47efcf233afdd..fe29bcb70f469 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -468,6 +468,13 @@ out:
 			}
 		} else if (t == 2) {
 			sbi->gc_mode = GC_URGENT_LOW;
+		} else if (t == 3) {
+			sbi->gc_mode = GC_URGENT_MID;
+			if (sbi->gc_thread) {
+				sbi->gc_thread->gc_wake = 1;
+				wake_up_interruptible_all(
+					&sbi->gc_thread->gc_wait_queue_head);
+			}
 		} else {
 			return -EINVAL;
 		}
-- 
GitLab


From 424e7834e293936a54fcf05173f2884171adc5a3 Mon Sep 17 00:00:00 2001
From: Manish Chopra <manishc@marvell.com>
Date: Wed, 16 Mar 2022 14:46:13 -0700
Subject: [PATCH 1533/1586] bnx2x: fix built-in kernel driver load failure

Commit b7a49f73059f ("bnx2x: Utilize firmware 7.13.21.0")
added request_firmware() logic in probe() which caused
load failure when firmware file is not present in initrd (below),
as access to firmware file is not feasible during probe.

  Direct firmware load for bnx2x/bnx2x-e2-7.13.15.0.fw failed with error -2
  Direct firmware load for bnx2x/bnx2x-e2-7.13.21.0.fw failed with error -2

This patch fixes this issue by -

1. Removing request_firmware() logic from the probe()
   such that .ndo_open() handle it as it used to handle
   it earlier

2. Given request_firmware() is removed from probe(), so
   driver has to relax FW version comparisons a bit against
   the already loaded FW version (by some other PFs of same
   adapter) to allow different compatible/close enough FWs with which
   multiple PFs may run with (in different environments), as the
   given PF who is in probe flow has no idea now with which firmware
   file version it is going to initialize the device in ndo_open()

Link: https://lore.kernel.org/all/46f2d9d9-ae7f-b332-ddeb-b59802be2bab@molgen.mpg.de/
Reported-by: Paul Menzel <pmenzel@molgen.mpg.de>
Tested-by: Paul Menzel <pmenzel@molgen.mpg.de>
Fixes: b7a49f73059f ("bnx2x: Utilize firmware 7.13.21.0")
Signed-off-by: Manish Chopra <manishc@marvell.com>
Signed-off-by: Ariel Elior <aelior@marvell.com>
Link: https://lore.kernel.org/r/20220316214613.6884-1-manishc@marvell.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x.h   |  2 --
 .../net/ethernet/broadcom/bnx2x/bnx2x_cmn.c   | 28 +++++++++++--------
 .../net/ethernet/broadcom/bnx2x/bnx2x_main.c  | 15 ++--------
 3 files changed, 19 insertions(+), 26 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
index a19dd6797070c..2209d99b34047 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
@@ -2533,6 +2533,4 @@ void bnx2x_register_phc(struct bnx2x *bp);
  * Meant for implicit re-load flows.
  */
 int bnx2x_vlan_reconfigure_vid(struct bnx2x *bp);
-int bnx2x_init_firmware(struct bnx2x *bp);
-void bnx2x_release_firmware(struct bnx2x *bp);
 #endif /* bnx2x.h */
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index 8d36ebbf08e16..5729a5ab059d7 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -2364,24 +2364,30 @@ int bnx2x_compare_fw_ver(struct bnx2x *bp, u32 load_code, bool print_err)
 	/* is another pf loaded on this engine? */
 	if (load_code != FW_MSG_CODE_DRV_LOAD_COMMON_CHIP &&
 	    load_code != FW_MSG_CODE_DRV_LOAD_COMMON) {
-		/* build my FW version dword */
-		u32 my_fw = (bp->fw_major) + (bp->fw_minor << 8) +
-				(bp->fw_rev << 16) + (bp->fw_eng << 24);
+		u8 loaded_fw_major, loaded_fw_minor, loaded_fw_rev, loaded_fw_eng;
+		u32 loaded_fw;
 
 		/* read loaded FW from chip */
-		u32 loaded_fw = REG_RD(bp, XSEM_REG_PRAM);
+		loaded_fw = REG_RD(bp, XSEM_REG_PRAM);
 
-		DP(BNX2X_MSG_SP, "loaded fw %x, my fw %x\n",
-		   loaded_fw, my_fw);
+		loaded_fw_major = loaded_fw & 0xff;
+		loaded_fw_minor = (loaded_fw >> 8) & 0xff;
+		loaded_fw_rev = (loaded_fw >> 16) & 0xff;
+		loaded_fw_eng = (loaded_fw >> 24) & 0xff;
+
+		DP(BNX2X_MSG_SP, "loaded fw 0x%x major 0x%x minor 0x%x rev 0x%x eng 0x%x\n",
+		   loaded_fw, loaded_fw_major, loaded_fw_minor, loaded_fw_rev, loaded_fw_eng);
 
 		/* abort nic load if version mismatch */
-		if (my_fw != loaded_fw) {
+		if (loaded_fw_major != BCM_5710_FW_MAJOR_VERSION ||
+		    loaded_fw_minor != BCM_5710_FW_MINOR_VERSION ||
+		    loaded_fw_eng != BCM_5710_FW_ENGINEERING_VERSION ||
+		    loaded_fw_rev < BCM_5710_FW_REVISION_VERSION_V15) {
 			if (print_err)
-				BNX2X_ERR("bnx2x with FW %x was already loaded which mismatches my %x FW. Aborting\n",
-					  loaded_fw, my_fw);
+				BNX2X_ERR("loaded FW incompatible. Aborting\n");
 			else
-				BNX2X_DEV_INFO("bnx2x with FW %x was already loaded which mismatches my %x FW, possibly due to MF UNDI\n",
-					       loaded_fw, my_fw);
+				BNX2X_DEV_INFO("loaded FW incompatible, possibly due to MF UNDI\n");
+
 			return -EBUSY;
 		}
 	}
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index eedb48d945ed0..c19b072f3a237 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -12319,15 +12319,6 @@ static int bnx2x_init_bp(struct bnx2x *bp)
 
 	bnx2x_read_fwinfo(bp);
 
-	if (IS_PF(bp)) {
-		rc = bnx2x_init_firmware(bp);
-
-		if (rc) {
-			bnx2x_free_mem_bp(bp);
-			return rc;
-		}
-	}
-
 	func = BP_FUNC(bp);
 
 	/* need to reset chip if undi was active */
@@ -12340,7 +12331,6 @@ static int bnx2x_init_bp(struct bnx2x *bp)
 
 		rc = bnx2x_prev_unload(bp);
 		if (rc) {
-			bnx2x_release_firmware(bp);
 			bnx2x_free_mem_bp(bp);
 			return rc;
 		}
@@ -13409,7 +13399,7 @@ do {									\
 	     (u8 *)bp->arr, len);					\
 } while (0)
 
-int bnx2x_init_firmware(struct bnx2x *bp)
+static int bnx2x_init_firmware(struct bnx2x *bp)
 {
 	const char *fw_file_name, *fw_file_name_v15;
 	struct bnx2x_fw_file_hdr *fw_hdr;
@@ -13509,7 +13499,7 @@ request_firmware_exit:
 	return rc;
 }
 
-void bnx2x_release_firmware(struct bnx2x *bp)
+static void bnx2x_release_firmware(struct bnx2x *bp)
 {
 	kfree(bp->init_ops_offsets);
 	kfree(bp->init_ops);
@@ -14026,7 +14016,6 @@ static int bnx2x_init_one(struct pci_dev *pdev,
 	return 0;
 
 init_one_freemem:
-	bnx2x_release_firmware(bp);
 	bnx2x_free_mem_bp(bp);
 
 init_one_exit:
-- 
GitLab


From 0f643c88c8d240eba0ea25c2e095a46515ff46e9 Mon Sep 17 00:00:00 2001
From: Doug Berger <opendmb@gmail.com>
Date: Wed, 16 Mar 2022 18:28:12 -0700
Subject: [PATCH 1534/1586] net: bcmgenet: skip invalid partial checksums

The RXCHK block will return a partial checksum of 0 if it encounters
a problem while receiving a packet. Since a 1's complement sum can
only produce this result if no bits are set in the received data
stream it is fair to treat it as an invalid partial checksum and
not pass it up the stack.

Fixes: 810155397890 ("net: bcmgenet: use CHECKSUM_COMPLETE for NETIF_F_RXCSUM")
Signed-off-by: Doug Berger <opendmb@gmail.com>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Link: https://lore.kernel.org/r/20220317012812.1313196-1-opendmb@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/broadcom/genet/bcmgenet.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index 87f1056e29ff2..2da804f84b480 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -2287,8 +2287,10 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring,
 		dma_length_status = status->length_status;
 		if (dev->features & NETIF_F_RXCSUM) {
 			rx_csum = (__force __be16)(status->rx_csum & 0xffff);
-			skb->csum = (__force __wsum)ntohs(rx_csum);
-			skb->ip_summed = CHECKSUM_COMPLETE;
+			if (rx_csum) {
+				skb->csum = (__force __wsum)ntohs(rx_csum);
+				skb->ip_summed = CHECKSUM_COMPLETE;
+			}
 		}
 
 		/* DMA flags and length are still valid no matter how
-- 
GitLab


From 8e0341aefcc9133f3f48683873284b169581315b Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Wed, 16 Mar 2022 21:21:17 +0200
Subject: [PATCH 1535/1586] net: mscc: ocelot: fix backwards compatibility with
 single-chain tc-flower offload

ACL rules can be offloaded to VCAP IS2 either through chain 0, or, since
the blamed commit, through a chain index whose number encodes a specific
PAG (Policy Action Group) and lookup number.

The chain number is translated through ocelot_chain_to_pag() into a PAG,
and through ocelot_chain_to_lookup() into a lookup number.

The problem with the blamed commit is that the above 2 functions don't
have special treatment for chain 0. So ocelot_chain_to_pag(0) returns
filter->pag = 224, which is in fact -32, but the "pag" field is an u8.

So we end up programming the hardware with VCAP IS2 entries having a PAG
of 224. But the way in which the PAG works is that it defines a subset
of VCAP IS2 filters which should match on a packet. The default PAG is
0, and previous VCAP IS1 rules (which we offload using 'goto') can
modify it. So basically, we are installing filters with a PAG on which
no packet will ever match. This is the hardware equivalent of adding
filters to a chain which has no 'goto' to it.

Restore the previous functionality by making ACL filters offloaded to
chain 0 go to PAG 0 and lookup number 0. The choice of PAG is clearly
correct, but the choice of lookup number isn't "as before" (which was to
leave the lookup a "don't care"). However, lookup 0 should be fine,
since even though there are ACL actions (policers) which have a
requirement to be used in a specific lookup, that lookup is 0.

Fixes: 226e9cd82a96 ("net: mscc: ocelot: only install TCAM entries into a specific lookup and PAG")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Link: https://lore.kernel.org/r/20220316192117.2568261-1-vladimir.oltean@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/mscc/ocelot_flower.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mscc/ocelot_flower.c b/drivers/net/ethernet/mscc/ocelot_flower.c
index 949858891973d..fdb4d7e7296c8 100644
--- a/drivers/net/ethernet/mscc/ocelot_flower.c
+++ b/drivers/net/ethernet/mscc/ocelot_flower.c
@@ -60,6 +60,12 @@ static int ocelot_chain_to_block(int chain, bool ingress)
  */
 static int ocelot_chain_to_lookup(int chain)
 {
+	/* Backwards compatibility with older, single-chain tc-flower
+	 * offload support in Ocelot
+	 */
+	if (chain == 0)
+		return 0;
+
 	return (chain / VCAP_LOOKUP) % 10;
 }
 
@@ -68,7 +74,15 @@ static int ocelot_chain_to_lookup(int chain)
  */
 static int ocelot_chain_to_pag(int chain)
 {
-	int lookup = ocelot_chain_to_lookup(chain);
+	int lookup;
+
+	/* Backwards compatibility with older, single-chain tc-flower
+	 * offload support in Ocelot
+	 */
+	if (chain == 0)
+		return 0;
+
+	lookup = ocelot_chain_to_lookup(chain);
 
 	/* calculate PAG value as chain index relative to the first PAG */
 	return chain - VCAP_IS2_CHAIN(lookup, 0);
-- 
GitLab


From b04683ff8f0823b869c219c78ba0d974bddea0b5 Mon Sep 17 00:00:00 2001
From: Ivan Vecera <ivecera@redhat.com>
Date: Thu, 17 Mar 2022 11:45:24 +0100
Subject: [PATCH 1536/1586] iavf: Fix hang during reboot/shutdown

Recent commit 974578017fc1 ("iavf: Add waiting so the port is
initialized in remove") adds a wait-loop at the beginning of
iavf_remove() to ensure that port initialization is finished
prior unregistering net device. This causes a regression
in reboot/shutdown scenario because in this case callback
iavf_shutdown() is called and this callback detaches the device,
makes it down if it is running and sets its state to __IAVF_REMOVE.
Later shutdown callback of associated PF driver (e.g. ice_shutdown)
is called. That callback calls among other things sriov_disable()
that calls indirectly iavf_remove() (see stack trace below).
As the adapter state is already __IAVF_REMOVE then the mentioned
loop is end-less and shutdown process hangs.

The patch fixes this by checking adapter's state at the beginning
of iavf_remove() and skips the rest of the function if the adapter
is already in remove state (shutdown is in progress).

Reproducer:
1. Create VF on PF driven by ice or i40e driver
2. Ensure that the VF is bound to iavf driver
3. Reboot

[52625.981294] sysrq: SysRq : Show Blocked State
[52625.988377] task:reboot          state:D stack:    0 pid:17359 ppid:     1 f2
[52625.996732] Call Trace:
[52625.999187]  __schedule+0x2d1/0x830
[52626.007400]  schedule+0x35/0xa0
[52626.010545]  schedule_hrtimeout_range_clock+0x83/0x100
[52626.020046]  usleep_range+0x5b/0x80
[52626.023540]  iavf_remove+0x63/0x5b0 [iavf]
[52626.027645]  pci_device_remove+0x3b/0xc0
[52626.031572]  device_release_driver_internal+0x103/0x1f0
[52626.036805]  pci_stop_bus_device+0x72/0xa0
[52626.040904]  pci_stop_and_remove_bus_device+0xe/0x20
[52626.045870]  pci_iov_remove_virtfn+0xba/0x120
[52626.050232]  sriov_disable+0x2f/0xe0
[52626.053813]  ice_free_vfs+0x7c/0x340 [ice]
[52626.057946]  ice_remove+0x220/0x240 [ice]
[52626.061967]  ice_shutdown+0x16/0x50 [ice]
[52626.065987]  pci_device_shutdown+0x34/0x60
[52626.070086]  device_shutdown+0x165/0x1c5
[52626.074011]  kernel_restart+0xe/0x30
[52626.077593]  __do_sys_reboot+0x1d2/0x210
[52626.093815]  do_syscall_64+0x5b/0x1a0
[52626.097483]  entry_SYSCALL_64_after_hwframe+0x65/0xca

Fixes: 974578017fc1 ("iavf: Add waiting so the port is initialized in remove")
Signed-off-by: Ivan Vecera <ivecera@redhat.com>
Link: https://lore.kernel.org/r/20220317104524.2802848-1-ivecera@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/intel/iavf/iavf_main.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
index 45570e3f782e8..0e178a0a59c5d 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -4620,6 +4620,13 @@ static void iavf_remove(struct pci_dev *pdev)
 	struct iavf_hw *hw = &adapter->hw;
 	int err;
 
+	/* When reboot/shutdown is in progress no need to do anything
+	 * as the adapter is already REMOVE state that was set during
+	 * iavf_shutdown() callback.
+	 */
+	if (adapter->state == __IAVF_REMOVE)
+		return;
+
 	set_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section);
 	/* Wait until port initialization is complete.
 	 * There are flows where register/unregister netdev may race.
-- 
GitLab


From ebc4cb43ea5ada3db46c80156fca58a54b9bbca8 Mon Sep 17 00:00:00 2001
From: Biju Das <biju.das.jz@bp.renesas.com>
Date: Wed, 16 Mar 2022 17:53:17 +0000
Subject: [PATCH 1537/1586] spi: Fix erroneous sgs value with min_t()

While computing sgs in spi_map_buf(), the data type
used in min_t() for max_seg_size is 'unsigned int' where
as that of ctlr->max_dma_len is 'size_t'.

min_t(unsigned int,x,y) gives wrong results if one of x/y is
'size_t'

Consider the below examples on a 64-bit machine (ie size_t is
64-bits, and unsigned int is 32-bit).
    case 1) min_t(unsigned int, 5, 0x100000001);
    case 2) min_t(size_t, 5, 0x100000001);

Case 1 returns '1', where as case 2 returns '5'. As you can see
the result from case 1 is wrong.

This patch fixes the above issue by using the data type of the
parameters that are used in min_t with maximum data length.

Fixes: commit 1a4e53d2fc4f68aa ("spi: Fix invalid sgs value")
Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Suggested-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Biju Das <biju.das.jz@bp.renesas.com>
Reviewed-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
Link: https://lore.kernel.org/r/20220316175317.465-1-biju.das.jz@bp.renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 6937cf2d59e07..c4dd1200fe991 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -1000,10 +1000,10 @@ int spi_map_buf(struct spi_controller *ctlr, struct device *dev,
 	int i, ret;
 
 	if (vmalloced_buf || kmap_buf) {
-		desc_len = min_t(unsigned int, max_seg_size, PAGE_SIZE);
+		desc_len = min_t(unsigned long, max_seg_size, PAGE_SIZE);
 		sgs = DIV_ROUND_UP(len + offset_in_page(buf), desc_len);
 	} else if (virt_addr_valid(buf)) {
-		desc_len = min_t(unsigned int, max_seg_size, ctlr->max_dma_len);
+		desc_len = min_t(size_t, max_seg_size, ctlr->max_dma_len);
 		sgs = DIV_ROUND_UP(len, desc_len);
 	} else {
 		return -EINVAL;
-- 
GitLab


From 029c4628b2eb2ca969e9bf979b05dc18d8d5575e Mon Sep 17 00:00:00 2001
From: Guo Ziliang <guo.ziliang@zte.com.cn>
Date: Wed, 16 Mar 2022 16:15:03 -0700
Subject: [PATCH 1538/1586] mm: swap: get rid of livelock in swapin readahead

In our testing, a livelock task was found.  Through sysrq printing, same
stack was found every time, as follows:

  __swap_duplicate+0x58/0x1a0
  swapcache_prepare+0x24/0x30
  __read_swap_cache_async+0xac/0x220
  read_swap_cache_async+0x58/0xa0
  swapin_readahead+0x24c/0x628
  do_swap_page+0x374/0x8a0
  __handle_mm_fault+0x598/0xd60
  handle_mm_fault+0x114/0x200
  do_page_fault+0x148/0x4d0
  do_translation_fault+0xb0/0xd4
  do_mem_abort+0x50/0xb0

The reason for the livelock is that swapcache_prepare() always returns
EEXIST, indicating that SWAP_HAS_CACHE has not been cleared, so that it
cannot jump out of the loop.  We suspect that the task that clears the
SWAP_HAS_CACHE flag never gets a chance to run.  We try to lower the
priority of the task stuck in a livelock so that the task that clears
the SWAP_HAS_CACHE flag will run.  The results show that the system
returns to normal after the priority is lowered.

In our testing, multiple real-time tasks are bound to the same core, and
the task in the livelock is the highest priority task of the core, so
the livelocked task cannot be preempted.

Although cond_resched() is used by __read_swap_cache_async, it is an
empty function in the preemptive system and cannot achieve the purpose
of releasing the CPU.  A high-priority task cannot release the CPU
unless preempted by a higher-priority task.  But when this task is
already the highest priority task on this core, other tasks will not be
able to be scheduled.  So we think we should replace cond_resched() with
schedule_timeout_uninterruptible(1), schedule_timeout_interruptible will
call set_current_state first to set the task state, so the task will be
removed from the running queue, so as to achieve the purpose of giving
up the CPU and prevent it from running in kernel mode for too long.

(akpm: ugly hack becomes uglier.  But it fixes the issue in a
backportable-to-stable fashion while we hopefully work on something
better)

Link: https://lkml.kernel.org/r/20220221111749.1928222-1-cgel.zte@gmail.com
Signed-off-by: Guo Ziliang <guo.ziliang@zte.com.cn>
Reported-by: Zeal Robot <zealci@zte.com.cn>
Reviewed-by: Ran Xiaokai <ran.xiaokai@zte.com.cn>
Reviewed-by: Jiang Xuexin <jiang.xuexin@zte.com.cn>
Reviewed-by: Yang Yang <yang.yang29@zte.com.cn>
Acked-by: Hugh Dickins <hughd@google.com>
Cc: Naoya Horiguchi <naoya.horiguchi@nec.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Roger Quadros <rogerq@kernel.org>
Cc: Ziliang Guo <guo.ziliang@zte.com.cn>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/swap_state.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/swap_state.c b/mm/swap_state.c
index 8d41042421000..ee67164531c06 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -478,7 +478,7 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 		 * __read_swap_cache_async(), which has set SWAP_HAS_CACHE
 		 * in swap_map, but not yet added its page to swap cache.
 		 */
-		cond_resched();
+		schedule_timeout_uninterruptible(1);
 	}
 
 	/*
-- 
GitLab


From 8208257d2d04d4953a8cb9f1426d245a95c4fea2 Mon Sep 17 00:00:00 2001
From: Qian Cai <quic_qiancai@quicinc.com>
Date: Wed, 16 Mar 2022 16:15:06 -0700
Subject: [PATCH 1539/1586] configs/debug: restore DEBUG_INFO=y for overriding

Previously, I failed to realize that Kees' patch [1] has not been merged
into the mainline yet, and dropped DEBUG_INFO=y too eagerly from the
mainline.  As the results, "make debug.config" won't be able to flip
DEBUG_INFO=n from the existing .config.  This should close the gaps of a
few weeks before Kees' patch is there, and work regardless of their
merging status anyway.

Link: https://lore.kernel.org/all/20220125075126.891825-1-keescook@chromium.org/ [1]
Link: https://lkml.kernel.org/r/20220308153524.8618-1-quic_qiancai@quicinc.com
Signed-off-by: Qian Cai <quic_qiancai@quicinc.com>
Reported-by: Daniel Thompson <daniel.thompson@linaro.org>
Reviewed-by: Daniel Thompson <daniel.thompson@linaro.org>
Cc: Kees Cook <keescook@chromium.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/configs/debug.config | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/configs/debug.config b/kernel/configs/debug.config
index 07df6d93c4df2..e8db8d9386617 100644
--- a/kernel/configs/debug.config
+++ b/kernel/configs/debug.config
@@ -16,6 +16,7 @@ CONFIG_SYMBOLIC_ERRNAME=y
 #
 # Compile-time checks and compiler options
 #
+CONFIG_DEBUG_INFO=y
 CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
 CONFIG_DEBUG_SECTION_MISMATCH=y
 CONFIG_FRAME_WARN=2048
-- 
GitLab


From 7b0b1332cfdb94489836b67d088a779699f8e47e Mon Sep 17 00:00:00 2001
From: Joseph Qi <joseph.qi@linux.alibaba.com>
Date: Wed, 16 Mar 2022 16:15:09 -0700
Subject: [PATCH 1540/1586] ocfs2: fix crash when initialize filecheck kobj
 fails

Once s_root is set, genric_shutdown_super() will be called if
fill_super() fails.  That means, we will call ocfs2_dismount_volume()
twice in such case, which can lead to kernel crash.

Fix this issue by initializing filecheck kobj before setting s_root.

Link: https://lkml.kernel.org/r/20220310081930.86305-1-joseph.qi@linux.alibaba.com
Fixes: 5f483c4abb50 ("ocfs2: add kobject for online file check")
Signed-off-by: Joseph Qi <joseph.qi@linux.alibaba.com>
Cc: Mark Fasheh <mark@fasheh.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Changwei Ge <gechangwei@live.cn>
Cc: Gang He <ghe@suse.com>
Cc: Jun Piao <piaojun@huawei.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ocfs2/super.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 2772dec9dcea4..8bde30fa5387d 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1105,17 +1105,6 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
 		goto read_super_error;
 	}
 
-	root = d_make_root(inode);
-	if (!root) {
-		status = -ENOMEM;
-		mlog_errno(status);
-		goto read_super_error;
-	}
-
-	sb->s_root = root;
-
-	ocfs2_complete_mount_recovery(osb);
-
 	osb->osb_dev_kset = kset_create_and_add(sb->s_id, NULL,
 						&ocfs2_kset->kobj);
 	if (!osb->osb_dev_kset) {
@@ -1133,6 +1122,17 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
 		goto read_super_error;
 	}
 
+	root = d_make_root(inode);
+	if (!root) {
+		status = -ENOMEM;
+		mlog_errno(status);
+		goto read_super_error;
+	}
+
+	sb->s_root = root;
+
+	ocfs2_complete_mount_recovery(osb);
+
 	if (ocfs2_mount_local(osb))
 		snprintf(nodestr, sizeof(nodestr), "local");
 	else
-- 
GitLab


From 1c4debc443ef7037dcb7c4f08c33b9caebd21d2e Mon Sep 17 00:00:00 2001
From: Yosry Ahmed <yosryahmed@google.com>
Date: Wed, 16 Mar 2022 16:15:12 -0700
Subject: [PATCH 1541/1586] selftests: vm: fix clang build error multiple
 output files

When building the vm selftests using clang, some errors are seen due to
having headers in the compilation command:

  clang -Wall -I ../../../../usr/include  -no-pie    gup_test.c ../../../../mm/gup_test.h -lrt -lpthread -o .../tools/testing/selftests/vm/gup_test
  clang: error: cannot specify -o when generating multiple output files
  make[1]: *** [../lib.mk:146: .../tools/testing/selftests/vm/gup_test] Error 1

Rework to add the header files to LOCAL_HDRS before including ../lib.mk,
since the dependency is evaluated in '$(OUTPUT)/%:%.c $(LOCAL_HDRS)' in
file lib.mk.

Link: https://lkml.kernel.org/r/20220304000645.1888133-1-yosryahmed@google.com
Signed-off-by: Yosry Ahmed <yosryahmed@google.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Nathan Chancellor <nathan@kernel.org>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/vm/Makefile | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index 1607322a112c9..a14b5b8008970 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -1,6 +1,8 @@
 # SPDX-License-Identifier: GPL-2.0
 # Makefile for vm selftests
 
+LOCAL_HDRS += $(selfdir)/vm/local_config.h $(top_srcdir)/mm/gup_test.h
+
 include local_config.mk
 
 uname_M := $(shell uname -m 2>/dev/null || echo not)
@@ -140,10 +142,6 @@ endif
 
 $(OUTPUT)/mlock-random-test $(OUTPUT)/memfd_secret: LDLIBS += -lcap
 
-$(OUTPUT)/gup_test: ../../../../mm/gup_test.h
-
-$(OUTPUT)/hmm-tests: local_config.h
-
 # HMM_EXTRA_LIBS may get set in local_config.mk, or it may be left empty.
 $(OUTPUT)/hmm-tests: LDLIBS += $(HMM_EXTRA_LIBS)
 
-- 
GitLab


From 2b41be484abd81c6a43d4b171943d67af6f7c5c3 Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Thu, 17 Mar 2022 22:36:30 +0100
Subject: [PATCH 1542/1586] parisc: Enable ARCH_HAS_DEBUG_VM_PGTABLE

Allow to enable page table boot-up checks.

Suggested-by: Rolf Eike Beer <eike-kernel@sf-tec.de>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 arch/parisc/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index fbb114049a6b2..864b4756dcdf5 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -15,6 +15,7 @@ config PARISC
 	select ARCH_SUPPORTS_HUGETLBFS if PA20
 	select ARCH_SUPPORTS_MEMORY_FAILURE
 	select ARCH_STACKWALK
+	select ARCH_HAS_DEBUG_VM_PGTABLE
 	select HAVE_RELIABLE_STACKTRACE
 	select DMA_OPS
 	select RTC_CLASS
-- 
GitLab


From 46162ac2a6a425674b0e20a3e0668b21187f94bf Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Thu, 17 Mar 2022 23:01:09 +0100
Subject: [PATCH 1543/1586] parisc: Improve CPU socket and core bootup info
 text

Improve CPU bootup info text from:
 CPU1: thread -1, cpu 0, socket 1
to
 CPU1: cpu core 0 of socket 1

Signed-off-by: Helge Deller <deller@gmx.de>
---
 arch/parisc/kernel/topology.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/parisc/kernel/topology.c b/arch/parisc/kernel/topology.c
index 0a10e4ddc528f..e88a6ce7c96d5 100644
--- a/arch/parisc/kernel/topology.c
+++ b/arch/parisc/kernel/topology.c
@@ -101,8 +101,8 @@ void __init store_cpu_topology(unsigned int cpuid)
 
 	update_siblings_masks(cpuid);
 
-	pr_info("CPU%u: thread %d, cpu %d, socket %d\n",
-		cpuid, cpu_topology[cpuid].thread_id,
+	pr_info("CPU%u: cpu core %d of socket %d\n",
+		cpuid,
 		cpu_topology[cpuid].core_id,
 		cpu_topology[cpuid].socket_id);
 }
-- 
GitLab


From dbc7d452e7cf7d3ebc0064e68d30e28d86d3939a Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Thu, 17 Mar 2022 17:20:10 -0600
Subject: [PATCH 1544/1586] io_uring: manage provided buffers strictly ordered

Workloads using provided buffers benefit from using and returning buffers
in the right order, and so does TLBs for that matter. Manage the internal
buffer list in a straight list, rather than use the head buffer as the
insertion node. Use a hashed list for the buffer group IDs instead of
xarray, the overhead is much lower this way. xarray provides internal
locking and other trickery that is handy for some uses cases, but
io_uring already locks internally for the buffer manipulation and needs
none of that.

This is good for about a 2% reduction in overhead, combination of the
improved management and the fact that the workload has an easier time
bundling back provided buffers.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 154 ++++++++++++++++++++++++++++++--------------------
 1 file changed, 92 insertions(+), 62 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 68498cebc0ef7..690bfeaa609aa 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -264,6 +264,12 @@ struct io_rsrc_data {
 	bool				quiesce;
 };
 
+struct io_buffer_list {
+	struct list_head list;
+	struct list_head buf_list;
+	__u16 bgid;
+};
+
 struct io_buffer {
 	struct list_head list;
 	__u64 addr;
@@ -334,6 +340,8 @@ struct io_ev_fd {
 	struct rcu_head		rcu;
 };
 
+#define IO_BUFFERS_HASH_BITS	5
+
 struct io_ring_ctx {
 	/* const or read-mostly hot data */
 	struct {
@@ -386,7 +394,7 @@ struct io_ring_ctx {
 		struct list_head	timeout_list;
 		struct list_head	ltimeout_list;
 		struct list_head	cq_overflow_list;
-		struct xarray		io_buffers;
+		struct list_head	*io_buffers;
 		struct list_head	io_buffers_cache;
 		struct list_head	apoll_cache;
 		struct xarray		personalities;
@@ -1361,10 +1369,25 @@ static inline unsigned int io_put_kbuf(struct io_kiocb *req,
 	return cflags;
 }
 
+static struct io_buffer_list *io_buffer_get_list(struct io_ring_ctx *ctx,
+						 unsigned int bgid)
+{
+	struct list_head *hash_list;
+	struct io_buffer_list *bl;
+
+	hash_list = &ctx->io_buffers[hash_32(bgid, IO_BUFFERS_HASH_BITS)];
+	list_for_each_entry(bl, hash_list, list)
+		if (bl->bgid == bgid || bgid == -1U)
+			return bl;
+
+	return NULL;
+}
+
 static void io_kbuf_recycle(struct io_kiocb *req)
 {
 	struct io_ring_ctx *ctx = req->ctx;
-	struct io_buffer *head, *buf;
+	struct io_buffer_list *bl;
+	struct io_buffer *buf;
 
 	if (likely(!(req->flags & REQ_F_BUFFER_SELECTED)))
 		return;
@@ -1372,21 +1395,8 @@ static void io_kbuf_recycle(struct io_kiocb *req)
 	lockdep_assert_held(&ctx->uring_lock);
 
 	buf = req->kbuf;
-
-	head = xa_load(&ctx->io_buffers, buf->bgid);
-	if (head) {
-		list_add(&buf->list, &head->list);
-	} else {
-		int ret;
-
-		INIT_LIST_HEAD(&buf->list);
-
-		/* if we fail, just leave buffer attached */
-		ret = xa_insert(&ctx->io_buffers, buf->bgid, buf, GFP_KERNEL);
-		if (unlikely(ret < 0))
-			return;
-	}
-
+	bl = io_buffer_get_list(ctx, buf->bgid);
+	list_add(&buf->list, &bl->buf_list);
 	req->flags &= ~REQ_F_BUFFER_SELECTED;
 	req->kbuf = NULL;
 }
@@ -1501,7 +1511,7 @@ static __cold void io_fallback_req_func(struct work_struct *work)
 static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 {
 	struct io_ring_ctx *ctx;
-	int hash_bits;
+	int i, hash_bits;
 
 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
 	if (!ctx)
@@ -1528,6 +1538,13 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	/* set invalid range, so io_import_fixed() fails meeting it */
 	ctx->dummy_ubuf->ubuf = -1UL;
 
+	ctx->io_buffers = kcalloc(1U << IO_BUFFERS_HASH_BITS,
+					sizeof(struct list_head), GFP_KERNEL);
+	if (!ctx->io_buffers)
+		goto err;
+	for (i = 0; i < (1U << IO_BUFFERS_HASH_BITS); i++)
+		INIT_LIST_HEAD(&ctx->io_buffers[i]);
+
 	if (percpu_ref_init(&ctx->refs, io_ring_ctx_ref_free,
 			    PERCPU_REF_ALLOW_REINIT, GFP_KERNEL))
 		goto err;
@@ -1539,7 +1556,6 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	INIT_LIST_HEAD(&ctx->io_buffers_cache);
 	INIT_LIST_HEAD(&ctx->apoll_cache);
 	init_completion(&ctx->ref_comp);
-	xa_init_flags(&ctx->io_buffers, XA_FLAGS_ALLOC1);
 	xa_init_flags(&ctx->personalities, XA_FLAGS_ALLOC1);
 	mutex_init(&ctx->uring_lock);
 	init_waitqueue_head(&ctx->cq_wait);
@@ -1568,6 +1584,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 err:
 	kfree(ctx->dummy_ubuf);
 	kfree(ctx->cancel_hash);
+	kfree(ctx->io_buffers);
 	kfree(ctx);
 	return NULL;
 }
@@ -3351,30 +3368,36 @@ static void io_ring_submit_lock(struct io_ring_ctx *ctx, bool needs_lock)
 		mutex_lock(&ctx->uring_lock);
 }
 
+static void io_buffer_add_list(struct io_ring_ctx *ctx,
+			       struct io_buffer_list *bl, unsigned int bgid)
+{
+	struct list_head *list;
+
+	list = &ctx->io_buffers[hash_32(bgid, IO_BUFFERS_HASH_BITS)];
+	INIT_LIST_HEAD(&bl->buf_list);
+	bl->bgid = bgid;
+	list_add(&bl->list, list);
+}
+
 static struct io_buffer *io_buffer_select(struct io_kiocb *req, size_t *len,
 					  int bgid, unsigned int issue_flags)
 {
 	struct io_buffer *kbuf = req->kbuf;
-	struct io_buffer *head;
 	bool needs_lock = issue_flags & IO_URING_F_UNLOCKED;
+	struct io_ring_ctx *ctx = req->ctx;
+	struct io_buffer_list *bl;
 
 	if (req->flags & REQ_F_BUFFER_SELECTED)
 		return kbuf;
 
-	io_ring_submit_lock(req->ctx, needs_lock);
+	io_ring_submit_lock(ctx, needs_lock);
 
-	lockdep_assert_held(&req->ctx->uring_lock);
+	lockdep_assert_held(&ctx->uring_lock);
 
-	head = xa_load(&req->ctx->io_buffers, bgid);
-	if (head) {
-		if (!list_empty(&head->list)) {
-			kbuf = list_last_entry(&head->list, struct io_buffer,
-							list);
-			list_del(&kbuf->list);
-		} else {
-			kbuf = head;
-			xa_erase(&req->ctx->io_buffers, bgid);
-		}
+	bl = io_buffer_get_list(ctx, bgid);
+	if (bl && !list_empty(&bl->buf_list)) {
+		kbuf = list_first_entry(&bl->buf_list, struct io_buffer, list);
+		list_del(&kbuf->list);
 		if (*len > kbuf->len)
 			*len = kbuf->len;
 		req->flags |= REQ_F_BUFFER_SELECTED;
@@ -4669,8 +4692,8 @@ static int io_remove_buffers_prep(struct io_kiocb *req,
 	return 0;
 }
 
-static int __io_remove_buffers(struct io_ring_ctx *ctx, struct io_buffer *buf,
-			       int bgid, unsigned nbufs)
+static int __io_remove_buffers(struct io_ring_ctx *ctx,
+			       struct io_buffer_list *bl, unsigned nbufs)
 {
 	unsigned i = 0;
 
@@ -4679,17 +4702,16 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx, struct io_buffer *buf,
 		return 0;
 
 	/* the head kbuf is the list itself */
-	while (!list_empty(&buf->list)) {
+	while (!list_empty(&bl->buf_list)) {
 		struct io_buffer *nxt;
 
-		nxt = list_first_entry(&buf->list, struct io_buffer, list);
+		nxt = list_first_entry(&bl->buf_list, struct io_buffer, list);
 		list_del(&nxt->list);
 		if (++i == nbufs)
 			return i;
 		cond_resched();
 	}
 	i++;
-	xa_erase(&ctx->io_buffers, bgid);
 
 	return i;
 }
@@ -4698,7 +4720,7 @@ static int io_remove_buffers(struct io_kiocb *req, unsigned int issue_flags)
 {
 	struct io_provide_buf *p = &req->pbuf;
 	struct io_ring_ctx *ctx = req->ctx;
-	struct io_buffer *head;
+	struct io_buffer_list *bl;
 	int ret = 0;
 	bool needs_lock = issue_flags & IO_URING_F_UNLOCKED;
 
@@ -4707,9 +4729,9 @@ static int io_remove_buffers(struct io_kiocb *req, unsigned int issue_flags)
 	lockdep_assert_held(&ctx->uring_lock);
 
 	ret = -ENOENT;
-	head = xa_load(&ctx->io_buffers, p->bgid);
-	if (head)
-		ret = __io_remove_buffers(ctx, head, p->bgid, p->nbufs);
+	bl = io_buffer_get_list(ctx, p->bgid);
+	if (bl)
+		ret = __io_remove_buffers(ctx, bl, p->nbufs);
 	if (ret < 0)
 		req_set_fail(req);
 
@@ -4798,7 +4820,7 @@ static int io_refill_buffer_cache(struct io_ring_ctx *ctx)
 }
 
 static int io_add_buffers(struct io_ring_ctx *ctx, struct io_provide_buf *pbuf,
-			  struct io_buffer **head)
+			  struct io_buffer_list *bl)
 {
 	struct io_buffer *buf;
 	u64 addr = pbuf->addr;
@@ -4810,30 +4832,24 @@ static int io_add_buffers(struct io_ring_ctx *ctx, struct io_provide_buf *pbuf,
 			break;
 		buf = list_first_entry(&ctx->io_buffers_cache, struct io_buffer,
 					list);
-		list_del(&buf->list);
+		list_move_tail(&buf->list, &bl->buf_list);
 		buf->addr = addr;
 		buf->len = min_t(__u32, pbuf->len, MAX_RW_COUNT);
 		buf->bid = bid;
 		buf->bgid = pbuf->bgid;
 		addr += pbuf->len;
 		bid++;
-		if (!*head) {
-			INIT_LIST_HEAD(&buf->list);
-			*head = buf;
-		} else {
-			list_add_tail(&buf->list, &(*head)->list);
-		}
 		cond_resched();
 	}
 
-	return i ? i : -ENOMEM;
+	return i ? 0 : -ENOMEM;
 }
 
 static int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags)
 {
 	struct io_provide_buf *p = &req->pbuf;
 	struct io_ring_ctx *ctx = req->ctx;
-	struct io_buffer *head, *list;
+	struct io_buffer_list *bl;
 	int ret = 0;
 	bool needs_lock = issue_flags & IO_URING_F_UNLOCKED;
 
@@ -4841,14 +4857,18 @@ static int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags)
 
 	lockdep_assert_held(&ctx->uring_lock);
 
-	list = head = xa_load(&ctx->io_buffers, p->bgid);
-
-	ret = io_add_buffers(ctx, p, &head);
-	if (ret >= 0 && !list) {
-		ret = xa_insert(&ctx->io_buffers, p->bgid, head, GFP_KERNEL);
-		if (ret < 0)
-			__io_remove_buffers(ctx, head, p->bgid, -1U);
+	bl = io_buffer_get_list(ctx, p->bgid);
+	if (unlikely(!bl)) {
+		bl = kmalloc(sizeof(*bl), GFP_KERNEL);
+		if (!bl) {
+			ret = -ENOMEM;
+			goto err;
+		}
+		io_buffer_add_list(ctx, bl, p->bgid);
 	}
+
+	ret = io_add_buffers(ctx, p, bl);
+err:
 	if (ret < 0)
 		req_set_fail(req);
 	/* complete before unlock, IOPOLL may need the lock */
@@ -9936,11 +9956,20 @@ static int io_eventfd_unregister(struct io_ring_ctx *ctx)
 
 static void io_destroy_buffers(struct io_ring_ctx *ctx)
 {
-	struct io_buffer *buf;
-	unsigned long index;
+	int i;
+
+	for (i = 0; i < (1U << IO_BUFFERS_HASH_BITS); i++) {
+		struct list_head *list = &ctx->io_buffers[i];
 
-	xa_for_each(&ctx->io_buffers, index, buf)
-		__io_remove_buffers(ctx, buf, index, -1U);
+		while (!list_empty(list)) {
+			struct io_buffer_list *bl;
+
+			bl = list_first_entry(list, struct io_buffer_list, list);
+			__io_remove_buffers(ctx, bl, -1U);
+			list_del(&bl->list);
+			kfree(bl);
+		}
+	}
 
 	while (!list_empty(&ctx->io_buffers_pages)) {
 		struct page *page;
@@ -10049,6 +10078,7 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
 	io_free_napi_list(ctx);
 	kfree(ctx->cancel_hash);
 	kfree(ctx->dummy_ubuf);
+	kfree(ctx->io_buffers);
 	kfree(ctx);
 }
 
-- 
GitLab


From 4e1b04af4fe6679e63d1bc09f750424882ab701c Mon Sep 17 00:00:00 2001
From: Bill Wendling <morbo@google.com>
Date: Thu, 17 Mar 2022 11:42:22 -0700
Subject: [PATCH 1545/1586] nfsd: use correct format characters

When compiling with -Wformat, clang emits the following warnings:

fs/nfsd/flexfilelayout.c:120:27: warning: format specifies type 'unsigned
char' but the argument has type 'int' [-Wformat]
                         "%s.%hhu.%hhu", addr, port >> 8, port & 0xff);
                             ~~~~              ^~~~~~~~~
                             %d
fs/nfsd/flexfilelayout.c:120:38: warning: format specifies type 'unsigned
char' but the argument has type 'int' [-Wformat]
                         "%s.%hhu.%hhu", addr, port >> 8, port & 0xff);
                                  ~~~~                    ^~~~~~~~~~~
                                  %d

The types of these arguments are unconditionally defined, so this patch
updates the format character to the correct ones for ints and unsigned
ints.

Link: https://github.com/ClangBuiltLinux/linux/issues/378
Signed-off-by: Bill Wendling <morbo@google.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Reviewed-by: Tom Haynes <loghyr@hammerspace.com>
---
 fs/nfsd/flexfilelayout.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/nfsd/flexfilelayout.c b/fs/nfsd/flexfilelayout.c
index 2e2f1d5e9f623..070f90ed09b61 100644
--- a/fs/nfsd/flexfilelayout.c
+++ b/fs/nfsd/flexfilelayout.c
@@ -117,7 +117,7 @@ nfsd4_ff_proc_getdeviceinfo(struct super_block *sb, struct svc_rqst *rqstp,
 
 	da->netaddr.addr_len =
 		snprintf(da->netaddr.addr, FF_ADDR_LEN + 1,
-			 "%s.%hhu.%hhu", addr, port >> 8, port & 0xff);
+			 "%s.%d.%d", addr, port >> 8, port & 0xff);
 
 	da->tightly_coupled = false;
 
-- 
GitLab


From 572299f03afd676dd4e20669cdaf5ed0fe1379d4 Mon Sep 17 00:00:00 2001
From: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Date: Fri, 18 Mar 2022 11:26:41 +0900
Subject: [PATCH 1546/1586] block: limit request dispatch loop duration

When IO requests are made continuously and the target block device
handles requests faster than request arrival, the request dispatch loop
keeps on repeating to dispatch the arriving requests very long time,
more than a minute. Since the loop runs as a workqueue worker task, the
very long loop duration triggers workqueue watchdog timeout and BUG [1].

To avoid the very long loop duration, break the loop periodically. When
opportunity to dispatch requests still exists, check need_resched(). If
need_resched() returns true, the dispatch loop already consumed its time
slice, then reschedule the dispatch work and break the loop. With heavy
IO load, need_resched() does not return true for 20~30 seconds. To cover
such case, check time spent in the dispatch loop with jiffies. If more
than 1 second is spent, reschedule the dispatch work and break the loop.

[1]

[  609.691437] BUG: workqueue lockup - pool cpus=10 node=1 flags=0x0 nice=-20 stuck for 35s!
[  609.701820] Showing busy workqueues and worker pools:
[  609.707915] workqueue events: flags=0x0
[  609.712615]   pwq 0: cpus=0 node=0 flags=0x0 nice=0 active=1/256 refcnt=2
[  609.712626]     pending: drm_fb_helper_damage_work [drm_kms_helper]
[  609.712687] workqueue events_freezable: flags=0x4
[  609.732943]   pwq 0: cpus=0 node=0 flags=0x0 nice=0 active=1/256 refcnt=2
[  609.732952]     pending: pci_pme_list_scan
[  609.732968] workqueue events_power_efficient: flags=0x80
[  609.751947]   pwq 0: cpus=0 node=0 flags=0x0 nice=0 active=1/256 refcnt=2
[  609.751955]     pending: neigh_managed_work
[  609.752018] workqueue kblockd: flags=0x18
[  609.769480]   pwq 21: cpus=10 node=1 flags=0x0 nice=-20 active=3/256 refcnt=4
[  609.769488]     in-flight: 1020:blk_mq_run_work_fn
[  609.769498]     pending: blk_mq_timeout_work, blk_mq_run_work_fn
[  609.769744] pool 21: cpus=10 node=1 flags=0x0 nice=-20 hung=35s workers=2 idle: 67
[  639.899730] BUG: workqueue lockup - pool cpus=10 node=1 flags=0x0 nice=-20 stuck for 66s!
[  639.909513] Showing busy workqueues and worker pools:
[  639.915404] workqueue events: flags=0x0
[  639.920197]   pwq 0: cpus=0 node=0 flags=0x0 nice=0 active=1/256 refcnt=2
[  639.920215]     pending: drm_fb_helper_damage_work [drm_kms_helper]
[  639.920365] workqueue kblockd: flags=0x18
[  639.939932]   pwq 21: cpus=10 node=1 flags=0x0 nice=-20 active=3/256 refcnt=4
[  639.939942]     in-flight: 1020:blk_mq_run_work_fn
[  639.939955]     pending: blk_mq_timeout_work, blk_mq_run_work_fn
[  639.940212] pool 21: cpus=10 node=1 flags=0x0 nice=-20 hung=66s workers=2 idle: 67

Fixes: 6e6fcbc27e778 ("blk-mq: support batching dispatch in case of io")
Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Cc: stable@vger.kernel.org # v5.10+
Link: https://lore.kernel.org/linux-block/20220310091649.zypaem5lkyfadymg@shindev/
Link: https://lore.kernel.org/r/20220318022641.133484-1-shinichiro.kawasaki@wdc.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq-sched.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index e6ad8f7614742..9e56a69422b65 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -180,11 +180,18 @@ static int __blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
 
 static int blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
 {
+	unsigned long end = jiffies + HZ;
 	int ret;
 
 	do {
 		ret = __blk_mq_do_dispatch_sched(hctx);
-	} while (ret == 1);
+		if (ret != 1)
+			break;
+		if (need_resched() || time_is_before_jiffies(end)) {
+			blk_mq_delay_run_hw_queue(hctx, 0);
+			break;
+		}
+	} while (1);
 
 	return ret;
 }
-- 
GitLab


From a5436af598779219b375c1977555c82def1c35d0 Mon Sep 17 00:00:00 2001
From: Brandon Wyman <bjwyman@gmail.com>
Date: Thu, 17 Mar 2022 23:21:23 +0000
Subject: [PATCH 1547/1586] hwmon: (pmbus) Add Vin unit off handling

If there is an input undervoltage fault, reported in STATUS_INPUT
command response, there is quite likely a "Unit Off For Insufficient
Input Voltage" condition as well.

Add a constant for bit 3 of STATUS_INPUT. Update the Vin limit
attributes to include both bits in the mask for clearing faults.

If an input undervoltage fault occurs, causing a unit off for
insufficient input voltage, but the unit is off bit is not cleared, the
STATUS_WORD will not be updated to clear the input fault condition.
Including the unit is off bit (bit 3) allows for the input fault
condition to completely clear.

Signed-off-by: Brandon Wyman <bjwyman@gmail.com>
Link: https://lore.kernel.org/r/20220317232123.2103592-1-bjwyman@gmail.com
Fixes: b4ce237b7f7d3 ("hwmon: (pmbus) Introduce infrastructure to detect sensors and limit registers")
[groeck: Dropped unnecessary ()]
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/pmbus/pmbus.h      | 1 +
 drivers/hwmon/pmbus/pmbus_core.c | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/hwmon/pmbus/pmbus.h b/drivers/hwmon/pmbus/pmbus.h
index 38f049d68d329..e74b6ef070f3e 100644
--- a/drivers/hwmon/pmbus/pmbus.h
+++ b/drivers/hwmon/pmbus/pmbus.h
@@ -319,6 +319,7 @@ enum pmbus_fan_mode { percent = 0, rpm };
 /*
  * STATUS_VOUT, STATUS_INPUT
  */
+#define PB_VOLTAGE_VIN_OFF		BIT(3)
 #define PB_VOLTAGE_UV_FAULT		BIT(4)
 #define PB_VOLTAGE_UV_WARNING		BIT(5)
 #define PB_VOLTAGE_OV_WARNING		BIT(6)
diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c
index 8eed7968a50ef..b2618b1d529e2 100644
--- a/drivers/hwmon/pmbus/pmbus_core.c
+++ b/drivers/hwmon/pmbus/pmbus_core.c
@@ -1373,7 +1373,7 @@ static const struct pmbus_limit_attr vin_limit_attrs[] = {
 		.reg = PMBUS_VIN_UV_FAULT_LIMIT,
 		.attr = "lcrit",
 		.alarm = "lcrit_alarm",
-		.sbit = PB_VOLTAGE_UV_FAULT,
+		.sbit = PB_VOLTAGE_UV_FAULT | PB_VOLTAGE_VIN_OFF,
 	}, {
 		.reg = PMBUS_VIN_OV_WARN_LIMIT,
 		.attr = "max",
-- 
GitLab


From c86868bbc22be9487d10d3c6336dd8ccb49e8a62 Mon Sep 17 00:00:00 2001
From: Chao Yu <chao@kernel.org>
Date: Thu, 17 Mar 2022 16:33:15 +0800
Subject: [PATCH 1548/1586] f2fs: initialize sbi->gc_mode explicitly

It needs to initialized sbi->gc_mode to GC_NORMAL explicitly.

Signed-off-by: Chao Yu <chao.yu@oppo.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/super.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 4b570b5c26748..9176597fdf940 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -3579,6 +3579,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
 	F2FS_NODE_INO(sbi) = le32_to_cpu(raw_super->node_ino);
 	F2FS_META_INO(sbi) = le32_to_cpu(raw_super->meta_ino);
 	sbi->cur_victim_sec = NULL_SECNO;
+	sbi->gc_mode = GC_NORMAL;
 	sbi->next_victim_seg[BG_GC] = NULL_SEGNO;
 	sbi->next_victim_seg[FG_GC] = NULL_SEGNO;
 	sbi->max_victim_search = DEF_MAX_VICTIM_SEARCH;
-- 
GitLab


From 9b046d0245cec982c72a65d3ea1b834959a9708b Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Tue, 9 Nov 2021 22:25:02 +0100
Subject: [PATCH 1549/1586] parisc: Avoid using hardware single-step in kprobes

This patch changes the kprobe and kretprobe feature to use another
break instruction instead of relying on the hardware single-step
feature.
That way those kprobes now work in qemu as well, because in qemu we
don't emulate yet single-stepping.

Signed-off-by: Helge Deller <deller@gmx.de>
---
 arch/parisc/include/asm/kprobes.h |  5 +++--
 arch/parisc/kernel/kprobes.c      | 28 +++++++++++++++++-----------
 arch/parisc/kernel/traps.c        | 10 ++++------
 3 files changed, 24 insertions(+), 19 deletions(-)

diff --git a/arch/parisc/include/asm/kprobes.h b/arch/parisc/include/asm/kprobes.h
index 904034da49745..0a175ac876980 100644
--- a/arch/parisc/include/asm/kprobes.h
+++ b/arch/parisc/include/asm/kprobes.h
@@ -18,8 +18,9 @@
 #include <linux/notifier.h>
 
 #define PARISC_KPROBES_BREAK_INSN	0x3ff801f
+#define PARISC_KPROBES_BREAK_INSN2	0x3ff801e
 #define  __ARCH_WANT_KPROBES_INSN_SLOT
-#define MAX_INSN_SIZE 1
+#define MAX_INSN_SIZE 2
 
 typedef u32 kprobe_opcode_t;
 struct kprobe;
@@ -29,7 +30,7 @@ void arch_remove_kprobe(struct kprobe *p);
 #define flush_insn_slot(p) \
 	flush_icache_range((unsigned long)&(p)->ainsn.insn[0], \
 			   (unsigned long)&(p)->ainsn.insn[0] + \
-			   sizeof(kprobe_opcode_t))
+			   MAX_INSN_SIZE*sizeof(kprobe_opcode_t))
 
 #define kretprobe_blacklist_size    0
 
diff --git a/arch/parisc/kernel/kprobes.c b/arch/parisc/kernel/kprobes.c
index e2bdb5a5f93e9..3343d2fb78897 100644
--- a/arch/parisc/kernel/kprobes.c
+++ b/arch/parisc/kernel/kprobes.c
@@ -5,6 +5,7 @@
  * PA-RISC kprobes implementation
  *
  * Copyright (c) 2019 Sven Schnelle <svens@stackframe.org>
+ * Copyright (c) 2022 Helge Deller <deller@gmx.de>
  */
 
 #include <linux/types.h>
@@ -25,9 +26,14 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
 	if (!p->ainsn.insn)
 		return -ENOMEM;
 
-	memcpy(p->ainsn.insn, p->addr,
-		MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
+	/*
+	 * Set up new instructions. Second break instruction will
+	 * trigger call of parisc_kprobe_ss_handler().
+	 */
 	p->opcode = *p->addr;
+	p->ainsn.insn[0] = p->opcode;
+	p->ainsn.insn[1] = PARISC_KPROBES_BREAK_INSN2;
+
 	flush_insn_slot(p);
 	return 0;
 }
@@ -73,9 +79,7 @@ static void __kprobes setup_singlestep(struct kprobe *p,
 {
 	kcb->iaoq[0] = regs->iaoq[0];
 	kcb->iaoq[1] = regs->iaoq[1];
-	regs->iaoq[0] = (unsigned long)p->ainsn.insn;
-	mtctl(0, 0);
-	regs->gr[0] |= PSW_R;
+	instruction_pointer_set(regs, (unsigned long)p->ainsn.insn);
 }
 
 int __kprobes parisc_kprobe_break_handler(struct pt_regs *regs)
@@ -165,9 +169,8 @@ int __kprobes parisc_kprobe_ss_handler(struct pt_regs *regs)
 		regs->iaoq[0] = kcb->iaoq[1];
 		break;
 	default:
-		regs->iaoq[1] = kcb->iaoq[0];
-		regs->iaoq[1] += (regs->iaoq[1] - regs->iaoq[0]) + 4;
 		regs->iaoq[0] = kcb->iaoq[1];
+		regs->iaoq[1] = regs->iaoq[0] + 4;
 		break;
 	}
 	kcb->kprobe_status = KPROBE_HIT_SSDONE;
@@ -191,14 +194,17 @@ static struct kprobe trampoline_p = {
 static int __kprobes trampoline_probe_handler(struct kprobe *p,
 					      struct pt_regs *regs)
 {
-	unsigned long orig_ret_address;
-
-	orig_ret_address = __kretprobe_trampoline_handler(regs, NULL);
-	instruction_pointer_set(regs, orig_ret_address);
+	__kretprobe_trampoline_handler(regs, NULL);
 
 	return 1;
 }
 
+void arch_kretprobe_fixup_return(struct pt_regs *regs,
+				 kprobe_opcode_t *correct_ret_addr)
+{
+	regs->gr[2] = (unsigned long)correct_ret_addr;
+}
+
 void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
 				      struct pt_regs *regs)
 {
diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c
index 39576a9245c7f..a6e61cf2cad04 100644
--- a/arch/parisc/kernel/traps.c
+++ b/arch/parisc/kernel/traps.c
@@ -302,7 +302,10 @@ static void handle_break(struct pt_regs *regs)
 		parisc_kprobe_break_handler(regs);
 		return;
 	}
-
+	if (unlikely(iir == PARISC_KPROBES_BREAK_INSN2)) {
+		parisc_kprobe_ss_handler(regs);
+		return;
+	}
 #endif
 
 #ifdef CONFIG_KGDB
@@ -539,11 +542,6 @@ void notrace handle_interruption(int code, struct pt_regs *regs)
 		/* Recovery counter trap */
 		regs->gr[0] &= ~PSW_R;
 
-#ifdef CONFIG_KPROBES
-		if (parisc_kprobe_ss_handler(regs))
-			return;
-#endif
-
 #ifdef CONFIG_KGDB
 		if (kgdb_single_step) {
 			kgdb_handle_exception(0, SIGTRAP, 0, regs);
-- 
GitLab


From 7f34b43e07cb512b28543fdcb9f35d1fbfda9ebc Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 16 Mar 2022 19:38:18 +0100
Subject: [PATCH 1550/1586] arm64: fix clang warning about TRAMP_VALIAS

The newly introduced TRAMP_VALIAS definition causes a build warning
with clang-14:

arch/arm64/include/asm/vectors.h:66:31: error: arithmetic on a null pointer treated as a cast from integer to pointer is a GNU extension [-Werror,-Wnull-pointer-arithmetic]
                return (char *)TRAMP_VALIAS + SZ_2K * slot;

Change the addition to something clang does not complain about.

Fixes: bd09128d16fa ("arm64: Add percpu vectors for EL1")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: James Morse <james.morse@arm.com>
Link: https://lore.kernel.org/r/20220316183833.1563139-1-arnd@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/vectors.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/vectors.h b/arch/arm64/include/asm/vectors.h
index f64613a96d530..bc9a2145f4194 100644
--- a/arch/arm64/include/asm/vectors.h
+++ b/arch/arm64/include/asm/vectors.h
@@ -56,14 +56,14 @@ enum arm64_bp_harden_el1_vectors {
 DECLARE_PER_CPU_READ_MOSTLY(const char *, this_cpu_vector);
 
 #ifndef CONFIG_UNMAP_KERNEL_AT_EL0
-#define TRAMP_VALIAS	0
+#define TRAMP_VALIAS	0ul
 #endif
 
 static inline const char *
 arm64_get_bp_hardening_vector(enum arm64_bp_harden_el1_vectors slot)
 {
 	if (arm64_kernel_unmapped_at_el0())
-		return (char *)TRAMP_VALIAS + SZ_2K * slot;
+		return (char *)(TRAMP_VALIAS + SZ_2K * slot);
 
 	WARN_ON_ONCE(slot == EL1_VECTOR_KPTI);
 
-- 
GitLab


From 6a861abceecb68497dd82a324fee45a5332dcece Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Thu, 17 Mar 2022 08:39:39 -0700
Subject: [PATCH 1551/1586] clocksource: acpi_pm: fix return value of __setup
 handler

__setup() handlers should return 1 to obsolete_checksetup() in
init/main.c to indicate that the boot option has been handled.
A return of 0 causes the boot option/value to be listed as an Unknown
kernel parameter and added to init's (limited) environment strings.

The __setup() handler interface isn't meant to handle negative return
values -- they are non-zero, so they mean "handled" (like a return
value of 1 does), but that's just a quirk. So return 1 from
parse_pmtmr(). Also print a warning message if kstrtouint() returns
an error.

Fixes: 6b148507d3d0 ("pmtmr: allow command line override of ioport")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Reported-by: Igor Zhbanov <i.zhbanov@omprussia.ru>
Link: lore.kernel.org/r/64644a2f-4a20-bab3-1e15-3b2cdd0defe3@omprussia.ru
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/clocksource/acpi_pm.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/clocksource/acpi_pm.c b/drivers/clocksource/acpi_pm.c
index eb596ff9e7bb3..279ddff81ab49 100644
--- a/drivers/clocksource/acpi_pm.c
+++ b/drivers/clocksource/acpi_pm.c
@@ -229,8 +229,10 @@ static int __init parse_pmtmr(char *arg)
 	int ret;
 
 	ret = kstrtouint(arg, 16, &base);
-	if (ret)
-		return ret;
+	if (ret) {
+		pr_warn("PMTMR: invalid 'pmtmr=' value: '%s'\n", arg);
+		return 1;
+	}
 
 	pr_info("PMTMR IOPort override: 0x%04x -> 0x%04x\n", pmtmr_ioport,
 		base);
-- 
GitLab


From 316e46f65a5497839857db08b6fbf60f568b165a Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 16 Mar 2022 19:37:45 +0100
Subject: [PATCH 1552/1586] arm64: errata: avoid duplicate field initializer

The '.type' field is initialized both in place and in the macro
as reported by this W=1 warning:

arch/arm64/include/asm/cpufeature.h:281:9: error: initialized field overwritten [-Werror=override-init]
  281 |         (ARM64_CPUCAP_SCOPE_LOCAL_CPU | ARM64_CPUCAP_OPTIONAL_FOR_LATE_CPU)
      |         ^
arch/arm64/kernel/cpu_errata.c:136:17: note: in expansion of macro 'ARM64_CPUCAP_LOCAL_CPU_ERRATUM'
  136 |         .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,                         \
      |                 ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
arch/arm64/kernel/cpu_errata.c:145:9: note: in expansion of macro 'ERRATA_MIDR_RANGE'
  145 |         ERRATA_MIDR_RANGE(m, var, r_min, var, r_max)
      |         ^~~~~~~~~~~~~~~~~
arch/arm64/kernel/cpu_errata.c:613:17: note: in expansion of macro 'ERRATA_MIDR_REV_RANGE'
  613 |                 ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A510, 0, 0, 2),
      |                 ^~~~~~~~~~~~~~~~~~~~~
arch/arm64/include/asm/cpufeature.h:281:9: note: (near initialization for 'arm64_errata[18].type')
  281 |         (ARM64_CPUCAP_SCOPE_LOCAL_CPU | ARM64_CPUCAP_OPTIONAL_FOR_LATE_CPU)
      |         ^

Remove the extranous initializer.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Fixes: 1dd498e5e26a ("KVM: arm64: Workaround Cortex-A510's single-step and PAC trap errata")
Link: https://lore.kernel.org/r/20220316183800.1546731-1-arnd@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/cpu_errata.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index a401180e8d66c..146fa2e76834d 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -611,7 +611,6 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
 	{
 		.desc = "ARM erratum 2077057",
 		.capability = ARM64_WORKAROUND_2077057,
-		.type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
 		ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A510, 0, 0, 2),
 	},
 #endif
-- 
GitLab


From ee37eddbfa9e0401f13a01691cf4bbbacd2d16c9 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Fri, 18 Mar 2022 21:01:42 +0800
Subject: [PATCH 1553/1586] block: avoid use-after-free on throttle data

In throtl_pending_timer_fn(), request queue is retrieved from throttle
data. And tg's pending timer is deleted synchronously when releasing the
associated blkg, at that time, throttle data may have been freed since
commit 1059699f87eb ("block: move blkcg initialization/destroy into disk
allocation/release handler") moves freeing q->td to disk_release() from
blk_release_queue(). So use-after-free on q->td in throtl_pending_timer_fn
can be triggered.

Fixes the issue by:

- do nothing in case that disk is released, when there isn't any bio to
  dispatch

- retrieve request queue from blkg instead of throttle data for
non top-level pending timer.

Signed-off-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20220318130144.1066064-2-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-throttle.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index a3b3ebc72dd47..880701d0106fb 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -1137,12 +1137,22 @@ static void throtl_pending_timer_fn(struct timer_list *t)
 	struct throtl_service_queue *sq = from_timer(sq, t, pending_timer);
 	struct throtl_grp *tg = sq_to_tg(sq);
 	struct throtl_data *td = sq_to_td(sq);
-	struct request_queue *q = td->queue;
 	struct throtl_service_queue *parent_sq;
+	struct request_queue *q;
 	bool dispatched;
 	int ret;
 
+	/* throtl_data may be gone, so figure out request queue by blkg */
+	if (tg)
+		q = tg->pd.blkg->q;
+	else
+		q = td->queue;
+
 	spin_lock_irq(&q->queue_lock);
+
+	if (!q->root_blkg)
+		goto out_unlock;
+
 	if (throtl_can_upgrade(td, NULL))
 		throtl_upgrade_state(td);
 
-- 
GitLab


From 0a9a25ca78437b39e691bcc3dc8240455b803d8d Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Fri, 18 Mar 2022 21:01:43 +0800
Subject: [PATCH 1554/1586] block: let blkcg_gq grab request queue's refcnt

In the whole lifetime of blkcg_gq instance, ->q will be referred, such
as, ->pd_free_fn() is called in blkg_free, and throtl_pd_free() still
may touch the request queue via &tg->service_queue.pending_timer which
is handled by throtl_pending_timer_fn(), so it is reasonable to grab
request queue's refcnt by blkcg_gq instance.

Previously blkcg_exit_queue() is called from blk_release_queue, and it
is hard to avoid the use-after-free. But recently commit 1059699f87eb ("block:
move blkcg initialization/destroy into disk allocation/release handler")
is merged to for-5.18/block, it becomes simple to fix the issue by simply
grabbing request queue's refcnt.

Reported-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20220318130144.1066064-3-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-cgroup.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index fa063c6c0338e..d53b0d69dd736 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -82,6 +82,8 @@ static void blkg_free(struct blkcg_gq *blkg)
 		if (blkg->pd[i])
 			blkcg_policy[i]->pd_free_fn(blkg->pd[i]);
 
+	if (blkg->q)
+		blk_put_queue(blkg->q);
 	free_percpu(blkg->iostat_cpu);
 	percpu_ref_exit(&blkg->refcnt);
 	kfree(blkg);
@@ -167,6 +169,9 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q,
 	if (!blkg->iostat_cpu)
 		goto err_free;
 
+	if (!blk_get_queue(q))
+		goto err_free;
+
 	blkg->q = q;
 	INIT_LIST_HEAD(&blkg->q_node);
 	spin_lock_init(&blkg->async_bio_lock);
-- 
GitLab


From 8f9e7b65f833cb9a4b2e2f54a049d74df394d906 Mon Sep 17 00:00:00 2001
From: Yu Kuai <yukuai3@huawei.com>
Date: Fri, 18 Mar 2022 21:01:44 +0800
Subject: [PATCH 1555/1586] block: cancel all throttled bios in del_gendisk()

Throttled bios can't be issued after del_gendisk() is done, thus
it's better to cancel them immediately rather than waiting for
throttle is done.

For example, if user thread is throttled with low bps while it's
issuing large io, and the device is deleted. The user thread will
wait for a long time for io to return.

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20220318130144.1066064-4-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-throttle.c | 36 +++++++++++++++++++++++++++++++++++-
 block/blk-throttle.h |  3 +++
 block/genhd.c        |  3 +++
 3 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 880701d0106fb..469c483719bea 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -874,7 +874,8 @@ static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio,
 	       bio != throtl_peek_queued(&tg->service_queue.queued[rw]));
 
 	/* If tg->bps = -1, then BW is unlimited */
-	if (bps_limit == U64_MAX && iops_limit == UINT_MAX) {
+	if ((bps_limit == U64_MAX && iops_limit == UINT_MAX) ||
+	    tg->flags & THROTL_TG_CANCELING) {
 		if (wait)
 			*wait = 0;
 		return true;
@@ -1776,6 +1777,39 @@ static bool throtl_hierarchy_can_upgrade(struct throtl_grp *tg)
 	return false;
 }
 
+void blk_throtl_cancel_bios(struct request_queue *q)
+{
+	struct cgroup_subsys_state *pos_css;
+	struct blkcg_gq *blkg;
+
+	spin_lock_irq(&q->queue_lock);
+	/*
+	 * queue_lock is held, rcu lock is not needed here technically.
+	 * However, rcu lock is still held to emphasize that following
+	 * path need RCU protection and to prevent warning from lockdep.
+	 */
+	rcu_read_lock();
+	blkg_for_each_descendant_post(blkg, pos_css, q->root_blkg) {
+		struct throtl_grp *tg = blkg_to_tg(blkg);
+		struct throtl_service_queue *sq = &tg->service_queue;
+
+		/*
+		 * Set the flag to make sure throtl_pending_timer_fn() won't
+		 * stop until all throttled bios are dispatched.
+		 */
+		blkg_to_tg(blkg)->flags |= THROTL_TG_CANCELING;
+		/*
+		 * Update disptime after setting the above flag to make sure
+		 * throtl_select_dispatch() won't exit without dispatching.
+		 */
+		tg_update_disptime(tg);
+
+		throtl_schedule_pending_timer(sq, jiffies + 1);
+	}
+	rcu_read_unlock();
+	spin_unlock_irq(&q->queue_lock);
+}
+
 static bool throtl_can_upgrade(struct throtl_data *td,
 	struct throtl_grp *this_tg)
 {
diff --git a/block/blk-throttle.h b/block/blk-throttle.h
index b23a9f3abb824..c1b6029961272 100644
--- a/block/blk-throttle.h
+++ b/block/blk-throttle.h
@@ -56,6 +56,7 @@ enum tg_state_flags {
 	THROTL_TG_PENDING	= 1 << 0,	/* on parent's pending tree */
 	THROTL_TG_WAS_EMPTY	= 1 << 1,	/* bio_lists[] became non-empty */
 	THROTL_TG_HAS_IOPS_LIMIT = 1 << 2,	/* tg has iops limit */
+	THROTL_TG_CANCELING	= 1 << 3,	/* starts to cancel bio */
 };
 
 enum {
@@ -162,11 +163,13 @@ static inline int blk_throtl_init(struct request_queue *q) { return 0; }
 static inline void blk_throtl_exit(struct request_queue *q) { }
 static inline void blk_throtl_register_queue(struct request_queue *q) { }
 static inline bool blk_throtl_bio(struct bio *bio) { return false; }
+static inline void blk_throtl_cancel_bios(struct request_queue *q) { }
 #else /* CONFIG_BLK_DEV_THROTTLING */
 int blk_throtl_init(struct request_queue *q);
 void blk_throtl_exit(struct request_queue *q);
 void blk_throtl_register_queue(struct request_queue *q);
 bool __blk_throtl_bio(struct bio *bio);
+void blk_throtl_cancel_bios(struct request_queue *q);
 static inline bool blk_throtl_bio(struct bio *bio)
 {
 	struct throtl_grp *tg = blkg_to_tg(bio->bi_blkg);
diff --git a/block/genhd.c b/block/genhd.c
index 56f66c6fee943..8244f09e058dd 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -25,6 +25,7 @@
 #include <linux/pm_runtime.h>
 #include <linux/badblocks.h>
 #include <linux/part_stat.h>
+#include "blk-throttle.h"
 
 #include "blk.h"
 #include "blk-mq-sched.h"
@@ -627,6 +628,8 @@ void del_gendisk(struct gendisk *disk)
 
 	blk_mq_freeze_queue_wait(q);
 
+	blk_throtl_cancel_bios(disk->queue);
+
 	blk_sync_queue(q);
 	blk_flush_integrity();
 	/*
-- 
GitLab


From e34855b99696433a26d86179552553c6c6fa69b8 Mon Sep 17 00:00:00 2001
From: Rohit Agarwal <quic_rohiagar@quicinc.com>
Date: Wed, 16 Mar 2022 11:37:12 +0530
Subject: [PATCH 1556/1586] regulator: dt-bindings: Add PMX65 compatibles

Add PMX65 compatibles for PMIC found in SDX65 platform.

Signed-off-by: Rohit Agarwal <quic_rohiagar@quicinc.com>
Link: https://lore.kernel.org/r/1647410837-22537-2-git-send-email-quic_rohiagar@quicinc.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../devicetree/bindings/regulator/qcom,rpmh-regulator.yaml      | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.yaml b/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.yaml
index 5c73d3f639c78..e28ee9e467888 100644
--- a/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.yaml
+++ b/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.yaml
@@ -48,6 +48,7 @@ description: |
       For PMI8998, bob
       For PMR735A, smps1 - smps3, ldo1 - ldo7
       For PMX55, smps1 - smps7, ldo1 - ldo16
+      For PMX65, smps1 - smps8, ldo1 - ldo21
 
 properties:
   compatible:
@@ -70,6 +71,7 @@ properties:
       - qcom,pmm8155au-rpmh-regulators
       - qcom,pmr735a-rpmh-regulators
       - qcom,pmx55-rpmh-regulators
+      - qcom,pmx65-rpmh-regulators
 
   qcom,pmic-id:
     description: |
-- 
GitLab


From 5999f85ddeb436b4007878f251a30ccc8b9c638b Mon Sep 17 00:00:00 2001
From: Rohit Agarwal <quic_rohiagar@quicinc.com>
Date: Wed, 16 Mar 2022 11:37:13 +0530
Subject: [PATCH 1557/1586] regulator: qcom-rpmh: Add support for SDX65

Add support from RPMH regulators found in SDX65 platform.

Signed-off-by: Rohit Agarwal <quic_rohiagar@quicinc.com>
Link: https://lore.kernel.org/r/1647410837-22537-3-git-send-email-quic_rohiagar@quicinc.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/qcom-rpmh-regulator.c | 37 +++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/drivers/regulator/qcom-rpmh-regulator.c b/drivers/regulator/qcom-rpmh-regulator.c
index a3bc0eb6ceb8c..561de6b2e6e36 100644
--- a/drivers/regulator/qcom-rpmh-regulator.c
+++ b/drivers/regulator/qcom-rpmh-regulator.c
@@ -1121,6 +1121,39 @@ static const struct rpmh_vreg_init_data pmx55_vreg_data[] = {
 	{}
 };
 
+static const struct rpmh_vreg_init_data pmx65_vreg_data[] = {
+	RPMH_VREG("smps1",   "smp%s1",    &pmic5_ftsmps510, "vdd-s1"),
+	RPMH_VREG("smps2",   "smp%s2",    &pmic5_hfsmps510, "vdd-s2"),
+	RPMH_VREG("smps3",   "smp%s3",    &pmic5_hfsmps510, "vdd-s3"),
+	RPMH_VREG("smps4",   "smp%s4",    &pmic5_hfsmps510, "vdd-s4"),
+	RPMH_VREG("smps5",   "smp%s5",    &pmic5_hfsmps510, "vdd-s5"),
+	RPMH_VREG("smps6",   "smp%s6",    &pmic5_ftsmps510, "vdd-s6"),
+	RPMH_VREG("smps7",   "smp%s7",    &pmic5_hfsmps510, "vdd-s7"),
+	RPMH_VREG("smps8",   "smp%s8",    &pmic5_hfsmps510, "vdd-s8"),
+	RPMH_VREG("ldo1",    "ldo%s1",    &pmic5_nldo,      "vdd-l1"),
+	RPMH_VREG("ldo2",    "ldo%s2",    &pmic5_nldo,      "vdd-l2-l18"),
+	RPMH_VREG("ldo3",    "ldo%s3",    &pmic5_nldo,      "vdd-l3"),
+	RPMH_VREG("ldo4",    "ldo%s4",    &pmic5_nldo,      "vdd-l4"),
+	RPMH_VREG("ldo5",    "ldo%s5",    &pmic5_pldo,      "vdd-l5-l6-l16"),
+	RPMH_VREG("ldo6",    "ldo%s6",    &pmic5_pldo,      "vdd-l5-l6-l16"),
+	RPMH_VREG("ldo7",    "ldo%s7",    &pmic5_nldo,      "vdd-l7"),
+	RPMH_VREG("ldo8",    "ldo%s8",    &pmic5_nldo,      "vdd-l8-l9"),
+	RPMH_VREG("ldo9",    "ldo%s9",    &pmic5_nldo,      "vdd-l8-l9"),
+	RPMH_VREG("ldo10",   "ldo%s10",   &pmic5_pldo,      "vdd-l10"),
+	RPMH_VREG("ldo11",   "ldo%s11",   &pmic5_pldo,      "vdd-l11-l13"),
+	RPMH_VREG("ldo12",   "ldo%s12",   &pmic5_nldo,      "vdd-l12"),
+	RPMH_VREG("ldo13",   "ldo%s13",   &pmic5_pldo,      "vdd-l11-l13"),
+	RPMH_VREG("ldo14",   "ldo%s14",   &pmic5_nldo,      "vdd-l14"),
+	RPMH_VREG("ldo15",   "ldo%s15",   &pmic5_nldo,      "vdd-l15"),
+	RPMH_VREG("ldo16",   "ldo%s16",   &pmic5_pldo,      "vdd-l5-l6-l16"),
+	RPMH_VREG("ldo17",   "ldo%s17",   &pmic5_nldo,      "vdd-l17"),
+	/* ldo18 not configured */
+	RPMH_VREG("ldo19",   "ldo%s19",   &pmic5_nldo,      "vdd-l19"),
+	RPMH_VREG("ldo20",   "ldo%s20",   &pmic5_nldo,      "vdd-l20"),
+	RPMH_VREG("ldo21",   "ldo%s21",   &pmic5_nldo,      "vdd-l21"),
+	{}
+};
+
 static const struct rpmh_vreg_init_data pm7325_vreg_data[] = {
 	RPMH_VREG("smps1",  "smp%s1",  &pmic5_hfsmps510, "vdd-s1"),
 	RPMH_VREG("smps2",  "smp%s2",  &pmic5_ftsmps520, "vdd-s2"),
@@ -1276,6 +1309,10 @@ static const struct of_device_id __maybe_unused rpmh_regulator_match_table[] = {
 		.compatible = "qcom,pmx55-rpmh-regulators",
 		.data = pmx55_vreg_data,
 	},
+	{
+		.compatible = "qcom,pmx65-rpmh-regulators",
+		.data = pmx65_vreg_data,
+	},
 	{
 		.compatible = "qcom,pm7325-rpmh-regulators",
 		.data = pm7325_vreg_data,
-- 
GitLab


From 89b35e3f28514087d3f1e28e8f5634fbfd07c554 Mon Sep 17 00:00:00 2001
From: Eddie James <eajames@linux.ibm.com>
Date: Thu, 17 Mar 2022 16:14:26 -0500
Subject: [PATCH 1558/1586] spi: fsi: Implement a timeout for polling status

The data transfer routines must poll the status register to
determine when more data can be shifted in or out. If the hardware
gets into a bad state, these polling loops may never exit. Prevent
this by returning an error if a timeout is exceeded.

Signed-off-by: Eddie James <eajames@linux.ibm.com>
Link: https://lore.kernel.org/r/20220317211426.38940-1-eajames@linux.ibm.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-fsi.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/spi/spi-fsi.c b/drivers/spi/spi-fsi.c
index b6c7467f0b590..d403a7a3021d0 100644
--- a/drivers/spi/spi-fsi.c
+++ b/drivers/spi/spi-fsi.c
@@ -25,6 +25,7 @@
 
 #define SPI_FSI_BASE			0x70000
 #define SPI_FSI_INIT_TIMEOUT_MS		1000
+#define SPI_FSI_STATUS_TIMEOUT_MS	100
 #define SPI_FSI_MAX_RX_SIZE		8
 #define SPI_FSI_MAX_TX_SIZE		40
 
@@ -299,6 +300,7 @@ static int fsi_spi_transfer_data(struct fsi_spi *ctx,
 				 struct spi_transfer *transfer)
 {
 	int rc = 0;
+	unsigned long end;
 	u64 status = 0ULL;
 
 	if (transfer->tx_buf) {
@@ -315,10 +317,14 @@ static int fsi_spi_transfer_data(struct fsi_spi *ctx,
 			if (rc)
 				return rc;
 
+			end = jiffies + msecs_to_jiffies(SPI_FSI_STATUS_TIMEOUT_MS);
 			do {
 				rc = fsi_spi_status(ctx, &status, "TX");
 				if (rc)
 					return rc;
+
+				if (time_after(jiffies, end))
+					return -ETIMEDOUT;
 			} while (status & SPI_FSI_STATUS_TDR_FULL);
 
 			sent += nb;
@@ -329,10 +335,14 @@ static int fsi_spi_transfer_data(struct fsi_spi *ctx,
 		u8 *rx = transfer->rx_buf;
 
 		while (transfer->len > recv) {
+			end = jiffies + msecs_to_jiffies(SPI_FSI_STATUS_TIMEOUT_MS);
 			do {
 				rc = fsi_spi_status(ctx, &status, "RX");
 				if (rc)
 					return rc;
+
+				if (time_after(jiffies, end))
+					return -ETIMEDOUT;
 			} while (!(status & SPI_FSI_STATUS_RDR_FULL));
 
 			rc = fsi_spi_read_reg(ctx, SPI_FSI_DATA_RX, &in);
-- 
GitLab


From 9b56adcf525522e9ffa52471260298d91fc1d395 Mon Sep 17 00:00:00 2001
From: Fengnan Chang <changfengnan@vivo.com>
Date: Fri, 18 Mar 2022 09:23:04 +0800
Subject: [PATCH 1559/1586] f2fs: fix compressed file start atomic write may
 cause data corruption

When compressed file has blocks, f2fs_ioc_start_atomic_write will succeed,
but compressed flag will be remained in inode. If write partial compreseed
cluster and commit atomic write will cause data corruption.

This is the reproduction process:
Step 1:
create a compressed file ,write 64K data , call fsync(), then the blocks
are write as compressed cluster.
Step2:
iotcl(F2FS_IOC_START_ATOMIC_WRITE)  --- this should be fail, but not.
write page 0 and page 3.
iotcl(F2FS_IOC_COMMIT_ATOMIC_WRITE)  -- page 0 and 3 write as normal file,
Step3:
drop cache.
read page 0-4   -- Since page 0 has a valid block address, read as
non-compressed cluster, page 1 and 2 will be filled with compressed data
or zero.

The root cause is, after commit 7eab7a696827 ("f2fs: compress: remove
unneeded read when rewrite whole cluster"), in step 2, f2fs_write_begin()
only set target page dirty, and in f2fs_commit_inmem_pages(), we will write
partial raw pages into compressed cluster, result in corrupting compressed
cluster layout.

Fixes: 4c8ff7095bef ("f2fs: support data compression")
Fixes: 7eab7a696827 ("f2fs: compress: remove unneeded read when rewrite whole cluster")
Reported-by: kernel test robot <lkp@intel.com>
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Fengnan Chang <changfengnan@vivo.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/data.c | 2 +-
 fs/f2fs/file.c | 5 ++++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index b09f401f89606..5675af1b69168 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -3363,7 +3363,7 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
 
 		*fsdata = NULL;
 
-		if (len == PAGE_SIZE)
+		if (len == PAGE_SIZE && !(f2fs_is_atomic_file(inode)))
 			goto repeat;
 
 		ret = f2fs_prepare_compress_overwrite(inode, pagep,
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 7049be29bc2ed..68ddf4c7ca64e 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -2009,7 +2009,10 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
 
 	inode_lock(inode);
 
-	f2fs_disable_compressed_file(inode);
+	if (!f2fs_disable_compressed_file(inode)) {
+		ret = -EINVAL;
+		goto out;
+	}
 
 	if (f2fs_is_atomic_file(inode)) {
 		if (is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST))
-- 
GitLab


From 98e92867b99761979f6d4c155b7d5a5b523412a3 Mon Sep 17 00:00:00 2001
From: Chao Yu <chao@kernel.org>
Date: Fri, 18 Mar 2022 17:02:30 +0800
Subject: [PATCH 1560/1586] f2fs: use aggressive GC policy during
 f2fs_disable_checkpoint()

Let's enable GC_URGENT_HIGH mode during f2fs_disable_checkpoint(),
so that we can use SSR allocator for GCed data/node persistence,
it can improve the performance due to it avoiding migration of
data/node locates in selected target segment of SSR allocator.

Signed-off-by: Chao Yu <chao.yu@oppo.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/super.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 9176597fdf940..caceb80b87ec5 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -2080,6 +2080,7 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
 {
 	unsigned int s_flags = sbi->sb->s_flags;
 	struct cp_control cpc;
+	unsigned int gc_mode;
 	int err = 0;
 	int ret;
 	block_t unusable;
@@ -2092,6 +2093,9 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
 
 	f2fs_update_time(sbi, DISABLE_TIME);
 
+	gc_mode = sbi->gc_mode;
+	sbi->gc_mode = GC_URGENT_HIGH;
+
 	while (!f2fs_time_over(sbi, DISABLE_TIME)) {
 		f2fs_down_write(&sbi->gc_lock);
 		err = f2fs_gc(sbi, true, false, false, NULL_SEGNO);
@@ -2129,6 +2133,7 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
 out_unlock:
 	f2fs_up_write(&sbi->gc_lock);
 restore_flag:
+	sbi->gc_mode = gc_mode;
 	sbi->sb->s_flags = s_flags;	/* Restore SB_RDONLY status */
 	return err;
 }
-- 
GitLab


From 86fc59ef818beb0e1945d17f8e734898baba7e4e Mon Sep 17 00:00:00 2001
From: Colin Foster <colin.foster@in-advantage.com>
Date: Sun, 13 Mar 2022 15:45:23 -0700
Subject: [PATCH 1561/1586] regmap: add configurable downshift for addresses

Add an additional reg_downshift to be applied to register addresses before
any register accesses. An example of a device that uses this is a VSC7514
chip, which require each register address to be downshifted by two if the
access is performed over a SPI bus.

Signed-off-by: Colin Foster <colin.foster@in-advantage.com>
Link: https://lore.kernel.org/r/20220313224524.399947-2-colin.foster@in-advantage.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/base/regmap/internal.h | 1 +
 drivers/base/regmap/regmap.c   | 5 +++++
 include/linux/regmap.h         | 3 +++
 3 files changed, 9 insertions(+)

diff --git a/drivers/base/regmap/internal.h b/drivers/base/regmap/internal.h
index b1905916f7af8..88f710e7ce31c 100644
--- a/drivers/base/regmap/internal.h
+++ b/drivers/base/regmap/internal.h
@@ -31,6 +31,7 @@ struct regmap_format {
 	size_t buf_size;
 	size_t reg_bytes;
 	size_t pad_bytes;
+	size_t reg_downshift;
 	size_t val_bytes;
 	void (*format_write)(struct regmap *map,
 			     unsigned int reg, unsigned int val);
diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c
index 8f9fe5fd47078..1c7c6d6361aff 100644
--- a/drivers/base/regmap/regmap.c
+++ b/drivers/base/regmap/regmap.c
@@ -823,6 +823,7 @@ struct regmap *__regmap_init(struct device *dev,
 
 	map->format.reg_bytes = DIV_ROUND_UP(config->reg_bits, 8);
 	map->format.pad_bytes = config->pad_bits / 8;
+	map->format.reg_downshift = config->reg_downshift;
 	map->format.val_bytes = DIV_ROUND_UP(config->val_bits, 8);
 	map->format.buf_size = DIV_ROUND_UP(config->reg_bits +
 			config->val_bits + config->pad_bits, 8);
@@ -1735,6 +1736,7 @@ static int _regmap_raw_write_impl(struct regmap *map, unsigned int reg,
 			return ret;
 	}
 
+	reg >>= map->format.reg_downshift;
 	map->format.format_reg(map->work_buf, reg, map->reg_shift);
 	regmap_set_work_buf_flag_mask(map, map->format.reg_bytes,
 				      map->write_flag_mask);
@@ -1905,6 +1907,7 @@ static int _regmap_bus_formatted_write(void *context, unsigned int reg,
 			return ret;
 	}
 
+	reg >>= map->format.reg_downshift;
 	map->format.format_write(map, reg, val);
 
 	trace_regmap_hw_write_start(map, reg, 1);
@@ -2346,6 +2349,7 @@ static int _regmap_raw_multi_reg_write(struct regmap *map,
 		unsigned int reg = regs[i].reg;
 		unsigned int val = regs[i].def;
 		trace_regmap_hw_write_start(map, reg, 1);
+		reg >>= map->format.reg_downshift;
 		map->format.format_reg(u8, reg, map->reg_shift);
 		u8 += reg_bytes + pad_bytes;
 		map->format.format_val(u8, val, 0);
@@ -2673,6 +2677,7 @@ static int _regmap_raw_read(struct regmap *map, unsigned int reg, void *val,
 			return ret;
 	}
 
+	reg >>= map->format.reg_downshift;
 	map->format.format_reg(map->work_buf, reg, map->reg_shift);
 	regmap_set_work_buf_flag_mask(map, map->format.reg_bytes,
 				      map->read_flag_mask);
diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index 22652e5fbc380..40fb9399add66 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -237,6 +237,8 @@ typedef void (*regmap_unlock)(void *);
  * @reg_stride: The register address stride. Valid register addresses are a
  *              multiple of this value. If set to 0, a value of 1 will be
  *              used.
+ * @reg_downshift: The number of bits to downshift the register before
+ *		   performing any operations.
  * @pad_bits: Number of bits of padding between register and value.
  * @val_bits: Number of bits in a register value, mandatory.
  *
@@ -360,6 +362,7 @@ struct regmap_config {
 
 	int reg_bits;
 	int reg_stride;
+	int reg_downshift;
 	int pad_bits;
 	int val_bits;
 
-- 
GitLab


From 0074f3f2b1e43d3cedd97e47fb6980db6d2ba79e Mon Sep 17 00:00:00 2001
From: Colin Foster <colin.foster@in-advantage.com>
Date: Sun, 13 Mar 2022 15:45:24 -0700
Subject: [PATCH 1562/1586] regmap: allow a defined reg_base to be added to
 every address

There's an inconsistency that arises when a register set can be accessed
internally via MMIO, or externally via SPI. The VSC7514 chip allows both
modes of operation. When internally accessed, the system utilizes __iomem,
devm_ioremap_resource, and devm_regmap_init_mmio.

For SPI it isn't possible to utilize memory-mapped IO. To properly operate,
the resource base must be added to the register before every operation.

Signed-off-by: Colin Foster <colin.foster@in-advantage.com>
Link: https://lore.kernel.org/r/20220313224524.399947-3-colin.foster@in-advantage.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/base/regmap/internal.h | 1 +
 drivers/base/regmap/regmap.c   | 6 ++++++
 include/linux/regmap.h         | 3 +++
 3 files changed, 10 insertions(+)

diff --git a/drivers/base/regmap/internal.h b/drivers/base/regmap/internal.h
index 88f710e7ce31c..b4df36c7b17d1 100644
--- a/drivers/base/regmap/internal.h
+++ b/drivers/base/regmap/internal.h
@@ -63,6 +63,7 @@ struct regmap {
 	regmap_unlock unlock;
 	void *lock_arg; /* This is passed to lock/unlock functions */
 	gfp_t alloc_flags;
+	unsigned int reg_base;
 
 	struct device *dev; /* Device we do I/O on */
 	void *work_buf;     /* Scratch buffer used to format I/O */
diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c
index 1c7c6d6361aff..5e12f7cb51471 100644
--- a/drivers/base/regmap/regmap.c
+++ b/drivers/base/regmap/regmap.c
@@ -821,6 +821,8 @@ struct regmap *__regmap_init(struct device *dev,
 	else
 		map->alloc_flags = GFP_KERNEL;
 
+	map->reg_base = config->reg_base;
+
 	map->format.reg_bytes = DIV_ROUND_UP(config->reg_bits, 8);
 	map->format.pad_bytes = config->pad_bits / 8;
 	map->format.reg_downshift = config->reg_downshift;
@@ -1736,6 +1738,7 @@ static int _regmap_raw_write_impl(struct regmap *map, unsigned int reg,
 			return ret;
 	}
 
+	reg += map->reg_base;
 	reg >>= map->format.reg_downshift;
 	map->format.format_reg(map->work_buf, reg, map->reg_shift);
 	regmap_set_work_buf_flag_mask(map, map->format.reg_bytes,
@@ -1907,6 +1910,7 @@ static int _regmap_bus_formatted_write(void *context, unsigned int reg,
 			return ret;
 	}
 
+	reg += map->reg_base;
 	reg >>= map->format.reg_downshift;
 	map->format.format_write(map, reg, val);
 
@@ -2349,6 +2353,7 @@ static int _regmap_raw_multi_reg_write(struct regmap *map,
 		unsigned int reg = regs[i].reg;
 		unsigned int val = regs[i].def;
 		trace_regmap_hw_write_start(map, reg, 1);
+		reg += map->reg_base;
 		reg >>= map->format.reg_downshift;
 		map->format.format_reg(u8, reg, map->reg_shift);
 		u8 += reg_bytes + pad_bytes;
@@ -2677,6 +2682,7 @@ static int _regmap_raw_read(struct regmap *map, unsigned int reg, void *val,
 			return ret;
 	}
 
+	reg += map->reg_base;
 	reg >>= map->format.reg_downshift;
 	map->format.format_reg(map->work_buf, reg, map->reg_shift);
 	regmap_set_work_buf_flag_mask(map, map->format.reg_bytes,
diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index 40fb9399add66..de81a94d7b307 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -239,6 +239,8 @@ typedef void (*regmap_unlock)(void *);
  *              used.
  * @reg_downshift: The number of bits to downshift the register before
  *		   performing any operations.
+ * @reg_base: Value to be added to every register address before performing any
+ *	      operation.
  * @pad_bits: Number of bits of padding between register and value.
  * @val_bits: Number of bits in a register value, mandatory.
  *
@@ -363,6 +365,7 @@ struct regmap_config {
 	int reg_bits;
 	int reg_stride;
 	int reg_downshift;
+	unsigned int reg_base;
 	int pad_bits;
 	int val_bits;
 
-- 
GitLab


From adf3a9e9f556613197583a1884f0de40a8bb6fb9 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Mon, 14 Mar 2022 17:26:19 -0600
Subject: [PATCH 1563/1586] io_uring: don't check unrelated req->open.how in
 accept request

Looks like a victim of too much copy/paste, we should not be looking
at req->open.how in accept. The point is to check CLOEXEC and error
out, which we don't invalid direct descriptors on exec. Hence any
attempt to get a direct descriptor with CLOEXEC is invalid.

No harm is done here, as req->open.how.flags overlaps with
req->accept.flags, but it's very confusing and might change if either of
those command structs are modified.

Fixes: aaa4db12ef7b ("io_uring: accept directly into fixed file table")
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 690bfeaa609aa..2d6ab732f940f 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -5535,8 +5535,7 @@ static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	accept->nofile = rlimit(RLIMIT_NOFILE);
 
 	accept->file_slot = READ_ONCE(sqe->file_index);
-	if (accept->file_slot && ((req->open.how.flags & O_CLOEXEC) ||
-				  (accept->flags & SOCK_CLOEXEC)))
+	if (accept->file_slot && (accept->flags & SOCK_CLOEXEC))
 		return -EINVAL;
 	if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
 		return -EINVAL;
-- 
GitLab


From dd664099002db909912a23215f8775c97f7f4f10 Mon Sep 17 00:00:00 2001
From: Rick Edgecombe <rick.p.edgecombe@intel.com>
Date: Thu, 17 Mar 2022 12:20:13 -0700
Subject: [PATCH 1564/1586] binfmt_elf: Don't write past end of notes for
 regset gap

In fill_thread_core_info() the ptrace accessible registers are collected
to be written out as notes in a core file. The note array is allocated
from a size calculated by iterating the user regset view, and counting the
regsets that have a non-zero core_note_type. However, this only allows for
there to be non-zero core_note_type at the end of the regset view. If
there are any gaps in the middle, fill_thread_core_info() will overflow the
note allocation, as it iterates over the size of the view and the
allocation would be smaller than that.

There doesn't appear to be any arch that has gaps such that they exceed
the notes allocation, but the code is brittle and tries to support
something it doesn't. It could be fixed by increasing the allocation size,
but instead just have the note collecting code utilize the array better.
This way the allocation can stay smaller.

Even in the case of no arch's that have gaps in their regset views, this
introduces a change in the resulting indicies of t->notes. It does not
introduce any changes to the core file itself, because any blank notes are
skipped in write_note_info().

In case, the allocation logic between fill_note_info() and
fill_thread_core_info() ever diverges from the usage logic, warn and skip
writing any notes that would overflow the array.

This fix is derrived from an earlier one[0] by Yu-cheng Yu.

[0] https://lore.kernel.org/lkml/20180717162502.32274-1-yu-cheng.yu@intel.com/

Co-developed-by: Yu-cheng Yu <yu-cheng.yu@intel.com>
Signed-off-by: Yu-cheng Yu <yu-cheng.yu@intel.com>
Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20220317192013.13655-4-rick.p.edgecombe@intel.com
---
 fs/binfmt_elf.c | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index ca5296cae9791..37d9c455d5351 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1766,9 +1766,9 @@ static void do_thread_regset_writeback(struct task_struct *task,
 
 static int fill_thread_core_info(struct elf_thread_core_info *t,
 				 const struct user_regset_view *view,
-				 long signr, size_t *total)
+				 long signr, struct elf_note_info *info)
 {
-	unsigned int i;
+	unsigned int note_iter, view_iter;
 
 	/*
 	 * NT_PRSTATUS is the one special case, because the regset data
@@ -1782,17 +1782,17 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
 
 	fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
 		  PRSTATUS_SIZE, &t->prstatus);
-	*total += notesize(&t->notes[0]);
+	info->size += notesize(&t->notes[0]);
 
 	do_thread_regset_writeback(t->task, &view->regsets[0]);
 
 	/*
 	 * Each other regset might generate a note too.  For each regset
-	 * that has no core_note_type or is inactive, we leave t->notes[i]
-	 * all zero and we'll know to skip writing it later.
+	 * that has no core_note_type or is inactive, skip it.
 	 */
-	for (i = 1; i < view->n; ++i) {
-		const struct user_regset *regset = &view->regsets[i];
+	note_iter = 1;
+	for (view_iter = 1; view_iter < view->n; ++view_iter) {
+		const struct user_regset *regset = &view->regsets[view_iter];
 		int note_type = regset->core_note_type;
 		bool is_fpreg = note_type == NT_PRFPREG;
 		void *data;
@@ -1808,13 +1808,17 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
 		if (ret < 0)
 			continue;
 
+		if (WARN_ON_ONCE(note_iter >= info->thread_notes))
+			break;
+
 		if (is_fpreg)
 			SET_PR_FPVALID(&t->prstatus);
 
-		fill_note(&t->notes[i], is_fpreg ? "CORE" : "LINUX",
+		fill_note(&t->notes[note_iter], is_fpreg ? "CORE" : "LINUX",
 			  note_type, ret, data);
 
-		*total += notesize(&t->notes[i]);
+		info->size += notesize(&t->notes[note_iter]);
+		note_iter++;
 	}
 
 	return 1;
@@ -1894,7 +1898,7 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
 	 * Now fill in each thread's information.
 	 */
 	for (t = info->thread; t != NULL; t = t->next)
-		if (!fill_thread_core_info(t, view, cprm->siginfo->si_signo, &info->size))
+		if (!fill_thread_core_info(t, view, cprm->siginfo->si_signo, info))
 			return 0;
 
 	/*
-- 
GitLab


From 5e929367468c8f97cd1ffb0417316cecfebef94b Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Fri, 18 Mar 2022 11:28:13 -0600
Subject: [PATCH 1565/1586] io_uring: terminate manual loop iterator loop
 correctly for non-vecs

The fix for not advancing the iterator if we're using fixed buffers is
broken in that it can hit a condition where we don't terminate the loop.
This results in io-wq looping forever, asking to read (or write) 0 bytes
for every subsequent loop.

Reported-by: Joel Jaeschke <joel.jaeschke@gmail.com>
Link: https://github.com/axboe/liburing/issues/549
Fixes: 16c8d2df7ec0 ("io_uring: ensure symmetry in handling iter types in loop_rw_iter()")
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 2d6ab732f940f..5fa736344b677 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -3612,13 +3612,15 @@ static ssize_t loop_rw_iter(int rw, struct io_kiocb *req, struct iov_iter *iter)
 				ret = nr;
 			break;
 		}
+		ret += nr;
 		if (!iov_iter_is_bvec(iter)) {
 			iov_iter_advance(iter, nr);
 		} else {
-			req->rw.len -= nr;
 			req->rw.addr += nr;
+			req->rw.len -= nr;
+			if (!req->rw.len)
+				break;
 		}
-		ret += nr;
 		if (nr != iovec.iov_len)
 			break;
 	}
-- 
GitLab


From 3cf6a32f3f2a45944dd5be5c6ac4deb46bcd3bee Mon Sep 17 00:00:00 2001
From: Michael Petlan <mpetlan@redhat.com>
Date: Thu, 17 Mar 2022 14:55:36 +0100
Subject: [PATCH 1566/1586] perf symbols: Fix symbol size calculation condition

Before this patch, the symbol end address fixup to be called, needed two
conditions being met:

  if (prev->end == prev->start && prev->end != curr->start)

Where
  "prev->end == prev->start" means that prev is zero-long
                             (and thus needs a fixup)
and
  "prev->end != curr->start" means that fixup hasn't been applied yet

However, this logic is incorrect in the following situation:

*curr  = {rb_node = {__rb_parent_color = 278218928,
  rb_right = 0x0, rb_left = 0x0},
  start = 0xc000000000062354,
  end = 0xc000000000062354, namelen = 40, type = 2 '\002',
  binding = 0 '\000', idle = 0 '\000', ignore = 0 '\000',
  inlined = 0 '\000', arch_sym = 0 '\000', annotate2 = false,
  name = 0x1159739e "kprobe_optinsn_page\t[__builtin__kprobes]"}

*prev = {rb_node = {__rb_parent_color = 278219041,
  rb_right = 0x109548b0, rb_left = 0x109547c0},
  start = 0xc000000000062354,
  end = 0xc000000000062354, namelen = 12, type = 2 '\002',
  binding = 1 '\001', idle = 0 '\000', ignore = 0 '\000',
  inlined = 0 '\000', arch_sym = 0 '\000', annotate2 = false,
  name = 0x1095486e "optinsn_slot"}

In this case, prev->start == prev->end == curr->start == curr->end,
thus the condition above thinks that "we need a fixup due to zero
length of prev symbol, but it has been probably done, since the
prev->end == curr->start", which is wrong.

After the patch, the execution path proceeds to arch__symbols__fixup_end
function which fixes up the size of prev symbol by adding page_size to
its end offset.

Fixes: 3b01a413c196c910 ("perf symbols: Improve kallsyms symbol end addr calculation")
Signed-off-by: Michael Petlan <mpetlan@redhat.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kajol Jain <kjain@linux.ibm.com>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Link: http://lore.kernel.org/lkml/20220317135536.805-1-mpetlan@redhat.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/symbol.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index b2ed3140a1faa..dfde9eada224a 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -231,7 +231,7 @@ void symbols__fixup_end(struct rb_root_cached *symbols)
 		prev = curr;
 		curr = rb_entry(nd, struct symbol, rb_node);
 
-		if (prev->end == prev->start && prev->end != curr->start)
+		if (prev->end == prev->start || prev->end != curr->start)
 			arch__symbols__fixup_end(prev, curr);
 	}
 
-- 
GitLab


From 8b464eac9765dfc84d0327fa3f3668faa439d1ce Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Thu, 17 Mar 2022 16:16:43 -0700
Subject: [PATCH 1567/1586] perf evlist: Avoid iteration for empty evlist.

As seen with 'perf stat --null ..' and reported in:
https://lore.kernel.org/lkml/YjCLcpcX2peeQVCH@kernel.org/

v2. Avoids setting evsel in the empty list case as suggested by Jiri Olsa.

    Committer testing:

Before:

  $  perf stat --null sleep 1
  Segmentation fault (core dumped)
  $

After:

  $  perf stat --null sleep 1

   Performance counter stats for 'sleep 1':

         1.010340646 seconds time elapsed

         0.001420000 seconds user
         0.000000000 seconds sys
  $

Fixes: 472832d2c000b961 ("perf evlist: Refactor evlist__for_each_cpu()")
Reported-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20220317231643.550902-1-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/evlist.c | 28 +++++++++++++++++-----------
 1 file changed, 17 insertions(+), 11 deletions(-)

diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index eaad04e1672a4..41a66a48cbdff 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -346,7 +346,7 @@ struct evlist_cpu_iterator evlist__cpu_begin(struct evlist *evlist, struct affin
 {
 	struct evlist_cpu_iterator itr = {
 		.container = evlist,
-		.evsel = evlist__first(evlist),
+		.evsel = NULL,
 		.cpu_map_idx = 0,
 		.evlist_cpu_map_idx = 0,
 		.evlist_cpu_map_nr = perf_cpu_map__nr(evlist->core.all_cpus),
@@ -354,16 +354,22 @@ struct evlist_cpu_iterator evlist__cpu_begin(struct evlist *evlist, struct affin
 		.affinity = affinity,
 	};
 
-	if (itr.affinity) {
-		itr.cpu = perf_cpu_map__cpu(evlist->core.all_cpus, 0);
-		affinity__set(itr.affinity, itr.cpu.cpu);
-		itr.cpu_map_idx = perf_cpu_map__idx(itr.evsel->core.cpus, itr.cpu);
-		/*
-		 * If this CPU isn't in the evsel's cpu map then advance through
-		 * the list.
-		 */
-		if (itr.cpu_map_idx == -1)
-			evlist_cpu_iterator__next(&itr);
+	if (evlist__empty(evlist)) {
+		/* Ensure the empty list doesn't iterate. */
+		itr.evlist_cpu_map_idx = itr.evlist_cpu_map_nr;
+	} else {
+		itr.evsel = evlist__first(evlist);
+		if (itr.affinity) {
+			itr.cpu = perf_cpu_map__cpu(evlist->core.all_cpus, 0);
+			affinity__set(itr.affinity, itr.cpu.cpu);
+			itr.cpu_map_idx = perf_cpu_map__idx(itr.evsel->core.cpus, itr.cpu);
+			/*
+			 * If this CPU isn't in the evsel's cpu map then advance
+			 * through the list.
+			 */
+			if (itr.cpu_map_idx == -1)
+				evlist_cpu_iterator__next(&itr);
+		}
 	}
 	return itr;
 }
-- 
GitLab


From 7bd1da15d211d439d96eb7cc8a35ce694b71d120 Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Thu, 17 Mar 2022 15:43:09 -0700
Subject: [PATCH 1568/1586] perf parse-events: Ignore case in topdown.slots
 check

An issue with icelakex metrics:

  https://git.kernel.org/pub/scm/linux/kernel/git/acme/linux.git/tree/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json?h=perf/core&id=65eab2bc7dab326ee892ec5a4c749470b368b51a#n48

That causes the slots not to be first.

Fixes: 94dbfd6781a0e87b ("perf parse-events: Architecture specific leader override")
Reported-by: Caleb Biggers <caleb.biggers@intel.com>
Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexandre Torgue <alexandre.torgue@foss.st.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: John Garry <john.garry@huawei.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Maxime Coquelin <mcoquelin.stm32@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Zhengjun Xing <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20220317224309.543736-1-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/arch/x86/util/evlist.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/arch/x86/util/evlist.c b/tools/perf/arch/x86/util/evlist.c
index f924246eff781..8d9b55959256a 100644
--- a/tools/perf/arch/x86/util/evlist.c
+++ b/tools/perf/arch/x86/util/evlist.c
@@ -29,7 +29,7 @@ struct evsel *arch_evlist__leader(struct list_head *list)
 
 	__evlist__for_each_entry(list, evsel) {
 		if (evsel->pmu_name && !strcmp(evsel->pmu_name, "cpu") &&
-			evsel->name && strstr(evsel->name, "slots"))
+			evsel->name && strcasestr(evsel->name, "slots"))
 			return evsel;
 	}
 	return first;
-- 
GitLab


From 411fadd62cecf74935693b7690b416af9bd4a332 Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Fri, 18 Mar 2022 22:59:59 +0100
Subject: [PATCH 1569/1586] parisc: Avoid flushing cache on cache-less machines

Avoid flushing caches in __flush_cache_page() and __purge_cache_page()
if the machine hasn't data or instruction caches - as e.g. in qemu.

Signed-off-by: Helge Deller <deller@gmx.de>
---
 arch/parisc/kernel/cache.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index 231cebb3fc0de..d41fee3a98746 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -315,6 +315,8 @@ static inline void
 __flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr,
 		   unsigned long physaddr)
 {
+	if (!static_branch_likely(&parisc_has_cache))
+		return;
 	preempt_disable();
 	flush_dcache_page_asm(physaddr, vmaddr);
 	if (vma->vm_flags & VM_EXEC)
@@ -326,6 +328,8 @@ static inline void
 __purge_cache_page(struct vm_area_struct *vma, unsigned long vmaddr,
 		   unsigned long physaddr)
 {
+	if (!static_branch_likely(&parisc_has_cache))
+		return;
 	preempt_disable();
 	purge_dcache_page_asm(physaddr, vmaddr);
 	if (vma->vm_flags & VM_EXEC)
-- 
GitLab


From 6ba463edccb978e3c0248c3a193b759436b51ac8 Mon Sep 17 00:00:00 2001
From: Armin Wolf <W_Armin@gmx.de>
Date: Fri, 18 Mar 2022 19:34:08 +0100
Subject: [PATCH 1570/1586] hwmon: (dell-smm) Add Inspiron 3505 to fan type
 blacklist

Sadly, while firmware 1.5 fixed temperature labels on my
Inspiron 3505, it also caused fan type calls to take
ca. 4 seconds with the fan being at full speed.
Fix the resulting delays by adding the model to the
blacklist.

Tested on a Dell Inspiron 3505.

Signed-off-by: Armin Wolf <W_Armin@gmx.de>
Link: https://lore.kernel.org/r/20220318183408.13286-1-W_Armin@gmx.de
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/dell-smm-hwmon.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/hwmon/dell-smm-hwmon.c b/drivers/hwmon/dell-smm-hwmon.c
index c5939e68586d0..84cb1ede7bc0b 100644
--- a/drivers/hwmon/dell-smm-hwmon.c
+++ b/drivers/hwmon/dell-smm-hwmon.c
@@ -1157,6 +1157,13 @@ static const struct dmi_system_id i8k_blacklist_fan_type_dmi_table[] __initconst
 			DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Inspiron 580 "),
 		},
 	},
+	{
+		.ident = "Dell Inspiron 3505",
+		.matches = {
+			DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+			DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Inspiron 3505"),
+		},
+	},
 	{ }
 };
 
-- 
GitLab


From 47178c7722ac528ea08aa82c3ef9ffa178962d7a Mon Sep 17 00:00:00 2001
From: Ronnie Sahlberg <lsahlber@redhat.com>
Date: Fri, 4 Mar 2022 10:31:49 +1000
Subject: [PATCH 1571/1586] cifs: fix handlecache and multiuser

In multiuser each individual user has their own tcon structure for the
share and thus their own handle for a cached directory.
When we umount such a share we much make sure to release the pinned down dentry
for each such tcon and not just the master tcon.

Otherwise we will get nasty warnings on umount that dentries are still in use:
[ 3459.590047] BUG: Dentry 00000000115c6f41{i=12000000019d95,n=/}  still in use\
 (2) [unmount of cifs cifs]
...
[ 3459.590492] Call Trace:
[ 3459.590500]  d_walk+0x61/0x2a0
[ 3459.590518]  ? shrink_lock_dentry.part.0+0xe0/0xe0
[ 3459.590526]  shrink_dcache_for_umount+0x49/0x110
[ 3459.590535]  generic_shutdown_super+0x1a/0x110
[ 3459.590542]  kill_anon_super+0x14/0x30
[ 3459.590549]  cifs_kill_sb+0xf5/0x104 [cifs]
[ 3459.590773]  deactivate_locked_super+0x36/0xa0
[ 3459.590782]  cleanup_mnt+0x131/0x190
[ 3459.590789]  task_work_run+0x5c/0x90
[ 3459.590798]  exit_to_user_mode_loop+0x151/0x160
[ 3459.590809]  exit_to_user_mode_prepare+0x83/0xd0
[ 3459.590818]  syscall_exit_to_user_mode+0x12/0x30
[ 3459.590828]  do_syscall_64+0x48/0x90
[ 3459.590833]  entry_SYSCALL_64_after_hwframe+0x44/0xae

Signed-off-by: Ronnie Sahlberg <lsahlber@redhat.com>
Acked-by: Paulo Alcantara (SUSE) <pc@cjr.nz>
Cc: stable@vger.kernel.org
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/cifsfs.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 082c214786867..a61bb5d683c65 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -254,6 +254,9 @@ static void cifs_kill_sb(struct super_block *sb)
 	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
 	struct cifs_tcon *tcon;
 	struct cached_fid *cfid;
+	struct rb_root *root = &cifs_sb->tlink_tree;
+	struct rb_node *node;
+	struct tcon_link *tlink;
 
 	/*
 	 * We ned to release all dentries for the cached directories
@@ -263,17 +266,21 @@ static void cifs_kill_sb(struct super_block *sb)
 		dput(cifs_sb->root);
 		cifs_sb->root = NULL;
 	}
-	tcon = cifs_sb_master_tcon(cifs_sb);
-	if (tcon) {
+	spin_lock(&cifs_sb->tlink_tree_lock);
+	node = rb_first(root);
+	while (node != NULL) {
+		tlink = rb_entry(node, struct tcon_link, tl_rbnode);
+		tcon = tlink_tcon(tlink);
 		cfid = &tcon->crfid;
 		mutex_lock(&cfid->fid_mutex);
 		if (cfid->dentry) {
-
 			dput(cfid->dentry);
 			cfid->dentry = NULL;
 		}
 		mutex_unlock(&cfid->fid_mutex);
+		node = rb_next(node);
 	}
+	spin_unlock(&cifs_sb->tlink_tree_lock);
 
 	kill_anon_super(sb);
 	cifs_umount(cifs_sb);
-- 
GitLab


From 84330d41efb12bc227899e54dbdbe7d9590cb2b7 Mon Sep 17 00:00:00 2001
From: Ronnie Sahlberg <lsahlber@redhat.com>
Date: Wed, 23 Feb 2022 11:14:16 +1000
Subject: [PATCH 1572/1586] cifs: truncate the inode and mapping when we
 simulate fcollapse

RHBZ:1997367

When we collapse a range in smb3_collapse_range() we must make sure
we update the inode size and pagecache accordingly.

If not, both inode size and pagecahce may be stale until it is refreshed.

This can be demonstrated for the inode size by running :

xfs_io -i -f -c "truncate 320k" -c "fcollapse 64k 128k" -c "fiemap -v"  \
/mnt/testfile

where we can see the result of stale data in the fiemap output.
The third line of the output is wrong, all this data should be truncated.

 EXT: FILE-OFFSET      BLOCK-RANGE      TOTAL FLAGS
   0: [0..127]:        hole               128
   1: [128..383]:      128..383           256   0x1
   2: [384..639]:      hole               256

And the correct output, when the inode size has been updated correctly should
look like this:

 EXT: FILE-OFFSET      BLOCK-RANGE      TOTAL FLAGS
   0: [0..127]:        hole               128
   1: [128..383]:      128..383           256   0x1

Reported-by: Xiaoli Feng <xifeng@redhat.com>
Reported-by: kernel test robot <lkp@intel.com>
Cc: stable@vger.kernel.org
Signed-off-by: Ronnie Sahlberg <lsahlber@redhat.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/smb2ops.c | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index af5d0830bc8a8..891b11576e557 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -25,6 +25,7 @@
 #include "smb2glob.h"
 #include "cifs_ioctl.h"
 #include "smbdirect.h"
+#include "fscache.h"
 #include "fs_context.h"
 
 /* Change credits for different ops and return the total number of credits */
@@ -3887,29 +3888,38 @@ static long smb3_collapse_range(struct file *file, struct cifs_tcon *tcon,
 {
 	int rc;
 	unsigned int xid;
+	struct inode *inode;
 	struct cifsFileInfo *cfile = file->private_data;
+	struct cifsInodeInfo *cifsi;
 	__le64 eof;
 
 	xid = get_xid();
 
-	if (off >= i_size_read(file->f_inode) ||
-	    off + len >= i_size_read(file->f_inode)) {
+	inode = d_inode(cfile->dentry);
+	cifsi = CIFS_I(inode);
+
+	if (off >= i_size_read(inode) ||
+	    off + len >= i_size_read(inode)) {
 		rc = -EINVAL;
 		goto out;
 	}
 
 	rc = smb2_copychunk_range(xid, cfile, cfile, off + len,
-				  i_size_read(file->f_inode) - off - len, off);
+				  i_size_read(inode) - off - len, off);
 	if (rc < 0)
 		goto out;
 
-	eof = cpu_to_le64(i_size_read(file->f_inode) - len);
+	eof = cpu_to_le64(i_size_read(inode) - len);
 	rc = SMB2_set_eof(xid, tcon, cfile->fid.persistent_fid,
 			  cfile->fid.volatile_fid, cfile->pid, &eof);
 	if (rc < 0)
 		goto out;
 
 	rc = 0;
+
+	cifsi->server_eof = i_size_read(inode) - len;
+	truncate_setsize(inode, cifsi->server_eof);
+	fscache_resize_cookie(cifs_inode_cookie(inode), cifsi->server_eof);
  out:
 	free_xid(xid);
 	return rc;
-- 
GitLab


From 06a466565d54a1a42168f9033a062a3f5c40e73b Mon Sep 17 00:00:00 2001
From: Rohith Surabattula <rohiths@microsoft.com>
Date: Mon, 7 Mar 2022 18:37:22 +0000
Subject: [PATCH 1573/1586] Adjust cifssb maximum read size

When session gets reconnected during mount then read size in super block fs context
gets set to zero and after negotiate, rsize is not modified which results in
incorrect read with requested bytes as zero. Fixes intermittent failure
of xfstest generic/240

Note that stable requires a different version of this patch which will be
sent to the stable mailing list.

Signed-off-by: Rohith Surabattula <rohiths@microsoft.com>
Acked-by: Paulo Alcantara (SUSE) <pc@cjr.nz>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/cifsfs.c |  3 +++
 fs/cifs/file.c   | 10 ++++++++++
 2 files changed, 13 insertions(+)

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index a61bb5d683c65..677c02aa8731f 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -210,6 +210,9 @@ cifs_read_super(struct super_block *sb)
 	if (rc)
 		goto out_no_root;
 	/* tune readahead according to rsize if readahead size not set on mount */
+	if (cifs_sb->ctx->rsize == 0)
+		cifs_sb->ctx->rsize =
+			tcon->ses->server->ops->negotiate_rsize(tcon, cifs_sb->ctx);
 	if (cifs_sb->ctx->rasize)
 		sb->s_bdi->ra_pages = cifs_sb->ctx->rasize / PAGE_SIZE;
 	else
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index e7af802dcfa60..a2723f7cb5e9d 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -3740,6 +3740,11 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
 				break;
 		}
 
+		if (cifs_sb->ctx->rsize == 0)
+			cifs_sb->ctx->rsize =
+				server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
+							     cifs_sb->ctx);
+
 		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
 						   &rsize, credits);
 		if (rc)
@@ -4474,6 +4479,11 @@ static void cifs_readahead(struct readahead_control *ractl)
 			}
 		}
 
+		if (cifs_sb->ctx->rsize == 0)
+			cifs_sb->ctx->rsize =
+				server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
+							     cifs_sb->ctx);
+
 		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
 						   &rsize, credits);
 		if (rc)
-- 
GitLab


From 9a14b65d590105d393b63f5320e1594edda7c672 Mon Sep 17 00:00:00 2001
From: Ronnie Sahlberg <lsahlber@redhat.com>
Date: Tue, 15 Mar 2022 13:44:04 +1000
Subject: [PATCH 1574/1586] cifs: we do not need a spinlock around the tree
 access during umount

Remove the spinlock around the tree traversal as we are calling possibly
sleeping functions.
We do not need a spinlock here as there will be no modifications to this
tree at this point.

This prevents warnings like this to occur in dmesg:
[  653.774996] BUG: sleeping function called from invalid context at kernel/loc\
king/mutex.c:280
[  653.775088] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 1827, nam\
e: umount
[  653.775152] preempt_count: 1, expected: 0
[  653.775191] CPU: 0 PID: 1827 Comm: umount Tainted: G        W  OE     5.17.0\
-rc7-00006-g4eb628dd74df #135
[  653.775195] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.14.0-\
1.fc33 04/01/2014
[  653.775197] Call Trace:
[  653.775199]  <TASK>
[  653.775202]  dump_stack_lvl+0x34/0x44
[  653.775209]  __might_resched.cold+0x13f/0x172
[  653.775213]  mutex_lock+0x75/0xf0
[  653.775217]  ? __mutex_lock_slowpath+0x10/0x10
[  653.775220]  ? _raw_write_lock_irq+0xd0/0xd0
[  653.775224]  ? dput+0x6b/0x360
[  653.775228]  cifs_kill_sb+0xff/0x1d0 [cifs]
[  653.775285]  deactivate_locked_super+0x85/0x130
[  653.775289]  cleanup_mnt+0x32c/0x4d0
[  653.775292]  ? path_umount+0x228/0x380
[  653.775296]  task_work_run+0xd8/0x180
[  653.775301]  exit_to_user_mode_loop+0x152/0x160
[  653.775306]  exit_to_user_mode_prepare+0x89/0xd0
[  653.775315]  syscall_exit_to_user_mode+0x12/0x30
[  653.775322]  do_syscall_64+0x48/0x90
[  653.775326]  entry_SYSCALL_64_after_hwframe+0x44/0xae

Fixes: 187af6e98b44e5d8f25e1d41a92db138eb54416f ("cifs: fix handlecache and multiuser")
Reported-by: kernel test robot <oliver.sang@intel.com>
Cc: stable@vger.kernel.org
Signed-off-by: Ronnie Sahlberg <lsahlber@redhat.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/cifsfs.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 677c02aa8731f..6e5246122ee2c 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -269,7 +269,6 @@ static void cifs_kill_sb(struct super_block *sb)
 		dput(cifs_sb->root);
 		cifs_sb->root = NULL;
 	}
-	spin_lock(&cifs_sb->tlink_tree_lock);
 	node = rb_first(root);
 	while (node != NULL) {
 		tlink = rb_entry(node, struct tcon_link, tl_rbnode);
@@ -283,7 +282,6 @@ static void cifs_kill_sb(struct super_block *sb)
 		mutex_unlock(&cfid->fid_mutex);
 		node = rb_next(node);
 	}
-	spin_unlock(&cifs_sb->tlink_tree_lock);
 
 	kill_anon_super(sb);
 	cifs_umount(cifs_sb);
-- 
GitLab


From dca65818c80cf06e0f08ba2cf94060a5236e73c2 Mon Sep 17 00:00:00 2001
From: Shyam Prasad N <sprasad@microsoft.com>
Date: Tue, 15 Feb 2022 13:55:40 +0000
Subject: [PATCH 1575/1586] cifs: use a different reconnect helper for
 non-cifsd threads

The cifs_demultiplexer_thread should only call cifs_reconnect.
If any other thread wants to trigger a reconnect, they can do
so by updating the server tcpStatus to CifsNeedReconnect.

The last patch attempted to use the same helper function for
both types of threads, but that causes other issues
with lock dependencies.

This patch creates a new helper for non-cifsd threads, that
will indicate to cifsd that the server needs reconnect.

Fixes: 2a05137a0575 ("cifs: mark sessions for reconnection in helper function")
Signed-off-by: Shyam Prasad N <sprasad@microsoft.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/cifs_swn.c  |  6 +++---
 fs/cifs/cifsproto.h |  3 +++
 fs/cifs/connect.c   | 42 +++++++++++++++++++++++++++++++++++++++++-
 fs/cifs/dfs_cache.c |  2 +-
 fs/cifs/smb1ops.c   |  2 +-
 fs/cifs/transport.c |  2 +-
 6 files changed, 50 insertions(+), 7 deletions(-)

diff --git a/fs/cifs/cifs_swn.c b/fs/cifs/cifs_swn.c
index cdce1609c5c26..180c234c2f46c 100644
--- a/fs/cifs/cifs_swn.c
+++ b/fs/cifs/cifs_swn.c
@@ -396,11 +396,11 @@ static int cifs_swn_resource_state_changed(struct cifs_swn_reg *swnreg, const ch
 	switch (state) {
 	case CIFS_SWN_RESOURCE_STATE_UNAVAILABLE:
 		cifs_dbg(FYI, "%s: resource name '%s' become unavailable\n", __func__, name);
-		cifs_mark_tcp_ses_conns_for_reconnect(swnreg->tcon->ses->server, true);
+		cifs_signal_cifsd_for_reconnect(swnreg->tcon->ses->server, true);
 		break;
 	case CIFS_SWN_RESOURCE_STATE_AVAILABLE:
 		cifs_dbg(FYI, "%s: resource name '%s' become available\n", __func__, name);
-		cifs_mark_tcp_ses_conns_for_reconnect(swnreg->tcon->ses->server, true);
+		cifs_signal_cifsd_for_reconnect(swnreg->tcon->ses->server, true);
 		break;
 	case CIFS_SWN_RESOURCE_STATE_UNKNOWN:
 		cifs_dbg(FYI, "%s: resource name '%s' changed to unknown state\n", __func__, name);
@@ -498,7 +498,7 @@ static int cifs_swn_reconnect(struct cifs_tcon *tcon, struct sockaddr_storage *a
 		goto unlock;
 	}
 
-	cifs_mark_tcp_ses_conns_for_reconnect(tcon->ses->server, false);
+	cifs_signal_cifsd_for_reconnect(tcon->ses->server, false);
 
 unlock:
 	mutex_unlock(&tcon->ses->server->srv_mutex);
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index d3701295402d2..0df3b24a0bf4c 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -132,6 +132,9 @@ extern int SendReceiveBlockingLock(const unsigned int xid,
 			struct smb_hdr *out_buf,
 			int *bytes_returned);
 void
+cifs_signal_cifsd_for_reconnect(struct TCP_Server_Info *server,
+				      bool all_channels);
+void
 cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server,
 				      bool mark_smb_session);
 extern int cifs_reconnect(struct TCP_Server_Info *server,
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index d3020abfe404a..9964c36343225 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -162,11 +162,51 @@ static void cifs_resolve_server(struct work_struct *work)
 	mutex_unlock(&server->srv_mutex);
 }
 
+/*
+ * Update the tcpStatus for the server.
+ * This is used to signal the cifsd thread to call cifs_reconnect
+ * ONLY cifsd thread should call cifs_reconnect. For any other
+ * thread, use this function
+ *
+ * @server: the tcp ses for which reconnect is needed
+ * @all_channels: if this needs to be done for all channels
+ */
+void
+cifs_signal_cifsd_for_reconnect(struct TCP_Server_Info *server,
+				bool all_channels)
+{
+	struct TCP_Server_Info *pserver;
+	struct cifs_ses *ses;
+	int i;
+
+	/* If server is a channel, select the primary channel */
+	pserver = CIFS_SERVER_IS_CHAN(server) ? server->primary_server : server;
+
+	spin_lock(&cifs_tcp_ses_lock);
+	if (!all_channels) {
+		pserver->tcpStatus = CifsNeedReconnect;
+		spin_unlock(&cifs_tcp_ses_lock);
+		return;
+	}
+
+	list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
+		spin_lock(&ses->chan_lock);
+		for (i = 0; i < ses->chan_count; i++)
+			ses->chans[i].server->tcpStatus = CifsNeedReconnect;
+		spin_unlock(&ses->chan_lock);
+	}
+	spin_unlock(&cifs_tcp_ses_lock);
+}
+
 /*
  * Mark all sessions and tcons for reconnect.
+ * IMPORTANT: make sure that this gets called only from
+ * cifsd thread. For any other thread, use
+ * cifs_signal_cifsd_for_reconnect
  *
+ * @server: the tcp ses for which reconnect is needed
  * @server needs to be previously set to CifsNeedReconnect.
- *
+ * @mark_smb_session: whether even sessions need to be marked
  */
 void
 cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server,
diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c
index 831f42458bf6d..30e040da4f096 100644
--- a/fs/cifs/dfs_cache.c
+++ b/fs/cifs/dfs_cache.c
@@ -1355,7 +1355,7 @@ static void mark_for_reconnect_if_needed(struct cifs_tcon *tcon, struct dfs_cach
 	}
 
 	cifs_dbg(FYI, "%s: no cached or matched targets. mark dfs share for reconnect.\n", __func__);
-	cifs_mark_tcp_ses_conns_for_reconnect(tcon->ses->server, true);
+	cifs_signal_cifsd_for_reconnect(tcon->ses->server, true);
 }
 
 /* Refresh dfs referral of tcon and mark it for reconnect if needed */
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index b2fb7bd119366..c71c9a44bef4b 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -228,7 +228,7 @@ cifs_get_next_mid(struct TCP_Server_Info *server)
 	spin_unlock(&GlobalMid_Lock);
 
 	if (reconnect) {
-		cifs_mark_tcp_ses_conns_for_reconnect(server, false);
+		cifs_signal_cifsd_for_reconnect(server, false);
 	}
 
 	return mid;
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index a4c3e027cca25..eeb1a699bd6f2 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -430,7 +430,7 @@ unmask:
 		 * be taken as the remainder of this one. We need to kill the
 		 * socket so the server throws away the partial SMB
 		 */
-		cifs_mark_tcp_ses_conns_for_reconnect(server, false);
+		cifs_signal_cifsd_for_reconnect(server, false);
 		trace_smb3_partial_send_reconnect(server->CurrentMid,
 						  server->conn_id, server->hostname);
 	}
-- 
GitLab


From fe83f5eae432ccc8e90082d6ed506d5233547473 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Wed, 16 Mar 2022 22:05:52 +0100
Subject: [PATCH 1576/1586] kvm/emulate: Fix SETcc emulation function offsets
 with SLS

The commit in Fixes started adding INT3 after RETs as a mitigation
against straight-line speculation.

The fastop SETcc implementation in kvm's insn emulator uses macro magic
to generate all possible SETcc functions and to jump to them when
emulating the respective instruction.

However, it hardcodes the size and alignment of those functions to 4: a
three-byte SETcc insn and a single-byte RET. BUT, with SLS, there's an
INT3 that gets slapped after the RET, which brings the whole scheme out
of alignment:

  15:   0f 90 c0                seto   %al
  18:   c3                      ret
  19:   cc                      int3
  1a:   0f 1f 00                nopl   (%rax)
  1d:   0f 91 c0                setno  %al
  20:   c3                      ret
  21:   cc                      int3
  22:   0f 1f 00                nopl   (%rax)
  25:   0f 92 c0                setb   %al
  28:   c3                      ret
  29:   cc                      int3

and this explodes like this:

  int3: 0000 [#1] PREEMPT SMP PTI
  CPU: 0 PID: 2435 Comm: qemu-system-x86 Not tainted 5.17.0-rc8-sls #1
  Hardware name: Dell Inc. Precision WorkStation T3400  /0TP412, BIOS A14 04/30/2012
  RIP: 0010:setc+0x5/0x8 [kvm]
  Code: 00 00 0f 1f 00 0f b6 05 43 24 06 00 c3 cc 0f 1f 80 00 00 00 00 0f 90 c0 c3 cc 0f \
	  1f 00 0f 91 c0 c3 cc 0f 1f 00 0f 92 c0 c3 cc <0f> 1f 00 0f 93 c0 c3 cc 0f 1f 00 \
	  0f 94 c0 c3 cc 0f 1f 00 0f 95 c0
  Call Trace:
   <TASK>
   ? x86_emulate_insn [kvm]
   ? x86_emulate_instruction [kvm]
   ? vmx_handle_exit [kvm_intel]
   ? kvm_arch_vcpu_ioctl_run [kvm]
   ? kvm_vcpu_ioctl [kvm]
   ? __x64_sys_ioctl
   ? do_syscall_64
   ? entry_SYSCALL_64_after_hwframe
   </TASK>

Raise the alignment value when SLS is enabled and use a macro for that
instead of hard-coding naked numbers.

Fixes: e463a09af2f0 ("x86: Add straight-line-speculation mitigation")
Reported-by: Jamie Heilman <jamie@audible.transient.net>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Jamie Heilman <jamie@audible.transient.net>
Link: https://lore.kernel.org/r/YjGzJwjrvxg5YZ0Z@audible.transient.net
[Add a comment and a bit of safety checking, since this is going to be changed
 again for IBT support. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/emulate.c | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 5719d8cfdbd90..e86d610dc6b7a 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -429,8 +429,23 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop);
 	FOP_END
 
 /* Special case for SETcc - 1 instruction per cc */
+
+/*
+ * Depending on .config the SETcc functions look like:
+ *
+ * SETcc %al   [3 bytes]
+ * RET         [1 byte]
+ * INT3        [1 byte; CONFIG_SLS]
+ *
+ * Which gives possible sizes 4 or 5.  When rounded up to the
+ * next power-of-two alignment they become 4 or 8.
+ */
+#define SETCC_LENGTH	(4 + IS_ENABLED(CONFIG_SLS))
+#define SETCC_ALIGN	(4 << IS_ENABLED(CONFIG_SLS))
+static_assert(SETCC_LENGTH <= SETCC_ALIGN);
+
 #define FOP_SETCC(op) \
-	".align 4 \n\t" \
+	".align " __stringify(SETCC_ALIGN) " \n\t" \
 	".type " #op ", @function \n\t" \
 	#op ": \n\t" \
 	#op " %al \n\t" \
@@ -1047,7 +1062,7 @@ static int em_bsr_c(struct x86_emulate_ctxt *ctxt)
 static __always_inline u8 test_cc(unsigned int condition, unsigned long flags)
 {
 	u8 rc;
-	void (*fop)(void) = (void *)em_setcc + 4 * (condition & 0xf);
+	void (*fop)(void) = (void *)em_setcc + SETCC_ALIGN * (condition & 0xf);
 
 	flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF;
 	asm("push %[flags]; popf; " CALL_NOSPEC
-- 
GitLab


From 4fc5f5346592cdc91689455d83885b0af65d71b8 Mon Sep 17 00:00:00 2001
From: Jakob Koschel <jakobkoschel@gmail.com>
Date: Sat, 19 Mar 2022 21:27:04 +0100
Subject: [PATCH 1577/1586] nfsd: fix using the correct variable for sizeof()

While the original code is valid, it is not the obvious choice for the
sizeof() call and in preparation to limit the scope of the list iterator
variable the sizeof should be changed to the size of the destination.

Signed-off-by: Jakob Koschel <jakobkoschel@gmail.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfs4layouts.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index 6d1b5bb051c56..2c05692a9abf1 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -422,7 +422,7 @@ nfsd4_insert_layout(struct nfsd4_layoutget *lgp, struct nfs4_layout_stateid *ls)
 	new = kmem_cache_alloc(nfs4_layout_cache, GFP_KERNEL);
 	if (!new)
 		return nfserr_jukebox;
-	memcpy(&new->lo_seg, seg, sizeof(lp->lo_seg));
+	memcpy(&new->lo_seg, seg, sizeof(new->lo_seg));
 	new->lo_state = ls;
 
 	spin_lock(&fp->fi_lock);
-- 
GitLab


From f443e374ae131c168a065ea1748feac6b2e76613 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 20 Mar 2022 13:14:17 -0700
Subject: [PATCH 1578/1586] Linux 5.17

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 55a30ca693508..7214f075e1f06 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 5
 PATCHLEVEL = 17
 SUBLEVEL = 0
-EXTRAVERSION = -rc8
+EXTRAVERSION =
 NAME = Superb Owl
 
 # *DOCUMENTATION*
-- 
GitLab


From 53d862fac4a09b9c56cca0433fa9de5732fd05a1 Mon Sep 17 00:00:00 2001
From: John David Anglin <dave.anglin@bell.net>
Date: Sat, 19 Mar 2022 19:04:15 +0000
Subject: [PATCH 1579/1586] parisc: Fix invalidate/flush vmap routines

Cache move-in for virtual accesses is controlled by the TLB.  Thus,
we must generally purge TLB entries before flushing.  The flush routines
must use TLB entries that inhibit cache move-in.

V2: Load physical address prior to flushing TLB.  In flush_cache_page,
flush TLB when flushing and purging.

V3: Don't flush when start equals end.

Signed-off-by: John David Anglin <dave.anglin@bell.net>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 arch/parisc/kernel/cache.c | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index d41fee3a98746..456e879d34a8d 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -611,8 +611,8 @@ void
 flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long pfn)
 {
 	if (pfn_valid(pfn)) {
+		flush_tlb_page(vma, vmaddr);
 		if (likely(vma->vm_mm->context.space_id)) {
-			flush_tlb_page(vma, vmaddr);
 			__flush_cache_page(vma, vmaddr, PFN_PHYS(pfn));
 		} else {
 			__purge_cache_page(vma, vmaddr, PFN_PHYS(pfn));
@@ -624,6 +624,7 @@ void flush_kernel_vmap_range(void *vaddr, int size)
 {
 	unsigned long start = (unsigned long)vaddr;
 	unsigned long end = start + size;
+	unsigned long flags, physaddr;
 
 	if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) &&
 	    (unsigned long)size >= parisc_cache_flush_threshold) {
@@ -632,8 +633,14 @@ void flush_kernel_vmap_range(void *vaddr, int size)
 		return;
 	}
 
-	flush_kernel_dcache_range_asm(start, end);
-	flush_tlb_kernel_range(start, end);
+	while (start < end) {
+		physaddr = lpa(start);
+		purge_tlb_start(flags);
+		pdtlb(SR_KERNEL, start);
+		purge_tlb_end(flags);
+		flush_dcache_page_asm(physaddr, start);
+		start += PAGE_SIZE;
+	}
 }
 EXPORT_SYMBOL(flush_kernel_vmap_range);
 
@@ -641,6 +648,7 @@ void invalidate_kernel_vmap_range(void *vaddr, int size)
 {
 	unsigned long start = (unsigned long)vaddr;
 	unsigned long end = start + size;
+	unsigned long flags, physaddr;
 
 	if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) &&
 	    (unsigned long)size >= parisc_cache_flush_threshold) {
@@ -649,7 +657,13 @@ void invalidate_kernel_vmap_range(void *vaddr, int size)
 		return;
 	}
 
-	purge_kernel_dcache_range_asm(start, end);
-	flush_tlb_kernel_range(start, end);
+	while (start < end) {
+		physaddr = lpa(start);
+		purge_tlb_start(flags);
+		pdtlb(SR_KERNEL, start);
+		purge_tlb_end(flags);
+		purge_dcache_page_asm(physaddr, start);
+		start += PAGE_SIZE;
+	}
 }
 EXPORT_SYMBOL(invalidate_kernel_vmap_range);
-- 
GitLab


From a635415a064e77bcfbf43da413fd9dfe0bbed9cb Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 21 Mar 2022 08:11:52 +0000
Subject: [PATCH 1580/1586] watch_queue: Fix NULL dereference in error cleanup

In watch_queue_set_size(), the error cleanup code doesn't take account of
the fact that __free_page() can't handle a NULL pointer when trying to free
up buffer pages that did get allocated.

Fix this by only calling __free_page() on the pages actually allocated.

Without the fix, this can lead to something like the following:

BUG: KASAN: null-ptr-deref in __free_pages+0x1f/0x1b0 mm/page_alloc.c:5473
Read of size 4 at addr 0000000000000034 by task syz-executor168/3599
...
Call Trace:
 <TASK>
 __dump_stack lib/dump_stack.c:88 [inline]
 dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106
 __kasan_report mm/kasan/report.c:446 [inline]
 kasan_report.cold+0x66/0xdf mm/kasan/report.c:459
 check_region_inline mm/kasan/generic.c:183 [inline]
 kasan_check_range+0x13d/0x180 mm/kasan/generic.c:189
 instrument_atomic_read include/linux/instrumented.h:71 [inline]
 atomic_read include/linux/atomic/atomic-instrumented.h:27 [inline]
 page_ref_count include/linux/page_ref.h:67 [inline]
 put_page_testzero include/linux/mm.h:717 [inline]
 __free_pages+0x1f/0x1b0 mm/page_alloc.c:5473
 watch_queue_set_size+0x499/0x630 kernel/watch_queue.c:275
 pipe_ioctl+0xac/0x2b0 fs/pipe.c:632
 vfs_ioctl fs/ioctl.c:51 [inline]
 __do_sys_ioctl fs/ioctl.c:874 [inline]
 __se_sys_ioctl fs/ioctl.c:860 [inline]
 __x64_sys_ioctl+0x193/0x200 fs/ioctl.c:860
 do_syscall_x64 arch/x86/entry/common.c:50 [inline]
 do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
 entry_SYSCALL_64_after_hwframe+0x44/0xae

Fixes: c73be61cede5 ("pipe: Add general notification queue support")
Reported-and-tested-by: syzbot+d55757faa9b80590767b@syzkaller.appspotmail.com
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Fabio M. De Francesco <fmdefrancesco@gmail.com>
---
 kernel/watch_queue.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c
index 00703444a2194..5848d4795816d 100644
--- a/kernel/watch_queue.c
+++ b/kernel/watch_queue.c
@@ -271,7 +271,7 @@ long watch_queue_set_size(struct pipe_inode_info *pipe, unsigned int nr_notes)
 	return 0;
 
 error_p:
-	for (i = 0; i < nr_pages; i++)
+	while (--i >= 0)
 		__free_page(pages[i]);
 	kfree(pages);
 error:
-- 
GitLab


From 3d8dcf278b1ee1eff1e90be848fa2237db4c07a7 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 21 Mar 2022 11:18:54 +0000
Subject: [PATCH 1581/1586] watch_queue: Actually free the watch

free_watch() does everything barring actually freeing the watch object.  Fix
this by adding the missing kfree.

kmemleak produces a report something like the following.  Note that as an
address can be seen in the first word, the watch would appear to have gone
through call_rcu().

BUG: memory leak
unreferenced object 0xffff88810ce4a200 (size 96):
  comm "syz-executor352", pid 3605, jiffies 4294947473 (age 13.720s)
  hex dump (first 32 bytes):
    e0 82 48 0d 81 88 ff ff 00 00 00 00 00 00 00 00  ..H.............
    80 a2 e4 0c 81 88 ff ff 00 00 00 00 00 00 00 00  ................
  backtrace:
    [<ffffffff8214e6cc>] kmalloc include/linux/slab.h:581 [inline]
    [<ffffffff8214e6cc>] kzalloc include/linux/slab.h:714 [inline]
    [<ffffffff8214e6cc>] keyctl_watch_key+0xec/0x2e0 security/keys/keyctl.c:1800
    [<ffffffff8214ec84>] __do_sys_keyctl+0x3c4/0x490 security/keys/keyctl.c:2016
    [<ffffffff84493a25>] do_syscall_x64 arch/x86/entry/common.c:50 [inline]
    [<ffffffff84493a25>] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
    [<ffffffff84600068>] entry_SYSCALL_64_after_hwframe+0x44/0xae

Fixes: c73be61cede5 ("pipe: Add general notification queue support")
Reported-and-tested-by: syzbot+6e2de48f06cdb2884bfc@syzkaller.appspotmail.com
Signed-off-by: David Howells <dhowells@redhat.com>
---
 kernel/watch_queue.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c
index 5848d4795816d..3990e4df3d7b0 100644
--- a/kernel/watch_queue.c
+++ b/kernel/watch_queue.c
@@ -395,6 +395,7 @@ static void free_watch(struct rcu_head *rcu)
 	put_watch_queue(rcu_access_pointer(watch->queue));
 	atomic_dec(&watch->cred->user->nr_watches);
 	put_cred(watch->cred);
+	kfree(watch);
 }
 
 static void __put_watch(struct kref *kref)
-- 
GitLab


From c7500c1b53bfc083e8968cdce13a5a9d1ca9bf83 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 16 Feb 2022 16:24:02 -0800
Subject: [PATCH 1582/1586] um: Allow builds with Clang

Add SUBARCH target for Clang+um (which must go last, not alphabetically,
so the other SUBARCHes are assigned). Remove open-coded "DEFINE"
macro, instead using linux/kbuild.h's version which was updated to use
Clang-friendly assembly in commit cf0c3e68aa81 ("kbuild: fix asm-offset
generation to work with clang"). Redefine "DEFINE_LONGS" in terms of
"COMMENT" and "DEFINE" so that the intended coment actually has useful
content. Add a missed "break" to avoid implicit fall-through warnings.

This lets me run KUnit tests with Clang:

$ ./tools/testing/kunit/kunit.py run --make_options LLVM=1
...

Cc: Jeff Dike <jdike@addtoit.com>
Cc: Richard Weinberger <richard@nod.at>
Cc: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Cc: Masahiro Yamada <masahiroy@kernel.org>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Nathan Chancellor <nathan@kernel.org>
Cc: David Gow <davidgow@google.com>
Cc: linux-um@lists.infradead.org
Cc: linux-kbuild@vger.kernel.org
Cc: linux-kselftest@vger.kernel.org
Cc: kunit-dev@googlegroups.com
Cc: llvm@lists.linux.dev
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Link: https://lore.kernel.org/lkml/Yg2YubZxvYvx7%2Fnm@dev-arch.archlinux-ax161/
Tested-by: David Gow <davidgow@google.com>
Link: https://lore.kernel.org/lkml/CABVgOSk=oFxsbSbQE-v65VwR2+mXeGXDDjzq8t7FShwjJ3+kUg@mail.gmail.com/
Signed-off-by: Kees Cook <keescook@chromium.org>
---
v1: https://lore.kernel.org/lkml/20220217002843.2312603-1-keescook@chromium.org
v2: https://lore.kernel.org/lkml/20220224055831.1854786-1-keescook@chromium.org
v3:
 - use kbuild.h to avoid duplication (Masahiro)
 - fix intended comments (Masahiro)
 - use SUBARCH (Nathan)
---
 arch/um/os-Linux/execvp.c  | 1 +
 arch/x86/um/user-offsets.c | 9 ++++-----
 scripts/Makefile.clang     | 1 +
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/arch/um/os-Linux/execvp.c b/arch/um/os-Linux/execvp.c
index 84a0777c2a450..c09a5fd5e2255 100644
--- a/arch/um/os-Linux/execvp.c
+++ b/arch/um/os-Linux/execvp.c
@@ -93,6 +93,7 @@ int execvp_noalloc(char *buf, const char *file, char *const argv[])
 					   up finding no executable we can use, we want to diagnose
 					   that we did find one but were denied access.  */
 					got_eacces = 1;
+					break;
 				case ENOENT:
 				case ESTALE:
 				case ENOTDIR:
diff --git a/arch/x86/um/user-offsets.c b/arch/x86/um/user-offsets.c
index bae61554abcc1..e54a9814ccf16 100644
--- a/arch/x86/um/user-offsets.c
+++ b/arch/x86/um/user-offsets.c
@@ -8,12 +8,11 @@
 #define __FRAME_OFFSETS
 #include <linux/ptrace.h>
 #include <asm/types.h>
+#include <linux/kbuild.h>
 
-#define DEFINE(sym, val) \
-	asm volatile("\n->" #sym " %0 " #val : : "i" (val))
-
-#define DEFINE_LONGS(sym, val) \
-	asm volatile("\n->" #sym " %0 " #val : : "i" (val/sizeof(unsigned long)))
+#define DEFINE_LONGS(sym, val)	\
+	COMMENT(#val " / sizeof(unsigned long)");	\
+	DEFINE(sym, val / sizeof(unsigned long))
 
 void foo(void)
 {
diff --git a/scripts/Makefile.clang b/scripts/Makefile.clang
index 51fc23e2e9e50..87285b76adb24 100644
--- a/scripts/Makefile.clang
+++ b/scripts/Makefile.clang
@@ -10,6 +10,7 @@ CLANG_TARGET_FLAGS_powerpc	:= powerpc64le-linux-gnu
 CLANG_TARGET_FLAGS_riscv	:= riscv64-linux-gnu
 CLANG_TARGET_FLAGS_s390		:= s390x-linux-gnu
 CLANG_TARGET_FLAGS_x86		:= x86_64-linux-gnu
+CLANG_TARGET_FLAGS_um		:= $(CLANG_TARGET_FLAGS_$(SUBARCH))
 CLANG_TARGET_FLAGS		:= $(CLANG_TARGET_FLAGS_$(SRCARCH))
 
 ifeq ($(CROSS_COMPILE),)
-- 
GitLab


From 02788ebcf521fe78c24eb221fd1ed7f86792c330 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 16 Feb 2022 16:03:41 -0800
Subject: [PATCH 1583/1586] lib: stackinit: Convert to KUnit

Convert stackinit unit tests to KUnit, for better integration
into the kernel self test framework. Includes a rename of
test_stackinit.c to stackinit_kunit.c, and CONFIG_TEST_STACKINIT to
CONFIG_STACKINIT_KUNIT_TEST.

Adjust expected test results based on which stack initialization method
was chosen:

 $ CMD="./tools/testing/kunit/kunit.py run stackinit --raw_output \
        --arch=x86_64 --kconfig_add"

 $ $CMD | grep stackinit:
 # stackinit: pass:36 fail:0 skip:29 total:65

 $ $CMD CONFIG_GCC_PLUGIN_STRUCTLEAK_USER=y | grep stackinit:
 # stackinit: pass:37 fail:0 skip:28 total:65

 $ $CMD CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF=y | grep stackinit:
 # stackinit: pass:55 fail:0 skip:10 total:65

 $ $CMD CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF_ALL=y | grep stackinit:
 # stackinit: pass:62 fail:0 skip:3 total:65

 $ $CMD CONFIG_INIT_STACK_ALL_PATTERN=y --make_option LLVM=1 | grep stackinit:
 # stackinit: pass:60 fail:0 skip:5 total:65

 $ $CMD CONFIG_INIT_STACK_ALL_ZERO=y --make_option LLVM=1 | grep stackinit:
 # stackinit: pass:60 fail:0 skip:5 total:65

Temporarily remove the userspace-build mode, which will be restored in a
later patch.

Expand the size of the pre-case switch variable so it doesn't get
accidentally cleared.

Cc: David Gow <davidgow@google.com>
Cc: Daniel Latypov <dlatypov@google.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
v1: https://lore.kernel.org/lkml/20220224055145.1853657-1-keescook@chromium.org
v2:
 - split "userspace KUnit stub" into separate header and patch (Daniel)
 - Improve commit log and comments (David)
 - Provide mapping of expected XFAIL tests to CONFIGs (David)
---
 lib/Kconfig.debug                           |  22 +-
 lib/Makefile                                |   4 +-
 lib/{test_stackinit.c => stackinit_kunit.c} | 269 ++++++++------------
 3 files changed, 121 insertions(+), 174 deletions(-)
 rename lib/{test_stackinit.c => stackinit_kunit.c} (66%)

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 14d90d03bc8d9..a5556ab052405 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -2511,6 +2511,17 @@ config OVERFLOW_KUNIT_TEST
 
 	  If unsure, say N.
 
+config STACKINIT_KUNIT_TEST
+	tristate "Test level of stack variable initialization" if !KUNIT_ALL_TESTS
+	depends on KUNIT
+	default KUNIT_ALL_TESTS
+	help
+	  Test if the kernel is zero-initializing stack variables and
+	  padding. Coverage is controlled by compiler flags,
+	  CONFIG_INIT_STACK_ALL_PATTERN, CONFIG_INIT_STACK_ALL_ZERO,
+	  CONFIG_GCC_PLUGIN_STRUCTLEAK, CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF,
+	  or CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF_ALL.
+
 config TEST_UDELAY
 	tristate "udelay test driver"
 	help
@@ -2602,17 +2613,6 @@ config TEST_OBJAGG
 	  Enable this option to test object aggregation manager on boot
 	  (or module load).
 
-
-config TEST_STACKINIT
-	tristate "Test level of stack variable initialization"
-	help
-	  Test if the kernel is zero-initializing stack variables and
-	  padding. Coverage is controlled by compiler flags,
-	  CONFIG_GCC_PLUGIN_STRUCTLEAK, CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF,
-	  or CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF_ALL.
-
-	  If unsure, say N.
-
 config TEST_MEMINIT
 	tristate "Test heap/page initialization"
 	help
diff --git a/lib/Makefile b/lib/Makefile
index fdfcbfaff32f1..353bc09ce38d5 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -93,8 +93,6 @@ obj-$(CONFIG_TEST_KMOD) += test_kmod.o
 obj-$(CONFIG_TEST_DEBUG_VIRTUAL) += test_debug_virtual.o
 obj-$(CONFIG_TEST_MEMCAT_P) += test_memcat_p.o
 obj-$(CONFIG_TEST_OBJAGG) += test_objagg.o
-CFLAGS_test_stackinit.o += $(call cc-disable-warning, switch-unreachable)
-obj-$(CONFIG_TEST_STACKINIT) += test_stackinit.o
 obj-$(CONFIG_TEST_BLACKHOLE_DEV) += test_blackhole_dev.o
 obj-$(CONFIG_TEST_MEMINIT) += test_meminit.o
 obj-$(CONFIG_TEST_LOCKUP) += test_lockup.o
@@ -363,6 +361,8 @@ obj-$(CONFIG_CMDLINE_KUNIT_TEST) += cmdline_kunit.o
 obj-$(CONFIG_SLUB_KUNIT_TEST) += slub_kunit.o
 obj-$(CONFIG_MEMCPY_KUNIT_TEST) += memcpy_kunit.o
 obj-$(CONFIG_OVERFLOW_KUNIT_TEST) += overflow_kunit.o
+CFLAGS_stackinit_kunit.o += $(call cc-disable-warning, switch-unreachable)
+obj-$(CONFIG_STACKINIT_KUNIT_TEST) += stackinit_kunit.o
 
 obj-$(CONFIG_GENERIC_LIB_DEVMEM_IS_ALLOWED) += devmem_is_allowed.o
 
diff --git a/lib/test_stackinit.c b/lib/stackinit_kunit.c
similarity index 66%
rename from lib/test_stackinit.c
rename to lib/stackinit_kunit.c
index a3c74e6a21ffa..35c69aa425b20 100644
--- a/lib/test_stackinit.c
+++ b/lib/stackinit_kunit.c
@@ -2,76 +2,21 @@
 /*
  * Test cases for compiler-based stack variable zeroing via
  * -ftrivial-auto-var-init={zero,pattern} or CONFIG_GCC_PLUGIN_STRUCTLEAK*.
+ * For example, see:
+ * https://www.kernel.org/doc/html/latest/dev-tools/kunit/kunit-tool.html#configuring-building-and-running-tests
+ *	./tools/testing/kunit/kunit.py run stackinit [--raw_output] \
+ *		--make_option LLVM=1 \
+ *		--kconfig_add CONFIG_INIT_STACK_ALL_ZERO=y
  *
- * External build example:
- *	clang -O2 -Wall -ftrivial-auto-var-init=pattern \
- *		-o test_stackinit test_stackinit.c
  */
-#ifdef __KERNEL__
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+#include <kunit/test.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/string.h>
 
-#else
-
-/* Userspace headers. */
-#include <stdio.h>
-#include <stdint.h>
-#include <string.h>
-#include <stdbool.h>
-#include <errno.h>
-#include <sys/types.h>
-
-/* Linux kernel-ism stubs for stand-alone userspace build. */
-#define KBUILD_MODNAME		"stackinit"
-#define pr_fmt(fmt)		KBUILD_MODNAME ": " fmt
-#define pr_err(fmt, ...)	fprintf(stderr, pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_warn(fmt, ...)	fprintf(stderr, pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_info(fmt, ...)	fprintf(stdout, pr_fmt(fmt), ##__VA_ARGS__)
-#define __init			/**/
-#define __exit			/**/
-#define __user			/**/
-#define noinline		__attribute__((__noinline__))
-#define __aligned(x)		__attribute__((__aligned__(x)))
-#ifdef __clang__
-# define __compiletime_error(message) /**/
-#else
-# define __compiletime_error(message) __attribute__((__error__(message)))
-#endif
-#define __compiletime_assert(condition, msg, prefix, suffix)		\
-	do {								\
-		extern void prefix ## suffix(void) __compiletime_error(msg); \
-		if (!(condition))					\
-			prefix ## suffix();				\
-	} while (0)
-#define _compiletime_assert(condition, msg, prefix, suffix) \
-	__compiletime_assert(condition, msg, prefix, suffix)
-#define compiletime_assert(condition, msg) \
-	_compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
-#define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
-#define BUILD_BUG_ON(condition) \
-	BUILD_BUG_ON_MSG(condition, "BUILD_BUG_ON failed: " #condition)
-typedef uint8_t			u8;
-typedef uint16_t		u16;
-typedef uint32_t		u32;
-typedef uint64_t		u64;
-
-#define module_init(func)	static int (*do_init)(void) = func
-#define module_exit(func)	static void (*do_exit)(void) = func
-#define MODULE_LICENSE(str)	int main(void) {		\
-					int rc;			\
-					/* License: str */	\
-					rc = do_init();		\
-					if (rc == 0)		\
-						do_exit();	\
-					return rc;		\
-				}
-
-#endif /* __KERNEL__ */
-
 /* Exfiltration buffer. */
 #define MAX_VAR_SIZE	128
 static u8 check_buf[MAX_VAR_SIZE];
@@ -201,7 +146,7 @@ static bool range_contains(char *haystack_start, size_t haystack_size,
  */
 #define DEFINE_TEST_DRIVER(name, var_type, which, xfail)	\
 /* Returns 0 on success, 1 on failure. */			\
-static noinline __init int test_ ## name (void)			\
+static noinline void test_ ## name (struct kunit *test)		\
 {								\
 	var_type zero INIT_CLONE_ ## which;			\
 	int ignored;						\
@@ -220,10 +165,8 @@ static noinline __init int test_ ## name (void)			\
 	/* Verify all bytes overwritten with 0xFF. */		\
 	for (sum = 0, i = 0; i < target_size; i++)		\
 		sum += (check_buf[i] != 0xFF);			\
-	if (sum) {						\
-		pr_err(#name ": leaf fill was not 0xFF!?\n");	\
-		return 1;					\
-	}							\
+	KUNIT_ASSERT_EQ_MSG(test, sum, 0,			\
+			    "leaf fill was not 0xFF!?\n");	\
 	/* Clear entire check buffer for later bit tests. */	\
 	memset(check_buf, 0x00, sizeof(check_buf));		\
 	/* Extract stack-defined variable contents. */		\
@@ -231,32 +174,29 @@ static noinline __init int test_ ## name (void)			\
 				FETCH_ARG_ ## which(zero));	\
 								\
 	/* Validate that compiler lined up fill and target. */	\
-	if (!range_contains(fill_start, fill_size,		\
-			    target_start, target_size)) {	\
-		pr_err(#name ": stack fill missed target!?\n");	\
-		pr_err(#name ": fill %zu wide\n", fill_size);	\
-		pr_err(#name ": target offset by %d\n",	\
-			(int)((ssize_t)(uintptr_t)fill_start -	\
-			(ssize_t)(uintptr_t)target_start));	\
-		return 1;					\
-	}							\
+	KUNIT_ASSERT_TRUE_MSG(test,				\
+		range_contains(fill_start, fill_size,		\
+			    target_start, target_size),		\
+		"stack fill missed target!? "			\
+		"(fill %zu wide, target offset by %d)\n",	\
+		fill_size,					\
+		(int)((ssize_t)(uintptr_t)fill_start -		\
+		      (ssize_t)(uintptr_t)target_start));	\
 								\
 	/* Look for any bytes still 0xFF in check region. */	\
 	for (sum = 0, i = 0; i < target_size; i++)		\
 		sum += (check_buf[i] == 0xFF);			\
 								\
-	if (sum == 0) {						\
-		pr_info(#name " ok\n");				\
-		return 0;					\
-	} else {						\
-		pr_warn(#name " %sFAIL (uninit bytes: %d)\n",	\
-			(xfail) ? "X" : "", sum);		\
-		return (xfail) ? 0 : 1;				\
-	}							\
+	if (sum != 0 && xfail)					\
+		kunit_skip(test,				\
+			   "XFAIL uninit bytes: %d\n",		\
+			   sum);				\
+	KUNIT_ASSERT_EQ_MSG(test, sum, 0,			\
+		"uninit bytes: %d\n", sum);			\
 }
 #define DEFINE_TEST(name, var_type, which, init_level, xfail)	\
 /* no-op to force compiler into ignoring "uninitialized" vars */\
-static noinline __init DO_NOTHING_TYPE_ ## which(var_type)	\
+static noinline DO_NOTHING_TYPE_ ## which(var_type)		\
 do_nothing_ ## name(var_type *ptr)				\
 {								\
 	/* Will always be true, but compiler doesn't know. */	\
@@ -265,9 +205,8 @@ do_nothing_ ## name(var_type *ptr)				\
 	else							\
 		return DO_NOTHING_RETURN_ ## which(ptr + 1);	\
 }								\
-static noinline __init int leaf_ ## name(unsigned long sp,	\
-					 bool fill,		\
-					 var_type *arg)		\
+static noinline int leaf_ ## name(unsigned long sp, bool fill,	\
+				  var_type *arg)		\
 {								\
 	char buf[VAR_BUFFER];					\
 	var_type var						\
@@ -341,6 +280,27 @@ struct test_user {
 	unsigned long four;
 };
 
+#define ALWAYS_PASS	WANT_SUCCESS
+#define ALWAYS_FAIL	XFAIL
+
+#ifdef CONFIG_INIT_STACK_NONE
+# define USER_PASS	XFAIL
+# define BYREF_PASS	XFAIL
+# define STRONG_PASS	XFAIL
+#elif defined(CONFIG_GCC_PLUGIN_STRUCTLEAK_USER)
+# define USER_PASS	WANT_SUCCESS
+# define BYREF_PASS	XFAIL
+# define STRONG_PASS	XFAIL
+#elif defined(CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF)
+# define USER_PASS	WANT_SUCCESS
+# define BYREF_PASS	WANT_SUCCESS
+# define STRONG_PASS	XFAIL
+#else
+# define USER_PASS	WANT_SUCCESS
+# define BYREF_PASS	WANT_SUCCESS
+# define STRONG_PASS	WANT_SUCCESS
+#endif
+
 #define DEFINE_SCALAR_TEST(name, init, xfail)			\
 		DEFINE_TEST(name ## _ ## init, name, SCALAR,	\
 			    init, xfail)
@@ -364,27 +324,26 @@ struct test_user {
 		DEFINE_STRUCT_TEST(trailing_hole, init, xfail);	\
 		DEFINE_STRUCT_TEST(packed, init, xfail)
 
-#define DEFINE_STRUCT_INITIALIZER_TESTS(base)			\
+#define DEFINE_STRUCT_INITIALIZER_TESTS(base, xfail)		\
 		DEFINE_STRUCT_TESTS(base ## _ ## partial,	\
-				    WANT_SUCCESS);		\
-		DEFINE_STRUCT_TESTS(base ## _ ## all,		\
-				    WANT_SUCCESS)
+				    xfail);			\
+		DEFINE_STRUCT_TESTS(base ## _ ## all, xfail)
 
 /* These should be fully initialized all the time! */
-DEFINE_SCALAR_TESTS(zero, WANT_SUCCESS);
-DEFINE_STRUCT_TESTS(zero, WANT_SUCCESS);
+DEFINE_SCALAR_TESTS(zero, ALWAYS_PASS);
+DEFINE_STRUCT_TESTS(zero, ALWAYS_PASS);
 /* Struct initializers: padding may be left uninitialized. */
-DEFINE_STRUCT_INITIALIZER_TESTS(static);
-DEFINE_STRUCT_INITIALIZER_TESTS(dynamic);
-DEFINE_STRUCT_INITIALIZER_TESTS(runtime);
-DEFINE_STRUCT_INITIALIZER_TESTS(assigned_static);
-DEFINE_STRUCT_INITIALIZER_TESTS(assigned_dynamic);
-DEFINE_STRUCT_TESTS(assigned_copy, XFAIL);
+DEFINE_STRUCT_INITIALIZER_TESTS(static, STRONG_PASS);
+DEFINE_STRUCT_INITIALIZER_TESTS(dynamic, STRONG_PASS);
+DEFINE_STRUCT_INITIALIZER_TESTS(runtime, STRONG_PASS);
+DEFINE_STRUCT_INITIALIZER_TESTS(assigned_static, STRONG_PASS);
+DEFINE_STRUCT_INITIALIZER_TESTS(assigned_dynamic, STRONG_PASS);
+DEFINE_STRUCT_TESTS(assigned_copy, ALWAYS_FAIL);
 /* No initialization without compiler instrumentation. */
-DEFINE_SCALAR_TESTS(none, WANT_SUCCESS);
-DEFINE_STRUCT_TESTS(none, WANT_SUCCESS);
+DEFINE_SCALAR_TESTS(none, STRONG_PASS);
+DEFINE_STRUCT_TESTS(none, BYREF_PASS);
 /* Initialization of members with __user attribute. */
-DEFINE_TEST(user, struct test_user, STRUCT, none, WANT_SUCCESS);
+DEFINE_TEST(user, struct test_user, STRUCT, none, USER_PASS);
 
 /*
  * Check two uses through a variable declaration outside either path,
@@ -398,7 +357,7 @@ static int noinline __leaf_switch_none(int path, bool fill)
 		 * This is intentionally unreachable. To silence the
 		 * warning, build with -Wno-switch-unreachable
 		 */
-		uint64_t var;
+		uint64_t var[10];
 
 	case 1:
 		target_start = &var;
@@ -423,19 +382,19 @@ static int noinline __leaf_switch_none(int path, bool fill)
 		memcpy(check_buf, target_start, target_size);
 		break;
 	default:
-		var = 5;
-		return var & forced_mask;
+		var[1] = 5;
+		return var[1] & forced_mask;
 	}
 	return 0;
 }
 
-static noinline __init int leaf_switch_1_none(unsigned long sp, bool fill,
+static noinline int leaf_switch_1_none(unsigned long sp, bool fill,
 					      uint64_t *arg)
 {
 	return __leaf_switch_none(1, fill);
 }
 
-static noinline __init int leaf_switch_2_none(unsigned long sp, bool fill,
+static noinline int leaf_switch_2_none(unsigned long sp, bool fill,
 					      uint64_t *arg)
 {
 	return __leaf_switch_none(2, fill);
@@ -447,68 +406,56 @@ static noinline __init int leaf_switch_2_none(unsigned long sp, bool fill,
  * non-code areas (i.e. in a switch statement before the first "case").
  * https://bugs.llvm.org/show_bug.cgi?id=44916
  */
-DEFINE_TEST_DRIVER(switch_1_none, uint64_t, SCALAR, XFAIL);
-DEFINE_TEST_DRIVER(switch_2_none, uint64_t, SCALAR, XFAIL);
-
-static int __init test_stackinit_init(void)
-{
-	unsigned int failures = 0;
-
-#define test_scalars(init)	do {				\
-		failures += test_u8_ ## init ();		\
-		failures += test_u16_ ## init ();		\
-		failures += test_u32_ ## init ();		\
-		failures += test_u64_ ## init ();		\
-		failures += test_char_array_ ## init ();	\
-	} while (0)
-
-#define test_structs(init)	do {				\
-		failures += test_small_hole_ ## init ();	\
-		failures += test_big_hole_ ## init ();		\
-		failures += test_trailing_hole_ ## init ();	\
-		failures += test_packed_ ## init ();		\
-	} while (0)
-
+DEFINE_TEST_DRIVER(switch_1_none, uint64_t, SCALAR, ALWAYS_FAIL);
+DEFINE_TEST_DRIVER(switch_2_none, uint64_t, SCALAR, ALWAYS_FAIL);
+
+#define KUNIT_test_scalars(init)			\
+		KUNIT_CASE(test_u8_ ## init),		\
+		KUNIT_CASE(test_u16_ ## init),		\
+		KUNIT_CASE(test_u32_ ## init),		\
+		KUNIT_CASE(test_u64_ ## init),		\
+		KUNIT_CASE(test_char_array_ ## init)
+
+#define KUNIT_test_structs(init)			\
+		KUNIT_CASE(test_small_hole_ ## init),	\
+		KUNIT_CASE(test_big_hole_ ## init),	\
+		KUNIT_CASE(test_trailing_hole_ ## init),\
+		KUNIT_CASE(test_packed_ ## init)	\
+
+static struct kunit_case stackinit_test_cases[] = {
 	/* These are explicitly initialized and should always pass. */
-	test_scalars(zero);
-	test_structs(zero);
+	KUNIT_test_scalars(zero),
+	KUNIT_test_structs(zero),
 	/* Padding here appears to be accidentally always initialized? */
-	test_structs(dynamic_partial);
-	test_structs(assigned_dynamic_partial);
+	KUNIT_test_structs(dynamic_partial),
+	KUNIT_test_structs(assigned_dynamic_partial),
 	/* Padding initialization depends on compiler behaviors. */
-	test_structs(static_partial);
-	test_structs(static_all);
-	test_structs(dynamic_all);
-	test_structs(runtime_partial);
-	test_structs(runtime_all);
-	test_structs(assigned_static_partial);
-	test_structs(assigned_static_all);
-	test_structs(assigned_dynamic_all);
+	KUNIT_test_structs(static_partial),
+	KUNIT_test_structs(static_all),
+	KUNIT_test_structs(dynamic_all),
+	KUNIT_test_structs(runtime_partial),
+	KUNIT_test_structs(runtime_all),
+	KUNIT_test_structs(assigned_static_partial),
+	KUNIT_test_structs(assigned_static_all),
+	KUNIT_test_structs(assigned_dynamic_all),
 	/* Everything fails this since it effectively performs a memcpy(). */
-	test_structs(assigned_copy);
-
+	KUNIT_test_structs(assigned_copy),
 	/* STRUCTLEAK_BYREF_ALL should cover everything from here down. */
-	test_scalars(none);
-	failures += test_switch_1_none();
-	failures += test_switch_2_none();
-
+	KUNIT_test_scalars(none),
+	KUNIT_CASE(test_switch_1_none),
+	KUNIT_CASE(test_switch_2_none),
 	/* STRUCTLEAK_BYREF should cover from here down. */
-	test_structs(none);
-
+	KUNIT_test_structs(none),
 	/* STRUCTLEAK will only cover this. */
-	failures += test_user();
-
-	if (failures == 0)
-		pr_info("all tests passed!\n");
-	else
-		pr_err("failures: %u\n", failures);
+	KUNIT_CASE(test_user),
+	{}
+};
 
-	return failures ? -EINVAL : 0;
-}
-module_init(test_stackinit_init);
+static struct kunit_suite stackinit_test_suite = {
+	.name = "stackinit",
+	.test_cases = stackinit_test_cases,
+};
 
-static void __exit test_stackinit_exit(void)
-{ }
-module_exit(test_stackinit_exit);
+kunit_test_suites(&stackinit_test_suite);
 
 MODULE_LICENSE("GPL");
-- 
GitLab


From e60aeb2dee1a4c28591b8f97d4248787aed10769 Mon Sep 17 00:00:00 2001
From: Daeho Jeong <daehojeong@google.com>
Date: Fri, 18 Mar 2022 12:13:23 -0700
Subject: [PATCH 1584/1586] f2fs: make gc_urgent and gc_segment_mode sysfs node
 readable

Changed a way of showing values of them to use strings.

Signed-off-by: Daeho Jeong <daehojeong@google.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/sysfs.c | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index fe29bcb70f469..fb950a13375f4 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -41,6 +41,16 @@ enum {
 	ATGC_INFO,	/* struct atgc_management */
 };
 
+static const char *gc_mode_names[MAX_GC_MODE] = {
+	"GC_NORMAL",
+	"GC_IDLE_CB",
+	"GC_IDLE_GREEDY",
+	"GC_IDLE_AT",
+	"GC_URGENT_HIGH",
+	"GC_URGENT_LOW",
+	"GC_URGENT_MID"
+};
+
 struct f2fs_attr {
 	struct attribute attr;
 	ssize_t (*show)(struct f2fs_attr *, struct f2fs_sb_info *, char *);
@@ -316,8 +326,13 @@ static ssize_t f2fs_sbi_show(struct f2fs_attr *a,
 		return sysfs_emit(buf, "%u\n", sbi->compr_new_inode);
 #endif
 
+	if (!strcmp(a->attr.name, "gc_urgent"))
+		return sysfs_emit(buf, "%s\n",
+				gc_mode_names[sbi->gc_mode]);
+
 	if (!strcmp(a->attr.name, "gc_segment_mode"))
-		return sysfs_emit(buf, "%u\n", sbi->gc_segment_mode);
+		return sysfs_emit(buf, "%s\n",
+				gc_mode_names[sbi->gc_segment_mode]);
 
 	if (!strcmp(a->attr.name, "gc_reclaimed_segments")) {
 		return sysfs_emit(buf, "%u\n",
-- 
GitLab


From 5b5b4f85b01604389f7a0f11ef180a725bf0e2d4 Mon Sep 17 00:00:00 2001
From: Chao Yu <chao@kernel.org>
Date: Mon, 21 Mar 2022 23:22:11 +0800
Subject: [PATCH 1585/1586] f2fs: fix to do sanity check on
 .cp_pack_total_block_count

As bughunter reported in bugzilla:

https://bugzilla.kernel.org/show_bug.cgi?id=215709

f2fs may hang when mounting a fuzzed image, the dmesg shows as below:

__filemap_get_folio+0x3a9/0x590
pagecache_get_page+0x18/0x60
__get_meta_page+0x95/0x460 [f2fs]
get_checkpoint_version+0x2a/0x1e0 [f2fs]
validate_checkpoint+0x8e/0x2a0 [f2fs]
f2fs_get_valid_checkpoint+0xd0/0x620 [f2fs]
f2fs_fill_super+0xc01/0x1d40 [f2fs]
mount_bdev+0x18a/0x1c0
f2fs_mount+0x15/0x20 [f2fs]
legacy_get_tree+0x28/0x50
vfs_get_tree+0x27/0xc0
path_mount+0x480/0xaa0
do_mount+0x7c/0xa0
__x64_sys_mount+0x8b/0xe0
do_syscall_64+0x38/0xc0
entry_SYSCALL_64_after_hwframe+0x44/0xae

The root cause is cp_pack_total_block_count field in checkpoint was fuzzed
to one, as calcuated, two cp pack block locates in the same block address,
so then read latter cp pack block, it will block on the page lock due to
the lock has already held when reading previous cp pack block, fix it by
adding sanity check for cp_pack_total_block_count.

Cc: stable@vger.kernel.org
Signed-off-by: Chao Yu <chao.yu@oppo.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/checkpoint.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 871eee35a32fc..aba1b8a1ce66b 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -875,6 +875,7 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
 	struct page *cp_page_1 = NULL, *cp_page_2 = NULL;
 	struct f2fs_checkpoint *cp_block = NULL;
 	unsigned long long cur_version = 0, pre_version = 0;
+	unsigned int cp_blocks;
 	int err;
 
 	err = get_checkpoint_version(sbi, cp_addr, &cp_block,
@@ -882,15 +883,16 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
 	if (err)
 		return NULL;
 
-	if (le32_to_cpu(cp_block->cp_pack_total_block_count) >
-					sbi->blocks_per_seg) {
+	cp_blocks = le32_to_cpu(cp_block->cp_pack_total_block_count);
+
+	if (cp_blocks > sbi->blocks_per_seg || cp_blocks <= F2FS_CP_PACKS) {
 		f2fs_warn(sbi, "invalid cp_pack_total_block_count:%u",
 			  le32_to_cpu(cp_block->cp_pack_total_block_count));
 		goto invalid_cp;
 	}
 	pre_version = *version;
 
-	cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1;
+	cp_addr += cp_blocks - 1;
 	err = get_checkpoint_version(sbi, cp_addr, &cp_block,
 					&cp_page_2, version);
 	if (err)
-- 
GitLab


From 0313bc278dac7cd9ce83a8d384581dc043156965 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Tue, 22 Mar 2022 09:17:20 -0700
Subject: [PATCH 1586/1586] Revert "random: block in /dev/urandom"

This reverts commit 6f98a4bfee72c22f50aedb39fb761567969865fe.

It turns out we still can't do this.  Way too many platforms that don't
have any real source of randomness at boot and no jitter entropy because
they don't even have a cycle counter.

As reported by Guenter Roeck:

 "This causes a large number of qemu boot test failures for various
  architectures (arm, m68k, microblaze, sparc32, xtensa are the ones I
  observed).

  Common denominator is that boot hangs at 'Saving random seed:'"

This isn't hugely unexpected - we tried it, it failed, so now we'll
revert it.

Link: https://lore.kernel.org/all/20220322155820.GA1745955@roeck-us.net/
Reported-and-bisected-by: Guenter Roeck <linux@roeck-us.net>
Cc: Jason Donenfeld <Jason@zx2c4.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/mem.c     |  2 +-
 drivers/char/random.c  | 72 ++++++++++++++++++++++++++++++++----------
 include/linux/random.h |  2 +-
 3 files changed, 57 insertions(+), 19 deletions(-)

diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index 9f586025dbe62..cc296f0823bd0 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -707,7 +707,7 @@ static const struct memdev {
 	 [5] = { "zero", 0666, &zero_fops, FMODE_NOWAIT },
 	 [7] = { "full", 0666, &full_fops, 0 },
 	 [8] = { "random", 0666, &random_fops, 0 },
-	 [9] = { "urandom", 0666, &random_fops, 0 },
+	 [9] = { "urandom", 0666, &urandom_fops, 0 },
 #ifdef CONFIG_PRINTK
 	[11] = { "kmsg", 0644, &kmsg_fops, 0 },
 #endif
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 85f4cca6e7075..66ce7c03a1424 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -89,14 +89,17 @@ static RAW_NOTIFIER_HEAD(random_ready_chain);
 /* Control how we warn userspace. */
 static struct ratelimit_state unseeded_warning =
 	RATELIMIT_STATE_INIT("warn_unseeded_randomness", HZ, 3);
+static struct ratelimit_state urandom_warning =
+	RATELIMIT_STATE_INIT("warn_urandom_randomness", HZ, 3);
 static int ratelimit_disable __read_mostly;
 module_param_named(ratelimit_disable, ratelimit_disable, int, 0644);
 MODULE_PARM_DESC(ratelimit_disable, "Disable random ratelimit suppression");
 
 /*
  * Returns whether or not the input pool has been seeded and thus guaranteed
- * to supply cryptographically secure random numbers. This applies to
- * get_random_bytes() and get_random_{u32,u64,int,long}().
+ * to supply cryptographically secure random numbers. This applies to: the
+ * /dev/urandom device, the get_random_bytes function, and the get_random_{u32,
+ * ,u64,int,long} family of functions.
  *
  * Returns: true if the input pool has been seeded.
  *          false if the input pool has not been seeded.
@@ -112,10 +115,10 @@ static void try_to_generate_entropy(void);
 
 /*
  * Wait for the input pool to be seeded and thus guaranteed to supply
- * cryptographically secure random numbers. This applies to
- * get_random_bytes() and get_random_{u32,u64,int,long}(). Using any
- * of these functions without first calling this function means that
- * the returned numbers might not be cryptographically secure.
+ * cryptographically secure random numbers. This applies to: the /dev/urandom
+ * device, the get_random_bytes function, and the get_random_{u32,u64,int,long}
+ * family of functions. Using any of these functions without first calling
+ * this function forfeits the guarantee of security.
  *
  * Returns: 0 if the input pool has been seeded.
  *          -ERESTARTSYS if the function was interrupted by a signal.
@@ -220,10 +223,10 @@ static void _warn_unseeded_randomness(const char *func_name, void *caller, void
  *	unsigned long get_random_long()
  *
  * These interfaces will return the requested number of random bytes
- * into the given buffer or as a return value. The returned numbers are
- * the same as those of getrandom(0). The integer family of functions may
- * be higher performance for one-off random integers, because they do a
- * bit of buffering and do not invoke reseeding.
+ * into the given buffer or as a return value. This is equivalent to
+ * a read from /dev/urandom. The integer family of functions may be
+ * higher performance for one-off random integers, because they do a
+ * bit of buffering.
  *
  *********************************************************************/
 
@@ -300,6 +303,11 @@ static void crng_reseed(bool force)
 				  unseeded_warning.missed);
 			unseeded_warning.missed = 0;
 		}
+		if (urandom_warning.missed) {
+			pr_notice("%d urandom warning(s) missed due to ratelimiting\n",
+				  urandom_warning.missed);
+			urandom_warning.missed = 0;
+		}
 	}
 }
 
@@ -979,8 +987,10 @@ int __init rand_initialize(void)
 		pr_notice("crng init done (trusting CPU's manufacturer)\n");
 	}
 
-	if (ratelimit_disable)
+	if (ratelimit_disable) {
+		urandom_warning.interval = 0;
 		unseeded_warning.interval = 0;
+	}
 	return 0;
 }
 
@@ -1420,16 +1430,20 @@ static void try_to_generate_entropy(void)
  * getrandom(2) is the primary modern interface into the RNG and should
  * be used in preference to anything else.
  *
- * Reading from /dev/random and /dev/urandom both have the same effect
- * as calling getrandom(2) with flags=0. (In earlier versions, however,
- * they each had different semantics.)
+ * Reading from /dev/random has the same functionality as calling
+ * getrandom(2) with flags=0. In earlier versions, however, it had
+ * vastly different semantics and should therefore be avoided, to
+ * prevent backwards compatibility issues.
+ *
+ * Reading from /dev/urandom has the same functionality as calling
+ * getrandom(2) with flags=GRND_INSECURE. Because it does not block
+ * waiting for the RNG to be ready, it should not be used.
  *
  * Writing to either /dev/random or /dev/urandom adds entropy to
  * the input pool but does not credit it.
  *
- * Polling on /dev/random or /dev/urandom indicates when the RNG
- * is initialized, on the read side, and when it wants new entropy,
- * on the write side.
+ * Polling on /dev/random indicates when the RNG is initialized, on
+ * the read side, and when it wants new entropy, on the write side.
  *
  * Both /dev/random and /dev/urandom have the same set of ioctls for
  * adding entropy, getting the entropy count, zeroing the count, and
@@ -1514,6 +1528,21 @@ static ssize_t random_write(struct file *file, const char __user *buffer,
 	return (ssize_t)count;
 }
 
+static ssize_t urandom_read(struct file *file, char __user *buf, size_t nbytes,
+			    loff_t *ppos)
+{
+	static int maxwarn = 10;
+
+	if (!crng_ready() && maxwarn > 0) {
+		maxwarn--;
+		if (__ratelimit(&urandom_warning))
+			pr_notice("%s: uninitialized urandom read (%zd bytes read)\n",
+				  current->comm, nbytes);
+	}
+
+	return get_random_bytes_user(buf, nbytes);
+}
+
 static ssize_t random_read(struct file *file, char __user *buf, size_t nbytes,
 			   loff_t *ppos)
 {
@@ -1600,6 +1629,15 @@ const struct file_operations random_fops = {
 	.llseek = noop_llseek,
 };
 
+const struct file_operations urandom_fops = {
+	.read = urandom_read,
+	.write = random_write,
+	.unlocked_ioctl = random_ioctl,
+	.compat_ioctl = compat_ptr_ioctl,
+	.fasync = random_fasync,
+	.llseek = noop_llseek,
+};
+
 
 /********************************************************************
  *
diff --git a/include/linux/random.h b/include/linux/random.h
index c0baffe7afb1d..f673fbb838b35 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -48,7 +48,7 @@ extern int unregister_random_ready_notifier(struct notifier_block *nb);
 extern size_t __must_check get_random_bytes_arch(void *buf, size_t nbytes);
 
 #ifndef MODULE
-extern const struct file_operations random_fops;
+extern const struct file_operations random_fops, urandom_fops;
 #endif
 
 u32 get_random_u32(void);
-- 
GitLab